[PATCH] Optimize verilog parsers, add verilog ifdef/elsif/else test-case

Please find below a couple of patches, the first is a test case that fails for the existing code, the second patch is an attempt to improve the performance of the verilog parser (and which revealed this test case).

From 3bc6d67c67d4661aba1b5ec7f75fcbffbcc7fb75 Mon Sep 17 00:00:00 2001                                                                                                                                                                                                                                               [105/1854]
From: Nick Brereton 
Date: Mon, 9 Sep 2019 22:36:55 -0400
Subject: [PATCH 1/2] Add verilog ifdef/elsif/else test-case

---
 testsuite/081vlog_ifdef_elsif_else/Makefile.ref | 66 +++++++++++++++++++++++++
 testsuite/081vlog_ifdef_elsif_else/Manifest.py  |  7 +++
 testsuite/081vlog_ifdef_elsif_else/mod_a.v      |  2 +
 testsuite/081vlog_ifdef_elsif_else/mod_b.v      |  2 +
 testsuite/081vlog_ifdef_elsif_else/mod_c.v      |  2 +
 testsuite/081vlog_ifdef_elsif_else/vlog.v       | 12 +++++
 testsuite/test_all.py                           |  3 ++
 7 files changed, 94 insertions(+)
 create mode 100644 testsuite/081vlog_ifdef_elsif_else/Makefile.ref
 create mode 100644 testsuite/081vlog_ifdef_elsif_else/Manifest.py
 create mode 100644 testsuite/081vlog_ifdef_elsif_else/mod_a.v
 create mode 100644 testsuite/081vlog_ifdef_elsif_else/mod_b.v
 create mode 100644 testsuite/081vlog_ifdef_elsif_else/mod_c.v
 create mode 100644 testsuite/081vlog_ifdef_elsif_else/vlog.v

diff --git a/testsuite/081vlog_ifdef_elsif_else/Makefile.ref b/testsuite/081vlog_ifdef_elsif_else/Makefile.ref
new file mode 100644
index 0000000..6b76e72
--- /dev/null
+++ b/testsuite/081vlog_ifdef_elsif_else/Makefile.ref
@@ -0,0 +1,66 @@
+########################################
+#  This file was generated by hdlmake  #
+#  http://ohwr.org/projects/hdl-make/  #
+########################################
+
+TOP_MODULE := gate
+PWD := $(shell pwd)
+
+MODELSIM_INI_PATH := ../linux_fakebin/..
+
+VCOM_FLAGS := -quiet -modelsimini modelsim.ini 
+VSIM_FLAGS := 
+VLOG_FLAGS := -quiet -modelsimini modelsim.ini 
+VMAP_FLAGS := -modelsimini modelsim.ini 
+#target for performing local simulation
+local: sim_pre_cmd simulation sim_post_cmd
+
+VERILOG_SRC := mod_a.v \
+vlog.v \
+
+VERILOG_OBJ := work/mod_a/.mod_a_v \
+work/vlog/.vlog_v \
+
+VHDL_SRC := 
+VHDL_OBJ := 
+INCLUDE_DIRS :=
+LIBS := work
+LIB_IND := work/.work
+
+simulation: modelsim.ini $(LIB_IND) $(VERILOG_OBJ) $(VHDL_OBJ)
+$(VERILOG_OBJ) : modelsim.ini
+$(VHDL_OBJ): $(LIB_IND) modelsim.ini
+
+modelsim.ini: $(MODELSIM_INI_PATH)/modelsim.ini
+               cp $< . 2>&1
+work/.work:
+       (vlib work && vmap $(VMAP_FLAGS) work && touch work/.work )|| rm -rf work 
+
+
+work/mod_a/.mod_a_v: mod_a.v
+               vlog -work work $(VLOG_FLAGS)  ${INCLUDE_DIRS} $<
+               @mkdir -p $(dir $@) && touch $@ 
+
+
+
+work/vlog/.vlog_v: vlog.v \
+work/mod_a/.mod_a_v
+               vlog -work work $(VLOG_FLAGS)  ${INCLUDE_DIRS} $<
+               @mkdir -p $(dir $@) && touch $@ 
+
+
+
+# USER SIM COMMANDS
+sim_pre_cmd:
+
+sim_post_cmd:
+
+
+CLEAN_TARGETS := $(LIBS) modelsim.ini transcript
+
+clean:
+               rm -rf $(CLEAN_TARGETS)
+mrproper: clean
+               rm -rf *.vcd *.wlf
+
+.PHONY: mrproper clean sim_pre_cmd sim_post_cmd simulation
diff --git a/testsuite/081vlog_ifdef_elsif_else/Manifest.py b/testsuite/081vlog_ifdef_elsif_else/Manifest.py
new file mode 100644
index 0000000..2d45369
--- /dev/null
+++ b/testsuite/081vlog_ifdef_elsif_else/Manifest.py
@@ -0,0 +1,7 @@
+action = "simulation"
+
+sim_tool="modelsim"
+
+top_module = "gate"
+
+files = [ "vlog.v", "mod_a.v", "mod_b.v", "mod_c.v" ]
diff --git a/testsuite/081vlog_ifdef_elsif_else/mod_a.v b/testsuite/081vlog_ifdef_elsif_else/mod_a.v
new file mode 100644
index 0000000..6f74ce4
--- /dev/null
+++ b/testsuite/081vlog_ifdef_elsif_else/mod_a.v
@@ -0,0 +1,2 @@
+module mod_a();
+endmodule
diff --git a/testsuite/081vlog_ifdef_elsif_else/mod_b.v b/testsuite/081vlog_ifdef_elsif_else/mod_b.v
new file mode 100644
index 0000000..423924a
--- /dev/null
+++ b/testsuite/081vlog_ifdef_elsif_else/mod_b.v
@@ -0,0 +1,2 @@
+module mod_b();
+endmodule
diff --git a/testsuite/081vlog_ifdef_elsif_else/mod_c.v b/testsuite/081vlog_ifdef_elsif_else/mod_c.v
new file mode 100644
index 0000000..7fffa65
--- /dev/null
+++ b/testsuite/081vlog_ifdef_elsif_else/mod_c.v
@@ -0,0 +1,2 @@
+module mod_c();
+endmodule
diff --git a/testsuite/081vlog_ifdef_elsif_else/vlog.v b/testsuite/081vlog_ifdef_elsif_else/vlog.v
new file mode 100644
index 0000000..1a7f63f
--- /dev/null
+++ b/testsuite/081vlog_ifdef_elsif_else/vlog.v
@@ -0,0 +1,12 @@
+`define DEF_A
+
+module gate;
+  // My comment
+`ifdef DEF_A
+mod_a mod_a ();
+`elsif DEF_B
+mod_b mod_b ();
+`else
+mod_c mod_c ();
+`endif
+endmodule
diff --git a/testsuite/test_all.py b/testsuite/test_all.py
index e53f215..f52034b 100644
--- a/testsuite/test_all.py
+++ b/testsuite/test_all.py
@@ -382,6 +382,9 @@ def test_err_vlog_recursion():
     with pytest.raises(SystemExit) as _:
         run([], path="080err_vlg_recursion")
 
+def test_vlog_ifdef_elsif_else_081():
+    run_compare(path="081vlog_ifdef_elsif_else")
+
 def test_dep_level():
     run(['list-files'], path="053vlog_dep_level")
     run(['list-files', '--delimiter', ','], path="053vlog_dep_level")
-- 
2.5.5

Below is the second patch, the aim here is to improve the performance of the verilog parser as it can be very slow on large files – I have one file which is an IP block, it is supplied as obsfuticated but otherwise plain verilog and is a single file weighing in at about 3MB with ~800k lines and it plays very badly with python’s strings and regexs – a single file which was taking ~6 minutes to parse is now parsed in ~10 seconds with this patch (however it is a particularly pathelogical case) time for a full project has scaled accordingly.

I seem to be able to get much better performance from python by using re.split to eliminate much of the disintersting text for verilog parser, or to in the case of the preprocessor to break the text up into pre-processor & verilog chunks. On the downside, this does lead to some farily lengthy regexs – the alternative would be to lean on either an external parser library (or a parser implemented in cython) neither of which I suspect is an appealing option.

From 6daf07b6375d7b1e83be867b35ec3e0885774d23 Mon Sep 17 00:00:00 2001                                                                                                                                                                                                                                               [308/1863]
From: Nick Brereton 
Date: Mon, 9 Sep 2019 22:38:05 -0400
Subject: [PATCH 2/2] vlog_parser: optimize preprocessor and main parser speed.

---
 hdlmake/vlog_parser.py | 292 +++++++++++++++++++++++++------------------------
 1 file changed, 149 insertions(+), 143 deletions(-)

diff --git a/hdlmake/vlog_parser.py b/hdlmake/vlog_parser.py
index 394f47f..bf7b33a 100644
--- a/hdlmake/vlog_parser.py
+++ b/hdlmake/vlog_parser.py
@@ -35,6 +35,7 @@ import logging
 from .new_dep_solver import DepParser
 from .dep_file import DepRelation
 from hdlmake.srcfile import create_source_file
+from collections import namedtuple
 import six
 
 
@@ -64,38 +65,13 @@ class VerilogPreprocessor(object):
             self.args = args
             self.expansion = expansion
 
-    class VLStack(object):
-
-        """Class that provides a simple binary (true/false) stack
-        for Verilog Defines for nested `ifdefs evaluation"""
-
-        def __init__(self):
-            self.stack = []
-
-        def push(self, v_element):
-            """Push element to the stack"""
-            self.stack.append(v_element)
-
-        def pop(self):
-            "Pop element from the stack"""
-            return self.stack.pop()
-
-        def all_true(self):
-            """Returns true if the stack is empty or all the contained
-            elements are True"""
-            return len(self.stack) == 0 or all(self.stack)
-
-        def flip(self):
-            """Toggle the following element"""
-            self.push(not self.pop())
-
     def __init__(self):
-        self.vpp_stack = self.VLStack()
         self.vlog_file = None
         # List of macro definitions
         self.vpp_macros = []
         # Dictionary of files sub-included by each file parsed
         self.vpp_filedeps = {}
+        self.macro_depth = 0
 
     def _find_macro(self, name):
         """Get the Verilog preprocessor macro named 'name'"""
@@ -153,121 +129,145 @@ class VerilogPreprocessor(object):
                 re.DOTALL | re.MULTILINE)
             return re.sub(pattern, replacer, text)
 
-        def _degapize(text):
-            """ Create a list in which the verilog sentences are                                                                                                                                                                                                                                                    [242/1863]
-            stored in an ordered way -- and without empty 'gaps'"""
-            lempty = re.compile(r"^\s*$")
-            cline = None
-            lines = []
-            for line_aux in text.splitlines(False):
-                if re.match(lempty, line_aux) is not None:
-                    continue
-                if line_aux.endswith('\\'):
-                    if cline is None:
-                        cline = ""
-                    cline += line_aux[:len(line_aux) - 1]
-                    continue
-                elif cline:
-                    line_aux = cline + line_aux
-                    cline = None
+        def _filter_protected_regions(text):
+            '''Remove regions demarked by `pragma protect being_protected/end_protected'''
+            return re.sub(r'\s*`pragma\s+protect\s+begin_protected.*`pragma\s+protect\s+end_protected\b', '', text, flags=re.DOTALL)
+
+        def _handle_macros(text):
+            '''Process text to implement ifdef/ifndef/elsif/else/endif & define logic'''
+            from pprint import pprint
+
+            vpp_match = namedtuple('vpp_match', ['mtext','pptype','ppident', 'macroident','ppargs','ppdefn','incfile','substid'])
+            vpp_macrodefn = namedtuple('vpp_macrodefn', ['params', 'expansion'])
+
+            def _munge_list(flist):
+                '''Take the split list & normalize into a list of string literals & seperator matches'''
+                from pprint import pprint
+                if not flist:
+                    return []
+                is_match = False # if nothing was present, split inserts an empty element
+                rlist = []
+                while flist:
+                    if is_match:
+                        if len(flist) < 9:
+                            raise Exception("_munge_list: insufficient arguments for match object")
+                        rlist.append(vpp_match(flist[0],flist[1],None if not flist[2] else flist[2].strip(),
+                                               flist[3],flist[4],'' if not flist[5] else flist[5].replace('\\\n',''),flist[6],flist[7]))
+                        flist = flist[9:]
+                    else:
+                        rlist.append(flist.pop(0))
+                    is_match = not is_match
+                return rlist
+
+            def _tok_string(text):
+                toks = re.split(r'''((?:`(ifn?def|elsif|else|endif|define|include)((?<=ifdef\b)\s+(?:\w+)|(?<=ifndef\b)\s+(?:\w+)|(?<=elsif\b)\s+(?:\w+)|(?:(?<=define\b)\s+(\w+)(?:\(([\w\s,]*)\))?[ \t]*((?:\\\n|[^\n\r])*)$)|(?<=include\b)\s+"(.+?)")?)|(?:`(\w+)(?:\(([\w\s,]*)\))?))''', text, flags=re.MULTILINE)
+                return _munge_list(toks)
+
+            parts = _tok_string(text)
+
+            # PP tokens
+            vpp_macros = {}
+
+            def _proc_macros_layer(parts, gmacros):
+                '''Process a level of macros'''
+                lbuf    = ""
+                front   = parts.pop(0)
+                enabled = True
+                handled = False # we've handled an if condition
+                lmacros = dict(gmacros)
+
+                # we should only arrive here because either the start of a string was seen
+                # or an ifdef was detected
+                if isinstance(front, str):
+                    lbuf = front                                                                                                                                                                                                                                                                                    [176/1863]
+                elif front.pptype in ('ifdef','ifndef'):
+                    enabled = front.ppident in lmacros
+                    if front.pptype == 'ifndef':
+                        enabled = not enabled
+                    handled = enabled
                 else:
-                    cline = None
-                lines.append(line_aux)
-            return lines
-        exps = {
-            "include": re.compile(r"^\s*`include\s+\"(.+)\""),
-            "define": re.compile(r"^\s*`define\s+(\w+)(?:\(([\w\s,]*)\))?(.*)"),
-            "ifdef_elsif": re.compile(r"^\s*`(ifdef|ifndef|elsif)\s+(\w+)[\s\S]*$"),
-            "endif_else": re.compile(r"^\s*`(endif|else)\s*$"),
-            "begin_protected": re.compile(r"^\s*`pragma\s*protect\s*begin_protected\s*$"),
-            "end_protected": re.compile(r"^\s*`pragma\s*protect\s*end_protected\s*$")}
-        vl_macro_expand = re.compile(r"`(\w+)(?:\(([\w\s,]*)\))?")
+                    raise Exception("verilog preprocessor: unexpected token '%s'" % front[1])
+
+                while parts:
+                    # handle further ifdefs recusively
+                    front = parts.pop(0)
+                    if isinstance(front, str):
+                        if enabled:
+                            lbuf += front
+                    elif front.pptype in ('ifdef', 'ifndef'):
+                        # ifdef requires a new level, reinsert element & parse next level
+                        parts.insert(0, front)
+                        ctext, parts, cmacros = _proc_macros_layer(parts, lmacros)
+                        if enabled:
+                            lbuf    += ctext
+                            lmacros  = cmacros
+                    elif front.pptype == 'elsif':
+                        if not handled:
+                            enabled = front.ppident in lmacros
+                            if front.pptype == 'ifndef':
+                                enabled = not enabled
+                            handled = enabled
+                        else: # if a clause was already selected, skip this one
+                            enabled = False
+                    elif front.pptype == 'else':
+                        if not handled:
+                            enabled = True
+                            handled = True
+                        else:
+                            enabled = False
+                    elif front.pptype == 'endif':
+                        return lbuf, parts, lmacros
+                    elif front.pptype == 'define':
+                        if enabled:
+                            if front.macroident in self.vpp_keywords:
+                                raise Exception("Attempt to `define a reserved preprocessor keyword")
+                            lmacros[front.macroident] = vpp_macrodefn(front.ppargs, front.ppdefn)
+                            lbuf += front.mtext.replace('\\\n','')
+                    elif front.pptype == "include":
+                        if enabled:
+                            # maybe add a check for recusion here?
+                            included_file_path = self._search_include(front.incfile, os.path.dirname(file_name))
+                            logging.debug("File being parsed %s (library %s) "
+                                          "includes %s",
+                                          file_name, library, included_file_path)
+                            # add include file to the dependancies
+                            self.vpp_filedeps[file_name + library].append(
+                                included_file_path)
+                            # tokenize the file & prepend to the current stack
+                            decomment = _remove_comment(open(included_file_path).read())                                                                                                                                                                                                                            [110/1863]
+                            tokens = _tok_string(decomment)
+                            parts = tokens + parts
+                    elif front.pptype == 'pop_macro':
+                        self.macro_depth -= 1
+                        assert self.macro_depth >= 0
+                    elif front.substid is not None:
+                        if enabled:
+                            # if not front.substid in lmacros:
+                            #     raise Exception("substitute unknown identifier! (%s)" % str(front))
+                            if front.substid in lmacros:
+                                tokens = _tok_string(lmacros[front.substid].expansion)
+                                tokens.append(vpp_match(None, 'pop_macro', front.substid, None, None, None, None, None))
+                                parts = tokens + parts
+                                self.macro_depth += 1
+                                if self.macro_depth > 30:
+                                    raise Exception("Recursion level exceeded. Nested `includes?")
+                            else:
+                                lbuf += front.mtext
+                    else:
+                        raise Exception("verilog preprocessor: unexpected token '%s' from %s" % (front[1], str(front)))
+
+                return lbuf, parts, lmacros
+
+            return re.sub(r'^\s*\n','', _proc_macros_layer(parts, vpp_macros)[0], flags=re.MULTILINE)
+
         # init dependencies
         self.vpp_filedeps[file_name + library] = []
         cur_iter = 0
         logging.debug("preprocess file %s (of length %d) in library %s",
                       file_name, len(file_content), library)
-        buf = _remove_comment(file_content)
-        protected_region = False
-        while True:
-            new_buf = ""
-            n_expansions = 0
-            cur_iter += 1
-            if cur_iter > 30:
-                raise Exception("Recursion level exceeded. Nested `includes?")
-            for line in _degapize(buf):
-                matches = {}
-                last = None
-                for statement, stmt_regex in six.iteritems(exps):
-                    matches[statement] = re.match(stmt_regex, line)
-                    if matches[statement]:
-                        last = matches[statement]
-                if matches["begin_protected"]:
-                    protected_region = True
-                    continue
-                if matches["end_protected"]:
-                    protected_region = False
-                    continue
-                if protected_region:
-                    continue
-                if matches["ifdef_elsif"]:
-                    cond_true = self._find_macro(last.group(2)) is not None
-                    if last.group(1) == "ifndef":
-                        cond_true = not cond_true
-                    elif last.group(1) == "elsif":
-                        self.vpp_stack.pop()
-                    self.vpp_stack.push(cond_true)
-                    continue
-                elif matches["endif_else"]:
-                    if last.group(1) == "endif":
-                        self.vpp_stack.pop()
-                    else:  # `else
-                        self.vpp_stack.flip()                                                                                                                                                                                                                                                                        [44/1863]
-                    continue
-                if not self.vpp_stack.all_true():
-                    continue
-                if matches["include"]:
-                    included_file_path = self._search_include(
-                        last.group(1), os.path.dirname(file_name))
-                    logging.debug("File being parsed %s (library %s) "
-                                  "includes %s",
-                                  file_name, library, included_file_path)
-                    line = self._preprocess_file(
-                        file_content=open(included_file_path, "r").read(),
-                        file_name=included_file_path, library=library)
-                    self.vpp_filedeps[file_name + library].append(
-                        included_file_path)
-                    # add the whole include chain to the dependencies of the
-                    # currently parsed file
-                    self.vpp_filedeps[file_name + library].extend(
-                        self.vpp_filedeps[included_file_path + library])
-                    new_buf += line + '\n'
-                    n_expansions += 1
-                    continue
-                elif matches["define"]:
-                    self._parse_macro_def(matches["define"])
-
-                def do_expand(what):
-                    """Function to be applied by re.sub to every match of the
-                    vl_macro_expand in the Verilof code -- group() returns
-                    positive matches as indexed plain strings."""
-                    if what.group(1) in self.vpp_keywords:
-                        return '`' + what.group(1)
-                    macro = self._find_macro(what.group(1))
-                    if macro:
-                        return macro.expansion
-                    else:
-                        logging.error("No expansion for macro '`%s' (%s) (%s)",
-                                      what.group(1), line[:50]
-                                      if len(line) > 50 else line, file_name)
-                repl_line = re.sub(vl_macro_expand, do_expand, line)
-                new_buf += repl_line + '\n'
-                # if there was any expansion, then keep on iterating
-                if repl_line != line:
-                    n_expansions += 1
-            buf = new_buf
-            if n_expansions == 0:
-                return new_buf
+        buf = _filter_protected_regions(_remove_comment(file_content))
+
+        return _handle_macros(buf)
 
     def preprocess(self, vlog_file):
         """Assign the provided 'vlog_file' to the associated class property
@@ -573,7 +573,7 @@ class VerilogParser(DepParser):
             dep_file.add_relation(
                 DepRelation("%s.%s" % (dep_file.library, text.group(1)),
                             DepRelation.USE, DepRelation.PACKAGE))
-        re.subn(import_pattern, do_imports, buf)
+        import_pattern.subn(do_imports, buf)
         # packages
         m_inside_package = re.compile(
             r"package\s+(\w+)\s*(?:\(.*?\))?\s*(.+?)endpackage",
@@ -589,17 +589,20 @@ class VerilogParser(DepParser):
             dep_file.add_relation(
                 DepRelation("%s.%s" % (dep_file.library, text.group(1)),
                             DepRelation.PROVIDE, DepRelation.PACKAGE))
-        re.subn(m_inside_package, do_package, buf)
+        m_inside_package.subn(do_package, buf)
 
         # modules and instantiations
         m_inside_module = re.compile(
-            r"(?:module|interface)\s+(\w+)\s*(?:\(.*?\))?\s*(.+?)"
+            r"(?:module|interface)\s+(\w+)\s*(?:#\s*\(.*?\)\s*)?(?:\(.*?\))?\s*;\s*(.+?)"
             r"(?:endmodule|endinterface)",
             re.DOTALL | re.MULTILINE)
         m_instantiation = re.compile(
-            r"(?:\A|\s*)\s*(\w+)\s+(?:#\s*\(.*?\)\s*)?(\w+)\s*\(.*?\)\s*",
+            r"\s*\b(\w+)\s+(?:#\s*\(.*?\)\s*)?(\w+)\s*(?:\[.*?\]\s*)?\(.*?\)$",
             re.DOTALL | re.MULTILINE)
 
+        m_stmt = re.compile(r'''(?:\s*(?:(?:\b(?:function|task)\b.*?\bend(?:function|task)\b)|(?:\bbegin(?:\s*:\s*\w+)?)|(?:\bend\b(?:\s*:\s*\w+)?)|(?:end(?:generate|case)\b)|(?:\b(?:case|if|for)\s*\(.*?\))|(?:\b(?:else|generate)\b)|\b(?:assign|localparam|wire|logic|reg)\b[^;]*?(?:=.*?)?;|\balways(?:_ff|_la
tch|_comb)?\b\s*(?:@\s*(?:\*|(?:\(.*?\))))?|;)\s*)+''',
+                            re.MULTILINE | re.DOTALL)
+
         def do_module(text):
             """Function to be applied by re.sub to every match of the
             m_inside_module in the Verilog code -- group() returns
@@ -620,12 +623,15 @@ class VerilogParser(DepParser):
                 if mod_name in self.reserved_words:
                     return
                 logging.debug("-> instantiates %s.%s as %s",
-                              dep_file.library, mod_name, text.group(2))
+                              dep_file.library, text.group(1), text.group(2))
                 dep_file.add_relation(
-                    DepRelation("%s.%s" % (dep_file.library, mod_name),
+                    DepRelation("%s.%s" % (dep_file.library, text.group(1)),
                                 DepRelation.USE, DepRelation.MODULE))
-            re.subn(m_instantiation, do_inst, text.group(2))
-        re.subn(m_inside_module, do_module, buf)
+            for stmt in [x for x in m_stmt.split(text.group(2)) if x and x[-1] == ")"]:
+                match = m_instantiation.match(stmt)
+                if match:
+                    do_inst(match)
+        m_inside_module.subn(do_module, buf)
         dep_file.add_relation(
             DepRelation(
                 dep_file.path,
-- 
2.5.5

Thank you for the patches. I suppose you’d like to have the first issue fixed ?

I will have a look at it.

Tristan.

Hi Tristan,

I believe that the second patch will fix the first problem, amongst other things; I fear that the fix in the original code might be non-trivial, but ymmv.

Nick

Ah, that’s great. I wrongly thought it was only for speed improvement. So I have just to merge.

Tristan.

Can you upload patch 2/2 ? The inline copy is corrupted.

Thanks!

Tristan, unfortunately when I use the upload button I get a message saying “Sorry, new users can not upload attachments.”

I’ll try posting it again in the vain hope that the gods will allow it this time, otherwise I suppose I either have to qualify as not a new user (or perhaps someone has to click a button to make that happen?)

Nick

From 6daf07b6375d7b1e83be867b35ec3e0885774d23 Mon Sep 17 00:00:00 2001
From: Nick Brereton 
Date: Mon, 9 Sep 2019 22:38:05 -0400
Subject: [PATCH 2/2] vlog_parser: optimize preprocessor and main parser speed.

---
 hdlmake/vlog_parser.py | 292 +++++++++++++++++++++++++------------------------
 1 file changed, 149 insertions(+), 143 deletions(-)

diff --git a/hdlmake/vlog_parser.py b/hdlmake/vlog_parser.py
index 394f47f..bf7b33a 100644
--- a/hdlmake/vlog_parser.py
+++ b/hdlmake/vlog_parser.py
@@ -35,6 +35,7 @@ import logging
 from .new_dep_solver import DepParser
 from .dep_file import DepRelation
 from hdlmake.srcfile import create_source_file
+from collections import namedtuple
 import six
 
 
@@ -64,38 +65,13 @@ class VerilogPreprocessor(object):
             self.args = args
             self.expansion = expansion
 
-    class VLStack(object):
-
-        """Class that provides a simple binary (true/false) stack
-        for Verilog Defines for nested `ifdefs evaluation"""
-
-        def __init__(self):
-            self.stack = []
-
-        def push(self, v_element):
-            """Push element to the stack"""
-            self.stack.append(v_element)
-
-        def pop(self):
-            "Pop element from the stack"""
-            return self.stack.pop()
-
-        def all_true(self):
-            """Returns true if the stack is empty or all the contained
-            elements are True"""
-            return len(self.stack) == 0 or all(self.stack)
-
-        def flip(self):
-            """Toggle the following element"""
-            self.push(not self.pop())
-
     def __init__(self):
-        self.vpp_stack = self.VLStack()
         self.vlog_file = None
         # List of macro definitions
         self.vpp_macros = []
         # Dictionary of files sub-included by each file parsed
         self.vpp_filedeps = {}
+        self.macro_depth = 0
 
     def _find_macro(self, name):
         """Get the Verilog preprocessor macro named 'name'"""
@@ -153,121 +129,145 @@ class VerilogPreprocessor(object):
                 re.DOTALL | re.MULTILINE)
             return re.sub(pattern, replacer, text)
 
-        def _degapize(text):
-            """ Create a list in which the verilog sentences are
-            stored in an ordered way -- and without empty 'gaps'"""
-            lempty = re.compile(r"^\s*$")
-            cline = None
-            lines = []
-            for line_aux in text.splitlines(False):
-                if re.match(lempty, line_aux) is not None:
-                    continue
-                if line_aux.endswith('\\'):
-                    if cline is None:
-                        cline = ""
-                    cline += line_aux[:len(line_aux) - 1]
-                    continue
-                elif cline:
-                    line_aux = cline + line_aux
-                    cline = None
+        def _filter_protected_regions(text):
+            '''Remove regions demarked by `pragma protect being_protected/end_protected'''
+            return re.sub(r'\s*`pragma\s+protect\s+begin_protected.*`pragma\s+protect\s+end_protected\b', '', text, flags=re.DOTALL)
+
+        def _handle_macros(text):
+            '''Process text to implement ifdef/ifndef/elsif/else/endif & define logic'''
+            from pprint import pprint
+
+            vpp_match = namedtuple('vpp_match', ['mtext','pptype','ppident', 'macroident','ppargs','ppdefn','incfile','substid'])
+            vpp_macrodefn = namedtuple('vpp_macrodefn', ['params', 'expansion'])
+
+            def _munge_list(flist):
+                '''Take the split list & normalize into a list of string literals & seperator matches'''
+                from pprint import pprint
+                if not flist:
+                    return []
+                is_match = False # if nothing was present, split inserts an empty element
+                rlist = []
+                while flist:
+                    if is_match:
+                        if len(flist) < 9:
+                            raise Exception("_munge_list: insufficient arguments for match object")
+                        rlist.append(vpp_match(flist[0],flist[1],None if not flist[2] else flist[2].strip(),
+                                               flist[3],flist[4],'' if not flist[5] else flist[5].replace('\\\n',''),flist[6],flist[7]))
+                        flist = flist[9:]
+                    else:
+                        rlist.append(flist.pop(0))
+                    is_match = not is_match
+                return rlist
+
+            def _tok_string(text):
+                toks = re.split(r'''((?:`(ifn?def|elsif|else|endif|define|include)((?<=ifdef\b)\s+(?:\w+)|(?<=ifndef\b)\s+(?:\w+)|(?<=elsif\b)\s+(?:\w+)|(?:(?<=define\b)\s+(\w+)(?:\(([\w\s,]*)\))?[ \t]*((?:\\\n|[^\n\r])*)$)|(?<=include\b)\s+"(.+?)")?)|(?:`(\w+)(?:\(([\w\s,]*)\))?))''', text, flags=re.MULTILINE)
+                return _munge_list(toks)
+
+            parts = _tok_string(text)
+
+            # PP tokens
+            vpp_macros = {}
+
+            def _proc_macros_layer(parts, gmacros):
+                '''Process a level of macros'''
+                lbuf    = ""
+                front   = parts.pop(0)
+                enabled = True
+                handled = False # we've handled an if condition
+                lmacros = dict(gmacros)
+
+                # we should only arrive here because either the start of a string was seen
+                # or an ifdef was detected
+                if isinstance(front, str):
+                    lbuf = front
+                elif front.pptype in ('ifdef','ifndef'):
+                    enabled = front.ppident in lmacros
+                    if front.pptype == 'ifndef':
+                        enabled = not enabled
+                    handled = enabled
                 else:
-                    cline = None
-                lines.append(line_aux)
-            return lines
-        exps = {
-            "include": re.compile(r"^\s*`include\s+\"(.+)\""),
-            "define": re.compile(r"^\s*`define\s+(\w+)(?:\(([\w\s,]*)\))?(.*)"),
-            "ifdef_elsif": re.compile(r"^\s*`(ifdef|ifndef|elsif)\s+(\w+)[\s\S]*$"),
-            "endif_else": re.compile(r"^\s*`(endif|else)\s*$"),
-            "begin_protected": re.compile(r"^\s*`pragma\s*protect\s*begin_protected\s*$"),
-            "end_protected": re.compile(r"^\s*`pragma\s*protect\s*end_protected\s*$")}
-        vl_macro_expand = re.compile(r"`(\w+)(?:\(([\w\s,]*)\))?")
+                    raise Exception("verilog preprocessor: unexpected token '%s'" % front[1])
+
+                while parts:
+                    # handle further ifdefs recusively
+                    front = parts.pop(0)
+                    if isinstance(front, str):
+                        if enabled:
+                            lbuf += front
+                    elif front.pptype in ('ifdef', 'ifndef'):
+                        # ifdef requires a new level, reinsert element & parse next level
+                        parts.insert(0, front)
+                        ctext, parts, cmacros = _proc_macros_layer(parts, lmacros)
+                        if enabled:
+                            lbuf    += ctext
+                            lmacros  = cmacros
+                    elif front.pptype == 'elsif':
+                        if not handled:
+                            enabled = front.ppident in lmacros
+                            if front.pptype == 'ifndef':
+                                enabled = not enabled
+                            handled = enabled
+                        else: # if a clause was already selected, skip this one
+                            enabled = False
+                    elif front.pptype == 'else':
+                        if not handled:
+                            enabled = True
+                            handled = True
+                        else:
+                            enabled = False
+                    elif front.pptype == 'endif':
+                        return lbuf, parts, lmacros
+                    elif front.pptype == 'define':
+                        if enabled:
+                            if front.macroident in self.vpp_keywords:
+                                raise Exception("Attempt to `define a reserved preprocessor keyword")
+                            lmacros[front.macroident] = vpp_macrodefn(front.ppargs, front.ppdefn)
+                            lbuf += front.mtext.replace('\\\n','')
+                    elif front.pptype == "include":
+                        if enabled:
+                            # maybe add a check for recusion here?
+                            included_file_path = self._search_include(front.incfile, os.path.dirname(file_name))
+                            logging.debug("File being parsed %s (library %s) "
+                                          "includes %s",
+                                          file_name, library, included_file_path)
+                            # add include file to the dependancies
+                            self.vpp_filedeps[file_name + library].append(
+                                included_file_path)
+                            # tokenize the file & prepend to the current stack
+                            decomment = _remove_comment(open(included_file_path).read())
+                            tokens = _tok_string(decomment)
+                            parts = tokens + parts
+                    elif front.pptype == 'pop_macro':
+                        self.macro_depth -= 1
+                        assert self.macro_depth >= 0
+                    elif front.substid is not None:
+                        if enabled:
+                            # if not front.substid in lmacros:
+                            #     raise Exception("substitute unknown identifier! (%s)" % str(front))
+                            if front.substid in lmacros:
+                                tokens = _tok_string(lmacros[front.substid].expansion)
+                                tokens.append(vpp_match(None, 'pop_macro', front.substid, None, None, None, None, None))
+                                parts = tokens + parts
+                                self.macro_depth += 1
+                                if self.macro_depth > 30:
+                                    raise Exception("Recursion level exceeded. Nested `includes?")
+                            else:
+                                lbuf += front.mtext
+                    else:
+                        raise Exception("verilog preprocessor: unexpected token '%s' from %s" % (front[1], str(front)))
+
+                return lbuf, parts, lmacros
+
+            return re.sub(r'^\s*\n','', _proc_macros_layer(parts, vpp_macros)[0], flags=re.MULTILINE)
+
         # init dependencies
         self.vpp_filedeps[file_name + library] = []
         cur_iter = 0
         logging.debug("preprocess file %s (of length %d) in library %s",
                       file_name, len(file_content), library)
-        buf = _remove_comment(file_content)
-        protected_region = False
-        while True:
-            new_buf = ""
-            n_expansions = 0
-            cur_iter += 1
-            if cur_iter > 30:
-                raise Exception("Recursion level exceeded. Nested `includes?")
-            for line in _degapize(buf):
-                matches = {}
-                last = None
-                for statement, stmt_regex in six.iteritems(exps):
-                    matches[statement] = re.match(stmt_regex, line)
-                    if matches[statement]:
-                        last = matches[statement]
-                if matches["begin_protected"]:
-                    protected_region = True
-                    continue
-                if matches["end_protected"]:
-                    protected_region = False
-                    continue
-                if protected_region:
-                    continue
-                if matches["ifdef_elsif"]:
-                    cond_true = self._find_macro(last.group(2)) is not None
-                    if last.group(1) == "ifndef":
-                        cond_true = not cond_true
-                    elif last.group(1) == "elsif":
-                        self.vpp_stack.pop()
-                    self.vpp_stack.push(cond_true)
-                    continue
-                elif matches["endif_else"]:
-                    if last.group(1) == "endif":
-                        self.vpp_stack.pop()
-                    else:  # `else
-                        self.vpp_stack.flip()
-                    continue
-                if not self.vpp_stack.all_true():
-                    continue
-                if matches["include"]:
-                    included_file_path = self._search_include(
-                        last.group(1), os.path.dirname(file_name))
-                    logging.debug("File being parsed %s (library %s) "
-                                  "includes %s",
-                                  file_name, library, included_file_path)
-                    line = self._preprocess_file(
-                        file_content=open(included_file_path, "r").read(),
-                        file_name=included_file_path, library=library)
-                    self.vpp_filedeps[file_name + library].append(
-                        included_file_path)
-                    # add the whole include chain to the dependencies of the
-                    # currently parsed file
-                    self.vpp_filedeps[file_name + library].extend(
-                        self.vpp_filedeps[included_file_path + library])
-                    new_buf += line + '\n'
-                    n_expansions += 1
-                    continue
-                elif matches["define"]:
-                    self._parse_macro_def(matches["define"])
-
-                def do_expand(what):
-                    """Function to be applied by re.sub to every match of the
-                    vl_macro_expand in the Verilof code -- group() returns
-                    positive matches as indexed plain strings."""
-                    if what.group(1) in self.vpp_keywords:
-                        return '`' + what.group(1)
-                    macro = self._find_macro(what.group(1))
-                    if macro:
-                        return macro.expansion
-                    else:
-                        logging.error("No expansion for macro '`%s' (%s) (%s)",
-                                      what.group(1), line[:50]
-                                      if len(line) > 50 else line, file_name)
-                repl_line = re.sub(vl_macro_expand, do_expand, line)
-                new_buf += repl_line + '\n'
-                # if there was any expansion, then keep on iterating
-                if repl_line != line:
-                    n_expansions += 1
-            buf = new_buf
-            if n_expansions == 0:
-                return new_buf
+        buf = _filter_protected_regions(_remove_comment(file_content))
+
+        return _handle_macros(buf)
 
     def preprocess(self, vlog_file):
         """Assign the provided 'vlog_file' to the associated class property
@@ -573,7 +573,7 @@ class VerilogParser(DepParser):
             dep_file.add_relation(
                 DepRelation("%s.%s" % (dep_file.library, text.group(1)),
                             DepRelation.USE, DepRelation.PACKAGE))
-        re.subn(import_pattern, do_imports, buf)
+        import_pattern.subn(do_imports, buf)
         # packages
         m_inside_package = re.compile(
             r"package\s+(\w+)\s*(?:\(.*?\))?\s*(.+?)endpackage",
@@ -589,17 +589,20 @@ class VerilogParser(DepParser):
             dep_file.add_relation(
                 DepRelation("%s.%s" % (dep_file.library, text.group(1)),
                             DepRelation.PROVIDE, DepRelation.PACKAGE))
-        re.subn(m_inside_package, do_package, buf)
+        m_inside_package.subn(do_package, buf)
 
         # modules and instantiations
         m_inside_module = re.compile(
-            r"(?:module|interface)\s+(\w+)\s*(?:\(.*?\))?\s*(.+?)"
+            r"(?:module|interface)\s+(\w+)\s*(?:#\s*\(.*?\)\s*)?(?:\(.*?\))?\s*;\s*(.+?)"
             r"(?:endmodule|endinterface)",
             re.DOTALL | re.MULTILINE)
         m_instantiation = re.compile(
-            r"(?:\A|\s*)\s*(\w+)\s+(?:#\s*\(.*?\)\s*)?(\w+)\s*\(.*?\)\s*",
+            r"\s*\b(\w+)\s+(?:#\s*\(.*?\)\s*)?(\w+)\s*(?:\[.*?\]\s*)?\(.*?\)$",
             re.DOTALL | re.MULTILINE)
 
+        m_stmt = re.compile(r'''(?:\s*(?:(?:\b(?:function|task)\b.*?\bend(?:function|task)\b)|(?:\bbegin(?:\s*:\s*\w+)?)|(?:\bend\b(?:\s*:\s*\w+)?)|(?:end(?:generate|case)\b)|(?:\b(?:case|if|for)\s*\(.*?\))|(?:\b(?:else|generate)\b)|\b(?:assign|localparam|wire|logic|reg)\b[^;]*?(?:=.*?)?;|\balways(?:_ff|_latch|_comb)?\b\s*(?:@\s*(?:\*|(?:\(.*?\))))?|;)\s*)+''',
+                            re.MULTILINE | re.DOTALL)
+
         def do_module(text):
             """Function to be applied by re.sub to every match of the
             m_inside_module in the Verilog code -- group() returns
@@ -620,12 +623,15 @@ class VerilogParser(DepParser):
                 if mod_name in self.reserved_words:
                     return
                 logging.debug("-> instantiates %s.%s as %s",
-                              dep_file.library, mod_name, text.group(2))
+                              dep_file.library, text.group(1), text.group(2))
                 dep_file.add_relation(
-                    DepRelation("%s.%s" % (dep_file.library, mod_name),
+                    DepRelation("%s.%s" % (dep_file.library, text.group(1)),
                                 DepRelation.USE, DepRelation.MODULE))
-            re.subn(m_instantiation, do_inst, text.group(2))
-        re.subn(m_inside_module, do_module, buf)
+            for stmt in [x for x in m_stmt.split(text.group(2)) if x and x[-1] == ")"]:
+                match = m_instantiation.match(stmt)
+                if match:
+                    do_inst(match)
+        m_inside_module.subn(do_module, buf)
         dep_file.add_relation(
             DepRelation(
                 dep_file.path,
-- 
2.5.5

Now merged and pushed. I finally get the patch from the email (instead of the web page).

Thanks!
Tristan.

Hi Nick!

I am David Cobas, maintainer of the ohwr.org site/forums. I just performed an upload using this identity (which is just a fake account with no administrative privileges whatsoever, and created some minutes ago). I am trying to understand why the site is refusing your uploads.

Here we have an uploaded. simple attachment containing just random text:
xxxx.patch (17.5 KB)

I will keep you posted as to what the parameters preventing your uploading might be.
Let me know (at my true address, dcobas@cern.ch) about any other information that might be relevant to the problem.

Hi Tristan & David,
Many thanks.