Port check_grep.sh from an increasingly complicated shell script to Python. (#19875)

* Port check_grep.sh from an increasingly complicated shell script to Python. * Exclude dmdoc-formatted multi-line comments from whitespace check * Add comment exception for both WS rules * Fixed mixed tab/space regex. * hate hate hate hate * Add doctests to preserve my sanity * Remove checks involving newlines. * Add back newline-involved tests. Cry. * Several large-scale changes. - Remove all the map-related changes for now, in order to get the code formatting changes in sooner and prevent more issues from leaking in while the PR issues are burned down. - Flip the method for processing files. Instead of trying to sort errors by type, running over each file for each error, I'm simply breaking from the original output format and returning a lint-like set of errors. In this design, each file is opened, the checks are run, and the file handle is reused for each check. - Standardize failures so that the filename, line number, and message can all be retrieved and formatted nicely for stdout. - Compile regexes. The consensus seeeeeeeems to be that there's a negligible difference in performance when compiling regexes but there's a possibility that running them a lot of times may be faster, and I could actually time it, but ugggggggh.
2025-12-22 08:11:06 +00:00 · 2023-07-07 12:31:05 -04:00
parent cdce533eb7
commit edcaaf4241
2 changed files with 133 additions and 1 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -26,7 +26,7 @@ jobs:
        run: |
          tools/ci/check_json.sh
          tools/ci/build_tgui.sh
-          tools/ci/check_grep.sh
+          python3 tools/ci/check_grep2.py
          python3 tools/ci/check_line_endings.py
          python3 tools/ci/check_file_names.py
          python3 tools/ci/unticked_files.py ${GITHUB_WORKSPACE}
--- a/tools/ci/check_grep2.py
+++ b/tools/ci/check_grep2.py
@@ -0,0 +1,132 @@
 import glob
 import re
 import os
 import sys
 import time
 from collections import namedtuple
 # A single lint finding: the 1-based line number it was found on and the
 # message to print for it.
 Failure = namedtuple("Failure", ["lineno", "message"])

 # ANSI escape sequences used to colour terminal output.
 RED = "\033[0;31m"
 GREEN = "\033[0;32m"
 BLUE = "\033[0;34m"
 NC = "\033[0m"  # No Color

 # Base name of the one file that is exempt from the proc reference check.
 IGNORE_515_PROC_MARKER_FILENAME = "__byond_version_compat.dm"
 CHECK_515_PROC_MARKER_RE = re.compile(r"\.proc/")


 def check_515_proc_syntax(lines):
    """
    Check for raw ``.proc/`` proc references anywhere in a line.

    ``lines`` is any iterable of strings (a list of lines or an open text
    file). Returns a Failure for the first offending line, or None when no
    line contains the marker.

    >>> bool(check_515_proc_syntax(["\\tcall(src, .proc/foo)()"]))
    True
    >>> bool(check_515_proc_syntax([".proc/foo"]))
    True
    >>> bool(check_515_proc_syntax(["/mob/proc/foo()"]))
    False
    """
    for lineno, line in enumerate(lines, start=1):
        # search(), not match(): match() only looks at the start of the
        # line, and the marker normally appears in the middle of one.
        if CHECK_515_PROC_MARKER_RE.search(line):
            return Failure(lineno, "Outdated proc reference use detected in code. Please use proc reference helpers.")
    return None
 # Two or more leading spaces, followed either by the end of the line or by a
 # character that is neither another space nor an asterisk. Excluding the
 # space from the final character class stops the regex from backtracking to
 # "two spaces + a third space", which would wrongly flag a dmdoc line that
 # has three or more spaces before its asterisk.
 CHECK_SPACE_INDENTATION_RE = re.compile(r"^ {2,}(?:[^ \*]|$)")


 def check_space_indentation(lines):
    """
    Check specifically for space-significant indentation. Excludes dmdoc
    block comment lines so long as there is an asterisk immediately after the
    leading spaces, however many leading spaces there are.

    ``lines`` is any iterable of strings. Returns a Failure for the first
    offending line, or None when there is none.

    >>> bool(check_space_indentation(["  foo"]))
    True
    >>> bool(check_space_indentation(["\\tfoo"]))
    False
    >>> bool(check_space_indentation(["  * foo"]))
    False
    >>> bool(check_space_indentation(["     * foo"]))
    False
    """
    for lineno, line in enumerate(lines, start=1):
        if CHECK_SPACE_INDENTATION_RE.match(line):
            return Failure(lineno, "Space indentation detected, please use tab indentation.")
    return None
 # Leading tabs followed by a space, or leading spaces followed by a tab, then
 # any further whitespace, then a character that is neither whitespace nor an
 # asterisk.
 CHECK_MIXED_INDENTATION_RE = re.compile(r"^(\t+ | +\t)\s*[^\s\*]")


 def check_mixed_indentation(lines):
    """
    Report the first line whose leading whitespace mixes tabs and spaces.

    Lines where the first non-whitespace character is an asterisk (dmdoc
    block comment continuation lines) are not reported. ``lines`` is any
    iterable of strings; the result is a Failure carrying the 1-based line
    number, or None when no line matches.

    >>> bool(check_mixed_indentation(["\\t\\t foo"]))
    True
    >>> bool(check_mixed_indentation(["\\t \\t foo"]))
    True
    >>> bool(check_mixed_indentation(["\\t // foo"]))
    True
    >>> bool(check_mixed_indentation([" \\tfoo"]))
    True
    >>> bool(check_mixed_indentation(["  \\t  foo"]))
    True
    >>> bool(check_mixed_indentation(["\\t  * foo"]))
    False
    >>> bool(check_mixed_indentation(["\\t\\t* foo"]))
    False
    >>> bool(check_mixed_indentation(["\\t \\t  * foo"]))
    False
    """
    lineno = 0
    for text in lines:
        lineno += 1
        if CHECK_MIXED_INDENTATION_RE.match(text) is None:
            continue
        return Failure(lineno, "Mixed <tab><space> indentation detected, please stick to tab indentation.")
    return None
 def check_trailing_newlines(lines):
    """
    Check that the final line ends with a newline character.

    ``lines`` is any iterable of strings, including an open text file or a
    one-shot iterator. Returns None for empty input or when the last line
    ends with a newline; otherwise returns a Failure whose line number is
    the number of lines, i.e. the last line.
    """
    # Materialise first: a file handle or iterator is always truthy and has
    # no len(), so the emptiness test and the line count below only work on
    # a real sequence.
    lines = list(lines)
    if not lines:
        return None
    if not lines[-1].endswith("\n"):
        return Failure(len(lines), "Missing a trailing newline")
    return None
 # Zero or more slashes at the very start of the line, followed by "var/".
 GLOBAL_VARS_RE = re.compile(r"^/*var/")


 def check_global_vars(lines):
    """
    Report the first line that begins with a ``var/`` declaration,
    optionally preceded by slashes.

    ``lines`` is any iterable of strings. Returns a Failure carrying the
    1-based line number, or None when no line matches.
    """
    matching_linenos = (
        lineno
        for lineno, text in enumerate(lines, start=1)
        if GLOBAL_VARS_RE.match(text)
    )
    first = next(matching_linenos, None)
    if first is None:
        return None
    return Failure(first, "Unmanaged global var use detected in code, please use the helpers.")
 # A line starting with a slash-delimited path, then a parenthesised
 # argument list that contains "var/" somewhere inside it.
 PROC_ARGS_WITH_VAR_PREFIX_RE = re.compile(r"^/[\w/]\S+\(.*(var/|, ?var/.*).*\)")


 def check_proc_args_with_var_prefix(lines):
    """
    Report the first path-style definition line whose argument list
    contains a ``var/`` prefix.

    ``lines`` is any iterable of strings. Returns a Failure carrying the
    1-based line number, or None when no line matches.
    """
    position = 0
    for text in lines:
        position += 1
        if not PROC_ARGS_WITH_VAR_PREFIX_RE.match(text):
            continue
        return Failure(position, "Changed files contains a proc argument starting with 'var'.")
    return None
 # Checks the driver runs against every .dm file, in this order. Each entry
 # is called with the file's lines and returns a Failure for the first
 # offending line, or None. check_515_proc_syntax is not listed here because
 # the driver runs it separately, skipping one exempt file.
 CODE_CHECKS = [
    check_space_indentation,
    check_mixed_indentation,
    check_trailing_newlines,
    check_global_vars,
    check_proc_args_with_var_prefix,
 ]
 if __name__ == "__main__":
    # Lint every .dm file under the current working directory and print one
    # "path:line: message" entry per failed check. The process exits with
    # status 1 if any check failed, otherwise 0.
    print("check_grep2 started")
    exit_code = 0
    start = time.time()
    for code_filepath in glob.glob("**/*.dm", recursive=True):
        # Read the file once into a list. Checks need len() and truthiness,
        # which a live file handle does not provide, and a list also removes
        # the need to rewind the handle between checks.
        with open(code_filepath, encoding="UTF-8") as code:
            lines = code.readlines()
        checks = list(CODE_CHECKS)
        # 515 proc syntax check is unique in running on all files but one,
        # so it is prepended here rather than living in CODE_CHECKS.
        if os.path.basename(code_filepath) != IGNORE_515_PROC_MARKER_FILENAME:
            checks.insert(0, check_515_proc_syntax)
        for check in checks:
            if failure := check(lines):
                exit_code = 1
                print(f"{code_filepath}:{failure.lineno}: {RED}{failure.message}{NC}")
    end = time.time()
    print(f"\ncheck_grep2 tests completed in {end - start:.2f}s\n")
    sys.exit(exit_code)