Port check_grep.sh from an increasingly complicated shell script to Python. (#19875)

* Port check_grep.sh from an increasingly complicated shell script to Python.

* Exclude dmdoc-formatted multi-line comments from whitespace check

* Add comment exception for both WS rules

* Fixed mixed tab/space regex.

* hate hate hate hate

* Add doctests to preserve my sanity

* Remove checks involving newlines.

* Add back newline-involved tests. Cry.

* Several large-scale changes.

- Remove all the map-related changes for now, in order to get the
  code formatting changes in sooner and prevent more issues from leaking
  in while the PR issues are burned down.

- Flip the method for processing files. Instead of trying to sort errors
  by type, running over each file for each error, I'm simply breaking
  from the original output format and returning a lint-like set of
  errors. In this design, each file is opened, the checks are run, and
  the file handle is reused for each check.

- Standardize failures so that the filename, line number, and message
  can all be retrieved and formatted nicely for stdout.

- Compile regexes. The consensus seeeeeeeems to be that there's a
  negligible difference in performance when compiling regexes but
  there's a possibility that running them a lot of times may be faster,
  and I could actually time it, but ugggggggh.
This commit is contained in:
warriorstar-orion
2023-07-07 12:31:05 -04:00
committed by GitHub
parent cdce533eb7
commit edcaaf4241
2 changed files with 133 additions and 1 deletions

View File

@@ -26,7 +26,7 @@ jobs:
 run: |
 tools/ci/check_json.sh
 tools/ci/build_tgui.sh
-tools/ci/check_grep.sh
+python3 tools/ci/check_grep2.py
 python3 tools/ci/check_line_endings.py
 python3 tools/ci/check_file_names.py
 python3 tools/ci/unticked_files.py ${GITHUB_WORKSPACE}

132
tools/ci/check_grep2.py Normal file
View File

@@ -0,0 +1,132 @@
import glob
import re
import os
import sys
import time
from collections import namedtuple
# Result of a failed check: the line number to report and the message to
# print for it. The checks in this file fill `lineno` with a 1-based index.
Failure = namedtuple("Failure", "lineno message")

# ANSI escape sequences used to colorize terminal output.
RED = "\033[0;31m"
GREEN = "\033[0;32m"
BLUE = "\033[0;34m"
NC = "\033[0m"  # "No Color": resets the terminal to its default color.
# Base name of the one file the caller exempts from the ".proc/" check
# (presumably where the proc reference helpers themselves are defined).
IGNORE_515_PROC_MARKER_FILENAME = "__byond_version_compat.dm"
CHECK_515_PROC_MARKER_RE = re.compile(r"\.proc/")


def check_515_proc_syntax(lines):
    """Return a Failure for the first line containing a raw ``.proc/`` reference.

    Args:
        lines: Iterable of the file's lines.

    Returns:
        A Failure carrying the 1-based line number of the first line that
        contains ``.proc/`` anywhere in it, or None when no line does.
    """
    for lineno, line in enumerate(lines, start=1):
        # search(), not match(): match() only looks at the very start of the
        # line, but proc references normally appear mid-line, e.g. inside a
        # call's argument list.
        if CHECK_515_PROC_MARKER_RE.search(line):
            return Failure(lineno, "Outdated proc reference use detected in code. Please use proc reference helpers.")
    return None
# Two or more leading spaces followed by a character that is neither a space
# nor an asterisk. The space must be excluded from the final class: otherwise
# the engine backtracks and "   * foo" matches as two spaces plus a third
# space, defeating the dmdoc exclusion for comments indented by 3+ spaces.
CHECK_SPACE_INDENTATION_RE = re.compile(r"^ {2,}[^ \*]")


def check_space_indentation(lines):
    r"""Return a Failure for the first line indented with two or more spaces.

    Lines whose leading run of spaces is immediately followed by an asterisk
    (dmdoc block comment lines) are not reported, however many spaces precede
    the asterisk. A single leading space is never reported.

    Args:
        lines: Iterable of the file's lines.

    Returns:
        A Failure carrying the 1-based line number of the first offending
        line, or None when no line is space-indented.

    >>> bool(check_space_indentation(["  foo"]))
    True
    >>> bool(check_space_indentation(["\tfoo"]))
    False
    >>> bool(check_space_indentation(["  * foo"]))
    False
    >>> bool(check_space_indentation(["    * foo"]))
    False
    """
    for lineno, line in enumerate(lines, start=1):
        if CHECK_SPACE_INDENTATION_RE.match(line):
            return Failure(lineno, "Space indentation detected, please use tab indentation.")
    return None
CHECK_MIXED_INDENTATION_RE = re.compile(r"^(\t+ | +\t)\s*[^\s\*]")


def check_mixed_indentation(lines):
    r"""Report the first line whose leading whitespace mixes tabs and spaces.

    A line is reported when its indentation starts with tabs followed by a
    space, or spaces followed by a tab. Lines where the first character after
    the leading whitespace is an asterisk (dmdoc block comment lines) are
    skipped.

    Returns a Failure with the 1-based line number of the first offender, or
    None when every line is clean.

    >>> bool(check_mixed_indentation(["\t\t foo"]))
    True
    >>> bool(check_mixed_indentation(["\t \t foo"]))
    True
    >>> bool(check_mixed_indentation(["\t // foo"]))
    True
    >>> bool(check_mixed_indentation([" \tfoo"]))
    True
    >>> bool(check_mixed_indentation([" \t foo"]))
    True
    >>> bool(check_mixed_indentation(["\t * foo"]))
    False
    >>> bool(check_mixed_indentation(["\t\t* foo"]))
    False
    >>> bool(check_mixed_indentation(["\t \t * foo"]))
    False
    """
    is_mixed = CHECK_MIXED_INDENTATION_RE.match
    # Lazy generator: iteration stops as soon as the first offender is found.
    offenders = (number for number, text in enumerate(lines, start=1) if is_mixed(text))
    first = next(offenders, None)
    if first is None:
        return None
    return Failure(first, "Mixed <tab><space> indentation detected, please stick to tab indentation.")
def check_trailing_newlines(lines):
    """Return a Failure when the last line does not end with a newline.

    Works on any iterable of lines, including an open file handle: the
    input is walked once rather than relying on truthiness, indexing or
    len(), none of which behave usefully on a file object.

    Args:
        lines: Iterable of the file's lines, each keeping its line ending.

    Returns:
        A Failure whose line number is the number of lines seen (i.e. the
        last line), or None when the input is empty or properly terminated.
    """
    count = 0
    last_line = None
    # Exhaust the iterable, remembering only the final line and its number.
    for count, last_line in enumerate(lines, start=1):
        pass

    if last_line is None or last_line.endswith("\n"):
        return None
    return Failure(count, "Missing a trailing newline")
GLOBAL_VARS_RE = re.compile(r"^/*var/")


def check_global_vars(lines):
    """Report the first line that starts with a bare ``var/`` declaration.

    A line is reported when it begins, at column zero, with any number of
    slashes followed by ``var/``. Indented declarations are not reported.

    Returns a Failure with the 1-based line number of the first offender, or
    None when no line matches.
    """
    lineno = 0
    for text in lines:
        lineno += 1
        if GLOBAL_VARS_RE.match(text) is None:
            continue
        return Failure(lineno, "Unmanaged global var use detected in code, please use the helpers.")
    return None
PROC_ARGS_WITH_VAR_PREFIX_RE = re.compile(r"^/[\w/]\S+\(.*(var/|, ?var/.*).*\)")


def check_proc_args_with_var_prefix(lines):
    """Report the first proc definition with a ``var/``-prefixed argument.

    Only lines that start at column zero with a slash-led path and contain
    ``var/`` inside a parenthesized argument list are reported.

    Returns a Failure with the 1-based line number of the first offender, or
    None when no line matches.
    """
    has_var_arg = PROC_ARGS_WITH_VAR_PREFIX_RE.match
    for position, text in enumerate(lines):
        if not has_var_arg(text):
            continue
        return Failure(position + 1, "Changed files contains a proc argument starting with 'var'.")
    return None
# Checks run against every .dm file, in the order their failures are printed.
CODE_CHECKS = [
    check_space_indentation,
    check_mixed_indentation,
    check_trailing_newlines,
    check_global_vars,
    check_proc_args_with_var_prefix,
]

if __name__ == "__main__":
    print("check_grep2 started")

    exit_code = 0
    start = time.time()

    for code_filepath in glob.glob("**/*.dm", recursive=True):
        # Read the file once and hand the same list of lines to every check.
        # Checks may call len() on their input or test its truthiness, which
        # does not work on an open file handle.
        with open(code_filepath, encoding="UTF-8") as code:
            lines = code.readlines()

        checks = list(CODE_CHECKS)
        # The 515 proc syntax check is unique in running on all files but
        # one, so it is added here rather than through some generic
        # "should this check run" mechanism.
        if os.path.basename(code_filepath) != IGNORE_515_PROC_MARKER_FILENAME:
            checks.insert(0, check_515_proc_syntax)

        for check in checks:
            if failure := check(lines):
                exit_code = 1
                print(f"{code_filepath}:{failure.lineno}: {RED}{failure.message}{NC}")

    end = time.time()
    print(f"\ncheck_grep2 tests completed in {end - start:.2f}s\n")

    # Non-zero when at least one check reported a failure.
    sys.exit(exit_code)