thinkwee
committed on
Commit
·
4a7a641
1
Parent(s):
e5e8568
Fix: Enhance acronym detection by supporting `~` in definitions and reducing false positives for undefined or pre-defined usages on the same line.
Browse files
src/checkers/acronym_checker.py
CHANGED
|
@@ -23,7 +23,7 @@ class AcronymChecker(BaseChecker):
|
|
| 23 |
# Enhanced pattern to find defined acronyms with LaTeX formatting support
|
| 24 |
# Matches: "Full Name (ACRONYM)", "(ACRONYM; Full Name)", "Full Name (\textbf{ACRONYM})", etc.
|
| 25 |
DEFINITION_PATTERN = re.compile(
|
| 26 |
-
r'([A-Z][a-zA-Z\s\-]+)\s*\((?:\\(?:textbf|emph|textit|texttt)\{)?([A-Z]{3,}s?)(?:\})?\)|' # Full Name (ABC) or Full Name (\textbf{ABC})
|
| 27 |
r'\((?:\\(?:textbf|emph|textit|texttt)\{)?([A-Z]{3,}s?)(?:\})?;\s*([A-Za-z\s\-]+)\)', # (ABC; Full Name) or (\textbf{ABC}; Full Name)
|
| 28 |
re.MULTILINE
|
| 29 |
)
|
|
@@ -116,6 +116,15 @@ class AcronymChecker(BaseChecker):
|
|
| 116 |
line_num = self._find_line_number(content, first_pos)
|
| 117 |
full_form = acronym_full_forms[acronym]
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
results.append(self._create_result(
|
| 120 |
passed=False,
|
| 121 |
severity=CheckSeverity.WARNING,
|
|
@@ -126,9 +135,27 @@ class AcronymChecker(BaseChecker):
|
|
| 126 |
else:
|
| 127 |
# Check if used before definition
|
| 128 |
def_pos = defined_acronyms[acronym]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
for pos in positions:
|
| 130 |
if pos < def_pos:
|
| 131 |
line_num = self._find_line_number(content, pos)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
results.append(self._create_result(
|
| 133 |
passed=False,
|
| 134 |
severity=CheckSeverity.WARNING,
|
|
|
|
| 23 |
# Enhanced pattern to find defined acronyms with LaTeX formatting support
|
| 24 |
# Matches: "Full Name (ACRONYM)", "(ACRONYM; Full Name)", "Full Name (\textbf{ACRONYM})", etc.
|
| 25 |
DEFINITION_PATTERN = re.compile(
|
| 26 |
+
r'([A-Z][a-zA-Z\s\-]+)[\s~]*\((?:\\(?:textbf|emph|textit|texttt)\{)?([A-Z]{3,}s?)(?:\})?\)|' # Full Name (ABC) or Full Name (\textbf{ABC}) with optional ~
|
| 27 |
r'\((?:\\(?:textbf|emph|textit|texttt)\{)?([A-Z]{3,}s?)(?:\})?;\s*([A-Za-z\s\-]+)\)', # (ABC; Full Name) or (\textbf{ABC}; Full Name)
|
| 28 |
re.MULTILINE
|
| 29 |
)
|
|
|
|
| 116 |
line_num = self._find_line_number(content, first_pos)
|
| 117 |
full_form = acronym_full_forms[acronym]
|
| 118 |
|
| 119 |
+
# Check if full form is present in the same line (loose definition check)
|
| 120 |
+
# This handles cases like: "The Unified Modeling Language (UML) is..." where the regex missed it
|
| 121 |
+
# or "UML (Unified Modeling Language)" or just "Unified Modeling Language ... UML"
|
| 122 |
+
line_content = self._get_line_content(content, line_num)
|
| 123 |
+
|
| 124 |
+
# Check if full form is in line content (ignoring case)
|
| 125 |
+
if full_form.lower() in line_content.lower():
|
| 126 |
+
continue
|
| 127 |
+
|
| 128 |
results.append(self._create_result(
|
| 129 |
passed=False,
|
| 130 |
severity=CheckSeverity.WARNING,
|
|
|
|
| 135 |
else:
|
| 136 |
# Check if used before definition
|
| 137 |
def_pos = defined_acronyms[acronym]
|
| 138 |
+
|
| 139 |
+
# Get the line number of the definition
|
| 140 |
+
def_line_num = self._find_line_number(content, def_pos)
|
| 141 |
+
|
| 142 |
for pos in positions:
|
| 143 |
if pos < def_pos:
|
| 144 |
line_num = self._find_line_number(content, pos)
|
| 145 |
+
|
| 146 |
+
# Special case: if usage is on the same line as definition, it might be the definition itself
|
| 147 |
+
# (e.g. if the regex match begins slightly after the point where the acronym usage starts?)
|
| 148 |
+
# But typically DEFINITION_PATTERN captures the whole block.
|
| 149 |
+
# However, if we have "The Unified Modeling Language (UML)..." and usage finds "UML"
|
| 150 |
+
# technically "UML" inside "(UML)" is usage?
|
| 151 |
+
# `_find_all_usages` excludes special contexts like `(ACRONYM)`.
|
| 152 |
+
# So if we are here, it's a usage outside of parens.
|
| 153 |
+
|
| 154 |
+
# If usage is on the same line as definition, let's look closer.
|
| 155 |
+
if line_num == def_line_num:
|
| 156 |
+
# It's likely fine if on same line
|
| 157 |
+
continue
|
| 158 |
+
|
| 159 |
results.append(self._create_result(
|
| 160 |
passed=False,
|
| 161 |
severity=CheckSeverity.WARNING,
|