thinkwee committed on
Commit
4a7a641
·
1 Parent(s): e5e8568

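Fix: Enhance acronym detection by supporting `~` between the full name and the parenthesized acronym in definitions, and reduce false positives by skipping undefined-acronym warnings when the full form appears on the same line and used-before-definition warnings when the usage is on the same line as the definition.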

Browse files
Files changed (1) hide show
  1. src/checkers/acronym_checker.py +28 -1
src/checkers/acronym_checker.py CHANGED
@@ -23,7 +23,7 @@ class AcronymChecker(BaseChecker):
23
  # Enhanced pattern to find defined acronyms with LaTeX formatting support
24
  # Matches: "Full Name (ACRONYM)", "(ACRONYM; Full Name)", "Full Name (\textbf{ACRONYM})", etc.
25
  DEFINITION_PATTERN = re.compile(
26
- r'([A-Z][a-zA-Z\s\-]+)\s*\((?:\\(?:textbf|emph|textit|texttt)\{)?([A-Z]{3,}s?)(?:\})?\)|' # Full Name (ABC) or Full Name (\textbf{ABC})
27
  r'\((?:\\(?:textbf|emph|textit|texttt)\{)?([A-Z]{3,}s?)(?:\})?;\s*([A-Za-z\s\-]+)\)', # (ABC; Full Name) or (\textbf{ABC}; Full Name)
28
  re.MULTILINE
29
  )
@@ -116,6 +116,15 @@ class AcronymChecker(BaseChecker):
116
  line_num = self._find_line_number(content, first_pos)
117
  full_form = acronym_full_forms[acronym]
118
 
 
 
 
 
 
 
 
 
 
119
  results.append(self._create_result(
120
  passed=False,
121
  severity=CheckSeverity.WARNING,
@@ -126,9 +135,27 @@ class AcronymChecker(BaseChecker):
126
  else:
127
  # Check if used before definition
128
  def_pos = defined_acronyms[acronym]
 
 
 
 
129
  for pos in positions:
130
  if pos < def_pos:
131
  line_num = self._find_line_number(content, pos)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  results.append(self._create_result(
133
  passed=False,
134
  severity=CheckSeverity.WARNING,
 
23
  # Enhanced pattern to find defined acronyms with LaTeX formatting support
24
  # Matches: "Full Name (ACRONYM)", "(ACRONYM; Full Name)", "Full Name (\textbf{ACRONYM})", etc.
25
  DEFINITION_PATTERN = re.compile(
26
+ r'([A-Z][a-zA-Z\s\-]+)[\s~]*\((?:\\(?:textbf|emph|textit|texttt)\{)?([A-Z]{3,}s?)(?:\})?\)|' # Full Name (ABC) or Full Name (\textbf{ABC}) with optional ~
27
  r'\((?:\\(?:textbf|emph|textit|texttt)\{)?([A-Z]{3,}s?)(?:\})?;\s*([A-Za-z\s\-]+)\)', # (ABC; Full Name) or (\textbf{ABC}; Full Name)
28
  re.MULTILINE
29
  )
 
116
  line_num = self._find_line_number(content, first_pos)
117
  full_form = acronym_full_forms[acronym]
118
 
119
+ # Check if full form is present in the same line (loose definition check)
120
+ # This handles cases like: "The Unified Modeling Language (UML) is..." where regex missed it
121
+ # or "UML (Unified Modeling Language)" or just "Unified Modeling Language ... UML"
122
+ line_content = self._get_line_content(content, line_num)
123
+
124
+ # Check if full form is in line content (ignoring case)
125
+ if full_form.lower() in line_content.lower():
126
+ continue
127
+
128
  results.append(self._create_result(
129
  passed=False,
130
  severity=CheckSeverity.WARNING,
 
135
  else:
136
  # Check if used before definition
137
  def_pos = defined_acronyms[acronym]
138
+
139
+ # Get the line number of the definition
140
+ def_line_num = self._find_line_number(content, def_pos)
141
+
142
  for pos in positions:
143
  if pos < def_pos:
144
  line_num = self._find_line_number(content, pos)
145
+
146
+ # Special case: if usage is on the same line as definition, it might be the definition itself
147
+ # (e.g. if the regex match starts slightly later than the acronym usage does).
148
+ # But typically DEFINITION_PATTERN captures the whole block.
149
+ # However, if we have "The Unified Modeling Language (UML)..." and usage finds "UML"
150
+ # is the "UML" inside "(UML)" technically counted as a usage?
151
+ # `_find_all_usages` excludes special contexts like `(ACRONYM)`.
152
+ # So if we are here, it's a usage outside of parens.
153
+
154
+ # If usage is on the same line as definition, let's look closer.
155
+ if line_num == def_line_num:
156
+ # It's likely fine if on same line
157
+ continue
158
+
159
  results.append(self._create_result(
160
  passed=False,
161
  severity=CheckSeverity.WARNING,