Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -735,6 +735,7 @@ class FAADocumentChecker(DocumentChecker):
|
|
| 735 |
def __init__(self, config_path: Optional[str] = None):
|
| 736 |
super().__init__(config_path)
|
| 737 |
self.HEADING_WORDS = HEADING_WORDS
|
|
|
|
| 738 |
|
| 739 |
# Core Check Methods
|
| 740 |
@profile_performance
|
|
@@ -934,10 +935,8 @@ class FAADocumentChecker(DocumentChecker):
|
|
| 934 |
if not self.validate_input(doc):
|
| 935 |
return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
|
| 936 |
|
| 937 |
-
#
|
| 938 |
heading_words = self.config_manager.config.get('heading_words', self.HEADING_WORDS)
|
| 939 |
-
|
| 940 |
-
# Standard acronyms that don't need to be defined
|
| 941 |
predefined_acronyms = self.config_manager.config.get('predefined_acronyms', self.PREDEFINED_ACRONYMS)
|
| 942 |
|
| 943 |
# Tracking structures
|
|
@@ -947,16 +946,15 @@ class FAADocumentChecker(DocumentChecker):
|
|
| 947 |
|
| 948 |
# Patterns
|
| 949 |
defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
|
| 950 |
-
# Modified acronym pattern
|
| 951 |
acronym_pattern = re.compile(r'(?<!\()\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
|
| 952 |
|
| 953 |
for paragraph in doc:
|
| 954 |
-
# Skip lines that appear to be headings
|
| 955 |
words = paragraph.strip().split()
|
| 956 |
if all(word.isupper() for word in words) and any(word in heading_words for word in words):
|
| 957 |
continue
|
| 958 |
|
| 959 |
-
# Check for acronym definitions
|
| 960 |
defined_matches = defined_pattern.findall(paragraph)
|
| 961 |
for full_term, acronym in defined_matches:
|
| 962 |
if acronym not in predefined_acronyms:
|
|
@@ -964,11 +962,8 @@ class FAADocumentChecker(DocumentChecker):
|
|
| 964 |
defined_acronyms[acronym] = {
|
| 965 |
'full_term': full_term.strip(),
|
| 966 |
'defined_at': paragraph.strip(),
|
| 967 |
-
'used': False
|
| 968 |
}
|
| 969 |
-
else:
|
| 970 |
-
# Handle duplicate definitions if necessary
|
| 971 |
-
pass # You may add logic for duplicate definitions
|
| 972 |
|
| 973 |
# Check for acronym usage
|
| 974 |
usage_matches = acronym_pattern.finditer(paragraph)
|
|
@@ -979,15 +974,9 @@ class FAADocumentChecker(DocumentChecker):
|
|
| 979 |
if acronym in predefined_acronyms:
|
| 980 |
continue
|
| 981 |
|
| 982 |
-
# Skip if it's part of a heading or contains non-letter characters
|
| 983 |
-
if (acronym in heading_words or
|
| 984 |
-
any(not c.isalpha() for c in acronym) or
|
| 985 |
-
len(acronym) > 10): # Usually acronyms aren't this long
|
| 986 |
-
continue
|
| 987 |
-
|
| 988 |
if acronym not in defined_acronyms:
|
| 989 |
# Undefined acronym used
|
| 990 |
-
issues.append(acronym)
|
| 991 |
else:
|
| 992 |
# Mark as used
|
| 993 |
defined_acronyms[acronym]['used'] = True
|
|
@@ -996,7 +985,6 @@ class FAADocumentChecker(DocumentChecker):
|
|
| 996 |
# Define success based on whether there are any undefined acronyms
|
| 997 |
success = len(issues) == 0
|
| 998 |
|
| 999 |
-
# Return the result with only undefined acronyms
|
| 1000 |
return DocumentCheckResult(success=success, issues=list(set(issues)))
|
| 1001 |
|
| 1002 |
@profile_performance
|
|
|
|
| 735 |
def __init__(self, config_path: Optional[str] = None):
|
| 736 |
super().__init__(config_path)
|
| 737 |
self.HEADING_WORDS = HEADING_WORDS
|
| 738 |
+
self.PREDEFINED_ACRONYMS = PREDEFINED_ACRONYMS
|
| 739 |
|
| 740 |
# Core Check Methods
|
| 741 |
@profile_performance
|
|
|
|
| 935 |
if not self.validate_input(doc):
|
| 936 |
return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
|
| 937 |
|
| 938 |
+
# Use instance variables for heading words and predefined acronyms
|
| 939 |
heading_words = self.config_manager.config.get('heading_words', self.HEADING_WORDS)
|
|
|
|
|
|
|
| 940 |
predefined_acronyms = self.config_manager.config.get('predefined_acronyms', self.PREDEFINED_ACRONYMS)
|
| 941 |
|
| 942 |
# Tracking structures
|
|
|
|
| 946 |
|
| 947 |
# Patterns
|
| 948 |
defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
|
|
|
|
| 949 |
acronym_pattern = re.compile(r'(?<!\()\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
|
| 950 |
|
| 951 |
for paragraph in doc:
|
| 952 |
+
# Skip lines that appear to be headings
|
| 953 |
words = paragraph.strip().split()
|
| 954 |
if all(word.isupper() for word in words) and any(word in heading_words for word in words):
|
| 955 |
continue
|
| 956 |
|
| 957 |
+
# Check for acronym definitions
|
| 958 |
defined_matches = defined_pattern.findall(paragraph)
|
| 959 |
for full_term, acronym in defined_matches:
|
| 960 |
if acronym not in predefined_acronyms:
|
|
|
|
| 962 |
defined_acronyms[acronym] = {
|
| 963 |
'full_term': full_term.strip(),
|
| 964 |
'defined_at': paragraph.strip(),
|
| 965 |
+
'used': False
|
| 966 |
}
|
|
|
|
|
|
|
|
|
|
| 967 |
|
| 968 |
# Check for acronym usage
|
| 969 |
usage_matches = acronym_pattern.finditer(paragraph)
|
|
|
|
| 974 |
if acronym in predefined_acronyms:
|
| 975 |
continue
|
| 976 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 977 |
if acronym not in defined_acronyms:
|
| 978 |
# Undefined acronym used
|
| 979 |
+
issues.append(acronym)
|
| 980 |
else:
|
| 981 |
# Mark as used
|
| 982 |
defined_acronyms[acronym]['used'] = True
|
|
|
|
| 985 |
# Define success based on whether there are any undefined acronyms
|
| 986 |
success = len(issues) == 0
|
| 987 |
|
|
|
|
| 988 |
return DocumentCheckResult(success=success, issues=list(set(issues)))
|
| 989 |
|
| 990 |
@profile_performance
|