Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,7 @@ from docx import Document
|
|
| 10 |
import io
|
| 11 |
import os
|
| 12 |
import traceback
|
|
|
|
| 13 |
|
| 14 |
@dataclass
|
| 15 |
class DocumentCheckResult:
|
|
@@ -934,42 +935,171 @@ class FAADocumentChecker(DocumentChecker):
|
|
| 934 |
|
| 935 |
return results
|
| 936 |
|
| 937 |
-
|
| 938 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 939 |
try:
|
| 940 |
-
#
|
|
|
|
|
|
|
|
|
|
| 941 |
if isinstance(file_obj, bytes):
|
| 942 |
file_obj = io.BytesIO(file_obj)
|
| 943 |
-
|
| 944 |
-
|
| 945 |
doc = Document(file_obj)
|
| 946 |
paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]
|
| 947 |
-
|
| 948 |
-
# Rewind
|
| 949 |
file_obj.seek(0)
|
| 950 |
-
|
| 951 |
# Run all checks
|
| 952 |
-
results =
|
| 953 |
-
|
| 954 |
-
results
|
| 955 |
-
results
|
| 956 |
-
|
| 957 |
-
results['section_symbol_check'] = checker.check_section_symbol_usage(paragraphs)
|
| 958 |
-
results['table_caption_check'] = checker.caption_check(paragraphs, doc_type, 'Table')
|
| 959 |
-
results['figure_caption_check'] = checker.caption_check(paragraphs, doc_type, 'Figure')
|
| 960 |
-
results['references_check'] = checker.table_figure_reference_check(paragraphs, doc_type)
|
| 961 |
-
results['title_check'] = checker.document_title_check(file_obj, doc_type)
|
| 962 |
-
results['double_period_check'] = checker.double_period_check(paragraphs)
|
| 963 |
-
results['spacing_check'] = checker.spacing_check(paragraphs)
|
| 964 |
-
results['abbreviation_check'] = checker.check_abbreviation_usage(paragraphs)
|
| 965 |
-
results['date_check'] = checker.check_date_formats(paragraphs)
|
| 966 |
-
results['placeholder_check'] = checker.check_placeholders(paragraphs)
|
| 967 |
-
|
| 968 |
-
return format_results_for_gradio(results, doc_type)
|
| 969 |
except Exception as e:
|
| 970 |
-
|
| 971 |
-
traceback.print_exc()
|
| 972 |
-
return f"
|
| 973 |
|
| 974 |
def format_results_for_gradio(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
|
| 975 |
"""Format the results for display in Gradio."""
|
|
@@ -1026,67 +1156,123 @@ def format_results_for_gradio(results: Dict[str, DocumentCheckResult], doc_type:
|
|
| 1026 |
|
| 1027 |
return "\n".join(output)
|
| 1028 |
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
-
|
| 1032 |
-
with demo:
|
| 1033 |
-
gr.Markdown("# Document Checker Tool")
|
| 1034 |
-
gr.Markdown("Upload a Word (docx) document to check for compliance with U.S. federal documentation standards.")
|
| 1035 |
-
gr.Markdown("*This tool is still in development and you might get false positives in your results*")
|
| 1036 |
-
gr.Markdown("Contact Eric Putnam if you have questions and comments.")
|
| 1037 |
-
gr.Markdown("""
|
| 1038 |
-
1. Upload a clean (no track changes or comments) Word file.
|
| 1039 |
-
2. Choose **Check Document**.""")
|
| 1040 |
|
| 1041 |
document_types = [
|
| 1042 |
-
"Advisory Circular",
|
| 1043 |
-
"
|
| 1044 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1045 |
]
|
| 1046 |
|
| 1047 |
template_types = ["Short AC template AC", "Long AC template AC"]
|
| 1048 |
|
| 1049 |
-
|
| 1050 |
-
|
| 1051 |
-
|
| 1052 |
-
|
| 1053 |
-
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
|
| 1059 |
-
|
| 1060 |
-
|
| 1061 |
-
|
| 1062 |
-
|
| 1063 |
-
|
| 1064 |
-
|
| 1065 |
-
|
| 1066 |
-
)
|
| 1067 |
-
submit_btn = gr.Button("Check Document", variant="primary")
|
| 1068 |
-
|
| 1069 |
-
with gr.Column(scale=2):
|
| 1070 |
-
output = gr.Markdown(
|
| 1071 |
-
label="Check Results",
|
| 1072 |
-
value="Results will appear here after processing..."
|
| 1073 |
-
)
|
| 1074 |
-
|
| 1075 |
-
def update_template_visibility(doc_type):
|
| 1076 |
-
return gr.update(visible=doc_type == "Advisory Circular")
|
| 1077 |
|
| 1078 |
-
|
| 1079 |
-
|
| 1080 |
-
|
| 1081 |
-
|
| 1082 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1083 |
|
| 1084 |
-
|
| 1085 |
-
fn=process_document,
|
| 1086 |
-
inputs=[file_input, doc_type, template_type],
|
| 1087 |
-
outputs=[output]
|
| 1088 |
-
)
|
| 1089 |
|
| 1090 |
-
#
|
| 1091 |
if __name__ == "__main__":
|
|
|
|
| 1092 |
demo.launch()
|
|
|
|
| 10 |
import io
|
| 11 |
import os
|
| 12 |
import traceback
|
| 13 |
+
from datetime import datetime
|
| 14 |
|
| 15 |
@dataclass
|
| 16 |
class DocumentCheckResult:
|
|
|
|
| 935 |
|
| 936 |
return results
|
| 937 |
|
| 938 |
+
@dataclass
|
| 939 |
+
class DocumentCheckResult:
|
| 940 |
+
"""Structured result for document checks."""
|
| 941 |
+
success: bool
|
| 942 |
+
issues: List[Dict[str, Any]]
|
| 943 |
+
details: Optional[Dict[str, Any]] = None
|
| 944 |
+
|
| 945 |
+
def format_check_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
|
| 946 |
+
"""Format check results into a Markdown string for display."""
|
| 947 |
+
output = []
|
| 948 |
+
|
| 949 |
+
# Add header with timestamp
|
| 950 |
+
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 951 |
+
output.extend([
|
| 952 |
+
f"# Document Check Results - {current_time}",
|
| 953 |
+
f"## Document Type: {doc_type}",
|
| 954 |
+
"---\n"
|
| 955 |
+
])
|
| 956 |
+
|
| 957 |
+
# Count issues
|
| 958 |
+
total_issues = sum(1 for r in results.values() if not r.success)
|
| 959 |
+
|
| 960 |
+
if total_issues == 0:
|
| 961 |
+
output.append("✅ **All checks passed successfully!**\n")
|
| 962 |
+
return "\n".join(output)
|
| 963 |
+
|
| 964 |
+
output.append(f"β Found issues in {total_issues} check categories\n")
|
| 965 |
+
|
| 966 |
+
# Define check categories and their display names
|
| 967 |
+
check_categories = {
|
| 968 |
+
'heading_title_check': {
|
| 969 |
+
'title': 'π Required Headings',
|
| 970 |
+
'priority': 1
|
| 971 |
+
},
|
| 972 |
+
'heading_title_period_check': {
|
| 973 |
+
'title': 'π Heading Period Usage',
|
| 974 |
+
'priority': 1
|
| 975 |
+
},
|
| 976 |
+
'acronym_check': {
|
| 977 |
+
'title': 'π Acronym Definitions',
|
| 978 |
+
'priority': 2
|
| 979 |
+
},
|
| 980 |
+
'terminology_check': {
|
| 981 |
+
'title': 'π Terminology Usage',
|
| 982 |
+
'priority': 2
|
| 983 |
+
},
|
| 984 |
+
'section_symbol_usage_check': {
|
| 985 |
+
'title': '§ Section Symbol Usage',
|
| 986 |
+
'priority': 2
|
| 987 |
+
},
|
| 988 |
+
'caption_check_table': {
|
| 989 |
+
'title': 'π Table Captions',
|
| 990 |
+
'priority': 3
|
| 991 |
+
},
|
| 992 |
+
'caption_check_figure': {
|
| 993 |
+
'title': '🖼️ Figure Captions',
|
| 994 |
+
'priority': 3
|
| 995 |
+
},
|
| 996 |
+
'table_figure_reference_check': {
|
| 997 |
+
'title': 'π Table/Figure References',
|
| 998 |
+
'priority': 3
|
| 999 |
+
},
|
| 1000 |
+
'document_title_check': {
|
| 1001 |
+
'title': 'π Document Title Format',
|
| 1002 |
+
'priority': 1
|
| 1003 |
+
},
|
| 1004 |
+
'double_period_check': {
|
| 1005 |
+
'title': '⚡ Double Periods',
|
| 1006 |
+
'priority': 4
|
| 1007 |
+
},
|
| 1008 |
+
'spacing_check': {
|
| 1009 |
+
'title': '⌨️ Spacing Issues',
|
| 1010 |
+
'priority': 4
|
| 1011 |
+
},
|
| 1012 |
+
'abbreviation_usage_check': {
|
| 1013 |
+
'title': 'π Abbreviation Usage',
|
| 1014 |
+
'priority': 3
|
| 1015 |
+
},
|
| 1016 |
+
'date_formats_check': {
|
| 1017 |
+
'title': '📅 Date Formats',
|
| 1018 |
+
'priority': 3
|
| 1019 |
+
},
|
| 1020 |
+
'placeholders_check': {
|
| 1021 |
+
'title': '🚩 Placeholder Content',
|
| 1022 |
+
'priority': 1
|
| 1023 |
+
}
|
| 1024 |
+
}
|
| 1025 |
+
|
| 1026 |
+
# Sort checks by priority
|
| 1027 |
+
sorted_checks = sorted(
|
| 1028 |
+
[(name, result) for name, result in results.items()],
|
| 1029 |
+
key=lambda x: check_categories.get(x[0], {'priority': 999})['priority']
|
| 1030 |
+
)
|
| 1031 |
+
|
| 1032 |
+
# Process each check result
|
| 1033 |
+
for check_name, result in sorted_checks:
|
| 1034 |
+
if not result.success:
|
| 1035 |
+
category = check_categories.get(check_name, {'title': check_name.replace('_', ' ').title()})
|
| 1036 |
+
|
| 1037 |
+
output.append(f"### {category['title']}")
|
| 1038 |
+
|
| 1039 |
+
if isinstance(result.issues, list):
|
| 1040 |
+
for issue in result.issues[:5]: # Show first 5 issues
|
| 1041 |
+
if isinstance(issue, dict):
|
| 1042 |
+
# Format dictionary issues
|
| 1043 |
+
for key, value in issue.items():
|
| 1044 |
+
if isinstance(value, list):
|
| 1045 |
+
output.extend([f"- {item}" for item in value])
|
| 1046 |
+
else:
|
| 1047 |
+
output.append(f"- {key}: {value}")
|
| 1048 |
+
else:
|
| 1049 |
+
output.append(f"- {issue}")
|
| 1050 |
+
|
| 1051 |
+
# Show count of remaining issues
|
| 1052 |
+
if len(result.issues) > 5:
|
| 1053 |
+
output.append(f"\n*...and {len(result.issues) - 5} more similar issues*")
|
| 1054 |
+
|
| 1055 |
+
output.append("") # Add spacing between sections
|
| 1056 |
+
|
| 1057 |
+
# Add summary and recommendations
|
| 1058 |
+
output.extend([
|
| 1059 |
+
"## π Summary and Recommendations",
|
| 1060 |
+
"",
|
| 1061 |
+
"### Priority Order for Fixes:",
|
| 1062 |
+
"1. 🔴 Critical: Heading formats, required content, and document structure",
|
| 1063 |
+
"2. 🟡 Important: Terminology, acronyms, and references",
|
| 1064 |
+
"3. 🟢 Standard: Formatting, spacing, and style consistency",
|
| 1065 |
+
"",
|
| 1066 |
+
"### Next Steps:",
|
| 1067 |
+
"1. Address issues in priority order",
|
| 1068 |
+
"2. Use search/replace for consistent fixes",
|
| 1069 |
+
"3. Re-run checker after making changes",
|
| 1070 |
+
"4. Update your document template if needed",
|
| 1071 |
+
""
|
| 1072 |
+
])
|
| 1073 |
+
|
| 1074 |
+
return "\n".join(output)
|
| 1075 |
+
|
| 1076 |
+
def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
|
| 1077 |
+
"""Process document and run all checks."""
|
| 1078 |
try:
|
| 1079 |
+
# Initialize checker
|
| 1080 |
+
checker = FAADocumentChecker()
|
| 1081 |
+
|
| 1082 |
+
# Convert file object to BytesIO if needed
|
| 1083 |
if isinstance(file_obj, bytes):
|
| 1084 |
file_obj = io.BytesIO(file_obj)
|
| 1085 |
+
|
| 1086 |
+
# Extract paragraphs
|
| 1087 |
doc = Document(file_obj)
|
| 1088 |
paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]
|
| 1089 |
+
|
| 1090 |
+
# Rewind file object
|
| 1091 |
file_obj.seek(0)
|
| 1092 |
+
|
| 1093 |
# Run all checks
|
| 1094 |
+
results = checker.run_all_checks(file_obj, doc_type, template_type)
|
| 1095 |
+
|
| 1096 |
+
# Format results for display
|
| 1097 |
+
return format_check_results(results, doc_type)
|
| 1098 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1099 |
except Exception as e:
|
| 1100 |
+
logging.error(f"Error processing document: {str(e)}")
|
| 1101 |
+
traceback.print_exc()
|
| 1102 |
+
return f"β Error processing document: {str(e)}\n\nPlease ensure the file is a valid .docx document and try again."
|
| 1103 |
|
| 1104 |
def format_results_for_gradio(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
|
| 1105 |
"""Format the results for display in Gradio."""
|
|
|
|
| 1156 |
|
| 1157 |
return "\n".join(output)
|
| 1158 |
|
| 1159 |
+
def create_interface():
|
| 1160 |
+
"""Create and configure the Gradio interface."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1161 |
|
| 1162 |
document_types = [
|
| 1163 |
+
"Advisory Circular",
|
| 1164 |
+
"Airworthiness Criteria",
|
| 1165 |
+
"Deviation Memo",
|
| 1166 |
+
"Exemption",
|
| 1167 |
+
"Federal Register Notice",
|
| 1168 |
+
"Order",
|
| 1169 |
+
"Policy Statement",
|
| 1170 |
+
"Rule",
|
| 1171 |
+
"Special Condition",
|
| 1172 |
+
"Technical Standard Order",
|
| 1173 |
+
"Other"
|
| 1174 |
]
|
| 1175 |
|
| 1176 |
template_types = ["Short AC template AC", "Long AC template AC"]
|
| 1177 |
|
| 1178 |
+
# Custom CSS for better styling
|
| 1179 |
+
custom_css = """
|
| 1180 |
+
.gradio-container {
|
| 1181 |
+
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
| 1182 |
+
}
|
| 1183 |
+
.container {
|
| 1184 |
+
max-width: 900px;
|
| 1185 |
+
margin: auto;
|
| 1186 |
+
}
|
| 1187 |
+
.alert {
|
| 1188 |
+
padding: 1rem;
|
| 1189 |
+
margin-bottom: 1rem;
|
| 1190 |
+
border-radius: 0.5rem;
|
| 1191 |
+
background-color: #f8f9fa;
|
| 1192 |
+
border: 1px solid #dee2e6;
|
| 1193 |
+
}
|
| 1194 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1195 |
|
| 1196 |
+
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
|
| 1197 |
+
gr.Markdown(
|
| 1198 |
+
"""
|
| 1199 |
+
# π Document Checker Tool
|
| 1200 |
+
|
| 1201 |
+
### Purpose
|
| 1202 |
+
This tool checks Word documents for compliance with U.S. federal documentation standards.
|
| 1203 |
+
|
| 1204 |
+
### How to Use
|
| 1205 |
+
1. Upload your Word document (.docx format)
|
| 1206 |
+
2. Select the document type
|
| 1207 |
+
3. Click "Check Document"
|
| 1208 |
+
|
| 1209 |
+
> **Note:** Please ensure your document is clean (no track changes or comments)
|
| 1210 |
+
"""
|
| 1211 |
+
)
|
| 1212 |
+
|
| 1213 |
+
with gr.Row():
|
| 1214 |
+
with gr.Column(scale=1):
|
| 1215 |
+
file_input = gr.File(
|
| 1216 |
+
label="π Upload Word Document (.docx)",
|
| 1217 |
+
file_types=[".docx"],
|
| 1218 |
+
type="binary"
|
| 1219 |
+
)
|
| 1220 |
+
|
| 1221 |
+
doc_type = gr.Dropdown(
|
| 1222 |
+
choices=document_types,
|
| 1223 |
+
label="π Document Type",
|
| 1224 |
+
value="Advisory Circular",
|
| 1225 |
+
info="Select the type of document you're checking"
|
| 1226 |
+
)
|
| 1227 |
+
|
| 1228 |
+
template_type = gr.Radio(
|
| 1229 |
+
choices=template_types,
|
| 1230 |
+
label="π Template Type",
|
| 1231 |
+
visible=False,
|
| 1232 |
+
info="Only applicable for Advisory Circulars"
|
| 1233 |
+
)
|
| 1234 |
+
|
| 1235 |
+
submit_btn = gr.Button(
|
| 1236 |
+
"π Check Document",
|
| 1237 |
+
variant="primary"
|
| 1238 |
+
)
|
| 1239 |
+
|
| 1240 |
+
with gr.Column(scale=2):
|
| 1241 |
+
results = gr.Markdown(
|
| 1242 |
+
label="Check Results",
|
| 1243 |
+
value="Results will appear here after processing...",
|
| 1244 |
+
elem_classes=["results-panel"]
|
| 1245 |
+
)
|
| 1246 |
+
|
| 1247 |
+
# Update template type visibility based on document type
|
| 1248 |
+
def update_template_visibility(doc_type):
|
| 1249 |
+
return gr.update(visible=doc_type == "Advisory Circular")
|
| 1250 |
+
|
| 1251 |
+
doc_type.change(
|
| 1252 |
+
fn=update_template_visibility,
|
| 1253 |
+
inputs=[doc_type],
|
| 1254 |
+
outputs=[template_type]
|
| 1255 |
+
)
|
| 1256 |
+
|
| 1257 |
+
# Handle document processing
|
| 1258 |
+
submit_btn.click(
|
| 1259 |
+
fn=process_document,
|
| 1260 |
+
inputs=[file_input, doc_type, template_type],
|
| 1261 |
+
outputs=[results]
|
| 1262 |
+
)
|
| 1263 |
+
|
| 1264 |
+
gr.Markdown(
|
| 1265 |
+
"""
|
| 1266 |
+
### π Important Notes
|
| 1267 |
+
- This tool is in development; you may encounter false positives
|
| 1268 |
+
- For questions or feedback, contact Eric Putnam
|
| 1269 |
+
- Results are not stored or saved
|
| 1270 |
+
"""
|
| 1271 |
+
)
|
| 1272 |
|
| 1273 |
+
return demo
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1274 |
|
| 1275 |
+
# Initialize and launch the interface
|
| 1276 |
if __name__ == "__main__":
|
| 1277 |
+
demo = create_interface()
|
| 1278 |
demo.launch()
|