Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,12 +11,8 @@ import io
|
|
| 11 |
import os
|
| 12 |
import traceback
|
| 13 |
from datetime import datetime
|
| 14 |
-
import gc
|
| 15 |
|
| 16 |
-
#
|
| 17 |
-
from main import FAADocumentChecker, DocumentCheckResult
|
| 18 |
-
|
| 19 |
-
# Core data structures and utilities
|
| 20 |
@dataclass
|
| 21 |
class DocumentCheckResult:
|
| 22 |
"""Structured result for document checks."""
|
|
@@ -187,7 +183,7 @@ class DocumentChecker:
|
|
| 187 |
return []
|
| 188 |
|
| 189 |
class FAADocumentChecker(DocumentChecker):
|
| 190 |
-
"""Main document checker implementation
|
| 191 |
def __init__(self, config_path: Optional[str] = None):
|
| 192 |
super().__init__(config_path)
|
| 193 |
|
|
@@ -869,6 +865,33 @@ class FAADocumentChecker(DocumentChecker):
|
|
| 869 |
|
| 870 |
return results
|
| 871 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 872 |
def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
|
| 873 |
"""Format check results into a Markdown string for Gradio display."""
|
| 874 |
output = []
|
|
@@ -951,35 +974,6 @@ def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: s
|
|
| 951 |
return "\n".join(output)
|
| 952 |
|
| 953 |
def create_interface():
|
| 954 |
-
|
| 955 |
-
def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
|
| 956 |
-
"""Process document and run all checks."""
|
| 957 |
-
try:
|
| 958 |
-
checker = FAADocumentChecker()
|
| 959 |
-
|
| 960 |
-
if isinstance(file_obj, bytes):
|
| 961 |
-
file_obj = io.BytesIO(file_obj)
|
| 962 |
-
|
| 963 |
-
results = checker.run_all_checks(file_obj, doc_type, template_type)
|
| 964 |
-
return format_markdown_results(results, doc_type)
|
| 965 |
-
|
| 966 |
-
except Exception as e:
|
| 967 |
-
logging.error(f"Error processing document: {str(e)}")
|
| 968 |
-
traceback.print_exc()
|
| 969 |
-
return f"""
|
| 970 |
-
# ❌ Error Processing Document
|
| 971 |
-
|
| 972 |
-
**Error Details:** {str(e)}
|
| 973 |
-
|
| 974 |
-
Please ensure:
|
| 975 |
-
1. The file is a valid .docx document
|
| 976 |
-
2. The file is not corrupted or password protected
|
| 977 |
-
3. The file is properly formatted
|
| 978 |
-
|
| 979 |
-
Try again after checking these issues. If the problem persists, contact support.
|
| 980 |
-
"""
|
| 981 |
-
|
| 982 |
-
def create_interface():
|
| 983 |
"""Create and configure the Gradio interface."""
|
| 984 |
document_types = [
|
| 985 |
"Advisory Circular",
|
|
@@ -1149,100 +1143,19 @@ def create_interface():
|
|
| 1149 |
|
| 1150 |
return demo
|
| 1151 |
|
| 1152 |
-
|
| 1153 |
-
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
f"## Document Type: {doc_type}",
|
| 1160 |
-
"---\n"
|
| 1161 |
-
])
|
| 1162 |
-
|
| 1163 |
-
total_issues = sum(1 for r in results.values() if not r.success)
|
| 1164 |
-
|
| 1165 |
-
if total_issues == 0:
|
| 1166 |
-
output.append("✅ **All checks passed successfully!**\n")
|
| 1167 |
-
return "\n".join(output)
|
| 1168 |
-
|
| 1169 |
-
output.append(f"❗ Found issues in {total_issues} check categories\n")
|
| 1170 |
-
|
| 1171 |
-
check_categories = {
|
| 1172 |
-
'heading_title_check': {'title': '📋 Required Headings', 'priority': 1},
|
| 1173 |
-
'heading_title_period_check': {'title': '🔍 Heading Period Usage', 'priority': 1},
|
| 1174 |
-
'acronym_check': {'title': '📝 Acronym Definitions', 'priority': 2},
|
| 1175 |
-
'terminology_check': {'title': '📖 Terminology Usage', 'priority': 2},
|
| 1176 |
-
'section_symbol_usage_check': {'title': '§ Section Symbol Usage', 'priority': 2},
|
| 1177 |
-
'caption_check_table': {'title': '📊 Table Captions', 'priority': 3},
|
| 1178 |
-
'caption_check_figure': {'title': '🖼️ Figure Captions', 'priority': 3},
|
| 1179 |
-
'table_figure_reference_check': {'title': '🔗 Table/Figure References', 'priority': 3},
|
| 1180 |
-
'document_title_check': {'title': '📑 Document Title Format', 'priority': 1},
|
| 1181 |
-
'double_period_check': {'title': '⚡ Double Periods', 'priority': 4},
|
| 1182 |
-
'spacing_check': {'title': '⌨️ Spacing Issues', 'priority': 4},
|
| 1183 |
-
'abbreviation_usage_check': {'title': '📎 Abbreviation Usage', 'priority': 3},
|
| 1184 |
-
'date_formats_check': {'title': '📅 Date Formats', 'priority': 3},
|
| 1185 |
-
'placeholders_check': {'title': '🚩 Placeholder Content', 'priority': 1}
|
| 1186 |
-
}
|
| 1187 |
-
|
| 1188 |
-
sorted_checks = sorted(
|
| 1189 |
-
[(name, result) for name, result in results.items()],
|
| 1190 |
-
key=lambda x: check_categories.get(x[0], {'priority': 999})['priority']
|
| 1191 |
-
)
|
| 1192 |
-
|
| 1193 |
-
for check_name, result in sorted_checks:
|
| 1194 |
-
if not result.success:
|
| 1195 |
-
category = check_categories.get(check_name, {'title': check_name.replace('_', ' ').title()})
|
| 1196 |
-
|
| 1197 |
-
output.append(f"### {category['title']}")
|
| 1198 |
-
|
| 1199 |
-
if isinstance(result.issues, list):
|
| 1200 |
-
for issue in result.issues[:5]:
|
| 1201 |
-
if isinstance(issue, dict):
|
| 1202 |
-
for key, value in issue.items():
|
| 1203 |
-
if isinstance(value, list):
|
| 1204 |
-
output.extend([f"- {item}" for item in value])
|
| 1205 |
-
else:
|
| 1206 |
-
output.append(f"- {key}: {value}")
|
| 1207 |
-
else:
|
| 1208 |
-
output.append(f"- {issue}")
|
| 1209 |
-
|
| 1210 |
-
if len(result.issues) > 5:
|
| 1211 |
-
output.append(f"\n*...and {len(result.issues) - 5} more similar issues*")
|
| 1212 |
-
|
| 1213 |
-
output.append("")
|
| 1214 |
-
|
| 1215 |
-
output.extend([
|
| 1216 |
-
"## 📋 Summary and Recommendations",
|
| 1217 |
-
"",
|
| 1218 |
-
"### Priority Order for Fixes:",
|
| 1219 |
-
"1. 🔴 Critical: Heading formats, required content, and document structure",
|
| 1220 |
-
"2. 🟡 Important: Terminology, acronyms, and references",
|
| 1221 |
-
"3. 🟢 Standard: Formatting, spacing, and style consistency",
|
| 1222 |
-
"",
|
| 1223 |
-
"### Next Steps:",
|
| 1224 |
-
"1. Address issues in priority order",
|
| 1225 |
-
"2. Use search/replace for consistent fixes",
|
| 1226 |
-
"3. Re-run checker after making changes",
|
| 1227 |
-
"4. Update your document template if needed",
|
| 1228 |
-
""
|
| 1229 |
-
])
|
| 1230 |
-
|
| 1231 |
-
return "\n".join(output)
|
| 1232 |
-
|
| 1233 |
-
# Initialize and launch the interface
|
| 1234 |
-
if __name__ == "__main__":
|
| 1235 |
-
# Setup logging
|
| 1236 |
-
logging.basicConfig(
|
| 1237 |
-
level=logging.INFO,
|
| 1238 |
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 1239 |
-
)
|
| 1240 |
|
| 1241 |
-
|
| 1242 |
-
|
| 1243 |
-
|
| 1244 |
-
|
| 1245 |
-
|
| 1246 |
-
|
| 1247 |
-
|
| 1248 |
-
|
|
|
|
| 11 |
import os
|
| 12 |
import traceback
|
| 13 |
from datetime import datetime
|
|
|
|
| 14 |
|
| 15 |
+
# Core data structures
|
|
|
|
|
|
|
|
|
|
| 16 |
@dataclass
|
| 17 |
class DocumentCheckResult:
|
| 18 |
"""Structured result for document checks."""
|
|
|
|
| 183 |
return []
|
| 184 |
|
| 185 |
class FAADocumentChecker(DocumentChecker):
|
| 186 |
+
"""Main document checker implementation."""
|
| 187 |
def __init__(self, config_path: Optional[str] = None):
|
| 188 |
super().__init__(config_path)
|
| 189 |
|
|
|
|
| 865 |
|
| 866 |
return results
|
| 867 |
|
| 868 |
+
def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
|
| 869 |
+
"""Process document and run all checks."""
|
| 870 |
+
try:
|
| 871 |
+
checker = FAADocumentChecker()
|
| 872 |
+
|
| 873 |
+
if isinstance(file_obj, bytes):
|
| 874 |
+
file_obj = io.BytesIO(file_obj)
|
| 875 |
+
|
| 876 |
+
results = checker.run_all_checks(file_obj, doc_type, template_type)
|
| 877 |
+
return format_markdown_results(results, doc_type)
|
| 878 |
+
|
| 879 |
+
except Exception as e:
|
| 880 |
+
logging.error(f"Error processing document: {str(e)}")
|
| 881 |
+
traceback.print_exc()
|
| 882 |
+
return f"""
|
| 883 |
+
# ❌ Error Processing Document
|
| 884 |
+
|
| 885 |
+
**Error Details:** {str(e)}
|
| 886 |
+
|
| 887 |
+
Please ensure:
|
| 888 |
+
1. The file is a valid .docx document
|
| 889 |
+
2. The file is not corrupted or password protected
|
| 890 |
+
3. The file is properly formatted
|
| 891 |
+
|
| 892 |
+
Try again after checking these issues. If the problem persists, contact support.
|
| 893 |
+
"""
|
| 894 |
+
|
| 895 |
def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
|
| 896 |
"""Format check results into a Markdown string for Gradio display."""
|
| 897 |
output = []
|
|
|
|
| 974 |
return "\n".join(output)
|
| 975 |
|
| 976 |
def create_interface():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 977 |
"""Create and configure the Gradio interface."""
|
| 978 |
document_types = [
|
| 979 |
"Advisory Circular",
|
|
|
|
| 1143 |
|
| 1144 |
return demo
|
| 1145 |
|
| 1146 |
+
# Initialize and launch the interface
|
| 1147 |
+
if __name__ == "__main__":
|
| 1148 |
+
# Setup logging
|
| 1149 |
+
logging.basicConfig(
|
| 1150 |
+
level=logging.INFO,
|
| 1151 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 1152 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1153 |
|
| 1154 |
+
# Create and launch the interface
|
| 1155 |
+
demo = create_interface()
|
| 1156 |
+
demo.launch(
|
| 1157 |
+
share=False, # Set to True if you want to generate a public link
|
| 1158 |
+
server_name="0.0.0.0", # Allows external access
|
| 1159 |
+
server_port=7860, # Default Gradio port
|
| 1160 |
+
debug=True
|
| 1161 |
+
)
|