| import gradio as gr
|
| import os
|
| import pandas as pd
|
| import shutil
|
| import sys
|
|
|
|
|
|
|
|
|
|
|
# All OCR, classification, and keyword-matching logic lives in logiccode.py;
# this file is only a thin Gradio front-end over it.
try:
    import logiccode
except ImportError as e:
    # Fail fast with an actionable message: nothing below works without it.
    print("CRITICAL ERROR: Could not import 'logiccode.py'.")
    print(f"Ensure logiccode.py is in the same directory as app.py. Error: {e}")
    sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
class MockArgs:
    """Stand-in for the argparse namespace that ``logiccode`` reads.

    logiccode is written as a CLI tool and pulls its configuration from a
    module-level ``args`` object (e.g. ``args.debug``, ``args.fuzzy``,
    ``args.pages`` are read/written by this app). When logiccode is driven
    from the web UI instead of the command line, an instance of this class
    supplies those attributes with sensible defaults.

    Any default may be overridden at construction time, e.g.
    ``MockArgs(pages=5, debug=True)``; plain ``MockArgs()`` behaves exactly
    as before.
    """

    def __init__(self, **overrides):
        # Defaults for every attribute the rest of this app touches.
        self.debug = False        # verbose diagnostics off by default
        self.pages = 3            # max pages OCR'd per document
        self.file = []            # CLI input files (unused by the web UI)
        self.inputkeywords = ""   # CLI keyword string (unused by the web UI)
        self.required = []        # CLI required-doc list (unused by the web UI)
        self.fuzzy = True         # approximate keyword matching on
        self.visualize = False    # CLI visualization flag (unused here)
        # Apply caller-supplied overrides on top of the defaults.
        for name, value in overrides.items():
            setattr(self, name, value)
|
|
|
|
|
# If logiccode was imported (not run as a script) it has no argparse
# namespace yet; inject our defaults so its functions can read settings.
if not hasattr(logiccode, 'args'):
    logiccode.args = MockArgs()
|
|
|
|
|
|
|
|
|
def process_documents(files, keywords_input, required_docs, fuzzy_match_enabled, debug_enabled):
    """Process uploaded files using the imported logiccode module.

    Runs OCR, document-type classification, and keyword verification over
    each uploaded file, then aggregates batch-level results.

    Args:
        files: list of Gradio file objects (each exposing a ``.name`` path),
            or a falsy value when nothing was uploaded.
        keywords_input: whitespace-separated keywords that must appear
            somewhere across the uploaded documents.
        required_docs: document-type names that are mandatory for the batch.
        fuzzy_match_enabled: allow approximate keyword matching.
        debug_enabled: include tracebacks and extra detail in the logs.

    Returns:
        Tuple of (summary_html, gallery_images, results_dataframe, log_text)
        matching the four Gradio outputs wired to the Verify button.
    """
    # Propagate the UI toggles into logiccode's module-level settings.
    logiccode.args.debug = debug_enabled
    logiccode.args.fuzzy = fuzzy_match_enabled

    results = []          # one row per file for the results table
    gallery_images = []   # (image, caption) pairs for the gallery tab
    logs = []             # human-readable processing log

    # Keywords are whitespace-separated (matches the textbox help text).
    user_keywords = [kw.strip() for kw in keywords_input.split() if kw.strip()]

    found_documents = set()              # doc types seen across all files
    all_matched_keywords_per_file = []   # per-file sets of matched keywords

    if not files:
        return "<h3>⚠️ No files uploaded</h3>", [], pd.DataFrame(), "Please upload files to begin."

    logs.append(f"Starting processing of {len(files)} files...")
    logs.append(f"Target Keywords: {user_keywords}")
    logs.append(f"Required Documents: {required_docs}")

    for file_obj in files:
        file_path = file_obj.name
        filename = os.path.basename(file_path)

        logs.append(f"\n--- Processing: {filename} ---")

        # Gallery previews: raster images directly, PDFs via a first-page
        # render from logiccode.
        if file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
            gallery_images.append((file_path, filename))
        elif file_path.lower().endswith('.pdf'):
            try:
                preview_pages, _, _ = logiccode.pdf_to_images(file_path, max_pages=1)
                if preview_pages:
                    gallery_images.append((preview_pages[0], f"{filename} (PDF Preview)"))
                    logs.append(f"Generated PDF preview for {filename}")
            except Exception as e:
                # Preview failure is cosmetic; OCR below still runs.
                logs.append(f"⚠️ PDF Preview failed for {filename}: {e}")

        try:
            ocr_texts = logiccode.get_ocr_text(file_path, logiccode.args.pages)

            if not ocr_texts:
                # No extractable text: record the failure row and move on.
                logs.append(f"⚠️ Warning: No text extracted from {filename}")
                results.append({
                    "File": filename, "Type": "Unreadable", "Score": 0,
                    "Status": "FAILED", "Matched Keywords": ""
                })
                continue

            full_text = " ".join(ocr_texts)
            ocr_tokens = logiccode.normalize_text(full_text)

            # Classify the document and remember its type for the
            # required-documents check after the loop.
            doc_type, doc_score = logiccode.calculate_doc_type(ocr_tokens, debug=debug_enabled)
            found_documents.add(doc_type)
            logs.append(f"Classified as: {doc_type} (Confidence: {doc_score:.1f}%)")

            verification_results = logiccode.verify_keywords(ocr_tokens, user_keywords, fuzzy_match_enabled)
            matched_kws = [r['keyword'] for r in verification_results if r['matched']]
            all_matched_keywords_per_file.append(set(matched_kws))

            # Per-file status: VERIFIED = all keywords matched, PARTIAL =
            # some, FAILED = none; INFO ONLY when no keywords were given.
            if user_keywords:
                file_status = "VERIFIED" if len(matched_kws) == len(user_keywords) else "PARTIAL"
                if len(matched_kws) == 0:
                    file_status = "FAILED"
            else:
                file_status = "INFO ONLY"

            logs.append(f"Matched: {matched_kws if matched_kws else 'None'}")

            results.append({
                "File": filename,
                "Type": doc_type,
                "Score": f"{doc_score:.1f}%",
                "Status": file_status,
                "Matched Keywords": ", ".join(matched_kws)
            })

        except Exception as e:
            # Contain per-file errors so one bad upload doesn't sink the
            # whole batch; surface them in the table and logs instead.
            error_msg = f"Error processing {filename}: {str(e)}"
            logs.append(error_msg)
            if debug_enabled:
                import traceback
                logs.append(traceback.format_exc())

            results.append({
                "File": filename, "Type": "Error", "Score": 0,
                "Status": "ERROR", "Matched Keywords": str(e)
            })

    # Batch-level gaps: required doc types and keywords still unseen after
    # examining every file.
    required_set = set(required_docs)
    missing_docs = required_set - found_documents

    all_user_keywords = set(user_keywords)
    keywords_found_across_all_files = set()
    for file_kw_set in all_matched_keywords_per_file:
        keywords_found_across_all_files.update(file_kw_set)

    missing_keywords = all_user_keywords - keywords_found_across_all_files

    return build_html_summary(required_set, missing_docs, missing_keywords), gallery_images, pd.DataFrame(results), "\n".join(logs)
|
|
|
def build_html_summary(required_set, missing_docs, missing_keywords):
    """Render the overall verification summary panel as an HTML fragment.

    Args:
        required_set: document types the user marked as mandatory.
        missing_docs: subset of ``required_set`` not found in any file.
        missing_keywords: user keywords not matched in any file.

    Returns:
        HTML string; the banner reads VERIFIED only when no required
        document type and no keyword is missing, else ACTION REQUIRED.
    """
    html = """
    <div style='padding: 20px; background-color: white; border-radius: 10px; border: 1px solid #e5e7eb;'>
    <h3 style='margin-top: 0; color: #333;'>Verification Summary</h3>
    """

    # Required-documents check (skipped when nothing was required).
    doc_status_bool = True
    if required_set:
        if missing_docs:
            doc_status_bool = False
            html += f"<div style='margin-bottom: 8px;'>❌ <b>Missing Documents:</b> <span style='color: #ef4444;'>{', '.join(sorted(missing_docs))}</span></div>"
        else:
            html += "<div style='margin-bottom: 8px;'>✅ <b>Documents:</b> All required types found.</div>"
    else:
        html += "<div style='margin-bottom: 8px; color: #666;'>ℹ️ No specific document types required.</div>"

    # Keywords check (vacuously true when no keywords were requested).
    kw_status_bool = True
    if missing_keywords:
        kw_status_bool = False
        html += f"<div style='margin-bottom: 8px;'>❌ <b>Missing Keywords:</b> <span style='color: #ef4444;'>{', '.join(sorted(missing_keywords))}</span></div>"
    else:
        html += "<div style='margin-bottom: 8px;'>✅ <b>Keywords:</b> All keywords found.</div>"

    # Overall banner: green VERIFIED only when both checks passed.
    overall_color = "#10b981" if (doc_status_bool and kw_status_bool) else "#ef4444"
    overall_text = "VERIFIED" if (doc_status_bool and kw_status_bool) else "ACTION REQUIRED"

    html += "<hr style='margin: 15px 0; border-color: #eee;'>"
    html += f"<h2 style='color: {overall_color}; margin: 0; text-align: center;'>{overall_text}</h2>"
    html += "</div>"
    return html
|
|
|
|
|
|
|
|
|
# Shared Gradio theme: soft blue/slate palette on a light-gray page with
# white, thin-bordered blocks.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
).set(
    body_background_fill="#f9fafb",
    block_background_fill="white",
    block_border_width="1px"
)
|
|
|
# UI layout. NOTE(review): several label strings below (e.g. "π ...",
# "πΌοΈ ...") look like mojibake-garbled emoji — confirm the intended
# characters against the original source. The original indentation was
# lost; nesting here is reconstructed from the widget flow (in particular,
# verify_btn is placed full-width below the input row) — confirm.
with gr.Blocks(theme=theme, title="DocuVerify Pro") as demo:
    gr.Markdown(
        """
        # π Intelligent Document Verification
        Upload documents, specify required types, and verify content matches automatically.
        """
    )

    # Input area: uploads + keywords (left), required types + settings (right).
    with gr.Row():

        with gr.Column(scale=4):
            files_input = gr.File(
                file_count="multiple",
                label="1. Upload Documents",
                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".bmp"],
                height=250
            )

            keywords_input = gr.Textbox(
                label="2. Keywords to Verify",
                placeholder="Name, ID Number, Date of Birth...",
                info="Enter values that MUST appear in the documents (space separated)",
                lines=2
            )

        with gr.Column(scale=3):

            # Dropdown choices come from logiccode's classification table
            # when it exposes one; otherwise the list is empty.
            available_types = sorted(list(logiccode.DOC_KEYWORDS.keys())) if hasattr(logiccode, 'DOC_KEYWORDS') else []

            required_docs_input = gr.Dropdown(
                choices=available_types,
                multiselect=True,
                label="3. Required Document Types",
                info="Which documents are mandatory?",
                value=[]
            )

            with gr.Group():
                gr.Markdown("### Settings")
                fuzzy_checkbox = gr.Checkbox(value=True, label="Enable Fuzzy Matching (Approximate spelling)")
                debug_checkbox = gr.Checkbox(value=False, label="Show Debug Logs")

    verify_btn = gr.Button("π Verify Documents", variant="primary", size="lg")

    gr.Markdown("---")

    # Output area: overall status (left), tabbed table / gallery / logs (right).
    with gr.Row():

        with gr.Column(scale=1):
            status_output = gr.HTML(label="Overall Status")

        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("π Results Table"):
                    results_df = gr.Dataframe(
                        headers=["File", "Type", "Score", "Status", "Matched Keywords"],
                        interactive=False
                    )

                with gr.TabItem("πΌοΈ Document Gallery"):
                    gallery = gr.Gallery(
                        label="Processed Images",
                        show_label=False,
                        columns=[3], rows=[2],
                        object_fit="contain",
                        height="auto"
                    )

                with gr.TabItem("π System Logs"):
                    logs_output = gr.Textbox(
                        label="Processing Logs",
                        lines=15,
                        interactive=False,
                        show_copy_button=True
                    )

    # Wire the button to the pipeline; the outputs list maps 1:1 onto the
    # four values process_documents returns.
    verify_btn.click(
        fn=process_documents,
        inputs=[files_input, keywords_input, required_docs_input, fuzzy_checkbox, debug_checkbox],
        outputs=[status_output, gallery, results_df, logs_output]
    )
|
|
|
| if __name__ == "__main__":
|
|
|
| demo.launch(share=False, server_name="0.0.0.0") |