"""Gradio front end for the document and image analysis pipeline.

Uploaded images are merged into one temporary PDF with ``img2pdf``; a single
non-image upload is handed to the pipeline as-is. The pipeline result is shown
as JSON in the UI and also written to a temporary ``.json`` file for download.
"""

import json
import os
import tempfile
import traceback
from pathlib import Path

import gradio as gr
import img2pdf
from img2pdf import Rotation

# ==============================
# PIPELINE IMPORT
# ==============================
try:
    from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
except ImportError:
    # Fall back to stubs so the UI can still start without the real pipeline.
    print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")

    def run_document_pipeline(*args):
        """Stand-in used when the real pipeline module cannot be imported."""
        return {"error": "Placeholder pipeline function called."}

    DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
    WEIGHTS_PATH = "./weights/yolo_weights.pt"


# Suffixes (lower-case) that mark a single upload as an image needing PDF wrapping.
IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff')


def _resolve_upload_paths(file_list):
    """Return the filesystem path of every entry in *file_list*.

    Entries may be dicts with a ``"path"`` key, objects exposing a ``path``
    attribute, or anything else (converted with ``str``). Entries whose path
    cannot be read are reported on stdout and skipped.
    """
    resolved = []
    for item in file_list:
        try:
            if isinstance(item, dict) and "path" in item:
                resolved.append(item["path"])
            elif hasattr(item, 'path'):
                resolved.append(item.path)
            else:
                resolved.append(str(item))
        except Exception as e:  # best effort: one bad entry must not sink the rest
            print(f"Error resolving path for {item}: {e}")
    return resolved


def _merge_images_to_pdf(image_paths):
    """Convert *image_paths* into one temporary PDF and return its path.

    The caller owns the returned file and is responsible for deleting it.
    """
    # Convert before creating the temp file so a failed conversion leaves no
    # empty PDF behind. Rotation.ifvalid is intended to keep images with
    # invalid EXIF rotation metadata from aborting the conversion.
    pdf_bytes = img2pdf.convert(image_paths, rotation=Rotation.ifvalid)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as pdf_file:
        pdf_file.write(pdf_bytes)
    return pdf_file.name


def _write_result_json(json_text):
    """Write *json_text* to a persistent temporary ``.json`` file; return its path.

    The file is deliberately not deleted: Gradio serves it as the download.
    """
    with tempfile.NamedTemporaryFile(
        mode='w',
        delete=False,
        suffix='.json',
        prefix='analysis_',
        encoding='utf-8',
    ) as json_file:
        json_file.write(json_text)
    return json_file.name


def process_file(uploaded_files, layoutlmv3_model_path=None):
    """Run the document pipeline on the uploaded file(s).

    Args:
        uploaded_files: A single upload or a list of uploads (paths, dicts
            with a ``"path"`` key, or objects with a ``path`` attribute).
        layoutlmv3_model_path: Model location; empty or ``None`` falls back
            to ``DEFAULT_LAYOUTLMV3_MODEL_PATH``.

    Returns:
        A ``(text, download_path)`` tuple. On success ``text`` is the
        pretty-printed JSON result and ``download_path`` is a temporary
        ``.json`` file containing the same text. On failure ``text`` is an
        error message starting with "❌ Error:" and ``download_path`` is
        ``None``. Exceptions are caught and reported, never raised.
    """
    if uploaded_files is None:
        return "❌ Error: No files uploaded.", None

    # Gradio sometimes sends a single item even when set to multiple,
    # so normalise everything to a list.
    file_list = uploaded_files if isinstance(uploaded_files, list) else [uploaded_files]
    if not file_list:
        return "❌ Error: Empty file list.", None

    resolved_paths = _resolve_upload_paths(file_list)
    if not resolved_paths:
        return "❌ Error: Could not resolve file paths.", None

    merged_pdf = None  # temp PDF created by this call, removed in `finally`
    try:
        # Check the model first so no temp PDF is produced for a doomed run.
        requested_model = str(layoutlmv3_model_path).strip() if layoutlmv3_model_path else ""
        final_model_path = requested_model or DEFAULT_LAYOUTLMV3_MODEL_PATH
        if not os.path.exists(final_model_path):
            return f"❌ Error: Model not found at {final_model_path}", None

        first_is_image = Path(resolved_paths[0]).suffix.lower() in IMAGE_SUFFIXES
        if len(resolved_paths) > 1 or first_is_image:
            # img2pdf only accepts images; name the offending PDFs instead of
            # surfacing an opaque library error.
            pdf_inputs = [p for p in resolved_paths if Path(p).suffix.lower() == ".pdf"]
            if pdf_inputs:
                names = ", ".join(Path(p).name for p in pdf_inputs)
                return (
                    f"❌ Error: PDFs cannot be merged with other uploads ({names}). "
                    "Upload a single PDF or images only.",
                    None,
                )
            print(f"📦 Converting {len(resolved_paths)} image(s) to a single PDF...")
            merged_pdf = _merge_images_to_pdf(resolved_paths)
            processing_path = merged_pdf
        else:
            # A single non-image upload (normally a PDF) is processed directly.
            processing_path = resolved_paths[0]

        print(f"🚀 Starting pipeline for: {processing_path}")
        result = run_document_pipeline(processing_path, final_model_path)
        if result is None:
            return "❌ Error: Pipeline returned None.", None

        # Serialise once; the same text feeds both the UI and the download.
        json_text = json.dumps(result, indent=2, ensure_ascii=False)
        return json_text, _write_result_json(json_text)
    except Exception as e:
        # UI boundary: log the traceback and show the message to the user.
        traceback.print_exc()
        return f"❌ Error: {str(e)}", None
    finally:
        # The merged PDF is only an intermediate input for the pipeline.
        if merged_pdf is not None:
            try:
                os.remove(merged_pdf)
            except OSError:
                pass


# ==============================
# GRADIO INTERFACE
# ==============================
with gr.Blocks(title="Document Analysis Pipeline") as demo:
    gr.Markdown("# 📄 Document & Image Analysis Pipeline")

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload PDFs or Images",
                file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
                file_count="multiple",  # several images may be merged into one PDF
                type="filepath"  # process_file works on filesystem paths
            )
            model_path_input = gr.Textbox(
                label="Model Path",
                value=DEFAULT_LAYOUTLMV3_MODEL_PATH
            )
            process_btn = gr.Button("🚀 Process Files", variant="primary")

        with gr.Column(scale=2):
            json_output = gr.Code(label="JSON Output", language="json", lines=20)
            download_output = gr.File(label="Download JSON")

    process_btn.click(
        fn=process_file,
        inputs=[file_input, model_path_input],
        outputs=[json_output, download_output]
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)