layout_latex / app.py
heerjtdev's picture
Update app.py
6bc4d1a verified
raw
history blame
9.8 kB
# import gradio as gr
# print("GRADIO VERSION:", gr.__version__)
# import json
# import os
# import tempfile
# from pathlib import Path
# # ==============================
# # PIPELINE IMPORT
# # ==============================
# try:
# from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
# except ImportError:
# print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")
# def run_document_pipeline(*args):
# return {"error": "Placeholder pipeline function called."}
# DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
# WEIGHTS_PATH = "./weights/yolo_weights.pt"
# def process_file(uploaded_file, layoutlmv3_model_path=None):
# """
# Handles both PDF and Image uploads and routes them to the YOLO/OCR pipeline.
# """
# if uploaded_file is None:
# return "❌ Error: No file uploaded.", None
# if not layoutlmv3_model_path:
# layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH
# if not os.path.exists(layoutlmv3_model_path):
# return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None
# if not os.path.exists(WEIGHTS_PATH):
# return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None
# try:
# file_path = uploaded_file.name
# # Determine file type for logging
# ext = Path(file_path).suffix.lower()
# file_type = "Image" if ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'] else "PDF"
# print(f"🚀 Starting pipeline for {file_type}: {file_path}")
# # Call the pipeline exactly as before.
# # Our modified working_yolo_pipeline now handles the branching internally.
# result = run_document_pipeline(file_path, layoutlmv3_model_path)
# if result is None:
# return "❌ Error: Pipeline failed to process the document. Check console for details.", None
# # Prepare output file for download
# output_filename = f"{Path(file_path).stem}_analysis.json"
# temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
# with open(temp_output.name, 'w', encoding='utf-8') as f:
# json.dump(result, f, indent=2, ensure_ascii=False)
# json_display = json.dumps(result, indent=2, ensure_ascii=False)
# return json_display, temp_output.name
# except Exception as e:
# return f"❌ Error during processing: {str(e)}", None
# # ==============================
# # GRADIO INTERFACE
# # ==============================
# with gr.Blocks(title="Document Analysis Pipeline") as demo:
# gr.Markdown("""
# # 📄 Document & Image Analysis Pipeline
# Upload a **PDF document** or an **Image (JPG/PNG)** to extract structured data.
# **Supported Formats:** `.pdf`, `.jpg`, `.jpeg`, `.png`, `.bmp`, `.webp`
# **Pipeline Steps:**
# 1. 🔍 **YOLO/OCR**: Word extraction + Figure/Equation detection
# 2. 🤖 **LayoutLMv3**: BIO tagging and structural analysis
# 3. 📊 **Decoding**: Conversion to hierarchical JSON
# 4. 🖼️ **Extraction**: Base64 embedding of detected visual elements
# """)
# with gr.Row():
# with gr.Column(scale=1):
# file_input = gr.File(
# label="Upload PDF or Image",
# file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
# type="filepath"
# )
# model_path_input = gr.Textbox(
# label="LayoutLMv3 Model Path (optional)",
# placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
# value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
# interactive=True
# )
# process_btn = gr.Button("🚀 Process File", variant="primary", size="lg")
# gr.Markdown("""
# ### ℹ️ Notes:
# - **Images** are treated as single-page documents.
# - **PDFs** are processed page-by-page.
# - High-resolution Tesseract OCR is used for all image content.
# """)
# with gr.Column(scale=2):
# json_output = gr.Code(
# label="Structured JSON Output",
# language="json",
# lines=25
# )
# download_output = gr.File(
# label="Download Full JSON",
# interactive=False
# )
# # UI Logic
# process_btn.click(
# fn=process_file,
# inputs=[file_input, model_path_input],
# outputs=[json_output, download_output],
# api_name="process_document"
# )
# if __name__ == "__main__":
# demo.launch(
# server_name="0.0.0.0",
# server_port=7860,
# share=False,
# show_error=True
# )
import gradio as gr
print("GRADIO VERSION:", gr.__version__)
import json
import os
import tempfile
from pathlib import Path
# ==============================
# PIPELINE IMPORT
# ==============================
try:
    from working_yolo_pipeline import (
        DEFAULT_LAYOUTLMV3_MODEL_PATH,
        WEIGHTS_PATH,
        run_document_pipeline,
    )
except ImportError:
    # The real pipeline module could not be imported: keep the app importable
    # by providing placeholder paths and a stub pipeline entry point.
    print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")

    DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
    WEIGHTS_PATH = "./weights/yolo_weights.pt"

    def run_document_pipeline(*_ignored):
        """Stand-in for the real pipeline; ignores its arguments.

        Returns:
            A dict with a single ``"error"`` key explaining that the
            placeholder was called instead of the real pipeline.
        """
        placeholder_result = {"error": "Placeholder pipeline function called."}
        return placeholder_result
# Extensions reported as "Image" in the log line; everything else is logged as "PDF".
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp')


def _resolve_upload_path(uploaded_file):
    """Return the local path string for one uploaded-file value, or None.

    Accepts the shapes this app has had to deal with: a plain path
    (``str`` / ``os.PathLike``), a dict carrying a ``"path"`` key, an object
    with a ``.path`` attribute, or an object with only a ``.name`` attribute
    (the attribute the earlier version of this handler read).
    """
    # Path-like values are checked first: pathlib.Path also has a ``.name``
    # attribute, but that is only the final component, not the full path.
    if isinstance(uploaded_file, (str, os.PathLike)):
        return os.fspath(uploaded_file)
    if isinstance(uploaded_file, dict):
        return uploaded_file.get("path")
    for attribute in ("path", "name"):
        candidate = getattr(uploaded_file, attribute, None)
        if candidate:
            return candidate
    return str(uploaded_file)


def process_file(uploaded_file, layoutlmv3_model_path=None):
    """
    Handles both PDF and Image uploads and routes them to the YOLO/OCR pipeline.

    Args:
        uploaded_file: The value delivered by the file input. May be ``None``,
            a path string, a dict with a ``"path"`` key, an object exposing
            ``.path`` or ``.name``, or a list/tuple of any of these (only the
            first entry is used).
        layoutlmv3_model_path: Path to the LayoutLMv3 model. Falls back to
            ``DEFAULT_LAYOUTLMV3_MODEL_PATH`` when empty or ``None``.

    Returns:
        A ``(text, download_path)`` tuple. On success ``text`` is the
        pretty-printed JSON result and ``download_path`` is a temporary
        ``.json`` file holding the same content. On any failure ``text`` is an
        error message starting with "❌" and ``download_path`` is ``None``.
        Exceptions raised while processing are reported this way rather than
        propagated.
    """
    # A list/tuple may arrive even for a single-file input; an empty one is
    # treated the same as "nothing uploaded" instead of raising IndexError.
    if isinstance(uploaded_file, (list, tuple)):
        uploaded_file = uploaded_file[0] if uploaded_file else None

    if uploaded_file is None:
        return "❌ Error: No file uploaded.", None

    try:
        file_path = _resolve_upload_path(uploaded_file)
    except Exception as e:
        return f"❌ Error resolving file path: {e}", None
    if not file_path:
        # e.g. a dict without a "path" key; fail here with a clear message
        # rather than later with an opaque TypeError from Path(None).
        return "❌ Error resolving file path: upload has no usable path.", None

    if not layoutlmv3_model_path:
        layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH

    if not os.path.exists(layoutlmv3_model_path):
        return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None

    if not os.path.exists(WEIGHTS_PATH):
        return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None

    try:
        # The file type is derived only for the log line; the pipeline
        # receives the path unchanged.
        ext = Path(file_path).suffix.lower()
        file_type = "Image" if ext in _IMAGE_EXTENSIONS else "PDF"
        print(f"🚀 Starting pipeline for {file_type}: {file_path}")

        result = run_document_pipeline(file_path, layoutlmv3_model_path)

        if result is None:
            return "❌ Error: Pipeline failed to process the document. Check console for details.", None

        # Serialise once and reuse the text for both the UI and the download.
        json_display = json.dumps(result, indent=2, ensure_ascii=False)

        # delete=False keeps the file on disk after the handle is closed so it
        # can be offered for download; the ``with`` block guarantees the
        # handle itself is closed (the file is no longer opened twice).
        with tempfile.NamedTemporaryFile(
            mode='w',
            delete=False,
            suffix='.json',
            prefix='analysis_',
            encoding='utf-8',
        ) as temp_output:
            temp_output.write(json_display)

        return json_display, temp_output.name

    except Exception as e:
        # Boundary handler: print the full traceback to the console and show
        # a short message in the UI instead of crashing the request.
        import traceback
        traceback.print_exc()
        return f"❌ Error during processing: {e}", None
# ==============================
# GRADIO INTERFACE
# ==============================
# Builds the single-page UI: an upload/settings column on the left and a
# results column (JSON viewer + download) on the right.
with gr.Blocks(title="Document Analysis Pipeline") as demo:
    # Page heading. The emoji were mis-encoded in the previous text and are
    # restored here; the rest of the Markdown is unchanged.
    gr.Markdown(
        "\n"
        "# 📄 Document & Image Analysis Pipeline\n"
        "Upload a **PDF document** or an **Image (JPG/PNG)** to extract structured data.\n"
    )

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload PDF or Image",
                file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
                type="filepath",
                file_count="single",  # Force single file to avoid list/tuple issues
            )
            # Pre-filled with the default model path; process_file falls back
            # to the same default when the box is left empty.
            model_path_input = gr.Textbox(
                label="LayoutLMv3 Model Path (optional)",
                placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                interactive=True,
            )
            process_btn = gr.Button("🚀 Process File", variant="primary", size="lg")

        with gr.Column(scale=2):
            json_output = gr.Code(
                label="Structured JSON Output",
                language="json",
                lines=25,
            )
            download_output = gr.File(
                label="Download Full JSON",
                interactive=False,
            )

    # UI Logic: the button feeds both inputs to process_file, whose
    # (text, path) return value fills the JSON viewer and the download slot.
    process_btn.click(
        fn=process_file,
        inputs=[file_input, model_path_input],
        outputs=[json_output, download_output],
        api_name="process_document",
    )
if __name__ == "__main__":
    # Server settings used when the file is run directly as a script.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)