Spaces:

heerjtdev
/

layout_latex

Running

App Files Files Community

heerjtdev committed on Jan 16

Commit

78471d0

verified ·

1 Parent(s): fa67ff4

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -176

app.py CHANGED Viewed

@@ -2,152 +2,11 @@
-# import gradio as gr
-# import json
-# import os
-# import tempfile
-# import img2pdf
-# from img2pdf import Rotation
-# from pathlib import Path
-# # ==============================
-# # PIPELINE IMPORT
-# # ==============================
-# try:
-#     from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
-# except ImportError:
-#     print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")
-#     def run_document_pipeline(*args):
-#         return {"error": "Placeholder pipeline function called."}
-#     DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
-#     WEIGHTS_PATH = "./weights/yolo_weights.pt"
-# def process_file(uploaded_files, layoutlmv3_model_path=None):
-#     """
-#     Robust handler for multiple or single file uploads.
-#     """
-#     if uploaded_files is None:
-#         return "❌ Error: No files uploaded.", None
-#     # --- THE ROBUST FIX ---
-#     # Gradio sometimes sends a single dict even when set to multiple.
-#     # We force everything into a list so the rest of the logic doesn't break.
-#     if not isinstance(uploaded_files, list):
-#         file_list = [uploaded_files]
-#     else:
-#         file_list = uploaded_files
-#     if len(file_list) == 0:
-#         return "❌ Error: Empty file list.", None
-#     # ----------------------
-#     # 1. Resolve all file paths safely
-#     resolved_paths = []
-#     for f in file_list:
-#         try:
-#             if isinstance(f, dict) and "path" in f:
-#                 resolved_paths.append(f["path"])
-#             elif hasattr(f, 'path'):
-#                 resolved_paths.append(f.path)
-#             else:
-#                 resolved_paths.append(str(f))
-#         except Exception as e:
-#             print(f"Error resolving path for {f}: {e}")
-#     if not resolved_paths:
-#         return "❌ Error: Could not resolve file paths.", None
-#     # 2. Determine if we should merge into a single PDF
-#     first_file = Path(resolved_paths[0])
-#     is_image = first_file.suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff']
-#     try:
-#         # If it's multiple files or just one image, wrap it in a PDF
-#         if len(resolved_paths) > 1 or is_image:
-#             print(f"📦 Converting {len(resolved_paths)} image(s) to a single PDF...")
-#             temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
-#             with open(temp_pdf.name, "wb") as f_out:
-#                 # f_out.write(img2pdf.convert(resolved_paths))
-#                 f_out.write(img2pdf.convert(resolved_paths, rotation=Rotation.ifvalid))
-#             processing_path = temp_pdf.name
-#         else:
-#             # It's a single PDF
-#             processing_path = resolved_paths[0]
-#         # 3. Standard Pipeline Checks
-#         final_model_path = layoutlmv3_model_path or DEFAULT_LAYOUTLMV3_MODEL_PATH
-#         if not os.path.exists(final_model_path):
-#             return f"❌ Error: Model not found at {final_model_path}", None
-#         # 4. Call the pipeline
-#         print(f"🚀 Starting pipeline for: {processing_path}")
-#         result = run_document_pipeline(processing_path, final_model_path)
-#         if result is None:
-#             return "❌ Error: Pipeline returned None.", None
-#         # 5. Prepare output
-#         temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
-#         with open(temp_output.name, 'w', encoding='utf-8') as f:
-#             json.dump(result, f, indent=2, ensure_ascii=False)
-#         return json.dumps(result, indent=2, ensure_ascii=False), temp_output.name
-#     except Exception as e:
-#         import traceback
-#         traceback.print_exc()
-#         return f"❌ Error: {str(e)}", None
-# # ==============================
-# # GRADIO INTERFACE
-# # ==============================
-# with gr.Blocks(title="Document Analysis Pipeline") as demo:
-#     gr.Markdown("# 📄 Document & Image Analysis Pipeline")
-#     with gr.Row():
-#         with gr.Column(scale=1):
-#             file_input = gr.File(
-#                 label="Upload PDFs or Images",
-#                 file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
-#                 file_count="multiple", # Keep this
-#                 type="filepath"       # Keep this
-#             )
-#             model_path_input = gr.Textbox(
-#                 label="Model Path",
-#                 value=DEFAULT_LAYOUTLMV3_MODEL_PATH
-#             )
-#             process_btn = gr.Button("🚀 Process Files", variant="primary")
-#         with gr.Column(scale=2):
-#             json_output = gr.Code(label="JSON Output", language="json", lines=20)
-#             download_output = gr.File(label="Download JSON")
-#     process_btn.click(
-#         fn=process_file,
-#         inputs=[file_input, model_path_input],
-#         outputs=[json_output, download_output]
-#     )
-# if __name__ == "__main__":
-#     demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
 import gradio as gr
 import json
 import os
 import tempfile
 import img2pdf
-import glob
 from img2pdf import Rotation
 from pathlib import Path
@@ -166,8 +25,6 @@ except ImportError:
 def process_file(uploaded_files, layoutlmv3_model_path=None):
     """
     Robust handler for multiple or single file uploads.
-    Returns the final JSON and the file path for download.
-    If the pipeline fails at BIO conversion, it attempts to return the raw predictions for debugging.
     """
     if uploaded_files is None:
         return "❌ Error: No files uploaded.", None
@@ -210,7 +67,9 @@ def process_file(uploaded_files, layoutlmv3_model_path=None):
             print(f"📦 Converting {len(resolved_paths)} image(s) to a single PDF...")
             temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
             with open(temp_pdf.name, "wb") as f_out:
                 f_out.write(img2pdf.convert(resolved_paths, rotation=Rotation.ifvalid))
             processing_path = temp_pdf.name
         else:
             # It's a single PDF
@@ -225,33 +84,10 @@ def process_file(uploaded_files, layoutlmv3_model_path=None):
         print(f"🚀 Starting pipeline for: {processing_path}")
         result = run_document_pipeline(processing_path, final_model_path)
-        # --- DEBUGGING LOGIC FOR STEP 3 FAILURE ---
-        if result is None or (isinstance(result, list) and len(result) == 0):
-            print("⚠️ Pipeline returned no structured data. Looking for raw predictions for debugging...")
-            # Based on your logs, the pipeline creates a folder like /tmp/pipeline_run_[filename]
-            base_name = Path(processing_path).stem
-            search_pattern = f"/tmp/pipeline_run_{base_name}*/*_raw_predictions.json"
-            possible_files = glob.glob(search_pattern)
-            if possible_files:
-                debug_file = possible_files[0]
-                print(f"🔍 DEBUG: Found raw predictions at {debug_file}")
-                with open(debug_file, 'r', encoding='utf-8') as f:
-                    raw_data = json.load(f)
-                # Return the raw labels to the UI so you can see why it failed
-                return (
-                    "⚠️ WARNING: BIO Decoding Failed (Step 3).\n"
-                    "Showing RAW LayoutLMv3 predictions instead for analysis:\n\n" +
-                    json.dumps(raw_data, indent=2, ensure_ascii=False),
-                    debug_file
-                )
-            return "❌ Error: Pipeline failed and no intermediate raw prediction file was found.", None
-        # ------------------------------------------
-        # 5. Prepare output (Successful Path)
         temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
         with open(temp_output.name, 'w', encoding='utf-8') as f:
             json.dump(result, f, indent=2, ensure_ascii=False)
@@ -269,15 +105,14 @@ def process_file(uploaded_files, layoutlmv3_model_path=None):
 with gr.Blocks(title="Document Analysis Pipeline") as demo:
     gr.Markdown("# 📄 Document & Image Analysis Pipeline")
-    gr.Markdown("### 🛠 Debug Mode Active: If Step 3 fails, the Raw Prediction file will be returned.")
     with gr.Row():
         with gr.Column(scale=1):
             file_input = gr.File(
                 label="Upload PDFs or Images",
                 file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
-                file_count="multiple",
-                type="filepath"
             )
             model_path_input = gr.Textbox(
@@ -288,8 +123,8 @@ with gr.Blocks(title="Document Analysis Pipeline") as demo:
             process_btn = gr.Button("🚀 Process Files", variant="primary")
         with gr.Column(scale=2):
-            json_output = gr.Code(label="JSON Output (Structured or Raw Predictions)", language="json", lines=20)
-            download_output = gr.File(label="Download JSON File")
     process_btn.click(
         fn=process_file,
@@ -298,5 +133,9 @@ with gr.Blocks(title="Document Analysis Pipeline") as demo:
     )
 if __name__ == "__main__":
-    # Note: 0.0.0.0 allows access from outside the container/host
-    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)

 import gradio as gr
 import json
 import os
 import tempfile
 import img2pdf
 from img2pdf import Rotation
 from pathlib import Path
 def process_file(uploaded_files, layoutlmv3_model_path=None):
     """
     Robust handler for multiple or single file uploads.
     """
     if uploaded_files is None:
         return "❌ Error: No files uploaded.", None
             print(f"📦 Converting {len(resolved_paths)} image(s) to a single PDF...")
             temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
             with open(temp_pdf.name, "wb") as f_out:
+                # f_out.write(img2pdf.convert(resolved_paths))
                 f_out.write(img2pdf.convert(resolved_paths, rotation=Rotation.ifvalid))
             processing_path = temp_pdf.name
         else:
             # It's a single PDF
         print(f"🚀 Starting pipeline for: {processing_path}")
         result = run_document_pipeline(processing_path, final_model_path)
+        if result is None:
+            return "❌ Error: Pipeline returned None.", None
+        # 5. Prepare output
         temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
         with open(temp_output.name, 'w', encoding='utf-8') as f:
             json.dump(result, f, indent=2, ensure_ascii=False)
 with gr.Blocks(title="Document Analysis Pipeline") as demo:
     gr.Markdown("# 📄 Document & Image Analysis Pipeline")
     with gr.Row():
         with gr.Column(scale=1):
             file_input = gr.File(
                 label="Upload PDFs or Images",
                 file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
+                file_count="multiple", # Keep this
+                type="filepath"       # Keep this
             )
             model_path_input = gr.Textbox(
             process_btn = gr.Button("🚀 Process Files", variant="primary")
         with gr.Column(scale=2):
+            json_output = gr.Code(label="JSON Output", language="json", lines=20)
+            download_output = gr.File(label="Download JSON")
     process_btn.click(
         fn=process_file,
     )
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)