heerjtdev committed on
Commit
ec49c23
·
verified ·
1 Parent(s): 17d97ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -187
app.py CHANGED
@@ -1,149 +1,8 @@
1
- # import gradio as gr
2
- # print("GRADIO VERSION:", gr.__version__)
3
- # import json
4
- # import os
5
- # import tempfile
6
- # from pathlib import Path
7
-
8
- # # ==============================
9
- # # PIPELINE IMPORT
10
- # # ==============================
11
- # try:
12
- # from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
13
- # except ImportError:
14
- # print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")
15
- # def run_document_pipeline(*args):
16
- # return {"error": "Placeholder pipeline function called."}
17
- # DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
18
- # WEIGHTS_PATH = "./weights/yolo_weights.pt"
19
-
20
-
21
- # def process_file(uploaded_file, layoutlmv3_model_path=None):
22
- # """
23
- # Handles both PDF and Image uploads and routes them to the YOLO/OCR pipeline.
24
- # """
25
- # if uploaded_file is None:
26
- # return "❌ Error: No file uploaded.", None
27
-
28
- # if not layoutlmv3_model_path:
29
- # layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH
30
-
31
- # if not os.path.exists(layoutlmv3_model_path):
32
- # return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None
33
-
34
- # if not os.path.exists(WEIGHTS_PATH):
35
- # return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None
36
-
37
- # try:
38
- # file_path = uploaded_file.name
39
-
40
- # # Determine file type for logging
41
- # ext = Path(file_path).suffix.lower()
42
- # file_type = "Image" if ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'] else "PDF"
43
- # print(f"πŸš€ Starting pipeline for {file_type}: {file_path}")
44
-
45
- # # Call the pipeline exactly as before.
46
- # # Our modified working_yolo_pipeline now handles the branching internally.
47
- # result = run_document_pipeline(file_path, layoutlmv3_model_path)
48
-
49
- # if result is None:
50
- # return "❌ Error: Pipeline failed to process the document. Check console for details.", None
51
-
52
- # # Prepare output file for download
53
- # output_filename = f"{Path(file_path).stem}_analysis.json"
54
- # temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
55
-
56
- # with open(temp_output.name, 'w', encoding='utf-8') as f:
57
- # json.dump(result, f, indent=2, ensure_ascii=False)
58
-
59
- # json_display = json.dumps(result, indent=2, ensure_ascii=False)
60
-
61
- # return json_display, temp_output.name
62
-
63
- # except Exception as e:
64
- # return f"❌ Error during processing: {str(e)}", None
65
-
66
-
67
- # # ==============================
68
- # # GRADIO INTERFACE
69
- # # ==============================
70
- # with gr.Blocks(title="Document Analysis Pipeline") as demo:
71
-
72
- # gr.Markdown("""
73
- # # πŸ“„ Document & Image Analysis Pipeline
74
-
75
- # Upload a **PDF document** or an **Image (JPG/PNG)** to extract structured data.
76
-
77
- # **Supported Formats:** `.pdf`, `.jpg`, `.jpeg`, `.png`, `.bmp`, `.webp`
78
-
79
- # **Pipeline Steps:**
80
- # 1. πŸ” **YOLO/OCR**: Word extraction + Figure/Equation detection
81
- # 2. πŸ€– **LayoutLMv3**: BIO tagging and structural analysis
82
- # 3. πŸ“Š **Decoding**: Conversion to hierarchical JSON
83
- # 4. πŸ–ΌοΈ **Extraction**: Base64 embedding of detected visual elements
84
- # """)
85
-
86
- # with gr.Row():
87
- # with gr.Column(scale=1):
88
- # file_input = gr.File(
89
- # label="Upload PDF or Image",
90
- # file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
91
- # type="filepath"
92
- # )
93
-
94
- # model_path_input = gr.Textbox(
95
- # label="LayoutLMv3 Model Path (optional)",
96
- # placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
97
- # value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
98
- # interactive=True
99
- # )
100
-
101
- # process_btn = gr.Button("πŸš€ Process File", variant="primary", size="lg")
102
-
103
- # gr.Markdown("""
104
- # ### ℹ️ Notes:
105
- # - **Images** are treated as single-page documents.
106
- # - **PDFs** are processed page-by-page.
107
- # - High-resolution Tesseract OCR is used for all image content.
108
- # """)
109
-
110
- # with gr.Column(scale=2):
111
- # json_output = gr.Code(
112
- # label="Structured JSON Output",
113
- # language="json",
114
- # lines=25
115
- # )
116
-
117
- # download_output = gr.File(
118
- # label="Download Full JSON",
119
- # interactive=False
120
- # )
121
-
122
- # # UI Logic
123
- # process_btn.click(
124
- # fn=process_file,
125
- # inputs=[file_input, model_path_input],
126
- # outputs=[json_output, download_output],
127
- # api_name="process_document"
128
- # )
129
-
130
- # if __name__ == "__main__":
131
- # demo.launch(
132
- # server_name="0.0.0.0",
133
- # server_port=7860,
134
- # share=False,
135
- # show_error=True
136
- # )
137
-
138
-
139
-
140
-
141
-
142
  import gradio as gr
143
- print("GRADIO VERSION:", gr.__version__)
144
  import json
145
  import os
146
  import tempfile
 
147
  from pathlib import Path
148
 
149
  # ==============================
@@ -158,71 +17,78 @@ except ImportError:
158
  DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
159
  WEIGHTS_PATH = "./weights/yolo_weights.pt"
160
 
161
-
162
- def process_file(uploaded_file, layoutlmv3_model_path=None):
163
  """
164
- Handles both PDF and Image uploads and routes them to the YOLO/OCR pipeline.
 
165
  """
166
- if uploaded_file is None:
167
- return "❌ Error: No file uploaded.", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
- # --- FIX FOR GRADIO 6.x FILE HANDLING ---
170
- # If multiple files were somehow uploaded or Gradio returned a list
171
- if isinstance(uploaded_file, list):
172
- uploaded_file = uploaded_file[0]
 
 
173
 
174
- # Extract the actual file path string.
175
- # Gradio File objects have a '.path' attribute for the temporary local location.
176
  try:
177
- if hasattr(uploaded_file, 'path'):
178
- file_path = uploaded_file.path
179
- elif isinstance(uploaded_file, dict):
180
- file_path = uploaded_file.get("path")
 
 
 
181
  else:
182
- file_path = str(uploaded_file)
183
- except Exception as e:
184
- return f"❌ Error resolving file path: {str(e)}", None
185
- # ---------------------------------------
186
 
187
- if not layoutlmv3_model_path:
188
- layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH
 
189
 
190
- if not os.path.exists(layoutlmv3_model_path):
191
- return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None
192
 
193
- if not os.path.exists(WEIGHTS_PATH):
194
- return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None
195
 
196
- try:
197
- # Determine file type for logging safely
198
- ext = Path(file_path).suffix.lower()
199
- file_type = "Image" if ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'] else "PDF"
200
- print(f"πŸš€ Starting pipeline for {file_type}: {file_path}")
201
 
202
- # Call the pipeline
203
- result = run_document_pipeline(file_path, layoutlmv3_model_path)
204
 
205
  if result is None:
206
- return "❌ Error: Pipeline failed to process the document. Check console for details.", None
207
 
208
- # Prepare output file for download
209
- output_filename = f"{Path(file_path).stem}_analysis.json"
210
  temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
211
-
212
  with open(temp_output.name, 'w', encoding='utf-8') as f:
213
  json.dump(result, f, indent=2, ensure_ascii=False)
214
 
215
  json_display = json.dumps(result, indent=2, ensure_ascii=False)
216
-
217
  return json_display, temp_output.name
218
 
219
  except Exception as e:
220
- # This is where your previous error message was being caught and returned
221
  import traceback
222
- traceback.print_exc() # This prints the full error to your terminal for debugging
223
  return f"❌ Error during processing: {str(e)}", None
224
 
225
-
226
  # ==============================
227
  # GRADIO INTERFACE
228
  # ==============================
@@ -230,16 +96,16 @@ with gr.Blocks(title="Document Analysis Pipeline") as demo:
230
 
231
  gr.Markdown("""
232
  # πŸ“„ Document & Image Analysis Pipeline
233
- Upload a **PDF document** or an **Image (JPG/PNG)** to extract structured data.
234
  """)
235
 
236
  with gr.Row():
237
  with gr.Column(scale=1):
238
  file_input = gr.File(
239
- label="Upload PDF or Image",
240
  file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
241
  type="filepath",
242
- file_count="single" # Force single file to avoid list/tuple issues
243
  )
244
 
245
  model_path_input = gr.Textbox(
@@ -249,11 +115,11 @@ with gr.Blocks(title="Document Analysis Pipeline") as demo:
249
  interactive=True
250
  )
251
 
252
- process_btn = gr.Button("πŸš€ Process File", variant="primary", size="lg")
253
 
254
  with gr.Column(scale=2):
255
  json_output = gr.Code(
256
- label="Structured JSON Output",
257
  language="json",
258
  lines=25
259
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
2
  import json
3
  import os
4
  import tempfile
5
+ import img2pdf
6
  from pathlib import Path
7
 
8
  # ==============================
 
17
  DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
18
  WEIGHTS_PATH = "./weights/yolo_weights.pt"
19
 
20
# Extensions treated as images that can be merged into one PDF.
_IMAGE_SUFFIXES = frozenset({'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'})


def _resolve_upload_path(upload):
    """Return the local path of one upload as a string, or None if it has none.

    Accepts the three shapes the caller handles: an object exposing a
    ``path`` attribute, a dict with a ``"path"`` key, or a plain path value.
    """
    if isinstance(upload, dict):
        candidate = upload.get("path")
    else:
        candidate = getattr(upload, "path", upload)
    if candidate is None:
        return None
    return str(candidate) or None


def process_file(uploaded_files, layoutlmv3_model_path=None):
    """Run the document pipeline on one PDF or on one-or-more images.

    Image uploads (one or many) are merged into a single temporary PDF so
    the pipeline sees them as one document. A single non-image upload is
    passed to the pipeline unchanged.

    Args:
        uploaded_files: A single upload or a list of uploads. Each item may
            be a path string, a dict with a ``"path"`` key, or an object
            with a ``path`` attribute.
        layoutlmv3_model_path: Optional model location; falls back to
            ``DEFAULT_LAYOUTLMV3_MODEL_PATH`` when empty.

    Returns:
        A ``(text, download_path)`` tuple. On success ``text`` is the
        pretty-printed JSON result and ``download_path`` is a temporary
        ``.json`` file holding the same content. On failure ``text`` is an
        error message and ``download_path`` is ``None``.
    """
    if not uploaded_files:
        return "❌ Error: No files uploaded.", None

    if not isinstance(uploaded_files, (list, tuple)):
        uploaded_files = [uploaded_files]

    # 1. Resolve every upload to a path; refuse uploads that carry none
    #    instead of letting Path(None) raise later.
    resolved_paths = [_resolve_upload_path(item) for item in uploaded_files]
    if not all(resolved_paths):
        return "❌ Error: Could not resolve the path of an uploaded file.", None

    # 2. Decide whether to merge. Every file is inspected (not only the
    #    first one) because img2pdf can only combine images.
    all_images = all(
        Path(p).suffix.lower() in _IMAGE_SUFFIXES for p in resolved_paths
    )
    if len(resolved_paths) > 1 and not all_images:
        return (
            "❌ Error: Multiple files can only be merged when all of them "
            "are images. Upload a single PDF or images only.",
            None,
        )

    # 3. Cheap configuration checks before any conversion work is done.
    if not layoutlmv3_model_path:
        layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH

    if not os.path.exists(layoutlmv3_model_path):
        return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None

    if not os.path.exists(WEIGHTS_PATH):
        return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None

    merged_pdf_path = None
    try:
        if all_images:
            print(f"📦 Converting {len(resolved_paths)} image(s) to a single PDF entity...")
            # Write through the handle we already hold so it is closed
            # deterministically and never reopened by name.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
                merged_pdf_path = temp_pdf.name
                temp_pdf.write(img2pdf.convert(resolved_paths))
            processing_path = merged_pdf_path
        else:
            # A single non-image upload (normally a PDF) is processed directly.
            processing_path = resolved_paths[0]

        print(f"🚀 Starting pipeline for merged entity: {processing_path}")

        # 4. Call the pipeline
        result = run_document_pipeline(processing_path, layoutlmv3_model_path)

        if result is None:
            return "❌ Error: Pipeline failed to process the document.", None

        # 5. Prepare output: the same JSON text is shown and offered for download.
        json_display = json.dumps(result, indent=2, ensure_ascii=False)
        with tempfile.NamedTemporaryFile(
            mode='w',
            delete=False,
            suffix='.json',
            prefix='analysis_',
            encoding='utf-8',
        ) as temp_output:
            temp_output.write(json_display)

        return json_display, temp_output.name

    except Exception as e:
        import traceback
        traceback.print_exc()
        return f"❌ Error during processing: {str(e)}", None

    finally:
        # NOTE(review): assumes the pipeline no longer needs the merged PDF
        # once it has returned - confirm against run_document_pipeline.
        if merged_pdf_path is not None:
            try:
                os.remove(merged_pdf_path)
            except OSError:
                pass
91
 
 
92
  # ==============================
93
  # GRADIO INTERFACE
94
  # ==============================
 
96
 
97
  gr.Markdown("""
98
  # πŸ“„ Document & Image Analysis Pipeline
99
+ Upload **multiple images** or a **PDF**. Multiple images will be processed together as a single continuous document.
100
  """)
101
 
102
  with gr.Row():
103
  with gr.Column(scale=1):
104
  file_input = gr.File(
105
+ label="Upload PDFs or Images",
106
  file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
107
  type="filepath",
108
+ file_count="multiple" # ALLOWS MULTIPLE FILES
109
  )
110
 
111
  model_path_input = gr.Textbox(
 
115
  interactive=True
116
  )
117
 
118
+ process_btn = gr.Button("πŸš€ Process Files", variant="primary", size="lg")
119
 
120
  with gr.Column(scale=2):
121
  json_output = gr.Code(
122
+ label="Combined Structured JSON Output",
123
  language="json",
124
  lines=25
125
  )