heerjtdev committed on
Commit
193b094
·
verified ·
1 Parent(s): 98a2928

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -230
app.py CHANGED
@@ -1,172 +1,3 @@
1
- # import gradio as gr
2
- # print("GRADIO VERSION:", gr.__version__)
3
- # import json
4
- # import os
5
- # import tempfile
6
- # from pathlib import Path
7
-
8
- # # NOTE: You must ensure that 'working_yolo_pipeline.py' exists
9
- # # and defines the following items correctly:
10
- # from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
11
- # # Since I don't have this file, I am assuming the imports are correct.
12
-
13
- # # Define placeholders for assumed constants if the pipeline file isn't present
14
- # # You should replace these with your actual definitions if they are missing
15
- # try:
16
- # from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
17
- # except ImportError:
18
- # print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")
19
- # def run_document_pipeline(*args):
20
- # return {"error": "Placeholder pipeline function called."}
21
- # DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
22
- # WEIGHTS_PATH = "./weights/yolo_weights.pt"
23
-
24
-
25
- # def process_pdf(pdf_file, layoutlmv3_model_path=None):
26
- # """
27
- # Wrapper function for Gradio interface.
28
-
29
- # Args:
30
- # pdf_file: Gradio UploadButton file object
31
- # layoutlmv3_model_path: Optional custom model path
32
-
33
- # Returns:
34
- # Tuple of (JSON string, download file path)
35
- # """
36
- # if pdf_file is None:
37
- # return "❌ Error: No PDF file uploaded.", None
38
-
39
- # # Use default model path if not provided
40
- # if not layoutlmv3_model_path:
41
- # layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH
42
-
43
- # # Verify model and weights exist
44
- # if not os.path.exists(layoutlmv3_model_path):
45
- # return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None
46
-
47
- # if not os.path.exists(WEIGHTS_PATH):
48
- # return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None
49
-
50
- # try:
51
- # # Get the uploaded PDF path
52
- # pdf_path = pdf_file.name
53
-
54
- # # Run the pipeline
55
- # result = run_document_pipeline(pdf_path, layoutlmv3_model_path, 'label_studio_import.json')
56
-
57
- # if result is None:
58
- # return "❌ Error: Pipeline failed to process the PDF. Check console for details.", None
59
-
60
- # # Create a temporary file for download
61
- # output_filename = f"{Path(pdf_path).stem}_analysis.json"
62
- # temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
63
-
64
- # # Dump results to the temporary file
65
- # with open(temp_output.name, 'w', encoding='utf-8') as f:
66
- # json.dump(result, f, indent=2, ensure_ascii=False)
67
-
68
- # # Format JSON for display
69
- # json_display = json.dumps(result, indent=2, ensure_ascii=False)
70
-
71
- # return json_display, temp_output.name
72
-
73
- # except Exception as e:
74
- # return f"❌ Error during processing: {str(e)}", None
75
-
76
-
77
- # # Create Gradio interface
78
- # # FIX APPLIED: Removed 'theme=gr.themes.Soft()' which caused the TypeError
79
- # with gr.Blocks(title="Document Analysis Pipeline") as demo:
80
- # gr.Markdown("""
81
- # # 📄 Document Analysis Pipeline
82
-
83
- # Upload a PDF document to extract structured data including questions, options, answers, passages, and embedded images.
84
-
85
- # **Pipeline Steps:**
86
- # 1. 🔍 YOLO/OCR Preprocessing (word extraction + figure/equation detection)
87
- # 2. 🤖 LayoutLMv3 Inference (BIO tagging)
88
- # 3. 📊 Structured JSON Decoding
89
- # 4. 🖼️ Base64 Image Embedding
90
- # """)
91
-
92
- # with gr.Row():
93
- # with gr.Column(scale=1):
94
- # pdf_input = gr.File(
95
- # label="Upload PDF Document",
96
- # file_types=[".pdf"],
97
- # type="filepath"
98
- # )
99
-
100
- # model_path_input = gr.Textbox(
101
- # label="LayoutLMv3 Model Path (optional)",
102
- # placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
103
- # value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
104
- # interactive=True
105
- # )
106
-
107
- # process_btn = gr.Button("πŸš€ Process Document", variant="primary", size="lg")
108
-
109
- # gr.Markdown("""
110
- # ### ℹ️ Notes:
111
- # - Processing may take several minutes depending on PDF size
112
- # - Figures and equations will be extracted and embedded as Base64
113
- # - The output JSON includes structured questions, options, and answers
114
- # """)
115
-
116
- # with gr.Column(scale=2):
117
- # json_output = gr.Code(
118
- # label="Structured JSON Output",
119
- # language="json",
120
- # lines=25
121
- # )
122
-
123
- # download_output = gr.File(
124
- # label="Download Full JSON",
125
- # interactive=False
126
- # )
127
-
128
- # # Status/Examples section
129
- # with gr.Row():
130
- # gr.Markdown("""
131
- # ### 📋 Output Format
132
- # The pipeline generates JSON with the following structure:
133
- # - **Questions**: Extracted question text
134
- # - **Options**: Multiple choice options (A, B, C, D, etc.)
135
- # - **Answers**: Correct answer(s)
136
- # - **Passages**: Associated reading passages
137
- # - **Images**: Base64-encoded figures and equations (embedded with keys like `figure1`, `equation2`)
138
- # """)
139
-
140
- # # Connect the button to the processing function
141
- # process_btn.click(
142
- # fn=process_pdf,
143
- # inputs=[pdf_input, model_path_input],
144
- # outputs=[json_output, download_output],
145
- # api_name="process_document"
146
- # )
147
-
148
- # # Example section (optional - add example PDFs if available)
149
- # # gr.Examples(
150
- # # examples=[
151
- # # ["examples/sample1.pdf"],
152
- # # ["examples/sample2.pdf"],
153
- # # ],
154
- # # inputs=pdf_input,
155
- # # )
156
-
157
- # # Launch the app
158
- # if __name__ == "__main__":
159
- # demo.launch(
160
- # server_name="0.0.0.0",
161
- # server_port=7860,
162
- # share=False,
163
- # show_error=True
164
- # )
165
-
166
-
167
-
168
-
169
-
170
  import gradio as gr
171
  print("GRADIO VERSION:", gr.__version__)
172
  import json
@@ -175,28 +6,8 @@ import tempfile
175
  from pathlib import Path
176
 
177
  # ==============================
178
- # WRITE CUSTOM CSS FOR FONTS
179
  # ==============================
180
-
181
- # CUSTOM_CSS = """
182
- # @font-face {
183
- # font-family: 'NotoSansMath';
184
- # src: url('./NotoSansMath-Regular.ttf') format('truetype');
185
- # font-weight: normal;
186
- # font-style: normal;
187
- # }
188
-
189
- # html, body, * {
190
- # font-family: 'NotoSansMath', sans-serif !important;
191
- # }
192
- # """
193
-
194
- # # Optionally write the CSS file if needed (not required for inline css)
195
- # if not os.path.exists("custom.css"):
196
- # with open("custom.css", "w") as f:
197
- # f.write(CUSTOM_CSS)
198
- # ==============================
199
-
200
  try:
201
  from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
202
  except ImportError:
@@ -207,9 +18,12 @@ except ImportError:
207
  WEIGHTS_PATH = "./weights/yolo_weights.pt"
208
 
209
 
210
- def process_pdf(pdf_file, layoutlmv3_model_path=None):
211
- if pdf_file is None:
212
- return "❌ Error: No PDF file uploaded.", None
 
 
 
213
 
214
  if not layoutlmv3_model_path:
215
  layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH
@@ -221,14 +35,22 @@ def process_pdf(pdf_file, layoutlmv3_model_path=None):
221
  return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None
222
 
223
  try:
224
- pdf_path = pdf_file.name
 
 
 
 
 
225
 
226
- result = run_document_pipeline(pdf_path, layoutlmv3_model_path, 'label_studio_import.json')
 
 
227
 
228
  if result is None:
229
- return "❌ Error: Pipeline failed to process the PDF. Check console for details.", None
230
 
231
- output_filename = f"{Path(pdf_path).stem}_analysis.json"
 
232
  temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
233
 
234
  with open(temp_output.name, 'w', encoding='utf-8') as f:
@@ -242,30 +64,30 @@ def process_pdf(pdf_file, layoutlmv3_model_path=None):
242
  return f"❌ Error during processing: {str(e)}", None
243
 
244
 
245
- with gr.Blocks(
246
- title="Document Analysis Pipeline"
247
- ) as demo:
248
-
249
-
250
- gr.HTML()
251
 
252
  gr.Markdown("""
253
- # 📄 Document Analysis Pipeline
254
 
255
- Upload a PDF document to extract structured data including questions, options, answers, passages, and embedded images.
 
 
256
 
257
  **Pipeline Steps:**
258
- 1. 🔍 YOLO/OCR Preprocessing (word extraction + figure/equation detection)
259
- 2. 🤖 LayoutLMv3 Inference (BIO tagging)
260
- 3. 📊 Structured JSON Decoding
261
- 4. 🖼️ Base64 Image Embedding
262
  """)
263
 
264
  with gr.Row():
265
  with gr.Column(scale=1):
266
- pdf_input = gr.File(
267
- label="Upload PDF Document",
268
- file_types=[".pdf"],
269
  type="filepath"
270
  )
271
 
@@ -276,13 +98,13 @@ with gr.Blocks(
276
  interactive=True
277
  )
278
 
279
- process_btn = gr.Button("πŸš€ Process Document", variant="primary", size="lg")
280
 
281
  gr.Markdown("""
282
  ### ℹ️ Notes:
283
- - Processing may take several minutes depending on PDF size
284
- - Figures and equations will be extracted and embedded as Base64
285
- - The output JSON includes structured questions, options, and answers
286
  """)
287
 
288
  with gr.Column(scale=2):
@@ -297,25 +119,14 @@ with gr.Blocks(
297
  interactive=False
298
  )
299
 
300
- with gr.Row():
301
- gr.Markdown("""
302
- ### 📋 Output Format
303
- The pipeline generates JSON with the following structure:
304
- - **Questions**: Extracted question text
305
- - **Options**: Multiple choice options
306
- - **Answers**: Correct answer(s)
307
- - **Passages**: Associated reading passages
308
- - **Images**: Base64-encoded figures and equations
309
- """)
310
-
311
  process_btn.click(
312
- fn=process_pdf,
313
- inputs=[pdf_input, model_path_input],
314
  outputs=[json_output, download_output],
315
  api_name="process_document"
316
  )
317
 
318
-
319
  if __name__ == "__main__":
320
  demo.launch(
321
  server_name="0.0.0.0",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  print("GRADIO VERSION:", gr.__version__)
3
  import json
 
6
  from pathlib import Path
7
 
8
  # ==============================
9
+ # PIPELINE IMPORT
10
  # ==============================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  try:
12
  from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
13
  except ImportError:
 
18
  WEIGHTS_PATH = "./weights/yolo_weights.pt"
19
 
20
 
21
+ def process_file(uploaded_file, layoutlmv3_model_path=None):
22
+ """
23
+ Handles both PDF and Image uploads and routes them to the YOLO/OCR pipeline.
24
+ """
25
+ if uploaded_file is None:
26
+ return "❌ Error: No file uploaded.", None
27
 
28
  if not layoutlmv3_model_path:
29
  layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH
 
35
  return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None
36
 
37
  try:
38
+ file_path = uploaded_file.name
39
+
40
+ # Determine file type for logging
41
+ ext = Path(file_path).suffix.lower()
42
+ file_type = "Image" if ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'] else "PDF"
43
+ print(f"πŸš€ Starting pipeline for {file_type}: {file_path}")
44
 
45
+ # Call the pipeline exactly as before.
46
+ # Our modified working_yolo_pipeline now handles the branching internally.
47
+ result = run_document_pipeline(file_path, layoutlmv3_model_path)
48
 
49
  if result is None:
50
+ return "❌ Error: Pipeline failed to process the document. Check console for details.", None
51
 
52
+ # Prepare output file for download
53
+ output_filename = f"{Path(file_path).stem}_analysis.json"
54
  temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
55
 
56
  with open(temp_output.name, 'w', encoding='utf-8') as f:
 
64
  return f"❌ Error during processing: {str(e)}", None
65
 
66
 
67
+ # ==============================
68
+ # GRADIO INTERFACE
69
+ # ==============================
70
+ with gr.Blocks(title="Document Analysis Pipeline") as demo:
 
 
71
 
72
  gr.Markdown("""
73
+ # 📄 Document & Image Analysis Pipeline
74
 
75
+ Upload a **PDF document** or an **Image (JPG/PNG)** to extract structured data.
76
+
77
+ **Supported Formats:** `.pdf`, `.jpg`, `.jpeg`, `.png`, `.bmp`, `.webp`, `.tiff`
78
 
79
  **Pipeline Steps:**
80
+ 1. 🔍 **YOLO/OCR**: Word extraction + Figure/Equation detection
81
+ 2. 🤖 **LayoutLMv3**: BIO tagging and structural analysis
82
+ 3. 📊 **Decoding**: Conversion to hierarchical JSON
83
+ 4. 🖼️ **Extraction**: Base64 embedding of detected visual elements
84
  """)
85
 
86
  with gr.Row():
87
  with gr.Column(scale=1):
88
+ file_input = gr.File(
89
+ label="Upload PDF or Image",
90
+ file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
91
  type="filepath"
92
  )
93
 
 
98
  interactive=True
99
  )
100
 
101
+ process_btn = gr.Button("πŸš€ Process File", variant="primary", size="lg")
102
 
103
  gr.Markdown("""
104
  ### ℹ️ Notes:
105
+ - **Images** are treated as single-page documents.
106
+ - **PDFs** are processed page-by-page.
107
+ - High-resolution Tesseract OCR is used for all image content.
108
  """)
109
 
110
  with gr.Column(scale=2):
 
119
  interactive=False
120
  )
121
 
122
+ # UI Logic
 
 
 
 
 
 
 
 
 
 
123
  process_btn.click(
124
+ fn=process_file,
125
+ inputs=[file_input, model_path_input],
126
  outputs=[json_output, download_output],
127
  api_name="process_document"
128
  )
129
 
 
130
  if __name__ == "__main__":
131
  demo.launch(
132
  server_name="0.0.0.0",