iammraat committed on
Commit
2a30a76
·
verified ·
1 Parent(s): 148d241

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -318
app.py CHANGED
@@ -1,325 +1,82 @@
1
- # import gradio as gr
2
- # print("GRADIO VERSION:", gr.__version__)
3
- # import json
4
- # import os
5
- # import tempfile
6
- # from pathlib import Path
7
-
8
- # # NOTE: You must ensure that 'working_yolo_pipeline.py' exists
9
- # # and defines the following items correctly:
10
- # from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
11
- # # Since I don't have this file, I am assuming the imports are correct.
12
-
13
- # # Define placeholders for assumed constants if the pipeline file isn't present
14
- # # You should replace these with your actual definitions if they are missing
15
- # try:
16
- # from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
17
- # except ImportError:
18
- # print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")
19
- # def run_document_pipeline(*args):
20
- # return {"error": "Placeholder pipeline function called."}
21
- # DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
22
- # WEIGHTS_PATH = "./weights/yolo_weights.pt"
23
-
24
-
25
- # def process_pdf(pdf_file, layoutlmv3_model_path=None):
26
- # """
27
- # Wrapper function for Gradio interface.
28
-
29
- # Args:
30
- # pdf_file: Gradio UploadButton file object
31
- # layoutlmv3_model_path: Optional custom model path
32
-
33
- # Returns:
34
- # Tuple of (JSON string, download file path)
35
- # """
36
- # if pdf_file is None:
37
- # return "❌ Error: No PDF file uploaded.", None
38
-
39
- # # Use default model path if not provided
40
- # if not layoutlmv3_model_path:
41
- # layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH
42
-
43
- # # Verify model and weights exist
44
- # if not os.path.exists(layoutlmv3_model_path):
45
- # return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None
46
-
47
- # if not os.path.exists(WEIGHTS_PATH):
48
- # return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None
49
-
50
- # try:
51
- # # Get the uploaded PDF path
52
- # pdf_path = pdf_file.name
53
-
54
- # # Run the pipeline
55
- # result = run_document_pipeline(pdf_path, layoutlmv3_model_path, 'label_studio_import.json')
56
-
57
- # if result is None:
58
- # return "❌ Error: Pipeline failed to process the PDF. Check console for details.", None
59
-
60
- # # Create a temporary file for download
61
- # output_filename = f"{Path(pdf_path).stem}_analysis.json"
62
- # temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
63
-
64
- # # Dump results to the temporary file
65
- # with open(temp_output.name, 'w', encoding='utf-8') as f:
66
- # json.dump(result, f, indent=2, ensure_ascii=False)
67
-
68
- # # Format JSON for display
69
- # json_display = json.dumps(result, indent=2, ensure_ascii=False)
70
-
71
- # return json_display, temp_output.name
72
-
73
- # except Exception as e:
74
- # return f"❌ Error during processing: {str(e)}", None
75
-
76
-
77
- # # Create Gradio interface
78
- # # FIX APPLIED: Removed 'theme=gr.themes.Soft()' which caused the TypeError
79
- # with gr.Blocks(title="Document Analysis Pipeline") as demo:
80
- # gr.Markdown("""
81
- # # 📄 Document Analysis Pipeline
82
-
83
- # Upload a PDF document to extract structured data including questions, options, answers, passages, and embedded images.
84
-
85
- # **Pipeline Steps:**
86
- # 1. 🔍 YOLO/OCR Preprocessing (word extraction + figure/equation detection)
87
- # 2. 🤖 LayoutLMv3 Inference (BIO tagging)
88
- # 3. 📊 Structured JSON Decoding
89
- # 4. 🖼️ Base64 Image Embedding
90
- # """)
91
-
92
- # with gr.Row():
93
- # with gr.Column(scale=1):
94
- # pdf_input = gr.File(
95
- # label="Upload PDF Document",
96
- # file_types=[".pdf"],
97
- # type="filepath"
98
- # )
99
-
100
- # model_path_input = gr.Textbox(
101
- # label="LayoutLMv3 Model Path (optional)",
102
- # placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
103
- # value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
104
- # interactive=True
105
- # )
106
-
107
- # process_btn = gr.Button("🚀 Process Document", variant="primary", size="lg")
108
-
109
- # gr.Markdown("""
110
- # ### ℹ️ Notes:
111
- # - Processing may take several minutes depending on PDF size
112
- # - Figures and equations will be extracted and embedded as Base64
113
- # - The output JSON includes structured questions, options, and answers
114
- # """)
115
-
116
- # with gr.Column(scale=2):
117
- # json_output = gr.Code(
118
- # label="Structured JSON Output",
119
- # language="json",
120
- # lines=25
121
- # )
122
-
123
- # download_output = gr.File(
124
- # label="Download Full JSON",
125
- # interactive=False
126
- # )
127
-
128
- # # Status/Examples section
129
- # with gr.Row():
130
- # gr.Markdown("""
131
- # ### 📋 Output Format
132
- # The pipeline generates JSON with the following structure:
133
- # - **Questions**: Extracted question text
134
- # - **Options**: Multiple choice options (A, B, C, D, etc.)
135
- # - **Answers**: Correct answer(s)
136
- # - **Passages**: Associated reading passages
137
- # - **Images**: Base64-encoded figures and equations (embedded with keys like `figure1`, `equation2`)
138
- # """)
139
-
140
- # # Connect the button to the processing function
141
- # process_btn.click(
142
- # fn=process_pdf,
143
- # inputs=[pdf_input, model_path_input],
144
- # outputs=[json_output, download_output],
145
- # api_name="process_document"
146
- # )
147
-
148
- # # Example section (optional - add example PDFs if available)
149
- # # gr.Examples(
150
- # # examples=[
151
- # # ["examples/sample1.pdf"],
152
- # # ["examples/sample2.pdf"],
153
- # # ],
154
- # # inputs=pdf_input,
155
- # # )
156
-
157
- # # Launch the app
158
- # if __name__ == "__main__":
159
- # demo.launch(
160
- # server_name="0.0.0.0",
161
- # server_port=7860,
162
- # share=False,
163
- # show_error=True
164
- # )
165
-
166
-
167
-
168
-
169
-
170
  import gradio as gr
171
- print("GRADIO VERSION:", gr.__version__)
172
- import json
 
 
173
  import os
174
- import tempfile
175
- from pathlib import Path
176
-
177
- # ==============================
178
- # WRITE CUSTOM CSS FOR FONTS
179
- # ==============================
180
-
181
- # CUSTOM_CSS = """
182
- # @font-face {
183
- # font-family: 'NotoSansMath';
184
- # src: url('./NotoSansMath-Regular.ttf') format('truetype');
185
- # font-weight: normal;
186
- # font-style: normal;
187
- # }
188
-
189
- # html, body, * {
190
- # font-family: 'NotoSansMath', sans-serif !important;
191
- # }
192
- # """
193
-
194
- # # Optionally write the CSS file if needed (not required for inline css)
195
- # if not os.path.exists("custom.css"):
196
- # with open("custom.css", "w") as f:
197
- # f.write(CUSTOM_CSS)
198
- # ==============================
199
-
200
- try:
201
- from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
202
- except ImportError:
203
- print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")
204
- def run_document_pipeline(*args):
205
- return {"error": "Placeholder pipeline function called."}
206
- DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
207
- WEIGHTS_PATH = "./weights/yolo_weights.pt"
208
-
209
-
210
- def process_pdf(pdf_file, layoutlmv3_model_path=None):
211
- if pdf_file is None:
212
- return "❌ Error: No PDF file uploaded.", None
213
-
214
- if not layoutlmv3_model_path:
215
- layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH
216
-
217
- if not os.path.exists(layoutlmv3_model_path):
218
- return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None
219
-
220
- if not os.path.exists(WEIGHTS_PATH):
221
- return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None
222
-
223
- try:
224
- pdf_path = pdf_file.name
225
-
226
- result = run_document_pipeline(pdf_path, layoutlmv3_model_path, 'label_studio_import.json')
227
-
228
- if result is None:
229
- return "❌ Error: Pipeline failed to process the PDF. Check console for details.", None
230
-
231
- output_filename = f"{Path(pdf_path).stem}_analysis.json"
232
- temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
233
-
234
- with open(temp_output.name, 'w', encoding='utf-8') as f:
235
- json.dump(result, f, indent=2, ensure_ascii=False)
236
-
237
- json_display = json.dumps(result, indent=2, ensure_ascii=False)
238
-
239
- return json_display, temp_output.name
240
-
241
- except Exception as e:
242
- return f"❌ Error during processing: {str(e)}", None
243
-
244
-
245
- with gr.Blocks(
246
- title="Document Analysis Pipeline"
247
- ) as demo:
248
-
249
-
250
- gr.HTML()
251
-
252
- gr.Markdown("""
253
- # 📄 Document Analysis Pipeline
254
-
255
- Upload a PDF document to extract structured data including questions, options, answers, passages, and embedded images.
256
-
257
- **Pipeline Steps:**
258
- 1. 🔍 YOLO/OCR Preprocessing (word extraction + figure/equation detection)
259
- 2. 🤖 LayoutLMv3 Inference (BIO tagging)
260
- 3. 📊 Structured JSON Decoding
261
- 4. 🖼️ Base64 Image Embedding
262
- """)
263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  with gr.Row():
265
- with gr.Column(scale=1):
266
- pdf_input = gr.File(
267
- label="Upload PDF Document",
268
- file_types=[".pdf"],
269
- type="filepath"
270
- )
271
-
272
- model_path_input = gr.Textbox(
273
- label="LayoutLMv3 Model Path (optional)",
274
- placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
275
- value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
276
- interactive=True
277
- )
278
-
279
- process_btn = gr.Button("🚀 Process Document", variant="primary", size="lg")
280
-
281
- gr.Markdown("""
282
- ### ℹ️ Notes:
283
- - Processing may take several minutes depending on PDF size
284
- - Figures and equations will be extracted and embedded as Base64
285
- - The output JSON includes structured questions, options, and answers
286
- """)
287
-
288
- with gr.Column(scale=2):
289
- json_output = gr.Code(
290
- label="Structured JSON Output",
291
- language="json",
292
- lines=25
293
- )
294
-
295
- download_output = gr.File(
296
- label="Download Full JSON",
297
- interactive=False
298
- )
299
-
300
- with gr.Row():
301
- gr.Markdown("""
302
- ### 📋 Output Format
303
- The pipeline generates JSON with the following structure:
304
- - **Questions**: Extracted question text
305
- - **Options**: Multiple choice options
306
- - **Answers**: Correct answer(s)
307
- - **Passages**: Associated reading passages
308
- - **Images**: Base64-encoded figures and equations
309
- """)
310
-
311
- process_btn.click(
312
- fn=process_pdf,
313
- inputs=[pdf_input, model_path_input],
314
- outputs=[json_output, download_output],
315
- api_name="process_document"
316
- )
317
 
 
318
 
319
  if __name__ == "__main__":
320
- demo.launch(
321
- server_name="0.0.0.0",
322
- server_port=7860,
323
- share=False,
324
- show_error=True
325
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ from paddleocr import PPStructure
5
+ from huggingface_hub import snapshot_download
6
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
# --- STEP 1: Fetch the model files from Hugging Face ---
# Only the Paddle inference weights and their accompanying config files are
# requested from the repository.
print("Downloading PP-DocLayoutV3 from Hugging Face...")
model_path = snapshot_download(
    repo_id="PaddlePaddle/PP-DocLayoutV3",
    allow_patterns=["*.pdiparams", "*.pdmodel", "*.yml", "*.json"],
)
print(f"Model downloaded to: {model_path}")

# --- STEP 2: Build the layout engine ---
# PPStructure is PaddleOCR's layout-analysis entry point; it is pointed at the
# folder that was just downloaded.
_layout_engine_options = {
    "layout_model_dir": model_path,
    "table": False,          # skip table-structure recognition for speed
    "ocr": False,            # layout only for now; no text recognition
    "show_log": True,
    "use_angle_cls": True,   # intended to help with page orientation
    "enable_mkldnn": False,  # kept off: the author reports a CPU crash otherwise
}
layout_engine = PPStructure(**_layout_engine_options)
25
+
26
def analyze_layout(input_image):
    """Detect layout regions in a document image and draw them on a copy.

    Args:
        input_image: The image handed over by the Gradio input component
            (configured as a PIL image in this app), or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(visualization, log)`` tuple. ``visualization`` is a NumPy copy
        of the input with one labelled rectangle per detected region, or
        ``None`` when no image was supplied. ``log`` is a newline-separated
        summary of the detections, or a short message explaining why there
        is no result.
    """
    if input_image is None:
        return None, "No image uploaded"

    image_np = np.array(input_image)

    # Run inference; the engine yields one dictionary per detected region.
    # NOTE(review): the array is RGB (it comes from PIL). PaddleOCR pipelines
    # are commonly fed OpenCV-style BGR images -- confirm whether a channel
    # swap is wanted before inference.
    result = layout_engine(image_np)

    viz_image = image_np.copy()
    detections_text = []

    # Colors are given in RGB order: the array originates from a PIL image and
    # is displayed unchanged by Gradio, so OpenCV's usual BGR convention does
    # not apply to what the user sees.
    default_color = (0, 255, 0)  # green for text and any unlisted label
    label_colors = {
        'title': (255, 0, 0),    # red
        'figure': (0, 0, 255),   # blue
        'table': (0, 255, 255),  # cyan
    }

    # --- STEP 3: Visualize results ---
    for region in result:
        # Accept both key spellings: 'layout_bbox'/'label' as originally
        # written here, and 'bbox'/'type' as emitted by PPStructure releases
        # that use those names.
        box = region.get('layout_bbox', region.get('bbox'))
        label = region.get('label', region.get('type'))
        if box is None or label is None:
            # Report the region instead of raising so the remaining regions
            # are still drawn.
            detections_text.append(
                f"Skipped region with unrecognized keys: {sorted(region)}"
            )
            continue
        label = str(label)

        # The box is expected as [x1, y1, x2, y2] (two opposite corners).
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        color = label_colors.get(label, default_color)

        cv2.rectangle(viz_image, (x1, y1), (x2, y2), color, 3)

        # Place the caption above the box; when the box touches the top edge
        # there is no room above, so draw it just inside the box instead.
        text_y = y1 - 10 if y1 >= 30 else y1 + 25
        cv2.putText(
            viz_image, label, (x1, text_y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2,
        )

        detections_text.append(f"Found {label} at {box}")

    return viz_image, "\n".join(detections_text)
64
+
65
# --- Gradio UI ---
# Static page copy, kept apart from the layout code below.
_PAGE_HEADING = "## 📄 PP-DocLayoutV3 Explorer"
_PAGE_BLURB = "This model detects **layout regions** (Text, Tables, Titles) instead of reading characters. It is excellent for de-warping and segmenting messy documents."

with gr.Blocks(title="PP-DocLayoutV3 Demo") as demo:
    gr.Markdown(_PAGE_HEADING)
    gr.Markdown(_PAGE_BLURB)

    with gr.Row():
        # Left column: upload widget and trigger button.
        with gr.Column():
            input_img = gr.Image(type="pil", label="Input Document")
            submit_btn = gr.Button("Analyze Layout", variant="primary")

        # Right column: annotated image and the textual detection log.
        with gr.Column():
            output_img = gr.Image(label="Layout Visualization")
            output_log = gr.Textbox(label="Detected Regions", lines=10)

    # Clicking the button feeds the uploaded image to analyze_layout and
    # routes its two return values to the two output widgets.
    submit_btn.click(
        fn=analyze_layout,
        inputs=input_img,
        outputs=[output_img, output_log],
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)