heerjtdev committed on
Commit
317a0d7
·
verified ·
1 Parent(s): aeaffba

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +325 -0
  2. requirements.txt +19 -0
app.py ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import gradio as gr
2
+ # print("GRADIO VERSION:", gr.__version__)
3
+ # import json
4
+ # import os
5
+ # import tempfile
6
+ # from pathlib import Path
7
+
8
+ # # NOTE: You must ensure that 'working_yolo_pipeline.py' exists
9
+ # # and defines the following items correctly:
10
+ # from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
11
+ # # Since I don't have this file, I am assuming the imports are correct.
12
+
13
+ # # Define placeholders for assumed constants if the pipeline file isn't present
14
+ # # You should replace these with your actual definitions if they are missing
15
+ # try:
16
+ # from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
17
+ # except ImportError:
18
+ # print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")
19
+ # def run_document_pipeline(*args):
20
+ # return {"error": "Placeholder pipeline function called."}
21
+ # DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
22
+ # WEIGHTS_PATH = "./weights/yolo_weights.pt"
23
+
24
+
25
+ # def process_pdf(pdf_file, layoutlmv3_model_path=None):
26
+ # """
27
+ # Wrapper function for Gradio interface.
28
+
29
+ # Args:
30
+ # pdf_file: Gradio UploadButton file object
31
+ # layoutlmv3_model_path: Optional custom model path
32
+
33
+ # Returns:
34
+ # Tuple of (JSON string, download file path)
35
+ # """
36
+ # if pdf_file is None:
37
+ # return "❌ Error: No PDF file uploaded.", None
38
+
39
+ # # Use default model path if not provided
40
+ # if not layoutlmv3_model_path:
41
+ # layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH
42
+
43
+ # # Verify model and weights exist
44
+ # if not os.path.exists(layoutlmv3_model_path):
45
+ # return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None
46
+
47
+ # if not os.path.exists(WEIGHTS_PATH):
48
+ # return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None
49
+
50
+ # try:
51
+ # # Get the uploaded PDF path
52
+ # pdf_path = pdf_file.name
53
+
54
+ # # Run the pipeline
55
+ # result = run_document_pipeline(pdf_path, layoutlmv3_model_path, 'label_studio_import.json')
56
+
57
+ # if result is None:
58
+ # return "❌ Error: Pipeline failed to process the PDF. Check console for details.", None
59
+
60
+ # # Create a temporary file for download
61
+ # output_filename = f"{Path(pdf_path).stem}_analysis.json"
62
+ # temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
63
+
64
+ # # Dump results to the temporary file
65
+ # with open(temp_output.name, 'w', encoding='utf-8') as f:
66
+ # json.dump(result, f, indent=2, ensure_ascii=False)
67
+
68
+ # # Format JSON for display
69
+ # json_display = json.dumps(result, indent=2, ensure_ascii=False)
70
+
71
+ # return json_display, temp_output.name
72
+
73
+ # except Exception as e:
74
+ # return f"❌ Error during processing: {str(e)}", None
75
+
76
+
77
+ # # Create Gradio interface
78
+ # # FIX APPLIED: Removed 'theme=gr.themes.Soft()' which caused the TypeError
79
+ # with gr.Blocks(title="Document Analysis Pipeline") as demo:
80
+ # gr.Markdown("""
81
+ # # πŸ“„ Document Analysis Pipeline
82
+
83
+ # Upload a PDF document to extract structured data including questions, options, answers, passages, and embedded images.
84
+
85
+ # **Pipeline Steps:**
86
+ # 1. πŸ” YOLO/OCR Preprocessing (word extraction + figure/equation detection)
87
+ # 2. πŸ€– LayoutLMv3 Inference (BIO tagging)
88
+ # 3. πŸ“Š Structured JSON Decoding
89
+ # 4. πŸ–ΌοΈ Base64 Image Embedding
90
+ # """)
91
+
92
+ # with gr.Row():
93
+ # with gr.Column(scale=1):
94
+ # pdf_input = gr.File(
95
+ # label="Upload PDF Document",
96
+ # file_types=[".pdf"],
97
+ # type="filepath"
98
+ # )
99
+
100
+ # model_path_input = gr.Textbox(
101
+ # label="LayoutLMv3 Model Path (optional)",
102
+ # placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
103
+ # value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
104
+ # interactive=True
105
+ # )
106
+
107
+ # process_btn = gr.Button("πŸš€ Process Document", variant="primary", size="lg")
108
+
109
+ # gr.Markdown("""
110
+ # ### ℹ️ Notes:
111
+ # - Processing may take several minutes depending on PDF size
112
+ # - Figures and equations will be extracted and embedded as Base64
113
+ # - The output JSON includes structured questions, options, and answers
114
+ # """)
115
+
116
+ # with gr.Column(scale=2):
117
+ # json_output = gr.Code(
118
+ # label="Structured JSON Output",
119
+ # language="json",
120
+ # lines=25
121
+ # )
122
+
123
+ # download_output = gr.File(
124
+ # label="Download Full JSON",
125
+ # interactive=False
126
+ # )
127
+
128
+ # # Status/Examples section
129
+ # with gr.Row():
130
+ # gr.Markdown("""
131
+ # ### πŸ“‹ Output Format
132
+ # The pipeline generates JSON with the following structure:
133
+ # - **Questions**: Extracted question text
134
+ # - **Options**: Multiple choice options (A, B, C, D, etc.)
135
+ # - **Answers**: Correct answer(s)
136
+ # - **Passages**: Associated reading passages
137
+ # - **Images**: Base64-encoded figures and equations (embedded with keys like `figure1`, `equation2`)
138
+ # """)
139
+
140
+ # # Connect the button to the processing function
141
+ # process_btn.click(
142
+ # fn=process_pdf,
143
+ # inputs=[pdf_input, model_path_input],
144
+ # outputs=[json_output, download_output],
145
+ # api_name="process_document"
146
+ # )
147
+
148
+ # # Example section (optional - add example PDFs if available)
149
+ # # gr.Examples(
150
+ # # examples=[
151
+ # # ["examples/sample1.pdf"],
152
+ # # ["examples/sample2.pdf"],
153
+ # # ],
154
+ # # inputs=pdf_input,
155
+ # # )
156
+
157
+ # # Launch the app
158
+ # if __name__ == "__main__":
159
+ # demo.launch(
160
+ # server_name="0.0.0.0",
161
+ # server_port=7860,
162
+ # share=False,
163
+ # show_error=True
164
+ # )
165
+
166
+
167
+
168
+
169
+
170
+ import gradio as gr
171
+ print("GRADIO VERSION:", gr.__version__)
172
+ import json
173
+ import os
174
+ import tempfile
175
+ from pathlib import Path
176
+
177
+ # ==============================
178
+ # WRITE CUSTOM CSS FOR FONTS
179
+ # ==============================
180
+
181
+ # CUSTOM_CSS = """
182
+ # @font-face {
183
+ # font-family: 'NotoSansMath';
184
+ # src: url('./NotoSansMath-Regular.ttf') format('truetype');
185
+ # font-weight: normal;
186
+ # font-style: normal;
187
+ # }
188
+
189
+ # html, body, * {
190
+ # font-family: 'NotoSansMath', sans-serif !important;
191
+ # }
192
+ # """
193
+
194
+ # # Optionally write the CSS file if needed (not required for inline css)
195
+ # if not os.path.exists("custom.css"):
196
+ # with open("custom.css", "w") as f:
197
+ # f.write(CUSTOM_CSS)
198
+ # ==============================
199
+
200
# The real pipeline lives in working_yolo_pipeline.py. When that module cannot
# be imported, fall back to placeholders so the UI can still be constructed.
try:
    from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
except ImportError as import_error:
    # ImportError is raised both when the module itself is absent and when one
    # of its own imports fails, so report the actual cause instead of claiming
    # the file was "not found".
    print(f"Warning: could not import 'working_yolo_pipeline' ({import_error}). Using dummy paths.")

    def run_document_pipeline(*args, **kwargs):
        """Placeholder used when the real pipeline is unavailable.

        Accepts any arguments and returns a dict carrying an error message.
        """
        return {"error": "Placeholder pipeline function called."}

    DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
    WEIGHTS_PATH = "./weights/yolo_weights.pt"
208
+
209
+
210
def process_pdf(pdf_file, layoutlmv3_model_path=None):
    """Run the document pipeline on an uploaded PDF for the Gradio UI.

    Args:
        pdf_file: The uploaded PDF, either as a filesystem path (what
            ``gr.File(type="filepath")`` passes) or as an object exposing the
            path through a ``.name`` attribute. ``None`` means nothing was
            uploaded.
        layoutlmv3_model_path: Optional model path. When empty,
            ``DEFAULT_LAYOUTLMV3_MODEL_PATH`` is used.

    Returns:
        A ``(text, download_path)`` tuple. On success ``text`` is the
        pretty-printed JSON result and ``download_path`` is the path of a
        temporary ``.json`` file holding the same content. On any failure
        ``text`` is an error message and ``download_path`` is ``None``.
    """
    if pdf_file is None:
        return "❌ Error: No PDF file uploaded.", None

    if not layoutlmv3_model_path:
        layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH

    if not os.path.exists(layoutlmv3_model_path):
        return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None

    if not os.path.exists(WEIGHTS_PATH):
        return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None

    try:
        # With type="filepath" the upload arrives as a plain path string, which
        # has no ``.name``; still accept file-like wrappers for compatibility.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            pdf_path = pdf_file.name

        result = run_document_pipeline(pdf_path, layoutlmv3_model_path, 'label_studio_import.json')

        if result is None:
            return "❌ Error: Pipeline failed to process the PDF. Check console for details.", None

        # Serialize once, before creating the file, so a serialization failure
        # does not leave an orphaned temp file behind.
        json_display = json.dumps(result, indent=2, ensure_ascii=False)

        # delete=False keeps the file on disk after the handle is closed so
        # the caller can serve it by path as the download.
        with tempfile.NamedTemporaryFile(
            mode='w',
            delete=False,
            suffix='.json',
            prefix='analysis_',
            encoding='utf-8',
        ) as temp_output:
            temp_output.write(json_display)

        return json_display, temp_output.name

    except Exception as e:
        # UI boundary: surface any pipeline failure as a message, not a crash.
        return f"❌ Error during processing: {str(e)}", None
243
+
244
+
245
# Build the Gradio UI: upload + model path on the left, JSON result + download
# on the right, followed by a short description of the output format.
with gr.Blocks(
    title="Document Analysis Pipeline"
) as demo:

    gr.HTML()

    gr.Markdown("""
    # 📄 Document Analysis Pipeline

    Upload a PDF document to extract structured data including questions, options, answers, passages, and embedded images.

    **Pipeline Steps:**
    1. 🔍 YOLO/OCR Preprocessing (word extraction + figure/equation detection)
    2. 🤖 LayoutLMv3 Inference (BIO tagging)
    3. 📊 Structured JSON Decoding
    4. 🖼️ Base64 Image Embedding
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" makes Gradio hand the handler a path string.
            pdf_input = gr.File(
                label="Upload PDF Document",
                file_types=[".pdf"],
                type="filepath"
            )

            model_path_input = gr.Textbox(
                label="LayoutLMv3 Model Path (optional)",
                placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                interactive=True
            )

            process_btn = gr.Button("🚀 Process Document", variant="primary", size="lg")

            gr.Markdown("""
            ### ℹ️ Notes:
            - Processing may take several minutes depending on PDF size
            - Figures and equations will be extracted and embedded as Base64
            - The output JSON includes structured questions, options, and answers
            """)

        with gr.Column(scale=2):
            json_output = gr.Code(
                label="Structured JSON Output",
                language="json",
                lines=25
            )

            download_output = gr.File(
                label="Download Full JSON",
                interactive=False
            )

    with gr.Row():
        gr.Markdown("""
        ### 📋 Output Format
        The pipeline generates JSON with the following structure:
        - **Questions**: Extracted question text
        - **Options**: Multiple choice options
        - **Answers**: Correct answer(s)
        - **Passages**: Associated reading passages
        - **Images**: Base64-encoded figures and equations
        """)

    # process_pdf returns (json_text, download_path), matching the two outputs.
    process_btn.click(
        fn=process_pdf,
        inputs=[pdf_input, model_path_input],
        outputs=[json_output, download_output],
        api_name="process_document"
    )


if __name__ == "__main__":
    # Bind on all interfaces at port 7860; no public share link is created.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+ #ultralytics
3
+ #transformers
4
+ #torch
5
+ #pillow
6
+ #numpy
7
+ #gradio
8
+
9
+ #pytz
10
+ #huggingface_hub
11
+
12
+
13
+ gradio
14
+ ultralytics
15
+ transformers
16
+ torch
17
+ torchvision
18
+ pillow
19
+ pytz