Yinsongliu committed on
Commit
a4894fe
·
1 Parent(s): 5797b71
Files changed (5) hide show
  1. README.md +7 -3
  2. app.py +452 -0
  3. config.py +86 -0
  4. requirements.txt +3 -0
  5. runtime.txt +1 -0
README.md CHANGED
@@ -1,12 +1,16 @@
1
  ---
2
- title: Youtu Parsing
3
- emoji: 🌖
4
  colorFrom: gray
5
  colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 6.4.0
8
  app_file: app.py
9
  pinned: false
 
 
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Youtu-Parsing
3
+ emoji: 🚀
4
  colorFrom: gray
5
  colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 5.49.1
8
  app_file: app.py
9
  pinned: false
10
+ short_description: Try out Youtu-Parsing on your PDFs or images
11
+ license: other
12
+ license_name: youtu-parsing
13
+ license_link: https://huggingface.co/tencent/Youtu-Parsing/blob/main/LICENSE.txt
14
  ---
15
 
16
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,452 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import json
4
+ import tempfile
5
+ import logging
6
+ import warnings
7
+ from PIL import Image, ImageDraw, ImageFont
8
+ import math
9
+ import numpy as np
10
+ from pathlib import Path
11
+ from typing import Optional, Tuple, List, Dict, Any
12
+
13
+ # Suppress warnings for HuggingFace Spaces
14
+ warnings.filterwarnings("ignore", category=FutureWarning)
15
+ warnings.filterwarnings("ignore", category=UserWarning)
16
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
17
+
18
+ # Try to import spaces for ZeroGPU support
19
# Configure logging before the first log call. Without a configured
# handler/level the INFO messages below are silently discarded (the root
# logger defaults to WARNING). basicConfig() does nothing once the root logger
# has handlers, so a later identical call in this file is a harmless no-op.
logging.basicConfig(level=logging.INFO)
logger_temp = logging.getLogger(__name__)

# `spaces` is only present on HuggingFace Spaces; its absence is not an error.
try:
    import spaces
except ImportError:
    SPACES_AVAILABLE = False
    logger_temp.info("HuggingFace Spaces library not available - running without ZeroGPU")
else:
    SPACES_AVAILABLE = True
    logger_temp.info("HuggingFace Spaces library available - ZeroGPU support enabled")
28
+
29
+ # No external markdown dependency needed
30
+
31
+ # Import configuration
32
+ from config import (
33
+ MODEL_NAME, LAYOUT_COLORS,
34
+ GRADIO_THEME, GRADIO_TITLE, GRADIO_DESCRIPTION,
35
+ DEFAULT_ENABLE_ANGLE_CORRECTION,
36
+ ERROR_MESSAGES, SUCCESS_MESSAGES, IS_HUGGINGFACE_SPACE,
37
+ HUGGINGFACE_TOKEN
38
+ )
39
+
40
+ # Setup logging
41
+ logging.basicConfig(level=logging.INFO)
42
+ logger = logging.getLogger(__name__)
43
+
44
+ # Import youtu parsing modules
45
try:
    from youtu_hf_parser import YoutuOCRParserHF
    from youtu_parsing_utils import IMAGE_EXT, PDF_EXT, load_image, load_images_from_pdf
except ImportError as e:
    logger.warning(f"Failed to import youtu parsing modules: {e}")
    logger.warning("Please ensure youtu-parsing is properly installed")
    YOUTU_PARSING_AVAILABLE = False
    # Placeholder so that annotations naming the parser class still evaluate
    # when the package is missing; otherwise the module dies with NameError
    # at import time and this fallback path is never reached.
    YoutuOCRParserHF = None
else:
    YOUTU_PARSING_AVAILABLE = True
    logger.info("Youtu-Parsing modules imported successfully")
54
+
55
+ # Global variables
56
+ # Note: For ZeroGPU, we should NOT load model in main process
57
+ # Model will be loaded lazily inside @spaces.GPU decorated function
58
+ parser = None
59
+
60
+ model_loaded = False
61
+
62
def _load_model_internal() -> "Optional[YoutuOCRParserHF]":
    """Load the Youtu-Parsing model and cache it in the module-level ``parser``.

    A parser that was loaded earlier is returned as-is. Every failure
    (package not importable, out of memory, download/disk problem, anything
    else) is logged and reported as ``None`` rather than raised.

    The return annotation is a string on purpose: ``YoutuOCRParserHF`` does
    not exist when the youtu-parsing import failed, and an eagerly evaluated
    annotation would crash the module at import time.

    Returns:
        The loaded ``YoutuOCRParserHF`` instance, or ``None`` on failure.
    """
    global parser, model_loaded

    if model_loaded and parser is not None:
        logger.info("Model already loaded, returning cached parser")
        return parser

    if not YOUTU_PARSING_AVAILABLE:
        logger.error("Youtu-Parsing modules not available")
        logger.error("Please ensure youtu-parsing is properly installed:")
        logger.error(" pip install git+https://github.com/TencentCloudADP/youtu-parsing.git#subdirectory=youtu_hf_parser")
        return None

    rule = "=" * 60

    try:
        logger.info(rule)
        logger.info(f"Starting model loading: {MODEL_NAME}")
        logger.info(f"Is HuggingFace Space: {IS_HUGGINGFACE_SPACE}")

        # IMPORTANT: Do NOT call torch.cuda methods in main process for ZeroGPU!
        # ZeroGPU will automatically handle device placement inside @spaces.GPU context
        logger.info("Loading model (device placement handled by ZeroGPU)")

        model_kwargs = {
            "model_path": MODEL_NAME,
            "enable_angle_correct": True,
        }

        # Add HuggingFace token if available (for private/gated models)
        if IS_HUGGINGFACE_SPACE:
            if HUGGINGFACE_TOKEN:
                logger.info("Using HuggingFace token for authentication")
                model_kwargs["token"] = HUGGINGFACE_TOKEN
            else:
                logger.warning("HF_TOKEN not found in environment variables")
                logger.warning("If the model is private or gated, please set HF_TOKEN in Space settings")

        logger.info("Initializing YoutuOCRParserHF...")
        # Never write the access token to the logs: mask it before printing.
        printable_kwargs = {
            key: ("***" if key == "token" else value)
            for key, value in model_kwargs.items()
        }
        logger.info(f"Model kwargs: {printable_kwargs}")

        # In ZeroGPU: loads on CPU, moves to GPU inside @spaces.GPU decorated function
        parser = YoutuOCRParserHF(**model_kwargs)
        model_loaded = True

        logger.info(rule)
        # The configured message already starts with a check mark, so it is
        # logged verbatim instead of being prefixed with a second one.
        logger.info(SUCCESS_MESSAGES["model_loaded"])
        logger.info(rule)
        return parser

    except ImportError as e:
        logger.error(rule)
        logger.error(f"❌ Import error: {str(e)}")
        logger.error("Missing dependencies. Please ensure all required packages are installed:")
        logger.error(" - torch>=2.0.0")
        logger.error(" - transformers>=4.30.0")
        logger.error(" - accelerate>=0.20.0")
        logger.error(" - pillow>=8.0.0")
        logger.error(" - numpy>=1.20.0")
        logger.error(rule)
        return None

    except MemoryError as e:
        logger.error(rule)
        logger.error(f"❌ Memory error: {str(e)}")
        logger.error("Insufficient memory to load the model")
        logger.error("Solutions:")
        logger.error(" 1. Upgrade to a Space with more RAM")
        logger.error(" 2. Use ZeroGPU hardware tier")
        logger.error(" 3. Contact HuggingFace support for assistance")
        logger.error(rule)
        return None

    except OSError as e:
        logger.error(rule)
        logger.error(f"❌ OS/File error: {str(e)}")
        logger.error("This might be a model download issue or disk space problem")
        logger.error("Possible causes:")
        logger.error(" - Network timeout during model download")
        logger.error(" - Insufficient disk space")
        logger.error(" - Permission issues")
        logger.error(" - Model repository not accessible")
        logger.error(rule)
        return None

    except Exception as e:
        # Last-resort boundary: log everything, report failure to the caller.
        import traceback

        logger.error(rule)
        logger.error(f"❌ Unexpected error loading model: {str(e)}")
        logger.error(f"Error type: {type(e).__name__}")
        logger.error("Full traceback:")
        logger.error("-" * 60)
        logger.error(traceback.format_exc())
        logger.error(rule)
        return None
159
+
160
def draw_layout_boxes(image: Image.Image, bboxes: List[Dict]) -> Image.Image:
    """Draw translucent layout polygons and their labels on a copy of ``image``.

    Args:
        image: Source page image; it is never modified.
        bboxes: Layout cells. Each cell is a dict with a ``'bbox'`` entry of
            at least eight numbers ``[x0, y0, x1, y1, x2, y2, x3, y3]`` and an
            optional ``'type'`` string such as ``'<LAYOUT_Text>'``. Cells with
            a missing or too-short ``'bbox'`` are skipped.

    Returns:
        A new RGB image with the overlay applied. The original ``image`` is
        returned unchanged when ``bboxes`` is empty or compositing fails.
    """
    if not bboxes:
        return image

    # Work on an RGBA copy so the semi-transparent overlay can be composited.
    canvas = image.copy()
    if canvas.mode != "RGBA":
        canvas = canvas.convert("RGBA")

    overlay = Image.new("RGBA", image.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)
    font = ImageFont.load_default()

    for i, cell in enumerate(bboxes):
        # Tolerate an explicit None for 'bbox' / 'type' as well as a missing key.
        bbox = cell.get('bbox') or []
        if len(bbox) < 8:
            continue

        # Convert bbox to points: [x0, y0, x1, y1, x2, y2, x3, y3]
        pts = [(bbox[j], bbox[j + 1]) for j in range(0, 8, 2)]
        raw_type = cell.get('type') or ''
        layout_type = raw_type.replace('<LAYOUT_', '').replace('>', '') or 'Unknown'
        color = LAYOUT_COLORS.get(layout_type, LAYOUT_COLORS['Unknown'])

        rgb = tuple(color[:3])
        fill_color = rgb + (100,)      # translucent interior
        outline_color = rgb + (255,)   # opaque border, also used for the label

        try:
            draw.polygon(pts, outline=outline_color, fill=fill_color)
            # Label "<reading order>_<type>" anchored at the first corner.
            draw.text(pts[0], f"{i}_{layout_type}", font=font, fill=outline_color)
        except Exception as e:
            # One malformed box must not prevent the others from being drawn.
            logger.warning(f"Error drawing bbox {i}: {e}")
            continue

    # Composite to original image
    try:
        result = Image.alpha_composite(canvas, overlay)
        return result.convert("RGB")
    except Exception as e:
        logger.error(f"Error compositing image: {e}")
        return image
216
+
217
+ # Decorator for GPU acceleration if available
218
def parse_document(image: Optional[Image.Image],
                   enable_angle_corrector: bool) -> Tuple[Optional[Image.Image], str, str, str, str]:
    """Parse the uploaded document (Gradio event handler).

    Thin entry point that delegates to ``_parse_document_internal``. It is
    defined once and wrapped with ``spaces.GPU`` below when the ``spaces``
    package is importable, so the same code path runs with or without ZeroGPU.

    Args:
        image: Uploaded page image, or ``None`` if nothing was uploaded.
        enable_angle_corrector: Whether orientation correction is requested.

    Returns:
        Tuple of (output_image, markdown_rendered, markdown_source, json_output, status_msg)
    """
    return _parse_document_internal(image, enable_angle_corrector)


# Equivalent to decorating with @spaces.GPU, without duplicating the function.
if SPACES_AVAILABLE:
    parse_document = spaces.GPU(parse_document)
237
+
238
def _parse_document_internal(image: Optional[Image.Image],
                             enable_angle_corrector: bool) -> Tuple[Optional[Image.Image], str, str, str, str]:
    """Run the parser on one image and build every output shown in the UI.

    This function is called inside the @spaces.GPU context (if available), so
    the model may be loaded lazily here when it has not been loaded yet.

    Args:
        image: Page image to parse, or ``None`` if nothing was uploaded.
        enable_angle_corrector: Forwarded to the parser as
            ``enable_angle_corrector``.

    Returns:
        Tuple of (output_image, markdown_rendered, markdown_source, json_output, status_msg).
        On any failure ``output_image`` is ``None`` and the remaining fields
        carry a user-facing explanation; no exception is propagated.
    """
    global parser

    if image is None:
        return None, "<p>Please upload an image first</p>", "", "", ERROR_MESSAGES["no_image"]

    if not YOUTU_PARSING_AVAILABLE:
        return None, "<p>Youtu-Parsing module is not available, please check installation</p>", "", "", "Youtu-Parsing modules are not available. Please check the installation."

    # Load model if not already loaded
    if parser is None:
        parser = _load_model_internal()
        if parser is None:
            return None, "<p>Model loading failed</p>", "", "", ERROR_MESSAGES["model_load_failed"]

    try:
        logger.info(f"Parsing document (enable_angle_corrector={enable_angle_corrector})")

        # Hand the PIL image straight to the parser; no temporary file is needed.
        page_result, page_angle, hierarchy_json = parser._parse_single_image(
            image,
            enable_angle_corrector=enable_angle_corrector
        )

        if not page_result:
            return None, "No parsing results", "", "", ERROR_MESSAGES["no_results"]

        # Extract layout bboxes for visualization
        layout_bboxes = [
            {
                'bbox': item['bbox'],
                'type': item.get('type', ''),
                'content': item.get('content', ''),
            }
            for item in page_result
            if 'bbox' in item
        ]
        image_with_boxes = draw_layout_boxes(image, layout_bboxes)

        # Markdown is the non-empty cell contents separated by blank lines.
        markdown_content = "\n\n".join(
            item.get('content', '') for item in page_result if item.get('content')
        )

        # Create JSON content (include hierarchy info)
        json_content = json.dumps(
            {
                "page_result": page_result,
                "page_angle": page_angle,
                "hierarchy": hierarchy_json,
            },
            ensure_ascii=False,
            indent=2,
        )

        # The same markdown string feeds both the rendered view and the source view.
        logger.info(f"Generated markdown content (first 200 chars): {markdown_content[:200] if markdown_content else 'empty'}")
        logger.info("Document parsing completed successfully")
        return image_with_boxes, markdown_content, markdown_content, json_content, SUCCESS_MESSAGES["parsing_complete"]

    except Exception as e:
        # UI boundary: keep the traceback in the logs, show a short message to the user.
        logger.exception(f"Error during parsing: {str(e)}")
        return None, f"Parsing error: {str(e)}", "", "", ERROR_MESSAGES["parsing_failed"].format(str(e))
312
+
313
def create_interface():
    """Assemble the Gradio Blocks UI (simplified layout for HuggingFace Spaces).

    Left column: image upload, advanced options, parse button and status box.
    Right column: tabs for the layout visualization, rendered markdown,
    markdown source and JSON output. An instructions accordion closes the page.

    Returns:
        The constructed ``gr.Blocks`` object; launching is left to the caller.
    """
    # Custom CSS font styling
    custom_css = """
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Noto+Sans+SC:wght@400;500;700&display=swap');

    * {
    font-family: 'Inter', 'Noto Sans SC', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif !important;
    }

    .markdown-text {
    font-family: 'Inter', 'Noto Sans SC', sans-serif !important;
    line-height: 1.7 !important;
    }

    h1, h2, h3, h4, h5, h6 {
    font-weight: 600 !important;
    }

    code, pre {
    font-family: 'JetBrains Mono', 'Fira Code', 'SF Mono', Consolas, monospace !important;
    }

    textarea, input {
    font-family: 'Inter', 'Noto Sans SC', sans-serif !important;
    }
    """

    # Delimiters recognised as LaTeX by the rendered-markdown tab.
    latex_delims = [
        {"left": "$$", "right": "$$", "display": True},
        {"left": "$", "right": "$", "display": False},
        {"left": "\\[", "right": "\\]", "display": True},
        {"left": "\\(", "right": "\\)", "display": False},
    ]

    with gr.Blocks(title=GRADIO_TITLE, css=custom_css) as demo:
        gr.Markdown(f"# 📄 {GRADIO_TITLE}")
        gr.Markdown(f"{GRADIO_DESCRIPTION}")

        with gr.Row():
            # Inputs and controls
            with gr.Column(scale=1):
                image_in = gr.Image(
                    type="pil",
                    label="Upload Document Image",
                    height=300,
                    sources=["upload", "clipboard"]
                )

                with gr.Accordion("⚙️ Advanced Options", open=False):
                    angle_toggle = gr.Checkbox(
                        label="Enable Angle Correction",
                        value=DEFAULT_ENABLE_ANGLE_CORRECTION,
                        info="Automatically correct document orientation"
                    )

                run_button = gr.Button("🚀 Start Parsing", variant="primary", size="lg")
                status_box = gr.Textbox(label="Status", interactive=False, lines=2)

            # Results, one tab per representation
            with gr.Column(scale=2):
                with gr.Tabs():
                    with gr.Tab("Visualization"):
                        boxes_view = gr.Image(label="Layout Detection Result", height=500)
                    with gr.Tab("Markdown Rendered"):
                        rendered_view = gr.Markdown(
                            value="Upload a document and the parsing results will appear here...",
                            latex_delimiters=latex_delims
                        )
                    with gr.Tab("Markdown Source"):
                        source_view = gr.Textbox(label="Markdown Source Code", lines=20)
                    with gr.Tab("JSON Output"):
                        json_view = gr.Textbox(label="Structured Data", lines=20)

        # Event handler: output order must match parse_document's return tuple.
        run_button.click(
            fn=parse_document,
            inputs=[image_in, angle_toggle],
            outputs=[boxes_view, rendered_view, source_view, json_view, status_box]
        )

        with gr.Accordion("ℹ️ Instructions", open=False):
            gr.Markdown("""
            ### Supported Document Types
            - **Text Documents** - Documents containing text and tables
            - **Charts & Graphics** - Various charts and diagrams
            - **Math Formulas** - Mathematical expressions in LaTeX format

            ### How to Use
            1. Upload a document image (supports JPG, PNG, etc.)
            2. Click the "Start Parsing" button
            3. View the results (Visualization, Markdown, JSON)
            """)

    return demo
405
+
406
def main():
    """Preload the model, then build and launch the Gradio interface.

    The model is loaded before the UI starts so that the weights are
    downloaded during startup rather than on the first request. A failed
    preload is only logged; loading is attempted again on the first inference.
    """
    global parser, model_loaded

    banner = "=" * 60
    logger.info(banner)
    logger.info("🚀 Starting Youtu-Parsing Application")
    logger.info(banner)

    environment = 'HuggingFace Space' if IS_HUGGINGFACE_SPACE else 'Local'
    logger.info(f"Environment: {environment}")
    logger.info("Preloading model before interface launch...")

    # Always preload so the first request does not pay the download cost.
    try:
        parser = _load_model_internal()
        if parser is None:
            logger.warning("⚠️ Model preload failed, will retry on first inference")
        else:
            logger.info("✅ Model preloaded successfully")
            model_loaded = True
    except Exception as e:
        import traceback

        logger.error(f"❌ Error preloading model: {e}")
        logger.error(traceback.format_exc())
        logger.warning("⚠️ Will attempt to load model on first inference")

    logger.info("Creating Gradio interface...")
    demo = create_interface()

    logger.info("Launching Gradio interface...")
    # Bounded request queue; no public share link, no local browser window.
    queued_app = demo.queue(max_size=20)
    queued_app.launch(
        share=False,
        inbrowser=False
    )


if __name__ == "__main__":
    main()
config.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration file for Youtu-Parsing HuggingFace Space
3
+ """
4
+
5
+ import os
6
+
7
+ # Model configuration
8
MODEL_NAME = "tencent/Youtu-Parsing"  # HuggingFace model repository id
ENABLE_ANGLE_CORRECTION = True

# Image processing settings (pixel-count bounds)
MIN_PIXELS = 256 ** 2
MAX_PIXELS = 1024 ** 2

# Supported file extensions
IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp']
PDF_EXTENSIONS = ['.pdf']


def _mode(name, description, prompt):
    """Build one PARSING_MODES entry."""
    return {"name": name, "description": description, "prompt": prompt}


# Parsing mode configurations, keyed by mode id
PARSING_MODES = {
    "document_parsing": _mode(
        "Document Parsing",
        "Comprehensive document structure analysis",
        "Analyze the layout structure of the input document, detect all structural elements and classify them semantically. Use \\n to delimit different regions.",
    ),
    "chart_parsing": _mode(
        "Chart Parsing",
        "Convert charts and diagrams to structured formats",
        "Convert the logic charts in the figure to Mermaid format and the data charts to Markdown format.",
    ),
    "formula_parsing": _mode(
        "Formula Parsing",
        "Extract mathematical formulas and convert to LaTeX",
        "Based on the given input field coordinates and layout type, identify and extract the content within the specified region. Formulas shall be represented in LaTeX notation, and tables shall be structured in OTSL format.",
    ),
    "custom": _mode(
        "Custom",
        "Use custom prompt for specialized analysis",
        "",
    ),
}

# RGBA colour per layout type, used when drawing the visualization overlay
LAYOUT_COLORS = {
    "Text": (51, 160, 44, 255),
    "Figure": (214, 39, 40, 255),
    "Caption": (255, 127, 14, 255),
    "Header": (31, 119, 180, 255),
    "Footer": (148, 103, 189, 255),
    "Formula": (23, 190, 207, 255),
    "Table": (247, 182, 210, 255),
    "Title": (255, 217, 47, 255),
    "Code": (127, 127, 127, 255),
    "Unknown": (200, 200, 200, 128),
    "Chart": (102, 195, 165, 255),
    "Seal": (140, 86, 75, 255),
}

# Gradio interface settings
GRADIO_THEME = "default"  # Use default theme for better custom CSS compatibility
GRADIO_TITLE = "Youtu-Parsing Demo"
GRADIO_DESCRIPTION = "🚀 Intelligent document content extraction and analysis. Supports recognition of text, tables, formulas, charts, and other document elements with precise structured parsing results."

# Default settings
DEFAULT_BATCH_SIZE = 5
DEFAULT_ENABLE_ANGLE_CORRECTION = True
DEFAULT_PARSING_MODE = "document_parsing"

# Environment-specific settings, read from environment variables
IS_HUGGINGFACE_SPACE = "SPACE_ID" in os.environ
HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN")

# Error messages ("parsing_failed" takes one positional format argument)
ERROR_MESSAGES = {
    "no_image": "Please upload an image first.",
    "model_load_failed": "Failed to load model. Please check the model configuration.",
    "parsing_failed": "Error during parsing: {}",
    "no_results": "No results returned from parsing.",
    "invalid_file": "Invalid file format. Please upload an image or PDF file.",
}

# Success messages
SUCCESS_MESSAGES = {
    "parsing_complete": "✅ Document parsing completed successfully!",
    "model_loaded": "✅ Model loaded successfully!",
}
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/TencentCloudADP/youtu-parsing.git#subdirectory=youtu_hf_parser
2
+ tqdm
3
+ flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.10