# Spaces: Running on Zero
"""
Configuration file for Youtu-Parsing HuggingFace Space
"""
import os

# Model configuration
# Identifier of the model in "<owner>/<name>" form.
MODEL_NAME = "tencent/Youtu-Parsing"
# Module-wide switch for angle correction.
# NOTE(review): a second flag with the same value, DEFAULT_ENABLE_ANGLE_CORRECTION,
# is defined further down in this file — confirm which one consumers read.
ENABLE_ANGLE_CORRECTION = True
# Image processing settings
# Written as side * side products (256x256 and 1024x1024).
# Presumably lower/upper bounds on total image area — confirm with the consumer.
MIN_PIXELS = 256 * 256
MAX_PIXELS = 1024 * 1024

# Supported file extensions
# All entries are lower-case and include the leading dot.
IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp']
PDF_EXTENSIONS = ['.pdf']
# Parsing mode configurations
# Maps a mode key to a dict with three string fields:
#   "name"        - short label for the mode
#   "description" - one-line summary of the mode
#   "prompt"      - prompt text for the mode; empty for "custom"
PARSING_MODES = {
    "document_parsing": {
        "name": "Document Parsing",
        "description": "Comprehensive document structure analysis",
        # The escaped "\\n" yields the two literal characters backslash + "n",
        # not a newline; kept exactly as found.
        # NOTE(review): confirm the model expects the literal form.
        "prompt": (
            "Analyze the layout structure of the input document, detect all "
            "structural elements and classify them semantically. "
            "Use \\n to delimit different regions."
        ),
    },
    "chart_parsing": {
        "name": "Chart Parsing",
        "description": "Convert charts and diagrams to structured formats",
        "prompt": (
            "Convert the logic charts in the figure to Mermaid format "
            "and the data charts to Markdown format."
        ),
    },
    "formula_parsing": {
        "name": "Formula Parsing",
        "description": "Extract mathematical formulas and convert to LaTeX",
        "prompt": (
            "Based on the given input field coordinates and layout type, "
            "identify and extract the content within the specified region. "
            "Formulas shall be represented in LaTeX notation, "
            "and tables shall be structured in OTSL format."
        ),
    },
    "custom": {
        "name": "Custom",
        "description": "Use custom prompt for specialized analysis",
        # Left empty here; presumably supplied by the user at run time.
        "prompt": "",
    },
}
# Layout type colors for visualization
# Maps a layout-type label to a 4-tuple of ints.
# Presumably (R, G, B, A) with 0-255 channels — confirm with the drawing code.
# "Unknown" is the only entry whose fourth component is 128 rather than 255.
LAYOUT_COLORS = {
    "Text": (51, 160, 44, 255),
    "Figure": (214, 39, 40, 255),
    "Caption": (255, 127, 14, 255),
    "Header": (31, 119, 180, 255),
    "Footer": (148, 103, 189, 255),
    "Formula": (23, 190, 207, 255),
    "Table": (247, 182, 210, 255),
    "Title": (255, 217, 47, 255),
    "Code": (127, 127, 127, 255),
    "Unknown": (200, 200, 200, 128),
    "Chart": (102, 195, 165, 255),
    "Seal": (140, 86, 75, 255),
}
# Gradio interface settings
GRADIO_THEME = "default"  # Use default theme for better custom CSS compatibility
GRADIO_TITLE = "Youtu-Parsing Demo"
# NOTE(review): this text previously began with a stray "π ", which looks like
# a mis-decoded emoji. The original glyph cannot be recovered from the damaged
# text, so it is dropped here; restore the intended icon if it is known.
GRADIO_DESCRIPTION = (
    "Intelligent document content extraction and analysis. "
    "Supports recognition of text, tables, formulas, charts, and other "
    "document elements with precise structured parsing results."
)
# Default settings
DEFAULT_BATCH_SIZE = 5
DEFAULT_ENABLE_ANGLE_CORRECTION = True
# Expected to name one of the keys of the parsing-mode table in this file.
DEFAULT_PARSING_MODE = "document_parsing"
# Environment-specific settings
# Both values are read once, when this module is imported.
# True whenever the SPACE_ID environment variable is set, even to an empty string.
IS_HUGGINGFACE_SPACE = os.getenv("SPACE_ID") is not None
# Raw value of the HF_TOKEN environment variable, or None when it is unset.
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
# Error messages
# Maps an error key to its message text.
# "parsing_failed" carries one positional "{}" placeholder, so it has to be
# filled with str.format before use (presumably with the error detail).
ERROR_MESSAGES = {
    "no_image": "Please upload an image first.",
    "model_load_failed": "Failed to load model. Please check the model configuration.",
    "parsing_failed": "Error during parsing: {}",
    "no_results": "No results returned from parsing.",
    "invalid_file": "Invalid file format. Please upload an image or PDF file.",
}
# Success messages
# NOTE(review): both messages previously began with a stray "β", which looks
# like a mis-decoded status icon. It is replaced with U+2705 (white heavy check
# mark) as the most likely original — confirm against the upstream file.
# The icon is written as an escape so the source stays ASCII and cannot be
# damaged by a later re-encoding.
SUCCESS_MESSAGES = {
    "parsing_complete": "\u2705 Document parsing completed successfully!",
    "model_loaded": "\u2705 Model loaded successfully!",
}