# Youtu-Parsing / config.py
# Yinsongliu's picture
# initial
# a4894fe
"""
Configuration file for Youtu-Parsing HuggingFace Space
"""
import os
# Model configuration
# Identifier of the model to load (presumably a Hugging Face Hub repo id;
# confirm against the code that loads it).
MODEL_NAME = "tencent/Youtu-Parsing"
# Module-level switch for angle correction; how it is consumed is decided by
# the code importing this config.
ENABLE_ANGLE_CORRECTION = True

# Image processing settings
# Pixel-count bounds, expressed as squared side lengths (256x256, 1024x1024).
MIN_PIXELS = 256 ** 2
MAX_PIXELS = 1024 ** 2
# Supported file extensions
# Each entry is lowercase and includes the leading dot.
IMAGE_EXTENSIONS = [
    ".jpg",
    ".jpeg",
    ".png",
    ".bmp",
    ".tiff",
    ".webp",
]
PDF_EXTENSIONS = [".pdf"]
# Parsing mode configurations
# Maps each mode key to its display name, a short description and the prompt
# text associated with that mode. The "custom" entry ships an empty prompt.
PARSING_MODES = {
    "document_parsing": {
        "name": "Document Parsing",
        "description": "Comprehensive document structure analysis",
        # Note: "\\n" yields a literal backslash followed by "n" in the
        # prompt text, not a newline character.
        "prompt": (
            "Analyze the layout structure of the input document, "
            "detect all structural elements and classify them semantically. "
            "Use \\n to delimit different regions."
        ),
    },
    "chart_parsing": {
        "name": "Chart Parsing",
        "description": "Convert charts and diagrams to structured formats",
        "prompt": (
            "Convert the logic charts in the figure to Mermaid format "
            "and the data charts to Markdown format."
        ),
    },
    "formula_parsing": {
        "name": "Formula Parsing",
        "description": "Extract mathematical formulas and convert to LaTeX",
        "prompt": (
            "Based on the given input field coordinates and layout type, "
            "identify and extract the content within the specified region. "
            "Formulas shall be represented in LaTeX notation, "
            "and tables shall be structured in OTSL format."
        ),
    },
    "custom": {
        "name": "Custom",
        "description": "Use custom prompt for specialized analysis",
        "prompt": "",
    },
}
# Layout type colors for visualization
# One 4-tuple of integers per layout label (presumably RGBA, 0-255 per
# channel; confirm against the drawing code). "Unknown" is the only entry
# whose fourth component is 128 instead of 255.
LAYOUT_COLORS = {
    "Text": (51, 160, 44, 255),
    "Figure": (214, 39, 40, 255),
    "Caption": (255, 127, 14, 255),
    "Header": (31, 119, 180, 255),
    "Footer": (148, 103, 189, 255),
    "Formula": (23, 190, 207, 255),
    "Table": (247, 182, 210, 255),
    "Title": (255, 217, 47, 255),
    "Code": (127, 127, 127, 255),
    "Unknown": (200, 200, 200, 128),
    "Chart": (102, 195, 165, 255),
    "Seal": (140, 86, 75, 255),
}
# Gradio interface settings
GRADIO_THEME = "default"  # Use default theme for better custom CSS compatibility
GRADIO_TITLE = "Youtu-Parsing Demo"
# User-facing description text. It opens with the rocket emoji (U+1F680);
# the file must be saved and read as UTF-8 for it to survive intact.
GRADIO_DESCRIPTION = (
    "🚀 Intelligent document content extraction and analysis. "
    "Supports recognition of text, tables, formulas, charts, and other "
    "document elements with precise structured parsing results."
)
# Default settings
DEFAULT_BATCH_SIZE = 5
DEFAULT_ENABLE_ANGLE_CORRECTION = True
# Must name one of the keys of PARSING_MODES.
DEFAULT_PARSING_MODE = "document_parsing"

# Environment-specific settings
# True whenever the SPACE_ID variable exists in the environment, even if empty.
IS_HUGGINGFACE_SPACE = "SPACE_ID" in os.environ
# Value of HF_TOKEN, or None when the variable is not set.
HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN")
# Error messages
# User-facing error texts keyed by a short identifier. "parsing_failed"
# carries a "{}" placeholder (presumably filled via str.format with the
# error detail; confirm at the call site).
ERROR_MESSAGES = {
    "no_image": "Please upload an image first.",
    "model_load_failed": (
        "Failed to load model. Please check the model configuration."
    ),
    "parsing_failed": "Error during parsing: {}",
    "no_results": "No results returned from parsing.",
    "invalid_file": (
        "Invalid file format. Please upload an image or PDF file."
    ),
}
# Success messages
# User-facing success texts keyed by a short identifier. Each message opens
# with the check-mark emoji (U+2705); the file must be saved and read as
# UTF-8 for it to survive intact.
SUCCESS_MESSAGES = {
    "parsing_complete": "✅ Document parsing completed successfully!",
    "model_loaded": "✅ Model loaded successfully!",
}