# zeroshotGPU/configs/default.yaml
# Arjunvir Singh
# Initial commit: zeroshotGPU MVP with full eval surface
# db06ffa
# Parser backends, one mapping per backend; each carries an `enabled` switch.
# Only `text` and `pymupdf` are enabled in this default config.
parsers:
  text:
    enabled: true
  pymupdf:
    enabled: true
  docling:
    enabled: false
    do_ocr: false
    do_table_structure: false
    force_backend_text: true
  # marker, mineru, olmocr and paddleocr share one option shape: a command
  # (null here), a timeout in seconds, and two argument strings.
  # NOTE(review): `{output_dir}` looks like a placeholder filled in by the
  # consumer of this config -- confirm against the loader.
  marker:
    enabled: false
    command: null
    timeout_seconds: 300
    output_args: "--output_dir {output_dir} --output_format markdown"
    extra_args: ""
  mineru:
    enabled: false
    command: null
    timeout_seconds: 600
    output_args: "--output_dir {output_dir}"
    extra_args: ""
  olmocr:
    enabled: false
    command: null
    timeout_seconds: 600
    output_args: "--output_dir {output_dir}"
    extra_args: ""
  paddleocr:
    enabled: false
    command: null
    timeout_seconds: 600
    output_args: "--output_dir {output_dir}"
    extra_args: ""
  unstructured:
    enabled: false
# Page-routing settings: a cap on primary parsers per page plus several
# fractional thresholds.
# NOTE(review): the thresholds are presumably scores in [0, 1] -- confirm
# the exact comparison semantics in the router.
routing:
  run_multiple_on_hard_pages: true
  max_primary_parsers_per_page: 2
  hard_page_threshold: 0.65
  scanned_text_threshold: 0.40
  table_density_threshold: 0.25
  formula_density_threshold: 0.15
  figure_density_threshold: 0.20
# Repair settings: a master switch, an iteration cap, GPU escalation
# controls, and one toggle per repair type.
repair:
  enabled: true
  max_iterations: 3
  # Plan and dry-run GPU escalations for verification failures.
  gpu_escalation: true
  # Actually invoke the configured GPU/VLM backend on flagged regions.
  # Defaults to false to avoid surprise model downloads on local runs;
  # set true on the Space once GPU models are warm.
  execute_gpu_escalations: false
  table_repair: true
  reading_order_repair: true
  figure_repair: true
  ocr_repair: true
# GPU backend settings, per-document budgets, and model definitions.
gpu:
  backend: transformers
  provider: huggingface_spaces
  space_name: zeroshotGPU
  batch_pages: true
  validate_tasks: true
  max_batch_size: 4
  max_gpu_seconds_per_doc: 120
  max_vlm_calls_per_doc: 30
  # One entry per model role. The vlm, ocr and table roles all point at the
  # same model_id and differ only in `task`.
  models:
    vlm:
      model_id: Qwen/Qwen2.5-VL-3B-Instruct
      task: image-text-to-text
      device: auto
      dtype: bfloat16
      max_batch_size: 1
    ocr:
      model_id: Qwen/Qwen2.5-VL-3B-Instruct
      task: document-ocr
      device: auto
      dtype: bfloat16
      max_batch_size: 1
    table:
      model_id: Qwen/Qwen2.5-VL-3B-Instruct
      task: table-repair
      device: auto
      dtype: bfloat16
      max_batch_size: 1
    embedding:
      model_id: jinaai/jina-embeddings-v3
      task: retrieval.passage
      device: auto
      dtype: bfloat16
      max_batch_size: 16
  # Maps a task name to a role; every value here matches a key under `models`.
  # NOTE(review): placed as a sibling of `models` -- confirm against the
  # config loader.
  task_model_roles:
    vlm_route_repair: vlm
    ocr_page: ocr
    table_vlm_repair: table
    figure_description: vlm
# PDF rendering and cropping options.
# NOTE(review): `asset_dir` is a relative name; what it is resolved against
# is not visible here -- confirm in the consumer.
pdf:
  render_pages: true
  render_dpi: 150
  crop_tables: true
  crop_figures: true
  asset_dir: assets
# Quality settings: an acceptance threshold and a list of failure names
# designated as blocking.
quality:
  accept_threshold: 0.88
  blocking_failures:
    - empty_page
    - invalid_table
    - missing_text_coverage
    - reading_order_failure
# Chunking settings: planner and baseline strategy, token sizing, feature
# toggles, and the list of strategy names.
chunking:
  enabled: true
  planner: agentic
  baseline_strategy: recursive_structure
  target_tokens: 512
  min_tokens: 120
  overlap_ratio: 0.15
  parent_child: true
  parent_target_tokens: 1600
  page_level_for_paginated_docs: true
  table_chunks: true
  figure_chunks: true
  contextual_prefix: false
  contextual_retrieval: false
  semantic_similarity_threshold: 0.18
  max_propositions_per_source: 8
  max_proposition_chunks: 64
  semantic_chunking: false
  late_chunking: false
  vision_guided: false
  agentic_proposition_chunking: false
  # NOTE(review): list order presumably matters (a "ladder") -- confirm
  # how the planner walks it.
  strategy_ladder:
    - fixed_token_baseline
    - recursive_structure
    - metadata_enriched
    - parent_child
    - contextual_retrieval
    - late_chunking
    - semantic_chunking
    - vision_guided
    - agentic_proposition
# Benchmark settings; currently only the retriever backend is configured.
benchmarks:
  retriever:
    # `lexical` (default, model-free TF-IDF) or `embedding`
    # (sentence-transformers).
    # The `embedding` backend pulls model_id and task from
    # gpu.models.embedding unless overridden here.
    # Requires `pip install sentence-transformers`.
    backend: lexical
    model_id: null
    task: null
# Deployment target. `gpu_models_target` carries the same value as
# `space_name` in the gpu section of this file.
deployment:
  target: huggingface_spaces
  gpu_models_target: zeroshotGPU