| name: "docvqa"  # identifier of this configuration
| # NOTE(review): every line starts with a pipe and the last line also ends with one; that frame is not YAML and looks like table-cell residue - confirm against the original file. The notes below describe the YAML once the frame is stripped.
| task: "QA"  # task type; presumably question answering - confirm with the consumer
| # Bare key with no value: YAML loads it as null.
| dataloader_model_task_as:
|
| base_dataset_name: "ex_docvqa"
| # Integer document count (how the consumer uses it is not visible here).
| documents_count: 50
| # The three label settings below are bare keys, so each loads as null; write [] or {} explicitly if an empty collection is intended.
| valid_labels:
|
| label_mapping:
|
| valid_secondary_labels:
|
|
| # Prompt settings.
| prompt_template: "ClaudeRefined12"
|
| prompt_task: "json"
| # NOTE(review): prompt_params is a bare key (null) and the keys from num_solutions through gt_format sit at the same indentation, so they parse as top-level siblings; presumably they were meant to be nested under prompt_params - confirm and re-indent if so.
| prompt_params:
|
| num_solutions: 3
|
| doc_type: "business and administrative"
|
| language: "English"
| # The ** markers are part of the string value itself.
| gt_type: "Multiple questions about each document, with their answers taken **verbatim** from the document."
| # Single-quoted so the embedded double quotes stay literal; the text sketches a mapping from question text to answer text.
| gt_format: '{"<Text of question 1>": "<Answer to question 1>", "<Text of question 2>": "<Answer to question 2>", ...}'
|
|
| # Seed selection settings.
| seed_selection_strategy: "v2"
|
| seed_images_count: 6
| # presumably the HDBSCAN min_cluster_size parameter - TODO confirm
| hdbscan_min_cluster_size: 10
| # Plain (unquoted) scalar; loads as the string combined.
| embedding_type: combined
|
| alpha: 1
| # NOTE(review): -1 presumably means no limit on the seed pool - verify against the consumer.
| max_seed_pool: -1 |