```yaml
dataset_path: lmms-lab/D170_v4.1_EN
dataset_kwargs:
  token: True
task: "d170_en"
test_split: test
output_type: generate_until
doc_to_visual: !function d170_en_utils.doc_to_visual
doc_to_text: !function utils.doc_to_text  # {{prompt}} is filled with doc["question"]
doc_to_target: "{{annotation}}"
generation_kwargs:
  until:
    - "ASSISTANT:"
  max_new_tokens: 1024
  temperature: 0
  top_p: 1.0
  num_beams: 1
  do_sample: false  # greedy decoding
process_results: !function d170_en_utils.process_results  # applies the LLM-judge evaluation
metric_list:
  - metric: llm_as_judge_eval
    aggregation: mean
    higher_is_better: true
metadata:
  version: 0.0
  gpt_eval_model_name: "gpt-4-1106-preview"
include: _default_template_internal_eval_yaml
```
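The three `!function` entries above resolve to Python callables on the import path. As a rough guide to what they do, here is a minimal, hypothetical sketch in the style of lmms-eval hooks; the judge prompt, the 0–10 rubric, and the `image`/`question`/`annotation` field names are assumptions for illustration, not the repository's actual implementation:

```python
# d170_en_utils.py -- hypothetical sketch of the hooks referenced in the YAML.
# Signatures follow lmms-eval hook conventions; the judge prompt and scoring
# rubric are illustrative assumptions, not the actual code.
import os

from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

JUDGE_MODEL = "gpt-4-1106-preview"  # mirrors metadata.gpt_eval_model_name


def doc_to_visual(doc):
    """Return the list of images the model sees for this doc."""
    return [doc["image"].convert("RGB")]  # assumes an "image" field


def doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the text prompt; {{prompt}} is simply doc["question"] here."""
    return doc["question"]


def process_results(doc, results):
    """Score the first generation with an LLM judge.

    The returned key must match `metric` in metric_list so that
    `aggregation: mean` can average the per-sample scores.
    """
    prediction = results[0].strip()
    judge_prompt = (
        "You are grading an answer to a question about an image.\n"
        f"Question: {doc['question']}\n"
        f"Reference answer: {doc['annotation']}\n"
        f"Model answer: {prediction}\n"
        "Reply with a single integer score from 0 to 10."
    )
    response = client.chat.completions.create(
        model=JUDGE_MODEL,
        messages=[{"role": "user", "content": judge_prompt}],
        temperature=0,
    )
    try:
        # Normalize the judge's 0-10 integer to [0, 1] for mean aggregation.
        score = int(response.choices[0].message.content.strip()) / 10.0
    except ValueError:
        score = 0.0  # unparsable judge output counts as a failure
    return {"llm_as_judge_eval": score}
```

Whatever rubric the real judge uses, the dict key returned by `process_results` must be `llm_as_judge_eval` to match the metric name in `metric_list`; with these files on the Python path, the task is selected via `--tasks d170_en`.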