koichi12
/

llm-scripts

Model card Files Files and versions

llm-scripts / scripts /yans /lm-evaluation-harness /tests /testyamls /test-01.yaml

koichi12's picture

Add files using upload-large-folder tool

0ba7ae8 verified over 1 year ago

history blame contribute delete

1.19 kB

	# Group config "test-1": a single group whose task list exercises each entry
	# form side by side -- bare string names, dict entries that override
	# num_fewshot, a nested sub-group, one task repeated under different aliases,
	# and a task defined fully inline.
	# NOTE(review): nesting below was reconstructed from the dash/key order;
	# confirm against the upstream copy of this file.
	group: test-1
	group_alias: test 1
	task:
	  - piqa  # string task
	  - ai2_arc  # string tag
	  - task: super-glue-lm-eval-v1  # Should this be spread out?
	    num_fewshot: 3
	  - task: swag  # dict registered task
	    num_fewshot: 2
	  - task: mmlu
	    num_fewshot: 5
	  # Nested group: the settings after its task list apply to the sub-group entry.
	  - group: nli-tasks  # dict group
	    task:
	      - anli
	      - boolq
	      - sglue_rte
	    num_fewshot: 4
	    metric_list:
	      - metric: brier_score
	  # Same registered task listed three times; only alias and shot count differ.
	  - task: sciq  # dict registered task duplicate
	    task_alias: sciq 2-shot
	    num_fewshot: 2
	  - task: sciq  # dict registered task duplicate
	    task_alias: sciq 4-shot
	    num_fewshot: 4
	  - task: sciq  # dict registered task duplicate
	    task_alias: sciq 6-shot
	    num_fewshot: 6
	  # Task defined inline (dataset, splits, prompt, choices, metric) rather than
	  # referenced by name.
	  - task: siqa_custom  # dict task
	    dataset_path: social_i_qa
	    dataset_name: null
	    output_type: multiple_choice
	    training_split: train
	    validation_split: validation
	    doc_to_text: "Question: {{context}} {{question}}\nAnswer:"
	    target_delimiter: " "
	    doc_to_choice:
	      - "{{answerA}}"
	      - "{{answerB}}"
	      - "{{answerC}}"
	    # Template casts label to int and subtracts 1 (presumably mapping a
	    # 1-based label onto the 0-based doc_to_choice index -- confirm).
	    doc_to_target: "{{ (label|int) - 1 }}"
	    metric_list:
	      - metric: acc
	        aggregation: mean
	        higher_is_better: true