| { |
| "results": { |
| "winogrande": { |
| "acc,none": 0.7655880031570639, |
| "acc_stderr,none": 0.011906130106237992, |
| "alias": "winogrande" |
| } |
| }, |
| "configs": { |
| "winogrande": { |
| "task": "winogrande", |
| "dataset_path": "/lustre07/scratch/gagan30/arocr/meta-llama/self_rewarding_models/eval/winogrande", |
| "dataset_name": "winogrande_xl", |
| "training_split": "train", |
| "validation_split": "validation", |
| "doc_to_text": "def doc_to_text(doc):\n answer_to_num = {\"1\": 0, \"2\": 1}\n return answer_to_num[doc[\"answer\"]]\n", |
| "doc_to_target": "def doc_to_target(doc):\n idx = doc[\"sentence\"].index(\"_\") + 1\n return doc[\"sentence\"][idx:].strip()\n", |
| "doc_to_choice": "def doc_to_choice(doc):\n idx = doc[\"sentence\"].index(\"_\")\n options = [doc[\"option1\"], doc[\"option2\"]]\n return [doc[\"sentence\"][:idx] + opt for opt in options]\n", |
| "description": "", |
| "target_delimiter": " ", |
| "fewshot_delimiter": "\n\n", |
| "num_fewshot": 5, |
| "metric_list": [ |
| { |
| "metric": "acc", |
| "aggregation": "mean", |
| "higher_is_better": true |
| } |
| ], |
| "output_type": "multiple_choice", |
| "repeats": 1, |
| "should_decontaminate": true, |
| "doc_to_decontamination_query": "sentence", |
| "metadata": { |
| "version": 1.0 |
| } |
| } |
| }, |
| "versions": { |
| "winogrande": 1.0 |
| }, |
| "n-shot": { |
| "winogrande": 5 |
| }, |
| "config": { |
| "model": "vllm", |
| "model_args": "pretrained=/lustre07/scratch/gagan30/arocr/meta-llama/self_rewarding_models/Oasis,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.9,data_parallel_size=1,max_model_len=4096", |
| "batch_size": "auto:128", |
| "batch_sizes": [], |
| "device": "cuda", |
| "use_cache": "/lustre07/scratch/gagan30/arocr/cache/", |
| "limit": null, |
| "bootstrap_iters": 100000, |
| "gen_kwargs": null |
| }, |
| "git_hash": null |
| } |