| {"task_name": "csqa", "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0", "model_config": {"model": "hf_checkpoints/dclm-pool-1b-1x-h-edu-open_lm_1b_swiglutorch-warm5000-lr0p003-wd0p033-cd3e-05-bs256-mult1-seed124-tokens28795904000", "revision": null, "trust_remote_code": null, "max_length": 2048, "model_path": null, "model_type": "hf"}, "task_config": {"task_name": "csqa", "task_core": "csqa", "limit": null, "split": "validation", "num_shots": 5, "fewshot_seed": 1234, "primary_metric": "acc_uncond", "random_subsample_seed": 1234, "context_kwargs": null, "generation_kwargs": null, "metric_kwargs": {"uncond_docid_offset": 1000000}, "native_id_field": "id", "fewshot_source": "OLMES:commonsense_qa", "dataset_path": "commonsense_qa", "dataset_name": null, "use_chat_format": null, "version": 0, "revision": null, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "csqa:rc::olmes"}}, "compute_config": {"batch_size": "16", "max_batch_size": 32, "output_dir": "hf_checkpoints/dclm-pool-1b-1x-h-edu-open_lm_1b_swiglutorch-warm5000-lr0p003-wd0p033-cd3e-05-bs256-mult1-seed124-tokens28795904000/olmes_fp32/core_9mcqa", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false, "check_datalake": false, "push_datalake": false, "no_datalake": false}, "processing_time": 43.80733871459961, "current_date": "2024-11-19 21:15:23 UTC", "num_instances": 1221, "beaker_info": {}, "metrics": {"acc_raw": 0.5544635544635544, "acc_per_token": 0.5626535626535627, "acc_per_char": 0.6027846027846028, "correct_loss_raw": 6.805971597949078, "incorrect_loss_raw": 11.287900251836, "correct_loss_per_token": 4.20926476363647, "incorrect_loss_per_token": 7.28920202576634, "correct_loss_per_char": 0.6899969903561814, "incorrect_loss_per_char": 1.2158805284120953, "acc_uncond": 0.6003276003276004, "correct_loss_uncond": -9.904503226292416, "incorrect_loss_uncond": -5.4698025422272964, "primary_score": 0.6003276003276004}, "task_idx": 7} |