ghrua commited on
Commit
cdb089a
·
verified ·
1 Parent(s): b9fefcc

Upload results_hist/talk_like_a_graph/OLMo2-1B-stage2-seed42-SEXMH-L5/talk_like_a_graph:test/metrics-all.jsonl with huggingface_hub

Browse files
results_hist/talk_like_a_graph/OLMo2-1B-stage2-seed42-SEXMH-L5/talk_like_a_graph:test/metrics-all.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"task_name": "talk_like_a_graph", "task_hash": "74b44fa01fbbfa269a80f1e8919bc6e6", "model_hash": "4dceda1252803749643cc459156c0435", "model_config": {"model": "../../hf_ckpts//OLMo2-1B-stage2-seed42-SEXMH-L5/step23852-unsharded", "revision": null, "trust_remote_code": null, "max_length": 4096, "model_path": "../../hf_ckpts//OLMo2-1B-stage2-seed42-SEXMH-L5/step23852-unsharded", "model_type": "vllm"}, "task_config": {"task_name": "talk_like_a_graph", "task_core": "talk_like_a_graph", "limit": null, "split": "test", "num_shots": 0, "fewshot_seed": 1234, "primary_metric": "substring_exact_match", "random_subsample_seed": 1234, "context_kwargs": null, "generation_kwargs": {"max_gen_toks": 128, "temperature": 0.0, "do_sample": false, "stop_sequences": ["\n\n", "\n"]}, "metric_kwargs": null, "native_id_field": "id", "fewshot_source": null, "dataset_path": "ghrua/talk-like-a-graph", "dataset_name": null, "use_chat_format": null, "version": 0, "revision": null, "compute_gold_bpb": false, "external_eval": null, "custom_kwargs": {"shuffle_choices": false, "choices_first": false}, "skip_model_judges": null, "model_max_length": null, "metadata": {"alias": "talk_like_a_graph:test"}}, "compute_config": {"batch_size": "10000", "max_batch_size": 32, "output_dir": "results_hist/talk_like_a_graph/OLMo2-1B-stage2-seed42-SEXMH-L5/talk_like_a_graph:test", "num_recorded_inputs": 3, "save_raw_requests": true, "recompute_metrics": false}, "processing_time": 2.4161808490753174, "current_date": "2026-01-20 15:09:36 UTC", "num_instances": 308, "beaker_info": {}, "metrics": {"substring_exact_match": 0.2012987012987013, "primary_score": 0.2012987012987013, "extra_metrics": {"num_tokens": 71.92532467532467}}, "task_idx": 0}