Upload add_sub_sorl_v1_abs10_K1_100K_2L1H128d
Browse files
add_sub_sorl_v1_abs10_K1_100K_2L1H128d/metrics.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
add_sub_sorl_v1_abs10_K1_100K_2L1H128d/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 157702060
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c61e97d3aa07dee13276136fb15d1ac292dde1fe428b9b68e841e8bd046d4154
|
| 3 |
size 157702060
|
add_sub_sorl_v1_abs10_K1_100K_2L1H128d/train_config.json
CHANGED
|
@@ -69,16 +69,20 @@
|
|
| 69 |
"no_wandb": false,
|
| 70 |
"n_params": 39348864,
|
| 71 |
"run_name": "add_sub_sorl_v1_abs10_K1_100K_2L1H128d",
|
| 72 |
-
"git_commit": "
|
| 73 |
-
"timestamp": "2026-04-
|
| 74 |
"tokenizer": "Qwen/Qwen3-0.6B",
|
| 75 |
"dataset_repo": "thoughtworks/arithmetic-sorl-data",
|
| 76 |
"dataset_config": "add_sub_6digit",
|
|
|
|
| 77 |
"model_repo": "thoughtworks/arithmetic-sorl",
|
| 78 |
"trainer_version": "v1",
|
| 79 |
-
"wandb_run_id": "
|
| 80 |
-
"wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/
|
| 81 |
-
"
|
| 82 |
-
"
|
|
|
|
|
|
|
|
|
|
| 83 |
"eval_method": "ArithmeticEvaluator"
|
| 84 |
}
|
|
|
|
| 69 |
"no_wandb": false,
|
| 70 |
"n_params": 39348864,
|
| 71 |
"run_name": "add_sub_sorl_v1_abs10_K1_100K_2L1H128d",
|
| 72 |
+
"git_commit": "f835493c19eb98267697007042c9d440cad2afbb",
|
| 73 |
+
"timestamp": "2026-04-15T16:26:32.962102+00:00",
|
| 74 |
"tokenizer": "Qwen/Qwen3-0.6B",
|
| 75 |
"dataset_repo": "thoughtworks/arithmetic-sorl-data",
|
| 76 |
"dataset_config": "add_sub_6digit",
|
| 77 |
+
"train_dataset": "fixed_train/train_100K_seed42.pt",
|
| 78 |
"model_repo": "thoughtworks/arithmetic-sorl",
|
| 79 |
"trainer_version": "v1",
|
| 80 |
+
"wandb_run_id": "6rjq3rdp",
|
| 81 |
+
"wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/6rjq3rdp",
|
| 82 |
+
"eval_final_dataset": "eval_sets/eval_add_sub_6d_N100_seed42.json",
|
| 83 |
+
"eval_epoch_dataset": "eval_sets/eval_add_sub_6d_N25_seed42.json",
|
| 84 |
+
"eval_hf_repo": "thoughtworks/arithmetic-sorl-data",
|
| 85 |
+
"final_accuracy": 0.6476923076923077,
|
| 86 |
+
"sft_accuracy": 0.14153846153846153,
|
| 87 |
"eval_method": "ArithmeticEvaluator"
|
| 88 |
}
|