Upload add_sub_sorl_v1_abs30_100K_1L2H256d
Browse files
add_sub_sorl_v1_abs30_100K_1L2H256d/metrics.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
add_sub_sorl_v1_abs30_100K_1L2H256d/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 315132124
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09d98deafce719c16926ccc3da07e66a470e2dac55d7c8c24705fd96e4793295
|
| 3 |
size 315132124
|
add_sub_sorl_v1_abs30_100K_1L2H256d/train_config.json
CHANGED
|
@@ -69,16 +69,20 @@
|
|
| 69 |
"no_wandb": false,
|
| 70 |
"n_params": 78706688,
|
| 71 |
"run_name": "add_sub_sorl_v1_abs30_100K_1L2H256d",
|
| 72 |
-
"git_commit": "
|
| 73 |
-
"timestamp": "2026-04-
|
| 74 |
"tokenizer": "Qwen/Qwen3-0.6B",
|
| 75 |
"dataset_repo": "thoughtworks/arithmetic-sorl-data",
|
| 76 |
"dataset_config": "add_sub_6digit",
|
|
|
|
| 77 |
"model_repo": "thoughtworks/arithmetic-sorl",
|
| 78 |
"trainer_version": "v1",
|
| 79 |
-
"wandb_run_id": "
|
| 80 |
-
"wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/
|
| 81 |
-
"
|
| 82 |
-
"
|
|
|
|
|
|
|
|
|
|
| 83 |
"eval_method": "ArithmeticEvaluator"
|
| 84 |
}
|
|
|
|
| 69 |
"no_wandb": false,
|
| 70 |
"n_params": 78706688,
|
| 71 |
"run_name": "add_sub_sorl_v1_abs30_100K_1L2H256d",
|
| 72 |
+
"git_commit": "f835493c19eb98267697007042c9d440cad2afbb",
|
| 73 |
+
"timestamp": "2026-04-15T18:24:53.677274+00:00",
|
| 74 |
"tokenizer": "Qwen/Qwen3-0.6B",
|
| 75 |
"dataset_repo": "thoughtworks/arithmetic-sorl-data",
|
| 76 |
"dataset_config": "add_sub_6digit",
|
| 77 |
+
"train_dataset": "fixed_train/train_100K_seed42.pt",
|
| 78 |
"model_repo": "thoughtworks/arithmetic-sorl",
|
| 79 |
"trainer_version": "v1",
|
| 80 |
+
"wandb_run_id": "5kbgqa24",
|
| 81 |
+
"wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/5kbgqa24",
|
| 82 |
+
"eval_final_dataset": "eval_sets/eval_add_sub_6d_N100_seed42.json",
|
| 83 |
+
"eval_epoch_dataset": "eval_sets/eval_add_sub_6d_N25_seed42.json",
|
| 84 |
+
"eval_hf_repo": "thoughtworks/arithmetic-sorl-data",
|
| 85 |
+
"final_accuracy": 0.2723076923076923,
|
| 86 |
+
"sft_accuracy": 0.0034615384615384616,
|
| 87 |
"eval_method": "ArithmeticEvaluator"
|
| 88 |
}
|