Upload add_sub_sorl_v1_abs10_50K
Browse files
add_sub_sorl_v1_abs10_50K/metrics.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
add_sub_sorl_v1_abs10_50K/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 650303660
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45c37fed66bed3773e1e69fae722c5dc56192e47f3b8468245de3448610fb81a
|
| 3 |
size 650303660
|
add_sub_sorl_v1_abs10_50K/train_config.json
CHANGED
|
@@ -17,10 +17,10 @@
|
|
| 17 |
"target_vocab_util": 0.8,
|
| 18 |
"min_abs_ppl": 0.0,
|
| 19 |
"zipf_alpha": 1.0,
|
| 20 |
-
"lr":
|
| 21 |
"emb_lr_mult": 1.0,
|
| 22 |
"weight_decay": 0.01,
|
| 23 |
-
"warmup_steps":
|
| 24 |
"cooldown_frac": 0.4,
|
| 25 |
"max_grad_norm": 1.0,
|
| 26 |
"vq_abs_pretrain_steps": 0,
|
|
@@ -30,7 +30,7 @@
|
|
| 30 |
"vq_abs_pretrain_target_vectors": 20000,
|
| 31 |
"batch_size": 64,
|
| 32 |
"gradient_accumulation_steps": 1,
|
| 33 |
-
"num_epochs":
|
| 34 |
"emb_warmup_steps": 0,
|
| 35 |
"log_every": 50,
|
| 36 |
"eval_every": 781,
|
|
@@ -69,16 +69,16 @@
|
|
| 69 |
"no_wandb": false,
|
| 70 |
"n_params": 162499262,
|
| 71 |
"run_name": "add_sub_sorl_v1_abs10_50K",
|
| 72 |
-
"git_commit": "
|
| 73 |
-
"timestamp": "2026-04-
|
| 74 |
"tokenizer": "Qwen/Qwen3-0.6B",
|
| 75 |
"dataset_repo": "thoughtworks/arithmetic-sorl-data",
|
| 76 |
"dataset_config": "add_sub_6digit",
|
| 77 |
"model_repo": "thoughtworks/arithmetic-sorl",
|
| 78 |
"trainer_version": "v1",
|
| 79 |
-
"wandb_run_id": "
|
| 80 |
-
"wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/
|
| 81 |
-
"final_accuracy":
|
| 82 |
-
"sft_accuracy": 0.
|
| 83 |
"eval_method": "ArithmeticEvaluator"
|
| 84 |
}
|
|
|
|
| 17 |
"target_vocab_util": 0.8,
|
| 18 |
"min_abs_ppl": 0.0,
|
| 19 |
"zipf_alpha": 1.0,
|
| 20 |
+
"lr": 8e-05,
|
| 21 |
"emb_lr_mult": 1.0,
|
| 22 |
"weight_decay": 0.01,
|
| 23 |
+
"warmup_steps": 468,
|
| 24 |
"cooldown_frac": 0.4,
|
| 25 |
"max_grad_norm": 1.0,
|
| 26 |
"vq_abs_pretrain_steps": 0,
|
|
|
|
| 30 |
"vq_abs_pretrain_target_vectors": 20000,
|
| 31 |
"batch_size": 64,
|
| 32 |
"gradient_accumulation_steps": 1,
|
| 33 |
+
"num_epochs": 20,
|
| 34 |
"emb_warmup_steps": 0,
|
| 35 |
"log_every": 50,
|
| 36 |
"eval_every": 781,
|
|
|
|
| 69 |
"no_wandb": false,
|
| 70 |
"n_params": 162499262,
|
| 71 |
"run_name": "add_sub_sorl_v1_abs10_50K",
|
| 72 |
+
"git_commit": "8d5ee5420119746ef4e2c87570eb250c9718f643",
|
| 73 |
+
"timestamp": "2026-04-12T20:32:49.376051+00:00",
|
| 74 |
"tokenizer": "Qwen/Qwen3-0.6B",
|
| 75 |
"dataset_repo": "thoughtworks/arithmetic-sorl-data",
|
| 76 |
"dataset_config": "add_sub_6digit",
|
| 77 |
"model_repo": "thoughtworks/arithmetic-sorl",
|
| 78 |
"trainer_version": "v1",
|
| 79 |
+
"wandb_run_id": "2jm1c18r",
|
| 80 |
+
"wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/2jm1c18r",
|
| 81 |
+
"final_accuracy": 1.0,
|
| 82 |
+
"sft_accuracy": 0.7220833333333333,
|
| 83 |
"eval_method": "ArithmeticEvaluator"
|
| 84 |
}
|