amirali1985 committed on
Commit
4c0ffe8
·
verified ·
1 Parent(s): 3a0cee3

Upload add_sub_sorl_v1_abs10_K1_50K

Browse files
add_sub_sorl_v1_abs10_K1_50K/metrics.json CHANGED
The diff for this file is too large to render. See raw diff
 
add_sub_sorl_v1_abs10_K1_50K/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a2c0251ad09fdb996d2fea102a5627dae806ff705f7c530ac476b1e21327481
3
  size 650303660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ceaee5d75482133531e610fe74922e0c8d2bd4d186de1d664e9e8159890d3d4
3
  size 650303660
add_sub_sorl_v1_abs10_K1_50K/train_config.json CHANGED
@@ -17,10 +17,10 @@
17
  "target_vocab_util": 0.8,
18
  "min_abs_ppl": 0.0,
19
  "zipf_alpha": 1.0,
20
- "lr": 4e-05,
21
  "emb_lr_mult": 1.0,
22
  "weight_decay": 0.01,
23
- "warmup_steps": 234,
24
  "cooldown_frac": 0.4,
25
  "max_grad_norm": 1.0,
26
  "vq_abs_pretrain_steps": 0,
@@ -30,7 +30,7 @@
30
  "vq_abs_pretrain_target_vectors": 20000,
31
  "batch_size": 64,
32
  "gradient_accumulation_steps": 1,
33
- "num_epochs": 10,
34
  "emb_warmup_steps": 0,
35
  "log_every": 50,
36
  "eval_every": 781,
@@ -69,16 +69,16 @@
69
  "no_wandb": false,
70
  "n_params": 162499262,
71
  "run_name": "add_sub_sorl_v1_abs10_K1_50K",
72
- "git_commit": "f447da529caceac8c7d256cbb2cd185cbc50feac",
73
- "timestamp": "2026-04-12T16:03:25.379451+00:00",
74
  "tokenizer": "Qwen/Qwen3-0.6B",
75
  "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
  "dataset_config": "add_sub_6digit",
77
  "model_repo": "thoughtworks/arithmetic-sorl",
78
  "trainer_version": "v1",
79
- "wandb_run_id": "e21wdqti",
80
- "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/e21wdqti",
81
- "final_accuracy": 0.9266666666666666,
82
- "sft_accuracy": 0.69875,
83
  "eval_method": "ArithmeticEvaluator"
84
  }
 
17
  "target_vocab_util": 0.8,
18
  "min_abs_ppl": 0.0,
19
  "zipf_alpha": 1.0,
20
+ "lr": 8e-05,
21
  "emb_lr_mult": 1.0,
22
  "weight_decay": 0.01,
23
+ "warmup_steps": 468,
24
  "cooldown_frac": 0.4,
25
  "max_grad_norm": 1.0,
26
  "vq_abs_pretrain_steps": 0,
 
30
  "vq_abs_pretrain_target_vectors": 20000,
31
  "batch_size": 64,
32
  "gradient_accumulation_steps": 1,
33
+ "num_epochs": 20,
34
  "emb_warmup_steps": 0,
35
  "log_every": 50,
36
  "eval_every": 781,
 
69
  "no_wandb": false,
70
  "n_params": 162499262,
71
  "run_name": "add_sub_sorl_v1_abs10_K1_50K",
72
+ "git_commit": "8d5ee5420119746ef4e2c87570eb250c9718f643",
73
+ "timestamp": "2026-04-13T01:34:00.793882+00:00",
74
  "tokenizer": "Qwen/Qwen3-0.6B",
75
  "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
  "dataset_config": "add_sub_6digit",
77
  "model_repo": "thoughtworks/arithmetic-sorl",
78
  "trainer_version": "v1",
79
+ "wandb_run_id": "w8qij9iy",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/w8qij9iy",
81
+ "final_accuracy": 0.9970833333333333,
82
+ "sft_accuracy": 0.9295833333333333,
83
  "eval_method": "ArithmeticEvaluator"
84
  }