vinoku89 committed on
Commit
a5dbd67
·
verified ·
1 Parent(s): b66ce54

[sft] Qwen3-0.6B | e1 | 13.9k samples | lr=2e-05

Browse files

Metrics:
- train_loss: 2.4142
- val_loss: 2.3905

W&B: https://wandb.ai/run/p8xonerz

adapter_config.json CHANGED
@@ -30,12 +30,12 @@
30
  "revision": null,
31
  "target_modules": [
32
  "o_proj",
33
- "gate_proj",
34
- "down_proj",
35
- "up_proj",
36
  "v_proj",
 
37
  "q_proj",
38
- "k_proj"
 
 
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
 
30
  "revision": null,
31
  "target_modules": [
32
  "o_proj",
 
 
 
33
  "v_proj",
34
+ "k_proj",
35
  "q_proj",
36
+ "up_proj",
37
+ "gate_proj",
38
+ "down_proj"
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feabe87a4afe0e16bced7642a8952e4b2d56e593505a0eaf7918434d5574243c
3
  size 20237256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5291e93e62b72caad86ab0477904bae30f91c2d34bdb5a58801153e95290bea
3
  size 20237256
metrics.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "train_loss": 2.0744,
3
- "val_loss": 2.0531225204467773,
4
  "accuracy": null,
5
  "epochs": 1,
6
  "max_steps": 10,
@@ -8,8 +8,8 @@
8
  "learning_rate": 2e-05,
9
  "model_name": "Qwen/Qwen3-0.6B",
10
  "method": "sft",
11
- "run_number": "003",
12
  "experiment_name": "aimo3-exp-1",
13
- "wandb_run_id": "0pkyx3cq",
14
- "timestamp": "2026-01-12T22:37:21.523229"
15
  }
 
1
  {
2
+ "train_loss": 2.4142,
3
+ "val_loss": 2.3904521465301514,
4
  "accuracy": null,
5
  "epochs": 1,
6
  "max_steps": 10,
 
8
  "learning_rate": 2e-05,
9
  "model_name": "Qwen/Qwen3-0.6B",
10
  "method": "sft",
11
+ "run_number": "004",
12
  "experiment_name": "aimo3-exp-1",
13
+ "wandb_run_id": "p8xonerz",
14
+ "timestamp": "2026-01-16T14:11:54.096241"
15
  }