baby-dev commited on
Commit
35fd73f
·
verified ·
1 Parent(s): c3e6bf2

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,12 +20,12 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
  "down_proj",
25
- "q_proj",
26
- "k_proj",
27
  "gate_proj",
 
28
  "up_proj",
 
29
  "v_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "down_proj",
24
+ "o_proj",
 
25
  "gate_proj",
26
+ "k_proj",
27
  "up_proj",
28
+ "q_proj",
29
  "v_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1ac2cde4d6de38916287a0f92576746b8b912405a05e9927d9b7c43e6ad2b80
3
  size 69527352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e979dbd3cbd940f959a4ba68cfe39ed192ba6aaf26b940558b355981237dddf9
3
  size 69527352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9c0e178c57991d9b6c6637d99eafc6561e3fa4f68fde4e5eb906cb2276572dc
3
  size 35778900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39a2e574caffeb2fdfdb41ce93130d1ac0d397bbcadbeb7dd4e2d64b60937f1b
3
  size 35778900
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:679d9461f52487b2177f87f4dca8924af37af2c10803eb70744c2e4f376d3c2d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aff15d3484f18cecf83b769183ac0cc241615e7ba3d43c9e65497b2930f18c5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:884dbcf1f76cdf64d0ca16f2c81847fd9a35ed97a8bdfbbdeee456fb385a47c3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcf348532606e290f3cddebc7f00005cce6f05bb1cced2bad1d4a15482755657
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,112 +1,69 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.6348773841961854,
5
  "eval_steps": 500,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.1362397820163488,
13
- "grad_norm": 0.7698700428009033,
14
  "learning_rate": 0.0004,
15
- "loss": 1.3953,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.2724795640326976,
20
- "grad_norm": 0.5740014910697937,
21
  "learning_rate": 0.0004,
22
- "loss": 0.8509,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.4087193460490463,
27
- "grad_norm": 0.7224377989768982,
28
  "learning_rate": 0.0004,
29
- "loss": 0.6471,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 0.5449591280653951,
34
- "grad_norm": 1.064605474472046,
35
  "learning_rate": 0.0004,
36
- "loss": 0.5458,
37
  "step": 200
38
  },
39
  {
40
  "epoch": 0.6811989100817438,
41
- "grad_norm": 0.5921716690063477,
42
  "learning_rate": 0.0004,
43
- "loss": 0.4329,
44
  "step": 250
45
  },
46
  {
47
  "epoch": 0.8174386920980926,
48
- "grad_norm": 0.403538316488266,
49
  "learning_rate": 0.0004,
50
- "loss": 0.3786,
51
  "step": 300
52
  },
53
  {
54
  "epoch": 0.9536784741144414,
55
- "grad_norm": 0.4223729372024536,
56
  "learning_rate": 0.0004,
57
- "loss": 0.3036,
58
  "step": 350
59
  },
60
  {
61
  "epoch": 1.0,
62
- "eval_loss": 0.25390318036079407,
63
- "eval_runtime": 4.2748,
64
- "eval_samples_per_second": 72.284,
65
- "eval_steps_per_second": 18.247,
66
  "step": 367
67
- },
68
- {
69
- "epoch": 1.0899182561307903,
70
- "grad_norm": 0.2040538489818573,
71
- "learning_rate": 0.0004,
72
- "loss": 0.2737,
73
- "step": 400
74
- },
75
- {
76
- "epoch": 1.226158038147139,
77
- "grad_norm": 0.2643136978149414,
78
- "learning_rate": 0.0004,
79
- "loss": 0.1975,
80
- "step": 450
81
- },
82
- {
83
- "epoch": 1.3623978201634879,
84
- "grad_norm": 0.3208156228065491,
85
- "learning_rate": 0.0004,
86
- "loss": 0.1744,
87
- "step": 500
88
- },
89
- {
90
- "epoch": 1.4986376021798364,
91
- "grad_norm": 0.314196914434433,
92
- "learning_rate": 0.0004,
93
- "loss": 0.1536,
94
- "step": 550
95
- },
96
- {
97
- "epoch": 1.6348773841961854,
98
- "grad_norm": 0.44153332710266113,
99
- "learning_rate": 0.0004,
100
- "loss": 0.1304,
101
- "step": 600
102
- },
103
- {
104
- "epoch": 1.6348773841961854,
105
- "eval_loss": 0.1265459954738617,
106
- "eval_runtime": 4.2648,
107
- "eval_samples_per_second": 72.453,
108
- "eval_steps_per_second": 18.289,
109
- "step": 600
110
  }
111
  ],
112
  "logging_steps": 50,
@@ -121,12 +78,12 @@
121
  "should_evaluate": false,
122
  "should_log": false,
123
  "should_save": true,
124
- "should_training_stop": true
125
  },
126
  "attributes": {}
127
  }
128
  },
129
- "total_flos": 1.945276023865344e+16,
130
  "train_batch_size": 4,
131
  "trial_name": null,
132
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 367,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.1362397820163488,
13
+ "grad_norm": 0.784312903881073,
14
  "learning_rate": 0.0004,
15
+ "loss": 1.3927,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.2724795640326976,
20
+ "grad_norm": 0.5759815573692322,
21
  "learning_rate": 0.0004,
22
+ "loss": 0.8469,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.4087193460490463,
27
+ "grad_norm": 0.7147420048713684,
28
  "learning_rate": 0.0004,
29
+ "loss": 0.6428,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 0.5449591280653951,
34
+ "grad_norm": 1.0268043279647827,
35
  "learning_rate": 0.0004,
36
+ "loss": 0.5406,
37
  "step": 200
38
  },
39
  {
40
  "epoch": 0.6811989100817438,
41
+ "grad_norm": 0.5647836923599243,
42
  "learning_rate": 0.0004,
43
+ "loss": 0.4279,
44
  "step": 250
45
  },
46
  {
47
  "epoch": 0.8174386920980926,
48
+ "grad_norm": 0.48977088928222656,
49
  "learning_rate": 0.0004,
50
+ "loss": 0.3726,
51
  "step": 300
52
  },
53
  {
54
  "epoch": 0.9536784741144414,
55
+ "grad_norm": 0.3192068636417389,
56
  "learning_rate": 0.0004,
57
+ "loss": 0.3033,
58
  "step": 350
59
  },
60
  {
61
  "epoch": 1.0,
62
+ "eval_loss": 0.2556329667568207,
63
+ "eval_runtime": 4.2889,
64
+ "eval_samples_per_second": 72.046,
65
+ "eval_steps_per_second": 18.186,
66
  "step": 367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
  ],
69
  "logging_steps": 50,
 
78
  "should_evaluate": false,
79
  "should_log": false,
80
  "should_save": true,
81
+ "should_training_stop": false
82
  },
83
  "attributes": {}
84
  }
85
  },
86
+ "total_flos": 1.189318078660608e+16,
87
  "train_batch_size": 4,
88
  "trial_name": null,
89
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbdf780f678ce24abeccef8a93bd5140aadf5e01c25177d8fa09c1018ecfa471
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a656ef5d64b69ea7ac8f230e33aea3f20620e500bf2a829f86b58a5ee64c52b
3
  size 6776