cattadidntwakeup committed on
Commit
f4e9107
·
verified ·
1 Parent(s): c168e8e

upload cod4/checkpoint-best

Browse files
cod4/checkpoint-best/adapter_config.json CHANGED
@@ -29,13 +29,13 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "down_proj",
33
- "q_proj",
34
  "v_proj",
35
  "gate_proj",
36
- "up_proj",
37
- "k_proj",
38
- "o_proj"
 
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "up_proj",
 
33
  "v_proj",
34
  "gate_proj",
35
+ "down_proj",
36
+ "q_proj",
37
+ "o_proj",
38
+ "k_proj"
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
cod4/checkpoint-best/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e80f1018ae3bc6b04c49246709664b86225b97187666873c683f4d884c57fecd
3
  size 550593184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89e0d17a2d95f6ff84b0114dbc84cbc818a07c068c70f69bb6e20b31593b2000
3
  size 550593184
cod4/checkpoint-best/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:520723039ac4105a86d7ff3f74ad3ace9cd077f71385c3517ea91a31192b0c71
3
  size 1101572914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2028b08827f56458b170a796812a85a5e3d0d2b8379a1be978547c2db12c1f8
3
  size 1101572914
cod4/checkpoint-best/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccd1f8415101a83abb687145f5f273a6314e23c3fa5f3eeecc216a8a33a90c3f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3734f3631d7c5ebb18ad8634348051fb92d97ff25d6dd92a7f9915a552feca4c
3
  size 14244
cod4/checkpoint-best/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afae0ac62ce7f67d2df22e4a084208d6db64d6727ce9aa1771b26910ab44a74b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa1bc073aaac39764607c6496e425c264a7d9b161f449cb039d1f6dda489edb5
3
  size 1064
cod4/checkpoint-best/trainer_state.json CHANGED
@@ -1,197 +1,148 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5477784540474742,
5
- "eval_steps": 75,
6
- "global_step": 225,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.024345709068776627,
13
- "grad_norm": 1.360837697982788,
14
  "learning_rate": 3.6585365853658536e-07,
15
- "loss": 0.3238,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.048691418137553254,
20
- "grad_norm": 1.781339168548584,
21
  "learning_rate": 7.317073170731707e-07,
22
- "loss": 0.3499,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.07303712720632989,
27
- "grad_norm": 1.2938371896743774,
28
  "learning_rate": 1.097560975609756e-06,
29
- "loss": 0.2839,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.09738283627510651,
34
- "grad_norm": 1.2939085960388184,
35
  "learning_rate": 1.4634146341463414e-06,
36
  "loss": 0.3297,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.12172854534388314,
41
- "grad_norm": 1.1001590490341187,
42
  "learning_rate": 1.8292682926829268e-06,
43
- "loss": 0.3168,
 
 
 
 
 
 
 
 
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.14607425441265978,
48
- "grad_norm": 0.7245882153511047,
49
  "learning_rate": 2.195121951219512e-06,
50
- "loss": 0.256,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.1704199634814364,
55
- "grad_norm": 0.7190315127372742,
56
  "learning_rate": 2.5609756097560977e-06,
57
- "loss": 0.2602,
58
  "step": 70
59
  },
60
- {
61
- "epoch": 0.18259281801582472,
62
- "eval_loss": 0.18111442029476166,
63
- "eval_runtime": 189.9439,
64
- "eval_samples_per_second": 2.464,
65
- "eval_steps_per_second": 2.464,
66
- "step": 75
67
- },
68
  {
69
  "epoch": 0.19476567255021301,
70
- "grad_norm": 1.0097646713256836,
71
  "learning_rate": 2.926829268292683e-06,
72
- "loss": 0.2096,
73
  "step": 80
74
  },
75
  {
76
  "epoch": 0.21911138161898966,
77
- "grad_norm": 0.7169908285140991,
78
  "learning_rate": 2.9991302663250642e-06,
79
- "loss": 0.2148,
80
  "step": 90
81
  },
82
  {
83
  "epoch": 0.24345709068776628,
84
- "grad_norm": 0.8660908937454224,
85
  "learning_rate": 2.9955987017756107e-06,
86
- "loss": 0.1925,
 
 
 
 
 
 
 
 
87
  "step": 100
88
  },
89
  {
90
  "epoch": 0.2678027997565429,
91
- "grad_norm": 0.681736171245575,
92
  "learning_rate": 2.9893573417248957e-06,
93
- "loss": 0.1841,
94
  "step": 110
95
  },
96
  {
97
  "epoch": 0.29214850882531956,
98
- "grad_norm": 0.40242502093315125,
99
  "learning_rate": 2.980417494560234e-06,
100
  "loss": 0.1848,
101
  "step": 120
102
  },
103
  {
104
  "epoch": 0.31649421789409615,
105
- "grad_norm": 0.477758526802063,
106
  "learning_rate": 2.968795357913784e-06,
107
- "loss": 0.1705,
108
  "step": 130
109
  },
110
  {
111
  "epoch": 0.3408399269628728,
112
- "grad_norm": 0.5819408893585205,
113
  "learning_rate": 2.9545119893149243e-06,
114
- "loss": 0.1981,
115
  "step": 140
116
  },
117
  {
118
  "epoch": 0.36518563603164944,
119
- "grad_norm": 0.5944788455963135,
120
  "learning_rate": 2.9375932680372358e-06,
121
- "loss": 0.1668,
122
  "step": 150
123
  },
124
  {
125
  "epoch": 0.36518563603164944,
126
- "eval_loss": 0.17790192365646362,
127
- "eval_runtime": 190.128,
128
- "eval_samples_per_second": 2.462,
129
- "eval_steps_per_second": 2.462,
130
  "step": 150
131
- },
132
- {
133
- "epoch": 0.38953134510042603,
134
- "grad_norm": 0.39612388610839844,
135
- "learning_rate": 2.9180698482092302e-06,
136
- "loss": 0.182,
137
- "step": 160
138
- },
139
- {
140
- "epoch": 0.4138770541692027,
141
- "grad_norm": 0.4975854754447937,
142
- "learning_rate": 2.8959771032737673e-06,
143
- "loss": 0.1822,
144
- "step": 170
145
- },
146
- {
147
- "epoch": 0.4382227632379793,
148
- "grad_norm": 0.33769404888153076,
149
- "learning_rate": 2.8713550618968034e-06,
150
- "loss": 0.181,
151
- "step": 180
152
- },
153
- {
154
- "epoch": 0.4625684723067559,
155
- "grad_norm": 0.5186887383460999,
156
- "learning_rate": 2.8442483354415836e-06,
157
- "loss": 0.1849,
158
- "step": 190
159
- },
160
- {
161
- "epoch": 0.48691418137553255,
162
- "grad_norm": 0.40320202708244324,
163
- "learning_rate": 2.8147060371396953e-06,
164
- "loss": 0.1614,
165
- "step": 200
166
- },
167
- {
168
- "epoch": 0.5112598904443092,
169
- "grad_norm": 0.49186137318611145,
170
- "learning_rate": 2.7827816931054245e-06,
171
- "loss": 0.1701,
172
- "step": 210
173
- },
174
- {
175
- "epoch": 0.5356055995130858,
176
- "grad_norm": 0.877299427986145,
177
- "learning_rate": 2.7485331453546407e-06,
178
- "loss": 0.174,
179
- "step": 220
180
- },
181
- {
182
- "epoch": 0.5477784540474742,
183
- "eval_loss": 0.1760490983724594,
184
- "eval_runtime": 190.1667,
185
- "eval_samples_per_second": 2.461,
186
- "eval_steps_per_second": 2.461,
187
- "step": 225
188
  }
189
  ],
190
  "logging_steps": 10,
191
  "max_steps": 820,
192
  "num_input_tokens_seen": 0,
193
  "num_train_epochs": 2,
194
- "save_steps": 75,
195
  "stateful_callbacks": {
196
  "TrainerControl": {
197
  "args": {
@@ -204,7 +155,7 @@
204
  "attributes": {}
205
  }
206
  },
207
- "total_flos": 1.4490140738700902e+17,
208
  "train_batch_size": 1,
209
  "trial_name": null,
210
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.36518563603164944,
5
+ "eval_steps": 50,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.024345709068776627,
13
+ "grad_norm": 1.3434797525405884,
14
  "learning_rate": 3.6585365853658536e-07,
15
+ "loss": 0.3237,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.048691418137553254,
20
+ "grad_norm": 1.7791950702667236,
21
  "learning_rate": 7.317073170731707e-07,
22
+ "loss": 0.3513,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.07303712720632989,
27
+ "grad_norm": 1.2667977809906006,
28
  "learning_rate": 1.097560975609756e-06,
29
+ "loss": 0.2849,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.09738283627510651,
34
+ "grad_norm": 1.290136694908142,
35
  "learning_rate": 1.4634146341463414e-06,
36
  "loss": 0.3297,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.12172854534388314,
41
+ "grad_norm": 1.1097878217697144,
42
  "learning_rate": 1.8292682926829268e-06,
43
+ "loss": 0.3174,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.12172854534388314,
48
+ "eval_loss": 0.22251495718955994,
49
+ "eval_runtime": 189.8516,
50
+ "eval_samples_per_second": 2.465,
51
+ "eval_steps_per_second": 2.465,
52
  "step": 50
53
  },
54
  {
55
  "epoch": 0.14607425441265978,
56
+ "grad_norm": 0.7168009281158447,
57
  "learning_rate": 2.195121951219512e-06,
58
+ "loss": 0.2562,
59
  "step": 60
60
  },
61
  {
62
  "epoch": 0.1704199634814364,
63
+ "grad_norm": 0.7219638824462891,
64
  "learning_rate": 2.5609756097560977e-06,
65
+ "loss": 0.2583,
66
  "step": 70
67
  },
 
 
 
 
 
 
 
 
68
  {
69
  "epoch": 0.19476567255021301,
70
+ "grad_norm": 0.9940507411956787,
71
  "learning_rate": 2.926829268292683e-06,
72
+ "loss": 0.2086,
73
  "step": 80
74
  },
75
  {
76
  "epoch": 0.21911138161898966,
77
+ "grad_norm": 0.7270681262016296,
78
  "learning_rate": 2.9991302663250642e-06,
79
+ "loss": 0.2149,
80
  "step": 90
81
  },
82
  {
83
  "epoch": 0.24345709068776628,
84
+ "grad_norm": 0.8523025512695312,
85
  "learning_rate": 2.9955987017756107e-06,
86
+ "loss": 0.1905,
87
+ "step": 100
88
+ },
89
+ {
90
+ "epoch": 0.24345709068776628,
91
+ "eval_loss": 0.20457129180431366,
92
+ "eval_runtime": 189.863,
93
+ "eval_samples_per_second": 2.465,
94
+ "eval_steps_per_second": 2.465,
95
  "step": 100
96
  },
97
  {
98
  "epoch": 0.2678027997565429,
99
+ "grad_norm": 0.6568045616149902,
100
  "learning_rate": 2.9893573417248957e-06,
101
+ "loss": 0.1833,
102
  "step": 110
103
  },
104
  {
105
  "epoch": 0.29214850882531956,
106
+ "grad_norm": 0.3998699188232422,
107
  "learning_rate": 2.980417494560234e-06,
108
  "loss": 0.1848,
109
  "step": 120
110
  },
111
  {
112
  "epoch": 0.31649421789409615,
113
+ "grad_norm": 0.48115676641464233,
114
  "learning_rate": 2.968795357913784e-06,
115
+ "loss": 0.1698,
116
  "step": 130
117
  },
118
  {
119
  "epoch": 0.3408399269628728,
120
+ "grad_norm": 0.5803468227386475,
121
  "learning_rate": 2.9545119893149243e-06,
122
+ "loss": 0.1978,
123
  "step": 140
124
  },
125
  {
126
  "epoch": 0.36518563603164944,
127
+ "grad_norm": 0.5993896722793579,
128
  "learning_rate": 2.9375932680372358e-06,
129
+ "loss": 0.1674,
130
  "step": 150
131
  },
132
  {
133
  "epoch": 0.36518563603164944,
134
+ "eval_loss": 0.1782142072916031,
135
+ "eval_runtime": 189.9536,
136
+ "eval_samples_per_second": 2.464,
137
+ "eval_steps_per_second": 2.464,
138
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  }
140
  ],
141
  "logging_steps": 10,
142
  "max_steps": 820,
143
  "num_input_tokens_seen": 0,
144
  "num_train_epochs": 2,
145
+ "save_steps": 50,
146
  "stateful_callbacks": {
147
  "TrainerControl": {
148
  "args": {
 
155
  "attributes": {}
156
  }
157
  },
158
+ "total_flos": 9.609101934273946e+16,
159
  "train_batch_size": 1,
160
  "trial_name": null,
161
  "trial_params": null
cod4/checkpoint-best/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a84a4d610278dab96fa8be9465c8d94fd923971c230cd596c619c25b2e6c86d2
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:685d9b5d18d2c6827d3139cf322fe47abe6ed206413f5866b42023ba87fd17d9
3
  size 5304