youssefedweqd committed on
Commit
e1b5aec
·
verified ·
1 Parent(s): 8245e19

Training in progress, step 1400

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc1efa741b507f21867acb771d6a2bcb87457c35864022766a1a5ffcb020a0bb
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ef4ffbfa7c777464f585965ca2c96c2e39b4e00ff8c7faef7bf31f4e8d24d6
3
  size 161533160
last-checkpoint/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "gate_proj",
27
  "q_proj",
28
  "v_proj",
29
- "o_proj",
30
- "down_proj",
31
  "k_proj",
32
- "up_proj"
 
 
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
26
  "q_proj",
27
  "v_proj",
 
 
28
  "k_proj",
29
+ "up_proj",
30
+ "down_proj",
31
+ "gate_proj",
32
+ "o_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc1efa741b507f21867acb771d6a2bcb87457c35864022766a1a5ffcb020a0bb
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:027c28cbacad0920c7a8ec1a4dbaf396f0658e37d9c57aa24903513cf568bf29
3
  size 161533160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa94ed9f50da76608d25453a12bcbc3fc01431b0cc187ad4a4c90e7deb6d50ae
3
  size 323292202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a811f08d635f9fd429d0ac8672eee899607dd871ece10f326b8ec3e7266d9db2
3
  size 323292202
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f64767d4230dc21f025b8832f167066411661115904841906481e0d88adf011
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:824d4a418ca52dbceab02ca3bdda11d00d54b246084fd87a75671a28233a0cb2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.2152317880794702,
6
  "eval_steps": 100,
7
- "global_step": 1300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -164,48 +164,6 @@
164
  "learning_rate": 5.513245033112583e-05,
165
  "loss": 0.7213,
166
  "step": 1000
167
- },
168
- {
169
- "epoch": 0.173841059602649,
170
- "grad_norm": 1.8289754390716553,
171
- "learning_rate": 5.789183222958058e-05,
172
- "loss": 0.7335,
173
- "step": 1050
174
- },
175
- {
176
- "epoch": 0.18211920529801323,
177
- "grad_norm": 1.4989681243896484,
178
- "learning_rate": 6.065121412803533e-05,
179
- "loss": 0.7326,
180
- "step": 1100
181
- },
182
- {
183
- "epoch": 0.19039735099337748,
184
- "grad_norm": 1.5326098203659058,
185
- "learning_rate": 6.341059602649006e-05,
186
- "loss": 0.7311,
187
- "step": 1150
188
- },
189
- {
190
- "epoch": 0.1986754966887417,
191
- "grad_norm": 1.4897147417068481,
192
- "learning_rate": 6.616997792494481e-05,
193
- "loss": 0.6918,
194
- "step": 1200
195
- },
196
- {
197
- "epoch": 0.20695364238410596,
198
- "grad_norm": 1.634765863418579,
199
- "learning_rate": 6.892935982339957e-05,
200
- "loss": 0.7051,
201
- "step": 1250
202
- },
203
- {
204
- "epoch": 0.2152317880794702,
205
- "grad_norm": 1.4463587999343872,
206
- "learning_rate": 7.168874172185431e-05,
207
- "loss": 0.6955,
208
- "step": 1300
209
  }
210
  ],
211
  "logging_steps": 50,
@@ -225,7 +183,7 @@
225
  "attributes": {}
226
  }
227
  },
228
- "total_flos": 1.0171770055163904e+16,
229
  "train_batch_size": 1,
230
  "trial_name": null,
231
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.16556291390728478,
6
  "eval_steps": 100,
7
+ "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
164
  "learning_rate": 5.513245033112583e-05,
165
  "loss": 0.7213,
166
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  }
168
  ],
169
  "logging_steps": 50,
 
183
  "attributes": {}
184
  }
185
  },
186
+ "total_flos": 7833052747137024.0,
187
  "train_batch_size": 1,
188
  "trial_name": null,
189
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2543e07a37d2c3de3cd8e1d682eb10ddfc7a8cf84209a331e0b0e44870af81c3
3
  size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dd2ef96eff028fc6db83c8627ce2e789cafe652a25ea367c040819bc392f916
3
  size 5752
trainer_log.jsonl CHANGED
@@ -29,3 +29,5 @@
29
  {"current_steps": 1200, "total_steps": 18120, "loss": 0.6918, "lr": 6.616997792494481e-05, "epoch": 0.1986754966887417, "percentage": 6.62, "elapsed_time": "0:12:38", "remaining_time": "2:58:14"}
30
  {"current_steps": 1250, "total_steps": 18120, "loss": 0.7051, "lr": 6.892935982339957e-05, "epoch": 0.20695364238410596, "percentage": 6.9, "elapsed_time": "0:16:00", "remaining_time": "3:36:01"}
31
  {"current_steps": 1300, "total_steps": 18120, "loss": 0.6955, "lr": 7.168874172185431e-05, "epoch": 0.2152317880794702, "percentage": 7.17, "elapsed_time": "0:19:19", "remaining_time": "4:10:02"}
 
 
 
29
  {"current_steps": 1200, "total_steps": 18120, "loss": 0.6918, "lr": 6.616997792494481e-05, "epoch": 0.1986754966887417, "percentage": 6.62, "elapsed_time": "0:12:38", "remaining_time": "2:58:14"}
30
  {"current_steps": 1250, "total_steps": 18120, "loss": 0.7051, "lr": 6.892935982339957e-05, "epoch": 0.20695364238410596, "percentage": 6.9, "elapsed_time": "0:16:00", "remaining_time": "3:36:01"}
31
  {"current_steps": 1300, "total_steps": 18120, "loss": 0.6955, "lr": 7.168874172185431e-05, "epoch": 0.2152317880794702, "percentage": 7.17, "elapsed_time": "0:19:19", "remaining_time": "4:10:02"}
32
+ {"current_steps": 1350, "total_steps": 18120, "loss": 0.6901, "lr": 7.444812362030905e-05, "epoch": 0.22350993377483444, "percentage": 7.45, "elapsed_time": "0:22:20", "remaining_time": "4:37:35"}
33
+ {"current_steps": 1400, "total_steps": 18120, "loss": 0.6833, "lr": 7.72075055187638e-05, "epoch": 0.23178807947019867, "percentage": 7.73, "elapsed_time": "0:25:26", "remaining_time": "5:03:49"}