tejeshbhalla commited on
Commit
a539420
·
verified ·
1 Parent(s): d156560

Training in progress, step 220

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +34 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b485823019198c2a615d8f93fa9348ea465f6aa983e24989e3c8d108875a63c9
3
  size 13254157312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83d1b5943e014c474328b488d0c45a1265d4010965fb9426b6d554f17ba6e164
3
  size 13254157312
logging.jsonl CHANGED
@@ -212,3 +212,37 @@
212
  {"loss": 0.51051688, "token_acc": 0.83841894, "grad_norm": 0.14452438, "learning_rate": 1.72e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007496, "epoch": 0.82611022, "global_step/max_steps": "193/233", "percentage": "82.83%", "elapsed_time": "7h 8m 48s", "remaining_time": "1h 28m 52s"}
213
  {"loss": 0.51354766, "token_acc": 0.85329018, "grad_norm": 0.11761606, "learning_rate": 1.67e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007492, "epoch": 0.83039058, "global_step/max_steps": "194/233", "percentage": "83.26%", "elapsed_time": "7h 11m 13s", "remaining_time": "1h 26m 41s"}
214
  {"loss": 0.51062506, "token_acc": 0.85424893, "grad_norm": 0.14140776, "learning_rate": 1.63e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.0075, "epoch": 0.83467095, "global_step/max_steps": "195/233", "percentage": "83.69%", "elapsed_time": "7h 12m 58s", "remaining_time": "1h 24m 22s"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  {"loss": 0.51051688, "token_acc": 0.83841894, "grad_norm": 0.14452438, "learning_rate": 1.72e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007496, "epoch": 0.82611022, "global_step/max_steps": "193/233", "percentage": "82.83%", "elapsed_time": "7h 8m 48s", "remaining_time": "1h 28m 52s"}
213
  {"loss": 0.51354766, "token_acc": 0.85329018, "grad_norm": 0.11761606, "learning_rate": 1.67e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007492, "epoch": 0.83039058, "global_step/max_steps": "194/233", "percentage": "83.26%", "elapsed_time": "7h 11m 13s", "remaining_time": "1h 26m 41s"}
214
  {"loss": 0.51062506, "token_acc": 0.85424893, "grad_norm": 0.14140776, "learning_rate": 1.63e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.0075, "epoch": 0.83467095, "global_step/max_steps": "195/233", "percentage": "83.69%", "elapsed_time": "7h 12m 58s", "remaining_time": "1h 24m 22s"}
215
+ {"loss": 0.55619252, "token_acc": 0.84431075, "grad_norm": 0.1660874, "learning_rate": 1.59e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007499, "epoch": 0.83895131, "global_step/max_steps": "196/233", "percentage": "84.12%", "elapsed_time": "7h 15m 15s", "remaining_time": "1h 22m 9s"}
216
+ {"loss": 0.51129085, "token_acc": 0.81936346, "grad_norm": 0.14064978, "learning_rate": 1.55e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007516, "epoch": 0.84323167, "global_step/max_steps": "197/233", "percentage": "84.55%", "elapsed_time": "7h 16m 29s", "remaining_time": "1h 19m 45s"}
217
+ {"loss": 0.508412, "token_acc": 0.84866787, "grad_norm": 0.11868906, "learning_rate": 1.5e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007505, "epoch": 0.84751204, "global_step/max_steps": "198/233", "percentage": "84.98%", "elapsed_time": "7h 19m 22s", "remaining_time": "1h 17m 40s"}
218
+ {"loss": 0.54063851, "token_acc": 0.83712304, "grad_norm": 0.12853099, "learning_rate": 1.46e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007516, "epoch": 0.8517924, "global_step/max_steps": "199/233", "percentage": "85.41%", "elapsed_time": "7h 20m 55s", "remaining_time": "1h 15m 20s"}
219
+ {"loss": 0.46997577, "token_acc": 0.82234024, "grad_norm": 0.12919647, "learning_rate": 1.42e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007526, "epoch": 0.85607277, "global_step/max_steps": "200/233", "percentage": "85.84%", "elapsed_time": "7h 22m 33s", "remaining_time": "1h 13m 1s"}
220
+ {"eval_loss": 0.50845164, "eval_token_acc": 0.83379876, "eval_runtime": 235.3989, "eval_samples_per_second": 1.963, "eval_steps_per_second": 0.246, "epoch": 0.85607277, "global_step/max_steps": "200/233", "percentage": "85.84%", "elapsed_time": "7h 26m 29s", "remaining_time": "1h 13m 40s"}
221
+ {"loss": 0.51254267, "token_acc": 0.8372357, "grad_norm": 0.16826271, "learning_rate": 1.37e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007463, "epoch": 0.86035313, "global_step/max_steps": "201/233", "percentage": "86.27%", "elapsed_time": "7h 28m 30s", "remaining_time": "1h 11m 24s"}
222
+ {"loss": 0.50134093, "token_acc": 0.83362946, "grad_norm": 0.15078701, "learning_rate": 1.33e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007483, "epoch": 0.86463349, "global_step/max_steps": "202/233", "percentage": "86.70%", "elapsed_time": "7h 29m 35s", "remaining_time": "1h 8m 59s"}
223
+ {"loss": 0.51513213, "token_acc": 0.82223492, "grad_norm": 0.12865345, "learning_rate": 1.29e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007484, "epoch": 0.86891386, "global_step/max_steps": "203/233", "percentage": "87.12%", "elapsed_time": "7h 31m 42s", "remaining_time": "1h 6m 45s"}
224
+ {"loss": 0.52011067, "token_acc": 0.83890838, "grad_norm": 0.11933945, "learning_rate": 1.24e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007478, "epoch": 0.87319422, "global_step/max_steps": "204/233", "percentage": "87.55%", "elapsed_time": "7h 34m 18s", "remaining_time": "1h 4m 34s"}
225
+ {"loss": 0.49174532, "token_acc": 0.83849821, "grad_norm": 0.25774974, "learning_rate": 1.2e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007473, "epoch": 0.87747459, "global_step/max_steps": "205/233", "percentage": "87.98%", "elapsed_time": "7h 36m 50s", "remaining_time": "1h 2m 23s"}
226
+ {"loss": 0.49890012, "token_acc": 0.82418112, "grad_norm": 0.15101479, "learning_rate": 1.16e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00749, "epoch": 0.88175495, "global_step/max_steps": "206/233", "percentage": "88.41%", "elapsed_time": "7h 38m 2s", "remaining_time": "1h 0m 2s"}
227
+ {"loss": 0.50326902, "token_acc": 0.81516661, "grad_norm": 0.1402124, "learning_rate": 1.12e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00749, "epoch": 0.88603531, "global_step/max_steps": "207/233", "percentage": "88.84%", "elapsed_time": "7h 40m 17s", "remaining_time": "57m 48s"}
228
+ {"loss": 0.5040676, "token_acc": 0.81869791, "grad_norm": 0.27301475, "learning_rate": 1.07e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007491, "epoch": 0.89031568, "global_step/max_steps": "208/233", "percentage": "89.27%", "elapsed_time": "7h 42m 24s", "remaining_time": "55m 34s"}
229
+ {"loss": 0.47629112, "token_acc": 0.82132275, "grad_norm": 0.17399816, "learning_rate": 1.03e-06, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007495, "epoch": 0.89459604, "global_step/max_steps": "209/233", "percentage": "89.70%", "elapsed_time": "7h 44m 24s", "remaining_time": "53m 19s"}
230
+ {"loss": 0.50204992, "token_acc": 0.83965597, "grad_norm": 0.13901281, "learning_rate": 9.9e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007496, "epoch": 0.8988764, "global_step/max_steps": "210/233", "percentage": "90.13%", "elapsed_time": "7h 46m 34s", "remaining_time": "51m 6s"}
231
+ {"eval_loss": 0.50826108, "eval_token_acc": 0.83396063, "eval_runtime": 235.9818, "eval_samples_per_second": 1.958, "eval_steps_per_second": 0.246, "epoch": 0.8988764, "global_step/max_steps": "210/233", "percentage": "90.13%", "elapsed_time": "7h 50m 30s", "remaining_time": "51m 31s"}
232
+ {"loss": 0.52222174, "token_acc": 0.83534812, "grad_norm": 0.13053484, "learning_rate": 9.4e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007434, "epoch": 0.90315677, "global_step/max_steps": "211/233", "percentage": "90.56%", "elapsed_time": "7h 52m 43s", "remaining_time": "49m 17s"}
233
+ {"loss": 0.49084908, "token_acc": 0.83773069, "grad_norm": 0.1325853, "learning_rate": 9e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007429, "epoch": 0.90743713, "global_step/max_steps": "212/233", "percentage": "90.99%", "elapsed_time": "7h 55m 17s", "remaining_time": "47m 4s"}
234
+ {"loss": 0.49427927, "token_acc": 0.85061342, "grad_norm": 0.12438133, "learning_rate": 8.6e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007447, "epoch": 0.9117175, "global_step/max_steps": "213/233", "percentage": "91.42%", "elapsed_time": "7h 56m 20s", "remaining_time": "44m 43s"}
235
+ {"loss": 0.50391412, "token_acc": 0.85781355, "grad_norm": 0.11095317, "learning_rate": 8.2e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007444, "epoch": 0.91599786, "global_step/max_steps": "214/233", "percentage": "91.85%", "elapsed_time": "7h 58m 47s", "remaining_time": "42m 30s"}
236
+ {"loss": 0.50294411, "token_acc": 0.83485309, "grad_norm": 0.1230354, "learning_rate": 7.7e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007453, "epoch": 0.92027822, "global_step/max_steps": "215/233", "percentage": "92.27%", "elapsed_time": "8h 0m 25s", "remaining_time": "40m 13s"}
237
+ {"loss": 0.4798848, "token_acc": 0.8473504, "grad_norm": 0.17395706, "learning_rate": 7.3e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00745, "epoch": 0.92455859, "global_step/max_steps": "216/233", "percentage": "92.70%", "elapsed_time": "8h 2m 53s", "remaining_time": "38m 0s"}
238
+ {"loss": 0.51420152, "token_acc": 0.82058785, "grad_norm": 0.13729912, "learning_rate": 6.9e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007451, "epoch": 0.92883895, "global_step/max_steps": "217/233", "percentage": "93.13%", "elapsed_time": "8h 5m 3s", "remaining_time": "35m 45s"}
239
+ {"loss": 0.51177865, "token_acc": 0.81501807, "grad_norm": 0.10358699, "learning_rate": 6.4e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007453, "epoch": 0.93311932, "global_step/max_steps": "218/233", "percentage": "93.56%", "elapsed_time": "8h 7m 8s", "remaining_time": "33m 31s"}
240
+ {"loss": 0.52010918, "token_acc": 0.8401833, "grad_norm": 0.16485111, "learning_rate": 6e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007455, "epoch": 0.93739968, "global_step/max_steps": "219/233", "percentage": "93.99%", "elapsed_time": "8h 9m 13s", "remaining_time": "31m 16s"}
241
+ {"loss": 0.5114243, "token_acc": 0.84615385, "grad_norm": 0.1616209, "learning_rate": 5.6e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007451, "epoch": 0.94168004, "global_step/max_steps": "220/233", "percentage": "94.42%", "elapsed_time": "8h 11m 44s", "remaining_time": "29m 3s"}
242
+ {"eval_loss": 0.50814027, "eval_token_acc": 0.83419323, "eval_runtime": 236.0063, "eval_samples_per_second": 1.958, "eval_steps_per_second": 0.246, "epoch": 0.94168004, "global_step/max_steps": "220/233", "percentage": "94.42%", "elapsed_time": "8h 15m 40s", "remaining_time": "29m 17s"}
243
+ {"loss": 0.48112383, "token_acc": 0.83936344, "grad_norm": 0.12433523, "learning_rate": 5.2e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007383, "epoch": 0.94596041, "global_step/max_steps": "221/233", "percentage": "94.85%", "elapsed_time": "8h 18m 32s", "remaining_time": "27m 4s"}
244
+ {"loss": 0.50242162, "token_acc": 0.84531625, "grad_norm": 0.23049626, "learning_rate": 4.7e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007407, "epoch": 0.95024077, "global_step/max_steps": "222/233", "percentage": "95.28%", "elapsed_time": "8h 19m 12s", "remaining_time": "24m 44s"}
245
+ {"loss": 0.51799721, "token_acc": 0.83938443, "grad_norm": 0.13270883, "learning_rate": 4.3e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007419, "epoch": 0.95452113, "global_step/max_steps": "223/233", "percentage": "95.71%", "elapsed_time": "8h 20m 37s", "remaining_time": "22m 26s"}
246
+ {"loss": 0.518457, "token_acc": 0.84743252, "grad_norm": 0.16565216, "learning_rate": 3.9e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007428, "epoch": 0.9588015, "global_step/max_steps": "224/233", "percentage": "96.14%", "elapsed_time": "8h 22m 15s", "remaining_time": "20m 10s"}
247
+ {"loss": 0.51062959, "token_acc": 0.83592096, "grad_norm": 0.12720895, "learning_rate": 3.4e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007436, "epoch": 0.96308186, "global_step/max_steps": "225/233", "percentage": "96.57%", "elapsed_time": "8h 23m 59s", "remaining_time": "17m 55s"}
248
+ {"loss": 0.50811839, "token_acc": 0.84152466, "grad_norm": 0.16164465, "learning_rate": 3e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007445, "epoch": 0.96736223, "global_step/max_steps": "226/233", "percentage": "97.00%", "elapsed_time": "8h 25m 33s", "remaining_time": "15m 39s"}