tejeshbhalla commited on
Commit
b15a739
·
verified ·
1 Parent(s): a539420

Training in progress, step 230

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +8 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83d1b5943e014c474328b488d0c45a1265d4010965fb9426b6d554f17ba6e164
3
  size 13254157312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e15b9b06669fbbc4acf1460d7fc63c115e2cbf25e97f745a2ab62b53100397a8
3
  size 13254157312
logging.jsonl CHANGED
@@ -246,3 +246,11 @@
246
  {"loss": 0.518457, "token_acc": 0.84743252, "grad_norm": 0.16565216, "learning_rate": 3.9e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007428, "epoch": 0.9588015, "global_step/max_steps": "224/233", "percentage": "96.14%", "elapsed_time": "8h 22m 15s", "remaining_time": "20m 10s"}
247
  {"loss": 0.51062959, "token_acc": 0.83592096, "grad_norm": 0.12720895, "learning_rate": 3.4e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007436, "epoch": 0.96308186, "global_step/max_steps": "225/233", "percentage": "96.57%", "elapsed_time": "8h 23m 59s", "remaining_time": "17m 55s"}
248
  {"loss": 0.50811839, "token_acc": 0.84152466, "grad_norm": 0.16164465, "learning_rate": 3e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007445, "epoch": 0.96736223, "global_step/max_steps": "226/233", "percentage": "97.00%", "elapsed_time": "8h 25m 33s", "remaining_time": "15m 39s"}
 
 
 
 
 
 
 
 
 
246
  {"loss": 0.518457, "token_acc": 0.84743252, "grad_norm": 0.16565216, "learning_rate": 3.9e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007428, "epoch": 0.9588015, "global_step/max_steps": "224/233", "percentage": "96.14%", "elapsed_time": "8h 22m 15s", "remaining_time": "20m 10s"}
247
  {"loss": 0.51062959, "token_acc": 0.83592096, "grad_norm": 0.12720895, "learning_rate": 3.4e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007436, "epoch": 0.96308186, "global_step/max_steps": "225/233", "percentage": "96.57%", "elapsed_time": "8h 23m 59s", "remaining_time": "17m 55s"}
248
  {"loss": 0.50811839, "token_acc": 0.84152466, "grad_norm": 0.16164465, "learning_rate": 3e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007445, "epoch": 0.96736223, "global_step/max_steps": "226/233", "percentage": "97.00%", "elapsed_time": "8h 25m 33s", "remaining_time": "15m 39s"}
249
+ {"loss": 0.51978874, "token_acc": 0.83158328, "grad_norm": 0.13573085, "learning_rate": 2.6e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007447, "epoch": 0.97164259, "global_step/max_steps": "227/233", "percentage": "97.42%", "elapsed_time": "8h 27m 40s", "remaining_time": "13m 25s"}
250
+ {"loss": 0.49247271, "token_acc": 0.84161592, "grad_norm": 0.12217645, "learning_rate": 2.1e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007453, "epoch": 0.97592295, "global_step/max_steps": "228/233", "percentage": "97.85%", "elapsed_time": "8h 29m 30s", "remaining_time": "11m 10s"}
251
+ {"loss": 0.50794172, "token_acc": 0.84790565, "grad_norm": 0.14334756, "learning_rate": 1.7e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007469, "epoch": 0.98020332, "global_step/max_steps": "229/233", "percentage": "98.28%", "elapsed_time": "8h 30m 40s", "remaining_time": "8m 55s"}
252
+ {"loss": 0.52748251, "token_acc": 0.82375963, "grad_norm": 0.13245644, "learning_rate": 1.3e-07, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.00748, "epoch": 0.98448368, "global_step/max_steps": "230/233", "percentage": "98.71%", "elapsed_time": "8h 32m 10s", "remaining_time": "6m 40s"}
253
+ {"eval_loss": 0.50761044, "eval_token_acc": 0.83408849, "eval_runtime": 236.4465, "eval_samples_per_second": 1.954, "eval_steps_per_second": 0.245, "epoch": 0.98448368, "global_step/max_steps": "230/233", "percentage": "98.71%", "elapsed_time": "8h 36m 6s", "remaining_time": "6m 43s"}
254
+ {"loss": 0.52742505, "token_acc": 0.83657238, "grad_norm": 0.13020933, "learning_rate": 9e-08, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007407, "epoch": 0.98876404, "global_step/max_steps": "231/233", "percentage": "99.14%", "elapsed_time": "8h 39m 25s", "remaining_time": "4m 29s"}
255
+ {"loss": 0.48712298, "token_acc": 0.82561059, "grad_norm": 0.13148691, "learning_rate": 4e-08, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007419, "epoch": 0.99304441, "global_step/max_steps": "232/233", "percentage": "99.57%", "elapsed_time": "8h 40m 50s", "remaining_time": "2m 14s"}
256
+ {"loss": 0.50508392, "token_acc": 0.85302594, "grad_norm": 0.12396082, "learning_rate": 0.0, "memory(GiB)": 133.2, "train_speed(iter/s)": 0.007426, "epoch": 0.99732477, "global_step/max_steps": "233/233", "percentage": "100.00%", "elapsed_time": "8h 42m 33s", "remaining_time": "0s"}