tejeshbhalla commited on
Commit
c9abc2f
·
verified ·
1 Parent(s): 704da98

Training in progress, step 150

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +10 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7763f9504c51caf7a80f032bc53fc1485c22dea823705324bcb4121efcd20057
3
  size 13254157312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69cf3891e4cfe1cb2064f250c1e0c06cc09441a469282a54c312a95b9b55a8dc
3
  size 13254157312
logging.jsonl CHANGED
@@ -157,3 +157,13 @@
157
  {"loss": 0.48448735, "token_acc": 0.84470637, "grad_norm": 0.16265908, "learning_rate": 4.31e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.007574, "epoch": 0.61209203, "global_step/max_steps": "143/233", "percentage": "61.37%", "elapsed_time": "5h 14m 14s", "remaining_time": "3h 17m 46s"}
158
  {"loss": 0.46533874, "token_acc": 0.85549313, "grad_norm": 0.12145889, "learning_rate": 4.26e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.00755, "epoch": 0.61637239, "global_step/max_steps": "144/233", "percentage": "61.80%", "elapsed_time": "5h 17m 28s", "remaining_time": "3h 16m 12s"}
159
  {"loss": 0.44867301, "token_acc": 0.86317424, "grad_norm": 0.11760305, "learning_rate": 4.21e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.007559, "epoch": 0.62065276, "global_step/max_steps": "145/233", "percentage": "62.23%", "elapsed_time": "5h 19m 16s", "remaining_time": "3h 13m 46s"}
 
 
 
 
 
 
 
 
 
 
 
157
  {"loss": 0.48448735, "token_acc": 0.84470637, "grad_norm": 0.16265908, "learning_rate": 4.31e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.007574, "epoch": 0.61209203, "global_step/max_steps": "143/233", "percentage": "61.37%", "elapsed_time": "5h 14m 14s", "remaining_time": "3h 17m 46s"}
158
  {"loss": 0.46533874, "token_acc": 0.85549313, "grad_norm": 0.12145889, "learning_rate": 4.26e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.00755, "epoch": 0.61637239, "global_step/max_steps": "144/233", "percentage": "61.80%", "elapsed_time": "5h 17m 28s", "remaining_time": "3h 16m 12s"}
159
  {"loss": 0.44867301, "token_acc": 0.86317424, "grad_norm": 0.11760305, "learning_rate": 4.21e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.007559, "epoch": 0.62065276, "global_step/max_steps": "145/233", "percentage": "62.23%", "elapsed_time": "5h 19m 16s", "remaining_time": "3h 13m 46s"}
160
+ {"loss": 0.49094412, "token_acc": 0.83705991, "grad_norm": 0.10963392, "learning_rate": 4.16e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.007559, "epoch": 0.62493312, "global_step/max_steps": "146/233", "percentage": "62.66%", "elapsed_time": "5h 21m 29s", "remaining_time": "3h 11m 34s"}
161
+ {"loss": 0.48288625, "token_acc": 0.84562759, "grad_norm": 0.11926857, "learning_rate": 4.11e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.00757, "epoch": 0.62921348, "global_step/max_steps": "147/233", "percentage": "63.09%", "elapsed_time": "5h 23m 13s", "remaining_time": "3h 9m 6s"}
162
+ {"loss": 0.46473897, "token_acc": 0.86574336, "grad_norm": 0.12197684, "learning_rate": 4.07e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.007572, "epoch": 0.63349385, "global_step/max_steps": "148/233", "percentage": "63.52%", "elapsed_time": "5h 25m 20s", "remaining_time": "3h 6m 50s"}
163
+ {"loss": 0.46687201, "token_acc": 0.86090455, "grad_norm": 0.12635399, "learning_rate": 4.02e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.007595, "epoch": 0.63777421, "global_step/max_steps": "149/233", "percentage": "63.95%", "elapsed_time": "5h 26m 31s", "remaining_time": "3h 4m 5s"}
164
+ {"loss": 0.50646043, "token_acc": 0.83715979, "grad_norm": 0.11814403, "learning_rate": 3.97e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.007587, "epoch": 0.64205457, "global_step/max_steps": "150/233", "percentage": "64.38%", "elapsed_time": "5h 29m 5s", "remaining_time": "3h 2m 5s"}
165
+ {"eval_loss": 0.4841184, "eval_token_acc": 0.83763193, "eval_runtime": 233.6185, "eval_samples_per_second": 1.978, "eval_steps_per_second": 0.248, "epoch": 0.64205457, "global_step/max_steps": "150/233", "percentage": "64.38%", "elapsed_time": "5h 32m 59s", "remaining_time": "3h 4m 15s"}
166
+ {"loss": 0.48874947, "token_acc": 0.8424199, "grad_norm": 0.12018572, "learning_rate": 3.92e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.007498, "epoch": 0.64633494, "global_step/max_steps": "151/233", "percentage": "64.81%", "elapsed_time": "5h 35m 12s", "remaining_time": "3h 2m 1s"}
167
+ {"loss": 0.48023725, "token_acc": 0.84619697, "grad_norm": 0.1180267, "learning_rate": 3.88e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.007511, "epoch": 0.6506153, "global_step/max_steps": "152/233", "percentage": "65.24%", "elapsed_time": "5h 36m 51s", "remaining_time": "2h 59m 30s"}
168
+ {"loss": 0.48215163, "token_acc": 0.81318267, "grad_norm": 0.12059806, "learning_rate": 3.83e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.007523, "epoch": 0.65489567, "global_step/max_steps": "153/233", "percentage": "65.67%", "elapsed_time": "5h 38m 32s", "remaining_time": "2h 57m 0s"}
169
+ {"loss": 0.47923255, "token_acc": 0.83857755, "grad_norm": 0.12951089, "learning_rate": 3.78e-06, "memory(GiB)": 133.69, "train_speed(iter/s)": 0.007541, "epoch": 0.65917603, "global_step/max_steps": "154/233", "percentage": "66.09%", "elapsed_time": "5h 39m 56s", "remaining_time": "2h 54m 22s"}