tejeshbhalla commited on
Commit
6fbe13e
·
verified ·
1 Parent(s): b49c300

Training in progress, step 20

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +10 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:138a619f6d398e3542f90de7b6429f6236271e5733df67388a9c72a1f34f1205
3
  size 1656902648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26ca64573f2a5b9cf2c8bffd93645e6500adb274b61778d7f2f61f4626b3161e
3
  size 1656902648
logging.jsonl CHANGED
@@ -10,3 +10,13 @@
10
  {"loss": 0.24568725, "token_acc": 0.91618446, "grad_norm": 0.09478283, "learning_rate": 3.03e-06, "memory(GiB)": 768.55, "train_speed(iter/s)": 0.014157, "epoch": 0.0304414, "global_step/max_steps": "10/328", "percentage": "3.05%", "elapsed_time": "11m 43s", "remaining_time": "6h 12m 52s"}
11
  {"eval_loss": 0.30681169, "eval_token_acc": 0.89584491, "eval_runtime": 247.1437, "eval_samples_per_second": 0.328, "eval_steps_per_second": 0.328, "epoch": 0.0304414, "global_step/max_steps": "10/328", "percentage": "3.05%", "elapsed_time": "15m 50s", "remaining_time": "8h 23m 51s"}
12
  {"loss": 0.26882207, "token_acc": 0.89675909, "grad_norm": 0.10335801, "learning_rate": 3.33e-06, "memory(GiB)": 768.55, "train_speed(iter/s)": 0.010964, "epoch": 0.03348554, "global_step/max_steps": "11/328", "percentage": "3.35%", "elapsed_time": "16m 40s", "remaining_time": "8h 0m 30s"}
 
 
 
 
 
 
 
 
 
 
 
10
  {"loss": 0.24568725, "token_acc": 0.91618446, "grad_norm": 0.09478283, "learning_rate": 3.03e-06, "memory(GiB)": 768.55, "train_speed(iter/s)": 0.014157, "epoch": 0.0304414, "global_step/max_steps": "10/328", "percentage": "3.05%", "elapsed_time": "11m 43s", "remaining_time": "6h 12m 52s"}
11
  {"eval_loss": 0.30681169, "eval_token_acc": 0.89584491, "eval_runtime": 247.1437, "eval_samples_per_second": 0.328, "eval_steps_per_second": 0.328, "epoch": 0.0304414, "global_step/max_steps": "10/328", "percentage": "3.05%", "elapsed_time": "15m 50s", "remaining_time": "8h 23m 51s"}
12
  {"loss": 0.26882207, "token_acc": 0.89675909, "grad_norm": 0.10335801, "learning_rate": 3.33e-06, "memory(GiB)": 768.55, "train_speed(iter/s)": 0.010964, "epoch": 0.03348554, "global_step/max_steps": "11/328", "percentage": "3.35%", "elapsed_time": "16m 40s", "remaining_time": "8h 0m 30s"}
13
+ {"loss": 0.27928257, "token_acc": 0.90756404, "grad_norm": 0.11043152, "learning_rate": 3.64e-06, "memory(GiB)": 815.51, "train_speed(iter/s)": 0.009228, "epoch": 0.03652968, "global_step/max_steps": "12/328", "percentage": "3.66%", "elapsed_time": "21m 37s", "remaining_time": "9h 29m 29s"}
14
+ {"loss": 0.20683245, "token_acc": 0.92941097, "grad_norm": 0.08834656, "learning_rate": 3.94e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.00948, "epoch": 0.03957382, "global_step/max_steps": "13/328", "percentage": "3.96%", "elapsed_time": "22m 48s", "remaining_time": "9h 12m 40s"}
15
+ {"loss": 0.1894401, "token_acc": 0.93424278, "grad_norm": 0.10855642, "learning_rate": 4.24e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.009895, "epoch": 0.04261796, "global_step/max_steps": "14/328", "percentage": "4.27%", "elapsed_time": "23m 31s", "remaining_time": "8h 47m 49s"}
16
+ {"loss": 0.18465643, "token_acc": 0.93911768, "grad_norm": 0.10122923, "learning_rate": 4.55e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.009787, "epoch": 0.0456621, "global_step/max_steps": "15/328", "percentage": "4.57%", "elapsed_time": "25m 29s", "remaining_time": "8h 52m 1s"}
17
+ {"loss": 0.30540073, "token_acc": 0.8956815, "grad_norm": 0.08773388, "learning_rate": 4.85e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.00999, "epoch": 0.04870624, "global_step/max_steps": "16/328", "percentage": "4.88%", "elapsed_time": "26m 38s", "remaining_time": "8h 39m 34s"}
18
+ {"loss": 0.27026296, "token_acc": 0.90511308, "grad_norm": 0.10466376, "learning_rate": 5.15e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.010316, "epoch": 0.05175038, "global_step/max_steps": "17/328", "percentage": "5.18%", "elapsed_time": "27m 25s", "remaining_time": "8h 21m 36s"}
19
+ {"loss": 0.18841991, "token_acc": 0.93332867, "grad_norm": 0.09853333, "learning_rate": 5.45e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.010012, "epoch": 0.05479452, "global_step/max_steps": "18/328", "percentage": "5.49%", "elapsed_time": "29m 54s", "remaining_time": "8h 35m 13s"}
20
+ {"loss": 0.24658927, "token_acc": 0.91555587, "grad_norm": 0.12874357, "learning_rate": 5.76e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.009529, "epoch": 0.05783866, "global_step/max_steps": "19/328", "percentage": "5.79%", "elapsed_time": "33m 11s", "remaining_time": "8h 59m 40s"}
21
+ {"loss": 0.26267582, "token_acc": 0.90973451, "grad_norm": 0.09905843, "learning_rate": 6.06e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.009347, "epoch": 0.0608828, "global_step/max_steps": "20/328", "percentage": "6.10%", "elapsed_time": "35m 36s", "remaining_time": "9h 8m 28s"}
22
+ {"eval_loss": 0.30634969, "eval_token_acc": 0.8960476, "eval_runtime": 247.0826, "eval_samples_per_second": 0.328, "eval_steps_per_second": 0.328, "epoch": 0.0608828, "global_step/max_steps": "20/328", "percentage": "6.10%", "elapsed_time": "39m 44s", "remaining_time": "10h 11m 53s"}