tejeshbhalla commited on
Commit
99432e4
·
verified ·
1 Parent(s): 6fbe13e

Training in progress, step 30

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +12 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26ca64573f2a5b9cf2c8bffd93645e6500adb274b61778d7f2f61f4626b3161e
3
  size 1656902648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce04af93b650aa745681d6f16691431d088dd3e1f19c916dbd7e124d747f5ee5
3
  size 1656902648
logging.jsonl CHANGED
@@ -20,3 +20,15 @@
20
  {"loss": 0.24658927, "token_acc": 0.91555587, "grad_norm": 0.12874357, "learning_rate": 5.76e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.009529, "epoch": 0.05783866, "global_step/max_steps": "19/328", "percentage": "5.79%", "elapsed_time": "33m 11s", "remaining_time": "8h 59m 40s"}
21
  {"loss": 0.26267582, "token_acc": 0.90973451, "grad_norm": 0.09905843, "learning_rate": 6.06e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.009347, "epoch": 0.0608828, "global_step/max_steps": "20/328", "percentage": "6.10%", "elapsed_time": "35m 36s", "remaining_time": "9h 8m 28s"}
22
  {"eval_loss": 0.30634969, "eval_token_acc": 0.8960476, "eval_runtime": 247.0826, "eval_samples_per_second": 0.328, "eval_steps_per_second": 0.328, "epoch": 0.0608828, "global_step/max_steps": "20/328", "percentage": "6.10%", "elapsed_time": "39m 44s", "remaining_time": "10h 11m 53s"}
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  {"loss": 0.24658927, "token_acc": 0.91555587, "grad_norm": 0.12874357, "learning_rate": 5.76e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.009529, "epoch": 0.05783866, "global_step/max_steps": "19/328", "percentage": "5.79%", "elapsed_time": "33m 11s", "remaining_time": "8h 59m 40s"}
21
  {"loss": 0.26267582, "token_acc": 0.90973451, "grad_norm": 0.09905843, "learning_rate": 6.06e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.009347, "epoch": 0.0608828, "global_step/max_steps": "20/328", "percentage": "6.10%", "elapsed_time": "35m 36s", "remaining_time": "9h 8m 28s"}
22
  {"eval_loss": 0.30634969, "eval_token_acc": 0.8960476, "eval_runtime": 247.0826, "eval_samples_per_second": 0.328, "eval_steps_per_second": 0.328, "epoch": 0.0608828, "global_step/max_steps": "20/328", "percentage": "6.10%", "elapsed_time": "39m 44s", "remaining_time": "10h 11m 53s"}
23
+ {"loss": 0.2819325, "token_acc": 0.89672202, "grad_norm": 0.10652838, "learning_rate": 6.36e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.008354, "epoch": 0.06392694, "global_step/max_steps": "21/328", "percentage": "6.40%", "elapsed_time": "41m 50s", "remaining_time": "10h 11m 46s"}
24
+ {"loss": 0.2759549, "token_acc": 0.9119108, "grad_norm": 0.13444495, "learning_rate": 6.67e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.008204, "epoch": 0.06697108, "global_step/max_steps": "22/328", "percentage": "6.71%", "elapsed_time": "44m 38s", "remaining_time": "10h 20m 58s"}
25
+ {"loss": 0.2600477, "token_acc": 0.91032317, "grad_norm": 0.11379159, "learning_rate": 6.97e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.008384, "epoch": 0.07001522, "global_step/max_steps": "23/328", "percentage": "7.01%", "elapsed_time": "45m 40s", "remaining_time": "10h 5m 42s"}
26
+ {"loss": 0.21677592, "token_acc": 0.92537639, "grad_norm": 0.10500745, "learning_rate": 7.27e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.00859, "epoch": 0.07305936, "global_step/max_steps": "24/328", "percentage": "7.32%", "elapsed_time": "46m 31s", "remaining_time": "9h 49m 15s"}
27
+ {"loss": 0.24449337, "token_acc": 0.91679533, "grad_norm": 0.09542393, "learning_rate": 7.58e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.008704, "epoch": 0.0761035, "global_step/max_steps": "25/328", "percentage": "7.62%", "elapsed_time": "47m 49s", "remaining_time": "9h 39m 39s"}
28
+ {"loss": 0.31512672, "token_acc": 0.89371783, "grad_norm": 0.10736728, "learning_rate": 7.88e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.00849, "epoch": 0.07914764, "global_step/max_steps": "26/328", "percentage": "7.93%", "elapsed_time": "50m 59s", "remaining_time": "9h 52m 20s"}
29
+ {"loss": 0.26235914, "token_acc": 0.91197774, "grad_norm": 0.09094914, "learning_rate": 8.18e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.008173, "epoch": 0.08219178, "global_step/max_steps": "27/328", "percentage": "8.23%", "elapsed_time": "55m 0s", "remaining_time": "10h 13m 15s"}
30
+ {"loss": 0.24826424, "token_acc": 0.91292023, "grad_norm": 0.10768829, "learning_rate": 8.48e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.008326, "epoch": 0.08523592, "global_step/max_steps": "28/328", "percentage": "8.54%", "elapsed_time": "56m 0s", "remaining_time": "10h 0m 2s"}
31
+ {"loss": 0.15691531, "token_acc": 0.94420261, "grad_norm": 0.08934391, "learning_rate": 8.79e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.008529, "epoch": 0.08828006, "global_step/max_steps": "29/328", "percentage": "8.84%", "elapsed_time": "56m 37s", "remaining_time": "9h 43m 46s"}
32
+ {"loss": 0.24090692, "token_acc": 0.91948326, "grad_norm": 0.10308056, "learning_rate": 9.09e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.008655, "epoch": 0.0913242, "global_step/max_steps": "30/328", "percentage": "9.15%", "elapsed_time": "57m 43s", "remaining_time": "9h 33m 21s"}
33
+ {"eval_loss": 0.30391112, "eval_token_acc": 0.89699565, "eval_runtime": 246.9889, "eval_samples_per_second": 0.328, "eval_steps_per_second": 0.328, "epoch": 0.0913242, "global_step/max_steps": "30/328", "percentage": "9.15%", "elapsed_time": "1h 1m 50s", "remaining_time": "10h 14m 14s"}
34
+ {"loss": 0.25271627, "token_acc": 0.8980103, "grad_norm": 0.17491929, "learning_rate": 9.39e-06, "memory(GiB)": 815.53, "train_speed(iter/s)": 0.008214, "epoch": 0.09436834, "global_step/max_steps": "31/328", "percentage": "9.45%", "elapsed_time": "1h 2m 51s", "remaining_time": "10h 2m 11s"}