tejeshbhalla commited on
Commit
7eaa4b3
·
verified ·
1 Parent(s): f307d1f

Training in progress, step 30

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +30 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f511a37d700aeb75f9a130794aa2ca48eababe7bf6871d22693baf45bc7ce3e
3
  size 13254157312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b05019bdbc9ceed3cfef082003c5c2def393313ad289d6425d7111cb69d84bf
3
  size 13254157312
logging.jsonl CHANGED
@@ -22,3 +22,33 @@
22
  {"eval_loss": 0.73671097, "eval_token_acc": 0.79061684, "eval_runtime": 230.7649, "eval_samples_per_second": 2.002, "eval_steps_per_second": 0.251, "epoch": 0.01070091, "global_step/max_steps": "20/1869", "percentage": "1.07%", "elapsed_time": "14m 11s", "remaining_time": "21h 51m 58s"}
23
  {"loss": 0.71632719, "token_acc": 0.79510949, "grad_norm": 5.31815338, "learning_rate": 1.12e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022449, "epoch": 0.01123596, "global_step/max_steps": "21/1869", "percentage": "1.12%", "elapsed_time": "15m 10s", "remaining_time": "22h 15m 19s"}
24
  {"loss": 0.75135165, "token_acc": 0.77522936, "grad_norm": 4.08145428, "learning_rate": 1.18e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022744, "epoch": 0.011771, "global_step/max_steps": "22/1869", "percentage": "1.18%", "elapsed_time": "15m 42s", "remaining_time": "21h 58m 26s"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  {"eval_loss": 0.73671097, "eval_token_acc": 0.79061684, "eval_runtime": 230.7649, "eval_samples_per_second": 2.002, "eval_steps_per_second": 0.251, "epoch": 0.01070091, "global_step/max_steps": "20/1869", "percentage": "1.07%", "elapsed_time": "14m 11s", "remaining_time": "21h 51m 58s"}
23
  {"loss": 0.71632719, "token_acc": 0.79510949, "grad_norm": 5.31815338, "learning_rate": 1.12e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022449, "epoch": 0.01123596, "global_step/max_steps": "21/1869", "percentage": "1.12%", "elapsed_time": "15m 10s", "remaining_time": "22h 15m 19s"}
24
  {"loss": 0.75135165, "token_acc": 0.77522936, "grad_norm": 4.08145428, "learning_rate": 1.18e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022744, "epoch": 0.011771, "global_step/max_steps": "22/1869", "percentage": "1.18%", "elapsed_time": "15m 42s", "remaining_time": "21h 58m 26s"}
25
+ {"loss": 0.70510459, "token_acc": 0.78542155, "grad_norm": 3.34494901, "learning_rate": 1.23e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023061, "epoch": 0.01230605, "global_step/max_steps": "23/1869", "percentage": "1.23%", "elapsed_time": "16m 12s", "remaining_time": "21h 40m 41s"}
26
+ {"loss": 0.58708155, "token_acc": 0.89411765, "grad_norm": 4.94634724, "learning_rate": 1.28e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023844, "epoch": 0.01284109, "global_step/max_steps": "24/1869", "percentage": "1.28%", "elapsed_time": "16m 21s", "remaining_time": "20h 57m 34s"}
27
+ {"loss": 0.69591022, "token_acc": 0.80592105, "grad_norm": 5.5039525, "learning_rate": 1.34e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024497, "epoch": 0.01337614, "global_step/max_steps": "25/1869", "percentage": "1.34%", "elapsed_time": "16m 35s", "remaining_time": "20h 23m 49s"}
28
+ {"loss": 0.67804903, "token_acc": 0.81148564, "grad_norm": 3.53921056, "learning_rate": 1.39e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025329, "epoch": 0.01391118, "global_step/max_steps": "26/1869", "percentage": "1.39%", "elapsed_time": "16m 41s", "remaining_time": "19h 43m 9s"}
29
+ {"loss": 0.59909678, "token_acc": 0.84777518, "grad_norm": 2.86513925, "learning_rate": 1.44e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02614, "epoch": 0.01444623, "global_step/max_steps": "27/1869", "percentage": "1.44%", "elapsed_time": "16m 47s", "remaining_time": "19h 6m 0s"}
30
+ {"loss": 0.61625493, "token_acc": 0.87399236, "grad_norm": 4.2735076, "learning_rate": 1.5e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026501, "epoch": 0.01498127, "global_step/max_steps": "28/1869", "percentage": "1.50%", "elapsed_time": "17m 11s", "remaining_time": "18h 50m 24s"}
31
+ {"loss": 0.62591362, "token_acc": 0.79508197, "grad_norm": 3.5549407, "learning_rate": 1.55e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.027282, "epoch": 0.01551632, "global_step/max_steps": "29/1869", "percentage": "1.55%", "elapsed_time": "17m 17s", "remaining_time": "18h 17m 37s"}
32
+ {"loss": 0.58011794, "token_acc": 0.79526227, "grad_norm": 3.10110259, "learning_rate": 1.6e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.028066, "epoch": 0.01605136, "global_step/max_steps": "30/1869", "percentage": "1.61%", "elapsed_time": "17m 23s", "remaining_time": "17h 46m 30s"}
33
+ {"eval_loss": 0.65177089, "eval_token_acc": 0.79980309, "eval_runtime": 230.3026, "eval_samples_per_second": 2.006, "eval_steps_per_second": 0.252, "epoch": 0.01605136, "global_step/max_steps": "30/1869", "percentage": "1.61%", "elapsed_time": "21m 14s", "remaining_time": "21h 41m 47s"}
34
+ {"loss": 0.67728925, "token_acc": 0.8055818, "grad_norm": 3.54144835, "learning_rate": 1.66e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022548, "epoch": 0.01658641, "global_step/max_steps": "31/1869", "percentage": "1.66%", "elapsed_time": "22m 29s", "remaining_time": "22h 13m 53s"}
35
+ {"loss": 0.64033484, "token_acc": 0.80456949, "grad_norm": 3.63276386, "learning_rate": 1.71e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022864, "epoch": 0.01712146, "global_step/max_steps": "32/1869", "percentage": "1.71%", "elapsed_time": "22m 54s", "remaining_time": "21h 55m 9s"}
36
+ {"loss": 0.59276545, "token_acc": 0.81354051, "grad_norm": 3.55099034, "learning_rate": 1.76e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023474, "epoch": 0.0176565, "global_step/max_steps": "33/1869", "percentage": "1.77%", "elapsed_time": "23m 0s", "remaining_time": "21h 20m 22s"}
37
+ {"loss": 0.68321669, "token_acc": 0.77755906, "grad_norm": 3.37234855, "learning_rate": 1.82e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024078, "epoch": 0.01819155, "global_step/max_steps": "34/1869", "percentage": "1.82%", "elapsed_time": "23m 7s", "remaining_time": "20h 47m 41s"}
38
+ {"loss": 0.66526508, "token_acc": 0.79340278, "grad_norm": 3.60016561, "learning_rate": 1.87e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024627, "epoch": 0.01872659, "global_step/max_steps": "35/1869", "percentage": "1.87%", "elapsed_time": "23m 16s", "remaining_time": "20h 19m 21s"}
39
+ {"loss": 0.69026184, "token_acc": 0.85021097, "grad_norm": 3.26779056, "learning_rate": 1.93e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02513, "epoch": 0.01926164, "global_step/max_steps": "36/1869", "percentage": "1.93%", "elapsed_time": "23m 27s", "remaining_time": "19h 54m 28s"}
40
+ {"loss": 0.52923977, "token_acc": 0.81941748, "grad_norm": 2.91604924, "learning_rate": 1.98e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025709, "epoch": 0.01979668, "global_step/max_steps": "37/1869", "percentage": "1.98%", "elapsed_time": "23m 34s", "remaining_time": "19h 26m 59s"}
41
+ {"loss": 0.59180516, "token_acc": 0.89078156, "grad_norm": 3.15961385, "learning_rate": 2.03e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026141, "epoch": 0.02033173, "global_step/max_steps": "38/1869", "percentage": "2.03%", "elapsed_time": "23m 48s", "remaining_time": "19h 7m 18s"}
42
+ {"loss": 0.63943875, "token_acc": 0.75691134, "grad_norm": 4.54194307, "learning_rate": 2.09e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026649, "epoch": 0.02086677, "global_step/max_steps": "39/1869", "percentage": "2.09%", "elapsed_time": "23m 58s", "remaining_time": "18h 44m 55s"}
43
+ {"loss": 0.57443136, "token_acc": 0.75711382, "grad_norm": 3.56686258, "learning_rate": 2.14e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.027218, "epoch": 0.02140182, "global_step/max_steps": "40/1869", "percentage": "2.14%", "elapsed_time": "24m 4s", "remaining_time": "18h 20m 53s"}
44
+ {"eval_loss": 0.61997586, "eval_token_acc": 0.80398873, "eval_runtime": 230.319, "eval_samples_per_second": 2.006, "eval_steps_per_second": 0.252, "epoch": 0.02140182, "global_step/max_steps": "40/1869", "percentage": "2.14%", "elapsed_time": "27m 54s", "remaining_time": "21h 16m 25s"}
45
+ {"loss": 0.5590893, "token_acc": 0.80868182, "grad_norm": 3.27547669, "learning_rate": 2.19e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023291, "epoch": 0.02193686, "global_step/max_steps": "41/1869", "percentage": "2.19%", "elapsed_time": "28m 55s", "remaining_time": "21h 29m 31s"}
46
+ {"loss": 0.58981824, "token_acc": 0.77457265, "grad_norm": 3.15756106, "learning_rate": 2.25e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023774, "epoch": 0.02247191, "global_step/max_steps": "42/1869", "percentage": "2.25%", "elapsed_time": "29m 1s", "remaining_time": "21h 2m 39s"}
47
+ {"loss": 0.57524711, "token_acc": 0.83855422, "grad_norm": 4.17729044, "learning_rate": 2.3e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02426, "epoch": 0.02300696, "global_step/max_steps": "43/1869", "percentage": "2.30%", "elapsed_time": "29m 7s", "remaining_time": "20h 36m 44s"}
48
+ {"loss": 0.63644284, "token_acc": 0.81646274, "grad_norm": 2.62428689, "learning_rate": 2.35e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024434, "epoch": 0.023542, "global_step/max_steps": "44/1869", "percentage": "2.35%", "elapsed_time": "29m 35s", "remaining_time": "20h 27m 32s"}
49
+ {"loss": 0.64309716, "token_acc": 0.83072917, "grad_norm": 2.948915, "learning_rate": 2.41e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024905, "epoch": 0.02407705, "global_step/max_steps": "45/1869", "percentage": "2.41%", "elapsed_time": "29m 41s", "remaining_time": "20h 3m 44s"}
50
+ {"loss": 0.5813536, "token_acc": 0.86121392, "grad_norm": 3.02264547, "learning_rate": 2.46e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025314, "epoch": 0.02461209, "global_step/max_steps": "46/1869", "percentage": "2.46%", "elapsed_time": "29m 52s", "remaining_time": "19h 43m 42s"}
51
+ {"loss": 0.67458242, "token_acc": 0.84415584, "grad_norm": 4.08577394, "learning_rate": 2.51e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025777, "epoch": 0.02514714, "global_step/max_steps": "47/1869", "percentage": "2.51%", "elapsed_time": "29m 58s", "remaining_time": "19h 21m 52s"}
52
+ {"loss": 0.55463028, "token_acc": 0.79433368, "grad_norm": 3.79704332, "learning_rate": 2.57e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02624, "epoch": 0.02568218, "global_step/max_steps": "48/1869", "percentage": "2.57%", "elapsed_time": "30m 4s", "remaining_time": "19h 0m 49s"}
53
+ {"loss": 0.62436664, "token_acc": 0.7584, "grad_norm": 3.25214219, "learning_rate": 2.62e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026392, "epoch": 0.02621723, "global_step/max_steps": "49/1869", "percentage": "2.62%", "elapsed_time": "30m 31s", "remaining_time": "18h 53m 49s"}
54
+ {"loss": 0.53918386, "token_acc": 0.90425532, "grad_norm": 4.26714802, "learning_rate": 2.67e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026844, "epoch": 0.02675227, "global_step/max_steps": "50/1869", "percentage": "2.68%", "elapsed_time": "30m 37s", "remaining_time": "18h 34m 11s"}