tejeshbhalla committed on
Commit
668dd05
·
verified ·
1 Parent(s): b1c1686

Training in progress, step 20

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c624a3697c6b2517405e1db3247ef432ff8523bb20b920fd002ca357e788885b
3
  size 739322344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47d75631fa28d59b23937f7209eecbe72cf5b2457493a5c10a7d7b2b91b74cd4
3
  size 739322344
logging.jsonl CHANGED
@@ -9,3 +9,14 @@
9
  {"loss": 0.25180489, "token_acc": 0.92015499, "grad_norm": 3.09036565, "learning_rate": 7.69e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.022339, "epoch": 0.30638298, "global_step/max_steps": "9/29", "percentage": "31.03%", "elapsed_time": "6m 35s", "remaining_time": "14m 39s"}
10
  {"loss": 0.25393221, "token_acc": 0.9164224, "grad_norm": 2.27255797, "learning_rate": 7.31e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.022295, "epoch": 0.34042553, "global_step/max_steps": "10/29", "percentage": "34.48%", "elapsed_time": "7m 21s", "remaining_time": "13m 59s"}
11
  {"eval_loss": 0.24552706, "eval_token_acc": 0.91472783, "eval_runtime": 13.041, "eval_samples_per_second": 4.371, "eval_steps_per_second": 0.613, "epoch": 0.34042553, "global_step/max_steps": "10/29", "percentage": "34.48%", "elapsed_time": "7m 34s", "remaining_time": "14m 23s"}
 
 
 
 
 
 
 
 
 
 
 
 
9
  {"loss": 0.25180489, "token_acc": 0.92015499, "grad_norm": 3.09036565, "learning_rate": 7.69e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.022339, "epoch": 0.30638298, "global_step/max_steps": "9/29", "percentage": "31.03%", "elapsed_time": "6m 35s", "remaining_time": "14m 39s"}
10
  {"loss": 0.25393221, "token_acc": 0.9164224, "grad_norm": 2.27255797, "learning_rate": 7.31e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.022295, "epoch": 0.34042553, "global_step/max_steps": "10/29", "percentage": "34.48%", "elapsed_time": "7m 21s", "remaining_time": "13m 59s"}
11
  {"eval_loss": 0.24552706, "eval_token_acc": 0.91472783, "eval_runtime": 13.041, "eval_samples_per_second": 4.371, "eval_steps_per_second": 0.613, "epoch": 0.34042553, "global_step/max_steps": "10/29", "percentage": "34.48%", "elapsed_time": "7m 34s", "remaining_time": "14m 23s"}
12
+ {"loss": 0.25681421, "token_acc": 0.91740926, "grad_norm": 2.15936017, "learning_rate": 6.92e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.021734, "epoch": 0.37446809, "global_step/max_steps": "11/29", "percentage": "37.93%", "elapsed_time": "8m 19s", "remaining_time": "13m 36s"}
13
+ {"loss": 0.24919307, "token_acc": 0.91749503, "grad_norm": 2.12157631, "learning_rate": 6.54e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.022075, "epoch": 0.40851064, "global_step/max_steps": "12/29", "percentage": "41.38%", "elapsed_time": "8m 56s", "remaining_time": "12m 40s"}
14
+ {"loss": 0.23583913, "token_acc": 0.91674937, "grad_norm": 1.95219505, "learning_rate": 6.15e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.022257, "epoch": 0.44255319, "global_step/max_steps": "13/29", "percentage": "44.83%", "elapsed_time": "9m 37s", "remaining_time": "11m 50s"}
15
+ {"loss": 0.23160429, "token_acc": 0.90663566, "grad_norm": 2.19393158, "learning_rate": 5.77e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.022163, "epoch": 0.47659574, "global_step/max_steps": "14/29", "percentage": "48.28%", "elapsed_time": "10m 24s", "remaining_time": "11m 9s"}
16
+ {"loss": 0.21596812, "token_acc": 0.9109509, "grad_norm": 2.00559306, "learning_rate": 5.38e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.022383, "epoch": 0.5106383, "global_step/max_steps": "15/29", "percentage": "51.72%", "elapsed_time": "11m 3s", "remaining_time": "10m 19s"}
17
+ {"loss": 0.21437249, "token_acc": 0.93086992, "grad_norm": 1.88266635, "learning_rate": 5e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.0223, "epoch": 0.54468085, "global_step/max_steps": "16/29", "percentage": "55.17%", "elapsed_time": "11m 50s", "remaining_time": "9m 37s"}
18
+ {"loss": 0.21812737, "token_acc": 0.9273279, "grad_norm": 3.35490966, "learning_rate": 4.62e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.022206, "epoch": 0.5787234, "global_step/max_steps": "17/29", "percentage": "58.62%", "elapsed_time": "12m 38s", "remaining_time": "8m 55s"}
19
+ {"loss": 0.22592279, "token_acc": 0.91987076, "grad_norm": 2.69795418, "learning_rate": 4.23e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.022245, "epoch": 0.61276596, "global_step/max_steps": "18/29", "percentage": "62.07%", "elapsed_time": "13m 22s", "remaining_time": "8m 10s"}
20
+ {"loss": 0.21260472, "token_acc": 0.9241944, "grad_norm": 2.05847311, "learning_rate": 3.85e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.022284, "epoch": 0.64680851, "global_step/max_steps": "19/29", "percentage": "65.52%", "elapsed_time": "14m 5s", "remaining_time": "7m 25s"}
21
+ {"loss": 0.23483819, "token_acc": 0.90957684, "grad_norm": 1.9506942, "learning_rate": 3.46e-06, "memory(GiB)": 115.29, "train_speed(iter/s)": 0.022228, "epoch": 0.68085106, "global_step/max_steps": "20/29", "percentage": "68.97%", "elapsed_time": "14m 52s", "remaining_time": "6m 41s"}
22
+ {"eval_loss": 0.20867875, "eval_token_acc": 0.92511696, "eval_runtime": 13.0105, "eval_samples_per_second": 4.381, "eval_steps_per_second": 0.615, "epoch": 0.68085106, "global_step/max_steps": "20/29", "percentage": "68.97%", "elapsed_time": "15m 5s", "remaining_time": "6m 47s"}
runs/events.out.tfevents.1744086424.notebook-67532d59-da58-11ef-a92b-d22a634ff48d-0.693841.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97fb45281c82221e002a4eb5ce07173f96c6d8499eb29e416a23b527bf05a3d2
3
- size 10951
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00f5c081ccef106a51247aec3cb3825d24a2eacb87e3955619ba3c1688b9d96b
3
+ size 15049