jacpetro commited on
Commit
3080959
·
verified ·
1 Parent(s): b314506

Training in progress, step 20

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +6 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aba94b294b77ab1b529c8814f6c0eef6d82bac90189f0a09399acd43066b733a
3
  size 1656903768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e767a2aa978c9100da0250167dfecd8f04a1fd8d5bdeb7313dc6d3ba41666c0
3
  size 1656903768
logging.jsonl CHANGED
@@ -10,3 +10,9 @@
10
  {"loss": 19.65369225, "token_acc": 0.80329641, "grad_norm": 0.08990939, "learning_rate": 4.288e-05, "memory(GiB)": 96.08, "train_speed(iter/s)": 0.001742, "epoch": 1.73282443, "global_step/max_steps": "14/24", "percentage": "58.33%", "elapsed_time": "2h 13m 43s", "remaining_time": "1h 35m 31s"}
11
  {"loss": 19.39007759, "token_acc": 0.82374368, "grad_norm": 0.08934228, "learning_rate": 3.591e-05, "memory(GiB)": 96.08, "train_speed(iter/s)": 0.00169, "epoch": 1.85496183, "global_step/max_steps": "15/24", "percentage": "62.50%", "elapsed_time": "2h 27m 45s", "remaining_time": "1h 28m 39s"}
12
  {"eval_loss": 0.61527205, "eval_token_acc": 0.82136516, "eval_runtime": 21.2842, "eval_samples_per_second": 0.987, "eval_steps_per_second": 0.141, "epoch": 1.85496183, "global_step/max_steps": "15/24", "percentage": "62.50%", "elapsed_time": "2h 28m 6s", "remaining_time": "1h 28m 51s"}
 
 
 
 
 
 
 
10
  {"loss": 19.65369225, "token_acc": 0.80329641, "grad_norm": 0.08990939, "learning_rate": 4.288e-05, "memory(GiB)": 96.08, "train_speed(iter/s)": 0.001742, "epoch": 1.73282443, "global_step/max_steps": "14/24", "percentage": "58.33%", "elapsed_time": "2h 13m 43s", "remaining_time": "1h 35m 31s"}
11
  {"loss": 19.39007759, "token_acc": 0.82374368, "grad_norm": 0.08934228, "learning_rate": 3.591e-05, "memory(GiB)": 96.08, "train_speed(iter/s)": 0.00169, "epoch": 1.85496183, "global_step/max_steps": "15/24", "percentage": "62.50%", "elapsed_time": "2h 27m 45s", "remaining_time": "1h 28m 39s"}
12
  {"eval_loss": 0.61527205, "eval_token_acc": 0.82136516, "eval_runtime": 21.2842, "eval_samples_per_second": 0.987, "eval_steps_per_second": 0.141, "epoch": 1.85496183, "global_step/max_steps": "15/24", "percentage": "62.50%", "elapsed_time": "2h 28m 6s", "remaining_time": "1h 28m 51s"}
13
+ {"loss": 19.09448242, "token_acc": 0.82234902, "grad_norm": 0.07514143, "learning_rate": 2.923e-05, "memory(GiB)": 96.08, "train_speed(iter/s)": 0.001634, "epoch": 1.97709924, "global_step/max_steps": "16/24", "percentage": "66.67%", "elapsed_time": "2h 43m 1s", "remaining_time": "1h 21m 30s"}
14
+ {"loss": 3.55800676, "token_acc": 0.8386462, "grad_norm": 0.07514143, "learning_rate": 2.297e-05, "memory(GiB)": 96.08, "train_speed(iter/s)": 0.001706, "epoch": 2.0, "global_step/max_steps": "17/24", "percentage": "70.83%", "elapsed_time": "2h 45m 54s", "remaining_time": "1h 8m 18s"}
15
+ {"loss": 18.6829834, "token_acc": 0.82620363, "grad_norm": 0.07605129, "learning_rate": 1.726e-05, "memory(GiB)": 96.08, "train_speed(iter/s)": 0.001661, "epoch": 2.1221374, "global_step/max_steps": "18/24", "percentage": "75.00%", "elapsed_time": "3h 0m 22s", "remaining_time": "1h 0m 7s"}
16
+ {"loss": 19.1674099, "token_acc": 0.81255735, "grad_norm": 0.07065283, "learning_rate": 1.221e-05, "memory(GiB)": 96.08, "train_speed(iter/s)": 0.001627, "epoch": 2.24427481, "global_step/max_steps": "19/24", "percentage": "79.17%", "elapsed_time": "3h 14m 26s", "remaining_time": "51m 10s"}
17
+ {"loss": 18.75621605, "token_acc": 0.83829445, "grad_norm": 0.07214139, "learning_rate": 7.94e-06, "memory(GiB)": 96.08, "train_speed(iter/s)": 0.001595, "epoch": 2.36641221, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "3h 28m 43s", "remaining_time": "41m 44s"}
18
+ {"eval_loss": 0.59709889, "eval_token_acc": 0.82612473, "eval_runtime": 21.3202, "eval_samples_per_second": 0.985, "eval_steps_per_second": 0.141, "epoch": 2.36641221, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "3h 29m 4s", "remaining_time": "41m 48s"}