jacpetro commited on
Commit
8a5b2ca
·
verified ·
1 Parent(s): 118e74c

Training in progress, step 20

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +11 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3b540ffd8ebcee47d535e8e3a7bf89de0c2a0c62c396e9e5f8f2ad2c22e5577
3
  size 1656903768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:153f700b4e7a89ecc4c61cd01fbef40f1f6e1ed9f2dec533b2f28af4694e57d0
3
  size 1656903768
logging.jsonl CHANGED
@@ -9,3 +9,14 @@
9
  {"loss": 4.33601952, "token_acc": 0.78835404, "grad_norm": 0.1694329, "learning_rate": 5.392e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001237, "epoch": 1.0, "global_step/max_steps": "9/24", "percentage": "37.50%", "elapsed_time": "2h 1m 8s", "remaining_time": "3h 21m 53s"}
10
  {"loss": 21.8188076, "token_acc": 0.79576805, "grad_norm": 0.14743451, "learning_rate": 4.954e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001229, "epoch": 1.1221374, "global_step/max_steps": "10/24", "percentage": "41.67%", "elapsed_time": "2h 15m 30s", "remaining_time": "3h 9m 42s"}
11
  {"eval_loss": 0.6696381, "eval_token_acc": 0.81154384, "eval_runtime": 21.8063, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.138, "epoch": 1.1221374, "global_step/max_steps": "10/24", "percentage": "41.67%", "elapsed_time": "2h 15m 52s", "remaining_time": "3h 10m 13s"}
 
 
 
 
 
 
 
 
 
 
 
 
9
  {"loss": 4.33601952, "token_acc": 0.78835404, "grad_norm": 0.1694329, "learning_rate": 5.392e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001237, "epoch": 1.0, "global_step/max_steps": "9/24", "percentage": "37.50%", "elapsed_time": "2h 1m 8s", "remaining_time": "3h 21m 53s"}
10
  {"loss": 21.8188076, "token_acc": 0.79576805, "grad_norm": 0.14743451, "learning_rate": 4.954e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001229, "epoch": 1.1221374, "global_step/max_steps": "10/24", "percentage": "41.67%", "elapsed_time": "2h 15m 30s", "remaining_time": "3h 9m 42s"}
11
  {"eval_loss": 0.6696381, "eval_token_acc": 0.81154384, "eval_runtime": 21.8063, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.138, "epoch": 1.1221374, "global_step/max_steps": "10/24", "percentage": "41.67%", "elapsed_time": "2h 15m 52s", "remaining_time": "3h 10m 13s"}
12
+ {"loss": 21.68900299, "token_acc": 0.78968529, "grad_norm": 0.15730255, "learning_rate": 4.486e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001213, "epoch": 1.24427481, "global_step/max_steps": "11/24", "percentage": "45.83%", "elapsed_time": "2h 31m 0s", "remaining_time": "2h 58m 27s"}
13
+ {"loss": 20.34348297, "token_acc": 0.81513798, "grad_norm": 0.14047588, "learning_rate": 3.998e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001208, "epoch": 1.36641221, "global_step/max_steps": "12/24", "percentage": "50.00%", "elapsed_time": "2h 45m 29s", "remaining_time": "2h 45m 29s"}
14
+ {"loss": 19.69247437, "token_acc": 0.81367603, "grad_norm": 0.13805264, "learning_rate": 3.5e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.0012, "epoch": 1.48854962, "global_step/max_steps": "13/24", "percentage": "54.17%", "elapsed_time": "3h 0m 29s", "remaining_time": "2h 32m 43s"}
15
+ {"loss": 19.99150848, "token_acc": 0.82031954, "grad_norm": 0.13972557, "learning_rate": 3.002e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001189, "epoch": 1.61068702, "global_step/max_steps": "14/24", "percentage": "58.33%", "elapsed_time": "3h 16m 2s", "remaining_time": "2h 20m 1s"}
16
+ {"loss": 19.0448761, "token_acc": 0.80773965, "grad_norm": 0.11615134, "learning_rate": 2.514e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001188, "epoch": 1.73282443, "global_step/max_steps": "15/24", "percentage": "62.50%", "elapsed_time": "3h 30m 15s", "remaining_time": "2h 6m 9s"}
17
+ {"loss": 18.81958389, "token_acc": 0.82736064, "grad_norm": 0.10968279, "learning_rate": 2.046e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001186, "epoch": 1.85496183, "global_step/max_steps": "16/24", "percentage": "66.67%", "elapsed_time": "3h 44m 38s", "remaining_time": "1h 52m 19s"}
18
+ {"loss": 18.56712723, "token_acc": 0.82723235, "grad_norm": 0.16789175, "learning_rate": 1.608e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001181, "epoch": 1.97709924, "global_step/max_steps": "17/24", "percentage": "70.83%", "elapsed_time": "3h 59m 44s", "remaining_time": "1h 38m 43s"}
19
+ {"loss": 3.45807076, "token_acc": 0.83812147, "grad_norm": 0.16789175, "learning_rate": 1.208e-05, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001235, "epoch": 2.0, "global_step/max_steps": "18/24", "percentage": "75.00%", "elapsed_time": "4h 2m 42s", "remaining_time": "1h 20m 54s"}
20
+ {"loss": 18.17948341, "token_acc": 0.82936069, "grad_norm": 0.10841891, "learning_rate": 8.55e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001229, "epoch": 2.1221374, "global_step/max_steps": "19/24", "percentage": "79.17%", "elapsed_time": "4h 17m 32s", "remaining_time": "1h 7m 46s"}
21
+ {"loss": 18.72079086, "token_acc": 0.81507356, "grad_norm": 0.10311155, "learning_rate": 5.56e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001225, "epoch": 2.24427481, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "4h 31m 57s", "remaining_time": "54m 23s"}
22
+ {"eval_loss": 0.5854618, "eval_token_acc": 0.82846674, "eval_runtime": 21.8067, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.138, "epoch": 2.24427481, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "4h 32m 19s", "remaining_time": "54m 27s"}