jacpetro committed on
Commit
3fe5887
·
verified ·
1 Parent(s): 8a5b2ca

Training in progress, step 24

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +6 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:153f700b4e7a89ecc4c61cd01fbef40f1f6e1ed9f2dec533b2f28af4694e57d0
3
  size 1656903768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d2f4c210bebe87f620680099105f54487c255d346fb0fea6cdfdb6477c8d093
3
  size 1656903768
logging.jsonl CHANGED
@@ -20,3 +20,9 @@
20
  {"loss": 18.17948341, "token_acc": 0.82936069, "grad_norm": 0.10841891, "learning_rate": 8.55e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001229, "epoch": 2.1221374, "global_step/max_steps": "19/24", "percentage": "79.17%", "elapsed_time": "4h 17m 32s", "remaining_time": "1h 7m 46s"}
21
  {"loss": 18.72079086, "token_acc": 0.81507356, "grad_norm": 0.10311155, "learning_rate": 5.56e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001225, "epoch": 2.24427481, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "4h 31m 57s", "remaining_time": "54m 23s"}
22
  {"eval_loss": 0.5854618, "eval_token_acc": 0.82846674, "eval_runtime": 21.8067, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.138, "epoch": 2.24427481, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "4h 32m 19s", "remaining_time": "54m 27s"}
 
 
 
 
 
 
 
20
  {"loss": 18.17948341, "token_acc": 0.82936069, "grad_norm": 0.10841891, "learning_rate": 8.55e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001229, "epoch": 2.1221374, "global_step/max_steps": "19/24", "percentage": "79.17%", "elapsed_time": "4h 17m 32s", "remaining_time": "1h 7m 46s"}
21
  {"loss": 18.72079086, "token_acc": 0.81507356, "grad_norm": 0.10311155, "learning_rate": 5.56e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001225, "epoch": 2.24427481, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "4h 31m 57s", "remaining_time": "54m 23s"}
22
  {"eval_loss": 0.5854618, "eval_token_acc": 0.82846674, "eval_runtime": 21.8067, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.138, "epoch": 2.24427481, "global_step/max_steps": "20/24", "percentage": "83.33%", "elapsed_time": "4h 32m 19s", "remaining_time": "54m 27s"}
23
+ {"loss": 18.32196236, "token_acc": 0.84050898, "grad_norm": 0.10798978, "learning_rate": 3.16e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001219, "epoch": 2.36641221, "global_step/max_steps": "21/24", "percentage": "87.50%", "elapsed_time": "4h 47m 5s", "remaining_time": "41m 0s"}
24
+ {"loss": 17.90100861, "token_acc": 0.82744203, "grad_norm": 0.1008019, "learning_rate": 1.42e-06, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001216, "epoch": 2.48854962, "global_step/max_steps": "22/24", "percentage": "91.67%", "elapsed_time": "5h 1m 20s", "remaining_time": "27m 23s"}
25
+ {"loss": 17.8360672, "token_acc": 0.8334202, "grad_norm": 0.10203252, "learning_rate": 3.6e-07, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001211, "epoch": 2.61068702, "global_step/max_steps": "23/24", "percentage": "95.83%", "elapsed_time": "5h 16m 24s", "remaining_time": "13m 45s"}
26
+ {"loss": 18.5565834, "token_acc": 0.82515168, "grad_norm": 0.13676309, "learning_rate": 0.0, "memory(GiB)": 91.74, "train_speed(iter/s)": 0.001207, "epoch": 2.73282443, "global_step/max_steps": "24/24", "percentage": "100.00%", "elapsed_time": "5h 31m 10s", "remaining_time": "0s"}
27
+ {"eval_loss": 0.58164978, "eval_token_acc": 0.82967552, "eval_runtime": 21.8292, "eval_samples_per_second": 0.962, "eval_steps_per_second": 0.137, "epoch": 2.73282443, "global_step/max_steps": "24/24", "percentage": "100.00%", "elapsed_time": "5h 31m 32s", "remaining_time": "0s"}
28
+ {"train_runtime": 19906.1961, "train_samples_per_second": 0.315, "train_steps_per_second": 0.001, "total_flos": 5731493804507136.0, "train_loss": 20.69945669, "epoch": 2.73282443, "global_step/max_steps": "24/24", "percentage": "100.00%", "elapsed_time": "5h 31m 41s", "remaining_time": "0s"}