jacpetro commited on
Commit
b7deb2f
·
verified ·
1 Parent(s): 561db8f

Training in progress, step 10

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +6 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7878bd1c2c4f17055fbd605acaef280032408e110d69a437430cdcced4ab7da
3
  size 1656903768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e02a732a97109085620824021d0ee4dc74fa0457f2941ea2250852cb25388606
3
  size 1656903768
logging.jsonl CHANGED
@@ -4,3 +4,9 @@
4
  {"loss": 30.84427452, "token_acc": 0.76022193, "grad_norm": 3.02257953, "learning_rate": 6.86e-06, "memory(GiB)": 91.05, "train_speed(iter/s)": 0.001128, "epoch": 0.48854962, "global_step/max_steps": "4/24", "percentage": "16.67%", "elapsed_time": "58m 58s", "remaining_time": "4h 54m 54s"}
5
  {"loss": 32.8370285, "token_acc": 0.75285632, "grad_norm": 2.24804417, "learning_rate": 6.68e-06, "memory(GiB)": 91.05, "train_speed(iter/s)": 0.001133, "epoch": 0.61068702, "global_step/max_steps": "5/24", "percentage": "20.83%", "elapsed_time": "1h 13m 26s", "remaining_time": "4h 39m 3s"}
6
  {"eval_loss": 0.91817999, "eval_token_acc": 0.77373173, "eval_runtime": 22.0971, "eval_samples_per_second": 0.95, "eval_steps_per_second": 0.136, "epoch": 0.61068702, "global_step/max_steps": "5/24", "percentage": "20.83%", "elapsed_time": "1h 13m 48s", "remaining_time": "4h 40m 27s"}
 
 
 
 
 
 
 
4
  {"loss": 30.84427452, "token_acc": 0.76022193, "grad_norm": 3.02257953, "learning_rate": 6.86e-06, "memory(GiB)": 91.05, "train_speed(iter/s)": 0.001128, "epoch": 0.48854962, "global_step/max_steps": "4/24", "percentage": "16.67%", "elapsed_time": "58m 58s", "remaining_time": "4h 54m 54s"}
5
  {"loss": 32.8370285, "token_acc": 0.75285632, "grad_norm": 2.24804417, "learning_rate": 6.68e-06, "memory(GiB)": 91.05, "train_speed(iter/s)": 0.001133, "epoch": 0.61068702, "global_step/max_steps": "5/24", "percentage": "20.83%", "elapsed_time": "1h 13m 26s", "remaining_time": "4h 39m 3s"}
6
  {"eval_loss": 0.91817999, "eval_token_acc": 0.77373173, "eval_runtime": 22.0971, "eval_samples_per_second": 0.95, "eval_steps_per_second": 0.136, "epoch": 0.61068702, "global_step/max_steps": "5/24", "percentage": "20.83%", "elapsed_time": "1h 13m 48s", "remaining_time": "4h 40m 27s"}
7
+ {"loss": 33.37625504, "token_acc": 0.74381874, "grad_norm": 16.70630121, "learning_rate": 6.44e-06, "memory(GiB)": 91.05, "train_speed(iter/s)": 0.001118, "epoch": 0.73282443, "global_step/max_steps": "6/24", "percentage": "25.00%", "elapsed_time": "1h 29m 18s", "remaining_time": "4h 27m 56s"}
8
+ {"loss": 31.02261353, "token_acc": 0.75488503, "grad_norm": 9.27220664, "learning_rate": 6.15e-06, "memory(GiB)": 91.05, "train_speed(iter/s)": 0.001122, "epoch": 0.85496183, "global_step/max_steps": "7/24", "percentage": "29.17%", "elapsed_time": "1h 43m 49s", "remaining_time": "4h 12m 8s"}
9
+ {"loss": 33.63227844, "token_acc": 0.75749061, "grad_norm": 1.92525593, "learning_rate": 5.79e-06, "memory(GiB)": 91.05, "train_speed(iter/s)": 0.001122, "epoch": 0.97709924, "global_step/max_steps": "8/24", "percentage": "33.33%", "elapsed_time": "1h 58m 45s", "remaining_time": "3h 57m 30s"}
10
+ {"loss": 6.81848335, "token_acc": 0.73555901, "grad_norm": 1.92525593, "learning_rate": 5.39e-06, "memory(GiB)": 91.05, "train_speed(iter/s)": 0.001232, "epoch": 1.0, "global_step/max_steps": "9/24", "percentage": "37.50%", "elapsed_time": "2h 1m 40s", "remaining_time": "3h 22m 46s"}
11
+ {"loss": 31.74690628, "token_acc": 0.74373912, "grad_norm": 50.72140627, "learning_rate": 4.95e-06, "memory(GiB)": 91.05, "train_speed(iter/s)": 0.001224, "epoch": 1.1221374, "global_step/max_steps": "10/24", "percentage": "41.67%", "elapsed_time": "2h 16m 2s", "remaining_time": "3h 10m 27s"}
12
+ {"eval_loss": 0.91494149, "eval_token_acc": 0.77429834, "eval_runtime": 22.0647, "eval_samples_per_second": 0.952, "eval_steps_per_second": 0.136, "epoch": 1.1221374, "global_step/max_steps": "10/24", "percentage": "41.67%", "elapsed_time": "2h 16m 24s", "remaining_time": "3h 10m 58s"}