tejeshbhalla commited on
Commit
b22f90c
·
verified ·
1 Parent(s): 7eaa4b3

Training in progress, step 50

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +19 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b05019bdbc9ceed3cfef082003c5c2def393313ad289d6425d7111cb69d84bf
3
  size 13254157312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89b47f9cab9030a6c081f240c1c68c758be86e91e2f635b1eecf5e7f4b933edf
3
  size 13254157312
logging.jsonl CHANGED
@@ -52,3 +52,22 @@
52
  {"loss": 0.55463028, "token_acc": 0.79433368, "grad_norm": 3.79704332, "learning_rate": 2.57e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02624, "epoch": 0.02568218, "global_step/max_steps": "48/1869", "percentage": "2.57%", "elapsed_time": "30m 4s", "remaining_time": "19h 0m 49s"}
53
  {"loss": 0.62436664, "token_acc": 0.7584, "grad_norm": 3.25214219, "learning_rate": 2.62e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026392, "epoch": 0.02621723, "global_step/max_steps": "49/1869", "percentage": "2.62%", "elapsed_time": "30m 31s", "remaining_time": "18h 53m 49s"}
54
  {"loss": 0.53918386, "token_acc": 0.90425532, "grad_norm": 4.26714802, "learning_rate": 2.67e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026844, "epoch": 0.02675227, "global_step/max_steps": "50/1869", "percentage": "2.68%", "elapsed_time": "30m 37s", "remaining_time": "18h 34m 11s"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  {"loss": 0.55463028, "token_acc": 0.79433368, "grad_norm": 3.79704332, "learning_rate": 2.57e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02624, "epoch": 0.02568218, "global_step/max_steps": "48/1869", "percentage": "2.57%", "elapsed_time": "30m 4s", "remaining_time": "19h 0m 49s"}
53
  {"loss": 0.62436664, "token_acc": 0.7584, "grad_norm": 3.25214219, "learning_rate": 2.62e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026392, "epoch": 0.02621723, "global_step/max_steps": "49/1869", "percentage": "2.62%", "elapsed_time": "30m 31s", "remaining_time": "18h 53m 49s"}
54
  {"loss": 0.53918386, "token_acc": 0.90425532, "grad_norm": 4.26714802, "learning_rate": 2.67e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026844, "epoch": 0.02675227, "global_step/max_steps": "50/1869", "percentage": "2.68%", "elapsed_time": "30m 37s", "remaining_time": "18h 34m 11s"}
55
+ {"eval_loss": 0.60215878, "eval_token_acc": 0.80712147, "eval_runtime": 230.281, "eval_samples_per_second": 2.006, "eval_steps_per_second": 0.252, "epoch": 0.02675227, "global_step/max_steps": "50/1869", "percentage": "2.68%", "elapsed_time": "34m 27s", "remaining_time": "20h 53m 49s"}
56
+ {"loss": 0.51995146, "token_acc": 0.81191565, "grad_norm": 3.53206921, "learning_rate": 2.73e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023724, "epoch": 0.02728732, "global_step/max_steps": "51/1869", "percentage": "2.73%", "elapsed_time": "35m 24s", "remaining_time": "21h 2m 17s"}
57
+ {"loss": 0.58067459, "token_acc": 0.83819629, "grad_norm": 6.21328926, "learning_rate": 2.78e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02381, "epoch": 0.02782236, "global_step/max_steps": "52/1869", "percentage": "2.78%", "elapsed_time": "35m 58s", "remaining_time": "20h 57m 17s"}
58
+ {"loss": 0.64557421, "token_acc": 0.76157407, "grad_norm": 3.65991592, "learning_rate": 2.83e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024202, "epoch": 0.02835741, "global_step/max_steps": "53/1869", "percentage": "2.84%", "elapsed_time": "36m 4s", "remaining_time": "20h 36m 18s"}
59
+ {"loss": 0.51230466, "token_acc": 0.82673797, "grad_norm": 3.01753736, "learning_rate": 2.89e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024517, "epoch": 0.02889246, "global_step/max_steps": "54/1869", "percentage": "2.89%", "elapsed_time": "36m 17s", "remaining_time": "20h 19m 48s"}
60
+ {"loss": 0.48997158, "token_acc": 0.88925803, "grad_norm": 2.5939014, "learning_rate": 2.94e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024898, "epoch": 0.0294275, "global_step/max_steps": "55/1869", "percentage": "2.94%", "elapsed_time": "36m 23s", "remaining_time": "20h 0m 31s"}
61
+ {"loss": 0.55220222, "token_acc": 0.82684825, "grad_norm": 3.10655284, "learning_rate": 2.99e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025282, "epoch": 0.02996255, "global_step/max_steps": "56/1869", "percentage": "3.00%", "elapsed_time": "36m 29s", "remaining_time": "19h 41m 40s"}
62
+ {"loss": 0.59301412, "token_acc": 0.79541446, "grad_norm": 2.94104075, "learning_rate": 3.05e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02563, "epoch": 0.03049759, "global_step/max_steps": "57/1869", "percentage": "3.05%", "elapsed_time": "36m 38s", "remaining_time": "19h 25m 2s"}
63
+ {"loss": 0.56519073, "token_acc": 0.8638985, "grad_norm": 2.9064157, "learning_rate": 3.1e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025796, "epoch": 0.03103264, "global_step/max_steps": "58/1869", "percentage": "3.10%", "elapsed_time": "37m 3s", "remaining_time": "19h 17m 4s"}
64
+ {"loss": 0.53200567, "token_acc": 0.79587405, "grad_norm": 3.22322154, "learning_rate": 3.16e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026169, "epoch": 0.03156768, "global_step/max_steps": "59/1869", "percentage": "3.16%", "elapsed_time": "37m 9s", "remaining_time": "18h 59m 58s"}
65
+ {"loss": 0.58337617, "token_acc": 0.77966102, "grad_norm": 3.01505756, "learning_rate": 3.21e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02654, "epoch": 0.03210273, "global_step/max_steps": "60/1869", "percentage": "3.21%", "elapsed_time": "37m 15s", "remaining_time": "18h 43m 28s"}
66
+ {"eval_loss": 0.59162891, "eval_token_acc": 0.80901806, "eval_runtime": 230.4998, "eval_samples_per_second": 2.004, "eval_steps_per_second": 0.252, "epoch": 0.03210273, "global_step/max_steps": "60/1869", "percentage": "3.21%", "elapsed_time": "41m 6s", "remaining_time": "20h 39m 17s"}
67
+ {"loss": 0.48333877, "token_acc": 0.81099476, "grad_norm": 3.13191962, "learning_rate": 3.26e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.02384, "epoch": 0.03263777, "global_step/max_steps": "61/1869", "percentage": "3.26%", "elapsed_time": "42m 13s", "remaining_time": "20h 51m 36s"}
68
+ {"loss": 0.60811639, "token_acc": 0.83554377, "grad_norm": 3.21621466, "learning_rate": 3.32e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.023923, "epoch": 0.03317282, "global_step/max_steps": "62/1869", "percentage": "3.32%", "elapsed_time": "42m 46s", "remaining_time": "20h 46m 45s"}
69
+ {"loss": 0.56866759, "token_acc": 0.85844749, "grad_norm": 3.32019591, "learning_rate": 3.37e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024245, "epoch": 0.03370787, "global_step/max_steps": "63/1869", "percentage": "3.37%", "elapsed_time": "42m 53s", "remaining_time": "20h 29m 32s"}
70
+ {"loss": 0.60677022, "token_acc": 0.85037037, "grad_norm": 3.47439218, "learning_rate": 3.42e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024572, "epoch": 0.03424291, "global_step/max_steps": "64/1869", "percentage": "3.42%", "elapsed_time": "42m 59s", "remaining_time": "20h 12m 32s"}
71
+ {"loss": 0.54550421, "token_acc": 0.87831325, "grad_norm": 4.10164118, "learning_rate": 3.48e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.024841, "epoch": 0.03477796, "global_step/max_steps": "65/1869", "percentage": "3.48%", "elapsed_time": "43m 11s", "remaining_time": "19h 58m 48s"}
72
+ {"loss": 0.53837061, "token_acc": 0.73560209, "grad_norm": 3.15438747, "learning_rate": 3.53e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025137, "epoch": 0.035313, "global_step/max_steps": "66/1869", "percentage": "3.53%", "elapsed_time": "43m 20s", "remaining_time": "19h 44m 3s"}
73
+ {"loss": 0.5946157, "token_acc": 0.73014257, "grad_norm": 3.01018405, "learning_rate": 3.58e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025458, "epoch": 0.03584805, "global_step/max_steps": "67/1869", "percentage": "3.58%", "elapsed_time": "43m 26s", "remaining_time": "19h 28m 30s"}