tejeshbhalla commited on
Commit
9862e06
·
verified ·
1 Parent(s): 8638e9f

Training in progress, step 50

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +22 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3847528c3fef03fc7d9c26c4217ea563b0b8312dbf5b0a23bd5019a3c2364f2
3
  size 3565203280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da687e7800a4fee287b56e2eb75bd2cb6a6d585d814b73ef2c6517325e058a72
3
  size 3565203280
logging.jsonl CHANGED
@@ -52,3 +52,25 @@
52
  {"loss": 0.54839796, "token_acc": 0.79958027, "grad_norm": 1.43800616, "learning_rate": 2.57e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.115267, "epoch": 0.02568218, "global_step/max_steps": "48/1869", "percentage": "2.57%", "elapsed_time": "6m 26s", "remaining_time": "4h 4m 38s"}
53
  {"loss": 0.61684155, "token_acc": 0.77229299, "grad_norm": 1.36796963, "learning_rate": 2.62e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.11613, "epoch": 0.02621723, "global_step/max_steps": "49/1869", "percentage": "2.62%", "elapsed_time": "6m 32s", "remaining_time": "4h 2m 55s"}
54
  {"loss": 0.52599233, "token_acc": 0.89716312, "grad_norm": 1.61794019, "learning_rate": 2.67e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.118051, "epoch": 0.02675227, "global_step/max_steps": "50/1869", "percentage": "2.68%", "elapsed_time": "6m 34s", "remaining_time": "3h 58m 54s"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  {"loss": 0.54839796, "token_acc": 0.79958027, "grad_norm": 1.43800616, "learning_rate": 2.57e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.115267, "epoch": 0.02568218, "global_step/max_steps": "48/1869", "percentage": "2.57%", "elapsed_time": "6m 26s", "remaining_time": "4h 4m 38s"}
53
  {"loss": 0.61684155, "token_acc": 0.77229299, "grad_norm": 1.36796963, "learning_rate": 2.62e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.11613, "epoch": 0.02621723, "global_step/max_steps": "49/1869", "percentage": "2.62%", "elapsed_time": "6m 32s", "remaining_time": "4h 2m 55s"}
54
  {"loss": 0.52599233, "token_acc": 0.89716312, "grad_norm": 1.61794019, "learning_rate": 2.67e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.118051, "epoch": 0.02675227, "global_step/max_steps": "50/1869", "percentage": "2.68%", "elapsed_time": "6m 34s", "remaining_time": "3h 58m 54s"}
55
+ {"eval_loss": 0.59815454, "eval_token_acc": 0.80803328, "eval_runtime": 47.1012, "eval_samples_per_second": 9.809, "eval_steps_per_second": 1.231, "epoch": 0.02675227, "global_step/max_steps": "50/1869", "percentage": "2.68%", "elapsed_time": "7m 21s", "remaining_time": "4h 27m 28s"}
56
+ {"loss": 0.51500702, "token_acc": 0.81298095, "grad_norm": 1.52756071, "learning_rate": 2.73e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.105011, "epoch": 0.02728732, "global_step/max_steps": "51/1869", "percentage": "2.73%", "elapsed_time": "7m 36s", "remaining_time": "4h 31m 0s"}
57
+ {"loss": 0.58626986, "token_acc": 0.81697613, "grad_norm": 3.09773326, "learning_rate": 2.78e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.105844, "epoch": 0.02782236, "global_step/max_steps": "52/1869", "percentage": "2.78%", "elapsed_time": "7m 41s", "remaining_time": "4h 28m 55s"}
58
+ {"loss": 0.63133675, "token_acc": 0.75462963, "grad_norm": 1.64922523, "learning_rate": 2.83e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.107526, "epoch": 0.02835741, "global_step/max_steps": "53/1869", "percentage": "2.84%", "elapsed_time": "7m 43s", "remaining_time": "4h 24m 37s"}
59
+ {"loss": 0.4953768, "token_acc": 0.83932347, "grad_norm": 1.70219147, "learning_rate": 2.89e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.108906, "epoch": 0.02889246, "global_step/max_steps": "54/1869", "percentage": "2.89%", "elapsed_time": "7m 46s", "remaining_time": "4h 21m 13s"}
60
+ {"loss": 0.482317, "token_acc": 0.88925803, "grad_norm": 1.33054411, "learning_rate": 2.94e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.110562, "epoch": 0.0294275, "global_step/max_steps": "55/1869", "percentage": "2.94%", "elapsed_time": "7m 47s", "remaining_time": "4h 17m 13s"}
61
+ {"loss": 0.54709989, "token_acc": 0.83463035, "grad_norm": 1.64871359, "learning_rate": 2.99e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.11221, "epoch": 0.02996255, "global_step/max_steps": "56/1869", "percentage": "3.00%", "elapsed_time": "7m 49s", "remaining_time": "4h 13m 21s"}
62
+ {"loss": 0.59203637, "token_acc": 0.80776014, "grad_norm": 1.6295495, "learning_rate": 3.05e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.113744, "epoch": 0.03049759, "global_step/max_steps": "57/1869", "percentage": "3.05%", "elapsed_time": "7m 51s", "remaining_time": "4h 9m 52s"}
63
+ {"loss": 0.55395043, "token_acc": 0.8650519, "grad_norm": 1.50918293, "learning_rate": 3.1e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.114486, "epoch": 0.03103264, "global_step/max_steps": "58/1869", "percentage": "3.10%", "elapsed_time": "7m 57s", "remaining_time": "4h 8m 16s"}
64
+ {"loss": 0.52487975, "token_acc": 0.79044517, "grad_norm": 1.46274054, "learning_rate": 3.16e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.116092, "epoch": 0.03156768, "global_step/max_steps": "59/1869", "percentage": "3.16%", "elapsed_time": "7m 58s", "remaining_time": "4h 4m 45s"}
65
+ {"loss": 0.58494765, "token_acc": 0.78531073, "grad_norm": 3.75680161, "learning_rate": 3.21e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.117693, "epoch": 0.03210273, "global_step/max_steps": "60/1869", "percentage": "3.21%", "elapsed_time": "8m 0s", "remaining_time": "4h 1m 20s"}
66
+ {"eval_loss": 0.58433348, "eval_token_acc": 0.81007931, "eval_runtime": 47.2718, "eval_samples_per_second": 9.773, "eval_steps_per_second": 1.227, "epoch": 0.03210273, "global_step/max_steps": "60/1869", "percentage": "3.21%", "elapsed_time": "8m 47s", "remaining_time": "4h 25m 5s"}
67
+ {"loss": 0.46443856, "token_acc": 0.81141288, "grad_norm": 1.25838745, "learning_rate": 3.26e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.106316, "epoch": 0.03263777, "global_step/max_steps": "61/1869", "percentage": "3.26%", "elapsed_time": "9m 4s", "remaining_time": "4h 28m 51s"}
68
+ {"loss": 0.61769801, "token_acc": 0.85033113, "grad_norm": 1.79472578, "learning_rate": 3.32e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.107026, "epoch": 0.03317282, "global_step/max_steps": "62/1869", "percentage": "3.32%", "elapsed_time": "9m 9s", "remaining_time": "4h 27m 3s"}
69
+ {"loss": 0.57896692, "token_acc": 0.82039574, "grad_norm": 1.62414014, "learning_rate": 3.37e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.108433, "epoch": 0.03370787, "global_step/max_steps": "63/1869", "percentage": "3.37%", "elapsed_time": "9m 11s", "remaining_time": "4h 23m 29s"}
70
+ {"loss": 0.5766288, "token_acc": 0.84592593, "grad_norm": 1.42683816, "learning_rate": 3.42e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.10985, "epoch": 0.03424291, "global_step/max_steps": "64/1869", "percentage": "3.42%", "elapsed_time": "9m 13s", "remaining_time": "4h 19m 59s"}
71
+ {"loss": 0.55697632, "token_acc": 0.88038278, "grad_norm": 1.73366427, "learning_rate": 3.48e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.111046, "epoch": 0.03477796, "global_step/max_steps": "65/1869", "percentage": "3.48%", "elapsed_time": "9m 15s", "remaining_time": "4h 17m 6s"}
72
+ {"loss": 0.52125347, "token_acc": 0.7486911, "grad_norm": 1.34537971, "learning_rate": 3.53e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.112359, "epoch": 0.035313, "global_step/max_steps": "66/1869", "percentage": "3.53%", "elapsed_time": "9m 17s", "remaining_time": "4h 14m 0s"}
73
+ {"loss": 0.57437229, "token_acc": 0.73727088, "grad_norm": 1.39122069, "learning_rate": 3.58e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.113746, "epoch": 0.03584805, "global_step/max_steps": "67/1869", "percentage": "3.58%", "elapsed_time": "9m 19s", "remaining_time": "4h 10m 48s"}
74
+ {"loss": 0.55027413, "token_acc": 0.8056338, "grad_norm": 1.32035482, "learning_rate": 3.64e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.114339, "epoch": 0.03638309, "global_step/max_steps": "68/1869", "percentage": "3.64%", "elapsed_time": "9m 25s", "remaining_time": "4h 9m 29s"}
75
+ {"loss": 0.48549351, "token_acc": 0.81279621, "grad_norm": 1.4051944, "learning_rate": 3.69e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.115625, "epoch": 0.03691814, "global_step/max_steps": "69/1869", "percentage": "3.69%", "elapsed_time": "9m 27s", "remaining_time": "4h 6m 37s"}
76
+ {"loss": 0.60460436, "token_acc": 0.85180055, "grad_norm": 1.62005782, "learning_rate": 3.74e-06, "memory(GiB)": 49.79, "train_speed(iter/s)": 0.116986, "epoch": 0.03745318, "global_step/max_steps": "70/1869", "percentage": "3.75%", "elapsed_time": "9m 28s", "remaining_time": "4h 3m 39s"}