tejeshbhalla commited on
Commit
998e89b
·
verified ·
1 Parent(s): 8c602e2

Training in progress, step 30

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +10 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9b35651af0f2d2c6dd892f723613643b186c129ac3092df5ad7a54cfb6b0d8f
3
  size 6627156248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9094dd1114c70436de0a0e319fd0588b1cd44566c485099bcac708952c3f37ce
3
  size 6627156248
logging.jsonl CHANGED
@@ -21,3 +21,13 @@
21
  {"loss": 0.4979248, "grad_norm": 0.49843666, "learning_rate": 2.92e-05, "memory(GiB)": 133.21, "train_speed(iter/s)": 0.002855, "rewards/chosen": -0.05352783, "rewards/rejected": -0.04724121, "rewards/accuracies": 0.75, "rewards/margins": -0.00628662, "logps/rejected": -0.47216797, "logps/chosen": -0.53515625, "logits/rejected": -0.65527344, "logits/chosen": -0.63183594, "nll_loss": 0.53515625, "log_odds_ratio": -0.74023438, "log_odds_chosen": -0.04248047, "epoch": 0.11331445, "global_step/max_steps": "20/528", "percentage": "3.79%", "elapsed_time": "1h 56m 28s", "remaining_time": "2d 1h 18m 18s"}
22
  {"eval_loss": 0.50023127, "eval_runtime": 214.6405, "eval_samples_per_second": 0.266, "eval_steps_per_second": 0.037, "eval_rewards/chosen": -0.03556061, "eval_rewards/rejected": -0.03941345, "eval_rewards/accuracies": 0.75, "eval_rewards/margins": 0.00385284, "eval_logps/rejected": -0.39416504, "eval_logps/chosen": -0.35577393, "eval_logits/rejected": -0.78125, "eval_logits/chosen": -0.73828125, "eval_nll_loss": 0.35552979, "eval_log_odds_ratio": -0.59912109, "eval_log_odds_chosen": 0.22839355, "epoch": 0.11331445, "global_step/max_steps": "20/528", "percentage": "3.79%", "elapsed_time": "2h 0m 2s", "remaining_time": "2d 2h 49m 10s"}
23
  {"loss": 0.51702881, "grad_norm": 0.73438716, "learning_rate": 2.914e-05, "memory(GiB)": 133.21, "train_speed(iter/s)": 0.002801, "rewards/chosen": -0.03884888, "rewards/rejected": -0.03894043, "rewards/accuracies": 0.5, "rewards/margins": 9.155e-05, "logps/rejected": -0.38989258, "logps/chosen": -0.38867188, "logits/rejected": -0.74121094, "logits/chosen": -0.72851562, "nll_loss": 0.3894043, "log_odds_ratio": -0.64941406, "log_odds_chosen": 0.1472168, "epoch": 0.11898017, "global_step/max_steps": "21/528", "percentage": "3.98%", "elapsed_time": "2h 4m 40s", "remaining_time": "2d 2h 10m 0s"}
 
 
 
 
 
 
 
 
 
 
 
21
  {"loss": 0.4979248, "grad_norm": 0.49843666, "learning_rate": 2.92e-05, "memory(GiB)": 133.21, "train_speed(iter/s)": 0.002855, "rewards/chosen": -0.05352783, "rewards/rejected": -0.04724121, "rewards/accuracies": 0.75, "rewards/margins": -0.00628662, "logps/rejected": -0.47216797, "logps/chosen": -0.53515625, "logits/rejected": -0.65527344, "logits/chosen": -0.63183594, "nll_loss": 0.53515625, "log_odds_ratio": -0.74023438, "log_odds_chosen": -0.04248047, "epoch": 0.11331445, "global_step/max_steps": "20/528", "percentage": "3.79%", "elapsed_time": "1h 56m 28s", "remaining_time": "2d 1h 18m 18s"}
22
  {"eval_loss": 0.50023127, "eval_runtime": 214.6405, "eval_samples_per_second": 0.266, "eval_steps_per_second": 0.037, "eval_rewards/chosen": -0.03556061, "eval_rewards/rejected": -0.03941345, "eval_rewards/accuracies": 0.75, "eval_rewards/margins": 0.00385284, "eval_logps/rejected": -0.39416504, "eval_logps/chosen": -0.35577393, "eval_logits/rejected": -0.78125, "eval_logits/chosen": -0.73828125, "eval_nll_loss": 0.35552979, "eval_log_odds_ratio": -0.59912109, "eval_log_odds_chosen": 0.22839355, "epoch": 0.11331445, "global_step/max_steps": "20/528", "percentage": "3.79%", "elapsed_time": "2h 0m 2s", "remaining_time": "2d 2h 49m 10s"}
23
  {"loss": 0.51702881, "grad_norm": 0.73438716, "learning_rate": 2.914e-05, "memory(GiB)": 133.21, "train_speed(iter/s)": 0.002801, "rewards/chosen": -0.03884888, "rewards/rejected": -0.03894043, "rewards/accuracies": 0.5, "rewards/margins": 9.155e-05, "logps/rejected": -0.38989258, "logps/chosen": -0.38867188, "logits/rejected": -0.74121094, "logits/chosen": -0.72851562, "nll_loss": 0.3894043, "log_odds_ratio": -0.64941406, "log_odds_chosen": 0.1472168, "epoch": 0.11898017, "global_step/max_steps": "21/528", "percentage": "3.98%", "elapsed_time": "2h 4m 40s", "remaining_time": "2d 2h 10m 0s"}
24
+ {"loss": 0.81253052, "grad_norm": 1.53866589, "learning_rate": 2.908e-05, "memory(GiB)": 133.21, "train_speed(iter/s)": 0.002805, "rewards/chosen": -0.04421997, "rewards/rejected": -0.04016113, "rewards/accuracies": 0.75, "rewards/margins": -0.00405884, "logps/rejected": -0.40136719, "logps/chosen": -0.44238281, "logits/rejected": -0.75976562, "logits/chosen": -0.69824219, "nll_loss": 0.44238281, "log_odds_ratio": -0.68554688, "log_odds_chosen": 0.10449219, "epoch": 0.12464589, "global_step/max_steps": "22/528", "percentage": "4.17%", "elapsed_time": "2h 10m 23s", "remaining_time": "2d 1h 59m 10s"}
25
+ {"loss": 0.39785767, "grad_norm": 4.08225965, "learning_rate": 2.902e-05, "memory(GiB)": 133.21, "train_speed(iter/s)": 0.002831, "rewards/chosen": -0.02941895, "rewards/rejected": -0.03512573, "rewards/accuracies": 0.75, "rewards/margins": 0.00570679, "logps/rejected": -0.35107422, "logps/chosen": -0.29370117, "logits/rejected": -0.48242188, "logits/chosen": -0.44238281, "nll_loss": 0.29345703, "log_odds_ratio": -0.58300781, "log_odds_chosen": 0.24072266, "epoch": 0.13031161, "global_step/max_steps": "23/528", "percentage": "4.36%", "elapsed_time": "2h 15m 5s", "remaining_time": "2d 1h 26m 7s"}
26
+ {"loss": 0.50842285, "grad_norm": 0.75203818, "learning_rate": 2.897e-05, "memory(GiB)": 133.31, "train_speed(iter/s)": 0.002829, "rewards/chosen": -0.04470825, "rewards/rejected": -0.04074097, "rewards/accuracies": 0.25, "rewards/margins": -0.00396729, "logps/rejected": -0.4074707, "logps/chosen": -0.44677734, "logits/rejected": -0.41992188, "logits/chosen": -0.47167969, "nll_loss": 0.44775391, "log_odds_ratio": -0.80273438, "log_odds_chosen": -0.15820312, "epoch": 0.13597734, "global_step/max_steps": "24/528", "percentage": "4.55%", "elapsed_time": "2h 21m 7s", "remaining_time": "2d 1h 23m 29s"}
27
+ {"loss": 0.66778564, "grad_norm": 2.3309772, "learning_rate": 2.891e-05, "memory(GiB)": 133.31, "train_speed(iter/s)": 0.002853, "rewards/chosen": -0.12059021, "rewards/rejected": -0.14254761, "rewards/accuracies": 0.75, "rewards/margins": 0.02194214, "logps/rejected": -1.42749023, "logps/chosen": -1.20690918, "logits/rejected": -0.37792969, "logits/chosen": -0.37036133, "nll_loss": 1.20678711, "log_odds_ratio": -0.5480957, "log_odds_chosen": 0.44335938, "epoch": 0.14164306, "global_step/max_steps": "25/528", "percentage": "4.73%", "elapsed_time": "2h 25m 43s", "remaining_time": "2d 0h 52m 5s"}
28
+ {"loss": 0.54031372, "grad_norm": 2.24770308, "learning_rate": 2.885e-05, "memory(GiB)": 133.31, "train_speed(iter/s)": 0.002837, "rewards/chosen": -0.02548218, "rewards/rejected": -0.02676392, "rewards/accuracies": 0.5, "rewards/margins": 0.00128174, "logps/rejected": -0.26733398, "logps/chosen": -0.25488281, "logits/rejected": -0.53955078, "logits/chosen": -0.43457031, "nll_loss": 0.25463867, "log_odds_ratio": -0.6796875, "log_odds_chosen": 0.05322266, "epoch": 0.14730878, "global_step/max_steps": "26/528", "percentage": "4.92%", "elapsed_time": "2h 32m 28s", "remaining_time": "2d 1h 3m 47s"}
29
+ {"loss": 0.29516602, "grad_norm": 0.13307834, "learning_rate": 2.879e-05, "memory(GiB)": 133.31, "train_speed(iter/s)": 0.002869, "rewards/chosen": -0.01672363, "rewards/rejected": -0.02449036, "rewards/accuracies": 1.0, "rewards/margins": 0.00776672, "logps/rejected": -0.24511719, "logps/chosen": -0.16723633, "logits/rejected": -0.44970703, "logits/chosen": -0.40771484, "nll_loss": 0.16699219, "log_odds_ratio": -0.52685547, "log_odds_chosen": 0.38671875, "epoch": 0.1529745, "global_step/max_steps": "27/528", "percentage": "5.11%", "elapsed_time": "2h 36m 31s", "remaining_time": "2d 0h 24m 24s"}
30
+ {"loss": 0.67800903, "grad_norm": 1.45895541, "learning_rate": 2.874e-05, "memory(GiB)": 133.31, "train_speed(iter/s)": 0.002851, "rewards/chosen": -0.1439209, "rewards/rejected": -0.1578064, "rewards/accuracies": 1.0, "rewards/margins": 0.0138855, "logps/rejected": -1.57885742, "logps/chosen": -1.43896484, "logits/rejected": -0.57958984, "logits/chosen": -0.55810547, "nll_loss": 1.4387207, "log_odds_ratio": -0.56494141, "log_odds_chosen": 0.28515625, "epoch": 0.15864023, "global_step/max_steps": "28/528", "percentage": "5.30%", "elapsed_time": "2h 43m 24s", "remaining_time": "2d 0h 38m 5s"}
31
+ {"loss": 0.5168457, "grad_norm": 0.36635926, "learning_rate": 2.868e-05, "memory(GiB)": 133.31, "train_speed(iter/s)": 0.002861, "rewards/chosen": -0.03540039, "rewards/rejected": -0.04214478, "rewards/accuracies": 1.0, "rewards/margins": 0.00674438, "logps/rejected": -0.421875, "logps/chosen": -0.35351562, "logits/rejected": -0.41064453, "logits/chosen": -0.38623047, "nll_loss": 0.35302734, "log_odds_ratio": -0.58349609, "log_odds_chosen": 0.24853516, "epoch": 0.16430595, "global_step/max_steps": "29/528", "percentage": "5.49%", "elapsed_time": "2h 48m 39s", "remaining_time": "2d 0h 21m 59s"}
32
+ {"loss": 0.54055786, "grad_norm": 2.31381583, "learning_rate": 2.862e-05, "memory(GiB)": 133.31, "train_speed(iter/s)": 0.002872, "rewards/chosen": -0.04043579, "rewards/rejected": -0.04840088, "rewards/accuracies": 1.0, "rewards/margins": 0.00796509, "logps/rejected": -0.484375, "logps/chosen": -0.40429688, "logits/rejected": -0.41455078, "logits/chosen": -0.36621094, "nll_loss": 0.40527344, "log_odds_ratio": -0.57568359, "log_odds_chosen": 0.25878906, "epoch": 0.16997167, "global_step/max_steps": "30/528", "percentage": "5.68%", "elapsed_time": "2h 53m 48s", "remaining_time": "2d 0h 5m 10s"}
33
+ {"eval_loss": 0.42711759, "eval_runtime": 214.3814, "eval_samples_per_second": 0.266, "eval_steps_per_second": 0.037, "eval_rewards/chosen": -0.02880859, "eval_rewards/rejected": -0.03607178, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 0.00726318, "eval_logps/rejected": -0.36108398, "eval_logps/chosen": -0.28771973, "eval_logits/rejected": -0.53515625, "eval_logits/chosen": -0.4855957, "eval_nll_loss": 0.28778076, "eval_log_odds_ratio": -0.54736328, "eval_log_odds_chosen": 0.34350586, "epoch": 0.16997167, "global_step/max_steps": "30/528", "percentage": "5.68%", "elapsed_time": "2h 57m 22s", "remaining_time": "2d 1h 4m 29s"}