tejeshbhalla commited on
Commit
8a01f38
·
verified ·
1 Parent(s): b6a852e

Training in progress, step 40

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. logging.jsonl +11 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6aa5003359e54ac08e4e24ac7f6303ea81ae41fe1cd0c784a246d256391e219e
3
  size 13254157312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43e5a2ee020e679e55844f88475baeb607e0fbc6f7eec3d565657b2c09a1367c
3
  size 13254157312
logging.jsonl CHANGED
@@ -41,3 +41,14 @@
41
  {"loss": 0.43920898, "grad_norm": 2.3938894, "learning_rate": 9.83e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015565, "rewards/chosen": -0.02514648, "rewards/rejected": -0.03955078, "rewards/accuracies": 1.0, "rewards/margins": 0.0144043, "logps/rejected": -0.39453125, "logps/chosen": -0.25195312, "logits/rejected": 0.6640625, "logits/chosen": 0.78515625, "nll_loss": 0.25, "log_odds_ratio": -0.47070312, "log_odds_chosen": 0.515625, "epoch": 0.11550152, "global_step/max_steps": "38/329", "percentage": "11.55%", "elapsed_time": "40m 34s", "remaining_time": "5h 10m 44s"}
42
  {"loss": 0.52661133, "grad_norm": 4.36714908, "learning_rate": 9.8e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015636, "rewards/chosen": -0.04272461, "rewards/rejected": -0.04785156, "rewards/accuracies": 1.0, "rewards/margins": 0.00512695, "logps/rejected": -0.47851562, "logps/chosen": -0.42773438, "logits/rejected": 0.6171875, "logits/chosen": 0.52734375, "nll_loss": 0.42773438, "log_odds_ratio": -0.625, "log_odds_chosen": 0.13671875, "epoch": 0.11854103, "global_step/max_steps": "39/329", "percentage": "11.85%", "elapsed_time": "41m 27s", "remaining_time": "5h 8m 15s"}
43
  {"loss": 0.47045898, "grad_norm": 4.14709109, "learning_rate": 9.76e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015704, "rewards/chosen": -0.03271484, "rewards/rejected": -0.02807617, "rewards/accuracies": 0.0, "rewards/margins": -0.00463867, "logps/rejected": -0.28125, "logps/chosen": -0.32617188, "logits/rejected": 0.5078125, "logits/chosen": 0.5625, "nll_loss": 0.32617188, "log_odds_ratio": -0.77734375, "log_odds_chosen": -0.16210938, "epoch": 0.12158055, "global_step/max_steps": "40/329", "percentage": "12.16%", "elapsed_time": "42m 20s", "remaining_time": "5h 5m 53s"}
 
 
 
 
 
 
 
 
 
 
 
 
41
  {"loss": 0.43920898, "grad_norm": 2.3938894, "learning_rate": 9.83e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015565, "rewards/chosen": -0.02514648, "rewards/rejected": -0.03955078, "rewards/accuracies": 1.0, "rewards/margins": 0.0144043, "logps/rejected": -0.39453125, "logps/chosen": -0.25195312, "logits/rejected": 0.6640625, "logits/chosen": 0.78515625, "nll_loss": 0.25, "log_odds_ratio": -0.47070312, "log_odds_chosen": 0.515625, "epoch": 0.11550152, "global_step/max_steps": "38/329", "percentage": "11.55%", "elapsed_time": "40m 34s", "remaining_time": "5h 10m 44s"}
42
  {"loss": 0.52661133, "grad_norm": 4.36714908, "learning_rate": 9.8e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015636, "rewards/chosen": -0.04272461, "rewards/rejected": -0.04785156, "rewards/accuracies": 1.0, "rewards/margins": 0.00512695, "logps/rejected": -0.47851562, "logps/chosen": -0.42773438, "logits/rejected": 0.6171875, "logits/chosen": 0.52734375, "nll_loss": 0.42773438, "log_odds_ratio": -0.625, "log_odds_chosen": 0.13671875, "epoch": 0.11854103, "global_step/max_steps": "39/329", "percentage": "11.85%", "elapsed_time": "41m 27s", "remaining_time": "5h 8m 15s"}
43
  {"loss": 0.47045898, "grad_norm": 4.14709109, "learning_rate": 9.76e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015704, "rewards/chosen": -0.03271484, "rewards/rejected": -0.02807617, "rewards/accuracies": 0.0, "rewards/margins": -0.00463867, "logps/rejected": -0.28125, "logps/chosen": -0.32617188, "logits/rejected": 0.5078125, "logits/chosen": 0.5625, "nll_loss": 0.32617188, "log_odds_ratio": -0.77734375, "log_odds_chosen": -0.16210938, "epoch": 0.12158055, "global_step/max_steps": "40/329", "percentage": "12.16%", "elapsed_time": "42m 20s", "remaining_time": "5h 5m 53s"}
44
+ {"eval_loss": 0.430435, "eval_runtime": 141.4573, "eval_samples_per_second": 0.573, "eval_steps_per_second": 0.078, "eval_rewards/chosen": -0.03850764, "eval_rewards/rejected": -0.04306863, "eval_rewards/accuracies": 0.54545456, "eval_rewards/margins": 0.00456099, "eval_logps/rejected": -0.43066406, "eval_logps/chosen": -0.38512075, "eval_logits/rejected": 0.59778941, "eval_logits/chosen": 0.43534711, "eval_nll_loss": 0.38556463, "eval_log_odds_ratio": -0.65500712, "eval_log_odds_chosen": 0.12073863, "epoch": 0.12158055, "global_step/max_steps": "40/329", "percentage": "12.16%", "elapsed_time": "44m 41s", "remaining_time": "5h 22m 55s"}
45
+ {"loss": 0.45703125, "grad_norm": 3.23141242, "learning_rate": 9.73e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.014723, "rewards/chosen": -0.04174805, "rewards/rejected": -0.06591797, "rewards/accuracies": 1.0, "rewards/margins": 0.02416992, "logps/rejected": -0.66015625, "logps/chosen": -0.41796875, "logits/rejected": 0.9375, "logits/chosen": 0.703125, "nll_loss": 0.41796875, "log_odds_ratio": -0.43945312, "log_odds_chosen": 0.59375, "epoch": 0.12462006, "global_step/max_steps": "41/329", "percentage": "12.46%", "elapsed_time": "46m 17s", "remaining_time": "5h 25m 13s"}
46
+ {"loss": 0.47009277, "grad_norm": 3.47659563, "learning_rate": 9.7e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.014886, "rewards/chosen": -0.03955078, "rewards/rejected": -0.04223633, "rewards/accuracies": 1.0, "rewards/margins": 0.00268555, "logps/rejected": -0.421875, "logps/chosen": -0.39453125, "logits/rejected": 0.5546875, "logits/chosen": 0.6015625, "nll_loss": 0.39453125, "log_odds_ratio": -0.65625, "log_odds_chosen": 0.08203125, "epoch": 0.12765957, "global_step/max_steps": "42/329", "percentage": "12.77%", "elapsed_time": "46m 54s", "remaining_time": "5h 20m 33s"}
47
+ {"loss": 0.47509766, "grad_norm": 2.83518068, "learning_rate": 9.66e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.014982, "rewards/chosen": -0.04736328, "rewards/rejected": -0.04052734, "rewards/accuracies": 0.0, "rewards/margins": -0.00683594, "logps/rejected": -0.40625, "logps/chosen": -0.47265625, "logits/rejected": 0.72265625, "logits/chosen": 0.17382812, "nll_loss": 0.47265625, "log_odds_ratio": -0.79296875, "log_odds_chosen": -0.1875, "epoch": 0.13069909, "global_step/max_steps": "43/329", "percentage": "13.07%", "elapsed_time": "47m 43s", "remaining_time": "5h 17m 24s"}
48
+ {"loss": 0.49536133, "grad_norm": 5.37874758, "learning_rate": 9.63e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015103, "rewards/chosen": -0.03979492, "rewards/rejected": -0.03881836, "rewards/accuracies": 0.0, "rewards/margins": -0.00097656, "logps/rejected": -0.38867188, "logps/chosen": -0.3984375, "logits/rejected": 0.8046875, "logits/chosen": 0.7578125, "nll_loss": 0.39648438, "log_odds_ratio": -0.70703125, "log_odds_chosen": -0.03320312, "epoch": 0.1337386, "global_step/max_steps": "44/329", "percentage": "13.37%", "elapsed_time": "48m 26s", "remaining_time": "5h 13m 45s"}
49
+ {"loss": 0.4909668, "grad_norm": 2.45579564, "learning_rate": 9.59e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015144, "rewards/chosen": -0.02893066, "rewards/rejected": -0.04785156, "rewards/accuracies": 1.0, "rewards/margins": 0.0189209, "logps/rejected": -0.47851562, "logps/chosen": -0.2890625, "logits/rejected": 0.50390625, "logits/chosen": 0.66796875, "nll_loss": 0.2890625, "log_odds_ratio": -0.43945312, "log_odds_chosen": 0.6015625, "epoch": 0.13677812, "global_step/max_steps": "45/329", "percentage": "13.68%", "elapsed_time": "49m 24s", "remaining_time": "5h 11m 50s"}
50
+ {"loss": 0.39465332, "grad_norm": 2.91095937, "learning_rate": 9.56e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015301, "rewards/chosen": -0.04785156, "rewards/rejected": -0.04052734, "rewards/accuracies": 0.0, "rewards/margins": -0.00732422, "logps/rejected": -0.40625, "logps/chosen": -0.47851562, "logits/rejected": 0.57421875, "logits/chosen": 0.6875, "nll_loss": 0.47851562, "log_odds_ratio": -0.80078125, "log_odds_chosen": -0.20507812, "epoch": 0.13981763, "global_step/max_steps": "46/329", "percentage": "13.98%", "elapsed_time": "49m 59s", "remaining_time": "5h 7m 33s"}
51
+ {"loss": 0.52075195, "grad_norm": 3.60181006, "learning_rate": 9.53e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015388, "rewards/chosen": -0.08154297, "rewards/rejected": -0.09472656, "rewards/accuracies": 1.0, "rewards/margins": 0.01318359, "logps/rejected": -0.9453125, "logps/chosen": -0.81640625, "logits/rejected": 0.15722656, "logits/chosen": -0.23925781, "nll_loss": 0.81640625, "log_odds_ratio": -0.58984375, "log_odds_chosen": 0.21875, "epoch": 0.14285714, "global_step/max_steps": "47/329", "percentage": "14.29%", "elapsed_time": "50m 47s", "remaining_time": "5h 4m 45s"}
52
+ {"loss": 0.47900391, "grad_norm": 2.62579317, "learning_rate": 9.49e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015451, "rewards/chosen": -0.04370117, "rewards/rejected": -0.03686523, "rewards/accuracies": 0.0, "rewards/margins": -0.00683594, "logps/rejected": -0.36914062, "logps/chosen": -0.4375, "logits/rejected": 0.66015625, "logits/chosen": 0.68359375, "nll_loss": 0.43945312, "log_odds_ratio": -0.8046875, "log_odds_chosen": -0.20898438, "epoch": 0.14589666, "global_step/max_steps": "48/329", "percentage": "14.59%", "elapsed_time": "51m 39s", "remaining_time": "5h 2m 26s"}
53
+ {"loss": 0.43115234, "grad_norm": 3.17159777, "learning_rate": 9.46e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015583, "rewards/chosen": -0.05029297, "rewards/rejected": -0.04516602, "rewards/accuracies": 0.0, "rewards/margins": -0.00512695, "logps/rejected": -0.45117188, "logps/chosen": -0.50390625, "logits/rejected": 1.078125, "logits/chosen": -0.6015625, "nll_loss": 0.50390625, "log_odds_ratio": -0.765625, "log_odds_chosen": -0.13867188, "epoch": 0.14893617, "global_step/max_steps": "49/329", "percentage": "14.89%", "elapsed_time": "52m 17s", "remaining_time": "4h 58m 49s"}
54
+ {"loss": 0.40551758, "grad_norm": 2.99144395, "learning_rate": 9.43e-06, "memory(GiB)": 133.18, "train_speed(iter/s)": 0.015667, "rewards/chosen": -0.03833008, "rewards/rejected": -0.04418945, "rewards/accuracies": 1.0, "rewards/margins": 0.00585938, "logps/rejected": -0.44140625, "logps/chosen": -0.3828125, "logits/rejected": 1.0546875, "logits/chosen": 0.80859375, "nll_loss": 0.3828125, "log_odds_ratio": -0.609375, "log_odds_chosen": 0.17578125, "epoch": 0.15197568, "global_step/max_steps": "50/329", "percentage": "15.20%", "elapsed_time": "53m 4s", "remaining_time": "4h 56m 9s"}