Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step700/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +335 -3
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1172343536
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a25f53b86cb0d18e76005ef7631a16e5d28e2c4b40e63c63c5944927040e7cae
|
| 3 |
size 1172343536
|
last-checkpoint/global_step700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25357a757ebf5592fa042b9321b556dc5634272c1168ac340bdca9a626f23e07
|
| 3 |
+
size 883824229
|
last-checkpoint/global_step700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2ac1f8359e420e4a66c4ef48a112cc3c99f672cc078c0417c033effa91df13f
|
| 3 |
+
size 883824293
|
last-checkpoint/global_step700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c5e40e5561d12dda73279ee7288b72c8d8d7d3b6b27703ef6d98f69114e4cef
|
| 3 |
+
size 883824293
|
last-checkpoint/global_step700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2e26352cbdcf821b445feaa115008ab37c4ef40c9989046a8e82182faf22e44
|
| 3 |
+
size 883824293
|
last-checkpoint/global_step700/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1df0fa0c4467f3cbadeca81e7232ed395180b129c2837ed3d9ffdc195122db60
|
| 3 |
+
size 1172522073
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step700
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f67957e71beac3aac584ce7da49055cc9c7edaf3d732505bfffa5511f709f41
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:305594a2a478d20bb06c74dcc62d37dde101425234afb4331ef411c36814de11
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1eb2d84f63d7341151dcb60706643579b7c3105045d9ce41fc7fd7aa2c6c8fb0
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5633e0320a424cdde99e10e62d0382c89fdf5b90d88d95ba4955f9644083937
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:336d3de2036e71626b0f815e82e0c2ae29554f5ccd7af556bd21908e68a7f924
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2000,6 +2000,338 @@
|
|
| 2000 |
"eval_samples_per_second": 45.237,
|
| 2001 |
"eval_steps_per_second": 2.835,
|
| 2002 |
"step": 600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2003 |
}
|
| 2004 |
],
|
| 2005 |
"logging_steps": 5,
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.028052611276507378,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.6080347448425625,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 700,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2000 |
"eval_samples_per_second": 45.237,
|
| 2001 |
"eval_steps_per_second": 2.835,
|
| 2002 |
"step": 600
|
| 2003 |
+
},
|
| 2004 |
+
{
|
| 2005 |
+
"epoch": 0.5255157437567861,
|
| 2006 |
+
"grad_norm": 0.30262914299964905,
|
| 2007 |
+
"learning_rate": 2.582497171281706e-05,
|
| 2008 |
+
"logits/chosen": -0.912109375,
|
| 2009 |
+
"logits/rejected": -1.4345703125,
|
| 2010 |
+
"logps/chosen": -99.8499984741211,
|
| 2011 |
+
"logps/rejected": -208.0,
|
| 2012 |
+
"loss": 0.0349,
|
| 2013 |
+
"rewards/accuracies": 0.9921875,
|
| 2014 |
+
"rewards/chosen": -2.3775391578674316,
|
| 2015 |
+
"rewards/margins": 14.800000190734863,
|
| 2016 |
+
"rewards/rejected": -17.173437118530273,
|
| 2017 |
+
"step": 605
|
| 2018 |
+
},
|
| 2019 |
+
{
|
| 2020 |
+
"epoch": 0.5298588490770901,
|
| 2021 |
+
"grad_norm": 1.1037715673446655,
|
| 2022 |
+
"learning_rate": 2.582167183916507e-05,
|
| 2023 |
+
"logits/chosen": -0.8954101800918579,
|
| 2024 |
+
"logits/rejected": -1.4089844226837158,
|
| 2025 |
+
"logps/chosen": -97.625,
|
| 2026 |
+
"logps/rejected": -205.6999969482422,
|
| 2027 |
+
"loss": 0.0239,
|
| 2028 |
+
"rewards/accuracies": 0.9937499761581421,
|
| 2029 |
+
"rewards/chosen": -2.354687452316284,
|
| 2030 |
+
"rewards/margins": 15.201562881469727,
|
| 2031 |
+
"rewards/rejected": -17.5546875,
|
| 2032 |
+
"step": 610
|
| 2033 |
+
},
|
| 2034 |
+
{
|
| 2035 |
+
"epoch": 0.5342019543973942,
|
| 2036 |
+
"grad_norm": 1.3360408544540405,
|
| 2037 |
+
"learning_rate": 2.5818339675420697e-05,
|
| 2038 |
+
"logits/chosen": -0.9012695550918579,
|
| 2039 |
+
"logits/rejected": -1.4142577648162842,
|
| 2040 |
+
"logps/chosen": -99.1624984741211,
|
| 2041 |
+
"logps/rejected": -215.75,
|
| 2042 |
+
"loss": 0.0197,
|
| 2043 |
+
"rewards/accuracies": 0.9921875,
|
| 2044 |
+
"rewards/chosen": -2.437304735183716,
|
| 2045 |
+
"rewards/margins": 16.6875,
|
| 2046 |
+
"rewards/rejected": -19.128124237060547,
|
| 2047 |
+
"step": 615
|
| 2048 |
+
},
|
| 2049 |
+
{
|
| 2050 |
+
"epoch": 0.5385450597176982,
|
| 2051 |
+
"grad_norm": 0.8700627684593201,
|
| 2052 |
+
"learning_rate": 2.5814975229972658e-05,
|
| 2053 |
+
"logits/chosen": -1.006250023841858,
|
| 2054 |
+
"logits/rejected": -1.4474608898162842,
|
| 2055 |
+
"logps/chosen": -104.3375015258789,
|
| 2056 |
+
"logps/rejected": -229.0,
|
| 2057 |
+
"loss": 0.0542,
|
| 2058 |
+
"rewards/accuracies": 0.979687511920929,
|
| 2059 |
+
"rewards/chosen": -3.26171875,
|
| 2060 |
+
"rewards/margins": 17.548437118530273,
|
| 2061 |
+
"rewards/rejected": -20.817188262939453,
|
| 2062 |
+
"step": 620
|
| 2063 |
+
},
|
| 2064 |
+
{
|
| 2065 |
+
"epoch": 0.5428881650380022,
|
| 2066 |
+
"grad_norm": 0.7034734487533569,
|
| 2067 |
+
"learning_rate": 2.581157851129095e-05,
|
| 2068 |
+
"logits/chosen": -0.964648425579071,
|
| 2069 |
+
"logits/rejected": -1.421289086341858,
|
| 2070 |
+
"logps/chosen": -102.63749694824219,
|
| 2071 |
+
"logps/rejected": -212.625,
|
| 2072 |
+
"loss": 0.0318,
|
| 2073 |
+
"rewards/accuracies": 0.984375,
|
| 2074 |
+
"rewards/chosen": -3.0042967796325684,
|
| 2075 |
+
"rewards/margins": 14.979687690734863,
|
| 2076 |
+
"rewards/rejected": -17.978124618530273,
|
| 2077 |
+
"step": 625
|
| 2078 |
+
},
|
| 2079 |
+
{
|
| 2080 |
+
"epoch": 0.5472312703583062,
|
| 2081 |
+
"grad_norm": 0.245732381939888,
|
| 2082 |
+
"learning_rate": 2.5808149527926798e-05,
|
| 2083 |
+
"logits/chosen": -1.041894555091858,
|
| 2084 |
+
"logits/rejected": -1.42578125,
|
| 2085 |
+
"logps/chosen": -106.23750305175781,
|
| 2086 |
+
"logps/rejected": -210.97500610351562,
|
| 2087 |
+
"loss": 0.0572,
|
| 2088 |
+
"rewards/accuracies": 0.9828125238418579,
|
| 2089 |
+
"rewards/chosen": -3.512890577316284,
|
| 2090 |
+
"rewards/margins": 14.265625,
|
| 2091 |
+
"rewards/rejected": -17.776561737060547,
|
| 2092 |
+
"step": 630
|
| 2093 |
+
},
|
| 2094 |
+
{
|
| 2095 |
+
"epoch": 0.5515743756786102,
|
| 2096 |
+
"grad_norm": 0.418514221906662,
|
| 2097 |
+
"learning_rate": 2.5804688288512667e-05,
|
| 2098 |
+
"logits/chosen": -1.086328148841858,
|
| 2099 |
+
"logits/rejected": -1.48828125,
|
| 2100 |
+
"logps/chosen": -107.38749694824219,
|
| 2101 |
+
"logps/rejected": -210.4499969482422,
|
| 2102 |
+
"loss": 0.0117,
|
| 2103 |
+
"rewards/accuracies": 0.996874988079071,
|
| 2104 |
+
"rewards/chosen": -3.696093797683716,
|
| 2105 |
+
"rewards/margins": 14.215624809265137,
|
| 2106 |
+
"rewards/rejected": -17.90625,
|
| 2107 |
+
"step": 635
|
| 2108 |
+
},
|
| 2109 |
+
{
|
| 2110 |
+
"epoch": 0.5559174809989142,
|
| 2111 |
+
"grad_norm": 1.4381979703903198,
|
| 2112 |
+
"learning_rate": 2.5801194801762228e-05,
|
| 2113 |
+
"logits/chosen": -1.148828148841858,
|
| 2114 |
+
"logits/rejected": -1.5232422351837158,
|
| 2115 |
+
"logps/chosen": -108.26249694824219,
|
| 2116 |
+
"logps/rejected": -222.85000610351562,
|
| 2117 |
+
"loss": 0.0217,
|
| 2118 |
+
"rewards/accuracies": 0.9921875,
|
| 2119 |
+
"rewards/chosen": -3.8187499046325684,
|
| 2120 |
+
"rewards/margins": 15.737500190734863,
|
| 2121 |
+
"rewards/rejected": -19.556249618530273,
|
| 2122 |
+
"step": 640
|
| 2123 |
+
},
|
| 2124 |
+
{
|
| 2125 |
+
"epoch": 0.5602605863192183,
|
| 2126 |
+
"grad_norm": 2.1103994846343994,
|
| 2127 |
+
"learning_rate": 2.579766907647032e-05,
|
| 2128 |
+
"logits/chosen": -1.172265648841858,
|
| 2129 |
+
"logits/rejected": -1.5222656726837158,
|
| 2130 |
+
"logps/chosen": -106.4625015258789,
|
| 2131 |
+
"logps/rejected": -222.0,
|
| 2132 |
+
"loss": 0.0257,
|
| 2133 |
+
"rewards/accuracies": 0.9906250238418579,
|
| 2134 |
+
"rewards/chosen": -3.8203125,
|
| 2135 |
+
"rewards/margins": 16.510936737060547,
|
| 2136 |
+
"rewards/rejected": -20.325000762939453,
|
| 2137 |
+
"step": 645
|
| 2138 |
+
},
|
| 2139 |
+
{
|
| 2140 |
+
"epoch": 0.5646036916395223,
|
| 2141 |
+
"grad_norm": 2.4054081439971924,
|
| 2142 |
+
"learning_rate": 2.579411112151296e-05,
|
| 2143 |
+
"logits/chosen": -1.268164038658142,
|
| 2144 |
+
"logits/rejected": -1.5841796398162842,
|
| 2145 |
+
"logps/chosen": -111.9625015258789,
|
| 2146 |
+
"logps/rejected": -230.5500030517578,
|
| 2147 |
+
"loss": 0.0507,
|
| 2148 |
+
"rewards/accuracies": 0.981249988079071,
|
| 2149 |
+
"rewards/chosen": -4.525000095367432,
|
| 2150 |
+
"rewards/margins": 16.7578125,
|
| 2151 |
+
"rewards/rejected": -21.278125762939453,
|
| 2152 |
+
"step": 650
|
| 2153 |
+
},
|
| 2154 |
+
{
|
| 2155 |
+
"epoch": 0.5646036916395223,
|
| 2156 |
+
"eval_logits/chosen": -1.2523972988128662,
|
| 2157 |
+
"eval_logits/rejected": -1.6043264865875244,
|
| 2158 |
+
"eval_logps/chosen": -107.36823272705078,
|
| 2159 |
+
"eval_logps/rejected": -217.88809204101562,
|
| 2160 |
+
"eval_loss": 0.031916987150907516,
|
| 2161 |
+
"eval_rewards/accuracies": 0.9880415201187134,
|
| 2162 |
+
"eval_rewards/chosen": -3.794872522354126,
|
| 2163 |
+
"eval_rewards/margins": 15.53542423248291,
|
| 2164 |
+
"eval_rewards/rejected": -19.325586318969727,
|
| 2165 |
+
"eval_runtime": 97.6969,
|
| 2166 |
+
"eval_samples_per_second": 45.242,
|
| 2167 |
+
"eval_steps_per_second": 2.835,
|
| 2168 |
+
"step": 650
|
| 2169 |
+
},
|
| 2170 |
+
{
|
| 2171 |
+
"epoch": 0.5689467969598263,
|
| 2172 |
+
"grad_norm": 0.738905668258667,
|
| 2173 |
+
"learning_rate": 2.5790520945847294e-05,
|
| 2174 |
+
"logits/chosen": -1.232812523841858,
|
| 2175 |
+
"logits/rejected": -1.612695336341858,
|
| 2176 |
+
"logps/chosen": -107.1500015258789,
|
| 2177 |
+
"logps/rejected": -219.22500610351562,
|
| 2178 |
+
"loss": 0.0169,
|
| 2179 |
+
"rewards/accuracies": 0.9921875,
|
| 2180 |
+
"rewards/chosen": -3.674999952316284,
|
| 2181 |
+
"rewards/margins": 15.8125,
|
| 2182 |
+
"rewards/rejected": -19.496875762939453,
|
| 2183 |
+
"step": 655
|
| 2184 |
+
},
|
| 2185 |
+
{
|
| 2186 |
+
"epoch": 0.5732899022801303,
|
| 2187 |
+
"grad_norm": 1.7680950164794922,
|
| 2188 |
+
"learning_rate": 2.578689855851158e-05,
|
| 2189 |
+
"logits/chosen": -1.215234398841858,
|
| 2190 |
+
"logits/rejected": -1.6212890148162842,
|
| 2191 |
+
"logps/chosen": -103.2874984741211,
|
| 2192 |
+
"logps/rejected": -212.97500610351562,
|
| 2193 |
+
"loss": 0.0223,
|
| 2194 |
+
"rewards/accuracies": 0.9937499761581421,
|
| 2195 |
+
"rewards/chosen": -3.3832030296325684,
|
| 2196 |
+
"rewards/margins": 15.240625381469727,
|
| 2197 |
+
"rewards/rejected": -18.618749618530273,
|
| 2198 |
+
"step": 660
|
| 2199 |
+
},
|
| 2200 |
+
{
|
| 2201 |
+
"epoch": 0.5776330076004343,
|
| 2202 |
+
"grad_norm": 1.0927232503890991,
|
| 2203 |
+
"learning_rate": 2.5783243968625182e-05,
|
| 2204 |
+
"logits/chosen": -1.1130859851837158,
|
| 2205 |
+
"logits/rejected": -1.591796875,
|
| 2206 |
+
"logps/chosen": -98.6500015258789,
|
| 2207 |
+
"logps/rejected": -208.6750030517578,
|
| 2208 |
+
"loss": 0.0233,
|
| 2209 |
+
"rewards/accuracies": 0.989062488079071,
|
| 2210 |
+
"rewards/chosen": -1.92431640625,
|
| 2211 |
+
"rewards/margins": 15.643750190734863,
|
| 2212 |
+
"rewards/rejected": -17.564062118530273,
|
| 2213 |
+
"step": 665
|
| 2214 |
+
},
|
| 2215 |
+
{
|
| 2216 |
+
"epoch": 0.5819761129207384,
|
| 2217 |
+
"grad_norm": 1.0042508840560913,
|
| 2218 |
+
"learning_rate": 2.577955718538852e-05,
|
| 2219 |
+
"logits/chosen": -0.9864257574081421,
|
| 2220 |
+
"logits/rejected": -1.5556640625,
|
| 2221 |
+
"logps/chosen": -91.2874984741211,
|
| 2222 |
+
"logps/rejected": -200.10000610351562,
|
| 2223 |
+
"loss": 0.0514,
|
| 2224 |
+
"rewards/accuracies": 0.9859374761581421,
|
| 2225 |
+
"rewards/chosen": -0.8651367425918579,
|
| 2226 |
+
"rewards/margins": 15.442187309265137,
|
| 2227 |
+
"rewards/rejected": -16.306249618530273,
|
| 2228 |
+
"step": 670
|
| 2229 |
+
},
|
| 2230 |
+
{
|
| 2231 |
+
"epoch": 0.5863192182410424,
|
| 2232 |
+
"grad_norm": 0.8699201345443726,
|
| 2233 |
+
"learning_rate": 2.5775838218083068e-05,
|
| 2234 |
+
"logits/chosen": -0.924609363079071,
|
| 2235 |
+
"logits/rejected": -1.532812476158142,
|
| 2236 |
+
"logps/chosen": -87.07499694824219,
|
| 2237 |
+
"logps/rejected": -194.47500610351562,
|
| 2238 |
+
"loss": 0.0149,
|
| 2239 |
+
"rewards/accuracies": 0.995312511920929,
|
| 2240 |
+
"rewards/chosen": -0.359140008687973,
|
| 2241 |
+
"rewards/margins": 15.171875,
|
| 2242 |
+
"rewards/rejected": -15.534375190734863,
|
| 2243 |
+
"step": 675
|
| 2244 |
+
},
|
| 2245 |
+
{
|
| 2246 |
+
"epoch": 0.5906623235613464,
|
| 2247 |
+
"grad_norm": 0.5305850505828857,
|
| 2248 |
+
"learning_rate": 2.5772087076071322e-05,
|
| 2249 |
+
"logits/chosen": -0.931445300579071,
|
| 2250 |
+
"logits/rejected": -1.5304687023162842,
|
| 2251 |
+
"logps/chosen": -89.5625,
|
| 2252 |
+
"logps/rejected": -204.35000610351562,
|
| 2253 |
+
"loss": 0.0349,
|
| 2254 |
+
"rewards/accuracies": 0.989062488079071,
|
| 2255 |
+
"rewards/chosen": -0.29111021757125854,
|
| 2256 |
+
"rewards/margins": 16.0859375,
|
| 2257 |
+
"rewards/rejected": -16.365625381469727,
|
| 2258 |
+
"step": 680
|
| 2259 |
+
},
|
| 2260 |
+
{
|
| 2261 |
+
"epoch": 0.5950054288816504,
|
| 2262 |
+
"grad_norm": 0.5979002118110657,
|
| 2263 |
+
"learning_rate": 2.5768303768796776e-05,
|
| 2264 |
+
"logits/chosen": -0.9864257574081421,
|
| 2265 |
+
"logits/rejected": -1.5128905773162842,
|
| 2266 |
+
"logps/chosen": -89.9749984741211,
|
| 2267 |
+
"logps/rejected": -212.3249969482422,
|
| 2268 |
+
"loss": 0.0221,
|
| 2269 |
+
"rewards/accuracies": 0.9906250238418579,
|
| 2270 |
+
"rewards/chosen": -0.9491897821426392,
|
| 2271 |
+
"rewards/margins": 17.404687881469727,
|
| 2272 |
+
"rewards/rejected": -18.345312118530273,
|
| 2273 |
+
"step": 685
|
| 2274 |
+
},
|
| 2275 |
+
{
|
| 2276 |
+
"epoch": 0.5993485342019544,
|
| 2277 |
+
"grad_norm": 0.6950270533561707,
|
| 2278 |
+
"learning_rate": 2.5764488305783906e-05,
|
| 2279 |
+
"logits/chosen": -1.0769531726837158,
|
| 2280 |
+
"logits/rejected": -1.5525391101837158,
|
| 2281 |
+
"logps/chosen": -98.4625015258789,
|
| 2282 |
+
"logps/rejected": -223.6999969482422,
|
| 2283 |
+
"loss": 0.0727,
|
| 2284 |
+
"rewards/accuracies": 0.9859374761581421,
|
| 2285 |
+
"rewards/chosen": -2.060473680496216,
|
| 2286 |
+
"rewards/margins": 17.839061737060547,
|
| 2287 |
+
"rewards/rejected": -19.8984375,
|
| 2288 |
+
"step": 690
|
| 2289 |
+
},
|
| 2290 |
+
{
|
| 2291 |
+
"epoch": 0.6036916395222585,
|
| 2292 |
+
"grad_norm": 1.630603313446045,
|
| 2293 |
+
"learning_rate": 2.576064069663813e-05,
|
| 2294 |
+
"logits/chosen": -1.0413086414337158,
|
| 2295 |
+
"logits/rejected": -1.602929711341858,
|
| 2296 |
+
"logps/chosen": -97.2874984741211,
|
| 2297 |
+
"logps/rejected": -217.39999389648438,
|
| 2298 |
+
"loss": 0.0391,
|
| 2299 |
+
"rewards/accuracies": 0.987500011920929,
|
| 2300 |
+
"rewards/chosen": -1.9284179210662842,
|
| 2301 |
+
"rewards/margins": 16.299999237060547,
|
| 2302 |
+
"rewards/rejected": -18.228124618530273,
|
| 2303 |
+
"step": 695
|
| 2304 |
+
},
|
| 2305 |
+
{
|
| 2306 |
+
"epoch": 0.6080347448425625,
|
| 2307 |
+
"grad_norm": 1.2844674587249756,
|
| 2308 |
+
"learning_rate": 2.57567609510458e-05,
|
| 2309 |
+
"logits/chosen": -1.130468726158142,
|
| 2310 |
+
"logits/rejected": -1.641992211341858,
|
| 2311 |
+
"logps/chosen": -101.125,
|
| 2312 |
+
"logps/rejected": -203.625,
|
| 2313 |
+
"loss": 0.0322,
|
| 2314 |
+
"rewards/accuracies": 0.9859374761581421,
|
| 2315 |
+
"rewards/chosen": -2.3846678733825684,
|
| 2316 |
+
"rewards/margins": 14.40625,
|
| 2317 |
+
"rewards/rejected": -16.792186737060547,
|
| 2318 |
+
"step": 700
|
| 2319 |
+
},
|
| 2320 |
+
{
|
| 2321 |
+
"epoch": 0.6080347448425625,
|
| 2322 |
+
"eval_logits/chosen": -1.1740325689315796,
|
| 2323 |
+
"eval_logits/rejected": -1.6940432786941528,
|
| 2324 |
+
"eval_logps/chosen": -100.71479797363281,
|
| 2325 |
+
"eval_logps/rejected": -200.29603576660156,
|
| 2326 |
+
"eval_loss": 0.028052611276507378,
|
| 2327 |
+
"eval_rewards/accuracies": 0.9902978539466858,
|
| 2328 |
+
"eval_rewards/chosen": -2.572061061859131,
|
| 2329 |
+
"eval_rewards/margins": 13.520757675170898,
|
| 2330 |
+
"eval_rewards/rejected": -16.09092903137207,
|
| 2331 |
+
"eval_runtime": 97.5048,
|
| 2332 |
+
"eval_samples_per_second": 45.331,
|
| 2333 |
+
"eval_steps_per_second": 2.841,
|
| 2334 |
+
"step": 700
|
| 2335 |
}
|
| 2336 |
],
|
| 2337 |
"logging_steps": 5,
|