FormlessAI commited on
Commit
4c8637f
·
verified ·
1 Parent(s): 26e3113

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2120d5d536aba10e00501ceedc3558e455e3cff895955ef8f273753d39d93536
3
  size 1172343536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a25f53b86cb0d18e76005ef7631a16e5d28e2c4b40e63c63c5944927040e7cae
3
  size 1172343536
last-checkpoint/global_step700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25357a757ebf5592fa042b9321b556dc5634272c1168ac340bdca9a626f23e07
3
+ size 883824229
last-checkpoint/global_step700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2ac1f8359e420e4a66c4ef48a112cc3c99f672cc078c0417c033effa91df13f
3
+ size 883824293
last-checkpoint/global_step700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c5e40e5561d12dda73279ee7288b72c8d8d7d3b6b27703ef6d98f69114e4cef
3
+ size 883824293
last-checkpoint/global_step700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2e26352cbdcf821b445feaa115008ab37c4ef40c9989046a8e82182faf22e44
3
+ size 883824293
last-checkpoint/global_step700/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1df0fa0c4467f3cbadeca81e7232ed395180b129c2837ed3d9ffdc195122db60
3
+ size 1172522073
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step600
 
1
+ global_step700
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4894c218b3f6eaf3b9761899ca66cc4ee052559eaf58bed0eb77d1f141f5a8f8
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f67957e71beac3aac584ce7da49055cc9c7edaf3d732505bfffa5511f709f41
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e35dc37a61e3442d3a3c91b1def510a65866249fe0f6bfe143097becbb018fdc
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:305594a2a478d20bb06c74dcc62d37dde101425234afb4331ef411c36814de11
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cee7319258b43ce62816538b7f06b4a5ae5b8b56e7ea61d662ecb9ed3402c92a
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eb2d84f63d7341151dcb60706643579b7c3105045d9ce41fc7fd7aa2c6c8fb0
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aed48390c7aa15be53030fdcd4b9104f35ff8b16f59f6cd4b6566c973f83388a
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5633e0320a424cdde99e10e62d0382c89fdf5b90d88d95ba4955f9644083937
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b643f96fae1c7c195d82363db91efd66b514c2fc5280977aad9c8846720b5046
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:336d3de2036e71626b0f815e82e0c2ae29554f5ccd7af556bd21908e68a7f924
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.030555352568626404,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.5211726384364821,
6
  "eval_steps": 50,
7
- "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2000,6 +2000,338 @@
2000
  "eval_samples_per_second": 45.237,
2001
  "eval_steps_per_second": 2.835,
2002
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2003
  }
2004
  ],
2005
  "logging_steps": 5,
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.028052611276507378,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.6080347448425625,
6
  "eval_steps": 50,
7
+ "global_step": 700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2000
  "eval_samples_per_second": 45.237,
2001
  "eval_steps_per_second": 2.835,
2002
  "step": 600
2003
+ },
2004
+ {
2005
+ "epoch": 0.5255157437567861,
2006
+ "grad_norm": 0.30262914299964905,
2007
+ "learning_rate": 2.582497171281706e-05,
2008
+ "logits/chosen": -0.912109375,
2009
+ "logits/rejected": -1.4345703125,
2010
+ "logps/chosen": -99.8499984741211,
2011
+ "logps/rejected": -208.0,
2012
+ "loss": 0.0349,
2013
+ "rewards/accuracies": 0.9921875,
2014
+ "rewards/chosen": -2.3775391578674316,
2015
+ "rewards/margins": 14.800000190734863,
2016
+ "rewards/rejected": -17.173437118530273,
2017
+ "step": 605
2018
+ },
2019
+ {
2020
+ "epoch": 0.5298588490770901,
2021
+ "grad_norm": 1.1037715673446655,
2022
+ "learning_rate": 2.582167183916507e-05,
2023
+ "logits/chosen": -0.8954101800918579,
2024
+ "logits/rejected": -1.4089844226837158,
2025
+ "logps/chosen": -97.625,
2026
+ "logps/rejected": -205.6999969482422,
2027
+ "loss": 0.0239,
2028
+ "rewards/accuracies": 0.9937499761581421,
2029
+ "rewards/chosen": -2.354687452316284,
2030
+ "rewards/margins": 15.201562881469727,
2031
+ "rewards/rejected": -17.5546875,
2032
+ "step": 610
2033
+ },
2034
+ {
2035
+ "epoch": 0.5342019543973942,
2036
+ "grad_norm": 1.3360408544540405,
2037
+ "learning_rate": 2.5818339675420697e-05,
2038
+ "logits/chosen": -0.9012695550918579,
2039
+ "logits/rejected": -1.4142577648162842,
2040
+ "logps/chosen": -99.1624984741211,
2041
+ "logps/rejected": -215.75,
2042
+ "loss": 0.0197,
2043
+ "rewards/accuracies": 0.9921875,
2044
+ "rewards/chosen": -2.437304735183716,
2045
+ "rewards/margins": 16.6875,
2046
+ "rewards/rejected": -19.128124237060547,
2047
+ "step": 615
2048
+ },
2049
+ {
2050
+ "epoch": 0.5385450597176982,
2051
+ "grad_norm": 0.8700627684593201,
2052
+ "learning_rate": 2.5814975229972658e-05,
2053
+ "logits/chosen": -1.006250023841858,
2054
+ "logits/rejected": -1.4474608898162842,
2055
+ "logps/chosen": -104.3375015258789,
2056
+ "logps/rejected": -229.0,
2057
+ "loss": 0.0542,
2058
+ "rewards/accuracies": 0.979687511920929,
2059
+ "rewards/chosen": -3.26171875,
2060
+ "rewards/margins": 17.548437118530273,
2061
+ "rewards/rejected": -20.817188262939453,
2062
+ "step": 620
2063
+ },
2064
+ {
2065
+ "epoch": 0.5428881650380022,
2066
+ "grad_norm": 0.7034734487533569,
2067
+ "learning_rate": 2.581157851129095e-05,
2068
+ "logits/chosen": -0.964648425579071,
2069
+ "logits/rejected": -1.421289086341858,
2070
+ "logps/chosen": -102.63749694824219,
2071
+ "logps/rejected": -212.625,
2072
+ "loss": 0.0318,
2073
+ "rewards/accuracies": 0.984375,
2074
+ "rewards/chosen": -3.0042967796325684,
2075
+ "rewards/margins": 14.979687690734863,
2076
+ "rewards/rejected": -17.978124618530273,
2077
+ "step": 625
2078
+ },
2079
+ {
2080
+ "epoch": 0.5472312703583062,
2081
+ "grad_norm": 0.245732381939888,
2082
+ "learning_rate": 2.5808149527926798e-05,
2083
+ "logits/chosen": -1.041894555091858,
2084
+ "logits/rejected": -1.42578125,
2085
+ "logps/chosen": -106.23750305175781,
2086
+ "logps/rejected": -210.97500610351562,
2087
+ "loss": 0.0572,
2088
+ "rewards/accuracies": 0.9828125238418579,
2089
+ "rewards/chosen": -3.512890577316284,
2090
+ "rewards/margins": 14.265625,
2091
+ "rewards/rejected": -17.776561737060547,
2092
+ "step": 630
2093
+ },
2094
+ {
2095
+ "epoch": 0.5515743756786102,
2096
+ "grad_norm": 0.418514221906662,
2097
+ "learning_rate": 2.5804688288512667e-05,
2098
+ "logits/chosen": -1.086328148841858,
2099
+ "logits/rejected": -1.48828125,
2100
+ "logps/chosen": -107.38749694824219,
2101
+ "logps/rejected": -210.4499969482422,
2102
+ "loss": 0.0117,
2103
+ "rewards/accuracies": 0.996874988079071,
2104
+ "rewards/chosen": -3.696093797683716,
2105
+ "rewards/margins": 14.215624809265137,
2106
+ "rewards/rejected": -17.90625,
2107
+ "step": 635
2108
+ },
2109
+ {
2110
+ "epoch": 0.5559174809989142,
2111
+ "grad_norm": 1.4381979703903198,
2112
+ "learning_rate": 2.5801194801762228e-05,
2113
+ "logits/chosen": -1.148828148841858,
2114
+ "logits/rejected": -1.5232422351837158,
2115
+ "logps/chosen": -108.26249694824219,
2116
+ "logps/rejected": -222.85000610351562,
2117
+ "loss": 0.0217,
2118
+ "rewards/accuracies": 0.9921875,
2119
+ "rewards/chosen": -3.8187499046325684,
2120
+ "rewards/margins": 15.737500190734863,
2121
+ "rewards/rejected": -19.556249618530273,
2122
+ "step": 640
2123
+ },
2124
+ {
2125
+ "epoch": 0.5602605863192183,
2126
+ "grad_norm": 2.1103994846343994,
2127
+ "learning_rate": 2.579766907647032e-05,
2128
+ "logits/chosen": -1.172265648841858,
2129
+ "logits/rejected": -1.5222656726837158,
2130
+ "logps/chosen": -106.4625015258789,
2131
+ "logps/rejected": -222.0,
2132
+ "loss": 0.0257,
2133
+ "rewards/accuracies": 0.9906250238418579,
2134
+ "rewards/chosen": -3.8203125,
2135
+ "rewards/margins": 16.510936737060547,
2136
+ "rewards/rejected": -20.325000762939453,
2137
+ "step": 645
2138
+ },
2139
+ {
2140
+ "epoch": 0.5646036916395223,
2141
+ "grad_norm": 2.4054081439971924,
2142
+ "learning_rate": 2.579411112151296e-05,
2143
+ "logits/chosen": -1.268164038658142,
2144
+ "logits/rejected": -1.5841796398162842,
2145
+ "logps/chosen": -111.9625015258789,
2146
+ "logps/rejected": -230.5500030517578,
2147
+ "loss": 0.0507,
2148
+ "rewards/accuracies": 0.981249988079071,
2149
+ "rewards/chosen": -4.525000095367432,
2150
+ "rewards/margins": 16.7578125,
2151
+ "rewards/rejected": -21.278125762939453,
2152
+ "step": 650
2153
+ },
2154
+ {
2155
+ "epoch": 0.5646036916395223,
2156
+ "eval_logits/chosen": -1.2523972988128662,
2157
+ "eval_logits/rejected": -1.6043264865875244,
2158
+ "eval_logps/chosen": -107.36823272705078,
2159
+ "eval_logps/rejected": -217.88809204101562,
2160
+ "eval_loss": 0.031916987150907516,
2161
+ "eval_rewards/accuracies": 0.9880415201187134,
2162
+ "eval_rewards/chosen": -3.794872522354126,
2163
+ "eval_rewards/margins": 15.53542423248291,
2164
+ "eval_rewards/rejected": -19.325586318969727,
2165
+ "eval_runtime": 97.6969,
2166
+ "eval_samples_per_second": 45.242,
2167
+ "eval_steps_per_second": 2.835,
2168
+ "step": 650
2169
+ },
2170
+ {
2171
+ "epoch": 0.5689467969598263,
2172
+ "grad_norm": 0.738905668258667,
2173
+ "learning_rate": 2.5790520945847294e-05,
2174
+ "logits/chosen": -1.232812523841858,
2175
+ "logits/rejected": -1.612695336341858,
2176
+ "logps/chosen": -107.1500015258789,
2177
+ "logps/rejected": -219.22500610351562,
2178
+ "loss": 0.0169,
2179
+ "rewards/accuracies": 0.9921875,
2180
+ "rewards/chosen": -3.674999952316284,
2181
+ "rewards/margins": 15.8125,
2182
+ "rewards/rejected": -19.496875762939453,
2183
+ "step": 655
2184
+ },
2185
+ {
2186
+ "epoch": 0.5732899022801303,
2187
+ "grad_norm": 1.7680950164794922,
2188
+ "learning_rate": 2.578689855851158e-05,
2189
+ "logits/chosen": -1.215234398841858,
2190
+ "logits/rejected": -1.6212890148162842,
2191
+ "logps/chosen": -103.2874984741211,
2192
+ "logps/rejected": -212.97500610351562,
2193
+ "loss": 0.0223,
2194
+ "rewards/accuracies": 0.9937499761581421,
2195
+ "rewards/chosen": -3.3832030296325684,
2196
+ "rewards/margins": 15.240625381469727,
2197
+ "rewards/rejected": -18.618749618530273,
2198
+ "step": 660
2199
+ },
2200
+ {
2201
+ "epoch": 0.5776330076004343,
2202
+ "grad_norm": 1.0927232503890991,
2203
+ "learning_rate": 2.5783243968625182e-05,
2204
+ "logits/chosen": -1.1130859851837158,
2205
+ "logits/rejected": -1.591796875,
2206
+ "logps/chosen": -98.6500015258789,
2207
+ "logps/rejected": -208.6750030517578,
2208
+ "loss": 0.0233,
2209
+ "rewards/accuracies": 0.989062488079071,
2210
+ "rewards/chosen": -1.92431640625,
2211
+ "rewards/margins": 15.643750190734863,
2212
+ "rewards/rejected": -17.564062118530273,
2213
+ "step": 665
2214
+ },
2215
+ {
2216
+ "epoch": 0.5819761129207384,
2217
+ "grad_norm": 1.0042508840560913,
2218
+ "learning_rate": 2.577955718538852e-05,
2219
+ "logits/chosen": -0.9864257574081421,
2220
+ "logits/rejected": -1.5556640625,
2221
+ "logps/chosen": -91.2874984741211,
2222
+ "logps/rejected": -200.10000610351562,
2223
+ "loss": 0.0514,
2224
+ "rewards/accuracies": 0.9859374761581421,
2225
+ "rewards/chosen": -0.8651367425918579,
2226
+ "rewards/margins": 15.442187309265137,
2227
+ "rewards/rejected": -16.306249618530273,
2228
+ "step": 670
2229
+ },
2230
+ {
2231
+ "epoch": 0.5863192182410424,
2232
+ "grad_norm": 0.8699201345443726,
2233
+ "learning_rate": 2.5775838218083068e-05,
2234
+ "logits/chosen": -0.924609363079071,
2235
+ "logits/rejected": -1.532812476158142,
2236
+ "logps/chosen": -87.07499694824219,
2237
+ "logps/rejected": -194.47500610351562,
2238
+ "loss": 0.0149,
2239
+ "rewards/accuracies": 0.995312511920929,
2240
+ "rewards/chosen": -0.359140008687973,
2241
+ "rewards/margins": 15.171875,
2242
+ "rewards/rejected": -15.534375190734863,
2243
+ "step": 675
2244
+ },
2245
+ {
2246
+ "epoch": 0.5906623235613464,
2247
+ "grad_norm": 0.5305850505828857,
2248
+ "learning_rate": 2.5772087076071322e-05,
2249
+ "logits/chosen": -0.931445300579071,
2250
+ "logits/rejected": -1.5304687023162842,
2251
+ "logps/chosen": -89.5625,
2252
+ "logps/rejected": -204.35000610351562,
2253
+ "loss": 0.0349,
2254
+ "rewards/accuracies": 0.989062488079071,
2255
+ "rewards/chosen": -0.29111021757125854,
2256
+ "rewards/margins": 16.0859375,
2257
+ "rewards/rejected": -16.365625381469727,
2258
+ "step": 680
2259
+ },
2260
+ {
2261
+ "epoch": 0.5950054288816504,
2262
+ "grad_norm": 0.5979002118110657,
2263
+ "learning_rate": 2.5768303768796776e-05,
2264
+ "logits/chosen": -0.9864257574081421,
2265
+ "logits/rejected": -1.5128905773162842,
2266
+ "logps/chosen": -89.9749984741211,
2267
+ "logps/rejected": -212.3249969482422,
2268
+ "loss": 0.0221,
2269
+ "rewards/accuracies": 0.9906250238418579,
2270
+ "rewards/chosen": -0.9491897821426392,
2271
+ "rewards/margins": 17.404687881469727,
2272
+ "rewards/rejected": -18.345312118530273,
2273
+ "step": 685
2274
+ },
2275
+ {
2276
+ "epoch": 0.5993485342019544,
2277
+ "grad_norm": 0.6950270533561707,
2278
+ "learning_rate": 2.5764488305783906e-05,
2279
+ "logits/chosen": -1.0769531726837158,
2280
+ "logits/rejected": -1.5525391101837158,
2281
+ "logps/chosen": -98.4625015258789,
2282
+ "logps/rejected": -223.6999969482422,
2283
+ "loss": 0.0727,
2284
+ "rewards/accuracies": 0.9859374761581421,
2285
+ "rewards/chosen": -2.060473680496216,
2286
+ "rewards/margins": 17.839061737060547,
2287
+ "rewards/rejected": -19.8984375,
2288
+ "step": 690
2289
+ },
2290
+ {
2291
+ "epoch": 0.6036916395222585,
2292
+ "grad_norm": 1.630603313446045,
2293
+ "learning_rate": 2.576064069663813e-05,
2294
+ "logits/chosen": -1.0413086414337158,
2295
+ "logits/rejected": -1.602929711341858,
2296
+ "logps/chosen": -97.2874984741211,
2297
+ "logps/rejected": -217.39999389648438,
2298
+ "loss": 0.0391,
2299
+ "rewards/accuracies": 0.987500011920929,
2300
+ "rewards/chosen": -1.9284179210662842,
2301
+ "rewards/margins": 16.299999237060547,
2302
+ "rewards/rejected": -18.228124618530273,
2303
+ "step": 695
2304
+ },
2305
+ {
2306
+ "epoch": 0.6080347448425625,
2307
+ "grad_norm": 1.2844674587249756,
2308
+ "learning_rate": 2.57567609510458e-05,
2309
+ "logits/chosen": -1.130468726158142,
2310
+ "logits/rejected": -1.641992211341858,
2311
+ "logps/chosen": -101.125,
2312
+ "logps/rejected": -203.625,
2313
+ "loss": 0.0322,
2314
+ "rewards/accuracies": 0.9859374761581421,
2315
+ "rewards/chosen": -2.3846678733825684,
2316
+ "rewards/margins": 14.40625,
2317
+ "rewards/rejected": -16.792186737060547,
2318
+ "step": 700
2319
+ },
2320
+ {
2321
+ "epoch": 0.6080347448425625,
2322
+ "eval_logits/chosen": -1.1740325689315796,
2323
+ "eval_logits/rejected": -1.6940432786941528,
2324
+ "eval_logps/chosen": -100.71479797363281,
2325
+ "eval_logps/rejected": -200.29603576660156,
2326
+ "eval_loss": 0.028052611276507378,
2327
+ "eval_rewards/accuracies": 0.9902978539466858,
2328
+ "eval_rewards/chosen": -2.572061061859131,
2329
+ "eval_rewards/margins": 13.520757675170898,
2330
+ "eval_rewards/rejected": -16.09092903137207,
2331
+ "eval_runtime": 97.5048,
2332
+ "eval_samples_per_second": 45.331,
2333
+ "eval_steps_per_second": 2.841,
2334
+ "step": 700
2335
  }
2336
  ],
2337
  "logging_steps": 5,