OpenRS-GRPO / step_metrics.csv
NTQuoc's picture
Model save
0dd1e45 verified
step,epoch,loss,learning_rate,grad_norm,rewards/format_reward,rewards/cosine_scaled_reward,reward,reward_std,gpu_mem_alloc_mb,gpu_mem_peak_mb,step_time_sec
1,0.0006,0.003743603825569153,0.0,,0.0,-0.13172357995063066,-0.2634471617639065,0.18880490399897099,1565.3,4716.7,977.69
2,0.0011,5.21540641784668e-08,1e-06,,0.0,-0.18893574364483356,-0.37787145748734474,0.3398940674960613,1565.3,4735.9,937.12
3,0.0017,9.12398099899292e-05,2e-06,,0.0,-0.21025604009628296,-0.4205120652914047,0.3639860153198242,1565.3,4736.2,933.75
4,0.0023,0.00023540854454040527,1.986326977710987e-06,,0.0,-0.1617991798557341,-0.3235983597114682,0.27216803655028343,1565.3,4737.5,908.2
5,0.0029,-0.0842575952410698,1.9457233587073175e-06,,0.0,-0.16338778473436832,-0.32677557691931725,0.2703730836510658,1565.3,4739.5,937.6
6,0.0034,-0.0006134212017059326,1.8794228634059947e-06,,0.0,-0.21015496738255024,-0.4203099086880684,0.3194398656487465,1565.3,4739.5,933.46
7,0.004,0.0011770054697990417,1.78943999880708e-06,,0.0,-0.1622915817424655,-0.324583163484931,0.22306307777762413,1565.3,4739.5,935.67
8,0.0046,0.0018889307975769043,1.6785088487178854e-06,,0.0,-0.11076603084802628,-0.22153206076472998,0.27774741500616074,1565.3,4739.5,939.23
9,0.0051,0.0001969374716281891,1.55e-06,,0.0,-0.10312882997095585,-0.2062576599419117,0.2492200918495655,1565.3,4740.5,930.68
10,0.0057,0.0010615885257720947,1.4078181289931019e-06,,0.0,-0.15356164425611496,-0.3071232885122299,0.4035240039229393,1565.3,4740.5,934.1
11,0.0063,0.0006595328450202942,1.2562833599002374e-06,,0.0,-0.1030469723045826,-0.2060939408838749,0.17943128757178783,1565.3,4740.5,931.57
12,0.0069,0.00129680335521698,1.1e-06,,0.0,-0.14683218486607075,-0.2936643697321415,0.3133760243654251,1565.3,4740.5,937.98
13,0.0074,0.0004699230194091797,9.437166400997627e-07,,0.0,-0.18716039136052132,-0.37432078272104263,0.3915751725435257,1565.3,4740.5,930.45
14,0.008,0.003101266920566559,7.921818710068981e-07,,0.0,-0.11048614233732224,-0.22097227722406387,0.21915528364479542,1565.3,4740.5,933.75
15,0.0086,0.0011118575930595398,6.500000000000002e-07,,0.0,-0.10605884226970375,-0.2121176845394075,0.2806076686247252,1565.3,4740.5,927.66
16,0.0091,-0.011211458593606949,5.214911512821145e-07,,0.0,-0.2968476861715317,-0.5936953723430634,0.4250611439347267,1565.3,4740.5,935.1
17,0.0097,-0.00897778570652008,4.105600011929199e-07,,0.0,-0.09564100485295057,-0.19128201343119144,0.1637940350919962,1565.3,4740.5,929.9
18,0.0103,-0.03545624762773514,3.2057713659400516e-07,,0.0,-0.17633757088333368,-0.35267514176666737,0.257406591437757,1565.3,4740.5,878.76
19,0.0109,0.0015729628503322601,2.542766412926825e-07,,0.0,-0.09523642808198929,-0.19047284871339798,0.18389804009348154,1565.3,4740.5,934.27
20,0.0114,0.0017768293619155884,2.136730222890128e-07,,0.0,-0.11850203666836023,-0.23700407380238175,0.27234716434031725,1565.3,4740.5,934.07
20,0.0114,,,,,,,,1565.3,4740.5,937.48