NTQuoc committed on
Commit
a57e920
·
verified ·
1 Parent(s): 7f628f9

Model save

Browse files
Files changed (6) hide show
  1. README.md +2 -4
  2. all_results.json +4 -4
  3. step_metrics.csv +21 -101
  4. train_results.json +4 -4
  5. trainer_state.json +146 -1186
  6. training_metrics.txt +6 -6
README.md CHANGED
@@ -1,11 +1,9 @@
1
  ---
2
- base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
3
- datasets: knoveleng/open-rs
4
  library_name: transformers
5
  model_name: OpenRS-GRPO
6
  tags:
7
  - generated_from_trainer
8
- - open-r1
9
  - trl
10
  - grpo
11
  licence: license
@@ -13,7 +11,7 @@ licence: license
13
 
14
  # Model Card for OpenRS-GRPO
15
 
16
- This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) on the [knoveleng/open-rs](https://huggingface.co/datasets/knoveleng/open-rs) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
 
1
  ---
2
+ base_model: Qwen/Qwen3.5-0.8B
 
3
  library_name: transformers
4
  model_name: OpenRS-GRPO
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - grpo
9
  licence: license
 
11
 
12
  # Model Card for OpenRS-GRPO
13
 
14
+ This model is a fine-tuned version of [Qwen/Qwen3.5-0.8B](https://huggingface.co/Qwen/Qwen3.5-0.8B).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.0013520326372236013,
4
- "train_runtime": 36067.9715,
5
  "train_samples": 7000,
6
- "train_samples_per_second": 0.044,
7
- "train_steps_per_second": 0.003
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.010430806130170823,
4
+ "train_runtime": 9458.0577,
5
  "train_samples": 7000,
6
+ "train_samples_per_second": 0.034,
7
+ "train_steps_per_second": 0.002
8
  }
step_metrics.csv CHANGED
@@ -1,102 +1,22 @@
1
  step,epoch,loss,learning_rate,grad_norm,rewards/format_reward,rewards/cosine_scaled_reward,reward,reward_std,gpu_mem_alloc_mb,gpu_mem_peak_mb,step_time_sec
2
- 1,0.0006,3.3527612686157227e-08,0.0,,0.0,-0.45186813920736313,-0.9037362784147263,0.11632500402629375,3617.5,6086.5,356.01
3
- 2,0.0011,-2.9802322387695312e-08,0.0,,0.0,-0.4544064328074455,-0.9088128805160522,0.134795643389225,3617.5,6091.3,357.14
4
- 3,0.0017,-2.384185791015625e-07,0.0,,0.0,-0.44806819409132004,-0.8961364179849625,0.1266492954455316,3617.5,6092.8,356.69
5
- 4,0.0023,3.203749656677246e-07,0.0,,0.0,-0.46924517303705215,-0.9384903311729431,0.08165389159694314,3617.5,6095.2,358.13
6
- 5,0.0029,2.980232238769531e-07,0.0,,0.0,-0.4361194893717766,-0.8722389936447144,0.1861576708033681,3617.5,6095.2,355.88
7
- 6,0.0034,3.2782554626464844e-07,0.0,,0.0,-0.4838990569114685,-0.9677980989217758,0.03695695102214813,3617.5,6102.0,359.97
8
- 7,0.004,-6.48200511932373e-07,0.0,,0.0,-0.46913372725248337,-0.9382674694061279,0.04741490981541574,3617.5,6102.0,366.73
9
- 8,0.0046,-1.9371509552001953e-07,0.0,,0.0,-0.4689921736717224,-0.9379843473434448,0.08557501714676619,3617.5,6102.0,359.73
10
- 9,0.0051,6.258487701416016e-07,0.0,,0.0,-0.47829224169254303,-0.9565844535827637,0.04161944845691323,3617.5,6102.0,356.95
11
- 10,0.0057,0.0,0.0,,0.0,-0.39301924407482147,-0.7860384881496429,0.29622524976730347,3654.4,6102.0,356.9
12
- 11,0.0063,-1.2665987014770508e-06,1.0000000000000002e-06,,0.0,-0.47354312986135483,-0.9470862597227097,0.025707244174554944,3654.4,6129.7,357.65
13
- 12,0.0069,-3.2782554626464844e-07,1.0000000000000002e-06,,0.0,-0.4611263796687126,-0.9222527593374252,0.11853919085115194,3654.4,6132.1,359.0
14
- 13,0.0074,-9.834766387939453e-07,2.0000000000000003e-06,,0.0,-0.40241140127182007,-0.8048228025436401,0.2713719364255667,3654.4,6132.1,359.35
15
- 14,0.008,-9.98377799987793e-07,3e-06,,0.0,-0.4491143301129341,-0.8982286602258682,0.15008432138711214,3654.4,6132.1,359.42
16
- 15,0.0086,-1.2367963790893555e-06,4.000000000000001e-06,,0.0,-0.4349117949604988,-0.8698235750198364,0.1454296549782157,3654.4,6132.1,358.18
17
- 16,0.0091,-1.166015863418579e-06,5e-06,,0.0,-0.4481472671031952,-0.8962945342063904,0.11415091808885336,3654.4,6136.0,359.05
18
- 17,0.0097,-4.172325134277344e-07,6e-06,,0.0,-0.40375038236379623,-0.8075007796287537,0.2773375315591693,3654.4,6136.0,359.81
19
- 18,0.0103,1.0356307029724121e-06,7e-06,,0.0,-0.4421778917312622,-0.8843557834625244,0.1459937175968662,3654.4,6136.0,361.88
20
- 19,0.0109,-1.4901161193847656e-08,8.000000000000001e-06,,0.0,-0.46627073734998703,-0.9325414896011353,0.061190704349428415,3654.4,6136.0,361.95
21
- 20,0.0114,7.450580596923828e-08,9e-06,,0.0,-0.4463533014059067,-0.8927065879106522,0.1468478236347437,3654.4,6136.0,361.89
22
- 21,0.012,-3.241002559661865e-07,1e-05,,0.0,-0.430856853723526,-0.861713707447052,0.15973031288012862,3654.4,6136.0,360.43
23
- 22,0.0126,0.0,9.997258721585931e-06,,0.0,-0.42839662730693817,-0.8567932546138763,0.23303062841296196,3654.4,6136.0,359.97
24
- 23,0.0131,-1.9371509552001953e-07,9.98903822616921e-06,,0.0,-0.45275644212961197,-0.9055128693580627,0.13258774112910032,3654.4,6136.0,360.57
25
- 24,0.0137,4.3958425521850586e-07,9.97534852915723e-06,,0.0,-0.46590057015419006,-0.9318011105060577,0.08933348534628749,3654.4,6136.0,362.48
26
- 25,0.0143,-8.940696716308594e-08,9.956206309337067e-06,,0.0,-0.45125550776720047,-0.9025110006332397,0.15539621422067285,3654.4,6136.0,364.1
27
- 26,0.0149,-2.8312206268310547e-07,9.931634888554937e-06,,0.0,-0.456791490316391,-0.9135829955339432,0.08904895093291998,3654.4,6136.0,365.75
28
- 27,0.0154,-2.60770320892334e-06,9.901664203302126e-06,,0.0,-0.4751305654644966,-0.950261116027832,0.06268075766274706,3654.4,6136.0,370.26
29
- 28,0.016,5.960464477539063e-08,9.866330768241984e-06,,0.0,-0.4367612153291702,-0.8735224008560181,0.16378713678568602,3654.4,6138.0,366.74
30
- 29,0.0166,4.842877388000488e-07,9.825677631722436e-06,,0.0,-0.4282456487417221,-0.856491282582283,0.1944181639701128,3654.5,6138.0,364.94
31
- 30,0.0171,1.0058283805847168e-06,9.779754323328192e-06,,0.0,-0.45244697481393814,-0.9048939347267151,0.1466370872221887,3654.4,6138.0,359.72
32
- 31,0.0177,7.301568984985352e-07,9.728616793536588e-06,,0.0,-0.4787774831056595,-0.9575549513101578,0.04152237856760621,3654.4,6138.0,359.74
33
- 32,0.0183,7.525086402893066e-07,9.672327345550544e-06,,0.0,-0.4640432074666023,-0.9280864149332047,0.05714223568793386,3654.4,6138.0,359.53
34
- 33,0.0189,6.705522537231445e-07,9.610954559391704e-06,,0.0,-0.42499294877052307,-0.8499859273433685,0.13816553819924593,3654.5,6138.0,362.56
35
- 34,0.0194,3.4868717193603516e-06,9.544573208346252e-06,,0.0,-0.4555760398507118,-0.9111520648002625,0.06401598325464875,3654.4,6138.0,362.18
36
- 35,0.02,1.996755599975586e-06,9.473264167865172e-06,,0.0,-0.4616549611091614,-0.9233099222183228,0.07124835508875549,3654.4,6138.0,360.07
37
- 36,0.0206,3.5315752029418945e-06,9.397114317029975e-06,,0.0,-0.43700823187828064,-0.8740164637565613,0.1880413582548499,3654.4,6138.0,358.61
38
- 37,0.0211,0.1341422200202942,9.316216432703918e-06,,0.0,-0.40694746375083923,-0.8138948976993561,0.2712679710239172,3654.4,6138.0,357.92
39
- 38,0.0217,4.366040229797363e-06,9.230669076497688e-06,,0.0,-0.4537286013364792,-0.9074572026729584,0.140884583350271,3654.5,6138.0,361.01
40
- 39,0.0223,2.9802322387695312e-06,9.140576474687263e-06,,0.0,-0.4670984223484993,-0.9341968446969986,0.08859914634376764,3654.4,6138.0,358.53
41
- 40,0.0229,3.2633543014526367e-06,9.046048391230248e-06,,0.0,-0.4738834798336029,-0.9477669596672058,0.03630512161180377,3654.4,6138.0,356.31
42
- 41,0.0234,3.5390257835388184e-06,8.947199994035402e-06,,0.0,-0.48112839460372925,-0.9622567743062973,0.03198406333103776,3654.4,6138.0,356.46
43
- 42,0.024,4.059635102748871e-06,8.844151714648274e-06,,0.0,-0.45688286423683167,-0.9137657284736633,0.09395218873396516,3654.4,6138.0,358.44
44
- 43,0.0246,3.972090780735016e-06,8.737029101523931e-06,,0.0,-0.4356464073061943,-0.8712927997112274,0.17926698923110962,3654.4,6138.0,362.88
45
- 44,0.0251,7.338821887969971e-06,8.625962667065488e-06,,0.0,-0.4162614122033119,-0.8325228244066238,0.22310001868754625,3654.4,6138.0,358.73
46
- 45,0.0257,9.98377799987793e-06,8.511087728614863e-06,,0.0,-0.4377835765480995,-0.8755671381950378,0.1843216335400939,3654.4,6138.0,359.1
47
- 46,0.0263,7.063150405883789e-06,8.392544243589428e-06,,0.0,-0.4647279307246208,-0.9294558465480804,0.08181617665104568,3654.4,6138.0,364.32
48
- 47,0.0269,9.715557098388672e-06,8.270476638965463e-06,,0.0,-0.4818853959441185,-0.9637707620859146,0.04111184738576412,3654.4,6138.0,366.26
49
- 48,0.0274,1.0699033737182617e-05,8.14503363531613e-06,,0.0,-0.47147445380687714,-0.9429489076137543,0.05583410756662488,3654.4,6138.0,367.58
50
- 49,0.028,9.488314390182495e-06,8.016368065618361e-06,,0.0,-0.4839174821972847,-0.9678349643945694,0.031033652368932962,3654.4,6138.0,367.03
51
- 50,0.0286,1.1995434761047363e-05,7.884636689049423e-06,,0.0,-0.4634588584303856,-0.92691770195961,0.06057529430836439,3654.4,6138.0,365.37
52
- 51,0.0291,1.093745231628418e-05,7.75e-06,,0.0,-0.48549777269363403,-0.9709955900907516,0.023253681138157845,3654.5,6138.0,357.96
53
- 52,0.0297,1.2755393981933594e-05,7.612622032536508e-06,,0.0,-0.4660480171442032,-0.932096004486084,0.0814181575551629,3654.5,6138.0,357.69
54
- 53,0.0303,1.1809170246124268e-05,7.472670160550849e-06,,0.0,-0.42992351949214935,-0.8598470240831375,0.23317514825612307,3654.5,6138.0,360.51
55
- 54,0.0309,1.2435019016265869e-05,7.330314893841102e-06,,0.0,-0.4458845555782318,-0.8917691111564636,0.16836319211870432,3654.5,6138.0,359.99
56
- 55,0.0314,1.2367963790893555e-05,7.185729670371605e-06,,0.0,-0.46969927847385406,-0.9393985569477081,0.09125666646286845,3654.5,6138.0,357.92
57
- 56,0.032,1.3027340173721313e-05,7.0390906449655104e-06,,0.0,-0.4778226688504219,-0.9556453377008438,0.03563447529450059,3654.4,6138.0,359.54
58
- 57,0.0326,1.173466444015503e-05,6.890576474687264e-06,,0.0,-0.4766504615545273,-0.9533008933067322,0.047348865773528814,3654.5,6138.0,360.89
59
- 58,0.0331,1.0944902896881104e-05,6.740368101176496e-06,,0.0,-0.4303411394357681,-0.8606822788715363,0.19956759549677372,3654.5,6138.0,357.96
60
- 59,0.0337,1.4327466487884521e-05,6.588648530198505e-06,,0.0,-0.47175391018390656,-0.9435078203678131,0.028082083677873015,3654.4,6138.0,358.32
61
- 60,0.0343,1.862645149230957e-05,6.4356026086799176e-06,,0.0,-0.4349738284945488,-0.8699476420879364,0.14623272977769375,3654.5,6138.0,358.57
62
- 61,0.0349,1.6495585441589355e-05,6.281416799501188e-06,,0.0,-0.43933914601802826,-0.8786782920360565,0.14873503288254142,3654.5,6138.0,359.81
63
- 62,0.0354,1.4573335647583008e-05,6.126278954320295e-06,,0.0,-0.47655102610588074,-0.9531020373106003,0.04140742728486657,3654.5,6138.0,363.55
64
- 63,0.036,1.3574957847595215e-05,5.970378084704441e-06,,0.0,-0.4812774509191513,-0.9625549018383026,0.0323515310883522,3654.4,6138.0,361.67
65
- 64,0.0366,1.7490237951278687e-05,5.813904131848565e-06,,0.0,-0.4819137006998062,-0.9638274163007736,0.03202465921640396,3654.5,6138.0,360.66
66
- 65,0.0371,1.4990568161010742e-05,5.657047735161256e-06,,0.0,-0.4426536113023758,-0.8853072375059128,0.11870932951569557,3654.5,6138.0,360.47
67
- 66,0.0377,1.6763806343078613e-05,5.500000000000001e-06,,0.0,-0.4815715327858925,-0.9631430506706238,0.027665999252349138,3654.4,6138.0,364.95
68
- 67,0.0383,2.3305416107177734e-05,5.342952264838748e-06,,0.0,-0.46308349817991257,-0.9261669814586639,0.09819867718033493,3654.4,6138.0,369.54
69
- 68,0.0389,2.4430453777313232e-05,5.186095868151436e-06,,0.0,-0.4661281928420067,-0.9322563856840134,0.06839151354506612,3654.4,6138.0,369.64
70
- 69,0.0394,1.753866672515869e-05,5.02962191529556e-06,,0.0,-0.4736683666706085,-0.947336733341217,0.06602911371737719,3654.5,6138.0,365.41
71
- 70,0.04,1.9043684005737305e-05,4.873721045679707e-06,,0.0,-0.440708264708519,-0.881416529417038,0.15695088542997837,3654.4,6138.0,364.8
72
- 71,0.0406,2.4437904357910156e-05,4.718583200498814e-06,,0.0,-0.47764309495687485,-0.9552861750125885,0.04716450162231922,3654.4,6138.0,362.55
73
- 72,0.0411,1.781061291694641e-05,4.564397391320085e-06,,0.0,-0.45879723131656647,-0.9175944626331329,0.07855925057083368,3654.4,6138.0,359.86
74
- 73,0.0417,2.104230225086212e-05,4.4113514698014955e-06,,0.0,-0.47955460846424103,-0.9591091871261597,0.03032594360411167,3654.4,6138.0,354.97
75
- 74,0.0423,1.8224120140075684e-05,4.259631898823504e-06,,0.0,-0.470923513174057,-0.9418470114469528,0.09771440364420414,3654.4,6138.0,359.71
76
- 75,0.0429,1.697242259979248e-05,4.109423525312738e-06,,0.0,-0.4515884444117546,-0.9031769037246704,0.11431426065973938,3654.4,6138.0,359.51
77
- 76,0.0434,1.5683472156524658e-05,3.960909355034491e-06,,0.0,-0.43553559482097626,-0.8710711896419525,0.16496195830404758,3654.4,6138.0,357.76
78
- 77,0.044,2.2258609533309937e-05,3.8142703296283954e-06,,0.0,-0.418168805539608,-0.8363375961780548,0.23763815127313137,3654.4,6138.0,359.55
79
- 78,0.0446,2.06679105758667e-05,3.6696851061589e-06,,0.0,-0.45626621693372726,-0.912532389163971,0.1374040930531919,3654.5,6138.0,359.56
80
- 79,0.0451,3.053247928619385e-05,3.527329839449152e-06,,0.0,-0.478363037109375,-0.9567261040210724,0.03342599933966994,3654.4,6138.0,359.31
81
- 80,0.0457,3.489106893539429e-05,3.3873779674634932e-06,,0.0,-0.43451904505491257,-0.8690381050109863,0.166263896971941,3654.5,6138.0,358.11
82
- 81,0.0463,2.2970139980316162e-05,3.2500000000000015e-06,,0.0,-0.4851565733551979,-0.9703131467103958,0.018532322952523828,3654.4,6138.0,359.52
83
- 82,0.0469,2.2605061531066895e-05,3.115363310950579e-06,,0.0,-0.4188460633158684,-0.8376921266317368,0.21680933889001608,3654.5,6138.0,360.43
84
- 83,0.0474,2.580881118774414e-05,2.98363193438164e-06,,0.0,-0.45973849296569824,-0.9194770008325577,0.07644858444109559,3654.4,6138.0,357.84
85
- 84,0.048,2.232193946838379e-05,2.854966364683872e-06,,0.0,-0.4607899561524391,-0.921579897403717,0.07768060895614326,3654.4,6138.0,364.25
86
- 85,0.0486,2.230703830718994e-05,2.7295233610345384e-06,,0.0,-0.40651462972164154,-0.8130292594432831,0.21775285061448812,3654.4,6138.0,361.8
87
- 86,0.0491,2.0213425159454346e-05,2.607455756410573e-06,,0.0,-0.4558562785387039,-0.911712571978569,0.09181239921599627,3654.5,6138.0,363.18
88
- 87,0.0497,2.09808349609375e-05,2.4889122713851397e-06,,0.0,-0.47007501125335693,-0.9401500076055527,0.04285714589059353,3654.5,6138.0,368.87
89
- 88,0.0503,2.3573637008666992e-05,2.374037332934512e-06,,0.0,-0.41688134521245956,-0.8337626904249191,0.24908871483057737,3654.4,6138.0,360.15
90
- 89,0.0509,2.3312866687774658e-05,2.262970898476071e-06,,0.0,-0.4709980934858322,-0.9419961720705032,0.06526243314146996,3654.5,6138.0,355.62
91
- 90,0.0514,2.16066837310791e-05,2.1558482853517257e-06,,0.0,-0.47104664146900177,-0.9420932680368423,0.05442978721112013,3654.4,6138.0,359.25
92
- 91,0.052,2.3186206817626953e-05,2.0528000059646e-06,,0.0,-0.42244888097047806,-0.8448977470397949,0.17764843348413706,3654.4,6138.0,357.58
93
- 92,0.0526,2.4143606424331665e-05,1.953951608769752e-06,,0.0,-0.43437784910202026,-0.8687557131052017,0.13139949878677726,3654.5,6138.0,360.98
94
- 93,0.0531,1.7457641661167145e-05,1.8594235253127373e-06,,0.0,-0.4726478382945061,-0.9452957063913345,0.044932478107512,3654.4,6138.0,359.65
95
- 94,0.0537,2.3424625396728516e-05,1.769330923502313e-06,,0.0,-0.48662828654050827,-0.9732565432786942,0.023667596746236086,3654.4,6138.0,357.33
96
- 95,0.0543,2.664327621459961e-05,1.6837835672960834e-06,,0.0,-0.41473422944545746,-0.8294684588909149,0.21260959655046463,3654.4,6138.0,360.81
97
- 96,0.0549,2.310052514076233e-05,1.602885682970026e-06,,0.0,-0.4408787190914154,-0.8817574381828308,0.14791762363165617,3654.4,6138.0,359.17
98
- 97,0.0554,1.5437602996826172e-05,1.526735832134829e-06,,0.0,-0.46087589859962463,-0.9217518121004105,0.05548063712194562,3654.5,6138.0,357.98
99
- 98,0.056,2.017989754676819e-05,1.4554267916537495e-06,,0.0,-0.4707222431898117,-0.9414444863796234,0.057337059173732996,3654.4,6138.0,359.07
100
- 99,0.0566,1.6748905181884766e-05,1.389045440608296e-06,,0.0,-0.43424008786678314,-0.8684801608324051,0.18490357510745525,3654.4,6138.0,357.48
101
- 100,0.0571,2.2009015083312988e-05,1.3276726544494572e-06,,0.0,-0.44838932156562805,-0.8967786431312561,0.12791539868339896,3654.4,6138.0,358.57
102
- 100,0.0571,,,,,,,,3654.4,6138.0,362.18
 
1
  step,epoch,loss,learning_rate,grad_norm,rewards/format_reward,rewards/cosine_scaled_reward,reward,reward_std,gpu_mem_alloc_mb,gpu_mem_peak_mb,step_time_sec
2
+ 1,0.0006,-1.2665987014770508e-07,0.0,,0.0,-0.3879377990961075,-0.7758755832910538,0.2861072635278106,1565.2,3646.7,476.39
3
+ 2,0.0011,2.2351741790771484e-08,5e-07,,0.0,-0.34302495419979095,-0.6860499083995819,0.3919920399785042,1565.2,3663.9,469.44
4
+ 3,0.0017,0.04199279844760895,1e-06,,0.0,-0.39361898601055145,-0.7872379571199417,0.28899660520255566,1565.2,3665.2,464.19
5
+ 4,0.0023,0.00015985965728759766,9.931634888554935e-07,,0.0,-0.39139123260974884,-0.7827824652194977,0.30724803544580936,1565.2,3665.5,473.28
6
+ 5,0.0029,0.016923315823078156,9.728616793536587e-07,,0.0,-0.3417773097753525,-0.6835546344518661,0.4394468888640404,1565.2,3666.6,475.57
7
+ 6,0.0034,0.00023673847317695618,9.397114317029974e-07,,0.0,-0.43544115871191025,-0.8708823472261429,0.1456776731647551,1565.2,3669.9,476.37
8
+ 7,0.004,5.0827860832214355e-05,8.9471999940354e-07,,0.0,-0.46910375356674194,-0.9382074922323227,0.08606540504842997,1565.2,3669.9,481.67
9
+ 8,0.0046,7.636845111846924e-05,8.392544243589427e-07,,0.0,-0.3953063264489174,-0.7906126528978348,0.18317685835063457,1565.2,3669.9,481.42
10
+ 9,0.0051,7.21365213394165e-05,7.75e-07,,0.0,-0.4495018497109413,-0.8990036994218826,0.1606585686095059,1565.2,3669.9,479.75
11
+ 10,0.0057,0.00016715750098228455,7.039090644965509e-07,,0.0,-0.4300354793667793,-0.860070988535881,0.17052607703953981,1565.2,3669.9,466.26
12
+ 11,0.0063,0.00013599544763565063,6.281416799501187e-07,,0.0,-0.4115590825676918,-0.8231181800365448,0.1259385095909238,1565.2,3669.9,472.08
13
+ 12,0.0069,5.13419508934021e-05,5.5e-07,,0.0,-0.4723722040653229,-0.9447444081306458,0.08061030774842948,1565.2,3669.9,471.88
14
+ 13,0.0074,0.07869705557823181,4.7185832004988133e-07,,0.0,-0.44672856479883194,-0.8934571295976639,0.15774485282599926,1565.2,3669.9,466.09
15
+ 14,0.008,0.027497582137584686,3.9609093550344907e-07,,0.0,-0.3920762911438942,-0.7841525673866272,0.2220854666084051,1565.2,3669.9,468.75
16
+ 15,0.0086,0.0004888176918029785,3.250000000000001e-07,,0.0,-0.3608057275414467,-0.7216114401817322,0.3453192347660661,1565.2,3669.9,462.49
17
+ 16,0.0091,0.00015526264905929565,2.6074557564105724e-07,,0.0,-0.4132692217826843,-0.8265384286642075,0.25778803089633584,1565.2,3669.9,476.11
18
+ 17,0.0097,0.02472507953643799,2.0528000059645995e-07,,0.0,-0.4350534752011299,-0.8701069504022598,0.18937412789091468,1565.2,3669.9,469.71
19
+ 18,0.0103,0.00023746490478515625,1.6028856829700258e-07,,0.0,-0.41003918647766113,-0.8200783580541611,0.26157089229673147,1565.2,3669.9,472.28
20
+ 19,0.0109,0.016675502061843872,1.2713832064634125e-07,,0.0,-0.41168487817049026,-0.8233697563409805,0.20013628248125315,1565.2,3669.9,477.29
21
+ 20,0.0114,0.00027292221784591675,1.068365111445064e-07,,0.0,-0.4022079259157181,-0.8044158518314362,0.2423506089253351,1565.2,3669.9,473.18
22
+ 20,0.0114,,,,,,,,1565.2,3669.9,477.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.0013520326372236013,
4
- "train_runtime": 36067.9715,
5
  "train_samples": 7000,
6
- "train_samples_per_second": 0.044,
7
- "train_steps_per_second": 0.003
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.010430806130170823,
4
+ "train_runtime": 9458.0577,
5
  "train_samples": 7000,
6
+ "train_samples_per_second": 0.034,
7
+ "train_steps_per_second": 0.002
8
  }
trainer_state.json CHANGED
@@ -2,1325 +2,285 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.05714285714285714,
6
  "eval_steps": 500,
7
- "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "clip_ratio": 0.0,
14
- "completion_length": 1024.0,
15
  "epoch": 0.0005714285714285715,
16
  "kl": 0.0,
17
  "learning_rate": 0.0,
18
- "loss": 3.3527612686157227e-08,
19
- "reward": -0.9037362784147263,
20
- "reward_std": 0.11632500402629375,
21
- "rewards/cosine_scaled_reward": -0.45186813920736313,
22
  "rewards/format_reward": 0.0,
23
  "step": 1
24
  },
25
  {
26
  "clip_ratio": 0.0,
27
- "completion_length": 1024.0,
28
  "epoch": 0.001142857142857143,
29
  "kl": 0.0,
30
- "learning_rate": 0.0,
31
- "loss": -2.9802322387695312e-08,
32
- "reward": -0.9088128805160522,
33
- "reward_std": 0.134795643389225,
34
- "rewards/cosine_scaled_reward": -0.4544064328074455,
35
  "rewards/format_reward": 0.0,
36
  "step": 2
37
  },
38
  {
39
  "clip_ratio": 0.0,
40
- "completion_length": 1024.0,
41
  "epoch": 0.0017142857142857142,
42
- "kl": 0.0,
43
- "learning_rate": 0.0,
44
- "loss": -2.384185791015625e-07,
45
- "reward": -0.8961364179849625,
46
- "reward_std": 0.1266492954455316,
47
- "rewards/cosine_scaled_reward": -0.44806819409132004,
48
  "rewards/format_reward": 0.0,
49
  "step": 3
50
  },
51
  {
52
  "clip_ratio": 0.0,
53
- "completion_length": 1024.0,
54
  "epoch": 0.002285714285714286,
55
- "kl": 0.0,
56
- "learning_rate": 0.0,
57
- "loss": 3.203749656677246e-07,
58
- "reward": -0.9384903311729431,
59
- "reward_std": 0.08165389159694314,
60
- "rewards/cosine_scaled_reward": -0.46924517303705215,
61
  "rewards/format_reward": 0.0,
62
  "step": 4
63
  },
64
  {
65
  "clip_ratio": 0.0,
66
- "completion_length": 1024.0,
67
  "epoch": 0.002857142857142857,
68
- "kl": 0.0,
69
- "learning_rate": 0.0,
70
- "loss": 2.980232238769531e-07,
71
- "reward": -0.8722389936447144,
72
- "reward_std": 0.1861576708033681,
73
- "rewards/cosine_scaled_reward": -0.4361194893717766,
74
  "rewards/format_reward": 0.0,
75
  "step": 5
76
  },
77
  {
78
  "clip_ratio": 0.0,
79
- "completion_length": 1024.0,
80
  "epoch": 0.0034285714285714284,
81
- "kl": 0.0,
82
- "learning_rate": 0.0,
83
- "loss": 3.2782554626464844e-07,
84
- "reward": -0.9677980989217758,
85
- "reward_std": 0.03695695102214813,
86
- "rewards/cosine_scaled_reward": -0.4838990569114685,
87
  "rewards/format_reward": 0.0,
88
  "step": 6
89
  },
90
  {
91
  "clip_ratio": 0.0,
92
- "completion_length": 1024.0,
93
  "epoch": 0.004,
94
- "kl": 0.0,
95
- "learning_rate": 0.0,
96
- "loss": -6.48200511932373e-07,
97
- "reward": -0.9382674694061279,
98
- "reward_std": 0.04741490981541574,
99
- "rewards/cosine_scaled_reward": -0.46913372725248337,
100
  "rewards/format_reward": 0.0,
101
  "step": 7
102
  },
103
  {
104
  "clip_ratio": 0.0,
105
- "completion_length": 1024.0,
106
  "epoch": 0.004571428571428572,
107
- "kl": 0.0,
108
- "learning_rate": 0.0,
109
- "loss": -1.9371509552001953e-07,
110
- "reward": -0.9379843473434448,
111
- "reward_std": 0.08557501714676619,
112
- "rewards/cosine_scaled_reward": -0.4689921736717224,
113
  "rewards/format_reward": 0.0,
114
  "step": 8
115
  },
116
  {
117
  "clip_ratio": 0.0,
118
- "completion_length": 1024.0,
119
  "epoch": 0.005142857142857143,
120
- "kl": 0.0,
121
- "learning_rate": 0.0,
122
- "loss": 6.258487701416016e-07,
123
- "reward": -0.9565844535827637,
124
- "reward_std": 0.04161944845691323,
125
- "rewards/cosine_scaled_reward": -0.47829224169254303,
126
  "rewards/format_reward": 0.0,
127
  "step": 9
128
  },
129
  {
130
  "clip_ratio": 0.0,
131
- "completion_length": 1024.0,
132
  "epoch": 0.005714285714285714,
133
- "kl": 0.0,
134
- "learning_rate": 0.0,
135
- "loss": 0.0,
136
- "reward": -0.7860384881496429,
137
- "reward_std": 0.29622524976730347,
138
- "rewards/cosine_scaled_reward": -0.39301924407482147,
139
  "rewards/format_reward": 0.0,
140
  "step": 10
141
  },
142
  {
143
  "clip_ratio": 0.0,
144
- "completion_length": 1024.0,
145
  "epoch": 0.006285714285714286,
146
- "kl": 0.0,
147
- "learning_rate": 1.0000000000000002e-06,
148
- "loss": -1.2665987014770508e-06,
149
- "reward": -0.9470862597227097,
150
- "reward_std": 0.025707244174554944,
151
- "rewards/cosine_scaled_reward": -0.47354312986135483,
152
  "rewards/format_reward": 0.0,
153
  "step": 11
154
  },
155
  {
156
  "clip_ratio": 0.0,
157
- "completion_length": 1024.0,
158
  "epoch": 0.006857142857142857,
159
- "kl": 0.0,
160
- "learning_rate": 1.0000000000000002e-06,
161
- "loss": -3.2782554626464844e-07,
162
- "reward": -0.9222527593374252,
163
- "reward_std": 0.11853919085115194,
164
- "rewards/cosine_scaled_reward": -0.4611263796687126,
165
  "rewards/format_reward": 0.0,
166
  "step": 12
167
  },
168
  {
169
  "clip_ratio": 0.0,
170
- "completion_length": 1024.0,
171
  "epoch": 0.0074285714285714285,
172
- "kl": -2.9742717742919922e-05,
173
- "learning_rate": 2.0000000000000003e-06,
174
- "loss": -9.834766387939453e-07,
175
- "reward": -0.8048228025436401,
176
- "reward_std": 0.2713719364255667,
177
- "rewards/cosine_scaled_reward": -0.40241140127182007,
178
  "rewards/format_reward": 0.0,
179
  "step": 13
180
  },
181
  {
182
  "clip_ratio": 0.0,
183
- "completion_length": 1024.0,
184
  "epoch": 0.008,
185
- "kl": -1.9848346710205078e-05,
186
- "learning_rate": 3e-06,
187
- "loss": -9.98377799987793e-07,
188
- "reward": -0.8982286602258682,
189
- "reward_std": 0.15008432138711214,
190
- "rewards/cosine_scaled_reward": -0.4491143301129341,
191
  "rewards/format_reward": 0.0,
192
  "step": 14
193
  },
194
  {
195
  "clip_ratio": 0.0,
196
- "completion_length": 1024.0,
197
  "epoch": 0.008571428571428572,
198
- "kl": -3.173947334289551e-05,
199
- "learning_rate": 4.000000000000001e-06,
200
- "loss": -1.2367963790893555e-06,
201
- "reward": -0.8698235750198364,
202
- "reward_std": 0.1454296549782157,
203
- "rewards/cosine_scaled_reward": -0.4349117949604988,
204
  "rewards/format_reward": 0.0,
205
  "step": 15
206
  },
207
  {
208
  "clip_ratio": 0.0,
209
- "completion_length": 1024.0,
210
  "epoch": 0.009142857142857144,
211
- "kl": -2.2411346435546875e-05,
212
- "learning_rate": 5e-06,
213
- "loss": -1.166015863418579e-06,
214
- "reward": -0.8962945342063904,
215
- "reward_std": 0.11415091808885336,
216
- "rewards/cosine_scaled_reward": -0.4481472671031952,
217
  "rewards/format_reward": 0.0,
218
  "step": 16
219
  },
220
  {
221
  "clip_ratio": 0.0,
222
- "completion_length": 1024.0,
223
  "epoch": 0.009714285714285713,
224
- "kl": -9.268522262573242e-06,
225
- "learning_rate": 6e-06,
226
- "loss": -4.172325134277344e-07,
227
- "reward": -0.8075007796287537,
228
- "reward_std": 0.2773375315591693,
229
- "rewards/cosine_scaled_reward": -0.40375038236379623,
230
  "rewards/format_reward": 0.0,
231
  "step": 17
232
  },
233
  {
234
  "clip_ratio": 0.0,
235
- "completion_length": 1024.0,
236
  "epoch": 0.010285714285714285,
237
- "kl": -1.519918441772461e-06,
238
- "learning_rate": 7e-06,
239
- "loss": 1.0356307029724121e-06,
240
- "reward": -0.8843557834625244,
241
- "reward_std": 0.1459937175968662,
242
- "rewards/cosine_scaled_reward": -0.4421778917312622,
243
  "rewards/format_reward": 0.0,
244
  "step": 18
245
  },
246
  {
247
  "clip_ratio": 0.0,
248
- "completion_length": 1024.0,
249
  "epoch": 0.010857142857142857,
250
- "kl": -1.1920928955078125e-07,
251
- "learning_rate": 8.000000000000001e-06,
252
- "loss": -1.4901161193847656e-08,
253
- "reward": -0.9325414896011353,
254
- "reward_std": 0.061190704349428415,
255
- "rewards/cosine_scaled_reward": -0.46627073734998703,
256
  "rewards/format_reward": 0.0,
257
  "step": 19
258
  },
259
  {
260
  "clip_ratio": 0.0,
261
- "completion_length": 1024.0,
262
  "epoch": 0.011428571428571429,
263
- "kl": -7.152557373046875e-07,
264
- "learning_rate": 9e-06,
265
- "loss": 7.450580596923828e-08,
266
- "reward": -0.8927065879106522,
267
- "reward_std": 0.1468478236347437,
268
- "rewards/cosine_scaled_reward": -0.4463533014059067,
269
  "rewards/format_reward": 0.0,
270
  "step": 20
271
  },
272
  {
273
- "clip_ratio": 0.0,
274
- "completion_length": 1024.0,
275
- "epoch": 0.012,
276
- "kl": -4.470348358154297e-07,
277
- "learning_rate": 1e-05,
278
- "loss": -3.241002559661865e-07,
279
- "reward": -0.861713707447052,
280
- "reward_std": 0.15973031288012862,
281
- "rewards/cosine_scaled_reward": -0.430856853723526,
282
- "rewards/format_reward": 0.0,
283
- "step": 21
284
- },
285
- {
286
- "clip_ratio": 0.0,
287
- "completion_length": 1024.0,
288
- "epoch": 0.012571428571428572,
289
- "kl": 2.980232238769531e-07,
290
- "learning_rate": 9.997258721585931e-06,
291
- "loss": 0.0,
292
- "reward": -0.8567932546138763,
293
- "reward_std": 0.23303062841296196,
294
- "rewards/cosine_scaled_reward": -0.42839662730693817,
295
- "rewards/format_reward": 0.0,
296
- "step": 22
297
- },
298
- {
299
- "clip_ratio": 0.0,
300
- "completion_length": 1024.0,
301
- "epoch": 0.013142857142857144,
302
- "kl": 6.258487701416016e-07,
303
- "learning_rate": 9.98903822616921e-06,
304
- "loss": -1.9371509552001953e-07,
305
- "reward": -0.9055128693580627,
306
- "reward_std": 0.13258774112910032,
307
- "rewards/cosine_scaled_reward": -0.45275644212961197,
308
- "rewards/format_reward": 0.0,
309
- "step": 23
310
- },
311
- {
312
- "clip_ratio": 0.0,
313
- "completion_length": 1024.0,
314
- "epoch": 0.013714285714285714,
315
- "kl": 2.682209014892578e-07,
316
- "learning_rate": 9.97534852915723e-06,
317
- "loss": 4.3958425521850586e-07,
318
- "reward": -0.9318011105060577,
319
- "reward_std": 0.08933348534628749,
320
- "rewards/cosine_scaled_reward": -0.46590057015419006,
321
- "rewards/format_reward": 0.0,
322
- "step": 24
323
- },
324
- {
325
- "clip_ratio": 0.0,
326
- "completion_length": 1024.0,
327
- "epoch": 0.014285714285714285,
328
- "kl": 3.337860107421875e-06,
329
- "learning_rate": 9.956206309337067e-06,
330
- "loss": -8.940696716308594e-08,
331
- "reward": -0.9025110006332397,
332
- "reward_std": 0.15539621422067285,
333
- "rewards/cosine_scaled_reward": -0.45125550776720047,
334
- "rewards/format_reward": 0.0,
335
- "step": 25
336
- },
337
- {
338
- "clip_ratio": 0.0,
339
- "completion_length": 1024.0,
340
- "epoch": 0.014857142857142857,
341
- "kl": 2.086162567138672e-06,
342
- "learning_rate": 9.931634888554937e-06,
343
- "loss": -2.8312206268310547e-07,
344
- "reward": -0.9135829955339432,
345
- "reward_std": 0.08904895093291998,
346
- "rewards/cosine_scaled_reward": -0.456791490316391,
347
- "rewards/format_reward": 0.0,
348
- "step": 26
349
- },
350
- {
351
- "clip_ratio": 0.0,
352
- "completion_length": 1024.0,
353
- "epoch": 0.015428571428571429,
354
- "kl": 2.8312206268310547e-06,
355
- "learning_rate": 9.901664203302126e-06,
356
- "loss": -2.60770320892334e-06,
357
- "reward": -0.950261116027832,
358
- "reward_std": 0.06268075766274706,
359
- "rewards/cosine_scaled_reward": -0.4751305654644966,
360
- "rewards/format_reward": 0.0,
361
- "step": 27
362
- },
363
- {
364
- "clip_ratio": 0.0,
365
- "completion_length": 1024.0,
366
- "epoch": 0.016,
367
- "kl": 2.8014183044433594e-06,
368
- "learning_rate": 9.866330768241984e-06,
369
- "loss": 5.960464477539063e-08,
370
- "reward": -0.8735224008560181,
371
- "reward_std": 0.16378713678568602,
372
- "rewards/cosine_scaled_reward": -0.4367612153291702,
373
- "rewards/format_reward": 0.0,
374
- "step": 28
375
- },
376
- {
377
- "clip_ratio": 0.0,
378
- "completion_length": 1024.0,
379
- "epoch": 0.01657142857142857,
380
- "kl": 1.0013580322265625e-05,
381
- "learning_rate": 9.825677631722436e-06,
382
- "loss": 4.842877388000488e-07,
383
- "reward": -0.856491282582283,
384
- "reward_std": 0.1944181639701128,
385
- "rewards/cosine_scaled_reward": -0.4282456487417221,
386
- "rewards/format_reward": 0.0,
387
- "step": 29
388
- },
389
- {
390
- "clip_ratio": 0.0,
391
- "completion_length": 1024.0,
392
- "epoch": 0.017142857142857144,
393
- "kl": 6.258487701416016e-06,
394
- "learning_rate": 9.779754323328192e-06,
395
- "loss": 1.0058283805847168e-06,
396
- "reward": -0.9048939347267151,
397
- "reward_std": 0.1466370872221887,
398
- "rewards/cosine_scaled_reward": -0.45244697481393814,
399
- "rewards/format_reward": 0.0,
400
- "step": 30
401
- },
402
- {
403
- "clip_ratio": 0.0,
404
- "completion_length": 1024.0,
405
- "epoch": 0.017714285714285714,
406
- "kl": 1.4841556549072266e-05,
407
- "learning_rate": 9.728616793536588e-06,
408
- "loss": 7.301568984985352e-07,
409
- "reward": -0.9575549513101578,
410
- "reward_std": 0.04152237856760621,
411
- "rewards/cosine_scaled_reward": -0.4787774831056595,
412
- "rewards/format_reward": 0.0,
413
- "step": 31
414
- },
415
- {
416
- "clip_ratio": 0.0,
417
- "completion_length": 1024.0,
418
- "epoch": 0.018285714285714287,
419
- "kl": 1.5348196029663086e-05,
420
- "learning_rate": 9.672327345550544e-06,
421
- "loss": 7.525086402893066e-07,
422
- "reward": -0.9280864149332047,
423
- "reward_std": 0.05714223568793386,
424
- "rewards/cosine_scaled_reward": -0.4640432074666023,
425
- "rewards/format_reward": 0.0,
426
- "step": 32
427
- },
428
- {
429
- "clip_ratio": 0.0,
430
- "completion_length": 1024.0,
431
- "epoch": 0.018857142857142857,
432
- "kl": 2.396106719970703e-05,
433
- "learning_rate": 9.610954559391704e-06,
434
- "loss": 6.705522537231445e-07,
435
- "reward": -0.8499859273433685,
436
- "reward_std": 0.13816553819924593,
437
- "rewards/cosine_scaled_reward": -0.42499294877052307,
438
- "rewards/format_reward": 0.0,
439
- "step": 33
440
- },
441
- {
442
- "clip_ratio": 0.0,
443
- "completion_length": 1024.0,
444
- "epoch": 0.019428571428571427,
445
- "kl": 2.98917293548584e-05,
446
- "learning_rate": 9.544573208346252e-06,
447
- "loss": 3.4868717193603516e-06,
448
- "reward": -0.9111520648002625,
449
- "reward_std": 0.06401598325464875,
450
- "rewards/cosine_scaled_reward": -0.4555760398507118,
451
- "rewards/format_reward": 0.0,
452
- "step": 34
453
- },
454
- {
455
- "clip_ratio": 0.0,
456
- "completion_length": 1024.0,
457
- "epoch": 0.02,
458
- "kl": 5.048513412475586e-05,
459
- "learning_rate": 9.473264167865172e-06,
460
- "loss": 1.996755599975586e-06,
461
- "reward": -0.9233099222183228,
462
- "reward_std": 0.07124835508875549,
463
- "rewards/cosine_scaled_reward": -0.4616549611091614,
464
- "rewards/format_reward": 0.0,
465
- "step": 35
466
- },
467
- {
468
- "clip_ratio": 0.0,
469
- "completion_length": 1024.0,
470
- "epoch": 0.02057142857142857,
471
- "kl": 7.835030555725098e-05,
472
- "learning_rate": 9.397114317029975e-06,
473
- "loss": 3.5315752029418945e-06,
474
- "reward": -0.8740164637565613,
475
- "reward_std": 0.1880413582548499,
476
- "rewards/cosine_scaled_reward": -0.43700823187828064,
477
- "rewards/format_reward": 0.0,
478
- "step": 36
479
- },
480
- {
481
- "clip_ratio": 0.0,
482
- "completion_length": 960.4375,
483
- "epoch": 0.021142857142857144,
484
- "kl": 0.0002009570598602295,
485
- "learning_rate": 9.316216432703918e-06,
486
- "loss": 0.1341422200202942,
487
- "reward": -0.8138948976993561,
488
- "reward_std": 0.2712679710239172,
489
- "rewards/cosine_scaled_reward": -0.40694746375083923,
490
- "rewards/format_reward": 0.0,
491
- "step": 37
492
- },
493
- {
494
- "clip_ratio": 0.0,
495
- "completion_length": 1024.0,
496
- "epoch": 0.021714285714285714,
497
- "kl": 0.00011423230171203613,
498
- "learning_rate": 9.230669076497688e-06,
499
- "loss": 4.366040229797363e-06,
500
- "reward": -0.9074572026729584,
501
- "reward_std": 0.140884583350271,
502
- "rewards/cosine_scaled_reward": -0.4537286013364792,
503
- "rewards/format_reward": 0.0,
504
- "step": 38
505
- },
506
- {
507
- "clip_ratio": 0.0,
508
- "completion_length": 1024.0,
509
- "epoch": 0.022285714285714287,
510
- "kl": 7.608532905578613e-05,
511
- "learning_rate": 9.140576474687263e-06,
512
- "loss": 2.9802322387695312e-06,
513
- "reward": -0.9341968446969986,
514
- "reward_std": 0.08859914634376764,
515
- "rewards/cosine_scaled_reward": -0.4670984223484993,
516
- "rewards/format_reward": 0.0,
517
- "step": 39
518
- },
519
- {
520
- "clip_ratio": 0.0,
521
- "completion_length": 1024.0,
522
- "epoch": 0.022857142857142857,
523
- "kl": 8.574128150939941e-05,
524
- "learning_rate": 9.046048391230248e-06,
525
- "loss": 3.2633543014526367e-06,
526
- "reward": -0.9477669596672058,
527
- "reward_std": 0.03630512161180377,
528
- "rewards/cosine_scaled_reward": -0.4738834798336029,
529
- "rewards/format_reward": 0.0,
530
- "step": 40
531
- },
532
- {
533
- "clip_ratio": 0.0,
534
- "completion_length": 1024.0,
535
- "epoch": 0.023428571428571427,
536
- "kl": 9.319186210632324e-05,
537
- "learning_rate": 8.947199994035402e-06,
538
- "loss": 3.5390257835388184e-06,
539
- "reward": -0.9622567743062973,
540
- "reward_std": 0.03198406333103776,
541
- "rewards/cosine_scaled_reward": -0.48112839460372925,
542
- "rewards/format_reward": 0.0,
543
- "step": 41
544
- },
545
- {
546
- "clip_ratio": 0.0,
547
- "completion_length": 1024.0,
548
- "epoch": 0.024,
549
- "kl": 0.00011175870895385742,
550
- "learning_rate": 8.844151714648274e-06,
551
- "loss": 4.059635102748871e-06,
552
- "reward": -0.9137657284736633,
553
- "reward_std": 0.09395218873396516,
554
- "rewards/cosine_scaled_reward": -0.45688286423683167,
555
- "rewards/format_reward": 0.0,
556
- "step": 42
557
- },
558
- {
559
- "clip_ratio": 0.0,
560
- "completion_length": 1024.0,
561
- "epoch": 0.02457142857142857,
562
- "kl": 9.465217590332031e-05,
563
- "learning_rate": 8.737029101523931e-06,
564
- "loss": 3.972090780735016e-06,
565
- "reward": -0.8712927997112274,
566
- "reward_std": 0.17926698923110962,
567
- "rewards/cosine_scaled_reward": -0.4356464073061943,
568
- "rewards/format_reward": 0.0,
569
- "step": 43
570
- },
571
- {
572
- "clip_ratio": 0.0,
573
- "completion_length": 1024.0,
574
- "epoch": 0.025142857142857144,
575
- "kl": 0.00018846988677978516,
576
- "learning_rate": 8.625962667065488e-06,
577
- "loss": 7.338821887969971e-06,
578
- "reward": -0.8325228244066238,
579
- "reward_std": 0.22310001868754625,
580
- "rewards/cosine_scaled_reward": -0.4162614122033119,
581
- "rewards/format_reward": 0.0,
582
- "step": 44
583
- },
584
- {
585
- "clip_ratio": 0.0,
586
- "completion_length": 1024.0,
587
- "epoch": 0.025714285714285714,
588
- "kl": 0.00025010108947753906,
589
- "learning_rate": 8.511087728614863e-06,
590
- "loss": 9.98377799987793e-06,
591
- "reward": -0.8755671381950378,
592
- "reward_std": 0.1843216335400939,
593
- "rewards/cosine_scaled_reward": -0.4377835765480995,
594
- "rewards/format_reward": 0.0,
595
- "step": 45
596
- },
597
- {
598
- "clip_ratio": 0.0,
599
- "completion_length": 1024.0,
600
- "epoch": 0.026285714285714287,
601
- "kl": 0.00020682811737060547,
602
- "learning_rate": 8.392544243589428e-06,
603
- "loss": 7.063150405883789e-06,
604
- "reward": -0.9294558465480804,
605
- "reward_std": 0.08181617665104568,
606
- "rewards/cosine_scaled_reward": -0.4647279307246208,
607
- "rewards/format_reward": 0.0,
608
- "step": 46
609
- },
610
- {
611
- "clip_ratio": 0.0,
612
- "completion_length": 1024.0,
613
- "epoch": 0.026857142857142857,
614
- "kl": 0.00023487210273742676,
615
- "learning_rate": 8.270476638965463e-06,
616
- "loss": 9.715557098388672e-06,
617
- "reward": -0.9637707620859146,
618
- "reward_std": 0.04111184738576412,
619
- "rewards/cosine_scaled_reward": -0.4818853959441185,
620
- "rewards/format_reward": 0.0,
621
- "step": 47
622
- },
623
- {
624
- "clip_ratio": 0.0,
625
- "completion_length": 1024.0,
626
- "epoch": 0.027428571428571427,
627
- "kl": 0.00025856494903564453,
628
- "learning_rate": 8.14503363531613e-06,
629
- "loss": 1.0699033737182617e-05,
630
- "reward": -0.9429489076137543,
631
- "reward_std": 0.05583410756662488,
632
- "rewards/cosine_scaled_reward": -0.47147445380687714,
633
- "rewards/format_reward": 0.0,
634
- "step": 48
635
- },
636
- {
637
- "clip_ratio": 0.0,
638
- "completion_length": 1024.0,
639
- "epoch": 0.028,
640
- "kl": 0.00023242831230163574,
641
- "learning_rate": 8.016368065618361e-06,
642
- "loss": 9.488314390182495e-06,
643
- "reward": -0.9678349643945694,
644
- "reward_std": 0.031033652368932962,
645
- "rewards/cosine_scaled_reward": -0.4839174821972847,
646
- "rewards/format_reward": 0.0,
647
- "step": 49
648
- },
649
- {
650
- "clip_ratio": 0.0,
651
- "completion_length": 1024.0,
652
- "epoch": 0.02857142857142857,
653
- "kl": 0.0002980530261993408,
654
- "learning_rate": 7.884636689049423e-06,
655
- "loss": 1.1995434761047363e-05,
656
- "reward": -0.92691770195961,
657
- "reward_std": 0.06057529430836439,
658
- "rewards/cosine_scaled_reward": -0.4634588584303856,
659
- "rewards/format_reward": 0.0,
660
- "step": 50
661
- },
662
- {
663
- "clip_ratio": 0.0,
664
- "completion_length": 1024.0,
665
- "epoch": 0.029142857142857144,
666
- "kl": 0.0002821683883666992,
667
- "learning_rate": 7.75e-06,
668
- "loss": 1.093745231628418e-05,
669
- "reward": -0.9709955900907516,
670
- "reward_std": 0.023253681138157845,
671
- "rewards/cosine_scaled_reward": -0.48549777269363403,
672
- "rewards/format_reward": 0.0,
673
- "step": 51
674
- },
675
- {
676
- "clip_ratio": 0.0,
677
- "completion_length": 1024.0,
678
- "epoch": 0.029714285714285714,
679
- "kl": 0.0003021657466888428,
680
- "learning_rate": 7.612622032536508e-06,
681
- "loss": 1.2755393981933594e-05,
682
- "reward": -0.932096004486084,
683
- "reward_std": 0.0814181575551629,
684
- "rewards/cosine_scaled_reward": -0.4660480171442032,
685
- "rewards/format_reward": 0.0,
686
- "step": 52
687
- },
688
- {
689
- "clip_ratio": 0.0,
690
- "completion_length": 1024.0,
691
- "epoch": 0.030285714285714287,
692
- "kl": 0.0003120899200439453,
693
- "learning_rate": 7.472670160550849e-06,
694
- "loss": 1.1809170246124268e-05,
695
- "reward": -0.8598470240831375,
696
- "reward_std": 0.23317514825612307,
697
- "rewards/cosine_scaled_reward": -0.42992351949214935,
698
- "rewards/format_reward": 0.0,
699
- "step": 53
700
- },
701
- {
702
- "clip_ratio": 0.0,
703
- "completion_length": 1024.0,
704
- "epoch": 0.030857142857142857,
705
- "kl": 0.00031960010528564453,
706
- "learning_rate": 7.330314893841102e-06,
707
- "loss": 1.2435019016265869e-05,
708
- "reward": -0.8917691111564636,
709
- "reward_std": 0.16836319211870432,
710
- "rewards/cosine_scaled_reward": -0.4458845555782318,
711
- "rewards/format_reward": 0.0,
712
- "step": 54
713
- },
714
- {
715
- "clip_ratio": 0.0,
716
- "completion_length": 1024.0,
717
- "epoch": 0.03142857142857143,
718
- "kl": 0.0003180503845214844,
719
- "learning_rate": 7.185729670371605e-06,
720
- "loss": 1.2367963790893555e-05,
721
- "reward": -0.9393985569477081,
722
- "reward_std": 0.09125666646286845,
723
- "rewards/cosine_scaled_reward": -0.46969927847385406,
724
- "rewards/format_reward": 0.0,
725
- "step": 55
726
- },
727
- {
728
- "clip_ratio": 0.0,
729
- "completion_length": 1024.0,
730
- "epoch": 0.032,
731
- "kl": 0.00032332539558410645,
732
- "learning_rate": 7.0390906449655104e-06,
733
- "loss": 1.3027340173721313e-05,
734
- "reward": -0.9556453377008438,
735
- "reward_std": 0.03563447529450059,
736
- "rewards/cosine_scaled_reward": -0.4778226688504219,
737
- "rewards/format_reward": 0.0,
738
- "step": 56
739
- },
740
- {
741
- "clip_ratio": 0.0,
742
- "completion_length": 1024.0,
743
- "epoch": 0.03257142857142857,
744
- "kl": 0.00029200315475463867,
745
- "learning_rate": 6.890576474687264e-06,
746
- "loss": 1.173466444015503e-05,
747
- "reward": -0.9533008933067322,
748
- "reward_std": 0.047348865773528814,
749
- "rewards/cosine_scaled_reward": -0.4766504615545273,
750
- "rewards/format_reward": 0.0,
751
- "step": 57
752
- },
753
- {
754
- "clip_ratio": 0.0,
755
- "completion_length": 1024.0,
756
- "epoch": 0.03314285714285714,
757
- "kl": 0.00028020143508911133,
758
- "learning_rate": 6.740368101176496e-06,
759
- "loss": 1.0944902896881104e-05,
760
- "reward": -0.8606822788715363,
761
- "reward_std": 0.19956759549677372,
762
- "rewards/cosine_scaled_reward": -0.4303411394357681,
763
- "rewards/format_reward": 0.0,
764
- "step": 58
765
- },
766
- {
767
- "clip_ratio": 0.0,
768
- "completion_length": 1024.0,
769
- "epoch": 0.03371428571428572,
770
- "kl": 0.00034242868423461914,
771
- "learning_rate": 6.588648530198505e-06,
772
- "loss": 1.4327466487884521e-05,
773
- "reward": -0.9435078203678131,
774
- "reward_std": 0.028082083677873015,
775
- "rewards/cosine_scaled_reward": -0.47175391018390656,
776
- "rewards/format_reward": 0.0,
777
- "step": 59
778
- },
779
- {
780
- "clip_ratio": 0.0,
781
- "completion_length": 1024.0,
782
- "epoch": 0.03428571428571429,
783
- "kl": 0.0004596710205078125,
784
- "learning_rate": 6.4356026086799176e-06,
785
- "loss": 1.862645149230957e-05,
786
- "reward": -0.8699476420879364,
787
- "reward_std": 0.14623272977769375,
788
- "rewards/cosine_scaled_reward": -0.4349738284945488,
789
- "rewards/format_reward": 0.0,
790
- "step": 60
791
- },
792
- {
793
- "clip_ratio": 0.0,
794
- "completion_length": 1024.0,
795
- "epoch": 0.03485714285714286,
796
- "kl": 0.00041598081588745117,
797
- "learning_rate": 6.281416799501188e-06,
798
- "loss": 1.6495585441589355e-05,
799
- "reward": -0.8786782920360565,
800
- "reward_std": 0.14873503288254142,
801
- "rewards/cosine_scaled_reward": -0.43933914601802826,
802
- "rewards/format_reward": 0.0,
803
- "step": 61
804
- },
805
- {
806
- "clip_ratio": 0.0,
807
- "completion_length": 1024.0,
808
- "epoch": 0.03542857142857143,
809
- "kl": 0.0003428459167480469,
810
- "learning_rate": 6.126278954320295e-06,
811
- "loss": 1.4573335647583008e-05,
812
- "reward": -0.9531020373106003,
813
- "reward_std": 0.04140742728486657,
814
- "rewards/cosine_scaled_reward": -0.47655102610588074,
815
- "rewards/format_reward": 0.0,
816
- "step": 62
817
- },
818
- {
819
- "clip_ratio": 0.0,
820
- "completion_length": 1024.0,
821
- "epoch": 0.036,
822
- "kl": 0.00034630298614501953,
823
- "learning_rate": 5.970378084704441e-06,
824
- "loss": 1.3574957847595215e-05,
825
- "reward": -0.9625549018383026,
826
- "reward_std": 0.0323515310883522,
827
- "rewards/cosine_scaled_reward": -0.4812774509191513,
828
- "rewards/format_reward": 0.0,
829
- "step": 63
830
- },
831
- {
832
- "clip_ratio": 0.0,
833
- "completion_length": 1024.0,
834
- "epoch": 0.036571428571428574,
835
- "kl": 0.000436633825302124,
836
- "learning_rate": 5.813904131848565e-06,
837
- "loss": 1.7490237951278687e-05,
838
- "reward": -0.9638274163007736,
839
- "reward_std": 0.03202465921640396,
840
- "rewards/cosine_scaled_reward": -0.4819137006998062,
841
- "rewards/format_reward": 0.0,
842
- "step": 64
843
- },
844
- {
845
- "clip_ratio": 0.0,
846
- "completion_length": 1024.0,
847
- "epoch": 0.037142857142857144,
848
- "kl": 0.00037598609924316406,
849
- "learning_rate": 5.657047735161256e-06,
850
- "loss": 1.4990568161010742e-05,
851
- "reward": -0.8853072375059128,
852
- "reward_std": 0.11870932951569557,
853
- "rewards/cosine_scaled_reward": -0.4426536113023758,
854
- "rewards/format_reward": 0.0,
855
- "step": 65
856
- },
857
- {
858
- "clip_ratio": 0.0,
859
- "completion_length": 1024.0,
860
- "epoch": 0.037714285714285714,
861
- "kl": 0.000420987606048584,
862
- "learning_rate": 5.500000000000001e-06,
863
- "loss": 1.6763806343078613e-05,
864
- "reward": -0.9631430506706238,
865
- "reward_std": 0.027665999252349138,
866
- "rewards/cosine_scaled_reward": -0.4815715327858925,
867
- "rewards/format_reward": 0.0,
868
- "step": 66
869
- },
870
- {
871
- "clip_ratio": 0.0,
872
- "completion_length": 1024.0,
873
- "epoch": 0.038285714285714284,
874
- "kl": 0.0005689859390258789,
875
- "learning_rate": 5.342952264838748e-06,
876
- "loss": 2.3305416107177734e-05,
877
- "reward": -0.9261669814586639,
878
- "reward_std": 0.09819867718033493,
879
- "rewards/cosine_scaled_reward": -0.46308349817991257,
880
- "rewards/format_reward": 0.0,
881
- "step": 67
882
- },
883
- {
884
- "clip_ratio": 0.0,
885
- "completion_length": 1024.0,
886
- "epoch": 0.038857142857142854,
887
- "kl": 0.0006139874458312988,
888
- "learning_rate": 5.186095868151436e-06,
889
- "loss": 2.4430453777313232e-05,
890
- "reward": -0.9322563856840134,
891
- "reward_std": 0.06839151354506612,
892
- "rewards/cosine_scaled_reward": -0.4661281928420067,
893
- "rewards/format_reward": 0.0,
894
- "step": 68
895
- },
896
- {
897
- "clip_ratio": 0.0,
898
- "completion_length": 1024.0,
899
- "epoch": 0.03942857142857143,
900
- "kl": 0.0004551410675048828,
901
- "learning_rate": 5.02962191529556e-06,
902
- "loss": 1.753866672515869e-05,
903
- "reward": -0.947336733341217,
904
- "reward_std": 0.06602911371737719,
905
- "rewards/cosine_scaled_reward": -0.4736683666706085,
906
- "rewards/format_reward": 0.0,
907
- "step": 69
908
- },
909
- {
910
- "clip_ratio": 0.0,
911
- "completion_length": 1024.0,
912
- "epoch": 0.04,
913
- "kl": 0.0004698038101196289,
914
- "learning_rate": 4.873721045679707e-06,
915
- "loss": 1.9043684005737305e-05,
916
- "reward": -0.881416529417038,
917
- "reward_std": 0.15695088542997837,
918
- "rewards/cosine_scaled_reward": -0.440708264708519,
919
- "rewards/format_reward": 0.0,
920
- "step": 70
921
- },
922
- {
923
- "clip_ratio": 0.0,
924
- "completion_length": 1024.0,
925
- "epoch": 0.04057142857142857,
926
- "kl": 0.0005980134010314941,
927
- "learning_rate": 4.718583200498814e-06,
928
- "loss": 2.4437904357910156e-05,
929
- "reward": -0.9552861750125885,
930
- "reward_std": 0.04716450162231922,
931
- "rewards/cosine_scaled_reward": -0.47764309495687485,
932
- "rewards/format_reward": 0.0,
933
- "step": 71
934
- },
935
- {
936
- "clip_ratio": 0.0,
937
- "completion_length": 1024.0,
938
- "epoch": 0.04114285714285714,
939
- "kl": 0.00044411420822143555,
940
- "learning_rate": 4.564397391320085e-06,
941
- "loss": 1.781061291694641e-05,
942
- "reward": -0.9175944626331329,
943
- "reward_std": 0.07855925057083368,
944
- "rewards/cosine_scaled_reward": -0.45879723131656647,
945
- "rewards/format_reward": 0.0,
946
- "step": 72
947
- },
948
- {
949
- "clip_ratio": 0.0,
950
- "completion_length": 1024.0,
951
- "epoch": 0.04171428571428572,
952
- "kl": 0.0005142688751220703,
953
- "learning_rate": 4.4113514698014955e-06,
954
- "loss": 2.104230225086212e-05,
955
- "reward": -0.9591091871261597,
956
- "reward_std": 0.03032594360411167,
957
- "rewards/cosine_scaled_reward": -0.47955460846424103,
958
- "rewards/format_reward": 0.0,
959
- "step": 73
960
- },
961
- {
962
- "clip_ratio": 0.0,
963
- "completion_length": 1024.0,
964
- "epoch": 0.04228571428571429,
965
- "kl": 0.0004570484161376953,
966
- "learning_rate": 4.259631898823504e-06,
967
- "loss": 1.8224120140075684e-05,
968
- "reward": -0.9418470114469528,
969
- "reward_std": 0.09771440364420414,
970
- "rewards/cosine_scaled_reward": -0.470923513174057,
971
- "rewards/format_reward": 0.0,
972
- "step": 74
973
- },
974
- {
975
- "clip_ratio": 0.0,
976
- "completion_length": 1024.0,
977
- "epoch": 0.04285714285714286,
978
- "kl": 0.00042492151260375977,
979
- "learning_rate": 4.109423525312738e-06,
980
- "loss": 1.697242259979248e-05,
981
- "reward": -0.9031769037246704,
982
- "reward_std": 0.11431426065973938,
983
- "rewards/cosine_scaled_reward": -0.4515884444117546,
984
- "rewards/format_reward": 0.0,
985
- "step": 75
986
- },
987
- {
988
- "clip_ratio": 0.0,
989
- "completion_length": 1024.0,
990
- "epoch": 0.04342857142857143,
991
- "kl": 0.0003921389579772949,
992
- "learning_rate": 3.960909355034491e-06,
993
- "loss": 1.5683472156524658e-05,
994
- "reward": -0.8710711896419525,
995
- "reward_std": 0.16496195830404758,
996
- "rewards/cosine_scaled_reward": -0.43553559482097626,
997
- "rewards/format_reward": 0.0,
998
- "step": 76
999
- },
1000
- {
1001
- "clip_ratio": 0.0,
1002
- "completion_length": 1024.0,
1003
- "epoch": 0.044,
1004
- "kl": 0.0005483031272888184,
1005
- "learning_rate": 3.8142703296283954e-06,
1006
- "loss": 2.2258609533309937e-05,
1007
- "reward": -0.8363375961780548,
1008
- "reward_std": 0.23763815127313137,
1009
- "rewards/cosine_scaled_reward": -0.418168805539608,
1010
- "rewards/format_reward": 0.0,
1011
- "step": 77
1012
- },
1013
- {
1014
- "clip_ratio": 0.0,
1015
- "completion_length": 1024.0,
1016
- "epoch": 0.044571428571428574,
1017
- "kl": 0.0005096793174743652,
1018
- "learning_rate": 3.6696851061589e-06,
1019
- "loss": 2.06679105758667e-05,
1020
- "reward": -0.912532389163971,
1021
- "reward_std": 0.1374040930531919,
1022
- "rewards/cosine_scaled_reward": -0.45626621693372726,
1023
- "rewards/format_reward": 0.0,
1024
- "step": 78
1025
- },
1026
- {
1027
- "clip_ratio": 0.0,
1028
- "completion_length": 1024.0,
1029
- "epoch": 0.045142857142857144,
1030
- "kl": 0.0007692575454711914,
1031
- "learning_rate": 3.527329839449152e-06,
1032
- "loss": 3.053247928619385e-05,
1033
- "reward": -0.9567261040210724,
1034
- "reward_std": 0.03342599933966994,
1035
- "rewards/cosine_scaled_reward": -0.478363037109375,
1036
- "rewards/format_reward": 0.0,
1037
- "step": 79
1038
- },
1039
- {
1040
- "clip_ratio": 0.0,
1041
- "completion_length": 1024.0,
1042
- "epoch": 0.045714285714285714,
1043
- "kl": 0.0008780956268310547,
1044
- "learning_rate": 3.3873779674634932e-06,
1045
- "loss": 3.489106893539429e-05,
1046
- "reward": -0.8690381050109863,
1047
- "reward_std": 0.166263896971941,
1048
- "rewards/cosine_scaled_reward": -0.43451904505491257,
1049
- "rewards/format_reward": 0.0,
1050
- "step": 80
1051
- },
1052
- {
1053
- "clip_ratio": 0.0,
1054
- "completion_length": 1024.0,
1055
- "epoch": 0.046285714285714284,
1056
- "kl": 0.000619053840637207,
1057
- "learning_rate": 3.2500000000000015e-06,
1058
- "loss": 2.2970139980316162e-05,
1059
- "reward": -0.9703131467103958,
1060
- "reward_std": 0.018532322952523828,
1061
- "rewards/cosine_scaled_reward": -0.4851565733551979,
1062
- "rewards/format_reward": 0.0,
1063
- "step": 81
1064
- },
1065
- {
1066
- "clip_ratio": 0.0,
1067
- "completion_length": 1024.0,
1068
- "epoch": 0.046857142857142854,
1069
- "kl": 0.0005697011947631836,
1070
- "learning_rate": 3.115363310950579e-06,
1071
- "loss": 2.2605061531066895e-05,
1072
- "reward": -0.8376921266317368,
1073
- "reward_std": 0.21680933889001608,
1074
- "rewards/cosine_scaled_reward": -0.4188460633158684,
1075
- "rewards/format_reward": 0.0,
1076
- "step": 82
1077
- },
1078
- {
1079
- "clip_ratio": 0.0,
1080
- "completion_length": 1024.0,
1081
- "epoch": 0.04742857142857143,
1082
- "kl": 0.0006507635116577148,
1083
- "learning_rate": 2.98363193438164e-06,
1084
- "loss": 2.580881118774414e-05,
1085
- "reward": -0.9194770008325577,
1086
- "reward_std": 0.07644858444109559,
1087
- "rewards/cosine_scaled_reward": -0.45973849296569824,
1088
- "rewards/format_reward": 0.0,
1089
- "step": 83
1090
- },
1091
- {
1092
- "clip_ratio": 0.0,
1093
- "completion_length": 1024.0,
1094
- "epoch": 0.048,
1095
- "kl": 0.0005425214767456055,
1096
- "learning_rate": 2.854966364683872e-06,
1097
- "loss": 2.232193946838379e-05,
1098
- "reward": -0.921579897403717,
1099
- "reward_std": 0.07768060895614326,
1100
- "rewards/cosine_scaled_reward": -0.4607899561524391,
1101
- "rewards/format_reward": 0.0,
1102
- "step": 84
1103
- },
1104
- {
1105
- "clip_ratio": 0.0,
1106
- "completion_length": 1024.0,
1107
- "epoch": 0.04857142857142857,
1108
- "kl": 0.0005598664283752441,
1109
- "learning_rate": 2.7295233610345384e-06,
1110
- "loss": 2.230703830718994e-05,
1111
- "reward": -0.8130292594432831,
1112
- "reward_std": 0.21775285061448812,
1113
- "rewards/cosine_scaled_reward": -0.40651462972164154,
1114
- "rewards/format_reward": 0.0,
1115
- "step": 85
1116
- },
1117
- {
1118
- "clip_ratio": 0.0,
1119
- "completion_length": 1024.0,
1120
- "epoch": 0.04914285714285714,
1121
- "kl": 0.0005010366439819336,
1122
- "learning_rate": 2.607455756410573e-06,
1123
- "loss": 2.0213425159454346e-05,
1124
- "reward": -0.911712571978569,
1125
- "reward_std": 0.09181239921599627,
1126
- "rewards/cosine_scaled_reward": -0.4558562785387039,
1127
- "rewards/format_reward": 0.0,
1128
- "step": 86
1129
- },
1130
- {
1131
- "clip_ratio": 0.0,
1132
- "completion_length": 1024.0,
1133
- "epoch": 0.04971428571428571,
1134
- "kl": 0.0005140304565429688,
1135
- "learning_rate": 2.4889122713851397e-06,
1136
- "loss": 2.09808349609375e-05,
1137
- "reward": -0.9401500076055527,
1138
- "reward_std": 0.04285714589059353,
1139
- "rewards/cosine_scaled_reward": -0.47007501125335693,
1140
- "rewards/format_reward": 0.0,
1141
- "step": 87
1142
- },
1143
- {
1144
- "clip_ratio": 0.0,
1145
- "completion_length": 1024.0,
1146
- "epoch": 0.05028571428571429,
1147
- "kl": 0.0005880594253540039,
1148
- "learning_rate": 2.374037332934512e-06,
1149
- "loss": 2.3573637008666992e-05,
1150
- "reward": -0.8337626904249191,
1151
- "reward_std": 0.24908871483057737,
1152
- "rewards/cosine_scaled_reward": -0.41688134521245956,
1153
- "rewards/format_reward": 0.0,
1154
- "step": 88
1155
- },
1156
- {
1157
- "clip_ratio": 0.0,
1158
- "completion_length": 1024.0,
1159
- "epoch": 0.05085714285714286,
1160
- "kl": 0.0005828142166137695,
1161
- "learning_rate": 2.262970898476071e-06,
1162
- "loss": 2.3312866687774658e-05,
1163
- "reward": -0.9419961720705032,
1164
- "reward_std": 0.06526243314146996,
1165
- "rewards/cosine_scaled_reward": -0.4709980934858322,
1166
- "rewards/format_reward": 0.0,
1167
- "step": 89
1168
- },
1169
- {
1170
- "clip_ratio": 0.0,
1171
- "completion_length": 1024.0,
1172
- "epoch": 0.05142857142857143,
1173
- "kl": 0.0005592107772827148,
1174
- "learning_rate": 2.1558482853517257e-06,
1175
- "loss": 2.16066837310791e-05,
1176
- "reward": -0.9420932680368423,
1177
- "reward_std": 0.05442978721112013,
1178
- "rewards/cosine_scaled_reward": -0.47104664146900177,
1179
- "rewards/format_reward": 0.0,
1180
- "step": 90
1181
- },
1182
- {
1183
- "clip_ratio": 0.0,
1184
- "completion_length": 1024.0,
1185
- "epoch": 0.052,
1186
- "kl": 0.0005729198455810547,
1187
- "learning_rate": 2.0528000059646e-06,
1188
- "loss": 2.3186206817626953e-05,
1189
- "reward": -0.8448977470397949,
1190
- "reward_std": 0.17764843348413706,
1191
- "rewards/cosine_scaled_reward": -0.42244888097047806,
1192
- "rewards/format_reward": 0.0,
1193
- "step": 91
1194
- },
1195
- {
1196
- "clip_ratio": 0.0,
1197
- "completion_length": 1024.0,
1198
- "epoch": 0.052571428571428575,
1199
- "kl": 0.0006086826324462891,
1200
- "learning_rate": 1.953951608769752e-06,
1201
- "loss": 2.4143606424331665e-05,
1202
- "reward": -0.8687557131052017,
1203
- "reward_std": 0.13139949878677726,
1204
- "rewards/cosine_scaled_reward": -0.43437784910202026,
1205
- "rewards/format_reward": 0.0,
1206
- "step": 92
1207
- },
1208
- {
1209
- "clip_ratio": 0.0,
1210
- "completion_length": 1024.0,
1211
- "epoch": 0.053142857142857144,
1212
- "kl": 0.0004508495330810547,
1213
- "learning_rate": 1.8594235253127373e-06,
1214
- "loss": 1.7457641661167145e-05,
1215
- "reward": -0.9452957063913345,
1216
- "reward_std": 0.044932478107512,
1217
- "rewards/cosine_scaled_reward": -0.4726478382945061,
1218
- "rewards/format_reward": 0.0,
1219
- "step": 93
1220
- },
1221
- {
1222
- "clip_ratio": 0.0,
1223
- "completion_length": 1024.0,
1224
- "epoch": 0.053714285714285714,
1225
- "kl": 0.00058746337890625,
1226
- "learning_rate": 1.769330923502313e-06,
1227
- "loss": 2.3424625396728516e-05,
1228
- "reward": -0.9732565432786942,
1229
- "reward_std": 0.023667596746236086,
1230
- "rewards/cosine_scaled_reward": -0.48662828654050827,
1231
- "rewards/format_reward": 0.0,
1232
- "step": 94
1233
- },
1234
- {
1235
- "clip_ratio": 0.0,
1236
- "completion_length": 1024.0,
1237
- "epoch": 0.054285714285714284,
1238
- "kl": 0.0006615519523620605,
1239
- "learning_rate": 1.6837835672960834e-06,
1240
- "loss": 2.664327621459961e-05,
1241
- "reward": -0.8294684588909149,
1242
- "reward_std": 0.21260959655046463,
1243
- "rewards/cosine_scaled_reward": -0.41473422944545746,
1244
- "rewards/format_reward": 0.0,
1245
- "step": 95
1246
- },
1247
- {
1248
- "clip_ratio": 0.0,
1249
- "completion_length": 1024.0,
1250
- "epoch": 0.054857142857142854,
1251
- "kl": 0.0005936622619628906,
1252
- "learning_rate": 1.602885682970026e-06,
1253
- "loss": 2.310052514076233e-05,
1254
- "reward": -0.8817574381828308,
1255
- "reward_std": 0.14791762363165617,
1256
- "rewards/cosine_scaled_reward": -0.4408787190914154,
1257
- "rewards/format_reward": 0.0,
1258
- "step": 96
1259
- },
1260
- {
1261
- "clip_ratio": 0.0,
1262
- "completion_length": 1024.0,
1263
- "epoch": 0.05542857142857143,
1264
- "kl": 0.0004170536994934082,
1265
- "learning_rate": 1.526735832134829e-06,
1266
- "loss": 1.5437602996826172e-05,
1267
- "reward": -0.9217518121004105,
1268
- "reward_std": 0.05548063712194562,
1269
- "rewards/cosine_scaled_reward": -0.46087589859962463,
1270
- "rewards/format_reward": 0.0,
1271
- "step": 97
1272
- },
1273
- {
1274
- "clip_ratio": 0.0,
1275
- "completion_length": 1024.0,
1276
- "epoch": 0.056,
1277
- "kl": 0.0005135536193847656,
1278
- "learning_rate": 1.4554267916537495e-06,
1279
- "loss": 2.017989754676819e-05,
1280
- "reward": -0.9414444863796234,
1281
- "reward_std": 0.057337059173732996,
1282
- "rewards/cosine_scaled_reward": -0.4707222431898117,
1283
- "rewards/format_reward": 0.0,
1284
- "step": 98
1285
- },
1286
- {
1287
- "clip_ratio": 0.0,
1288
- "completion_length": 1024.0,
1289
- "epoch": 0.05657142857142857,
1290
- "kl": 0.0004296302795410156,
1291
- "learning_rate": 1.389045440608296e-06,
1292
- "loss": 1.6748905181884766e-05,
1293
- "reward": -0.8684801608324051,
1294
- "reward_std": 0.18490357510745525,
1295
- "rewards/cosine_scaled_reward": -0.43424008786678314,
1296
- "rewards/format_reward": 0.0,
1297
- "step": 99
1298
- },
1299
- {
1300
- "clip_ratio": 0.0,
1301
- "completion_length": 1024.0,
1302
- "epoch": 0.05714285714285714,
1303
- "kl": 0.0005539655685424805,
1304
- "learning_rate": 1.3276726544494572e-06,
1305
- "loss": 2.2009015083312988e-05,
1306
- "reward": -0.8967786431312561,
1307
- "reward_std": 0.12791539868339896,
1308
- "rewards/cosine_scaled_reward": -0.44838932156562805,
1309
- "rewards/format_reward": 0.0,
1310
- "step": 100
1311
- },
1312
- {
1313
- "epoch": 0.05714285714285714,
1314
- "step": 100,
1315
  "total_flos": 0.0,
1316
- "train_loss": 0.0013520326372236013,
1317
- "train_runtime": 36067.9715,
1318
- "train_samples_per_second": 0.044,
1319
- "train_steps_per_second": 0.003
1320
  }
1321
  ],
1322
  "logging_steps": 1,
1323
- "max_steps": 100,
1324
  "num_input_tokens_seen": 0,
1325
  "num_train_epochs": 1,
1326
  "save_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.011428571428571429,
6
  "eval_steps": 500,
7
+ "global_step": 20,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "clip_ratio": 0.0,
14
+ "completion_length": 453.625,
15
  "epoch": 0.0005714285714285715,
16
  "kl": 0.0,
17
  "learning_rate": 0.0,
18
+ "loss": -1.2665987014770508e-07,
19
+ "reward": -0.7758755832910538,
20
+ "reward_std": 0.2861072635278106,
21
+ "rewards/cosine_scaled_reward": -0.3879377990961075,
22
  "rewards/format_reward": 0.0,
23
  "step": 1
24
  },
25
  {
26
  "clip_ratio": 0.0,
27
+ "completion_length": 512.0,
28
  "epoch": 0.001142857142857143,
29
  "kl": 0.0,
30
+ "learning_rate": 5e-07,
31
+ "loss": 2.2351741790771484e-08,
32
+ "reward": -0.6860499083995819,
33
+ "reward_std": 0.3919920399785042,
34
+ "rewards/cosine_scaled_reward": -0.34302495419979095,
35
  "rewards/format_reward": 0.0,
36
  "step": 2
37
  },
38
  {
39
  "clip_ratio": 0.0,
40
+ "completion_length": 494.9375,
41
  "epoch": 0.0017142857142857142,
42
+ "kl": 0.00760650634765625,
43
+ "learning_rate": 1e-06,
44
+ "loss": 0.04199279844760895,
45
+ "reward": -0.7872379571199417,
46
+ "reward_std": 0.28899660520255566,
47
+ "rewards/cosine_scaled_reward": -0.39361898601055145,
48
  "rewards/format_reward": 0.0,
49
  "step": 3
50
  },
51
  {
52
  "clip_ratio": 0.0,
53
+ "completion_length": 484.875,
54
  "epoch": 0.002285714285714286,
55
+ "kl": 0.00399017333984375,
56
+ "learning_rate": 9.931634888554935e-07,
57
+ "loss": 0.00015985965728759766,
58
+ "reward": -0.7827824652194977,
59
+ "reward_std": 0.30724803544580936,
60
+ "rewards/cosine_scaled_reward": -0.39139123260974884,
61
  "rewards/format_reward": 0.0,
62
  "step": 4
63
  },
64
  {
65
  "clip_ratio": 0.0,
66
+ "completion_length": 498.125,
67
  "epoch": 0.002857142857142857,
68
+ "kl": 0.010009765625,
69
+ "learning_rate": 9.728616793536587e-07,
70
+ "loss": 0.016923315823078156,
71
+ "reward": -0.6835546344518661,
72
+ "reward_std": 0.4394468888640404,
73
+ "rewards/cosine_scaled_reward": -0.3417773097753525,
74
  "rewards/format_reward": 0.0,
75
  "step": 5
76
  },
77
  {
78
  "clip_ratio": 0.0,
79
+ "completion_length": 512.0,
80
  "epoch": 0.0034285714285714284,
81
+ "kl": 0.0059185028076171875,
82
+ "learning_rate": 9.397114317029974e-07,
83
+ "loss": 0.00023673847317695618,
84
+ "reward": -0.8708823472261429,
85
+ "reward_std": 0.1456776731647551,
86
+ "rewards/cosine_scaled_reward": -0.43544115871191025,
87
  "rewards/format_reward": 0.0,
88
  "step": 6
89
  },
90
  {
91
  "clip_ratio": 0.0,
92
+ "completion_length": 512.0,
93
  "epoch": 0.004,
94
+ "kl": 0.001270294189453125,
95
+ "learning_rate": 8.9471999940354e-07,
96
+ "loss": 5.0827860832214355e-05,
97
+ "reward": -0.9382074922323227,
98
+ "reward_std": 0.08606540504842997,
99
+ "rewards/cosine_scaled_reward": -0.46910375356674194,
100
  "rewards/format_reward": 0.0,
101
  "step": 7
102
  },
103
  {
104
  "clip_ratio": 0.0,
105
+ "completion_length": 480.625,
106
  "epoch": 0.004571428571428572,
107
+ "kl": 0.0019054412841796875,
108
+ "learning_rate": 8.392544243589427e-07,
109
+ "loss": 7.636845111846924e-05,
110
+ "reward": -0.7906126528978348,
111
+ "reward_std": 0.18317685835063457,
112
+ "rewards/cosine_scaled_reward": -0.3953063264489174,
113
  "rewards/format_reward": 0.0,
114
  "step": 8
115
  },
116
  {
117
  "clip_ratio": 0.0,
118
+ "completion_length": 512.0,
119
  "epoch": 0.005142857142857143,
120
+ "kl": 0.0017995834350585938,
121
+ "learning_rate": 7.75e-07,
122
+ "loss": 7.21365213394165e-05,
123
+ "reward": -0.8990036994218826,
124
+ "reward_std": 0.1606585686095059,
125
+ "rewards/cosine_scaled_reward": -0.4495018497109413,
126
  "rewards/format_reward": 0.0,
127
  "step": 9
128
  },
129
  {
130
  "clip_ratio": 0.0,
131
+ "completion_length": 512.0,
132
  "epoch": 0.005714285714285714,
133
+ "kl": 0.0041675567626953125,
134
+ "learning_rate": 7.039090644965509e-07,
135
+ "loss": 0.00016715750098228455,
136
+ "reward": -0.860070988535881,
137
+ "reward_std": 0.17052607703953981,
138
+ "rewards/cosine_scaled_reward": -0.4300354793667793,
139
  "rewards/format_reward": 0.0,
140
  "step": 10
141
  },
142
  {
143
  "clip_ratio": 0.0,
144
+ "completion_length": 477.125,
145
  "epoch": 0.006285714285714286,
146
+ "kl": 0.003407001495361328,
147
+ "learning_rate": 6.281416799501187e-07,
148
+ "loss": 0.00013599544763565063,
149
+ "reward": -0.8231181800365448,
150
+ "reward_std": 0.1259385095909238,
151
+ "rewards/cosine_scaled_reward": -0.4115590825676918,
152
  "rewards/format_reward": 0.0,
153
  "step": 11
154
  },
155
  {
156
  "clip_ratio": 0.0,
157
+ "completion_length": 512.0,
158
  "epoch": 0.006857142857142857,
159
+ "kl": 0.0012102127075195312,
160
+ "learning_rate": 5.5e-07,
161
+ "loss": 5.13419508934021e-05,
162
+ "reward": -0.9447444081306458,
163
+ "reward_std": 0.08061030774842948,
164
+ "rewards/cosine_scaled_reward": -0.4723722040653229,
165
  "rewards/format_reward": 0.0,
166
  "step": 12
167
  },
168
  {
169
  "clip_ratio": 0.0,
170
+ "completion_length": 487.9375,
171
  "epoch": 0.0074285714285714285,
172
+ "kl": 0.00119781494140625,
173
+ "learning_rate": 4.7185832004988133e-07,
174
+ "loss": 0.07869705557823181,
175
+ "reward": -0.8934571295976639,
176
+ "reward_std": 0.15774485282599926,
177
+ "rewards/cosine_scaled_reward": -0.44672856479883194,
178
  "rewards/format_reward": 0.0,
179
  "step": 13
180
  },
181
  {
182
  "clip_ratio": 0.0,
183
+ "completion_length": 466.625,
184
  "epoch": 0.008,
185
+ "kl": 0.004004955291748047,
186
+ "learning_rate": 3.9609093550344907e-07,
187
+ "loss": 0.027497582137584686,
188
+ "reward": -0.7841525673866272,
189
+ "reward_std": 0.2220854666084051,
190
+ "rewards/cosine_scaled_reward": -0.3920762911438942,
191
  "rewards/format_reward": 0.0,
192
  "step": 14
193
  },
194
  {
195
  "clip_ratio": 0.0,
196
+ "completion_length": 497.5,
197
  "epoch": 0.008571428571428572,
198
+ "kl": 0.012208938598632812,
199
+ "learning_rate": 3.250000000000001e-07,
200
+ "loss": 0.0004888176918029785,
201
+ "reward": -0.7216114401817322,
202
+ "reward_std": 0.3453192347660661,
203
+ "rewards/cosine_scaled_reward": -0.3608057275414467,
204
  "rewards/format_reward": 0.0,
205
  "step": 15
206
  },
207
  {
208
  "clip_ratio": 0.0,
209
+ "completion_length": 499.0,
210
  "epoch": 0.009142857142857144,
211
+ "kl": 0.003875732421875,
212
+ "learning_rate": 2.6074557564105724e-07,
213
+ "loss": 0.00015526264905929565,
214
+ "reward": -0.8265384286642075,
215
+ "reward_std": 0.25778803089633584,
216
+ "rewards/cosine_scaled_reward": -0.4132692217826843,
217
  "rewards/format_reward": 0.0,
218
  "step": 16
219
  },
220
  {
221
  "clip_ratio": 0.0,
222
+ "completion_length": 501.5,
223
  "epoch": 0.009714285714285713,
224
+ "kl": 0.004252910614013672,
225
+ "learning_rate": 2.0528000059645995e-07,
226
+ "loss": 0.02472507953643799,
227
+ "reward": -0.8701069504022598,
228
+ "reward_std": 0.18937412789091468,
229
+ "rewards/cosine_scaled_reward": -0.4350534752011299,
230
  "rewards/format_reward": 0.0,
231
  "step": 17
232
  },
233
  {
234
  "clip_ratio": 0.0,
235
+ "completion_length": 506.625,
236
  "epoch": 0.010285714285714285,
237
+ "kl": 0.005939483642578125,
238
+ "learning_rate": 1.6028856829700258e-07,
239
+ "loss": 0.00023746490478515625,
240
+ "reward": -0.8200783580541611,
241
+ "reward_std": 0.26157089229673147,
242
+ "rewards/cosine_scaled_reward": -0.41003918647766113,
243
  "rewards/format_reward": 0.0,
244
  "step": 18
245
  },
246
  {
247
  "clip_ratio": 0.0,
248
+ "completion_length": 446.25,
249
  "epoch": 0.010857142857142857,
250
+ "kl": 0.002822399139404297,
251
+ "learning_rate": 1.2713832064634125e-07,
252
+ "loss": 0.016675502061843872,
253
+ "reward": -0.8233697563409805,
254
+ "reward_std": 0.20013628248125315,
255
+ "rewards/cosine_scaled_reward": -0.41168487817049026,
256
  "rewards/format_reward": 0.0,
257
  "step": 19
258
  },
259
  {
260
  "clip_ratio": 0.0,
261
+ "completion_length": 496.125,
262
  "epoch": 0.011428571428571429,
263
+ "kl": 0.00678253173828125,
264
+ "learning_rate": 1.068365111445064e-07,
265
+ "loss": 0.00027292221784591675,
266
+ "reward": -0.8044158518314362,
267
+ "reward_std": 0.2423506089253351,
268
+ "rewards/cosine_scaled_reward": -0.4022079259157181,
269
  "rewards/format_reward": 0.0,
270
  "step": 20
271
  },
272
  {
273
+ "epoch": 0.011428571428571429,
274
+ "step": 20,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  "total_flos": 0.0,
276
+ "train_loss": 0.010430806130170823,
277
+ "train_runtime": 9458.0577,
278
+ "train_samples_per_second": 0.034,
279
+ "train_steps_per_second": 0.002
280
  }
281
  ],
282
  "logging_steps": 1,
283
+ "max_steps": 20,
284
  "num_input_tokens_seen": 0,
285
  "num_train_epochs": 1,
286
  "save_steps": 50,
training_metrics.txt CHANGED
@@ -1,6 +1,6 @@
1
- total_size_before (MB): 3424.75
2
- total_size_after (MB): 3407.14
3
- total_time (seconds): 36073.19
4
- ram_peak (MB): 5853.69
5
- ram_consump (MB): 3485.15
6
- disk_storage (MB): 333.13
 
1
+ total_size_before (MB): 1455.72
2
+ total_size_after (MB): 1445.40
3
+ total_time (seconds): 9466.33
4
+ ram_peak (MB): 3499.90
5
+ ram_consump (MB): 1492.71
6
+ disk_storage (MB): 616.56