| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 11.444444444444445, |
| "eval_steps": 500, |
| "global_step": 4200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.136332651670075, |
| "grad_norm": 11.269988059997559, |
| "learning_rate": 0.00010652173913043477, |
| "loss": 39.8198193359375, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.27266530334015, |
| "grad_norm": 14.962249755859375, |
| "learning_rate": 0.0002152173913043478, |
| "loss": 26.48206298828125, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.40899795501022496, |
| "grad_norm": 16.134239196777344, |
| "learning_rate": 0.0003239130434782608, |
| "loss": 19.36090087890625, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5453306066803, |
| "grad_norm": 12.964629173278809, |
| "learning_rate": 0.00043260869565217385, |
| "loss": 14.5181689453125, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6816632583503749, |
| "grad_norm": 10.061964988708496, |
| "learning_rate": 0.0005413043478260869, |
| "loss": 12.766522216796876, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8179959100204499, |
| "grad_norm": 6.430169105529785, |
| "learning_rate": 0.0005999713580566041, |
| "loss": 11.881512451171876, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9543285616905249, |
| "grad_norm": 5.836061954498291, |
| "learning_rate": 0.0005997115106245061, |
| "loss": 11.075633544921875, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0899795501022496, |
| "grad_norm": 4.700469017028809, |
| "learning_rate": 0.000599181221756225, |
| "loss": 10.015964965820313, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.2263122017723245, |
| "grad_norm": 4.282314777374268, |
| "learning_rate": 0.0005983809699521793, |
| "loss": 9.592711181640626, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.3626448534423994, |
| "grad_norm": 3.662461280822754, |
| "learning_rate": 0.0005973114773109183, |
| "loss": 9.211348266601563, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4989775051124745, |
| "grad_norm": 3.783862590789795, |
| "learning_rate": 0.0005959737088775463, |
| "loss": 8.782565307617187, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.6353101567825494, |
| "grad_norm": 3.429069757461548, |
| "learning_rate": 0.0005943688717729229, |
| "loss": 8.386593627929688, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.7716428084526243, |
| "grad_norm": 2.529482841491699, |
| "learning_rate": 0.0005924984141044315, |
| "loss": 8.06916259765625, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.9079754601226995, |
| "grad_norm": 2.4761486053466797, |
| "learning_rate": 0.0005903640236592949, |
| "loss": 7.7736474609375, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.043626448534424, |
| "grad_norm": 2.3675243854522705, |
| "learning_rate": 0.0005879676263816192, |
| "loss": 7.4699859619140625, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.179959100204499, |
| "grad_norm": 2.305575132369995, |
| "learning_rate": 0.0005853113846345384, |
| "loss": 7.305302734375, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.316291751874574, |
| "grad_norm": 2.3954548835754395, |
| "learning_rate": 0.0005823976952490298, |
| "loss": 7.102890625, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.452624403544649, |
| "grad_norm": 2.2483468055725098, |
| "learning_rate": 0.0005792291873611596, |
| "loss": 6.9435498046875, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.588957055214724, |
| "grad_norm": 2.0154542922973633, |
| "learning_rate": 0.00057580872003971, |
| "loss": 6.761861572265625, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.7252897068847988, |
| "grad_norm": 2.155539035797119, |
| "learning_rate": 0.00057213937970633, |
| "loss": 6.58298583984375, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.861622358554874, |
| "grad_norm": 1.7670893669128418, |
| "learning_rate": 0.0005682244773505363, |
| "loss": 6.419741821289063, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.997955010224949, |
| "grad_norm": 2.3611533641815186, |
| "learning_rate": 0.0005640675455420765, |
| "loss": 6.288121948242187, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.1336059986366736, |
| "grad_norm": 1.8783221244812012, |
| "learning_rate": 0.0005596723352433551, |
| "loss": 6.090737915039062, |
| "step": 1150 |
| }, |
| { |
| "epoch": 3.2699386503067487, |
| "grad_norm": 2.2795541286468506, |
| "learning_rate": 0.0005550428124247912, |
| "loss": 5.979439086914063, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.4062713019768234, |
| "grad_norm": 1.3074142932891846, |
| "learning_rate": 0.0005501831544861696, |
| "loss": 5.897046508789063, |
| "step": 1250 |
| }, |
| { |
| "epoch": 3.5426039536468985, |
| "grad_norm": 1.259432077407837, |
| "learning_rate": 0.0005450977464872081, |
| "loss": 5.734913940429688, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.6789366053169736, |
| "grad_norm": 1.4511066675186157, |
| "learning_rate": 0.0005397911771907473, |
| "loss": 5.604786987304688, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.8152692569870483, |
| "grad_norm": 1.428958535194397, |
| "learning_rate": 0.0005342682349221297, |
| "loss": 5.445667114257812, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.9516019086571235, |
| "grad_norm": 1.2921603918075562, |
| "learning_rate": 0.000528533903248506, |
| "loss": 5.391282958984375, |
| "step": 1450 |
| }, |
| { |
| "epoch": 4.087252897068848, |
| "grad_norm": 1.3616167306900024, |
| "learning_rate": 0.0005225933564819676, |
| "loss": 5.183615112304688, |
| "step": 1500 |
| }, |
| { |
| "epoch": 4.223585548738923, |
| "grad_norm": 1.2340154647827148, |
| "learning_rate": 0.0005164519550105623, |
| "loss": 5.060681457519531, |
| "step": 1550 |
| }, |
| { |
| "epoch": 4.359918200408998, |
| "grad_norm": 1.5526384115219116, |
| "learning_rate": 0.0005101152404614052, |
| "loss": 4.902400817871094, |
| "step": 1600 |
| }, |
| { |
| "epoch": 4.496250852079073, |
| "grad_norm": 1.4958291053771973, |
| "learning_rate": 0.0005035889307002529, |
| "loss": 4.787099304199219, |
| "step": 1650 |
| }, |
| { |
| "epoch": 4.632583503749148, |
| "grad_norm": 1.4236118793487549, |
| "learning_rate": 0.0004968789146720478, |
| "loss": 4.660638427734375, |
| "step": 1700 |
| }, |
| { |
| "epoch": 4.768916155419223, |
| "grad_norm": 1.3592256307601929, |
| "learning_rate": 0.0004899912470870939, |
| "loss": 4.454691162109375, |
| "step": 1750 |
| }, |
| { |
| "epoch": 4.905248807089298, |
| "grad_norm": 1.502578616142273, |
| "learning_rate": 0.00048293214295765303, |
| "loss": 4.297479553222656, |
| "step": 1800 |
| }, |
| { |
| "epoch": 5.040899795501023, |
| "grad_norm": 1.7106261253356934, |
| "learning_rate": 0.0004757079719898968, |
| "loss": 4.13409423828125, |
| "step": 1850 |
| }, |
| { |
| "epoch": 5.1772324471710975, |
| "grad_norm": 1.2559808492660522, |
| "learning_rate": 0.00046832525283627114, |
| "loss": 3.96047607421875, |
| "step": 1900 |
| }, |
| { |
| "epoch": 5.313565098841172, |
| "grad_norm": 1.2694923877716064, |
| "learning_rate": 0.0004607906472134603, |
| "loss": 3.8196981811523436, |
| "step": 1950 |
| }, |
| { |
| "epoch": 5.449897750511248, |
| "grad_norm": 1.6992137432098389, |
| "learning_rate": 0.0004531109538912596, |
| "loss": 3.6628662109375, |
| "step": 2000 |
| }, |
| { |
| "epoch": 5.5862304021813225, |
| "grad_norm": 1.453190803527832, |
| "learning_rate": 0.00044529310255777855, |
| "loss": 3.52033935546875, |
| "step": 2050 |
| }, |
| { |
| "epoch": 5.722563053851397, |
| "grad_norm": 1.504873514175415, |
| "learning_rate": 0.0004373441475665124, |
| "loss": 3.3988775634765624, |
| "step": 2100 |
| }, |
| { |
| "epoch": 5.858895705521473, |
| "grad_norm": 1.4465556144714355, |
| "learning_rate": 0.00042927126157092204, |
| "loss": 3.2702841186523437, |
| "step": 2150 |
| }, |
| { |
| "epoch": 5.9952283571915475, |
| "grad_norm": 1.4014344215393066, |
| "learning_rate": 0.0004210817290522684, |
| "loss": 3.1291094970703126, |
| "step": 2200 |
| }, |
| { |
| "epoch": 6.130879345603272, |
| "grad_norm": 1.645821213722229, |
| "learning_rate": 0.00041278293974653904, |
| "loss": 2.936179504394531, |
| "step": 2250 |
| }, |
| { |
| "epoch": 6.267211997273347, |
| "grad_norm": 1.6427346467971802, |
| "learning_rate": 0.00040438238197640066, |
| "loss": 2.857735900878906, |
| "step": 2300 |
| }, |
| { |
| "epoch": 6.403544648943422, |
| "grad_norm": 1.66471529006958, |
| "learning_rate": 0.00039588763589419156, |
| "loss": 2.748570556640625, |
| "step": 2350 |
| }, |
| { |
| "epoch": 6.539877300613497, |
| "grad_norm": 1.5286723375320435, |
| "learning_rate": 0.0003873063666420535, |
| "loss": 2.6635064697265625, |
| "step": 2400 |
| }, |
| { |
| "epoch": 6.676209952283572, |
| "grad_norm": 1.5245234966278076, |
| "learning_rate": 0.00037864631743537395, |
| "loss": 2.556291046142578, |
| "step": 2450 |
| }, |
| { |
| "epoch": 6.812542603953647, |
| "grad_norm": 1.4000400304794312, |
| "learning_rate": 0.000369915302575779, |
| "loss": 2.4817964172363283, |
| "step": 2500 |
| }, |
| { |
| "epoch": 6.948875255623722, |
| "grad_norm": 1.3968268632888794, |
| "learning_rate": 0.00036112120039998323, |
| "loss": 2.362508087158203, |
| "step": 2550 |
| }, |
| { |
| "epoch": 7.084526244035446, |
| "grad_norm": 1.6692546606063843, |
| "learning_rate": 0.0003522719461708582, |
| "loss": 2.273824005126953, |
| "step": 2600 |
| }, |
| { |
| "epoch": 7.220858895705521, |
| "grad_norm": 1.4489309787750244, |
| "learning_rate": 0.00034337552491713324, |
| "loss": 2.1658897399902344, |
| "step": 2650 |
| }, |
| { |
| "epoch": 7.357191547375597, |
| "grad_norm": 1.5687353610992432, |
| "learning_rate": 0.00033443996422819145, |
| "loss": 2.108182220458984, |
| "step": 2700 |
| }, |
| { |
| "epoch": 7.493524199045671, |
| "grad_norm": 1.6350905895233154, |
| "learning_rate": 0.00032547332701046195, |
| "loss": 1.99987060546875, |
| "step": 2750 |
| }, |
| { |
| "epoch": 7.629856850715746, |
| "grad_norm": 1.5019129514694214, |
| "learning_rate": 0.0003164837042119428, |
| "loss": 1.9454510498046875, |
| "step": 2800 |
| }, |
| { |
| "epoch": 7.766189502385822, |
| "grad_norm": 1.4423465728759766, |
| "learning_rate": 0.00030747920752142186, |
| "loss": 1.9158531188964845, |
| "step": 2850 |
| }, |
| { |
| "epoch": 7.902522154055896, |
| "grad_norm": 1.5868362188339233, |
| "learning_rate": 0.0002984679620489827, |
| "loss": 1.8568917846679687, |
| "step": 2900 |
| }, |
| { |
| "epoch": 8.03817314246762, |
| "grad_norm": 1.7355551719665527, |
| "learning_rate": 0.0002894580989943989, |
| "loss": 1.7664053344726562, |
| "step": 2950 |
| }, |
| { |
| "epoch": 8.174505794137696, |
| "grad_norm": 1.4344327449798584, |
| "learning_rate": 0.0002804577483100344, |
| "loss": 1.6748054504394532, |
| "step": 3000 |
| }, |
| { |
| "epoch": 8.310838445807772, |
| "grad_norm": 1.6083476543426514, |
| "learning_rate": 0.00027147503136486895, |
| "loss": 1.6389869689941405, |
| "step": 3050 |
| }, |
| { |
| "epoch": 8.447171097477845, |
| "grad_norm": 1.412381649017334, |
| "learning_rate": 0.0002625180536162685, |
| "loss": 1.6107588195800782, |
| "step": 3100 |
| }, |
| { |
| "epoch": 8.583503749147921, |
| "grad_norm": 1.4404499530792236, |
| "learning_rate": 0.00025359489729611366, |
| "loss": 1.558354034423828, |
| "step": 3150 |
| }, |
| { |
| "epoch": 8.719836400817996, |
| "grad_norm": 1.394539713859558, |
| "learning_rate": 0.0002447136141178857, |
| "loss": 1.5231396484375, |
| "step": 3200 |
| }, |
| { |
| "epoch": 8.85616905248807, |
| "grad_norm": 1.4844084978103638, |
| "learning_rate": 0.00023588221801128917, |
| "loss": 1.4771731567382813, |
| "step": 3250 |
| }, |
| { |
| "epoch": 8.992501704158146, |
| "grad_norm": 1.3957374095916748, |
| "learning_rate": 0.0002271086778909701, |
| "loss": 1.4401710510253907, |
| "step": 3300 |
| }, |
| { |
| "epoch": 9.12815269256987, |
| "grad_norm": 1.4386154413223267, |
| "learning_rate": 0.00021840091046585182, |
| "loss": 1.3497396850585937, |
| "step": 3350 |
| }, |
| { |
| "epoch": 9.264485344239946, |
| "grad_norm": 1.4959100484848022, |
| "learning_rate": 0.000209766773095578, |
| "loss": 1.3368931579589844, |
| "step": 3400 |
| }, |
| { |
| "epoch": 9.400817995910021, |
| "grad_norm": 1.3249437808990479, |
| "learning_rate": 0.00020121405670051008, |
| "loss": 1.297091064453125, |
| "step": 3450 |
| }, |
| { |
| "epoch": 9.537150647580095, |
| "grad_norm": 1.3749561309814453, |
| "learning_rate": 0.00019275047873167374, |
| "loss": 1.260106658935547, |
| "step": 3500 |
| }, |
| { |
| "epoch": 9.67348329925017, |
| "grad_norm": 1.4010766744613647, |
| "learning_rate": 0.0001843836762070014, |
| "loss": 1.239128646850586, |
| "step": 3550 |
| }, |
| { |
| "epoch": 9.809815950920246, |
| "grad_norm": 1.5308102369308472, |
| "learning_rate": 0.00017612119882015126, |
| "loss": 1.1977056121826173, |
| "step": 3600 |
| }, |
| { |
| "epoch": 9.94614860259032, |
| "grad_norm": 1.3873751163482666, |
| "learning_rate": 0.00016797050212812275, |
| "loss": 1.1842040252685546, |
| "step": 3650 |
| }, |
| { |
| "epoch": 10.081799591002046, |
| "grad_norm": 1.3666012287139893, |
| "learning_rate": 0.00015993894082381616, |
| "loss": 1.1095658111572266, |
| "step": 3700 |
| }, |
| { |
| "epoch": 10.21813224267212, |
| "grad_norm": 1.3528972864151, |
| "learning_rate": 0.00015203376209960474, |
| "loss": 1.103120346069336, |
| "step": 3750 |
| }, |
| { |
| "epoch": 10.354464894342195, |
| "grad_norm": 1.3081281185150146, |
| "learning_rate": 0.00014426209910790887, |
| "loss": 1.0691104125976563, |
| "step": 3800 |
| }, |
| { |
| "epoch": 10.49079754601227, |
| "grad_norm": 1.3515572547912598, |
| "learning_rate": 0.00013663096452467343, |
| "loss": 1.0644143676757813, |
| "step": 3850 |
| }, |
| { |
| "epoch": 10.627130197682344, |
| "grad_norm": 1.2935131788253784, |
| "learning_rate": 0.00012914724422155598, |
| "loss": 1.0334495544433593, |
| "step": 3900 |
| }, |
| { |
| "epoch": 10.76346284935242, |
| "grad_norm": 1.3209459781646729, |
| "learning_rate": 0.00012181769105253435, |
| "loss": 1.0103805541992188, |
| "step": 3950 |
| }, |
| { |
| "epoch": 10.899795501022496, |
| "grad_norm": 1.324385643005371, |
| "learning_rate": 0.00011464891876054252, |
| "loss": 0.990460433959961, |
| "step": 4000 |
| }, |
| { |
| "epoch": 11.03544648943422, |
| "grad_norm": 1.374879002571106, |
| "learning_rate": 0.00010764739600963116, |
| "loss": 0.9643755340576172, |
| "step": 4050 |
| }, |
| { |
| "epoch": 11.171779141104295, |
| "grad_norm": 1.275993824005127, |
| "learning_rate": 0.00010081944054803842, |
| "loss": 0.936119155883789, |
| "step": 4100 |
| }, |
| { |
| "epoch": 11.308111792774369, |
| "grad_norm": 1.2590258121490479, |
| "learning_rate": 9.417121350743844e-05, |
| "loss": 0.9281440734863281, |
| "step": 4150 |
| }, |
| { |
| "epoch": 11.444444444444445, |
| "grad_norm": 1.2088381052017212, |
| "learning_rate": 8.770871384351085e-05, |
| "loss": 0.9070972442626953, |
| "step": 4200 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 5505, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 15, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.989600444122399e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|