{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.6834102169827438,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006834102169827439,
"grad_norm": 1.7022907733917236,
"learning_rate": 1.2121212121212122e-05,
"loss": 3.5654,
"step": 10
},
{
"epoch": 0.013668204339654879,
"grad_norm": 0.8007484078407288,
"learning_rate": 2.7272727272727273e-05,
"loss": 3.0866,
"step": 20
},
{
"epoch": 0.020502306509482315,
"grad_norm": 2.242196559906006,
"learning_rate": 4.242424242424243e-05,
"loss": 2.8859,
"step": 30
},
{
"epoch": 0.027336408679309757,
"grad_norm": 2.408808469772339,
"learning_rate": 5.757575757575758e-05,
"loss": 2.8438,
"step": 40
},
{
"epoch": 0.03417051084913719,
"grad_norm": 1.8810335397720337,
"learning_rate": 7.272727272727273e-05,
"loss": 2.6887,
"step": 50
},
{
"epoch": 0.04100461301896463,
"grad_norm": 1.7286051511764526,
"learning_rate": 8.787878787878789e-05,
"loss": 2.457,
"step": 60
},
{
"epoch": 0.04783871518879207,
"grad_norm": 1.4562296867370605,
"learning_rate": 0.00010303030303030303,
"loss": 2.0197,
"step": 70
},
{
"epoch": 0.054672817358619515,
"grad_norm": 1.280148983001709,
"learning_rate": 0.0001181818181818182,
"loss": 2.3125,
"step": 80
},
{
"epoch": 0.06150691952844695,
"grad_norm": 4.578958988189697,
"learning_rate": 0.00013333333333333334,
"loss": 2.1067,
"step": 90
},
{
"epoch": 0.06834102169827438,
"grad_norm": 0.989810585975647,
"learning_rate": 0.00014848484848484849,
"loss": 2.0309,
"step": 100
},
{
"epoch": 0.07517512386810182,
      "grad_norm": null,
"learning_rate": 0.00016363636363636366,
"loss": 2.0687,
"step": 110
},
{
"epoch": 0.08200922603792926,
"grad_norm": 5.370285511016846,
"learning_rate": 0.00017727272727272728,
"loss": 2.2035,
"step": 120
},
{
"epoch": 0.0888433282077567,
"grad_norm": 1.0226854085922241,
"learning_rate": 0.00019242424242424245,
"loss": 1.7328,
"step": 130
},
{
"epoch": 0.09567743037758414,
"grad_norm": 8.442233085632324,
"learning_rate": 0.00019976525821596247,
"loss": 1.7355,
"step": 140
},
{
"epoch": 0.10251153254741159,
"grad_norm": 3.448438882827759,
"learning_rate": 0.00019929577464788734,
"loss": 2.0396,
"step": 150
},
{
"epoch": 0.10934563471723903,
"grad_norm": 3.707597255706787,
"learning_rate": 0.0001988262910798122,
"loss": 1.8603,
"step": 160
},
{
"epoch": 0.11617973688706647,
"grad_norm": 2.0195584297180176,
"learning_rate": 0.00019835680751173712,
"loss": 2.1021,
"step": 170
},
{
"epoch": 0.1230138390568939,
"grad_norm": 2.4128806591033936,
"learning_rate": 0.00019788732394366199,
"loss": 2.0582,
"step": 180
},
{
"epoch": 0.12984794122672133,
"grad_norm": 2.707796573638916,
"learning_rate": 0.00019741784037558685,
"loss": 2.1312,
"step": 190
},
{
"epoch": 0.13668204339654877,
"grad_norm": 2.6425483226776123,
"learning_rate": 0.00019694835680751174,
"loss": 2.086,
"step": 200
},
{
"epoch": 0.1435161455663762,
"grad_norm": 2.4248363971710205,
"learning_rate": 0.00019647887323943664,
"loss": 2.0576,
"step": 210
},
{
"epoch": 0.15035024773620365,
"grad_norm": 1.1113543510437012,
"learning_rate": 0.0001960093896713615,
"loss": 1.7884,
"step": 220
},
{
"epoch": 0.15718434990603108,
"grad_norm": 3.8414552211761475,
"learning_rate": 0.0001955399061032864,
"loss": 2.3521,
"step": 230
},
{
"epoch": 0.16401845207585852,
"grad_norm": 3.393040180206299,
"learning_rate": 0.0001950704225352113,
"loss": 2.0928,
"step": 240
},
{
"epoch": 0.17085255424568596,
"grad_norm": 3.5314691066741943,
"learning_rate": 0.00019460093896713615,
"loss": 1.9308,
"step": 250
},
{
"epoch": 0.1776866564155134,
"grad_norm": 5.881885051727295,
"learning_rate": 0.00019417840375586857,
"loss": 1.9244,
"step": 260
},
{
"epoch": 0.18452075858534084,
"grad_norm": 1.4201581478118896,
"learning_rate": 0.00019370892018779343,
"loss": 1.872,
"step": 270
},
{
"epoch": 0.19135486075516828,
"grad_norm": 2.751593828201294,
"learning_rate": 0.00019323943661971832,
"loss": 1.9267,
"step": 280
},
{
"epoch": 0.19818896292499572,
"grad_norm": 0.9784806966781616,
"learning_rate": 0.00019276995305164322,
"loss": 1.6952,
"step": 290
},
{
"epoch": 0.20502306509482318,
"grad_norm": 2.9738731384277344,
"learning_rate": 0.00019230046948356808,
"loss": 1.8987,
"step": 300
},
{
"epoch": 0.21185716726465062,
"grad_norm": 2.224365472793579,
"learning_rate": 0.00019183098591549297,
"loss": 2.0835,
"step": 310
},
{
"epoch": 0.21869126943447806,
"grad_norm": 1.2849421501159668,
"learning_rate": 0.00019136150234741784,
"loss": 1.8553,
"step": 320
},
{
"epoch": 0.2255253716043055,
"grad_norm": 3.2781999111175537,
"learning_rate": 0.00019089201877934273,
"loss": 1.8121,
"step": 330
},
{
"epoch": 0.23235947377413294,
"grad_norm": 0.7185825109481812,
"learning_rate": 0.0001904225352112676,
"loss": 1.9595,
"step": 340
},
{
"epoch": 0.23919357594396037,
"grad_norm": 4.366527080535889,
"learning_rate": 0.0001899530516431925,
"loss": 1.812,
"step": 350
},
{
"epoch": 0.2460276781137878,
"grad_norm": 4.314450740814209,
"learning_rate": 0.00018948356807511738,
"loss": 1.8416,
"step": 360
},
{
"epoch": 0.2528617802836152,
"grad_norm": 2.956653356552124,
"learning_rate": 0.00018901408450704225,
"loss": 1.9802,
"step": 370
},
{
"epoch": 0.25969588245344266,
"grad_norm": 1.395255208015442,
"learning_rate": 0.00018854460093896714,
"loss": 2.1336,
"step": 380
},
{
"epoch": 0.2665299846232701,
"grad_norm": 2.8896381855010986,
"learning_rate": 0.00018807511737089204,
"loss": 1.8297,
"step": 390
},
{
"epoch": 0.27336408679309754,
"grad_norm": 2.0925827026367188,
"learning_rate": 0.0001876056338028169,
"loss": 1.8318,
"step": 400
},
{
"epoch": 0.280198188962925,
"grad_norm": 1.436551809310913,
"learning_rate": 0.0001871361502347418,
"loss": 1.8223,
"step": 410
},
{
"epoch": 0.2870322911327524,
"grad_norm": 0.9357802867889404,
"learning_rate": 0.0001866666666666667,
"loss": 1.8287,
"step": 420
},
{
"epoch": 0.29386639330257985,
"grad_norm": 3.6884915828704834,
"learning_rate": 0.00018619718309859155,
"loss": 1.8025,
"step": 430
},
{
"epoch": 0.3007004954724073,
"grad_norm": 2.153522491455078,
"learning_rate": 0.00018572769953051642,
"loss": 1.5475,
"step": 440
},
{
"epoch": 0.30753459764223473,
"grad_norm": 3.496854305267334,
"learning_rate": 0.00018525821596244134,
"loss": 1.782,
"step": 450
},
{
"epoch": 0.31436869981206217,
"grad_norm": 2.166901111602783,
"learning_rate": 0.0001847887323943662,
"loss": 1.5982,
"step": 460
},
{
"epoch": 0.3212028019818896,
"grad_norm": 2.4054338932037354,
"learning_rate": 0.00018431924882629107,
"loss": 2.0201,
"step": 470
},
{
"epoch": 0.32803690415171705,
"grad_norm": 1.3764829635620117,
"learning_rate": 0.000183849765258216,
"loss": 1.5294,
"step": 480
},
{
"epoch": 0.3348710063215445,
"grad_norm": 5.117223262786865,
"learning_rate": 0.00018338028169014085,
"loss": 1.812,
"step": 490
},
{
"epoch": 0.3417051084913719,
"grad_norm": 1.7153640985488892,
"learning_rate": 0.00018291079812206572,
"loss": 1.7249,
"step": 500
},
{
"epoch": 0.34853921066119936,
"grad_norm": 1.0843334197998047,
"learning_rate": 0.00018244131455399064,
"loss": 1.6918,
"step": 510
},
{
"epoch": 0.3553733128310268,
"grad_norm": 3.17716383934021,
"learning_rate": 0.0001819718309859155,
"loss": 1.6265,
"step": 520
},
{
"epoch": 0.36220741500085424,
"grad_norm": 2.1360270977020264,
"learning_rate": 0.00018150234741784037,
"loss": 1.9059,
"step": 530
},
{
"epoch": 0.3690415171706817,
"grad_norm": 2.659409284591675,
"learning_rate": 0.0001810328638497653,
"loss": 1.8237,
"step": 540
},
{
"epoch": 0.3758756193405091,
"grad_norm": 1.9574990272521973,
"learning_rate": 0.00018056338028169016,
"loss": 2.0552,
"step": 550
},
{
"epoch": 0.38270972151033655,
"grad_norm": 3.7215640544891357,
"learning_rate": 0.00018009389671361502,
"loss": 1.7518,
"step": 560
},
{
"epoch": 0.389543823680164,
"grad_norm": 5.685362339019775,
"learning_rate": 0.00017962441314553991,
"loss": 1.9423,
"step": 570
},
{
"epoch": 0.39637792584999143,
"grad_norm": 5.373042106628418,
"learning_rate": 0.0001791549295774648,
"loss": 1.9343,
"step": 580
},
{
"epoch": 0.40321202801981887,
"grad_norm": 3.321650981903076,
"learning_rate": 0.00017868544600938967,
"loss": 1.718,
"step": 590
},
{
"epoch": 0.41004613018964636,
"grad_norm": 1.3837800025939941,
"learning_rate": 0.00017821596244131457,
"loss": 1.7754,
"step": 600
},
{
"epoch": 0.4168802323594738,
"grad_norm": 1.1874879598617554,
"learning_rate": 0.00017774647887323946,
"loss": 1.6449,
"step": 610
},
{
"epoch": 0.42371433452930124,
"grad_norm": 2.3453457355499268,
"learning_rate": 0.00017727699530516432,
"loss": 1.9001,
"step": 620
},
{
"epoch": 0.4305484366991287,
"grad_norm": 1.9375288486480713,
"learning_rate": 0.00017680751173708922,
"loss": 1.6097,
"step": 630
},
{
"epoch": 0.4373825388689561,
"grad_norm": 3.0950772762298584,
"learning_rate": 0.0001763380281690141,
"loss": 1.9017,
"step": 640
},
{
"epoch": 0.44421664103878356,
"grad_norm": 3.009223699569702,
"learning_rate": 0.00017586854460093898,
"loss": 2.1678,
"step": 650
},
{
"epoch": 0.451050743208611,
"grad_norm": 3.1125118732452393,
"learning_rate": 0.00017539906103286384,
"loss": 1.5252,
"step": 660
},
{
"epoch": 0.45788484537843843,
"grad_norm": 1.9371854066848755,
"learning_rate": 0.00017492957746478873,
"loss": 1.642,
"step": 670
},
{
"epoch": 0.46471894754826587,
"grad_norm": 0.8981029987335205,
"learning_rate": 0.00017446009389671363,
"loss": 2.1059,
"step": 680
},
{
"epoch": 0.4715530497180933,
"grad_norm": 0.9644233584403992,
"learning_rate": 0.0001739906103286385,
"loss": 1.482,
"step": 690
},
{
"epoch": 0.47838715188792075,
"grad_norm": 1.7749234437942505,
"learning_rate": 0.00017352112676056338,
"loss": 1.6032,
"step": 700
},
{
"epoch": 0.4852212540577482,
"grad_norm": 1.8107513189315796,
"learning_rate": 0.00017305164319248828,
"loss": 1.6941,
"step": 710
},
{
"epoch": 0.4920553562275756,
"grad_norm": 1.427687644958496,
"learning_rate": 0.00017258215962441314,
"loss": 1.5501,
"step": 720
},
{
"epoch": 0.49888945839740306,
"grad_norm": 2.521240472793579,
"learning_rate": 0.00017211267605633804,
"loss": 1.9205,
"step": 730
},
{
"epoch": 0.5057235605672304,
"grad_norm": 5.503659725189209,
"learning_rate": 0.00017164319248826293,
"loss": 1.7239,
"step": 740
},
{
"epoch": 0.5125576627370579,
"grad_norm": 4.041492462158203,
"learning_rate": 0.0001711737089201878,
"loss": 1.9131,
"step": 750
},
{
"epoch": 0.5193917649068853,
"grad_norm": 3.602377414703369,
"learning_rate": 0.0001707042253521127,
"loss": 1.5152,
"step": 760
},
{
"epoch": 0.5262258670767128,
"grad_norm": 3.496152639389038,
"learning_rate": 0.00017023474178403758,
"loss": 1.5333,
"step": 770
},
{
"epoch": 0.5330599692465402,
"grad_norm": 4.799586772918701,
"learning_rate": 0.00016976525821596245,
"loss": 1.5604,
"step": 780
},
{
"epoch": 0.5398940714163677,
"grad_norm": 1.314289927482605,
"learning_rate": 0.00016929577464788734,
"loss": 1.8581,
"step": 790
},
{
"epoch": 0.5467281735861951,
"grad_norm": 1.540637731552124,
"learning_rate": 0.0001688262910798122,
"loss": 1.9712,
"step": 800
},
{
"epoch": 0.5535622757560226,
"grad_norm": 1.2992823123931885,
"learning_rate": 0.0001683568075117371,
"loss": 1.6065,
"step": 810
},
{
"epoch": 0.56039637792585,
"grad_norm": 0.7714009881019592,
"learning_rate": 0.000167887323943662,
"loss": 1.8902,
"step": 820
},
{
"epoch": 0.5672304800956774,
"grad_norm": 1.2016668319702148,
"learning_rate": 0.00016741784037558685,
"loss": 1.854,
"step": 830
},
{
"epoch": 0.5740645822655048,
"grad_norm": 1.9129397869110107,
"learning_rate": 0.00016694835680751175,
"loss": 1.9661,
"step": 840
},
{
"epoch": 0.5808986844353323,
"grad_norm": 1.5583465099334717,
"learning_rate": 0.00016647887323943664,
"loss": 1.4921,
"step": 850
},
{
"epoch": 0.5877327866051597,
"grad_norm": 1.217874526977539,
"learning_rate": 0.0001660093896713615,
"loss": 1.6486,
"step": 860
},
{
"epoch": 0.5945668887749872,
"grad_norm": 2.096747398376465,
"learning_rate": 0.0001655399061032864,
"loss": 1.632,
"step": 870
},
{
"epoch": 0.6014009909448146,
"grad_norm": 1.9565995931625366,
"learning_rate": 0.0001650704225352113,
"loss": 1.6879,
"step": 880
},
{
"epoch": 0.6082350931146421,
"grad_norm": 2.8614614009857178,
"learning_rate": 0.00016460093896713616,
"loss": 1.7156,
"step": 890
},
{
"epoch": 0.6150691952844695,
"grad_norm": 1.9752906560897827,
"learning_rate": 0.00016413145539906105,
"loss": 1.8497,
"step": 900
},
{
"epoch": 0.621903297454297,
"grad_norm": 1.1667951345443726,
"learning_rate": 0.00016366197183098591,
"loss": 1.9354,
"step": 910
},
{
"epoch": 0.6287373996241243,
"grad_norm": 3.9172396659851074,
"learning_rate": 0.0001631924882629108,
"loss": 1.6389,
"step": 920
},
{
"epoch": 0.6355715017939518,
"grad_norm": 2.9594082832336426,
"learning_rate": 0.00016272300469483567,
"loss": 2.0553,
"step": 930
},
{
"epoch": 0.6424056039637792,
"grad_norm": 2.566627264022827,
"learning_rate": 0.00016225352112676057,
"loss": 1.547,
"step": 940
},
{
"epoch": 0.6492397061336067,
"grad_norm": 2.285865306854248,
"learning_rate": 0.00016178403755868546,
"loss": 1.6803,
"step": 950
},
{
"epoch": 0.6560738083034341,
"grad_norm": 2.1016080379486084,
"learning_rate": 0.00016131455399061032,
"loss": 1.6278,
"step": 960
},
{
"epoch": 0.6629079104732616,
"grad_norm": 2.409546375274658,
"learning_rate": 0.00016084507042253522,
"loss": 1.6959,
"step": 970
},
{
"epoch": 0.669742012643089,
"grad_norm": 3.1356201171875,
"learning_rate": 0.0001603755868544601,
"loss": 1.6983,
"step": 980
},
{
"epoch": 0.6765761148129165,
"grad_norm": 2.00640869140625,
"learning_rate": 0.00015990610328638498,
"loss": 1.5737,
"step": 990
},
{
"epoch": 0.6834102169827438,
"grad_norm": 2.788975477218628,
"learning_rate": 0.00015943661971830987,
"loss": 1.8415,
"step": 1000
}
],
"logging_steps": 10,
"max_steps": 4392,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.7056343653711872e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}