Invalid JSON: Unexpected token 'I', ..."ad_norm": Infinity, "... is not valid JSON
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6834102169827438,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006834102169827439,
      "grad_norm": 1.7022907733917236,
      "learning_rate": 1.2121212121212122e-05,
      "loss": 3.5654,
      "step": 10
    },
    {
      "epoch": 0.013668204339654879,
      "grad_norm": 0.8007484078407288,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 3.0866,
      "step": 20
    },
    {
      "epoch": 0.020502306509482315,
      "grad_norm": 2.242196559906006,
      "learning_rate": 4.242424242424243e-05,
      "loss": 2.8859,
      "step": 30
    },
    {
      "epoch": 0.027336408679309757,
      "grad_norm": 2.408808469772339,
      "learning_rate": 5.757575757575758e-05,
      "loss": 2.8438,
      "step": 40
    },
    {
      "epoch": 0.03417051084913719,
      "grad_norm": 1.8810335397720337,
      "learning_rate": 7.272727272727273e-05,
      "loss": 2.6887,
      "step": 50
    },
    {
      "epoch": 0.04100461301896463,
      "grad_norm": 1.7286051511764526,
      "learning_rate": 8.787878787878789e-05,
      "loss": 2.457,
      "step": 60
    },
    {
      "epoch": 0.04783871518879207,
      "grad_norm": 1.4562296867370605,
      "learning_rate": 0.00010303030303030303,
      "loss": 2.0197,
      "step": 70
    },
    {
      "epoch": 0.054672817358619515,
      "grad_norm": 1.280148983001709,
      "learning_rate": 0.0001181818181818182,
      "loss": 2.3125,
      "step": 80
    },
    {
      "epoch": 0.06150691952844695,
      "grad_norm": 4.578958988189697,
      "learning_rate": 0.00013333333333333334,
      "loss": 2.1067,
      "step": 90
    },
    {
      "epoch": 0.06834102169827438,
      "grad_norm": 0.989810585975647,
      "learning_rate": 0.00014848484848484849,
      "loss": 2.0309,
      "step": 100
    },
    {
      "epoch": 0.07517512386810182,
      "grad_norm": Infinity,
      "learning_rate": 0.00016363636363636366,
      "loss": 2.0687,
      "step": 110
    },
    {
      "epoch": 0.08200922603792926,
      "grad_norm": 5.370285511016846,
      "learning_rate": 0.00017727272727272728,
      "loss": 2.2035,
      "step": 120
    },
    {
      "epoch": 0.0888433282077567,
      "grad_norm": 1.0226854085922241,
      "learning_rate": 0.00019242424242424245,
      "loss": 1.7328,
      "step": 130
    },
    {
      "epoch": 0.09567743037758414,
      "grad_norm": 8.442233085632324,
      "learning_rate": 0.00019976525821596247,
      "loss": 1.7355,
      "step": 140
    },
    {
      "epoch": 0.10251153254741159,
      "grad_norm": 3.448438882827759,
      "learning_rate": 0.00019929577464788734,
      "loss": 2.0396,
      "step": 150
    },
    {
      "epoch": 0.10934563471723903,
      "grad_norm": 3.707597255706787,
      "learning_rate": 0.0001988262910798122,
      "loss": 1.8603,
      "step": 160
    },
    {
      "epoch": 0.11617973688706647,
      "grad_norm": 2.0195584297180176,
      "learning_rate": 0.00019835680751173712,
      "loss": 2.1021,
      "step": 170
    },
    {
      "epoch": 0.1230138390568939,
      "grad_norm": 2.4128806591033936,
      "learning_rate": 0.00019788732394366199,
      "loss": 2.0582,
      "step": 180
    },
    {
      "epoch": 0.12984794122672133,
      "grad_norm": 2.707796573638916,
      "learning_rate": 0.00019741784037558685,
      "loss": 2.1312,
      "step": 190
    },
    {
      "epoch": 0.13668204339654877,
      "grad_norm": 2.6425483226776123,
      "learning_rate": 0.00019694835680751174,
      "loss": 2.086,
      "step": 200
    },
    {
      "epoch": 0.1435161455663762,
      "grad_norm": 2.4248363971710205,
      "learning_rate": 0.00019647887323943664,
      "loss": 2.0576,
      "step": 210
    },
    {
      "epoch": 0.15035024773620365,
      "grad_norm": 1.1113543510437012,
      "learning_rate": 0.0001960093896713615,
      "loss": 1.7884,
      "step": 220
    },
    {
      "epoch": 0.15718434990603108,
      "grad_norm": 3.8414552211761475,
      "learning_rate": 0.0001955399061032864,
      "loss": 2.3521,
      "step": 230
    },
    {
      "epoch": 0.16401845207585852,
      "grad_norm": 3.393040180206299,
      "learning_rate": 0.0001950704225352113,
      "loss": 2.0928,
      "step": 240
    },
    {
      "epoch": 0.17085255424568596,
      "grad_norm": 3.5314691066741943,
      "learning_rate": 0.00019460093896713615,
      "loss": 1.9308,
      "step": 250
    },
    {
      "epoch": 0.1776866564155134,
      "grad_norm": 5.881885051727295,
      "learning_rate": 0.00019417840375586857,
      "loss": 1.9244,
      "step": 260
    },
    {
      "epoch": 0.18452075858534084,
      "grad_norm": 1.4201581478118896,
      "learning_rate": 0.00019370892018779343,
      "loss": 1.872,
      "step": 270
    },
    {
      "epoch": 0.19135486075516828,
      "grad_norm": 2.751593828201294,
      "learning_rate": 0.00019323943661971832,
      "loss": 1.9267,
      "step": 280
    },
    {
      "epoch": 0.19818896292499572,
      "grad_norm": 0.9784806966781616,
      "learning_rate": 0.00019276995305164322,
      "loss": 1.6952,
      "step": 290
    },
    {
      "epoch": 0.20502306509482318,
      "grad_norm": 2.9738731384277344,
      "learning_rate": 0.00019230046948356808,
      "loss": 1.8987,
      "step": 300
    },
    {
      "epoch": 0.21185716726465062,
      "grad_norm": 2.224365472793579,
      "learning_rate": 0.00019183098591549297,
      "loss": 2.0835,
      "step": 310
    },
    {
      "epoch": 0.21869126943447806,
      "grad_norm": 1.2849421501159668,
      "learning_rate": 0.00019136150234741784,
      "loss": 1.8553,
      "step": 320
    },
    {
      "epoch": 0.2255253716043055,
      "grad_norm": 3.2781999111175537,
      "learning_rate": 0.00019089201877934273,
      "loss": 1.8121,
      "step": 330
    },
    {
      "epoch": 0.23235947377413294,
      "grad_norm": 0.7185825109481812,
      "learning_rate": 0.0001904225352112676,
      "loss": 1.9595,
      "step": 340
    },
    {
      "epoch": 0.23919357594396037,
      "grad_norm": 4.366527080535889,
      "learning_rate": 0.0001899530516431925,
      "loss": 1.812,
      "step": 350
    },
    {
      "epoch": 0.2460276781137878,
      "grad_norm": 4.314450740814209,
      "learning_rate": 0.00018948356807511738,
      "loss": 1.8416,
      "step": 360
    },
    {
      "epoch": 0.2528617802836152,
      "grad_norm": 2.956653356552124,
      "learning_rate": 0.00018901408450704225,
      "loss": 1.9802,
      "step": 370
    },
    {
      "epoch": 0.25969588245344266,
      "grad_norm": 1.395255208015442,
      "learning_rate": 0.00018854460093896714,
      "loss": 2.1336,
      "step": 380
    },
    {
      "epoch": 0.2665299846232701,
      "grad_norm": 2.8896381855010986,
      "learning_rate": 0.00018807511737089204,
      "loss": 1.8297,
      "step": 390
    },
    {
      "epoch": 0.27336408679309754,
      "grad_norm": 2.0925827026367188,
      "learning_rate": 0.0001876056338028169,
      "loss": 1.8318,
      "step": 400
    },
    {
      "epoch": 0.280198188962925,
      "grad_norm": 1.436551809310913,
      "learning_rate": 0.0001871361502347418,
      "loss": 1.8223,
      "step": 410
    },
    {
      "epoch": 0.2870322911327524,
      "grad_norm": 0.9357802867889404,
      "learning_rate": 0.0001866666666666667,
      "loss": 1.8287,
      "step": 420
    },
    {
      "epoch": 0.29386639330257985,
      "grad_norm": 3.6884915828704834,
      "learning_rate": 0.00018619718309859155,
      "loss": 1.8025,
      "step": 430
    },
    {
      "epoch": 0.3007004954724073,
      "grad_norm": 2.153522491455078,
      "learning_rate": 0.00018572769953051642,
      "loss": 1.5475,
      "step": 440
    },
    {
      "epoch": 0.30753459764223473,
      "grad_norm": 3.496854305267334,
      "learning_rate": 0.00018525821596244134,
      "loss": 1.782,
      "step": 450
    },
    {
      "epoch": 0.31436869981206217,
      "grad_norm": 2.166901111602783,
      "learning_rate": 0.0001847887323943662,
      "loss": 1.5982,
      "step": 460
    },
    {
      "epoch": 0.3212028019818896,
      "grad_norm": 2.4054338932037354,
      "learning_rate": 0.00018431924882629107,
      "loss": 2.0201,
      "step": 470
    },
    {
      "epoch": 0.32803690415171705,
      "grad_norm": 1.3764829635620117,
      "learning_rate": 0.000183849765258216,
      "loss": 1.5294,
      "step": 480
    },
    {
      "epoch": 0.3348710063215445,
      "grad_norm": 5.117223262786865,
      "learning_rate": 0.00018338028169014085,
      "loss": 1.812,
      "step": 490
    },
    {
      "epoch": 0.3417051084913719,
      "grad_norm": 1.7153640985488892,
      "learning_rate": 0.00018291079812206572,
      "loss": 1.7249,
      "step": 500
    },
    {
      "epoch": 0.34853921066119936,
      "grad_norm": 1.0843334197998047,
      "learning_rate": 0.00018244131455399064,
      "loss": 1.6918,
      "step": 510
    },
    {
      "epoch": 0.3553733128310268,
      "grad_norm": 3.17716383934021,
      "learning_rate": 0.0001819718309859155,
      "loss": 1.6265,
      "step": 520
    },
    {
      "epoch": 0.36220741500085424,
      "grad_norm": 2.1360270977020264,
      "learning_rate": 0.00018150234741784037,
      "loss": 1.9059,
      "step": 530
    },
    {
      "epoch": 0.3690415171706817,
      "grad_norm": 2.659409284591675,
      "learning_rate": 0.0001810328638497653,
      "loss": 1.8237,
      "step": 540
    },
    {
      "epoch": 0.3758756193405091,
      "grad_norm": 1.9574990272521973,
      "learning_rate": 0.00018056338028169016,
      "loss": 2.0552,
      "step": 550
    },
    {
      "epoch": 0.38270972151033655,
      "grad_norm": 3.7215640544891357,
      "learning_rate": 0.00018009389671361502,
      "loss": 1.7518,
      "step": 560
    },
    {
      "epoch": 0.389543823680164,
      "grad_norm": 5.685362339019775,
      "learning_rate": 0.00017962441314553991,
      "loss": 1.9423,
      "step": 570
    },
    {
      "epoch": 0.39637792584999143,
      "grad_norm": 5.373042106628418,
      "learning_rate": 0.0001791549295774648,
      "loss": 1.9343,
      "step": 580
    },
    {
      "epoch": 0.40321202801981887,
      "grad_norm": 3.321650981903076,
      "learning_rate": 0.00017868544600938967,
      "loss": 1.718,
      "step": 590
    },
    {
      "epoch": 0.41004613018964636,
      "grad_norm": 1.3837800025939941,
      "learning_rate": 0.00017821596244131457,
      "loss": 1.7754,
      "step": 600
    },
    {
      "epoch": 0.4168802323594738,
      "grad_norm": 1.1874879598617554,
      "learning_rate": 0.00017774647887323946,
      "loss": 1.6449,
      "step": 610
    },
    {
      "epoch": 0.42371433452930124,
      "grad_norm": 2.3453457355499268,
      "learning_rate": 0.00017727699530516432,
      "loss": 1.9001,
      "step": 620
    },
    {
      "epoch": 0.4305484366991287,
      "grad_norm": 1.9375288486480713,
      "learning_rate": 0.00017680751173708922,
      "loss": 1.6097,
      "step": 630
    },
    {
      "epoch": 0.4373825388689561,
      "grad_norm": 3.0950772762298584,
      "learning_rate": 0.0001763380281690141,
      "loss": 1.9017,
      "step": 640
    },
    {
      "epoch": 0.44421664103878356,
      "grad_norm": 3.009223699569702,
      "learning_rate": 0.00017586854460093898,
      "loss": 2.1678,
      "step": 650
    },
    {
      "epoch": 0.451050743208611,
      "grad_norm": 3.1125118732452393,
      "learning_rate": 0.00017539906103286384,
      "loss": 1.5252,
      "step": 660
    },
    {
      "epoch": 0.45788484537843843,
      "grad_norm": 1.9371854066848755,
      "learning_rate": 0.00017492957746478873,
      "loss": 1.642,
      "step": 670
    },
    {
      "epoch": 0.46471894754826587,
      "grad_norm": 0.8981029987335205,
      "learning_rate": 0.00017446009389671363,
      "loss": 2.1059,
      "step": 680
    },
    {
      "epoch": 0.4715530497180933,
      "grad_norm": 0.9644233584403992,
      "learning_rate": 0.0001739906103286385,
      "loss": 1.482,
      "step": 690
    },
    {
      "epoch": 0.47838715188792075,
      "grad_norm": 1.7749234437942505,
      "learning_rate": 0.00017352112676056338,
      "loss": 1.6032,
      "step": 700
    },
    {
      "epoch": 0.4852212540577482,
      "grad_norm": 1.8107513189315796,
      "learning_rate": 0.00017305164319248828,
      "loss": 1.6941,
      "step": 710
    },
    {
      "epoch": 0.4920553562275756,
      "grad_norm": 1.427687644958496,
      "learning_rate": 0.00017258215962441314,
      "loss": 1.5501,
      "step": 720
    },
    {
      "epoch": 0.49888945839740306,
      "grad_norm": 2.521240472793579,
      "learning_rate": 0.00017211267605633804,
      "loss": 1.9205,
      "step": 730
    },
    {
      "epoch": 0.5057235605672304,
      "grad_norm": 5.503659725189209,
      "learning_rate": 0.00017164319248826293,
      "loss": 1.7239,
      "step": 740
    },
    {
      "epoch": 0.5125576627370579,
      "grad_norm": 4.041492462158203,
      "learning_rate": 0.0001711737089201878,
      "loss": 1.9131,
      "step": 750
    },
    {
      "epoch": 0.5193917649068853,
      "grad_norm": 3.602377414703369,
      "learning_rate": 0.0001707042253521127,
      "loss": 1.5152,
      "step": 760
    },
    {
      "epoch": 0.5262258670767128,
      "grad_norm": 3.496152639389038,
      "learning_rate": 0.00017023474178403758,
      "loss": 1.5333,
      "step": 770
    },
    {
      "epoch": 0.5330599692465402,
      "grad_norm": 4.799586772918701,
      "learning_rate": 0.00016976525821596245,
      "loss": 1.5604,
      "step": 780
    },
    {
      "epoch": 0.5398940714163677,
      "grad_norm": 1.314289927482605,
      "learning_rate": 0.00016929577464788734,
      "loss": 1.8581,
      "step": 790
    },
    {
      "epoch": 0.5467281735861951,
      "grad_norm": 1.540637731552124,
      "learning_rate": 0.0001688262910798122,
      "loss": 1.9712,
      "step": 800
    },
    {
      "epoch": 0.5535622757560226,
      "grad_norm": 1.2992823123931885,
      "learning_rate": 0.0001683568075117371,
      "loss": 1.6065,
      "step": 810
    },
    {
      "epoch": 0.56039637792585,
      "grad_norm": 0.7714009881019592,
      "learning_rate": 0.000167887323943662,
      "loss": 1.8902,
      "step": 820
    },
    {
      "epoch": 0.5672304800956774,
      "grad_norm": 1.2016668319702148,
      "learning_rate": 0.00016741784037558685,
      "loss": 1.854,
      "step": 830
    },
    {
      "epoch": 0.5740645822655048,
      "grad_norm": 1.9129397869110107,
      "learning_rate": 0.00016694835680751175,
      "loss": 1.9661,
      "step": 840
    },
    {
      "epoch": 0.5808986844353323,
      "grad_norm": 1.5583465099334717,
      "learning_rate": 0.00016647887323943664,
      "loss": 1.4921,
      "step": 850
    },
    {
      "epoch": 0.5877327866051597,
      "grad_norm": 1.217874526977539,
      "learning_rate": 0.0001660093896713615,
      "loss": 1.6486,
      "step": 860
    },
    {
      "epoch": 0.5945668887749872,
      "grad_norm": 2.096747398376465,
      "learning_rate": 0.0001655399061032864,
      "loss": 1.632,
      "step": 870
    },
    {
      "epoch": 0.6014009909448146,
      "grad_norm": 1.9565995931625366,
      "learning_rate": 0.0001650704225352113,
      "loss": 1.6879,
      "step": 880
    },
    {
      "epoch": 0.6082350931146421,
      "grad_norm": 2.8614614009857178,
      "learning_rate": 0.00016460093896713616,
      "loss": 1.7156,
      "step": 890
    },
    {
      "epoch": 0.6150691952844695,
      "grad_norm": 1.9752906560897827,
      "learning_rate": 0.00016413145539906105,
      "loss": 1.8497,
      "step": 900
    },
    {
      "epoch": 0.621903297454297,
      "grad_norm": 1.1667951345443726,
      "learning_rate": 0.00016366197183098591,
      "loss": 1.9354,
      "step": 910
    },
    {
      "epoch": 0.6287373996241243,
      "grad_norm": 3.9172396659851074,
      "learning_rate": 0.0001631924882629108,
      "loss": 1.6389,
      "step": 920
    },
    {
      "epoch": 0.6355715017939518,
      "grad_norm": 2.9594082832336426,
      "learning_rate": 0.00016272300469483567,
      "loss": 2.0553,
      "step": 930
    },
    {
      "epoch": 0.6424056039637792,
      "grad_norm": 2.566627264022827,
      "learning_rate": 0.00016225352112676057,
      "loss": 1.547,
      "step": 940
    },
    {
      "epoch": 0.6492397061336067,
      "grad_norm": 2.285865306854248,
      "learning_rate": 0.00016178403755868546,
      "loss": 1.6803,
      "step": 950
    },
    {
      "epoch": 0.6560738083034341,
      "grad_norm": 2.1016080379486084,
      "learning_rate": 0.00016131455399061032,
      "loss": 1.6278,
      "step": 960
    },
    {
      "epoch": 0.6629079104732616,
      "grad_norm": 2.409546375274658,
      "learning_rate": 0.00016084507042253522,
      "loss": 1.6959,
      "step": 970
    },
    {
      "epoch": 0.669742012643089,
      "grad_norm": 3.1356201171875,
      "learning_rate": 0.0001603755868544601,
      "loss": 1.6983,
      "step": 980
    },
    {
      "epoch": 0.6765761148129165,
      "grad_norm": 2.00640869140625,
      "learning_rate": 0.00015990610328638498,
      "loss": 1.5737,
      "step": 990
    },
    {
      "epoch": 0.6834102169827438,
      "grad_norm": 2.788975477218628,
      "learning_rate": 0.00015943661971830987,
      "loss": 1.8415,
      "step": 1000
    }
  ],
  "logging_steps": 10,
  "max_steps": 4392,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7056343653711872e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}