{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 5187,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00963948332369385,
"grad_norm": 6.363353729248047,
"learning_rate": 4.9527665317139e-05,
"loss": 5.7207,
"step": 50
},
{
"epoch": 0.0192789666473877,
"grad_norm": 6.101255416870117,
"learning_rate": 4.904569115095431e-05,
"loss": 4.3487,
"step": 100
},
{
"epoch": 0.02891844997108155,
"grad_norm": 5.563638210296631,
"learning_rate": 4.856371698476962e-05,
"loss": 4.077,
"step": 150
},
{
"epoch": 0.0385579332947754,
"grad_norm": 5.894318580627441,
"learning_rate": 4.8081742818584925e-05,
"loss": 3.8687,
"step": 200
},
{
"epoch": 0.04819741661846925,
"grad_norm": 5.281033992767334,
"learning_rate": 4.7599768652400236e-05,
"loss": 3.698,
"step": 250
},
{
"epoch": 0.0578368999421631,
"grad_norm": 5.9448089599609375,
"learning_rate": 4.711779448621554e-05,
"loss": 3.5277,
"step": 300
},
{
"epoch": 0.06747638326585695,
"grad_norm": 5.122015953063965,
"learning_rate": 4.6635820320030846e-05,
"loss": 3.524,
"step": 350
},
{
"epoch": 0.0771158665895508,
"grad_norm": 5.508582592010498,
"learning_rate": 4.615384615384616e-05,
"loss": 3.3969,
"step": 400
},
{
"epoch": 0.08675534991324466,
"grad_norm": 4.6635661125183105,
"learning_rate": 4.567187198766146e-05,
"loss": 3.4104,
"step": 450
},
{
"epoch": 0.0963948332369385,
"grad_norm": 4.993603706359863,
"learning_rate": 4.5189897821476775e-05,
"loss": 3.317,
"step": 500
},
{
"epoch": 0.10603431656063235,
"grad_norm": 4.281131267547607,
"learning_rate": 4.470792365529208e-05,
"loss": 3.27,
"step": 550
},
{
"epoch": 0.1156737998843262,
"grad_norm": 4.340400218963623,
"learning_rate": 4.4225949489107385e-05,
"loss": 3.2469,
"step": 600
},
{
"epoch": 0.12531328320802004,
"grad_norm": 4.594860076904297,
"learning_rate": 4.374397532292269e-05,
"loss": 3.1855,
"step": 650
},
{
"epoch": 0.1349527665317139,
"grad_norm": 4.50878381729126,
"learning_rate": 4.3262001156738e-05,
"loss": 3.1646,
"step": 700
},
{
"epoch": 0.14459224985540775,
"grad_norm": 4.4101715087890625,
"learning_rate": 4.278002699055331e-05,
"loss": 3.0478,
"step": 750
},
{
"epoch": 0.1542317331791016,
"grad_norm": 4.285751819610596,
"learning_rate": 4.229805282436862e-05,
"loss": 3.0654,
"step": 800
},
{
"epoch": 0.16387121650279546,
"grad_norm": 4.346066474914551,
"learning_rate": 4.1816078658183924e-05,
"loss": 3.1125,
"step": 850
},
{
"epoch": 0.1735106998264893,
"grad_norm": 4.069290637969971,
"learning_rate": 4.133410449199923e-05,
"loss": 3.0389,
"step": 900
},
{
"epoch": 0.18315018315018314,
"grad_norm": 4.5510382652282715,
"learning_rate": 4.0852130325814534e-05,
"loss": 2.9808,
"step": 950
},
{
"epoch": 0.192789666473877,
"grad_norm": 4.202208995819092,
"learning_rate": 4.0370156159629845e-05,
"loss": 3.0209,
"step": 1000
},
{
"epoch": 0.20242914979757085,
"grad_norm": 5.317561149597168,
"learning_rate": 3.988818199344515e-05,
"loss": 3.0355,
"step": 1050
},
{
"epoch": 0.2120686331212647,
"grad_norm": 4.339507102966309,
"learning_rate": 3.940620782726046e-05,
"loss": 2.9353,
"step": 1100
},
{
"epoch": 0.22170811644495855,
"grad_norm": 4.036981105804443,
"learning_rate": 3.8924233661075774e-05,
"loss": 2.9902,
"step": 1150
},
{
"epoch": 0.2313475997686524,
"grad_norm": 4.5993757247924805,
"learning_rate": 3.844225949489107e-05,
"loss": 2.9957,
"step": 1200
},
{
"epoch": 0.24098708309234626,
"grad_norm": 4.273294448852539,
"learning_rate": 3.7960285328706384e-05,
"loss": 2.9309,
"step": 1250
},
{
"epoch": 0.2506265664160401,
"grad_norm": 4.0223774909973145,
"learning_rate": 3.747831116252169e-05,
"loss": 2.8886,
"step": 1300
},
{
"epoch": 0.26026604973973394,
"grad_norm": 4.813283443450928,
"learning_rate": 3.6996336996337e-05,
"loss": 2.7496,
"step": 1350
},
{
"epoch": 0.2699055330634278,
"grad_norm": 4.476084232330322,
"learning_rate": 3.6514362830152306e-05,
"loss": 2.8673,
"step": 1400
},
{
"epoch": 0.27954501638712165,
"grad_norm": 4.051555156707764,
"learning_rate": 3.603238866396762e-05,
"loss": 2.8135,
"step": 1450
},
{
"epoch": 0.2891844997108155,
"grad_norm": 4.186788558959961,
"learning_rate": 3.5550414497782916e-05,
"loss": 2.9019,
"step": 1500
},
{
"epoch": 0.29882398303450936,
"grad_norm": 4.216615200042725,
"learning_rate": 3.506844033159823e-05,
"loss": 2.8877,
"step": 1550
},
{
"epoch": 0.3084634663582032,
"grad_norm": 4.653785705566406,
"learning_rate": 3.458646616541353e-05,
"loss": 2.8091,
"step": 1600
},
{
"epoch": 0.31810294968189706,
"grad_norm": 3.883335828781128,
"learning_rate": 3.4104491999228844e-05,
"loss": 2.7521,
"step": 1650
},
{
"epoch": 0.3277424330055909,
"grad_norm": 4.467517375946045,
"learning_rate": 3.362251783304415e-05,
"loss": 2.7753,
"step": 1700
},
{
"epoch": 0.33738191632928477,
"grad_norm": 3.839921474456787,
"learning_rate": 3.314054366685946e-05,
"loss": 2.7853,
"step": 1750
},
{
"epoch": 0.3470213996529786,
"grad_norm": 3.923483371734619,
"learning_rate": 3.2658569500674766e-05,
"loss": 2.872,
"step": 1800
},
{
"epoch": 0.3566608829766725,
"grad_norm": 4.523361682891846,
"learning_rate": 3.217659533449007e-05,
"loss": 2.7785,
"step": 1850
},
{
"epoch": 0.3663003663003663,
"grad_norm": 3.865365743637085,
"learning_rate": 3.169462116830538e-05,
"loss": 2.8373,
"step": 1900
},
{
"epoch": 0.37593984962406013,
"grad_norm": 3.9936673641204834,
"learning_rate": 3.121264700212069e-05,
"loss": 2.7323,
"step": 1950
},
{
"epoch": 0.385579332947754,
"grad_norm": 4.1067633628845215,
"learning_rate": 3.0730672835936e-05,
"loss": 2.7585,
"step": 2000
},
{
"epoch": 0.39521881627144784,
"grad_norm": 3.85208797454834,
"learning_rate": 3.02486986697513e-05,
"loss": 2.7564,
"step": 2050
},
{
"epoch": 0.4048582995951417,
"grad_norm": 4.24629020690918,
"learning_rate": 2.9766724503566613e-05,
"loss": 2.7274,
"step": 2100
},
{
"epoch": 0.41449778291883554,
"grad_norm": 3.905611276626587,
"learning_rate": 2.9284750337381918e-05,
"loss": 2.7094,
"step": 2150
},
{
"epoch": 0.4241372662425294,
"grad_norm": 3.9592058658599854,
"learning_rate": 2.8802776171197226e-05,
"loss": 2.7046,
"step": 2200
},
{
"epoch": 0.43377674956622325,
"grad_norm": 3.860285520553589,
"learning_rate": 2.832080200501253e-05,
"loss": 2.7154,
"step": 2250
},
{
"epoch": 0.4434162328899171,
"grad_norm": 3.989696502685547,
"learning_rate": 2.783882783882784e-05,
"loss": 2.6632,
"step": 2300
},
{
"epoch": 0.45305571621361096,
"grad_norm": 3.987741708755493,
"learning_rate": 2.7356853672643145e-05,
"loss": 2.7178,
"step": 2350
},
{
"epoch": 0.4626951995373048,
"grad_norm": 3.9146411418914795,
"learning_rate": 2.6874879506458457e-05,
"loss": 2.7039,
"step": 2400
},
{
"epoch": 0.47233468286099867,
"grad_norm": 4.281154155731201,
"learning_rate": 2.639290534027376e-05,
"loss": 2.6205,
"step": 2450
},
{
"epoch": 0.4819741661846925,
"grad_norm": 3.6197686195373535,
"learning_rate": 2.591093117408907e-05,
"loss": 2.723,
"step": 2500
},
{
"epoch": 0.4916136495083864,
"grad_norm": 3.7195041179656982,
"learning_rate": 2.5428957007904375e-05,
"loss": 2.6071,
"step": 2550
},
{
"epoch": 0.5012531328320802,
"grad_norm": 3.775972604751587,
"learning_rate": 2.4946982841719683e-05,
"loss": 2.6749,
"step": 2600
},
{
"epoch": 0.5108926161557741,
"grad_norm": 3.9212749004364014,
"learning_rate": 2.4465008675534992e-05,
"loss": 2.6913,
"step": 2650
},
{
"epoch": 0.5205320994794679,
"grad_norm": 3.9374866485595703,
"learning_rate": 2.39830345093503e-05,
"loss": 2.6457,
"step": 2700
},
{
"epoch": 0.5301715828031618,
"grad_norm": 4.192444801330566,
"learning_rate": 2.3501060343165605e-05,
"loss": 2.7204,
"step": 2750
},
{
"epoch": 0.5398110661268556,
"grad_norm": 3.428612232208252,
"learning_rate": 2.3019086176980914e-05,
"loss": 2.7221,
"step": 2800
},
{
"epoch": 0.5494505494505495,
"grad_norm": 4.013959884643555,
"learning_rate": 2.2537112010796222e-05,
"loss": 2.6046,
"step": 2850
},
{
"epoch": 0.5590900327742433,
"grad_norm": 4.100067138671875,
"learning_rate": 2.205513784461153e-05,
"loss": 2.6887,
"step": 2900
},
{
"epoch": 0.5687295160979372,
"grad_norm": 3.5404722690582275,
"learning_rate": 2.157316367842684e-05,
"loss": 2.5933,
"step": 2950
},
{
"epoch": 0.578368999421631,
"grad_norm": 3.6547091007232666,
"learning_rate": 2.1091189512242147e-05,
"loss": 2.6171,
"step": 3000
},
{
"epoch": 0.5880084827453248,
"grad_norm": 3.81042742729187,
"learning_rate": 2.0609215346057452e-05,
"loss": 2.5319,
"step": 3050
},
{
"epoch": 0.5976479660690187,
"grad_norm": 3.987117052078247,
"learning_rate": 2.012724117987276e-05,
"loss": 2.6596,
"step": 3100
},
{
"epoch": 0.6072874493927125,
"grad_norm": 3.5897133350372314,
"learning_rate": 1.964526701368807e-05,
"loss": 2.634,
"step": 3150
},
{
"epoch": 0.6169269327164064,
"grad_norm": 4.190171241760254,
"learning_rate": 1.9163292847503374e-05,
"loss": 2.5889,
"step": 3200
},
{
"epoch": 0.6265664160401002,
"grad_norm": 3.7671003341674805,
"learning_rate": 1.8681318681318682e-05,
"loss": 2.6186,
"step": 3250
},
{
"epoch": 0.6362058993637941,
"grad_norm": 4.126290798187256,
"learning_rate": 1.819934451513399e-05,
"loss": 2.5847,
"step": 3300
},
{
"epoch": 0.6458453826874879,
"grad_norm": 4.023561000823975,
"learning_rate": 1.7717370348949296e-05,
"loss": 2.5474,
"step": 3350
},
{
"epoch": 0.6554848660111818,
"grad_norm": 3.9225897789001465,
"learning_rate": 1.7235396182764604e-05,
"loss": 2.6056,
"step": 3400
},
{
"epoch": 0.6651243493348756,
"grad_norm": 3.6160168647766113,
"learning_rate": 1.6753422016579912e-05,
"loss": 2.5559,
"step": 3450
},
{
"epoch": 0.6747638326585695,
"grad_norm": 4.005686283111572,
"learning_rate": 1.6271447850395217e-05,
"loss": 2.5416,
"step": 3500
},
{
"epoch": 0.6844033159822633,
"grad_norm": 3.8741414546966553,
"learning_rate": 1.5789473684210526e-05,
"loss": 2.5972,
"step": 3550
},
{
"epoch": 0.6940427993059572,
"grad_norm": 3.710710048675537,
"learning_rate": 1.5307499518025834e-05,
"loss": 2.5787,
"step": 3600
},
{
"epoch": 0.703682282629651,
"grad_norm": 3.460242748260498,
"learning_rate": 1.4825525351841141e-05,
"loss": 2.554,
"step": 3650
},
{
"epoch": 0.713321765953345,
"grad_norm": 3.8803932666778564,
"learning_rate": 1.4343551185656451e-05,
"loss": 2.5613,
"step": 3700
},
{
"epoch": 0.7229612492770388,
"grad_norm": 3.8178253173828125,
"learning_rate": 1.3861577019471758e-05,
"loss": 2.5626,
"step": 3750
},
{
"epoch": 0.7326007326007326,
"grad_norm": 3.364790201187134,
"learning_rate": 1.3379602853287066e-05,
"loss": 2.5721,
"step": 3800
},
{
"epoch": 0.7422402159244265,
"grad_norm": 3.5198776721954346,
"learning_rate": 1.2897628687102373e-05,
"loss": 2.5233,
"step": 3850
},
{
"epoch": 0.7518796992481203,
"grad_norm": 3.782043695449829,
"learning_rate": 1.241565452091768e-05,
"loss": 2.541,
"step": 3900
},
{
"epoch": 0.7615191825718142,
"grad_norm": 4.032742023468018,
"learning_rate": 1.1933680354732988e-05,
"loss": 2.493,
"step": 3950
},
{
"epoch": 0.771158665895508,
"grad_norm": 3.8995766639709473,
"learning_rate": 1.1451706188548295e-05,
"loss": 2.5361,
"step": 4000
},
{
"epoch": 0.7807981492192019,
"grad_norm": 3.67946457862854,
"learning_rate": 1.0969732022363601e-05,
"loss": 2.5408,
"step": 4050
},
{
"epoch": 0.7904376325428957,
"grad_norm": 3.4958484172821045,
"learning_rate": 1.048775785617891e-05,
"loss": 2.5261,
"step": 4100
},
{
"epoch": 0.8000771158665896,
"grad_norm": 3.8448803424835205,
"learning_rate": 1.0005783689994216e-05,
"loss": 2.5389,
"step": 4150
},
{
"epoch": 0.8097165991902834,
"grad_norm": 3.7735507488250732,
"learning_rate": 9.523809523809523e-06,
"loss": 2.6247,
"step": 4200
},
{
"epoch": 0.8193560825139773,
"grad_norm": 3.4487788677215576,
"learning_rate": 9.041835357624833e-06,
"loss": 2.5704,
"step": 4250
},
{
"epoch": 0.8289955658376711,
"grad_norm": 3.943000316619873,
"learning_rate": 8.55986119144014e-06,
"loss": 2.5289,
"step": 4300
},
{
"epoch": 0.838635049161365,
"grad_norm": 3.7842445373535156,
"learning_rate": 8.077887025255447e-06,
"loss": 2.5765,
"step": 4350
},
{
"epoch": 0.8482745324850588,
"grad_norm": 3.741563320159912,
"learning_rate": 7.595912859070754e-06,
"loss": 2.4869,
"step": 4400
},
{
"epoch": 0.8579140158087527,
"grad_norm": 3.6693384647369385,
"learning_rate": 7.113938692886062e-06,
"loss": 2.5513,
"step": 4450
},
{
"epoch": 0.8675534991324465,
"grad_norm": 3.590758800506592,
"learning_rate": 6.631964526701369e-06,
"loss": 2.566,
"step": 4500
},
{
"epoch": 0.8771929824561403,
"grad_norm": 3.282844305038452,
"learning_rate": 6.149990360516677e-06,
"loss": 2.5001,
"step": 4550
},
{
"epoch": 0.8868324657798342,
"grad_norm": 4.462714195251465,
"learning_rate": 5.668016194331984e-06,
"loss": 2.4838,
"step": 4600
},
{
"epoch": 0.896471949103528,
"grad_norm": 4.180957317352295,
"learning_rate": 5.186042028147292e-06,
"loss": 2.477,
"step": 4650
},
{
"epoch": 0.9061114324272219,
"grad_norm": 4.506474018096924,
"learning_rate": 4.7040678619625985e-06,
"loss": 2.5097,
"step": 4700
},
{
"epoch": 0.9157509157509157,
"grad_norm": 3.257143974304199,
"learning_rate": 4.222093695777907e-06,
"loss": 2.6252,
"step": 4750
},
{
"epoch": 0.9253903990746096,
"grad_norm": 3.498189926147461,
"learning_rate": 3.740119529593214e-06,
"loss": 2.5133,
"step": 4800
},
{
"epoch": 0.9350298823983034,
"grad_norm": 3.404567241668701,
"learning_rate": 3.258145363408521e-06,
"loss": 2.4482,
"step": 4850
},
{
"epoch": 0.9446693657219973,
"grad_norm": 3.703936815261841,
"learning_rate": 2.776171197223829e-06,
"loss": 2.5769,
"step": 4900
},
{
"epoch": 0.9543088490456911,
"grad_norm": 4.4313883781433105,
"learning_rate": 2.2941970310391366e-06,
"loss": 2.5262,
"step": 4950
},
{
"epoch": 0.963948332369385,
"grad_norm": 3.5869264602661133,
"learning_rate": 1.8122228648544438e-06,
"loss": 2.5166,
"step": 5000
},
{
"epoch": 0.9735878156930788,
"grad_norm": 3.5782413482666016,
"learning_rate": 1.3302486986697513e-06,
"loss": 2.4681,
"step": 5050
},
{
"epoch": 0.9832272990167727,
"grad_norm": 3.565708637237549,
"learning_rate": 8.482745324850588e-07,
"loss": 2.4661,
"step": 5100
},
{
"epoch": 0.9928667823404665,
"grad_norm": 3.5679666996002197,
"learning_rate": 3.663003663003663e-07,
"loss": 2.5452,
"step": 5150
}
],
"logging_steps": 50,
"max_steps": 5187,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1355321769984000.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}