llava_4_6 / checkpoint-600 /trainer_state.json
naveel10's picture
Upload trained llava adapter
bf7ec54 verified
{
"best_global_step": 500,
"best_metric": 0.19536998867988586,
"best_model_checkpoint": "outputs/checkpoint-500",
"epoch": 2.247191011235955,
"eval_steps": 100,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03745318352059925,
"grad_norm": 2.6471238136291504,
"learning_rate": 1.9865168539325844e-05,
"loss": 3.9924,
"mean_token_accuracy": 0.3569513201713562,
"num_tokens": 1110.0,
"step": 10
},
{
"epoch": 0.0749063670411985,
"grad_norm": 2.9193994998931885,
"learning_rate": 1.9715355805243446e-05,
"loss": 2.5013,
"mean_token_accuracy": 0.5000596195459366,
"num_tokens": 2220.0,
"step": 20
},
{
"epoch": 0.11235955056179775,
"grad_norm": 1.090408444404602,
"learning_rate": 1.956554307116105e-05,
"loss": 1.2021,
"mean_token_accuracy": 0.7512393116950988,
"num_tokens": 3329.0,
"step": 30
},
{
"epoch": 0.149812734082397,
"grad_norm": 1.412244200706482,
"learning_rate": 1.9415730337078652e-05,
"loss": 0.6237,
"mean_token_accuracy": 0.8658290803432465,
"num_tokens": 4437.0,
"step": 40
},
{
"epoch": 0.18726591760299627,
"grad_norm": 0.9774134755134583,
"learning_rate": 1.9265917602996254e-05,
"loss": 0.4264,
"mean_token_accuracy": 0.9105254471302032,
"num_tokens": 5553.0,
"step": 50
},
{
"epoch": 0.2247191011235955,
"grad_norm": 0.6166325211524963,
"learning_rate": 1.9116104868913857e-05,
"loss": 0.3806,
"mean_token_accuracy": 0.8969066739082336,
"num_tokens": 6660.0,
"step": 60
},
{
"epoch": 0.26217228464419473,
"grad_norm": 0.5820680856704712,
"learning_rate": 1.8966292134831463e-05,
"loss": 0.3484,
"mean_token_accuracy": 0.8972096979618073,
"num_tokens": 7769.0,
"step": 70
},
{
"epoch": 0.299625468164794,
"grad_norm": 0.31422552466392517,
"learning_rate": 1.8816479400749066e-05,
"loss": 0.3196,
"mean_token_accuracy": 0.898263669013977,
"num_tokens": 8880.0,
"step": 80
},
{
"epoch": 0.33707865168539325,
"grad_norm": 0.5825852155685425,
"learning_rate": 1.866666666666667e-05,
"loss": 0.2965,
"mean_token_accuracy": 0.9046498596668243,
"num_tokens": 9992.0,
"step": 90
},
{
"epoch": 0.37453183520599254,
"grad_norm": 0.38430944085121155,
"learning_rate": 1.851685393258427e-05,
"loss": 0.2839,
"mean_token_accuracy": 0.9051393151283265,
"num_tokens": 11098.0,
"step": 100
},
{
"epoch": 0.37453183520599254,
"eval_loss": 0.2852214574813843,
"eval_mean_token_accuracy": 0.9032742083072662,
"eval_num_tokens": 11098.0,
"eval_runtime": 2.4929,
"eval_samples_per_second": 11.633,
"eval_steps_per_second": 1.605,
"step": 100
},
{
"epoch": 0.41198501872659177,
"grad_norm": 0.312187522649765,
"learning_rate": 1.8367041198501874e-05,
"loss": 0.2752,
"mean_token_accuracy": 0.9036725044250489,
"num_tokens": 12207.0,
"step": 110
},
{
"epoch": 0.449438202247191,
"grad_norm": 0.3875369131565094,
"learning_rate": 1.8217228464419477e-05,
"loss": 0.2659,
"mean_token_accuracy": 0.9044483065605163,
"num_tokens": 13316.0,
"step": 120
},
{
"epoch": 0.4868913857677903,
"grad_norm": 0.6050882339477539,
"learning_rate": 1.8067415730337083e-05,
"loss": 0.258,
"mean_token_accuracy": 0.9100114285945893,
"num_tokens": 14426.0,
"step": 130
},
{
"epoch": 0.5243445692883895,
"grad_norm": 0.5287177562713623,
"learning_rate": 1.7917602996254685e-05,
"loss": 0.2455,
"mean_token_accuracy": 0.9222747385501862,
"num_tokens": 15539.0,
"step": 140
},
{
"epoch": 0.5617977528089888,
"grad_norm": 0.5224889516830444,
"learning_rate": 1.7767790262172285e-05,
"loss": 0.2368,
"mean_token_accuracy": 0.9263923704624176,
"num_tokens": 16647.0,
"step": 150
},
{
"epoch": 0.599250936329588,
"grad_norm": 0.4501174986362457,
"learning_rate": 1.7617977528089887e-05,
"loss": 0.2299,
"mean_token_accuracy": 0.9313735246658326,
"num_tokens": 17760.0,
"step": 160
},
{
"epoch": 0.6367041198501873,
"grad_norm": 0.43853962421417236,
"learning_rate": 1.746816479400749e-05,
"loss": 0.2222,
"mean_token_accuracy": 0.9402973234653473,
"num_tokens": 18869.0,
"step": 170
},
{
"epoch": 0.6741573033707865,
"grad_norm": 0.31908461451530457,
"learning_rate": 1.7318352059925093e-05,
"loss": 0.2117,
"mean_token_accuracy": 0.9458102405071258,
"num_tokens": 19977.0,
"step": 180
},
{
"epoch": 0.7116104868913857,
"grad_norm": 0.2825154662132263,
"learning_rate": 1.71685393258427e-05,
"loss": 0.2094,
"mean_token_accuracy": 0.938564246892929,
"num_tokens": 21088.0,
"step": 190
},
{
"epoch": 0.7490636704119851,
"grad_norm": 0.2939445674419403,
"learning_rate": 1.70187265917603e-05,
"loss": 0.2051,
"mean_token_accuracy": 0.9392363965511322,
"num_tokens": 22195.0,
"step": 200
},
{
"epoch": 0.7490636704119851,
"eval_loss": 0.2029074728488922,
"eval_mean_token_accuracy": 0.9482556581497192,
"eval_num_tokens": 22195.0,
"eval_runtime": 2.4927,
"eval_samples_per_second": 11.634,
"eval_steps_per_second": 1.605,
"step": 200
},
{
"epoch": 0.7865168539325843,
"grad_norm": 0.18860529363155365,
"learning_rate": 1.6868913857677904e-05,
"loss": 0.1991,
"mean_token_accuracy": 0.9431917011737824,
"num_tokens": 23306.0,
"step": 210
},
{
"epoch": 0.8239700374531835,
"grad_norm": 0.22066630423069,
"learning_rate": 1.6719101123595507e-05,
"loss": 0.2001,
"mean_token_accuracy": 0.9430991888046265,
"num_tokens": 24417.0,
"step": 220
},
{
"epoch": 0.8614232209737828,
"grad_norm": 0.17636580765247345,
"learning_rate": 1.656928838951311e-05,
"loss": 0.1968,
"mean_token_accuracy": 0.9465341567993164,
"num_tokens": 25522.0,
"step": 230
},
{
"epoch": 0.898876404494382,
"grad_norm": 0.14720433950424194,
"learning_rate": 1.6419475655430712e-05,
"loss": 0.1982,
"mean_token_accuracy": 0.9413078784942627,
"num_tokens": 26632.0,
"step": 240
},
{
"epoch": 0.9363295880149812,
"grad_norm": 0.11868773400783539,
"learning_rate": 1.626966292134832e-05,
"loss": 0.1955,
"mean_token_accuracy": 0.9468274474143982,
"num_tokens": 27742.0,
"step": 250
},
{
"epoch": 0.9737827715355806,
"grad_norm": 0.14357531070709229,
"learning_rate": 1.611985018726592e-05,
"loss": 0.1943,
"mean_token_accuracy": 0.9457764148712158,
"num_tokens": 28851.0,
"step": 260
},
{
"epoch": 1.0112359550561798,
"grad_norm": 0.21999526023864746,
"learning_rate": 1.5970037453183524e-05,
"loss": 0.1966,
"mean_token_accuracy": 0.9422410607337952,
"num_tokens": 29905.0,
"step": 270
},
{
"epoch": 1.048689138576779,
"grad_norm": 0.10375912487506866,
"learning_rate": 1.5820224719101127e-05,
"loss": 0.1935,
"mean_token_accuracy": 0.9441024959087372,
"num_tokens": 31016.0,
"step": 280
},
{
"epoch": 1.0861423220973783,
"grad_norm": 0.2760375738143921,
"learning_rate": 1.5670411985018726e-05,
"loss": 0.1947,
"mean_token_accuracy": 0.9411046266555786,
"num_tokens": 32124.0,
"step": 290
},
{
"epoch": 1.1235955056179776,
"grad_norm": 0.2127188742160797,
"learning_rate": 1.552059925093633e-05,
"loss": 0.1943,
"mean_token_accuracy": 0.9514408648014069,
"num_tokens": 33234.0,
"step": 300
},
{
"epoch": 1.1235955056179776,
"eval_loss": 0.19752565026283264,
"eval_mean_token_accuracy": 0.9434169828891754,
"eval_num_tokens": 33234.0,
"eval_runtime": 2.4936,
"eval_samples_per_second": 11.63,
"eval_steps_per_second": 1.604,
"step": 300
},
{
"epoch": 1.1610486891385767,
"grad_norm": 0.10267303138971329,
"learning_rate": 1.537078651685393e-05,
"loss": 0.1919,
"mean_token_accuracy": 0.9476523637771607,
"num_tokens": 34342.0,
"step": 310
},
{
"epoch": 1.198501872659176,
"grad_norm": 0.23754256963729858,
"learning_rate": 1.5220973782771537e-05,
"loss": 0.1927,
"mean_token_accuracy": 0.9512970626354218,
"num_tokens": 35450.0,
"step": 320
},
{
"epoch": 1.2359550561797752,
"grad_norm": 0.09665194898843765,
"learning_rate": 1.507116104868914e-05,
"loss": 0.1911,
"mean_token_accuracy": 0.9494555711746215,
"num_tokens": 36558.0,
"step": 330
},
{
"epoch": 1.2734082397003745,
"grad_norm": 0.11535191535949707,
"learning_rate": 1.4921348314606743e-05,
"loss": 0.1915,
"mean_token_accuracy": 0.9493873059749603,
"num_tokens": 37664.0,
"step": 340
},
{
"epoch": 1.3108614232209739,
"grad_norm": 0.11016673594713211,
"learning_rate": 1.4771535580524345e-05,
"loss": 0.1931,
"mean_token_accuracy": 0.9440759301185608,
"num_tokens": 38774.0,
"step": 350
},
{
"epoch": 1.348314606741573,
"grad_norm": 0.24848656356334686,
"learning_rate": 1.4621722846441948e-05,
"loss": 0.1925,
"mean_token_accuracy": 0.9458104014396668,
"num_tokens": 39883.0,
"step": 360
},
{
"epoch": 1.3857677902621723,
"grad_norm": 0.1400669664144516,
"learning_rate": 1.447191011235955e-05,
"loss": 0.1936,
"mean_token_accuracy": 0.9457758069038391,
"num_tokens": 40990.0,
"step": 370
},
{
"epoch": 1.4232209737827715,
"grad_norm": 0.1753997802734375,
"learning_rate": 1.4322097378277155e-05,
"loss": 0.1921,
"mean_token_accuracy": 0.9477294445037842,
"num_tokens": 42099.0,
"step": 380
},
{
"epoch": 1.4606741573033708,
"grad_norm": 0.11102133989334106,
"learning_rate": 1.4172284644194758e-05,
"loss": 0.1904,
"mean_token_accuracy": 0.9459109544754029,
"num_tokens": 43209.0,
"step": 390
},
{
"epoch": 1.4981273408239701,
"grad_norm": 0.12153730541467667,
"learning_rate": 1.402247191011236e-05,
"loss": 0.1908,
"mean_token_accuracy": 0.9495814442634583,
"num_tokens": 44320.0,
"step": 400
},
{
"epoch": 1.4981273408239701,
"eval_loss": 0.19722126424312592,
"eval_mean_token_accuracy": 0.9426012635231018,
"eval_num_tokens": 44320.0,
"eval_runtime": 2.4929,
"eval_samples_per_second": 11.633,
"eval_steps_per_second": 1.605,
"step": 400
},
{
"epoch": 1.5355805243445693,
"grad_norm": 0.13351161777973175,
"learning_rate": 1.3872659176029963e-05,
"loss": 0.1906,
"mean_token_accuracy": 0.9469557940959931,
"num_tokens": 45434.0,
"step": 410
},
{
"epoch": 1.5730337078651684,
"grad_norm": 0.1454717516899109,
"learning_rate": 1.3722846441947566e-05,
"loss": 0.1906,
"mean_token_accuracy": 0.9468878388404847,
"num_tokens": 46547.0,
"step": 420
},
{
"epoch": 1.6104868913857677,
"grad_norm": 0.21453846991062164,
"learning_rate": 1.3573033707865169e-05,
"loss": 0.1919,
"mean_token_accuracy": 0.9432088494300842,
"num_tokens": 47659.0,
"step": 430
},
{
"epoch": 1.647940074906367,
"grad_norm": 0.1796715408563614,
"learning_rate": 1.3423220973782773e-05,
"loss": 0.1924,
"mean_token_accuracy": 0.9468723952770233,
"num_tokens": 48771.0,
"step": 440
},
{
"epoch": 1.6853932584269664,
"grad_norm": 0.18729475140571594,
"learning_rate": 1.3273408239700376e-05,
"loss": 0.1918,
"mean_token_accuracy": 0.9448257863521576,
"num_tokens": 49878.0,
"step": 450
},
{
"epoch": 1.7228464419475655,
"grad_norm": 0.20833182334899902,
"learning_rate": 1.3123595505617978e-05,
"loss": 0.19,
"mean_token_accuracy": 0.9460108697414398,
"num_tokens": 50990.0,
"step": 460
},
{
"epoch": 1.7602996254681647,
"grad_norm": 0.09931682050228119,
"learning_rate": 1.2973782771535581e-05,
"loss": 0.1898,
"mean_token_accuracy": 0.9476029396057128,
"num_tokens": 52099.0,
"step": 470
},
{
"epoch": 1.797752808988764,
"grad_norm": 0.2103966772556305,
"learning_rate": 1.2823970037453184e-05,
"loss": 0.1932,
"mean_token_accuracy": 0.9421666264533997,
"num_tokens": 53208.0,
"step": 480
},
{
"epoch": 1.8352059925093633,
"grad_norm": 0.07852394878864288,
"learning_rate": 1.2674157303370786e-05,
"loss": 0.1915,
"mean_token_accuracy": 0.9441100597381592,
"num_tokens": 54319.0,
"step": 490
},
{
"epoch": 1.8726591760299627,
"grad_norm": 0.09249723702669144,
"learning_rate": 1.2524344569288391e-05,
"loss": 0.19,
"mean_token_accuracy": 0.9484964370727539,
"num_tokens": 55426.0,
"step": 500
},
{
"epoch": 1.8726591760299627,
"eval_loss": 0.19536998867988586,
"eval_mean_token_accuracy": 0.945041760802269,
"eval_num_tokens": 55426.0,
"eval_runtime": 2.499,
"eval_samples_per_second": 11.605,
"eval_steps_per_second": 1.601,
"step": 500
},
{
"epoch": 1.9101123595505618,
"grad_norm": 0.07890783250331879,
"learning_rate": 1.2374531835205994e-05,
"loss": 0.1909,
"mean_token_accuracy": 0.9412918269634247,
"num_tokens": 56536.0,
"step": 510
},
{
"epoch": 1.947565543071161,
"grad_norm": 0.2816140353679657,
"learning_rate": 1.2224719101123596e-05,
"loss": 0.1923,
"mean_token_accuracy": 0.9376968383789063,
"num_tokens": 57648.0,
"step": 520
},
{
"epoch": 1.9850187265917603,
"grad_norm": 0.08590656518936157,
"learning_rate": 1.2074906367041199e-05,
"loss": 0.1904,
"mean_token_accuracy": 0.9467627465724945,
"num_tokens": 58758.0,
"step": 530
},
{
"epoch": 2.0224719101123596,
"grad_norm": 0.1013297438621521,
"learning_rate": 1.1925093632958802e-05,
"loss": 0.1903,
"mean_token_accuracy": 0.9485378265380859,
"num_tokens": 59811.0,
"step": 540
},
{
"epoch": 2.059925093632959,
"grad_norm": 0.07267877459526062,
"learning_rate": 1.1775280898876404e-05,
"loss": 0.1897,
"mean_token_accuracy": 0.9469048321247101,
"num_tokens": 60923.0,
"step": 550
},
{
"epoch": 2.097378277153558,
"grad_norm": 0.08559578657150269,
"learning_rate": 1.1625468164794009e-05,
"loss": 0.1913,
"mean_token_accuracy": 0.943006819486618,
"num_tokens": 62031.0,
"step": 560
},
{
"epoch": 2.134831460674157,
"grad_norm": 0.2162655144929886,
"learning_rate": 1.1475655430711611e-05,
"loss": 0.188,
"mean_token_accuracy": 0.9467701494693757,
"num_tokens": 63140.0,
"step": 570
},
{
"epoch": 2.1722846441947565,
"grad_norm": 0.08606795221567154,
"learning_rate": 1.1325842696629214e-05,
"loss": 0.189,
"mean_token_accuracy": 0.9439931452274323,
"num_tokens": 64249.0,
"step": 580
},
{
"epoch": 2.209737827715356,
"grad_norm": 0.2562474310398102,
"learning_rate": 1.1176029962546817e-05,
"loss": 0.1926,
"mean_token_accuracy": 0.9457504689693451,
"num_tokens": 65356.0,
"step": 590
},
{
"epoch": 2.247191011235955,
"grad_norm": 0.0770883709192276,
"learning_rate": 1.102621722846442e-05,
"loss": 0.1895,
"mean_token_accuracy": 0.9449774503707886,
"num_tokens": 66466.0,
"step": 600
},
{
"epoch": 2.247191011235955,
"eval_loss": 0.19561618566513062,
"eval_mean_token_accuracy": 0.9387146234512329,
"eval_num_tokens": 66466.0,
"eval_runtime": 2.498,
"eval_samples_per_second": 11.609,
"eval_steps_per_second": 1.601,
"step": 600
}
],
"logging_steps": 10,
"max_steps": 1335,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3049029865728000.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}