deepfake_detection / checkpoint-35710 /trainer_state.json
alkatraz445's picture
Upload folder using huggingface_hub
b78b249 verified
{
"best_metric": 0.11153655499219894,
"best_model_checkpoint": "deepfake_detection/checkpoint-35710",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 35710,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14001680201624195,
"grad_norm": 1.0528628826141357,
"learning_rate": 9.873808188446438e-07,
"loss": 0.6555,
"step": 500
},
{
"epoch": 0.2800336040324839,
"grad_norm": 1.7406058311462402,
"learning_rate": 9.733595064498036e-07,
"loss": 0.4817,
"step": 1000
},
{
"epoch": 0.42005040604872584,
"grad_norm": 2.337244987487793,
"learning_rate": 9.593381940549635e-07,
"loss": 0.3393,
"step": 1500
},
{
"epoch": 0.5600672080649678,
"grad_norm": 2.840176582336426,
"learning_rate": 9.453168816601234e-07,
"loss": 0.2739,
"step": 2000
},
{
"epoch": 0.7000840100812098,
"grad_norm": 2.7442541122436523,
"learning_rate": 9.312955692652832e-07,
"loss": 0.2335,
"step": 2500
},
{
"epoch": 0.8401008120974517,
"grad_norm": 4.048594951629639,
"learning_rate": 9.17274256870443e-07,
"loss": 0.2087,
"step": 3000
},
{
"epoch": 0.9801176141136937,
"grad_norm": 2.741589069366455,
"learning_rate": 9.032529444756028e-07,
"loss": 0.1953,
"step": 3500
},
{
"epoch": 1.0,
"eval_accuracy": 0.9271411877470096,
"eval_loss": 0.277784526348114,
"eval_model_preparation_time": 0.003,
"eval_runtime": 461.9798,
"eval_samples_per_second": 164.858,
"eval_steps_per_second": 20.609,
"step": 3571
},
{
"epoch": 1.1201344161299356,
"grad_norm": 2.69423246383667,
"learning_rate": 8.892316320807627e-07,
"loss": 0.1865,
"step": 4000
},
{
"epoch": 1.2601512181461776,
"grad_norm": 3.063028335571289,
"learning_rate": 8.752103196859225e-07,
"loss": 0.1789,
"step": 4500
},
{
"epoch": 1.4001680201624196,
"grad_norm": 2.7112696170806885,
"learning_rate": 8.611890072910825e-07,
"loss": 0.1587,
"step": 5000
},
{
"epoch": 1.5401848221786616,
"grad_norm": 1.334972858428955,
"learning_rate": 8.471676948962423e-07,
"loss": 0.1654,
"step": 5500
},
{
"epoch": 1.6802016241949034,
"grad_norm": 4.083142280578613,
"learning_rate": 8.331463825014021e-07,
"loss": 0.1472,
"step": 6000
},
{
"epoch": 1.8202184262111454,
"grad_norm": 3.0988452434539795,
"learning_rate": 8.191250701065619e-07,
"loss": 0.1454,
"step": 6500
},
{
"epoch": 1.9602352282273872,
"grad_norm": 4.7256011962890625,
"learning_rate": 8.051037577117218e-07,
"loss": 0.137,
"step": 7000
},
{
"epoch": 2.0,
"eval_accuracy": 0.9447748847835507,
"eval_loss": 0.19382010400295258,
"eval_model_preparation_time": 0.003,
"eval_runtime": 446.387,
"eval_samples_per_second": 170.617,
"eval_steps_per_second": 21.329,
"step": 7142
},
{
"epoch": 2.100252030243629,
"grad_norm": 2.3897340297698975,
"learning_rate": 7.910824453168816e-07,
"loss": 0.1272,
"step": 7500
},
{
"epoch": 2.240268832259871,
"grad_norm": 7.572381019592285,
"learning_rate": 7.770611329220415e-07,
"loss": 0.1339,
"step": 8000
},
{
"epoch": 2.380285634276113,
"grad_norm": 2.5557875633239746,
"learning_rate": 7.630398205272013e-07,
"loss": 0.1352,
"step": 8500
},
{
"epoch": 2.520302436292355,
"grad_norm": 2.867504119873047,
"learning_rate": 7.490185081323611e-07,
"loss": 0.1312,
"step": 9000
},
{
"epoch": 2.660319238308597,
"grad_norm": 5.774721622467041,
"learning_rate": 7.34997195737521e-07,
"loss": 0.1263,
"step": 9500
},
{
"epoch": 2.800336040324839,
"grad_norm": 1.3829355239868164,
"learning_rate": 7.209758833426809e-07,
"loss": 0.1222,
"step": 10000
},
{
"epoch": 2.940352842341081,
"grad_norm": 3.2408864498138428,
"learning_rate": 7.069545709478407e-07,
"loss": 0.1238,
"step": 10500
},
{
"epoch": 3.0,
"eval_accuracy": 0.9503420385761742,
"eval_loss": 0.16304399073123932,
"eval_model_preparation_time": 0.003,
"eval_runtime": 445.7116,
"eval_samples_per_second": 170.875,
"eval_steps_per_second": 21.361,
"step": 10713
},
{
"epoch": 3.0803696443573227,
"grad_norm": 5.649173259735107,
"learning_rate": 6.929332585530005e-07,
"loss": 0.1163,
"step": 11000
},
{
"epoch": 3.2203864463735647,
"grad_norm": 6.677682876586914,
"learning_rate": 6.789119461581604e-07,
"loss": 0.1151,
"step": 11500
},
{
"epoch": 3.3604032483898068,
"grad_norm": 6.577985763549805,
"learning_rate": 6.648906337633202e-07,
"loss": 0.116,
"step": 12000
},
{
"epoch": 3.5004200504060488,
"grad_norm": 4.521468162536621,
"learning_rate": 6.508693213684801e-07,
"loss": 0.1166,
"step": 12500
},
{
"epoch": 3.6404368524222908,
"grad_norm": 0.37339428067207336,
"learning_rate": 6.368480089736399e-07,
"loss": 0.1063,
"step": 13000
},
{
"epoch": 3.7804536544385328,
"grad_norm": 2.2542712688446045,
"learning_rate": 6.228266965787997e-07,
"loss": 0.108,
"step": 13500
},
{
"epoch": 3.9204704564547743,
"grad_norm": 11.904646873474121,
"learning_rate": 6.088053841839595e-07,
"loss": 0.1094,
"step": 14000
},
{
"epoch": 4.0,
"eval_accuracy": 0.9547406152755348,
"eval_loss": 0.14307229220867157,
"eval_model_preparation_time": 0.003,
"eval_runtime": 473.4476,
"eval_samples_per_second": 160.865,
"eval_steps_per_second": 20.11,
"step": 14284
},
{
"epoch": 4.060487258471016,
"grad_norm": 1.572487711906433,
"learning_rate": 5.947840717891194e-07,
"loss": 0.1105,
"step": 14500
},
{
"epoch": 4.200504060487258,
"grad_norm": 2.691279888153076,
"learning_rate": 5.807627593942794e-07,
"loss": 0.102,
"step": 15000
},
{
"epoch": 4.3405208625035,
"grad_norm": 3.9856207370758057,
"learning_rate": 5.667414469994392e-07,
"loss": 0.1032,
"step": 15500
},
{
"epoch": 4.480537664519742,
"grad_norm": 1.828147292137146,
"learning_rate": 5.52720134604599e-07,
"loss": 0.1029,
"step": 16000
},
{
"epoch": 4.620554466535984,
"grad_norm": 0.4480103850364685,
"learning_rate": 5.386988222097588e-07,
"loss": 0.1024,
"step": 16500
},
{
"epoch": 4.760571268552226,
"grad_norm": 9.204968452453613,
"learning_rate": 5.246775098149186e-07,
"loss": 0.0991,
"step": 17000
},
{
"epoch": 4.900588070568468,
"grad_norm": 7.890961170196533,
"learning_rate": 5.106561974200784e-07,
"loss": 0.1056,
"step": 17500
},
{
"epoch": 5.0,
"eval_accuracy": 0.9586927692651095,
"eval_loss": 0.12978993356227875,
"eval_model_preparation_time": 0.003,
"eval_runtime": 437.804,
"eval_samples_per_second": 173.961,
"eval_steps_per_second": 21.747,
"step": 17855
},
{
"epoch": 5.04060487258471,
"grad_norm": 2.9656715393066406,
"learning_rate": 4.966348850252384e-07,
"loss": 0.0981,
"step": 18000
},
{
"epoch": 5.180621674600952,
"grad_norm": 1.717795968055725,
"learning_rate": 4.826135726303982e-07,
"loss": 0.1049,
"step": 18500
},
{
"epoch": 5.320638476617194,
"grad_norm": 4.466497421264648,
"learning_rate": 4.6859226023555804e-07,
"loss": 0.1009,
"step": 19000
},
{
"epoch": 5.460655278633436,
"grad_norm": 2.382636547088623,
"learning_rate": 4.5457094784071786e-07,
"loss": 0.097,
"step": 19500
},
{
"epoch": 5.600672080649678,
"grad_norm": 3.537141799926758,
"learning_rate": 4.405496354458777e-07,
"loss": 0.0947,
"step": 20000
},
{
"epoch": 5.7406888826659195,
"grad_norm": 3.2444217205047607,
"learning_rate": 4.2652832305103755e-07,
"loss": 0.096,
"step": 20500
},
{
"epoch": 5.8807056846821615,
"grad_norm": 6.093824863433838,
"learning_rate": 4.125070106561974e-07,
"loss": 0.0962,
"step": 21000
},
{
"epoch": 6.0,
"eval_accuracy": 0.9606754112997465,
"eval_loss": 0.1219368726015091,
"eval_model_preparation_time": 0.003,
"eval_runtime": 461.6477,
"eval_samples_per_second": 164.976,
"eval_steps_per_second": 20.624,
"step": 21426
},
{
"epoch": 6.0207224866984035,
"grad_norm": 6.273184299468994,
"learning_rate": 3.9848569826135723e-07,
"loss": 0.1001,
"step": 21500
},
{
"epoch": 6.1607392887146455,
"grad_norm": 1.6552726030349731,
"learning_rate": 3.844643858665171e-07,
"loss": 0.0935,
"step": 22000
},
{
"epoch": 6.3007560907308875,
"grad_norm": 1.253029465675354,
"learning_rate": 3.704430734716769e-07,
"loss": 0.0944,
"step": 22500
},
{
"epoch": 6.4407728927471295,
"grad_norm": 6.506760120391846,
"learning_rate": 3.564217610768368e-07,
"loss": 0.092,
"step": 23000
},
{
"epoch": 6.5807896947633715,
"grad_norm": 6.743386268615723,
"learning_rate": 3.4240044868199666e-07,
"loss": 0.0955,
"step": 23500
},
{
"epoch": 6.7208064967796135,
"grad_norm": 7.667580604553223,
"learning_rate": 3.2837913628715647e-07,
"loss": 0.0923,
"step": 24000
},
{
"epoch": 6.8608232987958555,
"grad_norm": 3.372116804122925,
"learning_rate": 3.143578238923163e-07,
"loss": 0.0992,
"step": 24500
},
{
"epoch": 7.0,
"eval_accuracy": 0.9620934599073017,
"eval_loss": 0.11666399985551834,
"eval_model_preparation_time": 0.003,
"eval_runtime": 447.5043,
"eval_samples_per_second": 170.191,
"eval_steps_per_second": 21.276,
"step": 24997
},
{
"epoch": 7.0008401008120975,
"grad_norm": 0.35922595858573914,
"learning_rate": 3.003365114974761e-07,
"loss": 0.0889,
"step": 25000
},
{
"epoch": 7.1408569028283395,
"grad_norm": 0.6694265007972717,
"learning_rate": 2.8631519910263603e-07,
"loss": 0.0904,
"step": 25500
},
{
"epoch": 7.2808737048445815,
"grad_norm": 0.41824430227279663,
"learning_rate": 2.7229388670779584e-07,
"loss": 0.0934,
"step": 26000
},
{
"epoch": 7.4208905068608235,
"grad_norm": 6.300509929656982,
"learning_rate": 2.5827257431295566e-07,
"loss": 0.0957,
"step": 26500
},
{
"epoch": 7.5609073088770655,
"grad_norm": 3.323270559310913,
"learning_rate": 2.4425126191811553e-07,
"loss": 0.0879,
"step": 27000
},
{
"epoch": 7.7009241108933075,
"grad_norm": 3.265133857727051,
"learning_rate": 2.3022994952327537e-07,
"loss": 0.091,
"step": 27500
},
{
"epoch": 7.8409409129095495,
"grad_norm": 9.813462257385254,
"learning_rate": 2.1620863712843522e-07,
"loss": 0.0895,
"step": 28000
},
{
"epoch": 7.9809577149257915,
"grad_norm": 0.7600739002227783,
"learning_rate": 2.0218732473359506e-07,
"loss": 0.0911,
"step": 28500
},
{
"epoch": 8.0,
"eval_accuracy": 0.9630519557253713,
"eval_loss": 0.1135854721069336,
"eval_model_preparation_time": 0.003,
"eval_runtime": 439.5121,
"eval_samples_per_second": 173.285,
"eval_steps_per_second": 21.663,
"step": 28568
},
{
"epoch": 8.120974516942033,
"grad_norm": 6.328824043273926,
"learning_rate": 1.881660123387549e-07,
"loss": 0.0875,
"step": 29000
},
{
"epoch": 8.260991318958276,
"grad_norm": 16.23442840576172,
"learning_rate": 1.7414469994391472e-07,
"loss": 0.089,
"step": 29500
},
{
"epoch": 8.401008120974517,
"grad_norm": 7.651858329772949,
"learning_rate": 1.601233875490746e-07,
"loss": 0.0917,
"step": 30000
},
{
"epoch": 8.54102492299076,
"grad_norm": 2.9040281772613525,
"learning_rate": 1.4610207515423443e-07,
"loss": 0.0857,
"step": 30500
},
{
"epoch": 8.681041725007,
"grad_norm": 6.958981990814209,
"learning_rate": 1.3208076275939427e-07,
"loss": 0.09,
"step": 31000
},
{
"epoch": 8.821058527023244,
"grad_norm": 4.84717321395874,
"learning_rate": 1.1805945036455412e-07,
"loss": 0.0917,
"step": 31500
},
{
"epoch": 8.961075329039485,
"grad_norm": 6.962361812591553,
"learning_rate": 1.0403813796971396e-07,
"loss": 0.0889,
"step": 32000
},
{
"epoch": 9.0,
"eval_accuracy": 0.9630256955659721,
"eval_loss": 0.11266375333070755,
"eval_model_preparation_time": 0.003,
"eval_runtime": 440.431,
"eval_samples_per_second": 172.924,
"eval_steps_per_second": 21.617,
"step": 32139
},
{
"epoch": 9.101092131055728,
"grad_norm": 13.205183982849121,
"learning_rate": 9.00168255748738e-08,
"loss": 0.09,
"step": 32500
},
{
"epoch": 9.241108933071969,
"grad_norm": 0.3649824261665344,
"learning_rate": 7.599551318003366e-08,
"loss": 0.0818,
"step": 33000
},
{
"epoch": 9.38112573508821,
"grad_norm": 1.0988820791244507,
"learning_rate": 6.197420078519349e-08,
"loss": 0.0847,
"step": 33500
},
{
"epoch": 9.521142537104453,
"grad_norm": 10.086563110351562,
"learning_rate": 4.795288839035334e-08,
"loss": 0.0909,
"step": 34000
},
{
"epoch": 9.661159339120694,
"grad_norm": 1.5432101488113403,
"learning_rate": 3.3931575995513173e-08,
"loss": 0.0873,
"step": 34500
},
{
"epoch": 9.801176141136937,
"grad_norm": 5.793679714202881,
"learning_rate": 1.9910263600673023e-08,
"loss": 0.0847,
"step": 35000
},
{
"epoch": 9.941192943153178,
"grad_norm": 1.821306824684143,
"learning_rate": 5.888951205832866e-09,
"loss": 0.0862,
"step": 35500
},
{
"epoch": 10.0,
"eval_accuracy": 0.9634327280366591,
"eval_loss": 0.11153655499219894,
"eval_model_preparation_time": 0.003,
"eval_runtime": 439.1936,
"eval_samples_per_second": 173.411,
"eval_steps_per_second": 21.678,
"step": 35710
}
],
"logging_steps": 500,
"max_steps": 35710,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.852762385560602e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}