| { |
| "best_metric": 0.11153655499219894, |
| "best_model_checkpoint": "deepfake_detection/checkpoint-35710", |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 35710, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.14001680201624195, |
| "grad_norm": 1.0528628826141357, |
| "learning_rate": 9.873808188446438e-07, |
| "loss": 0.6555, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2800336040324839, |
| "grad_norm": 1.7406058311462402, |
| "learning_rate": 9.733595064498036e-07, |
| "loss": 0.4817, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.42005040604872584, |
| "grad_norm": 2.337244987487793, |
| "learning_rate": 9.593381940549635e-07, |
| "loss": 0.3393, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5600672080649678, |
| "grad_norm": 2.840176582336426, |
| "learning_rate": 9.453168816601234e-07, |
| "loss": 0.2739, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7000840100812098, |
| "grad_norm": 2.7442541122436523, |
| "learning_rate": 9.312955692652832e-07, |
| "loss": 0.2335, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8401008120974517, |
| "grad_norm": 4.048594951629639, |
| "learning_rate": 9.17274256870443e-07, |
| "loss": 0.2087, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9801176141136937, |
| "grad_norm": 2.741589069366455, |
| "learning_rate": 9.032529444756028e-07, |
| "loss": 0.1953, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9271411877470096, |
| "eval_loss": 0.277784526348114, |
| "eval_model_preparation_time": 0.003, |
| "eval_runtime": 461.9798, |
| "eval_samples_per_second": 164.858, |
| "eval_steps_per_second": 20.609, |
| "step": 3571 |
| }, |
| { |
| "epoch": 1.1201344161299356, |
| "grad_norm": 2.69423246383667, |
| "learning_rate": 8.892316320807627e-07, |
| "loss": 0.1865, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.2601512181461776, |
| "grad_norm": 3.063028335571289, |
| "learning_rate": 8.752103196859225e-07, |
| "loss": 0.1789, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.4001680201624196, |
| "grad_norm": 2.7112696170806885, |
| "learning_rate": 8.611890072910825e-07, |
| "loss": 0.1587, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.5401848221786616, |
| "grad_norm": 1.334972858428955, |
| "learning_rate": 8.471676948962423e-07, |
| "loss": 0.1654, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.6802016241949034, |
| "grad_norm": 4.083142280578613, |
| "learning_rate": 8.331463825014021e-07, |
| "loss": 0.1472, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.8202184262111454, |
| "grad_norm": 3.0988452434539795, |
| "learning_rate": 8.191250701065619e-07, |
| "loss": 0.1454, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.9602352282273872, |
| "grad_norm": 4.7256011962890625, |
| "learning_rate": 8.051037577117218e-07, |
| "loss": 0.137, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9447748847835507, |
| "eval_loss": 0.19382010400295258, |
| "eval_model_preparation_time": 0.003, |
| "eval_runtime": 446.387, |
| "eval_samples_per_second": 170.617, |
| "eval_steps_per_second": 21.329, |
| "step": 7142 |
| }, |
| { |
| "epoch": 2.100252030243629, |
| "grad_norm": 2.3897340297698975, |
| "learning_rate": 7.910824453168816e-07, |
| "loss": 0.1272, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.240268832259871, |
| "grad_norm": 7.572381019592285, |
| "learning_rate": 7.770611329220415e-07, |
| "loss": 0.1339, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.380285634276113, |
| "grad_norm": 2.5557875633239746, |
| "learning_rate": 7.630398205272013e-07, |
| "loss": 0.1352, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.520302436292355, |
| "grad_norm": 2.867504119873047, |
| "learning_rate": 7.490185081323611e-07, |
| "loss": 0.1312, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.660319238308597, |
| "grad_norm": 5.774721622467041, |
| "learning_rate": 7.34997195737521e-07, |
| "loss": 0.1263, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.800336040324839, |
| "grad_norm": 1.3829355239868164, |
| "learning_rate": 7.209758833426809e-07, |
| "loss": 0.1222, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.940352842341081, |
| "grad_norm": 3.2408864498138428, |
| "learning_rate": 7.069545709478407e-07, |
| "loss": 0.1238, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9503420385761742, |
| "eval_loss": 0.16304399073123932, |
| "eval_model_preparation_time": 0.003, |
| "eval_runtime": 445.7116, |
| "eval_samples_per_second": 170.875, |
| "eval_steps_per_second": 21.361, |
| "step": 10713 |
| }, |
| { |
| "epoch": 3.0803696443573227, |
| "grad_norm": 5.649173259735107, |
| "learning_rate": 6.929332585530005e-07, |
| "loss": 0.1163, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.2203864463735647, |
| "grad_norm": 6.677682876586914, |
| "learning_rate": 6.789119461581604e-07, |
| "loss": 0.1151, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.3604032483898068, |
| "grad_norm": 6.577985763549805, |
| "learning_rate": 6.648906337633202e-07, |
| "loss": 0.116, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.5004200504060488, |
| "grad_norm": 4.521468162536621, |
| "learning_rate": 6.508693213684801e-07, |
| "loss": 0.1166, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.6404368524222908, |
| "grad_norm": 0.37339428067207336, |
| "learning_rate": 6.368480089736399e-07, |
| "loss": 0.1063, |
| "step": 13000 |
| }, |
| { |
| "epoch": 3.7804536544385328, |
| "grad_norm": 2.2542712688446045, |
| "learning_rate": 6.228266965787997e-07, |
| "loss": 0.108, |
| "step": 13500 |
| }, |
| { |
| "epoch": 3.9204704564547743, |
| "grad_norm": 11.904646873474121, |
| "learning_rate": 6.088053841839595e-07, |
| "loss": 0.1094, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9547406152755348, |
| "eval_loss": 0.14307229220867157, |
| "eval_model_preparation_time": 0.003, |
| "eval_runtime": 473.4476, |
| "eval_samples_per_second": 160.865, |
| "eval_steps_per_second": 20.11, |
| "step": 14284 |
| }, |
| { |
| "epoch": 4.060487258471016, |
| "grad_norm": 1.572487711906433, |
| "learning_rate": 5.947840717891194e-07, |
| "loss": 0.1105, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.200504060487258, |
| "grad_norm": 2.691279888153076, |
| "learning_rate": 5.807627593942794e-07, |
| "loss": 0.102, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.3405208625035, |
| "grad_norm": 3.9856207370758057, |
| "learning_rate": 5.667414469994392e-07, |
| "loss": 0.1032, |
| "step": 15500 |
| }, |
| { |
| "epoch": 4.480537664519742, |
| "grad_norm": 1.828147292137146, |
| "learning_rate": 5.52720134604599e-07, |
| "loss": 0.1029, |
| "step": 16000 |
| }, |
| { |
| "epoch": 4.620554466535984, |
| "grad_norm": 0.4480103850364685, |
| "learning_rate": 5.386988222097588e-07, |
| "loss": 0.1024, |
| "step": 16500 |
| }, |
| { |
| "epoch": 4.760571268552226, |
| "grad_norm": 9.204968452453613, |
| "learning_rate": 5.246775098149186e-07, |
| "loss": 0.0991, |
| "step": 17000 |
| }, |
| { |
| "epoch": 4.900588070568468, |
| "grad_norm": 7.890961170196533, |
| "learning_rate": 5.106561974200784e-07, |
| "loss": 0.1056, |
| "step": 17500 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9586927692651095, |
| "eval_loss": 0.12978993356227875, |
| "eval_model_preparation_time": 0.003, |
| "eval_runtime": 437.804, |
| "eval_samples_per_second": 173.961, |
| "eval_steps_per_second": 21.747, |
| "step": 17855 |
| }, |
| { |
| "epoch": 5.04060487258471, |
| "grad_norm": 2.9656715393066406, |
| "learning_rate": 4.966348850252384e-07, |
| "loss": 0.0981, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.180621674600952, |
| "grad_norm": 1.717795968055725, |
| "learning_rate": 4.826135726303982e-07, |
| "loss": 0.1049, |
| "step": 18500 |
| }, |
| { |
| "epoch": 5.320638476617194, |
| "grad_norm": 4.466497421264648, |
| "learning_rate": 4.6859226023555804e-07, |
| "loss": 0.1009, |
| "step": 19000 |
| }, |
| { |
| "epoch": 5.460655278633436, |
| "grad_norm": 2.382636547088623, |
| "learning_rate": 4.5457094784071786e-07, |
| "loss": 0.097, |
| "step": 19500 |
| }, |
| { |
| "epoch": 5.600672080649678, |
| "grad_norm": 3.537141799926758, |
| "learning_rate": 4.405496354458777e-07, |
| "loss": 0.0947, |
| "step": 20000 |
| }, |
| { |
| "epoch": 5.7406888826659195, |
| "grad_norm": 3.2444217205047607, |
| "learning_rate": 4.2652832305103755e-07, |
| "loss": 0.096, |
| "step": 20500 |
| }, |
| { |
| "epoch": 5.8807056846821615, |
| "grad_norm": 6.093824863433838, |
| "learning_rate": 4.125070106561974e-07, |
| "loss": 0.0962, |
| "step": 21000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.9606754112997465, |
| "eval_loss": 0.1219368726015091, |
| "eval_model_preparation_time": 0.003, |
| "eval_runtime": 461.6477, |
| "eval_samples_per_second": 164.976, |
| "eval_steps_per_second": 20.624, |
| "step": 21426 |
| }, |
| { |
| "epoch": 6.0207224866984035, |
| "grad_norm": 6.273184299468994, |
| "learning_rate": 3.9848569826135723e-07, |
| "loss": 0.1001, |
| "step": 21500 |
| }, |
| { |
| "epoch": 6.1607392887146455, |
| "grad_norm": 1.6552726030349731, |
| "learning_rate": 3.844643858665171e-07, |
| "loss": 0.0935, |
| "step": 22000 |
| }, |
| { |
| "epoch": 6.3007560907308875, |
| "grad_norm": 1.253029465675354, |
| "learning_rate": 3.704430734716769e-07, |
| "loss": 0.0944, |
| "step": 22500 |
| }, |
| { |
| "epoch": 6.4407728927471295, |
| "grad_norm": 6.506760120391846, |
| "learning_rate": 3.564217610768368e-07, |
| "loss": 0.092, |
| "step": 23000 |
| }, |
| { |
| "epoch": 6.5807896947633715, |
| "grad_norm": 6.743386268615723, |
| "learning_rate": 3.4240044868199666e-07, |
| "loss": 0.0955, |
| "step": 23500 |
| }, |
| { |
| "epoch": 6.7208064967796135, |
| "grad_norm": 7.667580604553223, |
| "learning_rate": 3.2837913628715647e-07, |
| "loss": 0.0923, |
| "step": 24000 |
| }, |
| { |
| "epoch": 6.8608232987958555, |
| "grad_norm": 3.372116804122925, |
| "learning_rate": 3.143578238923163e-07, |
| "loss": 0.0992, |
| "step": 24500 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.9620934599073017, |
| "eval_loss": 0.11666399985551834, |
| "eval_model_preparation_time": 0.003, |
| "eval_runtime": 447.5043, |
| "eval_samples_per_second": 170.191, |
| "eval_steps_per_second": 21.276, |
| "step": 24997 |
| }, |
| { |
| "epoch": 7.0008401008120975, |
| "grad_norm": 0.35922595858573914, |
| "learning_rate": 3.003365114974761e-07, |
| "loss": 0.0889, |
| "step": 25000 |
| }, |
| { |
| "epoch": 7.1408569028283395, |
| "grad_norm": 0.6694265007972717, |
| "learning_rate": 2.8631519910263603e-07, |
| "loss": 0.0904, |
| "step": 25500 |
| }, |
| { |
| "epoch": 7.2808737048445815, |
| "grad_norm": 0.41824430227279663, |
| "learning_rate": 2.7229388670779584e-07, |
| "loss": 0.0934, |
| "step": 26000 |
| }, |
| { |
| "epoch": 7.4208905068608235, |
| "grad_norm": 6.300509929656982, |
| "learning_rate": 2.5827257431295566e-07, |
| "loss": 0.0957, |
| "step": 26500 |
| }, |
| { |
| "epoch": 7.5609073088770655, |
| "grad_norm": 3.323270559310913, |
| "learning_rate": 2.4425126191811553e-07, |
| "loss": 0.0879, |
| "step": 27000 |
| }, |
| { |
| "epoch": 7.7009241108933075, |
| "grad_norm": 3.265133857727051, |
| "learning_rate": 2.3022994952327537e-07, |
| "loss": 0.091, |
| "step": 27500 |
| }, |
| { |
| "epoch": 7.8409409129095495, |
| "grad_norm": 9.813462257385254, |
| "learning_rate": 2.1620863712843522e-07, |
| "loss": 0.0895, |
| "step": 28000 |
| }, |
| { |
| "epoch": 7.9809577149257915, |
| "grad_norm": 0.7600739002227783, |
| "learning_rate": 2.0218732473359506e-07, |
| "loss": 0.0911, |
| "step": 28500 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9630519557253713, |
| "eval_loss": 0.1135854721069336, |
| "eval_model_preparation_time": 0.003, |
| "eval_runtime": 439.5121, |
| "eval_samples_per_second": 173.285, |
| "eval_steps_per_second": 21.663, |
| "step": 28568 |
| }, |
| { |
| "epoch": 8.120974516942033, |
| "grad_norm": 6.328824043273926, |
| "learning_rate": 1.881660123387549e-07, |
| "loss": 0.0875, |
| "step": 29000 |
| }, |
| { |
| "epoch": 8.260991318958276, |
| "grad_norm": 16.23442840576172, |
| "learning_rate": 1.7414469994391472e-07, |
| "loss": 0.089, |
| "step": 29500 |
| }, |
| { |
| "epoch": 8.401008120974517, |
| "grad_norm": 7.651858329772949, |
| "learning_rate": 1.601233875490746e-07, |
| "loss": 0.0917, |
| "step": 30000 |
| }, |
| { |
| "epoch": 8.54102492299076, |
| "grad_norm": 2.9040281772613525, |
| "learning_rate": 1.4610207515423443e-07, |
| "loss": 0.0857, |
| "step": 30500 |
| }, |
| { |
| "epoch": 8.681041725007, |
| "grad_norm": 6.958981990814209, |
| "learning_rate": 1.3208076275939427e-07, |
| "loss": 0.09, |
| "step": 31000 |
| }, |
| { |
| "epoch": 8.821058527023244, |
| "grad_norm": 4.84717321395874, |
| "learning_rate": 1.1805945036455412e-07, |
| "loss": 0.0917, |
| "step": 31500 |
| }, |
| { |
| "epoch": 8.961075329039485, |
| "grad_norm": 6.962361812591553, |
| "learning_rate": 1.0403813796971396e-07, |
| "loss": 0.0889, |
| "step": 32000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.9630256955659721, |
| "eval_loss": 0.11266375333070755, |
| "eval_model_preparation_time": 0.003, |
| "eval_runtime": 440.431, |
| "eval_samples_per_second": 172.924, |
| "eval_steps_per_second": 21.617, |
| "step": 32139 |
| }, |
| { |
| "epoch": 9.101092131055728, |
| "grad_norm": 13.205183982849121, |
| "learning_rate": 9.00168255748738e-08, |
| "loss": 0.09, |
| "step": 32500 |
| }, |
| { |
| "epoch": 9.241108933071969, |
| "grad_norm": 0.3649824261665344, |
| "learning_rate": 7.599551318003366e-08, |
| "loss": 0.0818, |
| "step": 33000 |
| }, |
| { |
| "epoch": 9.38112573508821, |
| "grad_norm": 1.0988820791244507, |
| "learning_rate": 6.197420078519349e-08, |
| "loss": 0.0847, |
| "step": 33500 |
| }, |
| { |
| "epoch": 9.521142537104453, |
| "grad_norm": 10.086563110351562, |
| "learning_rate": 4.795288839035334e-08, |
| "loss": 0.0909, |
| "step": 34000 |
| }, |
| { |
| "epoch": 9.661159339120694, |
| "grad_norm": 1.5432101488113403, |
| "learning_rate": 3.3931575995513173e-08, |
| "loss": 0.0873, |
| "step": 34500 |
| }, |
| { |
| "epoch": 9.801176141136937, |
| "grad_norm": 5.793679714202881, |
| "learning_rate": 1.9910263600673023e-08, |
| "loss": 0.0847, |
| "step": 35000 |
| }, |
| { |
| "epoch": 9.941192943153178, |
| "grad_norm": 1.821306824684143, |
| "learning_rate": 5.888951205832866e-09, |
| "loss": 0.0862, |
| "step": 35500 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.9634327280366591, |
| "eval_loss": 0.11153655499219894, |
| "eval_model_preparation_time": 0.003, |
| "eval_runtime": 439.1936, |
| "eval_samples_per_second": 173.411, |
| "eval_steps_per_second": 21.678, |
| "step": 35710 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 35710, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.852762385560602e+19, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|