{
  "best_metric": 0.9701426609443169,
  "best_model_checkpoint": "mit-b4-finetuned-stroke-binary/checkpoint-1700",
  "epoch": 11.930232558139535,
  "eval_steps": 100,
  "global_step": 1932,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.31007751937984496,
      "grad_norm": 2.3132071495056152,
      "learning_rate": 5.154639175257732e-06,
      "loss": 0.6722,
      "step": 50
    },
    {
      "epoch": 0.6201550387596899,
      "grad_norm": 6.47659969329834,
      "learning_rate": 1.0309278350515464e-05,
      "loss": 0.5714,
      "step": 100
    },
    {
      "epoch": 0.6201550387596899,
      "eval_accuracy": 0.7878787878787878,
      "eval_f1": 0.7800219254126745,
      "eval_loss": 0.477566123008728,
      "eval_precision": 0.7900169125975578,
      "eval_recall": 0.7878787878787878,
      "eval_runtime": 56.589,
      "eval_samples_per_second": 39.071,
      "eval_steps_per_second": 4.895,
      "step": 100
    },
    {
      "epoch": 0.9302325581395349,
      "grad_norm": 4.08937406539917,
      "learning_rate": 1.5360824742268042e-05,
      "loss": 0.4471,
      "step": 150
    },
    {
      "epoch": 1.235658914728682,
      "grad_norm": 8.325615882873535,
      "learning_rate": 1.9999738610404825e-05,
      "loss": 0.3897,
      "step": 200
    },
    {
      "epoch": 1.235658914728682,
      "eval_accuracy": 0.8715513342379014,
      "eval_f1": 0.8704450429260716,
      "eval_loss": 0.3238992393016815,
      "eval_precision": 0.8710997928702509,
      "eval_recall": 0.8715513342379014,
      "eval_runtime": 56.7955,
      "eval_samples_per_second": 38.929,
      "eval_steps_per_second": 4.877,
      "step": 200
    },
    {
      "epoch": 1.5457364341085271,
      "grad_norm": 8.041431427001953,
      "learning_rate": 1.9952399350448247e-05,
      "loss": 0.3177,
      "step": 250
    },
    {
      "epoch": 1.8558139534883722,
      "grad_norm": 7.284913063049316,
      "learning_rate": 1.9823819633544185e-05,
      "loss": 0.2951,
      "step": 300
    },
    {
      "epoch": 1.8558139534883722,
      "eval_accuracy": 0.8765264586160109,
      "eval_f1": 0.8723990420158877,
      "eval_loss": 0.31197357177734375,
      "eval_precision": 0.885806076698763,
      "eval_recall": 0.8765264586160109,
      "eval_runtime": 56.5628,
      "eval_samples_per_second": 39.089,
      "eval_steps_per_second": 4.897,
      "step": 300
    },
    {
      "epoch": 2.1612403100775195,
      "grad_norm": 3.927116870880127,
      "learning_rate": 1.9615049043274207e-05,
      "loss": 0.2786,
      "step": 350
    },
    {
      "epoch": 2.471317829457364,
      "grad_norm": 13.424771308898926,
      "learning_rate": 1.932779175343134e-05,
      "loss": 0.23,
      "step": 400
    },
    {
      "epoch": 2.471317829457364,
      "eval_accuracy": 0.9280868385345997,
      "eval_f1": 0.9271058471970156,
      "eval_loss": 0.1993584781885147,
      "eval_precision": 0.9303851930689407,
      "eval_recall": 0.9280868385345997,
      "eval_runtime": 56.4712,
      "eval_samples_per_second": 39.153,
      "eval_steps_per_second": 4.905,
      "step": 400
    },
    {
      "epoch": 2.781395348837209,
      "grad_norm": 11.889705657958984,
      "learning_rate": 1.8964392617017013e-05,
      "loss": 0.2424,
      "step": 450
    },
    {
      "epoch": 3.0868217054263565,
      "grad_norm": 14.436951637268066,
      "learning_rate": 1.8527818025436662e-05,
      "loss": 0.2135,
      "step": 500
    },
    {
      "epoch": 3.0868217054263565,
      "eval_accuracy": 0.9280868385345997,
      "eval_f1": 0.9266634060111499,
      "eval_loss": 0.2157154679298401,
      "eval_precision": 0.9332581537403823,
      "eval_recall": 0.9280868385345997,
      "eval_runtime": 56.599,
      "eval_samples_per_second": 39.064,
      "eval_steps_per_second": 4.894,
      "step": 500
    },
    {
      "epoch": 3.3968992248062015,
      "grad_norm": 16.79993438720703,
      "learning_rate": 1.802163169413846e-05,
      "loss": 0.1746,
      "step": 550
    },
    {
      "epoch": 3.7069767441860466,
      "grad_norm": 14.388134002685547,
      "learning_rate": 1.7449965572354675e-05,
      "loss": 0.2106,
      "step": 600
    },
    {
      "epoch": 3.7069767441860466,
      "eval_accuracy": 0.9380370872908186,
      "eval_f1": 0.9382102726921231,
      "eval_loss": 0.18085584044456482,
      "eval_precision": 0.9386737275368563,
      "eval_recall": 0.9380370872908186,
      "eval_runtime": 56.6285,
      "eval_samples_per_second": 39.044,
      "eval_steps_per_second": 4.892,
      "step": 600
    },
    {
      "epoch": 4.0124031007751935,
      "grad_norm": 14.440871238708496,
      "learning_rate": 1.68174861144065e-05,
      "loss": 0.1802,
      "step": 650
    },
    {
      "epoch": 4.322480620155039,
      "grad_norm": 6.46437931060791,
      "learning_rate": 1.612935618789643e-05,
      "loss": 0.1576,
      "step": 700
    },
    {
      "epoch": 4.322480620155039,
      "eval_accuracy": 0.9402985074626866,
      "eval_f1": 0.9403501047780813,
      "eval_loss": 0.1628771722316742,
      "eval_precision": 0.9404274341623059,
      "eval_recall": 0.9402985074626866,
      "eval_runtime": 56.5976,
      "eval_samples_per_second": 39.065,
      "eval_steps_per_second": 4.894,
      "step": 700
    },
    {
      "epoch": 4.632558139534884,
      "grad_norm": 12.626237869262695,
      "learning_rate": 1.5391192929727884e-05,
      "loss": 0.1752,
      "step": 750
    },
    {
      "epoch": 4.942635658914728,
      "grad_norm": 20.061859130859375,
      "learning_rate": 1.460902189396916e-05,
      "loss": 0.1434,
      "step": 800
    },
    {
      "epoch": 4.942635658914728,
      "eval_accuracy": 0.9543193125282677,
      "eval_f1": 0.9541868361381168,
      "eval_loss": 0.1526043862104416,
      "eval_precision": 0.9543134613523963,
      "eval_recall": 0.9543193125282677,
      "eval_runtime": 56.4759,
      "eval_samples_per_second": 39.149,
      "eval_steps_per_second": 4.905,
      "step": 800
    },
    {
      "epoch": 5.248062015503876,
      "grad_norm": 11.535951614379883,
      "learning_rate": 1.3789227865848282e-05,
      "loss": 0.1601,
      "step": 850
    },
    {
      "epoch": 5.558139534883721,
      "grad_norm": 9.65259838104248,
      "learning_rate": 1.2938502743379212e-05,
      "loss": 0.1391,
      "step": 900
    },
    {
      "epoch": 5.558139534883721,
      "eval_accuracy": 0.9574853007688828,
      "eval_f1": 0.9574664394664735,
      "eval_loss": 0.1268271952867508,
      "eval_precision": 0.9574541746977356,
      "eval_recall": 0.9574853007688828,
      "eval_runtime": 56.6517,
      "eval_samples_per_second": 39.028,
      "eval_steps_per_second": 4.89,
      "step": 900
    },
    {
      "epoch": 5.868217054263566,
      "grad_norm": 7.883506774902344,
      "learning_rate": 1.2063790912056577e-05,
      "loss": 0.1605,
      "step": 950
    },
    {
      "epoch": 6.173643410852713,
      "grad_norm": 9.489595413208008,
      "learning_rate": 1.1172232558519983e-05,
      "loss": 0.1048,
      "step": 1000
    },
    {
      "epoch": 6.173643410852713,
      "eval_accuracy": 0.9556761646313885,
      "eval_f1": 0.9555092018481103,
      "eval_loss": 0.14889651536941528,
      "eval_precision": 0.9557688265871488,
      "eval_recall": 0.9556761646313885,
      "eval_runtime": 56.5428,
      "eval_samples_per_second": 39.103,
      "eval_steps_per_second": 4.899,
      "step": 1000
    },
    {
      "epoch": 6.4837209302325585,
      "grad_norm": 5.59963846206665,
      "learning_rate": 1.0271105385912779e-05,
      "loss": 0.116,
      "step": 1050
    },
    {
      "epoch": 6.793798449612403,
      "grad_norm": 4.873887538909912,
      "learning_rate": 9.367765206707174e-06,
      "loss": 0.1271,
      "step": 1100
    },
    {
      "epoch": 6.793798449612403,
      "eval_accuracy": 0.9570330167345092,
      "eval_f1": 0.9566191699282054,
      "eval_loss": 0.14482761919498444,
      "eval_precision": 0.9586051623091093,
      "eval_recall": 0.9570330167345092,
      "eval_runtime": 56.8397,
      "eval_samples_per_second": 38.899,
      "eval_steps_per_second": 4.873,
      "step": 1100
    },
    {
      "epoch": 7.09922480620155,
      "grad_norm": 4.528378963470459,
      "learning_rate": 8.469585897930557e-06,
      "loss": 0.1333,
      "step": 1150
    },
    {
      "epoch": 7.409302325581395,
      "grad_norm": 1.5636117458343506,
      "learning_rate": 7.583899208932648e-06,
      "loss": 0.091,
      "step": 1200
    },
    {
      "epoch": 7.409302325581395,
      "eval_accuracy": 0.9570330167345092,
      "eval_f1": 0.9567056534394107,
      "eval_loss": 0.14507929980754852,
      "eval_precision": 0.9579537259191305,
      "eval_recall": 0.9570330167345092,
      "eval_runtime": 56.7,
      "eval_samples_per_second": 38.995,
      "eval_steps_per_second": 4.885,
      "step": 1200
    },
    {
      "epoch": 7.7193798449612405,
      "grad_norm": 5.480973720550537,
      "learning_rate": 6.7179349130367235e-06,
      "loss": 0.1089,
      "step": 1250
    },
    {
      "epoch": 8.024806201550387,
      "grad_norm": 6.6158223152160645,
      "learning_rate": 5.878761791611129e-06,
      "loss": 0.1159,
      "step": 1300
    },
    {
      "epoch": 8.024806201550387,
      "eval_accuracy": 0.9629127091813658,
      "eval_f1": 0.9626761639787419,
      "eval_loss": 0.1205127015709877,
      "eval_precision": 0.9635871559570208,
      "eval_recall": 0.9629127091813658,
      "eval_runtime": 56.6004,
      "eval_samples_per_second": 39.063,
      "eval_steps_per_second": 4.894,
      "step": 1300
    },
    {
      "epoch": 8.334883720930232,
      "grad_norm": 5.2470316886901855,
      "learning_rate": 5.073229932302277e-06,
      "loss": 0.0886,
      "step": 1350
    },
    {
      "epoch": 8.644961240310078,
      "grad_norm": 6.15119743347168,
      "learning_rate": 4.307914812442993e-06,
      "loss": 0.1151,
      "step": 1400
    },
    {
      "epoch": 8.644961240310078,
      "eval_accuracy": 0.9665309814563546,
      "eval_f1": 0.9664379980687814,
      "eval_loss": 0.11242391169071198,
      "eval_precision": 0.9665871538113867,
      "eval_recall": 0.9665309814563546,
      "eval_runtime": 56.6713,
      "eval_samples_per_second": 39.014,
      "eval_steps_per_second": 4.888,
      "step": 1400
    },
    {
      "epoch": 8.955038759689922,
      "grad_norm": 12.04592227935791,
      "learning_rate": 3.589063624077802e-06,
      "loss": 0.0798,
      "step": 1450
    },
    {
      "epoch": 9.26046511627907,
      "grad_norm": 13.314713478088379,
      "learning_rate": 2.922544278748801e-06,
      "loss": 0.0735,
      "step": 1500
    },
    {
      "epoch": 9.26046511627907,
      "eval_accuracy": 0.9642695612844867,
      "eval_f1": 0.9641123145223969,
      "eval_loss": 0.11749936640262604,
      "eval_precision": 0.9645326112328965,
      "eval_recall": 0.9642695612844867,
      "eval_runtime": 56.8212,
      "eval_samples_per_second": 38.912,
      "eval_steps_per_second": 4.875,
      "step": 1500
    },
    {
      "epoch": 9.570542635658915,
      "grad_norm": 1.456084132194519,
      "learning_rate": 2.3137975083109153e-06,
      "loss": 0.0746,
      "step": 1550
    },
    {
      "epoch": 9.88062015503876,
      "grad_norm": 2.1265344619750977,
      "learning_rate": 1.7677924527729228e-06,
      "loss": 0.0537,
      "step": 1600
    },
    {
      "epoch": 9.88062015503876,
      "eval_accuracy": 0.9678878335594754,
      "eval_f1": 0.9678102645900477,
      "eval_loss": 0.11535227298736572,
      "eval_precision": 0.9679181198554704,
      "eval_recall": 0.9678878335594754,
      "eval_runtime": 56.5576,
      "eval_samples_per_second": 39.093,
      "eval_steps_per_second": 4.898,
      "step": 1600
    },
    {
      "epoch": 10.186046511627907,
      "grad_norm": 12.95783805847168,
      "learning_rate": 1.2889860976963542e-06,
      "loss": 0.0857,
      "step": 1650
    },
    {
      "epoch": 10.496124031007753,
      "grad_norm": 3.627340078353882,
      "learning_rate": 8.812868922607565e-07,
      "loss": 0.0666,
      "step": 1700
    },
    {
      "epoch": 10.496124031007753,
      "eval_accuracy": 0.9701492537313433,
      "eval_f1": 0.9701426609443169,
      "eval_loss": 0.11616706103086472,
      "eval_precision": 0.9701377402873191,
      "eval_recall": 0.9701492537313433,
      "eval_runtime": 56.4987,
      "eval_samples_per_second": 39.134,
      "eval_steps_per_second": 4.903,
      "step": 1700
    },
    {
      "epoch": 10.806201550387597,
      "grad_norm": 9.41781997680664,
      "learning_rate": 5.480228449774882e-07,
      "loss": 0.0722,
      "step": 1750
    },
    {
      "epoch": 11.111627906976745,
      "grad_norm": 12.687678337097168,
      "learning_rate": 2.9626582353969756e-07,
      "loss": 0.0732,
      "step": 1800
    },
    {
      "epoch": 11.111627906976745,
      "eval_accuracy": 0.9678878335594754,
      "eval_f1": 0.9678179490403084,
      "eval_loss": 0.11334193497896194,
      "eval_precision": 0.9678997125749722,
      "eval_recall": 0.9678878335594754,
      "eval_runtime": 56.7681,
      "eval_samples_per_second": 38.948,
      "eval_steps_per_second": 4.88,
      "step": 1800
    },
    {
      "epoch": 11.421705426356588,
      "grad_norm": 13.630194664001465,
      "learning_rate": 1.1780223451346994e-07,
      "loss": 0.0868,
      "step": 1850
    },
    {
      "epoch": 11.731782945736434,
      "grad_norm": 17.37832260131836,
      "learning_rate": 2.0006053801937543e-08,
      "loss": 0.0775,
      "step": 1900
    },
    {
      "epoch": 11.731782945736434,
      "eval_accuracy": 0.968340117593849,
      "eval_f1": 0.9682674370094175,
      "eval_loss": 0.112978994846344,
      "eval_precision": 0.9683629530510336,
      "eval_recall": 0.968340117593849,
      "eval_runtime": 56.6602,
      "eval_samples_per_second": 39.022,
      "eval_steps_per_second": 4.889,
      "step": 1900
    },
    {
      "epoch": 11.930232558139535,
      "step": 1932,
      "total_flos": 1.7667354394198278e+19,
      "train_loss": 0.17970547851321614,
      "train_runtime": 5653.5088,
      "train_samples_per_second": 10.948,
      "train_steps_per_second": 0.342
    }
  ],
  "logging_steps": 50,
  "max_steps": 1932,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 12,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7667354394198278e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}