| { | |
| "best_metric": 0.9004972577095032, | |
| "best_model_checkpoint": "./quickdraw-ConvNeXT-Tiny-Finetune/checkpoint-35000", | |
| "epoch": 8.0, | |
| "eval_steps": 5000, | |
| "global_step": 70320, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.11376564277588168, | |
| "grad_norm": 6.401439666748047, | |
| "learning_rate": 7.976e-05, | |
| "loss": 4.3139, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.22753128555176336, | |
| "grad_norm": 3.6617305278778076, | |
| "learning_rate": 0.00015976, | |
| "loss": 1.8761, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.3412969283276451, | |
| "grad_norm": 3.3584706783294678, | |
| "learning_rate": 0.00023976000000000002, | |
| "loss": 1.3873, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.4550625711035267, | |
| "grad_norm": 2.6944825649261475, | |
| "learning_rate": 0.00031976000000000004, | |
| "loss": 1.2623, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5688282138794084, | |
| "grad_norm": 2.8179662227630615, | |
| "learning_rate": 0.00039968, | |
| "loss": 1.2089, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.5688282138794084, | |
| "eval_accuracy": 0.701996, | |
| "eval_loss": 1.1778656244277954, | |
| "eval_runtime": 24.8116, | |
| "eval_samples_per_second": 10075.915, | |
| "eval_steps_per_second": 19.708, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.6825938566552902, | |
| "grad_norm": 2.0137546062469482, | |
| "learning_rate": 0.00047968, | |
| "loss": 1.1807, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.7963594994311718, | |
| "grad_norm": 2.003749132156372, | |
| "learning_rate": 0.00055968, | |
| "loss": 1.1636, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.9101251422070534, | |
| "grad_norm": 1.9637384414672852, | |
| "learning_rate": 0.00063968, | |
| "loss": 1.1468, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.023890784982935, | |
| "grad_norm": 1.3369202613830566, | |
| "learning_rate": 0.00071968, | |
| "loss": 1.1219, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.1376564277588168, | |
| "grad_norm": 1.1124730110168457, | |
| "learning_rate": 0.0007996, | |
| "loss": 1.0859, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.1376564277588168, | |
| "eval_accuracy": 0.7167, | |
| "eval_loss": 1.1144882440567017, | |
| "eval_runtime": 24.7543, | |
| "eval_samples_per_second": 10099.263, | |
| "eval_steps_per_second": 19.754, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.2514220705346986, | |
| "grad_norm": 1.2223107814788818, | |
| "learning_rate": 0.0007994630225556821, | |
| "loss": 1.0823, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.36518771331058, | |
| "grad_norm": 1.073903203010559, | |
| "learning_rate": 0.0007978427418423516, | |
| "loss": 1.0654, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.4789533560864618, | |
| "grad_norm": 1.143282413482666, | |
| "learning_rate": 0.0007951435383895233, | |
| "loss": 1.0417, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.5927189988623436, | |
| "grad_norm": 0.9456229209899902, | |
| "learning_rate": 0.0007913770351641547, | |
| "loss": 1.0254, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.7064846416382253, | |
| "grad_norm": 0.8930487632751465, | |
| "learning_rate": 0.000786545907828783, | |
| "loss": 1.0115, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.7064846416382253, | |
| "eval_accuracy": 0.74018, | |
| "eval_loss": 1.0129040479660034, | |
| "eval_runtime": 24.7858, | |
| "eval_samples_per_second": 10086.44, | |
| "eval_steps_per_second": 19.729, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.820250284414107, | |
| "grad_norm": 1.3236607313156128, | |
| "learning_rate": 0.0007806664940074367, | |
| "loss": 1.0023, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.9340159271899886, | |
| "grad_norm": 0.9320289492607117, | |
| "learning_rate": 0.0007737621596772784, | |
| "loss": 0.9887, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.04778156996587, | |
| "grad_norm": 0.8199362754821777, | |
| "learning_rate": 0.0007658378095649483, | |
| "loss": 0.9422, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.161547212741752, | |
| "grad_norm": 0.932473361492157, | |
| "learning_rate": 0.0007569213319353987, | |
| "loss": 0.8915, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.2753128555176336, | |
| "grad_norm": 0.7972292900085449, | |
| "learning_rate": 0.0007470472668826751, | |
| "loss": 0.8966, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.2753128555176336, | |
| "eval_accuracy": 0.753348, | |
| "eval_loss": 0.9683550596237183, | |
| "eval_runtime": 24.9928, | |
| "eval_samples_per_second": 10002.877, | |
| "eval_steps_per_second": 19.566, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.3890784982935154, | |
| "grad_norm": 0.9584403038024902, | |
| "learning_rate": 0.0007362226287817475, | |
| "loss": 0.898, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.502844141069397, | |
| "grad_norm": 0.8328869342803955, | |
| "learning_rate": 0.0007244861775406666, | |
| "loss": 0.8903, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.616609783845279, | |
| "grad_norm": 0.8259535431861877, | |
| "learning_rate": 0.0007118827864199177, | |
| "loss": 0.8923, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.73037542662116, | |
| "grad_norm": 0.744049608707428, | |
| "learning_rate": 0.0006984214086632506, | |
| "loss": 0.8891, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.8441410693970424, | |
| "grad_norm": 0.9187034964561462, | |
| "learning_rate": 0.0006841507321312211, | |
| "loss": 0.8868, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.8441410693970424, | |
| "eval_accuracy": 0.760008, | |
| "eval_loss": 0.9375481605529785, | |
| "eval_runtime": 25.2459, | |
| "eval_samples_per_second": 9902.581, | |
| "eval_steps_per_second": 19.369, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.9579067121729237, | |
| "grad_norm": 0.7188661694526672, | |
| "learning_rate": 0.0006691094579381113, | |
| "loss": 0.879, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 3.0716723549488054, | |
| "grad_norm": 0.716447651386261, | |
| "learning_rate": 0.0006533544985504092, | |
| "loss": 0.8033, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 3.185437997724687, | |
| "grad_norm": 0.8268662691116333, | |
| "learning_rate": 0.0006369138319781458, | |
| "loss": 0.7641, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.299203640500569, | |
| "grad_norm": 0.8604947328567505, | |
| "learning_rate": 0.000619814550182812, | |
| "loss": 0.7689, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.4129692832764507, | |
| "grad_norm": 0.888422966003418, | |
| "learning_rate": 0.0006021191461217646, | |
| "loss": 0.7743, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.4129692832764507, | |
| "eval_accuracy": 0.763776, | |
| "eval_loss": 0.929225742816925, | |
| "eval_runtime": 24.843, | |
| "eval_samples_per_second": 10063.18, | |
| "eval_steps_per_second": 19.684, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.526734926052332, | |
| "grad_norm": 0.7453769445419312, | |
| "learning_rate": 0.0005838756085398471, | |
| "loss": 0.7807, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.640500568828214, | |
| "grad_norm": 1.0316555500030518, | |
| "learning_rate": 0.0005651523871474742, | |
| "loss": 0.7808, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.7542662116040955, | |
| "grad_norm": 0.7534908056259155, | |
| "learning_rate": 0.0005459627826161315, | |
| "loss": 0.7723, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.868031854379977, | |
| "grad_norm": 0.7549566030502319, | |
| "learning_rate": 0.0005263971024146832, | |
| "loss": 0.7749, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.981797497155859, | |
| "grad_norm": 0.7040752172470093, | |
| "learning_rate": 0.0005064692456836298, | |
| "loss": 0.7735, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.981797497155859, | |
| "eval_accuracy": 0.770676, | |
| "eval_loss": 0.9004972577095032, | |
| "eval_runtime": 25.0289, | |
| "eval_samples_per_second": 9988.447, | |
| "eval_steps_per_second": 19.537, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 4.09556313993174, | |
| "grad_norm": 0.8094387054443359, | |
| "learning_rate": 0.0004862729941524655, | |
| "loss": 0.6451, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 4.2093287827076225, | |
| "grad_norm": 0.7580990791320801, | |
| "learning_rate": 0.00046582269490929174, | |
| "loss": 0.6248, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 4.323094425483504, | |
| "grad_norm": 0.9094007611274719, | |
| "learning_rate": 0.0004452145883304621, | |
| "loss": 0.6301, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 4.436860068259386, | |
| "grad_norm": 0.8668212294578552, | |
| "learning_rate": 0.00042448410789112234, | |
| "loss": 0.6374, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 4.550625711035267, | |
| "grad_norm": 0.7345805764198303, | |
| "learning_rate": 0.000403666528938893, | |
| "loss": 0.6379, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 4.550625711035267, | |
| "eval_accuracy": 0.767544, | |
| "eval_loss": 0.9469964504241943, | |
| "eval_runtime": 25.0559, | |
| "eval_samples_per_second": 9977.701, | |
| "eval_steps_per_second": 19.516, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 4.664391353811149, | |
| "grad_norm": 0.9904046654701233, | |
| "learning_rate": 0.0003828390066069532, | |
| "loss": 0.6387, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 4.778156996587031, | |
| "grad_norm": 0.9242306351661682, | |
| "learning_rate": 0.0003620580237340932, | |
| "loss": 0.6411, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 4.891922639362912, | |
| "grad_norm": 0.8365646004676819, | |
| "learning_rate": 0.00034140054494206066, | |
| "loss": 0.6419, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 5.005688282138794, | |
| "grad_norm": 0.865073025226593, | |
| "learning_rate": 0.00032090166632320246, | |
| "loss": 0.6269, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 5.1194539249146755, | |
| "grad_norm": 1.0095815658569336, | |
| "learning_rate": 0.0003005966889078552, | |
| "loss": 0.4587, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 5.1194539249146755, | |
| "eval_accuracy": 0.763204, | |
| "eval_loss": 1.0662517547607422, | |
| "eval_runtime": 24.6245, | |
| "eval_samples_per_second": 10152.491, | |
| "eval_steps_per_second": 19.858, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 5.233219567690558, | |
| "grad_norm": 1.1001554727554321, | |
| "learning_rate": 0.0002805612865819569, | |
| "loss": 0.4611, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 5.346985210466439, | |
| "grad_norm": 1.1020069122314453, | |
| "learning_rate": 0.0002608497940081539, | |
| "loss": 0.4624, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 5.460750853242321, | |
| "grad_norm": 1.0676689147949219, | |
| "learning_rate": 0.00024153479551610994, | |
| "loss": 0.4688, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 5.5745164960182025, | |
| "grad_norm": 1.0323539972305298, | |
| "learning_rate": 0.00022263001216612193, | |
| "loss": 0.4682, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 5.688282138794084, | |
| "grad_norm": 1.0369212627410889, | |
| "learning_rate": 0.0002042244110409137, | |
| "loss": 0.469, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 5.688282138794084, | |
| "eval_accuracy": 0.764232, | |
| "eval_loss": 1.0686589479446411, | |
| "eval_runtime": 24.7037, | |
| "eval_samples_per_second": 10119.948, | |
| "eval_steps_per_second": 19.795, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 5.802047781569966, | |
| "grad_norm": 0.9783840179443359, | |
| "learning_rate": 0.0001863310671797061, | |
| "loss": 0.4676, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 5.915813424345847, | |
| "grad_norm": null, | |
| "learning_rate": 0.00016903418776826956, | |
| "loss": 0.4678, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 6.0295790671217295, | |
| "grad_norm": 0.8185985684394836, | |
| "learning_rate": 0.00015234606022798043, | |
| "loss": 0.4225, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 6.143344709897611, | |
| "grad_norm": 0.8303334712982178, | |
| "learning_rate": 0.00013632955350427066, | |
| "loss": 0.3066, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 6.257110352673493, | |
| "grad_norm": 0.9623553156852722, | |
| "learning_rate": 0.00012104303358623888, | |
| "loss": 0.3053, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 6.257110352673493, | |
| "eval_accuracy": 0.756132, | |
| "eval_loss": 1.2674084901809692, | |
| "eval_runtime": 24.9552, | |
| "eval_samples_per_second": 10017.936, | |
| "eval_steps_per_second": 19.595, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 6.370875995449374, | |
| "grad_norm": 1.087319254875183, | |
| "learning_rate": 0.00010649735976862514, | |
| "loss": 0.3072, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 6.484641638225256, | |
| "grad_norm": 1.0554896593093872, | |
| "learning_rate": 9.276098489438694e-05, | |
| "loss": 0.3082, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 6.598407281001138, | |
| "grad_norm": 1.106109619140625, | |
| "learning_rate": 7.984366706047617e-05, | |
| "loss": 0.31, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 6.712172923777019, | |
| "grad_norm": 1.1292998790740967, | |
| "learning_rate": 6.780619596561706e-05, | |
| "loss": 0.3095, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 6.825938566552901, | |
| "grad_norm": 0.9473851323127747, | |
| "learning_rate": 5.665712283319109e-05, | |
| "loss": 0.3087, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 6.825938566552901, | |
| "eval_accuracy": 0.756324, | |
| "eval_loss": 1.3039071559906006, | |
| "eval_runtime": 24.8768, | |
| "eval_samples_per_second": 10049.527, | |
| "eval_steps_per_second": 19.657, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 6.939704209328783, | |
| "grad_norm": 0.9320647716522217, | |
| "learning_rate": 4.644891589270905e-05, | |
| "loss": 0.3036, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 7.053469852104665, | |
| "grad_norm": 0.809033989906311, | |
| "learning_rate": 3.718882688809506e-05, | |
| "loss": 0.2613, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 7.167235494880546, | |
| "grad_norm": 0.8693143725395203, | |
| "learning_rate": 2.8920434372419202e-05, | |
| "loss": 0.2126, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 7.281001137656427, | |
| "grad_norm": 1.1433346271514893, | |
| "learning_rate": 2.1649612077036684e-05, | |
| "loss": 0.2147, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 7.39476678043231, | |
| "grad_norm": 0.9169164299964905, | |
| "learning_rate": 1.541057694253052e-05, | |
| "loss": 0.215, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 7.39476678043231, | |
| "eval_accuracy": 0.749904, | |
| "eval_loss": 1.4452863931655884, | |
| "eval_runtime": 24.6371, | |
| "eval_samples_per_second": 10147.304, | |
| "eval_steps_per_second": 19.848, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 7.508532423208191, | |
| "grad_norm": 0.7349722385406494, | |
| "learning_rate": 1.0207761077829459e-05, | |
| "loss": 0.2124, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 7.622298065984073, | |
| "grad_norm": 0.7404098510742188, | |
| "learning_rate": 6.062035530537413e-06, | |
| "loss": 0.2143, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 7.736063708759954, | |
| "grad_norm": 0.7691236734390259, | |
| "learning_rate": 2.9871839688589755e-06, | |
| "loss": 0.2121, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 7.849829351535837, | |
| "grad_norm": 0.8680226802825928, | |
| "learning_rate": 9.853907172558518e-07, | |
| "loss": 0.2137, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 7.963594994311718, | |
| "grad_norm": 0.9800475239753723, | |
| "learning_rate": 6.607633890043374e-08, | |
| "loss": 0.2128, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 7.963594994311718, | |
| "eval_accuracy": 0.749988, | |
| "eval_loss": 1.4542275667190552, | |
| "eval_runtime": 25.1516, | |
| "eval_samples_per_second": 9939.727, | |
| "eval_steps_per_second": 19.442, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "step": 70320, | |
| "total_flos": 4.763236399488e+18, | |
| "train_loss": 0.7450065812424452, | |
| "train_runtime": 5993.3028, | |
| "train_samples_per_second": 6006.705, | |
| "train_steps_per_second": 11.733 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 70320, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 5000, | |
| "total_flos": 4.763236399488e+18, | |
| "train_batch_size": 512, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |