{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.120313143798929, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04120313143798929, "grad_norm": 2.6929988861083984, "learning_rate": 5.9401e-05, "loss": 1.2381, "step": 100 }, { "epoch": 0.08240626287597858, "grad_norm": 0.9781515002250671, "learning_rate": 5.8802000000000004e-05, "loss": 0.5408, "step": 200 }, { "epoch": 0.12360939431396786, "grad_norm": 1.4702354669570923, "learning_rate": 5.8203e-05, "loss": 0.3679, "step": 300 }, { "epoch": 0.16481252575195715, "grad_norm": 0.458916038274765, "learning_rate": 5.7604e-05, "loss": 0.4243, "step": 400 }, { "epoch": 0.20601565718994644, "grad_norm": 2.584094524383545, "learning_rate": 5.7005e-05, "loss": 0.341, "step": 500 }, { "epoch": 0.24721878862793573, "grad_norm": 4.845738410949707, "learning_rate": 5.6406e-05, "loss": 0.3247, "step": 600 }, { "epoch": 0.288421920065925, "grad_norm": 0.6360086798667908, "learning_rate": 5.5806999999999996e-05, "loss": 0.3736, "step": 700 }, { "epoch": 0.3296250515039143, "grad_norm": 0.5916720628738403, "learning_rate": 5.5208000000000004e-05, "loss": 0.3775, "step": 800 }, { "epoch": 0.37082818294190356, "grad_norm": 1.5302131175994873, "learning_rate": 5.4609000000000005e-05, "loss": 0.3549, "step": 900 }, { "epoch": 0.4120313143798929, "grad_norm": 1.928609848022461, "learning_rate": 5.401e-05, "loss": 0.311, "step": 1000 }, { "epoch": 0.45323444581788214, "grad_norm": 4.012447834014893, "learning_rate": 5.3411e-05, "loss": 0.3266, "step": 1100 }, { "epoch": 0.49443757725587145, "grad_norm": 1.2283966541290283, "learning_rate": 5.2812e-05, "loss": 0.3581, "step": 1200 }, { "epoch": 0.5356407086938607, "grad_norm": 2.1378538608551025, "learning_rate": 5.2213e-05, "loss": 0.3654, "step": 1300 }, { "epoch": 0.57684384013185, "grad_norm": 1.014821171760559, "learning_rate": 5.1614000000000004e-05, "loss": 0.3019, "step": 1400 }, { "epoch": 0.6180469715698393, "grad_norm": 0.6981366872787476, "learning_rate": 5.1015e-05, "loss": 0.297, "step": 1500 }, { "epoch": 0.6592501030078286, "grad_norm": 0.8797981142997742, "learning_rate": 5.0416e-05, "loss": 0.3543, "step": 1600 }, { "epoch": 0.7004532344458179, "grad_norm": 0.34348738193511963, "learning_rate": 4.9817e-05, "loss": 0.3102, "step": 1700 }, { "epoch": 0.7416563658838071, "grad_norm": 0.9645235538482666, "learning_rate": 4.9218e-05, "loss": 0.2859, "step": 1800 }, { "epoch": 0.7828594973217965, "grad_norm": 1.9135812520980835, "learning_rate": 4.8619e-05, "loss": 0.3493, "step": 1900 }, { "epoch": 0.8240626287597858, "grad_norm": 1.7853527069091797, "learning_rate": 4.8020000000000004e-05, "loss": 0.3256, "step": 2000 }, { "epoch": 0.865265760197775, "grad_norm": 1.7780035734176636, "learning_rate": 4.7421000000000006e-05, "loss": 0.2621, "step": 2100 }, { "epoch": 0.9064688916357643, "grad_norm": 0.8148425221443176, "learning_rate": 4.6822e-05, "loss": 0.3273, "step": 2200 }, { "epoch": 0.9476720230737536, "grad_norm": 2.2365009784698486, "learning_rate": 4.6223e-05, "loss": 0.2879, "step": 2300 }, { "epoch": 0.9888751545117429, "grad_norm": 1.7118935585021973, "learning_rate": 4.5624e-05, "loss": 0.2715, "step": 2400 }, { "epoch": 1.0, "eval_accuracy_Autre r\u00e9paration": 0.2533258414054248, "eval_accuracy_D\u00e9coupe": 0.08133862998794246, "eval_accuracy_Emergence": 0.0, "eval_accuracy_Gla\u00e7age ou Ressuage": 0.0, "eval_accuracy_Reflet m\u00e9t\u00e9o": 0.0, "eval_accuracy_Unlabeled": NaN, "eval_iou_Autre r\u00e9paration": 0.23621639872040598, "eval_iou_D\u00e9coupe": 0.07661059644544693, "eval_iou_Emergence": 0.0, "eval_iou_Gla\u00e7age ou Ressuage": 0.0, "eval_iou_Reflet m\u00e9t\u00e9o": 0.0, "eval_iou_Unlabeled": 0.0, "eval_loss": 0.2682347893714905, "eval_mean_accuracy": 0.06693289427867345, "eval_mean_iou": 0.05213783252764215, "eval_overall_accuracy": 0.18279080675579093, "eval_runtime": 150.5227, "eval_samples_per_second": 12.045, "eval_steps_per_second": 3.016, "step": 2427 }, { "epoch": 1.0300782859497322, "grad_norm": 3.537090539932251, "learning_rate": 4.5025000000000003e-05, "loss": 0.2763, "step": 2500 }, { "epoch": 1.0712814173877214, "grad_norm": 2.1730239391326904, "learning_rate": 4.4426000000000005e-05, "loss": 0.2981, "step": 2600 }, { "epoch": 1.1124845488257107, "grad_norm": 1.0320223569869995, "learning_rate": 4.3827e-05, "loss": 0.3227, "step": 2700 }, { "epoch": 1.1536876802637002, "grad_norm": 4.7768635749816895, "learning_rate": 4.3228e-05, "loss": 0.3398, "step": 2800 }, { "epoch": 1.1948908117016894, "grad_norm": 1.5758723020553589, "learning_rate": 4.2629e-05, "loss": 0.334, "step": 2900 }, { "epoch": 1.2360939431396787, "grad_norm": 4.915160655975342, "learning_rate": 4.203e-05, "loss": 0.2577, "step": 3000 }, { "epoch": 1.277297074577668, "grad_norm": 0.7495476603507996, "learning_rate": 4.1431e-05, "loss": 0.2807, "step": 3100 }, { "epoch": 1.3185002060156572, "grad_norm": 1.0287623405456543, "learning_rate": 4.0832e-05, "loss": 0.3277, "step": 3200 }, { "epoch": 1.3597033374536465, "grad_norm": 3.6160237789154053, "learning_rate": 4.0233e-05, "loss": 0.3073, "step": 3300 }, { "epoch": 1.4009064688916357, "grad_norm": 6.738962173461914, "learning_rate": 3.9634e-05, "loss": 0.2744, "step": 3400 }, { "epoch": 1.442109600329625, "grad_norm": 0.7060651779174805, "learning_rate": 3.9035e-05, "loss": 0.2976, "step": 3500 }, { "epoch": 1.4833127317676142, "grad_norm": 4.404435634613037, "learning_rate": 3.8436e-05, "loss": 0.2646, "step": 3600 }, { "epoch": 1.5245158632056035, "grad_norm": 1.1246055364608765, "learning_rate": 3.7837000000000004e-05, "loss": 0.3497, "step": 3700 }, { "epoch": 1.5657189946435928, "grad_norm": 3.132385015487671, "learning_rate": 3.7238000000000005e-05, "loss": 0.2437, "step": 3800 }, { "epoch": 1.6069221260815822, "grad_norm": 0.3945494592189789, "learning_rate": 3.6639e-05, "loss": 0.2616, "step": 3900 }, { "epoch": 1.6481252575195715, "grad_norm": 0.8652153015136719, "learning_rate": 3.604e-05, "loss": 0.2466, "step": 4000 }, { "epoch": 1.6893283889575608, "grad_norm": 0.44899633526802063, "learning_rate": 3.544100000000001e-05, "loss": 0.2562, "step": 4100 }, { "epoch": 1.73053152039555, "grad_norm": 3.39601993560791, "learning_rate": 3.4842e-05, "loss": 0.2795, "step": 4200 }, { "epoch": 1.7717346518335395, "grad_norm": 2.5917625427246094, "learning_rate": 3.4243000000000004e-05, "loss": 0.2933, "step": 4300 }, { "epoch": 1.8129377832715288, "grad_norm": 1.0517610311508179, "learning_rate": 3.3644000000000005e-05, "loss": 0.2632, "step": 4400 }, { "epoch": 1.854140914709518, "grad_norm": 1.573089361190796, "learning_rate": 3.3045000000000006e-05, "loss": 0.2554, "step": 4500 }, { "epoch": 1.8953440461475073, "grad_norm": 1.3932527303695679, "learning_rate": 3.2446e-05, "loss": 0.2676, "step": 4600 }, { "epoch": 1.9365471775854965, "grad_norm": 7.98951530456543, "learning_rate": 3.1847e-05, "loss": 0.2906, "step": 4700 }, { "epoch": 1.9777503090234858, "grad_norm": 0.578360378742218, "learning_rate": 3.1248e-05, "loss": 0.2815, "step": 4800 }, { "epoch": 2.0, "eval_accuracy_Autre r\u00e9paration": 0.19815143518295517, "eval_accuracy_D\u00e9coupe": 0.11079467411500263, "eval_accuracy_Emergence": 0.4089615931721195, "eval_accuracy_Gla\u00e7age ou Ressuage": 0.0, "eval_accuracy_Reflet m\u00e9t\u00e9o": 0.0, "eval_accuracy_Unlabeled": NaN, "eval_iou_Autre r\u00e9paration": 0.19162433877536195, "eval_iou_D\u00e9coupe": 0.10140688937641373, "eval_iou_Emergence": 0.40571014840298464, "eval_iou_Gla\u00e7age ou Ressuage": 0.0, "eval_iou_Reflet m\u00e9t\u00e9o": 0.0, "eval_iou_Unlabeled": 0.0, "eval_loss": 0.26819199323654175, "eval_mean_accuracy": 0.14358154049401545, "eval_mean_iou": 0.11645689609246006, "eval_overall_accuracy": 0.15928522569775833, "eval_runtime": 140.1786, "eval_samples_per_second": 12.934, "eval_steps_per_second": 3.239, "step": 4854 }, { "epoch": 2.018953440461475, "grad_norm": 0.5752081871032715, "learning_rate": 3.0649000000000004e-05, "loss": 0.2768, "step": 4900 }, { "epoch": 2.0601565718994643, "grad_norm": 0.6111757755279541, "learning_rate": 3.0050000000000002e-05, "loss": 0.2226, "step": 5000 }, { "epoch": 2.1013597033374536, "grad_norm": 0.48088550567626953, "learning_rate": 2.9451e-05, "loss": 0.334, "step": 5100 }, { "epoch": 2.142562834775443, "grad_norm": 1.2190054655075073, "learning_rate": 2.8851999999999998e-05, "loss": 0.2868, "step": 5200 }, { "epoch": 2.183765966213432, "grad_norm": 2.414565324783325, "learning_rate": 2.8253e-05, "loss": 0.3291, "step": 5300 }, { "epoch": 2.2249690976514214, "grad_norm": 0.2674981653690338, "learning_rate": 2.7653999999999996e-05, "loss": 0.2687, "step": 5400 }, { "epoch": 2.2661722290894106, "grad_norm": 2.053374767303467, "learning_rate": 2.7054999999999998e-05, "loss": 0.2559, "step": 5500 }, { "epoch": 2.3073753605274003, "grad_norm": 3.9835445880889893, "learning_rate": 2.6455999999999995e-05, "loss": 0.282, "step": 5600 }, { "epoch": 2.348578491965389, "grad_norm": 3.391972303390503, "learning_rate": 2.5857e-05, "loss": 0.3191, "step": 5700 }, { "epoch": 2.389781623403379, "grad_norm": 0.4526354968547821, "learning_rate": 2.5258e-05, "loss": 0.2732, "step": 5800 }, { "epoch": 2.430984754841368, "grad_norm": 1.3189719915390015, "learning_rate": 2.4659e-05, "loss": 0.242, "step": 5900 }, { "epoch": 2.4721878862793574, "grad_norm": 1.6163711547851562, "learning_rate": 2.406e-05, "loss": 0.278, "step": 6000 }, { "epoch": 2.5133910177173466, "grad_norm": 1.5330442190170288, "learning_rate": 2.3460999999999998e-05, "loss": 0.29, "step": 6100 }, { "epoch": 2.554594149155336, "grad_norm": 4.686217784881592, "learning_rate": 2.2862e-05, "loss": 0.2586, "step": 6200 }, { "epoch": 2.595797280593325, "grad_norm": 3.333735942840576, "learning_rate": 2.2263e-05, "loss": 0.2794, "step": 6300 }, { "epoch": 2.6370004120313144, "grad_norm": 1.2093195915222168, "learning_rate": 2.1663999999999998e-05, "loss": 0.2466, "step": 6400 }, { "epoch": 2.6782035434693037, "grad_norm": 1.6071631908416748, "learning_rate": 2.1065e-05, "loss": 0.21, "step": 6500 }, { "epoch": 2.719406674907293, "grad_norm": 1.4164949655532837, "learning_rate": 2.0465999999999997e-05, "loss": 0.2822, "step": 6600 }, { "epoch": 2.760609806345282, "grad_norm": 8.471506118774414, "learning_rate": 1.9866999999999998e-05, "loss": 0.2475, "step": 6700 }, { "epoch": 2.8018129377832715, "grad_norm": 8.533307075500488, "learning_rate": 1.9267999999999996e-05, "loss": 0.2806, "step": 6800 }, { "epoch": 2.8430160692212607, "grad_norm": 0.49498608708381653, "learning_rate": 1.8669e-05, "loss": 0.2682, "step": 6900 }, { "epoch": 2.88421920065925, "grad_norm": 1.339969515800476, "learning_rate": 1.807e-05, "loss": 0.2435, "step": 7000 }, { "epoch": 2.9254223320972392, "grad_norm": 1.8642264604568481, "learning_rate": 1.7471e-05, "loss": 0.2518, "step": 7100 }, { "epoch": 2.9666254635352285, "grad_norm": 2.9471471309661865, "learning_rate": 1.6872e-05, "loss": 0.2638, "step": 7200 }, { "epoch": 3.0, "eval_accuracy_Autre r\u00e9paration": 0.30393303904730357, "eval_accuracy_D\u00e9coupe": 0.23455367948789083, "eval_accuracy_Emergence": 0.5085131571199683, "eval_accuracy_Gla\u00e7age ou Ressuage": 0.003045137463105984, "eval_accuracy_Reflet m\u00e9t\u00e9o": 0.0, "eval_accuracy_Unlabeled": NaN, "eval_iou_Autre r\u00e9paration": 0.2853778307692313, "eval_iou_D\u00e9coupe": 0.21276477560584842, "eval_iou_Emergence": 0.49725063677040354, "eval_iou_Gla\u00e7age ou Ressuage": 0.002998539305038369, "eval_iou_Reflet m\u00e9t\u00e9o": 0.0, "eval_iou_Unlabeled": 0.0, "eval_loss": 0.2419871985912323, "eval_mean_accuracy": 0.2100090026236537, "eval_mean_iou": 0.16639863040842026, "eval_overall_accuracy": 0.2563620151228916, "eval_runtime": 137.8421, "eval_samples_per_second": 13.153, "eval_steps_per_second": 3.294, "step": 7281 }, { "epoch": 3.0078285949732178, "grad_norm": 0.4593660533428192, "learning_rate": 1.6272999999999998e-05, "loss": 0.2486, "step": 7300 }, { "epoch": 3.0490317264112075, "grad_norm": 0.8246074318885803, "learning_rate": 1.5674e-05, "loss": 0.2251, "step": 7400 }, { "epoch": 3.0902348578491967, "grad_norm": 0.9824215769767761, "learning_rate": 1.5075000000000002e-05, "loss": 0.2386, "step": 7500 }, { "epoch": 3.131437989287186, "grad_norm": 6.623724937438965, "learning_rate": 1.4476e-05, "loss": 0.2635, "step": 7600 }, { "epoch": 3.1726411207251752, "grad_norm": 0.816888689994812, "learning_rate": 1.3877e-05, "loss": 0.2821, "step": 7700 }, { "epoch": 3.2138442521631645, "grad_norm": 0.45224809646606445, "learning_rate": 1.3277999999999999e-05, "loss": 0.2238, "step": 7800 }, { "epoch": 3.2550473836011538, "grad_norm": 0.9230859279632568, "learning_rate": 1.2678999999999998e-05, "loss": 0.2238, "step": 7900 }, { "epoch": 3.296250515039143, "grad_norm": 2.5414812564849854, "learning_rate": 1.2079999999999998e-05, "loss": 0.2046, "step": 8000 }, { "epoch": 3.3374536464771323, "grad_norm": 1.6467418670654297, "learning_rate": 1.1480999999999997e-05, "loss": 0.2343, "step": 8100 }, { "epoch": 3.3786567779151215, "grad_norm": 0.6073494553565979, "learning_rate": 1.0882000000000004e-05, "loss": 0.2162, "step": 8200 }, { "epoch": 3.419859909353111, "grad_norm": 2.7378017902374268, "learning_rate": 1.0283000000000003e-05, "loss": 0.2868, "step": 8300 }, { "epoch": 3.4610630407911, "grad_norm": 1.4614454507827759, "learning_rate": 9.684000000000002e-06, "loss": 0.2145, "step": 8400 }, { "epoch": 3.5022661722290893, "grad_norm": 2.336061954498291, "learning_rate": 9.085000000000002e-06, "loss": 0.2918, "step": 8500 }, { "epoch": 3.5434693036670786, "grad_norm": 1.7232545614242554, "learning_rate": 8.486000000000001e-06, "loss": 0.2854, "step": 8600 }, { "epoch": 3.584672435105068, "grad_norm": 0.514677882194519, "learning_rate": 7.887000000000001e-06, "loss": 0.2514, "step": 8700 }, { "epoch": 3.6258755665430575, "grad_norm": 0.9662112593650818, "learning_rate": 7.2879999999999995e-06, "loss": 0.2714, "step": 8800 }, { "epoch": 3.6670786979810464, "grad_norm": 10.60983657836914, "learning_rate": 6.688999999999999e-06, "loss": 0.2548, "step": 8900 }, { "epoch": 3.708281829419036, "grad_norm": 2.669593572616577, "learning_rate": 6.0899999999999984e-06, "loss": 0.2591, "step": 9000 }, { "epoch": 3.749484960857025, "grad_norm": 1.071542501449585, "learning_rate": 5.490999999999998e-06, "loss": 0.2763, "step": 9100 }, { "epoch": 3.7906880922950146, "grad_norm": 2.664677381515503, "learning_rate": 4.891999999999997e-06, "loss": 0.2178, "step": 9200 }, { "epoch": 3.831891223733004, "grad_norm": 9.70131778717041, "learning_rate": 4.292999999999997e-06, "loss": 0.2674, "step": 9300 }, { "epoch": 3.873094355170993, "grad_norm": 4.843862056732178, "learning_rate": 3.694000000000003e-06, "loss": 0.2581, "step": 9400 }, { "epoch": 3.9142974866089824, "grad_norm": 0.8629316091537476, "learning_rate": 3.0950000000000026e-06, "loss": 0.2642, "step": 9500 }, { "epoch": 3.9555006180469716, "grad_norm": 4.216986179351807, "learning_rate": 2.496000000000002e-06, "loss": 0.1965, "step": 9600 }, { "epoch": 3.996703749484961, "grad_norm": 2.241065502166748, "learning_rate": 1.8970000000000013e-06, "loss": 0.2703, "step": 9700 }, { "epoch": 4.0, "eval_accuracy_Autre r\u00e9paration": 0.36122042935066995, "eval_accuracy_D\u00e9coupe": 0.28433059478878114, "eval_accuracy_Emergence": 0.5473337114203988, "eval_accuracy_Gla\u00e7age ou Ressuage": 0.0446288018012878, "eval_accuracy_Reflet m\u00e9t\u00e9o": 0.0, "eval_accuracy_Unlabeled": NaN, "eval_iou_Autre r\u00e9paration": 0.33834292206885924, "eval_iou_D\u00e9coupe": 0.251159370886517, "eval_iou_Emergence": 0.5319718670461905, "eval_iou_Gla\u00e7age ou Ressuage": 0.042908421138837, "eval_iou_Reflet m\u00e9t\u00e9o": 0.0, "eval_iou_Unlabeled": 0.0, "eval_loss": 0.2333020269870758, "eval_mean_accuracy": 0.24750270747222752, "eval_mean_iou": 0.1940637635234006, "eval_overall_accuracy": 0.30742270034207847, "eval_runtime": 138.4117, "eval_samples_per_second": 13.099, "eval_steps_per_second": 3.28, "step": 9708 }, { "epoch": 4.03790688092295, "grad_norm": 1.052063226699829, "learning_rate": 1.298000000000001e-06, "loss": 0.242, "step": 9800 }, { "epoch": 4.07911001236094, "grad_norm": 6.82352876663208, "learning_rate": 6.990000000000005e-07, "loss": 0.2482, "step": 9900 }, { "epoch": 4.120313143798929, "grad_norm": 2.648499011993408, "learning_rate": 1e-07, "loss": 0.2197, "step": 10000 }, { "epoch": 4.120313143798929, "eval_accuracy_Autre r\u00e9paration": 0.34369405810457515, "eval_accuracy_D\u00e9coupe": 0.2864541960267422, "eval_accuracy_Emergence": 0.5548598133737452, "eval_accuracy_Gla\u00e7age ou Ressuage": 0.03860482159488221, "eval_accuracy_Reflet m\u00e9t\u00e9o": 0.0, "eval_accuracy_Unlabeled": NaN, "eval_iou_Autre r\u00e9paration": 0.32304877421180617, "eval_iou_D\u00e9coupe": 0.2515107459482324, "eval_iou_Emergence": 0.5379450939388203, "eval_iou_Gla\u00e7age ou Ressuage": 0.03692047935180606, "eval_iou_Reflet m\u00e9t\u00e9o": 0.0, "eval_iou_Unlabeled": 0.0, "eval_loss": 0.23602528870105743, "eval_mean_accuracy": 0.244722577819989, "eval_mean_iou": 0.19157084890844414, "eval_overall_accuracy": 0.29617685609695316, "eval_runtime": 141.1695, "eval_samples_per_second": 12.843, "eval_steps_per_second": 3.216, "step": 10000 }, { "epoch": 4.120313143798929, "step": 10000, "total_flos": 8.912029734867567e+18, "train_loss": 0.29374205589294433, "train_runtime": 4666.1544, "train_samples_per_second": 8.572, "train_steps_per_second": 2.143 } ], "logging_steps": 100, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.912029734867567e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }