| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 100.0, | |
| "eval_steps": 500, | |
| "global_step": 131300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.38080731150038083, | |
| "grad_norm": 1.5335801839828491, | |
| "learning_rate": 0.000996191926884996, | |
| "loss": 6.3554, | |
| "max_memory_allocated (GB)": 4.18, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.7616146230007617, | |
| "grad_norm": 1.3639589548110962, | |
| "learning_rate": 0.0009923838537699923, | |
| "loss": 4.0897, | |
| "max_memory_allocated (GB)": 4.18, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 1000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.0689470393855497, | |
| "eval_loss": 6.560576438903809, | |
| "eval_runtime": 321.5446, | |
| "eval_samples_per_second": 522.338, | |
| "eval_steps_per_second": 1.023, | |
| "max_memory_allocated (GB)": 21.24, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 1313, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.1424219345011424, | |
| "grad_norm": 1.2049754858016968, | |
| "learning_rate": 0.0009885757806549886, | |
| "loss": 3.3285, | |
| "max_memory_allocated (GB)": 21.24, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 1500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.5232292460015233, | |
| "grad_norm": 1.1118249893188477, | |
| "learning_rate": 0.0009847677075399848, | |
| "loss": 2.9296, | |
| "max_memory_allocated (GB)": 21.24, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 2000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.904036557501904, | |
| "grad_norm": 1.0413897037506104, | |
| "learning_rate": 0.0009809596344249809, | |
| "loss": 2.8297, | |
| "max_memory_allocated (GB)": 21.24, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 2500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.0806466017683308, | |
| "eval_loss": 6.380691051483154, | |
| "eval_runtime": 296.9621, | |
| "eval_samples_per_second": 565.577, | |
| "eval_steps_per_second": 1.108, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 2626, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.2848438690022848, | |
| "grad_norm": 1.0910780429840088, | |
| "learning_rate": 0.0009771515613099771, | |
| "loss": 2.4255, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 3000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.6656511805026657, | |
| "grad_norm": 0.9615127444267273, | |
| "learning_rate": 0.0009733434881949733, | |
| "loss": 2.3951, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 3500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.08840463219314697, | |
| "eval_loss": 6.42078161239624, | |
| "eval_runtime": 295.0967, | |
| "eval_samples_per_second": 569.152, | |
| "eval_steps_per_second": 1.115, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 3939, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.0464584920030466, | |
| "grad_norm": 0.9677470326423645, | |
| "learning_rate": 0.0009695354150799695, | |
| "loss": 2.3282, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 4000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.427265803503427, | |
| "grad_norm": 0.924505889415741, | |
| "learning_rate": 0.0009657273419649657, | |
| "loss": 2.034, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 4500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.808073115003808, | |
| "grad_norm": 0.943131685256958, | |
| "learning_rate": 0.0009619192688499619, | |
| "loss": 2.1003, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 5000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.10144979309934209, | |
| "eval_loss": 6.180690765380859, | |
| "eval_runtime": 294.8382, | |
| "eval_samples_per_second": 569.651, | |
| "eval_steps_per_second": 1.116, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 5252, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.188880426504189, | |
| "grad_norm": 0.9539695978164673, | |
| "learning_rate": 0.0009581111957349581, | |
| "loss": 1.9654, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 5500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.5696877380045695, | |
| "grad_norm": 0.958723247051239, | |
| "learning_rate": 0.0009543031226199543, | |
| "loss": 1.8655, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 6000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.9504950495049505, | |
| "grad_norm": 0.9000511765480042, | |
| "learning_rate": 0.0009504950495049505, | |
| "loss": 1.93, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 6500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.10267631210740973, | |
| "eval_loss": 6.32958459854126, | |
| "eval_runtime": 293.9778, | |
| "eval_samples_per_second": 571.319, | |
| "eval_steps_per_second": 1.119, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 6565, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.331302361005331, | |
| "grad_norm": 0.9381927251815796, | |
| "learning_rate": 0.0009466869763899467, | |
| "loss": 1.6809, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 7000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.712109672505712, | |
| "grad_norm": 0.9558943510055542, | |
| "learning_rate": 0.0009428789032749429, | |
| "loss": 1.7486, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 7500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.10908279003304457, | |
| "eval_loss": 6.237691402435303, | |
| "eval_runtime": 294.7934, | |
| "eval_samples_per_second": 569.738, | |
| "eval_steps_per_second": 1.116, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 7878, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.092916984006093, | |
| "grad_norm": 0.8602584004402161, | |
| "learning_rate": 0.0009390708301599391, | |
| "loss": 1.719, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 8000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.473724295506473, | |
| "grad_norm": 0.8856430649757385, | |
| "learning_rate": 0.0009352627570449353, | |
| "loss": 1.5757, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 8500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.854531607006854, | |
| "grad_norm": 0.8462656140327454, | |
| "learning_rate": 0.0009314546839299315, | |
| "loss": 1.6498, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 9000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.10559971420916317, | |
| "eval_loss": 6.480878829956055, | |
| "eval_runtime": 293.5226, | |
| "eval_samples_per_second": 572.205, | |
| "eval_steps_per_second": 1.121, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 9191, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.235338918507235, | |
| "grad_norm": 0.8403063416481018, | |
| "learning_rate": 0.0009276466108149277, | |
| "loss": 1.5278, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 9500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.616146230007616, | |
| "grad_norm": 0.8911013007164001, | |
| "learning_rate": 0.0009238385376999238, | |
| "loss": 1.5124, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 10000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.996953541507997, | |
| "grad_norm": 0.8742530941963196, | |
| "learning_rate": 0.00092003046458492, | |
| "loss": 1.5722, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 10500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.11500104194575927, | |
| "eval_loss": 6.268129348754883, | |
| "eval_runtime": 294.6985, | |
| "eval_samples_per_second": 569.921, | |
| "eval_steps_per_second": 1.116, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 10504, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 8.377760853008377, | |
| "grad_norm": 0.8218346834182739, | |
| "learning_rate": 0.0009162223914699162, | |
| "loss": 1.3603, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 11000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 8.758568164508759, | |
| "grad_norm": 0.8631531596183777, | |
| "learning_rate": 0.0009124143183549124, | |
| "loss": 1.458, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 11500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.11367330534964723, | |
| "eval_loss": 6.290684223175049, | |
| "eval_runtime": 293.6278, | |
| "eval_samples_per_second": 572.0, | |
| "eval_steps_per_second": 1.12, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 11817, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 9.139375476009139, | |
| "grad_norm": 0.8099371790885925, | |
| "learning_rate": 0.0009086062452399086, | |
| "loss": 1.4071, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 12000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 9.52018278750952, | |
| "grad_norm": 0.9186742901802063, | |
| "learning_rate": 0.0009047981721249048, | |
| "loss": 1.3494, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 12500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 9.900990099009901, | |
| "grad_norm": 0.8492868542671204, | |
| "learning_rate": 0.000900990099009901, | |
| "loss": 1.4131, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 13000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.1148819624304129, | |
| "eval_loss": 6.425012588500977, | |
| "eval_runtime": 294.2316, | |
| "eval_samples_per_second": 570.826, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 13130, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 10.281797410510281, | |
| "grad_norm": 0.8763368725776672, | |
| "learning_rate": 0.0008971820258948972, | |
| "loss": 1.2825, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 13500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 10.662604722010663, | |
| "grad_norm": 0.8775655627250671, | |
| "learning_rate": 0.0008933739527798934, | |
| "loss": 1.3132, | |
| "max_memory_allocated (GB)": 21.25, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 14000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.11482242267273972, | |
| "eval_loss": 6.423275470733643, | |
| "eval_runtime": 293.1251, | |
| "eval_samples_per_second": 572.981, | |
| "eval_steps_per_second": 1.122, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 14443, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 11.043412033511043, | |
| "grad_norm": 0.7483058571815491, | |
| "learning_rate": 0.0008895658796648896, | |
| "loss": 1.3292, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 14500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 11.424219345011425, | |
| "grad_norm": 0.8545175194740295, | |
| "learning_rate": 0.0008857578065498858, | |
| "loss": 1.1967, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 15000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 11.805026656511805, | |
| "grad_norm": 0.7811135649681091, | |
| "learning_rate": 0.000881949733434882, | |
| "loss": 1.2835, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 15500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.11918073293441696, | |
| "eval_loss": 6.378399848937988, | |
| "eval_runtime": 294.7386, | |
| "eval_samples_per_second": 569.844, | |
| "eval_steps_per_second": 1.116, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 15756, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 12.185833968012187, | |
| "grad_norm": 0.7616448402404785, | |
| "learning_rate": 0.0008781416603198782, | |
| "loss": 1.225, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 16000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 12.566641279512567, | |
| "grad_norm": 0.8137641549110413, | |
| "learning_rate": 0.0008743335872048743, | |
| "loss": 1.1925, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 16500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 12.947448591012947, | |
| "grad_norm": 0.8130796551704407, | |
| "learning_rate": 0.0008705255140898705, | |
| "loss": 1.2414, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 17000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.11609061951117859, | |
| "eval_loss": 6.441605091094971, | |
| "eval_runtime": 293.5962, | |
| "eval_samples_per_second": 572.061, | |
| "eval_steps_per_second": 1.121, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 17069, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 13.328255902513328, | |
| "grad_norm": 0.7482279539108276, | |
| "learning_rate": 0.0008667174409748667, | |
| "loss": 1.1175, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 17500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 13.709063214013709, | |
| "grad_norm": 0.8002237677574158, | |
| "learning_rate": 0.0008629093678598629, | |
| "loss": 1.1652, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 18000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.11941293798934238, | |
| "eval_loss": 6.50691556930542, | |
| "eval_runtime": 293.871, | |
| "eval_samples_per_second": 571.526, | |
| "eval_steps_per_second": 1.12, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 18382, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 14.08987052551409, | |
| "grad_norm": 0.7629320621490479, | |
| "learning_rate": 0.0008591012947448591, | |
| "loss": 1.1667, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 18500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 14.47067783701447, | |
| "grad_norm": 0.8081715106964111, | |
| "learning_rate": 0.0008552932216298553, | |
| "loss": 1.0783, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 19000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 14.851485148514852, | |
| "grad_norm": 0.8121427893638611, | |
| "learning_rate": 0.0008514851485148515, | |
| "loss": 1.1415, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 19500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.12653984698282278, | |
| "eval_loss": 6.384666442871094, | |
| "eval_runtime": 295.5264, | |
| "eval_samples_per_second": 568.325, | |
| "eval_steps_per_second": 1.113, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 19695, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 15.232292460015232, | |
| "grad_norm": 0.7696598768234253, | |
| "learning_rate": 0.0008476770753998477, | |
| "loss": 1.0709, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 20000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 15.613099771515612, | |
| "grad_norm": 0.8182108402252197, | |
| "learning_rate": 0.0008438690022848439, | |
| "loss": 1.0681, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 20500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 15.993907083015994, | |
| "grad_norm": 0.829287052154541, | |
| "learning_rate": 0.0008400609291698401, | |
| "loss": 1.118, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 21000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.12646244529784764, | |
| "eval_loss": 6.311014652252197, | |
| "eval_runtime": 293.3977, | |
| "eval_samples_per_second": 572.448, | |
| "eval_steps_per_second": 1.121, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 21008, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 16.374714394516374, | |
| "grad_norm": 0.8062915205955505, | |
| "learning_rate": 0.0008362528560548363, | |
| "loss": 0.9966, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 21500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 16.755521706016754, | |
| "grad_norm": 0.8203598260879517, | |
| "learning_rate": 0.0008324447829398325, | |
| "loss": 1.065, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 22000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.128337947664553, | |
| "eval_loss": 6.402354717254639, | |
| "eval_runtime": 295.2238, | |
| "eval_samples_per_second": 568.907, | |
| "eval_steps_per_second": 1.114, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 22321, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 17.136329017517138, | |
| "grad_norm": 0.732295572757721, | |
| "learning_rate": 0.0008286367098248287, | |
| "loss": 1.0415, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 22500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 17.517136329017518, | |
| "grad_norm": 0.7375497221946716, | |
| "learning_rate": 0.0008248286367098248, | |
| "loss": 0.9912, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 23000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 17.897943640517898, | |
| "grad_norm": 0.7598251104354858, | |
| "learning_rate": 0.000821020563594821, | |
| "loss": 1.0469, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 23500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.1317555297549939, | |
| "eval_loss": 6.188817977905273, | |
| "eval_runtime": 294.5179, | |
| "eval_samples_per_second": 570.271, | |
| "eval_steps_per_second": 1.117, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 23634, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 18.278750952018278, | |
| "grad_norm": 0.7098336815834045, | |
| "learning_rate": 0.0008172124904798172, | |
| "loss": 0.9608, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 24000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 18.659558263518658, | |
| "grad_norm": 0.7254419922828674, | |
| "learning_rate": 0.0008134044173648134, | |
| "loss": 0.978, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 24500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.13280938346580928, | |
| "eval_loss": 6.488752841949463, | |
| "eval_runtime": 291.3325, | |
| "eval_samples_per_second": 576.506, | |
| "eval_steps_per_second": 1.129, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 24947, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 19.04036557501904, | |
| "grad_norm": 0.759738028049469, | |
| "learning_rate": 0.0008095963442498096, | |
| "loss": 1.0032, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 25000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 19.42117288651942, | |
| "grad_norm": 0.7547454833984375, | |
| "learning_rate": 0.0008057882711348058, | |
| "loss": 0.9158, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 25500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 19.801980198019802, | |
| "grad_norm": 0.7352393269538879, | |
| "learning_rate": 0.000801980198019802, | |
| "loss": 0.9734, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 26000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.13280342949004198, | |
| "eval_loss": 6.357004165649414, | |
| "eval_runtime": 293.5583, | |
| "eval_samples_per_second": 572.135, | |
| "eval_steps_per_second": 1.121, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 26260, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 20.182787509520182, | |
| "grad_norm": 0.7476137280464172, | |
| "learning_rate": 0.0007981721249047982, | |
| "loss": 0.9462, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 26500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 20.563594821020562, | |
| "grad_norm": 0.7356305718421936, | |
| "learning_rate": 0.0007943640517897944, | |
| "loss": 0.9229, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 27000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 20.944402132520946, | |
| "grad_norm": 0.6908999085426331, | |
| "learning_rate": 0.0007905559786747906, | |
| "loss": 0.9602, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 27500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.13345836682444703, | |
| "eval_loss": 6.237915515899658, | |
| "eval_runtime": 294.0513, | |
| "eval_samples_per_second": 571.176, | |
| "eval_steps_per_second": 1.119, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 27573, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 21.325209444021326, | |
| "grad_norm": 0.6861099600791931, | |
| "learning_rate": 0.0007867479055597868, | |
| "loss": 0.8804, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 28000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 21.706016755521706, | |
| "grad_norm": 0.7063835263252258, | |
| "learning_rate": 0.000782939832444783, | |
| "loss": 0.9069, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 28500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.13340478104254117, | |
| "eval_loss": 6.306626796722412, | |
| "eval_runtime": 293.3464, | |
| "eval_samples_per_second": 572.548, | |
| "eval_steps_per_second": 1.122, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 28886, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 22.086824067022086, | |
| "grad_norm": 0.6700064539909363, | |
| "learning_rate": 0.0007791317593297792, | |
| "loss": 0.9188, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 29000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 22.46763137852247, | |
| "grad_norm": 0.7254114151000977, | |
| "learning_rate": 0.0007753236862147753, | |
| "loss": 0.8521, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 29500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 22.84843869002285, | |
| "grad_norm": 0.7956221699714661, | |
| "learning_rate": 0.0007715156130997715, | |
| "loss": 0.8996, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 30000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.13276175165967075, | |
| "eval_loss": 6.250992774963379, | |
| "eval_runtime": 293.8104, | |
| "eval_samples_per_second": 571.644, | |
| "eval_steps_per_second": 1.12, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 30199, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 23.22924600152323, | |
| "grad_norm": 0.7165413498878479, | |
| "learning_rate": 0.0007677075399847677, | |
| "loss": 0.8606, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 30500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 23.61005331302361, | |
| "grad_norm": 0.6925890445709229, | |
| "learning_rate": 0.0007638994668697639, | |
| "loss": 0.8553, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 31000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 23.99086062452399, | |
| "grad_norm": 0.7103093266487122, | |
| "learning_rate": 0.0007600913937547601, | |
| "loss": 0.893, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 31500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.13703075228483821, | |
| "eval_loss": 6.409420490264893, | |
| "eval_runtime": 292.9538, | |
| "eval_samples_per_second": 573.316, | |
| "eval_steps_per_second": 1.123, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 31512, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 24.371667936024373, | |
| "grad_norm": 0.6945925951004028, | |
| "learning_rate": 0.0007562833206397562, | |
| "loss": 0.8036, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 32000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 24.752475247524753, | |
| "grad_norm": 0.7803900241851807, | |
| "learning_rate": 0.0007524752475247525, | |
| "loss": 0.8494, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 32500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.13862046381471227, | |
| "eval_loss": 6.323179721832275, | |
| "eval_runtime": 295.4271, | |
| "eval_samples_per_second": 568.516, | |
| "eval_steps_per_second": 1.114, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 32825, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 25.133282559025133, | |
| "grad_norm": 0.6690163016319275, | |
| "learning_rate": 0.0007486671744097486, | |
| "loss": 0.8476, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 33000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 25.514089870525513, | |
| "grad_norm": 0.7143478989601135, | |
| "learning_rate": 0.0007448591012947449, | |
| "loss": 0.8046, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 33500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 25.894897182025893, | |
| "grad_norm": 0.7087588310241699, | |
| "learning_rate": 0.000741051028179741, | |
| "loss": 0.8507, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 34000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.13611979399243845, | |
| "eval_loss": 6.426168441772461, | |
| "eval_runtime": 293.8071, | |
| "eval_samples_per_second": 571.651, | |
| "eval_steps_per_second": 1.12, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 34138, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 26.275704493526277, | |
| "grad_norm": 0.6879526376724243, | |
| "learning_rate": 0.0007372429550647373, | |
| "loss": 0.7922, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 34500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 26.656511805026657, | |
| "grad_norm": 0.7182181477546692, | |
| "learning_rate": 0.0007334348819497334, | |
| "loss": 0.8065, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 35000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.13387514512815932, | |
| "eval_loss": 6.415595531463623, | |
| "eval_runtime": 294.5186, | |
| "eval_samples_per_second": 570.27, | |
| "eval_steps_per_second": 1.117, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 35451, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 27.037319116527037, | |
| "grad_norm": 0.6910179853439331, | |
| "learning_rate": 0.0007296268088347297, | |
| "loss": 0.8217, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 35500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 27.418126428027417, | |
| "grad_norm": 0.6327067017555237, | |
| "learning_rate": 0.0007258187357197257, | |
| "loss": 0.754, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 36000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 27.798933739527797, | |
| "grad_norm": 0.7196256518363953, | |
| "learning_rate": 0.000722010662604722, | |
| "loss": 0.7956, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 36500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.13950760620404276, | |
| "eval_loss": 6.287784099578857, | |
| "eval_runtime": 294.6907, | |
| "eval_samples_per_second": 569.937, | |
| "eval_steps_per_second": 1.116, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 36764, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 28.17974105102818, | |
| "grad_norm": 0.6903337836265564, | |
| "learning_rate": 0.0007182025894897181, | |
| "loss": 0.7804, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 37000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 28.56054836252856, | |
| "grad_norm": 0.7300212979316711, | |
| "learning_rate": 0.0007143945163747144, | |
| "loss": 0.7552, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 37500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 28.94135567402894, | |
| "grad_norm": 0.7145921587944031, | |
| "learning_rate": 0.0007105864432597105, | |
| "loss": 0.7889, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 38000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.13493495281474205, | |
| "eval_loss": 6.49939489364624, | |
| "eval_runtime": 294.5404, | |
| "eval_samples_per_second": 570.227, | |
| "eval_steps_per_second": 1.117, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 38077, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 29.32216298552932, | |
| "grad_norm": 0.6388425230979919, | |
| "learning_rate": 0.0007067783701447068, | |
| "loss": 0.7356, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 38500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 29.702970297029704, | |
| "grad_norm": 0.7119573354721069, | |
| "learning_rate": 0.0007029702970297029, | |
| "loss": 0.7645, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 39000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.14072817123634307, | |
| "eval_loss": 6.493895530700684, | |
| "eval_runtime": 293.9015, | |
| "eval_samples_per_second": 571.467, | |
| "eval_steps_per_second": 1.119, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 39390, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 30.083777608530085, | |
| "grad_norm": 0.6302276253700256, | |
| "learning_rate": 0.0006991622239146992, | |
| "loss": 0.7568, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 39500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 30.464584920030465, | |
| "grad_norm": 0.6814985871315002, | |
| "learning_rate": 0.0006953541507996953, | |
| "loss": 0.7356, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 40000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 30.845392231530845, | |
| "grad_norm": 0.6445801258087158, | |
| "learning_rate": 0.0006915460776846916, | |
| "loss": 0.7548, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 40500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.13752493227352564, | |
| "eval_loss": 6.484902858734131, | |
| "eval_runtime": 294.1773, | |
| "eval_samples_per_second": 570.931, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 40703, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 31.226199543031225, | |
| "grad_norm": 0.7047973871231079, | |
| "learning_rate": 0.0006877380045696877, | |
| "loss": 0.7233, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 41000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 31.60700685453161, | |
| "grad_norm": 0.7059982419013977, | |
| "learning_rate": 0.000683929931454684, | |
| "loss": 0.7176, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 41500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 31.98781416603199, | |
| "grad_norm": 0.7162789106369019, | |
| "learning_rate": 0.0006801218583396802, | |
| "loss": 0.7494, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 42000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.14151409603762913, | |
| "eval_loss": 6.554172039031982, | |
| "eval_runtime": 293.2908, | |
| "eval_samples_per_second": 572.657, | |
| "eval_steps_per_second": 1.122, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 42016, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 32.36862147753237, | |
| "grad_norm": 0.6640636324882507, | |
| "learning_rate": 0.0006763137852246763, | |
| "loss": 0.6871, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 42500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 32.74942878903275, | |
| "grad_norm": 0.646771252155304, | |
| "learning_rate": 0.0006725057121096724, | |
| "loss": 0.7162, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 43000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.14183561072906434, | |
| "eval_loss": 6.457273483276367, | |
| "eval_runtime": 292.6918, | |
| "eval_samples_per_second": 573.829, | |
| "eval_steps_per_second": 1.124, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 43329, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 33.13023610053313, | |
| "grad_norm": 0.6640152931213379, | |
| "learning_rate": 0.0006686976389946687, | |
| "loss": 0.7121, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 43500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 33.51104341203351, | |
| "grad_norm": 0.6674479246139526, | |
| "learning_rate": 0.0006648895658796648, | |
| "loss": 0.6863, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 44000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 33.89185072353389, | |
| "grad_norm": 0.6740984320640564, | |
| "learning_rate": 0.0006610814927646611, | |
| "loss": 0.7109, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 44500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.14142478640111936, | |
| "eval_loss": 6.490988731384277, | |
| "eval_runtime": 294.702, | |
| "eval_samples_per_second": 569.915, | |
| "eval_steps_per_second": 1.116, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 44642, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 34.272658035034276, | |
| "grad_norm": 0.6771370768547058, | |
| "learning_rate": 0.0006572734196496572, | |
| "loss": 0.6806, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 45000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 34.65346534653465, | |
| "grad_norm": 0.6320595145225525, | |
| "learning_rate": 0.0006534653465346535, | |
| "loss": 0.683, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 45500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.14110922568545145, | |
| "eval_loss": 6.431344509124756, | |
| "eval_runtime": 294.9108, | |
| "eval_samples_per_second": 569.511, | |
| "eval_steps_per_second": 1.116, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 45955, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 35.034272658035036, | |
| "grad_norm": 0.6397078633308411, | |
| "learning_rate": 0.0006496572734196497, | |
| "loss": 0.7004, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 46000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 35.41507996953541, | |
| "grad_norm": 0.5976701378822327, | |
| "learning_rate": 0.0006458492003046459, | |
| "loss": 0.6579, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 46500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 35.795887281035796, | |
| "grad_norm": 0.6323311924934387, | |
| "learning_rate": 0.000642041127189642, | |
| "loss": 0.6828, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 47000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.14563424726861363, | |
| "eval_loss": 6.305937767028809, | |
| "eval_runtime": 293.3837, | |
| "eval_samples_per_second": 572.476, | |
| "eval_steps_per_second": 1.121, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 47268, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 36.17669459253618, | |
| "grad_norm": 0.6800869703292847, | |
| "learning_rate": 0.0006382330540746383, | |
| "loss": 0.6765, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 47500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 36.557501904036556, | |
| "grad_norm": 0.6761651039123535, | |
| "learning_rate": 0.0006344249809596345, | |
| "loss": 0.6576, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 48000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 36.93830921553694, | |
| "grad_norm": 0.6531367301940918, | |
| "learning_rate": 0.0006306169078446307, | |
| "loss": 0.6772, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 48500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.14638444821529575, | |
| "eval_loss": 6.376420021057129, | |
| "eval_runtime": 293.926, | |
| "eval_samples_per_second": 571.419, | |
| "eval_steps_per_second": 1.119, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 48581, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 37.319116527037316, | |
| "grad_norm": 0.639673113822937, | |
| "learning_rate": 0.0006268088347296267, | |
| "loss": 0.6436, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 49000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 37.6999238385377, | |
| "grad_norm": 0.6823265552520752, | |
| "learning_rate": 0.000623000761614623, | |
| "loss": 0.652, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 49500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.15022476258521628, | |
| "eval_loss": 6.34368896484375, | |
| "eval_runtime": 293.7272, | |
| "eval_samples_per_second": 571.806, | |
| "eval_steps_per_second": 1.12, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 49894, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 38.08073115003808, | |
| "grad_norm": 0.6552994251251221, | |
| "learning_rate": 0.0006191926884996192, | |
| "loss": 0.6551, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 50000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 38.46153846153846, | |
| "grad_norm": 0.6459840536117554, | |
| "learning_rate": 0.0006153846153846154, | |
| "loss": 0.6256, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 50500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 38.84234577303884, | |
| "grad_norm": 0.7152717709541321, | |
| "learning_rate": 0.0006115765422696116, | |
| "loss": 0.6533, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 51000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_accuracy": 0.1470393855497008, | |
| "eval_loss": 6.349309921264648, | |
| "eval_runtime": 293.4965, | |
| "eval_samples_per_second": 572.256, | |
| "eval_steps_per_second": 1.121, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 51207, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 39.22315308453922, | |
| "grad_norm": 0.6504981517791748, | |
| "learning_rate": 0.0006077684691546078, | |
| "loss": 0.6289, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 51500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 39.603960396039604, | |
| "grad_norm": 0.6987379789352417, | |
| "learning_rate": 0.000603960396039604, | |
| "loss": 0.6319, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 52000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 39.98476770753999, | |
| "grad_norm": 0.7198599576950073, | |
| "learning_rate": 0.0006001523229246002, | |
| "loss": 0.6527, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 52500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.14814087106665477, | |
| "eval_loss": 6.3077898025512695, | |
| "eval_runtime": 295.0602, | |
| "eval_samples_per_second": 569.223, | |
| "eval_steps_per_second": 1.115, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 52520, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 40.365575019040364, | |
| "grad_norm": 0.6907523274421692, | |
| "learning_rate": 0.0005963442498095964, | |
| "loss": 0.6132, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 53000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 40.74638233054075, | |
| "grad_norm": 0.6374237537384033, | |
| "learning_rate": 0.0005925361766945926, | |
| "loss": 0.633, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 53500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_accuracy": 0.14128784495847102, | |
| "eval_loss": 6.535091876983643, | |
| "eval_runtime": 294.8025, | |
| "eval_samples_per_second": 569.72, | |
| "eval_steps_per_second": 1.116, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 53833, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 41.127189642041124, | |
| "grad_norm": 0.6018815636634827, | |
| "learning_rate": 0.0005887281035795888, | |
| "loss": 0.6237, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 54000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 41.50799695354151, | |
| "grad_norm": 0.5860393643379211, | |
| "learning_rate": 0.000584920030464585, | |
| "loss": 0.6081, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 54500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 41.88880426504189, | |
| "grad_norm": 0.6938556432723999, | |
| "learning_rate": 0.0005811119573495812, | |
| "loss": 0.6219, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 55000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_accuracy": 0.14922449465630674, | |
| "eval_loss": 6.377195358276367, | |
| "eval_runtime": 294.5637, | |
| "eval_samples_per_second": 570.182, | |
| "eval_steps_per_second": 1.117, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 55146, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 42.26961157654227, | |
| "grad_norm": 0.6243091821670532, | |
| "learning_rate": 0.0005773038842345773, | |
| "loss": 0.5989, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 55500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 42.65041888804265, | |
| "grad_norm": 0.5847932696342468, | |
| "learning_rate": 0.0005734958111195735, | |
| "loss": 0.6053, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 56000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_accuracy": 0.1481289631151201, | |
| "eval_loss": 6.480849266052246, | |
| "eval_runtime": 294.6877, | |
| "eval_samples_per_second": 569.942, | |
| "eval_steps_per_second": 1.116, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 56459, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 43.031226199543035, | |
| "grad_norm": 0.6111719012260437, | |
| "learning_rate": 0.0005696877380045697, | |
| "loss": 0.6123, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 56500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 43.41203351104341, | |
| "grad_norm": 0.6217710971832275, | |
| "learning_rate": 0.0005658796648895659, | |
| "loss": 0.5842, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 57000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 43.792840822543795, | |
| "grad_norm": 0.6644548177719116, | |
| "learning_rate": 0.0005620715917745621, | |
| "loss": 0.5996, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 57500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.14795034384210057, | |
| "eval_loss": 6.565069675445557, | |
| "eval_runtime": 294.4066, | |
| "eval_samples_per_second": 570.486, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 57772, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 44.17364813404417, | |
| "grad_norm": 0.60784512758255, | |
| "learning_rate": 0.0005582635186595583, | |
| "loss": 0.5859, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 58000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 44.554455445544555, | |
| "grad_norm": 0.6305288076400757, | |
| "learning_rate": 0.0005544554455445545, | |
| "loss": 0.5792, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 58500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 44.93526275704494, | |
| "grad_norm": 0.663642168045044, | |
| "learning_rate": 0.0005506473724295507, | |
| "loss": 0.5974, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 59000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_accuracy": 0.14877199249799053, | |
| "eval_loss": 6.533756256103516, | |
| "eval_runtime": 292.6688, | |
| "eval_samples_per_second": 573.874, | |
| "eval_steps_per_second": 1.124, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 59085, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 45.316070068545315, | |
| "grad_norm": 0.5684943199157715, | |
| "learning_rate": 0.0005468392993145469, | |
| "loss": 0.5616, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 59500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 45.6968773800457, | |
| "grad_norm": 0.6369318962097168, | |
| "learning_rate": 0.0005430312261995431, | |
| "loss": 0.5818, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 60000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_accuracy": 0.15239796374028758, | |
| "eval_loss": 6.304377555847168, | |
| "eval_runtime": 293.353, | |
| "eval_samples_per_second": 572.536, | |
| "eval_steps_per_second": 1.122, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 60398, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 46.077684691546075, | |
| "grad_norm": 0.7030180096626282, | |
| "learning_rate": 0.0005392231530845393, | |
| "loss": 0.577, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 60500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 46.45849200304646, | |
| "grad_norm": 0.6424877643585205, | |
| "learning_rate": 0.0005354150799695355, | |
| "loss": 0.5506, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 61000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 46.83929931454684, | |
| "grad_norm": 0.6298852562904358, | |
| "learning_rate": 0.0005316070068545317, | |
| "loss": 0.5803, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 61500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_accuracy": 0.15137387990830878, | |
| "eval_loss": 6.536581993103027, | |
| "eval_runtime": 295.29, | |
| "eval_samples_per_second": 568.78, | |
| "eval_steps_per_second": 1.114, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 61711, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 47.22010662604722, | |
| "grad_norm": 0.6673698425292969, | |
| "learning_rate": 0.0005277989337395278, | |
| "loss": 0.5592, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 62000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 47.6009139375476, | |
| "grad_norm": 0.6058672070503235, | |
| "learning_rate": 0.000523990860624524, | |
| "loss": 0.5503, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 62500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 47.98172124904798, | |
| "grad_norm": 0.6532467603683472, | |
| "learning_rate": 0.0005201827875095202, | |
| "loss": 0.573, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 63000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.15284451192283646, | |
| "eval_loss": 6.47830057144165, | |
| "eval_runtime": 293.0849, | |
| "eval_samples_per_second": 573.059, | |
| "eval_steps_per_second": 1.123, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 63024, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 48.36252856054836, | |
| "grad_norm": 0.6419522762298584, | |
| "learning_rate": 0.0005163747143945163, | |
| "loss": 0.5408, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 63500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 48.743335872048746, | |
| "grad_norm": 0.6108247637748718, | |
| "learning_rate": 0.0005125666412795126, | |
| "loss": 0.551, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 64000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_accuracy": 0.15404721502783483, | |
| "eval_loss": 6.494078636169434, | |
| "eval_runtime": 293.6217, | |
| "eval_samples_per_second": 572.012, | |
| "eval_steps_per_second": 1.12, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 64337, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 49.12414318354912, | |
| "grad_norm": 0.5675166845321655, | |
| "learning_rate": 0.0005087585681645087, | |
| "loss": 0.558, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 64500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 49.504950495049506, | |
| "grad_norm": 0.5808805823326111, | |
| "learning_rate": 0.000504950495049505, | |
| "loss": 0.5386, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 65000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 49.88575780654988, | |
| "grad_norm": 0.6140856146812439, | |
| "learning_rate": 0.0005011424219345011, | |
| "loss": 0.5447, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 65500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_accuracy": 0.1527849721651633, | |
| "eval_loss": 6.451413154602051, | |
| "eval_runtime": 293.3727, | |
| "eval_samples_per_second": 572.497, | |
| "eval_steps_per_second": 1.121, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 65650, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 50.266565118050266, | |
| "grad_norm": 0.5688324570655823, | |
| "learning_rate": 0.0004973343488194974, | |
| "loss": 0.5331, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 66000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 50.64737242955065, | |
| "grad_norm": 0.6783095598220825, | |
| "learning_rate": 0.0004935262757044935, | |
| "loss": 0.5326, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 66500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_accuracy": 0.15468429043493792, | |
| "eval_loss": 6.373196125030518, | |
| "eval_runtime": 294.0871, | |
| "eval_samples_per_second": 571.106, | |
| "eval_steps_per_second": 1.119, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 66963, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 51.02817974105103, | |
| "grad_norm": 0.6522232294082642, | |
| "learning_rate": 0.0004897182025894898, | |
| "loss": 0.5431, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 67000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 51.40898705255141, | |
| "grad_norm": 0.5886669158935547, | |
| "learning_rate": 0.0004859101294744859, | |
| "loss": 0.519, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 67500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 51.78979436405179, | |
| "grad_norm": 0.5981758832931519, | |
| "learning_rate": 0.0004821020563594821, | |
| "loss": 0.5307, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 68000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.15460093477419548, | |
| "eval_loss": 6.580268383026123, | |
| "eval_runtime": 294.46, | |
| "eval_samples_per_second": 570.383, | |
| "eval_steps_per_second": 1.117, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 68276, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 52.17060167555217, | |
| "grad_norm": 0.5313323736190796, | |
| "learning_rate": 0.0004782939832444783, | |
| "loss": 0.5281, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 68500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 52.551408987052554, | |
| "grad_norm": 0.5757377743721008, | |
| "learning_rate": 0.0004744859101294745, | |
| "loss": 0.5118, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 69000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 52.93221629855293, | |
| "grad_norm": 0.5655534863471985, | |
| "learning_rate": 0.0004706778370144707, | |
| "loss": 0.5265, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 69500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_accuracy": 0.1594057932184216, | |
| "eval_loss": 6.225406169891357, | |
| "eval_runtime": 295.5483, | |
| "eval_samples_per_second": 568.283, | |
| "eval_steps_per_second": 1.113, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 69589, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 53.313023610053314, | |
| "grad_norm": 0.6315691471099854, | |
| "learning_rate": 0.00046686976389946687, | |
| "loss": 0.5063, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 70000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 53.69383092155369, | |
| "grad_norm": 0.660926103591919, | |
| "learning_rate": 0.00046306169078446307, | |
| "loss": 0.5216, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 70500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_accuracy": 0.15738739543330058, | |
| "eval_loss": 6.288092136383057, | |
| "eval_runtime": 292.8381, | |
| "eval_samples_per_second": 573.542, | |
| "eval_steps_per_second": 1.123, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 70902, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 54.074638233054074, | |
| "grad_norm": 0.5640078186988831, | |
| "learning_rate": 0.00045925361766945927, | |
| "loss": 0.5181, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 71000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 54.45544554455446, | |
| "grad_norm": 0.6716725826263428, | |
| "learning_rate": 0.00045544554455445547, | |
| "loss": 0.4994, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 71500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 54.836252856054834, | |
| "grad_norm": 0.5691807866096497, | |
| "learning_rate": 0.00045163747143945167, | |
| "loss": 0.5214, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 72000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_accuracy": 0.15635735762555447, | |
| "eval_loss": 6.411758899688721, | |
| "eval_runtime": 295.5856, | |
| "eval_samples_per_second": 568.211, | |
| "eval_steps_per_second": 1.113, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 72215, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 55.21706016755522, | |
| "grad_norm": 0.6010560989379883, | |
| "learning_rate": 0.00044782939832444787, | |
| "loss": 0.5047, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 72500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 55.5978674790556, | |
| "grad_norm": 0.6005520820617676, | |
| "learning_rate": 0.0004440213252094441, | |
| "loss": 0.4939, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 73000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 55.97867479055598, | |
| "grad_norm": 0.6255254745483398, | |
| "learning_rate": 0.0004402132520944402, | |
| "loss": 0.5163, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 73500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.1574290732636718, | |
| "eval_loss": 6.470302104949951, | |
| "eval_runtime": 294.9784, | |
| "eval_samples_per_second": 569.381, | |
| "eval_steps_per_second": 1.115, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 73528, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 56.35948210205636, | |
| "grad_norm": 0.593974769115448, | |
| "learning_rate": 0.0004364051789794364, | |
| "loss": 0.4895, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 74000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 56.74028941355674, | |
| "grad_norm": 0.5752760767936707, | |
| "learning_rate": 0.0004325971058644326, | |
| "loss": 0.4954, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 74500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_accuracy": 0.16022744187431157, | |
| "eval_loss": 6.3909783363342285, | |
| "eval_runtime": 293.5227, | |
| "eval_samples_per_second": 572.205, | |
| "eval_steps_per_second": 1.121, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 74841, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 57.12109672505712, | |
| "grad_norm": 0.5993156433105469, | |
| "learning_rate": 0.0004287890327494288, | |
| "loss": 0.5, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 75000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 57.501904036557505, | |
| "grad_norm": 0.6445265412330627, | |
| "learning_rate": 0.000424980959634425, | |
| "loss": 0.4888, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 75500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 57.88271134805788, | |
| "grad_norm": 0.5880737900733948, | |
| "learning_rate": 0.0004211728865194212, | |
| "loss": 0.4946, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 76000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_accuracy": 0.1607156678872317, | |
| "eval_loss": 6.456667900085449, | |
| "eval_runtime": 294.1379, | |
| "eval_samples_per_second": 571.008, | |
| "eval_steps_per_second": 1.119, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 76154, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 58.263518659558265, | |
| "grad_norm": 0.6263173818588257, | |
| "learning_rate": 0.00041736481340441737, | |
| "loss": 0.4825, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 76500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 58.64432597105864, | |
| "grad_norm": 0.630531907081604, | |
| "learning_rate": 0.0004135567402894136, | |
| "loss": 0.4764, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 77000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_accuracy": 0.15916168021196153, | |
| "eval_loss": 6.475037097930908, | |
| "eval_runtime": 293.105, | |
| "eval_samples_per_second": 573.02, | |
| "eval_steps_per_second": 1.122, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 77467, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 59.025133282559025, | |
| "grad_norm": 0.5097931623458862, | |
| "learning_rate": 0.0004097486671744098, | |
| "loss": 0.4917, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 77500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 59.40594059405941, | |
| "grad_norm": 0.5600863695144653, | |
| "learning_rate": 0.000405940594059406, | |
| "loss": 0.469, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 78000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 59.786747905559785, | |
| "grad_norm": 0.543627917766571, | |
| "learning_rate": 0.0004021325209444021, | |
| "loss": 0.4797, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 78500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.15797088505849782, | |
| "eval_loss": 6.5070600509643555, | |
| "eval_runtime": 293.3914, | |
| "eval_samples_per_second": 572.461, | |
| "eval_steps_per_second": 1.121, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 78780, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 60.16755521706017, | |
| "grad_norm": 0.5502893328666687, | |
| "learning_rate": 0.0003983244478293983, | |
| "loss": 0.4723, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 79000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 60.548362528560546, | |
| "grad_norm": 0.6071276068687439, | |
| "learning_rate": 0.0003945163747143945, | |
| "loss": 0.4628, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 79500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 60.92916984006093, | |
| "grad_norm": 0.534694492816925, | |
| "learning_rate": 0.00039070830159939067, | |
| "loss": 0.4773, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 80000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "eval_accuracy": 0.16487749694858742, | |
| "eval_loss": 6.299595832824707, | |
| "eval_runtime": 292.6075, | |
| "eval_samples_per_second": 573.994, | |
| "eval_steps_per_second": 1.124, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 80093, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 61.30997715156131, | |
| "grad_norm": 0.5583491325378418, | |
| "learning_rate": 0.00038690022848438687, | |
| "loss": 0.4689, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 80500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 61.69078446306169, | |
| "grad_norm": 0.5769256353378296, | |
| "learning_rate": 0.0003830921553693831, | |
| "loss": 0.4638, | |
| "max_memory_allocated (GB)": 21.28, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 81000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_accuracy": 0.1581852281861213, | |
| "eval_loss": 6.37566614151001, | |
| "eval_runtime": 295.0035, | |
| "eval_samples_per_second": 569.332, | |
| "eval_steps_per_second": 1.115, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 81406, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 62.07159177456207, | |
| "grad_norm": 0.6076428890228271, | |
| "learning_rate": 0.0003792840822543793, | |
| "loss": 0.471, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 81500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 62.45239908606245, | |
| "grad_norm": 0.5036810040473938, | |
| "learning_rate": 0.0003754760091393755, | |
| "loss": 0.4594, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 82000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 62.83320639756283, | |
| "grad_norm": 0.5345487594604492, | |
| "learning_rate": 0.0003716679360243717, | |
| "loss": 0.4634, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 82500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "eval_accuracy": 0.15785180554315145, | |
| "eval_loss": 6.494438171386719, | |
| "eval_runtime": 293.3816, | |
| "eval_samples_per_second": 572.48, | |
| "eval_steps_per_second": 1.121, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 82719, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 63.21401370906322, | |
| "grad_norm": 0.5531702637672424, | |
| "learning_rate": 0.0003678598629093678, | |
| "loss": 0.4539, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 83000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 63.59482102056359, | |
| "grad_norm": 0.6362270712852478, | |
| "learning_rate": 0.000364051789794364, | |
| "loss": 0.4442, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 83500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 63.97562833206398, | |
| "grad_norm": 0.5610324144363403, | |
| "learning_rate": 0.0003602437166793602, | |
| "loss": 0.4605, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 84000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.15728617784525617, | |
| "eval_loss": 6.63605260848999, | |
| "eval_runtime": 294.3948, | |
| "eval_samples_per_second": 570.509, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 84032, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 64.35643564356435, | |
| "grad_norm": 0.5020191073417664, | |
| "learning_rate": 0.0003564356435643564, | |
| "loss": 0.4363, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 84500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 64.73724295506474, | |
| "grad_norm": 0.5931971073150635, | |
| "learning_rate": 0.00035262757044935263, | |
| "loss": 0.4541, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 85000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_accuracy": 0.15664910243815308, | |
| "eval_loss": 6.532097816467285, | |
| "eval_runtime": 294.244, | |
| "eval_samples_per_second": 570.802, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 85345, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 65.11805026656512, | |
| "grad_norm": 0.5734322667121887, | |
| "learning_rate": 0.00034881949733434883, | |
| "loss": 0.4573, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 85500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 65.4988575780655, | |
| "grad_norm": 0.597134530544281, | |
| "learning_rate": 0.00034501142421934503, | |
| "loss": 0.4381, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 86000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 65.87966488956587, | |
| "grad_norm": 0.6044827103614807, | |
| "learning_rate": 0.0003412033511043412, | |
| "loss": 0.447, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 86500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_accuracy": 0.1647167396028698, | |
| "eval_loss": 6.29494047164917, | |
| "eval_runtime": 293.9365, | |
| "eval_samples_per_second": 571.399, | |
| "eval_steps_per_second": 1.119, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 86658, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 66.26047220106626, | |
| "grad_norm": 0.5792707800865173, | |
| "learning_rate": 0.0003373952779893374, | |
| "loss": 0.4338, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 87000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 66.64127951256664, | |
| "grad_norm": 0.5685553550720215, | |
| "learning_rate": 0.0003335872048743336, | |
| "loss": 0.4392, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 87500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 67.0, | |
| "eval_accuracy": 0.16156113244619094, | |
| "eval_loss": 6.429385662078857, | |
| "eval_runtime": 293.8448, | |
| "eval_samples_per_second": 571.577, | |
| "eval_steps_per_second": 1.12, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 87971, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 67.02208682406702, | |
| "grad_norm": 0.5447320342063904, | |
| "learning_rate": 0.0003297791317593298, | |
| "loss": 0.4436, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 88000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 67.40289413556741, | |
| "grad_norm": 0.572354257106781, | |
| "learning_rate": 0.000325971058644326, | |
| "loss": 0.4261, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 88500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 67.78370144706778, | |
| "grad_norm": 0.6266507506370544, | |
| "learning_rate": 0.0003221629855293222, | |
| "loss": 0.4319, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 89000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.16571105355601204, | |
| "eval_loss": 6.468620300292969, | |
| "eval_runtime": 294.3847, | |
| "eval_samples_per_second": 570.529, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 89284, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 68.16450875856816, | |
| "grad_norm": 0.6514284014701843, | |
| "learning_rate": 0.00031835491241431833, | |
| "loss": 0.4371, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 89500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 68.54531607006855, | |
| "grad_norm": 0.6122202277183533, | |
| "learning_rate": 0.00031454683929931453, | |
| "loss": 0.4232, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 90000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 68.92612338156893, | |
| "grad_norm": 0.617365300655365, | |
| "learning_rate": 0.00031073876618431073, | |
| "loss": 0.4321, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 90500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "eval_accuracy": 0.16538953886457683, | |
| "eval_loss": 6.5044403076171875, | |
| "eval_runtime": 292.9492, | |
| "eval_samples_per_second": 573.325, | |
| "eval_steps_per_second": 1.123, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 90597, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 69.3069306930693, | |
| "grad_norm": 0.5891785025596619, | |
| "learning_rate": 0.00030693069306930693, | |
| "loss": 0.4156, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 91000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 69.68773800456968, | |
| "grad_norm": 0.5906224846839905, | |
| "learning_rate": 0.00030312261995430313, | |
| "loss": 0.4239, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 91500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_accuracy": 0.1670447441278914, | |
| "eval_loss": 6.288415908813477, | |
| "eval_runtime": 294.6055, | |
| "eval_samples_per_second": 570.101, | |
| "eval_steps_per_second": 1.117, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 91910, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 70.06854531607007, | |
| "grad_norm": 0.4991946518421173, | |
| "learning_rate": 0.00029931454683929933, | |
| "loss": 0.4317, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 92000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 70.44935262757045, | |
| "grad_norm": 0.5375520586967468, | |
| "learning_rate": 0.00029550647372429554, | |
| "loss": 0.4111, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 92500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 70.83015993907082, | |
| "grad_norm": 0.5114530324935913, | |
| "learning_rate": 0.0002916984006092917, | |
| "loss": 0.424, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 93000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 71.0, | |
| "eval_accuracy": 0.16497276056086452, | |
| "eval_loss": 6.455724716186523, | |
| "eval_runtime": 293.9553, | |
| "eval_samples_per_second": 571.362, | |
| "eval_steps_per_second": 1.119, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 93223, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 71.21096725057122, | |
| "grad_norm": 0.5219191312789917, | |
| "learning_rate": 0.0002878903274942879, | |
| "loss": 0.4153, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 93500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 71.59177456207159, | |
| "grad_norm": 0.5508619546890259, | |
| "learning_rate": 0.0002840822543792841, | |
| "loss": 0.4138, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 94000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 71.97258187357197, | |
| "grad_norm": 0.5840057134628296, | |
| "learning_rate": 0.0002802741812642803, | |
| "loss": 0.4189, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 94500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.16434759310529606, | |
| "eval_loss": 6.51508903503418, | |
| "eval_runtime": 294.2848, | |
| "eval_samples_per_second": 570.723, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 94536, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 72.35338918507236, | |
| "grad_norm": 0.545428991317749, | |
| "learning_rate": 0.0002764661081492765, | |
| "loss": 0.4125, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 95000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 72.73419649657274, | |
| "grad_norm": 0.5353052616119385, | |
| "learning_rate": 0.0002726580350342727, | |
| "loss": 0.4056, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 95500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 73.0, | |
| "eval_accuracy": 0.16853919204548837, | |
| "eval_loss": 6.449789524078369, | |
| "eval_runtime": 294.3037, | |
| "eval_samples_per_second": 570.686, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 95849, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 73.11500380807311, | |
| "grad_norm": 0.5559654831886292, | |
| "learning_rate": 0.00026884996191926883, | |
| "loss": 0.4057, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 96000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 73.4958111195735, | |
| "grad_norm": 0.562247097492218, | |
| "learning_rate": 0.00026504188880426504, | |
| "loss": 0.4081, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 96500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 73.87661843107388, | |
| "grad_norm": 0.4949222207069397, | |
| "learning_rate": 0.00026123381568926124, | |
| "loss": 0.4113, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 97000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_accuracy": 0.16715786966747045, | |
| "eval_loss": 6.463613986968994, | |
| "eval_runtime": 294.07, | |
| "eval_samples_per_second": 571.14, | |
| "eval_steps_per_second": 1.119, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 97162, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 74.25742574257426, | |
| "grad_norm": 0.516631007194519, | |
| "learning_rate": 0.00025742574257425744, | |
| "loss": 0.4041, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 97500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 74.63823305407463, | |
| "grad_norm": 0.5838146209716797, | |
| "learning_rate": 0.00025361766945925364, | |
| "loss": 0.4031, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 98000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_accuracy": 0.16266261796314488, | |
| "eval_loss": 6.646434307098389, | |
| "eval_runtime": 293.4935, | |
| "eval_samples_per_second": 572.261, | |
| "eval_steps_per_second": 1.121, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 98475, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 75.01904036557502, | |
| "grad_norm": 0.49288421869277954, | |
| "learning_rate": 0.00024980959634424984, | |
| "loss": 0.4003, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 98500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 75.3998476770754, | |
| "grad_norm": 0.5597474575042725, | |
| "learning_rate": 0.00024600152322924604, | |
| "loss": 0.393, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 99000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 75.78065498857578, | |
| "grad_norm": 0.605529248714447, | |
| "learning_rate": 0.00024219345011424221, | |
| "loss": 0.3965, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 99500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.16858682385162693, | |
| "eval_loss": 6.5632758140563965, | |
| "eval_runtime": 293.7256, | |
| "eval_samples_per_second": 571.809, | |
| "eval_steps_per_second": 1.12, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 99788, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 76.16146230007617, | |
| "grad_norm": 0.4536028802394867, | |
| "learning_rate": 0.0002383853769992384, | |
| "loss": 0.3944, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 100000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 76.54226961157654, | |
| "grad_norm": 0.480114221572876, | |
| "learning_rate": 0.0002345773038842346, | |
| "loss": 0.3848, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 100500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 76.92307692307692, | |
| "grad_norm": 0.5449275970458984, | |
| "learning_rate": 0.0002307692307692308, | |
| "loss": 0.393, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 101000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 77.0, | |
| "eval_accuracy": 0.1632758774671787, | |
| "eval_loss": 6.687775135040283, | |
| "eval_runtime": 295.5696, | |
| "eval_samples_per_second": 568.242, | |
| "eval_steps_per_second": 1.113, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 101101, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 77.30388423457731, | |
| "grad_norm": 0.5685235857963562, | |
| "learning_rate": 0.000226961157654227, | |
| "loss": 0.3919, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 101500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 77.68469154607769, | |
| "grad_norm": 0.4742094874382019, | |
| "learning_rate": 0.00022315308453922314, | |
| "loss": 0.3958, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 102000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_accuracy": 0.17422523890327765, | |
| "eval_loss": 6.410009384155273, | |
| "eval_runtime": 294.3708, | |
| "eval_samples_per_second": 570.556, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 102414, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 78.06549885757806, | |
| "grad_norm": 0.5653303861618042, | |
| "learning_rate": 0.00021934501142421934, | |
| "loss": 0.3924, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 102500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 78.44630616907844, | |
| "grad_norm": 0.5051060914993286, | |
| "learning_rate": 0.00021553693830921554, | |
| "loss": 0.3866, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 103000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 78.82711348057883, | |
| "grad_norm": 0.5776985287666321, | |
| "learning_rate": 0.00021172886519421171, | |
| "loss": 0.3848, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 103500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 79.0, | |
| "eval_accuracy": 0.17083147271590604, | |
| "eval_loss": 6.537196159362793, | |
| "eval_runtime": 293.4855, | |
| "eval_samples_per_second": 572.277, | |
| "eval_steps_per_second": 1.121, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 103727, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 79.20792079207921, | |
| "grad_norm": 0.48929280042648315, | |
| "learning_rate": 0.00020792079207920792, | |
| "loss": 0.3856, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 104000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 79.58872810357958, | |
| "grad_norm": 0.5744351744651794, | |
| "learning_rate": 0.00020411271896420412, | |
| "loss": 0.3838, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 104500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 79.96953541507997, | |
| "grad_norm": 0.5490047335624695, | |
| "learning_rate": 0.0002003046458492003, | |
| "loss": 0.3785, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 105000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.17015867345419905, | |
| "eval_loss": 6.4460296630859375, | |
| "eval_runtime": 295.5798, | |
| "eval_samples_per_second": 568.222, | |
| "eval_steps_per_second": 1.113, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 105040, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 80.35034272658035, | |
| "grad_norm": 0.5375010967254639, | |
| "learning_rate": 0.0001964965727341965, | |
| "loss": 0.3743, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 105500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 80.73115003808073, | |
| "grad_norm": 0.48377513885498047, | |
| "learning_rate": 0.0001926884996191927, | |
| "loss": 0.3709, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 106000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 81.0, | |
| "eval_accuracy": 0.17629126849453722, | |
| "eval_loss": 6.449659824371338, | |
| "eval_runtime": 295.4724, | |
| "eval_samples_per_second": 568.429, | |
| "eval_steps_per_second": 1.113, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 106353, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 81.11195734958112, | |
| "grad_norm": 0.5163039565086365, | |
| "learning_rate": 0.00018888042650418887, | |
| "loss": 0.3748, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 106500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 81.4927646610815, | |
| "grad_norm": 0.44855397939682007, | |
| "learning_rate": 0.00018507235338918507, | |
| "loss": 0.3688, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 107000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 81.87357197258187, | |
| "grad_norm": 0.4645147919654846, | |
| "learning_rate": 0.00018126428027418127, | |
| "loss": 0.3692, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 107500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_accuracy": 0.17463606323122266, | |
| "eval_loss": 6.449412822723389, | |
| "eval_runtime": 293.3089, | |
| "eval_samples_per_second": 572.621, | |
| "eval_steps_per_second": 1.122, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 107666, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 82.25437928408225, | |
| "grad_norm": 0.5181131362915039, | |
| "learning_rate": 0.00017745620715917747, | |
| "loss": 0.3689, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 108000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 82.63518659558264, | |
| "grad_norm": 0.48766759037971497, | |
| "learning_rate": 0.00017364813404417364, | |
| "loss": 0.3667, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 108500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 83.0, | |
| "eval_accuracy": 0.17326069482897205, | |
| "eval_loss": 6.478708744049072, | |
| "eval_runtime": 294.5422, | |
| "eval_samples_per_second": 570.224, | |
| "eval_steps_per_second": 1.117, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 108979, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 83.01599390708301, | |
| "grad_norm": 0.5013307929039001, | |
| "learning_rate": 0.00016984006092916984, | |
| "loss": 0.3755, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 109000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 83.39680121858339, | |
| "grad_norm": 0.4935319125652313, | |
| "learning_rate": 0.00016603198781416605, | |
| "loss": 0.3573, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 109500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 83.77760853008378, | |
| "grad_norm": 0.47369396686553955, | |
| "learning_rate": 0.00016222391469916222, | |
| "loss": 0.3642, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 110000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.1762198207853294, | |
| "eval_loss": 6.379220008850098, | |
| "eval_runtime": 295.6777, | |
| "eval_samples_per_second": 568.034, | |
| "eval_steps_per_second": 1.113, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 110292, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 84.15841584158416, | |
| "grad_norm": 0.4918299615383148, | |
| "learning_rate": 0.00015841584158415842, | |
| "loss": 0.36, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 110500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 84.53922315308454, | |
| "grad_norm": 0.4875505268573761, | |
| "learning_rate": 0.00015460776846915462, | |
| "loss": 0.365, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 111000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 84.92003046458493, | |
| "grad_norm": 0.4853888750076294, | |
| "learning_rate": 0.0001507996953541508, | |
| "loss": 0.3648, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 111500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "eval_accuracy": 0.17838111398886605, | |
| "eval_loss": 6.410529613494873, | |
| "eval_runtime": 294.3712, | |
| "eval_samples_per_second": 570.555, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 111605, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 85.3008377760853, | |
| "grad_norm": 0.4833332598209381, | |
| "learning_rate": 0.000146991622239147, | |
| "loss": 0.3548, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 112000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 85.68164508758568, | |
| "grad_norm": 0.5711667537689209, | |
| "learning_rate": 0.0001431835491241432, | |
| "loss": 0.3595, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 112500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_accuracy": 0.17176029293560774, | |
| "eval_loss": 6.682053089141846, | |
| "eval_runtime": 294.7944, | |
| "eval_samples_per_second": 569.736, | |
| "eval_steps_per_second": 1.116, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 112918, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 86.06245239908607, | |
| "grad_norm": 0.503462553024292, | |
| "learning_rate": 0.00013937547600913937, | |
| "loss": 0.3593, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 113000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 86.44325971058645, | |
| "grad_norm": 0.5184527635574341, | |
| "learning_rate": 0.00013556740289413557, | |
| "loss": 0.3451, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 113500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 86.82406702208682, | |
| "grad_norm": 0.5371147394180298, | |
| "learning_rate": 0.00013175932977913177, | |
| "loss": 0.3575, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 114000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 87.0, | |
| "eval_accuracy": 0.17632103837337382, | |
| "eval_loss": 6.518656253814697, | |
| "eval_runtime": 293.8373, | |
| "eval_samples_per_second": 571.592, | |
| "eval_steps_per_second": 1.12, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 114231, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 87.2048743335872, | |
| "grad_norm": 0.5780415534973145, | |
| "learning_rate": 0.00012795125666412798, | |
| "loss": 0.3571, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 114500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 87.58568164508759, | |
| "grad_norm": 0.47814422845840454, | |
| "learning_rate": 0.00012414318354912415, | |
| "loss": 0.3524, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 115000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 87.96648895658797, | |
| "grad_norm": 0.529201865196228, | |
| "learning_rate": 0.00012033511043412034, | |
| "loss": 0.3512, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 115500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.17518382900181595, | |
| "eval_loss": 6.586051940917969, | |
| "eval_runtime": 293.3082, | |
| "eval_samples_per_second": 572.623, | |
| "eval_steps_per_second": 1.122, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 115544, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 88.34729626808834, | |
| "grad_norm": 0.5704371929168701, | |
| "learning_rate": 0.00011652703731911652, | |
| "loss": 0.3506, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 116000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 88.72810357958873, | |
| "grad_norm": 0.4795701503753662, | |
| "learning_rate": 0.00011271896420411273, | |
| "loss": 0.3416, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 116500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 89.0, | |
| "eval_accuracy": 0.17716054895656574, | |
| "eval_loss": 6.533735275268555, | |
| "eval_runtime": 295.8371, | |
| "eval_samples_per_second": 567.728, | |
| "eval_steps_per_second": 1.112, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 116857, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 89.10891089108911, | |
| "grad_norm": 0.5546539425849915, | |
| "learning_rate": 0.00010891089108910891, | |
| "loss": 0.3448, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 117000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 89.48971820258949, | |
| "grad_norm": 0.5290449857711792, | |
| "learning_rate": 0.0001051028179741051, | |
| "loss": 0.3446, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 117500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 89.87052551408988, | |
| "grad_norm": 0.5648689866065979, | |
| "learning_rate": 0.0001012947448591013, | |
| "loss": 0.3454, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 118000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_accuracy": 0.17577922657854783, | |
| "eval_loss": 6.607510089874268, | |
| "eval_runtime": 292.9613, | |
| "eval_samples_per_second": 573.301, | |
| "eval_steps_per_second": 1.123, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 118170, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 90.25133282559025, | |
| "grad_norm": 0.5280727744102478, | |
| "learning_rate": 9.748667174409749e-05, | |
| "loss": 0.3411, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 118500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 90.63214013709063, | |
| "grad_norm": 0.5048713088035583, | |
| "learning_rate": 9.367859862909369e-05, | |
| "loss": 0.3401, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 119000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 91.0, | |
| "eval_accuracy": 0.1758983060938942, | |
| "eval_loss": 6.536908149719238, | |
| "eval_runtime": 293.2277, | |
| "eval_samples_per_second": 572.78, | |
| "eval_steps_per_second": 1.122, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 119483, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 91.012947448591, | |
| "grad_norm": 0.4610355496406555, | |
| "learning_rate": 8.987052551408988e-05, | |
| "loss": 0.3417, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 119500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 91.3937547600914, | |
| "grad_norm": 0.5393335819244385, | |
| "learning_rate": 8.606245239908606e-05, | |
| "loss": 0.3334, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 120000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 91.77456207159177, | |
| "grad_norm": 0.5774939060211182, | |
| "learning_rate": 8.225437928408227e-05, | |
| "loss": 0.3361, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 120500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.17773808460599566, | |
| "eval_loss": 6.614775657653809, | |
| "eval_runtime": 295.4182, | |
| "eval_samples_per_second": 568.533, | |
| "eval_steps_per_second": 1.114, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 120796, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 92.15536938309215, | |
| "grad_norm": 0.5457442402839661, | |
| "learning_rate": 7.844630616907845e-05, | |
| "loss": 0.3276, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 121000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 92.53617669459254, | |
| "grad_norm": 0.5042428970336914, | |
| "learning_rate": 7.463823305407463e-05, | |
| "loss": 0.3348, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 121500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 92.91698400609292, | |
| "grad_norm": 0.6076750159263611, | |
| "learning_rate": 7.083015993907083e-05, | |
| "loss": 0.3377, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 122000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 93.0, | |
| "eval_accuracy": 0.17985174600339376, | |
| "eval_loss": 6.484330654144287, | |
| "eval_runtime": 294.3563, | |
| "eval_samples_per_second": 570.584, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 122109, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 93.2977913175933, | |
| "grad_norm": 0.5274074077606201, | |
| "learning_rate": 6.702208682406702e-05, | |
| "loss": 0.3318, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 122500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 93.67859862909368, | |
| "grad_norm": 0.4592651426792145, | |
| "learning_rate": 6.321401370906322e-05, | |
| "loss": 0.3344, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 123000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "eval_accuracy": 0.1790300973475038, | |
| "eval_loss": 6.447075843811035, | |
| "eval_runtime": 294.1853, | |
| "eval_samples_per_second": 570.916, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 123422, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 94.05940594059406, | |
| "grad_norm": 0.4637293815612793, | |
| "learning_rate": 5.9405940594059404e-05, | |
| "loss": 0.33, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 123500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 94.44021325209444, | |
| "grad_norm": 0.4784545600414276, | |
| "learning_rate": 5.55978674790556e-05, | |
| "loss": 0.3299, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 124000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 94.82102056359481, | |
| "grad_norm": 0.5497499108314514, | |
| "learning_rate": 5.178979436405179e-05, | |
| "loss": 0.3262, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 124500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "eval_accuracy": 0.1810127712780209, | |
| "eval_loss": 6.450591087341309, | |
| "eval_runtime": 294.1798, | |
| "eval_samples_per_second": 570.926, | |
| "eval_steps_per_second": 1.118, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 124735, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 95.2018278750952, | |
| "grad_norm": 0.5419259071350098, | |
| "learning_rate": 4.798172124904798e-05, | |
| "loss": 0.3309, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 125000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 95.58263518659558, | |
| "grad_norm": 0.49941861629486084, | |
| "learning_rate": 4.4173648134044175e-05, | |
| "loss": 0.3301, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 125500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 95.96344249809596, | |
| "grad_norm": 0.4585070312023163, | |
| "learning_rate": 4.036557501904037e-05, | |
| "loss": 0.3228, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 126000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_accuracy": 0.17943496769968145, | |
| "eval_loss": 6.566529750823975, | |
| "eval_runtime": 291.9607, | |
| "eval_samples_per_second": 575.266, | |
| "eval_steps_per_second": 1.127, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 126048, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 96.34424980959635, | |
| "grad_norm": 0.507581353187561, | |
| "learning_rate": 3.655750190403656e-05, | |
| "loss": 0.3236, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 126500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 96.72505712109673, | |
| "grad_norm": 0.5264877080917358, | |
| "learning_rate": 3.274942878903275e-05, | |
| "loss": 0.327, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 127000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 97.0, | |
| "eval_accuracy": 0.17927421035396385, | |
| "eval_loss": 6.534857749938965, | |
| "eval_runtime": 293.1746, | |
| "eval_samples_per_second": 572.884, | |
| "eval_steps_per_second": 1.122, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 127361, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 97.1058644325971, | |
| "grad_norm": 0.4856893718242645, | |
| "learning_rate": 2.8941355674028942e-05, | |
| "loss": 0.3286, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 127500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 97.48667174409749, | |
| "grad_norm": 0.4634458124637604, | |
| "learning_rate": 2.5133282559025133e-05, | |
| "loss": 0.3249, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 128000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 97.86747905559787, | |
| "grad_norm": 0.5153778791427612, | |
| "learning_rate": 2.1325209444021324e-05, | |
| "loss": 0.3275, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 128500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "eval_accuracy": 0.17994105563990354, | |
| "eval_loss": 6.51284646987915, | |
| "eval_runtime": 296.4245, | |
| "eval_samples_per_second": 566.603, | |
| "eval_steps_per_second": 1.11, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 128674, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 98.24828636709825, | |
| "grad_norm": 0.5075029730796814, | |
| "learning_rate": 1.7517136329017518e-05, | |
| "loss": 0.3245, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 129000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 98.62909367859864, | |
| "grad_norm": 0.462927907705307, | |
| "learning_rate": 1.3709063214013709e-05, | |
| "loss": 0.321, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 129500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 99.0, | |
| "eval_accuracy": 0.1801494447917597, | |
| "eval_loss": 6.557428359985352, | |
| "eval_runtime": 296.1671, | |
| "eval_samples_per_second": 567.095, | |
| "eval_steps_per_second": 1.111, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 129987, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 99.00990099009901, | |
| "grad_norm": 0.48631536960601807, | |
| "learning_rate": 9.900990099009901e-06, | |
| "loss": 0.3226, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 130000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 99.39070830159939, | |
| "grad_norm": 0.4906682074069977, | |
| "learning_rate": 6.092916984006093e-06, | |
| "loss": 0.3186, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 130500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 99.77151561309977, | |
| "grad_norm": 0.4988425672054291, | |
| "learning_rate": 2.284843869002285e-06, | |
| "loss": 0.3217, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 131000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_accuracy": 0.17984579202762646, | |
| "eval_loss": 6.551010608673096, | |
| "eval_runtime": 294.715, | |
| "eval_samples_per_second": 569.89, | |
| "eval_steps_per_second": 1.116, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 131300, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "max_memory_allocated (GB)": 21.35, | |
| "memory_allocated (GB)": 2.46, | |
| "step": 131300, | |
| "total_flos": 9.770330117395578e+21, | |
| "total_memory_available (GB)": 94.62, | |
| "train_loss": 0.7415650523898262, | |
| "train_runtime": 105713.6949, | |
| "train_samples_per_second": 635.506, | |
| "train_steps_per_second": 1.242 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 131300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.770330117395578e+21, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |