| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.5002627430373096, | |
| "eval_steps": 500, | |
| "global_step": 238, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0021019442984760903, | |
| "grad_norm": 0.7624253401313755, | |
| "learning_rate": 0.0, | |
| "loss": 2.2955, | |
| "memory/device_mem_reserved(gib)": 68.22, | |
| "memory/max_mem_active(gib)": 63.52, | |
| "memory/max_mem_allocated(gib)": 62.82, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004203888596952181, | |
| "grad_norm": 0.7189116302541813, | |
| "learning_rate": 2e-08, | |
| "loss": 2.2824, | |
| "memory/device_mem_reserved(gib)": 68.31, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.006305832895428271, | |
| "grad_norm": 0.8431595274838072, | |
| "learning_rate": 4e-08, | |
| "loss": 2.3101, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.008407777193904361, | |
| "grad_norm": 0.8637289443313003, | |
| "learning_rate": 6e-08, | |
| "loss": 2.3514, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.010509721492380452, | |
| "grad_norm": 0.8128827491990301, | |
| "learning_rate": 8e-08, | |
| "loss": 2.3621, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.012611665790856543, | |
| "grad_norm": 0.9504830158009488, | |
| "learning_rate": 1e-07, | |
| "loss": 2.4108, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.014713610089332634, | |
| "grad_norm": 0.9140479063802851, | |
| "learning_rate": 1.2e-07, | |
| "loss": 2.2224, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.016815554387808723, | |
| "grad_norm": 0.8632210617655338, | |
| "learning_rate": 1.4e-07, | |
| "loss": 2.3589, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.018917498686284815, | |
| "grad_norm": 0.8747745167339828, | |
| "learning_rate": 1.6e-07, | |
| "loss": 2.2411, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.021019442984760904, | |
| "grad_norm": 0.7739481898974889, | |
| "learning_rate": 1.8e-07, | |
| "loss": 2.2522, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.023121387283236993, | |
| "grad_norm": 0.8301921811025426, | |
| "learning_rate": 2e-07, | |
| "loss": 2.3565, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.025223331581713086, | |
| "grad_norm": 0.7677035533090953, | |
| "learning_rate": 2.1999999999999998e-07, | |
| "loss": 2.2208, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.027325275880189175, | |
| "grad_norm": 0.7834629656153209, | |
| "learning_rate": 2.4e-07, | |
| "loss": 2.2526, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.029427220178665267, | |
| "grad_norm": 0.776588932490268, | |
| "learning_rate": 2.6e-07, | |
| "loss": 2.2727, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03152916447714135, | |
| "grad_norm": 0.7753598356301531, | |
| "learning_rate": 2.8e-07, | |
| "loss": 2.3564, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.033631108775617445, | |
| "grad_norm": 0.7165697716264268, | |
| "learning_rate": 3e-07, | |
| "loss": 2.3331, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03573305307409354, | |
| "grad_norm": 0.729205845829164, | |
| "learning_rate": 3.2e-07, | |
| "loss": 2.322, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.03783499737256963, | |
| "grad_norm": 0.8495080537327478, | |
| "learning_rate": 3.4000000000000003e-07, | |
| "loss": 2.4952, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.039936941671045716, | |
| "grad_norm": 0.7578372584471679, | |
| "learning_rate": 3.6e-07, | |
| "loss": 2.3132, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.04203888596952181, | |
| "grad_norm": 0.6681608647353537, | |
| "learning_rate": 3.7999999999999996e-07, | |
| "loss": 2.3086, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0441408302679979, | |
| "grad_norm": 0.7042221303721394, | |
| "learning_rate": 4e-07, | |
| "loss": 2.3644, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.046242774566473986, | |
| "grad_norm": 0.725951911870576, | |
| "learning_rate": 4.1999999999999995e-07, | |
| "loss": 2.3208, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.04834471886495008, | |
| "grad_norm": 0.6347256826610295, | |
| "learning_rate": 4.3999999999999997e-07, | |
| "loss": 2.328, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.05044666316342617, | |
| "grad_norm": 0.5682080423503054, | |
| "learning_rate": 4.6e-07, | |
| "loss": 2.2008, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05254860746190226, | |
| "grad_norm": 0.5787647024012217, | |
| "learning_rate": 4.8e-07, | |
| "loss": 2.2841, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.05465055176037835, | |
| "grad_norm": 0.5807941293103913, | |
| "learning_rate": 5e-07, | |
| "loss": 2.2661, | |
| "memory/device_mem_reserved(gib)": 68.94, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.05675249605885444, | |
| "grad_norm": 0.5238787661221586, | |
| "learning_rate": 5.2e-07, | |
| "loss": 2.292, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.058854440357330534, | |
| "grad_norm": 0.6607378470156829, | |
| "learning_rate": 5.4e-07, | |
| "loss": 2.346, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06095638465580662, | |
| "grad_norm": 0.5949442201958344, | |
| "learning_rate": 5.6e-07, | |
| "loss": 2.2856, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.0630583289542827, | |
| "grad_norm": 0.6213907595973902, | |
| "learning_rate": 5.8e-07, | |
| "loss": 2.3527, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0651602732527588, | |
| "grad_norm": 0.6574213245120029, | |
| "learning_rate": 6e-07, | |
| "loss": 2.2896, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.06726221755123489, | |
| "grad_norm": 0.7904069125236015, | |
| "learning_rate": 6.2e-07, | |
| "loss": 2.4192, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.06936416184971098, | |
| "grad_norm": 0.6912774106481298, | |
| "learning_rate": 6.4e-07, | |
| "loss": 2.3085, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.07146610614818708, | |
| "grad_norm": 0.6819796440725628, | |
| "learning_rate": 6.6e-07, | |
| "loss": 2.2756, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07356805044666316, | |
| "grad_norm": 0.7580978517321655, | |
| "learning_rate": 6.800000000000001e-07, | |
| "loss": 2.3645, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.07566999474513926, | |
| "grad_norm": 0.6791446776516942, | |
| "learning_rate": 7e-07, | |
| "loss": 2.2628, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.07777193904361535, | |
| "grad_norm": 0.569840280711906, | |
| "learning_rate": 7.2e-07, | |
| "loss": 2.2602, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.07987388334209143, | |
| "grad_norm": 0.5498888556096215, | |
| "learning_rate": 7.4e-07, | |
| "loss": 2.3167, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08197582764056753, | |
| "grad_norm": 0.5268765090754575, | |
| "learning_rate": 7.599999999999999e-07, | |
| "loss": 2.2378, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.08407777193904362, | |
| "grad_norm": 0.4848125502462646, | |
| "learning_rate": 7.799999999999999e-07, | |
| "loss": 2.2882, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0861797162375197, | |
| "grad_norm": 0.5814992292096023, | |
| "learning_rate": 8e-07, | |
| "loss": 2.3471, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0882816605359958, | |
| "grad_norm": 0.6166392360245904, | |
| "learning_rate": 8.199999999999999e-07, | |
| "loss": 2.441, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09038360483447189, | |
| "grad_norm": 0.6377322312855411, | |
| "learning_rate": 8.399999999999999e-07, | |
| "loss": 2.3912, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.09248554913294797, | |
| "grad_norm": 0.5055719418643514, | |
| "learning_rate": 8.599999999999999e-07, | |
| "loss": 2.2561, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.09458749343142407, | |
| "grad_norm": 0.49178646668795084, | |
| "learning_rate": 8.799999999999999e-07, | |
| "loss": 2.2599, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.09668943772990016, | |
| "grad_norm": 0.47537370207387974, | |
| "learning_rate": 9e-07, | |
| "loss": 2.3064, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.09879138202837624, | |
| "grad_norm": 0.5089053853006482, | |
| "learning_rate": 9.2e-07, | |
| "loss": 2.391, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.10089332632685234, | |
| "grad_norm": 0.4728302009023318, | |
| "learning_rate": 9.399999999999999e-07, | |
| "loss": 2.3139, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.10299527062532843, | |
| "grad_norm": 0.4974785018291372, | |
| "learning_rate": 9.6e-07, | |
| "loss": 2.3599, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.10509721492380451, | |
| "grad_norm": 0.5140106787374947, | |
| "learning_rate": 9.8e-07, | |
| "loss": 2.4427, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10719915922228061, | |
| "grad_norm": 0.5361457578321233, | |
| "learning_rate": 1e-06, | |
| "loss": 2.3295, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.1093011035207567, | |
| "grad_norm": 0.49844160829734835, | |
| "learning_rate": 9.999863397100894e-07, | |
| "loss": 2.2672, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11140304781923278, | |
| "grad_norm": 0.5385578770440957, | |
| "learning_rate": 9.999453595867715e-07, | |
| "loss": 2.3261, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.11350499211770888, | |
| "grad_norm": 0.497092836247932, | |
| "learning_rate": 9.998770618692484e-07, | |
| "loss": 2.3326, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.11560693641618497, | |
| "grad_norm": 0.539870187568986, | |
| "learning_rate": 9.997814502893856e-07, | |
| "loss": 2.2381, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.11770888071466107, | |
| "grad_norm": 0.5073884711048833, | |
| "learning_rate": 9.996585300715115e-07, | |
| "loss": 2.3122, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.11981082501313715, | |
| "grad_norm": 0.5162826315178152, | |
| "learning_rate": 9.99508307932129e-07, | |
| "loss": 2.2719, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.12191276931161324, | |
| "grad_norm": 0.5135640558488429, | |
| "learning_rate": 9.9933079207955e-07, | |
| "loss": 2.4354, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.12401471361008934, | |
| "grad_norm": 0.48495138081284994, | |
| "learning_rate": 9.991259922134465e-07, | |
| "loss": 2.2913, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.1261166579085654, | |
| "grad_norm": 0.5031121760605395, | |
| "learning_rate": 9.98893919524321e-07, | |
| "loss": 2.293, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1282186022070415, | |
| "grad_norm": 0.4053272758920918, | |
| "learning_rate": 9.98634586692894e-07, | |
| "loss": 2.2873, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.1303205465055176, | |
| "grad_norm": 0.4532646932853173, | |
| "learning_rate": 9.983480078894123e-07, | |
| "loss": 2.3065, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.13242249080399368, | |
| "grad_norm": 0.4496821436560576, | |
| "learning_rate": 9.98034198772874e-07, | |
| "loss": 2.2886, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.13452443510246978, | |
| "grad_norm": 0.48430661978532813, | |
| "learning_rate": 9.976931764901733e-07, | |
| "loss": 2.3404, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.13662637940094588, | |
| "grad_norm": 0.5163168950805126, | |
| "learning_rate": 9.97324959675163e-07, | |
| "loss": 2.286, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.13872832369942195, | |
| "grad_norm": 0.4385342628062273, | |
| "learning_rate": 9.969295684476368e-07, | |
| "loss": 2.2923, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.14083026799789805, | |
| "grad_norm": 0.4476245967273303, | |
| "learning_rate": 9.9650702441223e-07, | |
| "loss": 2.2454, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.14293221229637415, | |
| "grad_norm": 0.4493507785126621, | |
| "learning_rate": 9.960573506572389e-07, | |
| "loss": 2.3361, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.14503415659485025, | |
| "grad_norm": 0.4676473798188462, | |
| "learning_rate": 9.955805717533585e-07, | |
| "loss": 2.3795, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.14713610089332632, | |
| "grad_norm": 0.5003504816633514, | |
| "learning_rate": 9.950767137523416e-07, | |
| "loss": 2.3638, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.14923804519180242, | |
| "grad_norm": 0.41298653135277646, | |
| "learning_rate": 9.94545804185573e-07, | |
| "loss": 2.2986, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.15133998949027852, | |
| "grad_norm": 0.48549576119983434, | |
| "learning_rate": 9.939878720625673e-07, | |
| "loss": 2.3772, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1534419337887546, | |
| "grad_norm": 0.37069853589006974, | |
| "learning_rate": 9.93402947869383e-07, | |
| "loss": 2.2609, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.1555438780872307, | |
| "grad_norm": 0.3822824223589903, | |
| "learning_rate": 9.927910635669561e-07, | |
| "loss": 2.3263, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.1576458223857068, | |
| "grad_norm": 0.4645424064190486, | |
| "learning_rate": 9.921522525893547e-07, | |
| "loss": 2.421, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.15974776668418286, | |
| "grad_norm": 0.40728550126377283, | |
| "learning_rate": 9.91486549841951e-07, | |
| "loss": 2.3488, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.16184971098265896, | |
| "grad_norm": 0.39534534329560483, | |
| "learning_rate": 9.907939916995152e-07, | |
| "loss": 2.2277, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.16395165528113506, | |
| "grad_norm": 0.3994213467776548, | |
| "learning_rate": 9.900746160042272e-07, | |
| "loss": 2.3751, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.16605359957961113, | |
| "grad_norm": 0.3952978443639354, | |
| "learning_rate": 9.893284620636098e-07, | |
| "loss": 2.3407, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.16815554387808723, | |
| "grad_norm": 0.3847266788854899, | |
| "learning_rate": 9.88555570648379e-07, | |
| "loss": 2.2882, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.17025748817656333, | |
| "grad_norm": 0.3942404109616697, | |
| "learning_rate": 9.877559839902183e-07, | |
| "loss": 2.3809, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.1723594324750394, | |
| "grad_norm": 0.3726144315608755, | |
| "learning_rate": 9.869297457794698e-07, | |
| "loss": 2.2965, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.1744613767735155, | |
| "grad_norm": 0.4044669149844896, | |
| "learning_rate": 9.860769011627474e-07, | |
| "loss": 2.3778, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.1765633210719916, | |
| "grad_norm": 0.44263984303122605, | |
| "learning_rate": 9.851974967404702e-07, | |
| "loss": 2.3655, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.17866526537046767, | |
| "grad_norm": 0.3800348736088796, | |
| "learning_rate": 9.842915805643156e-07, | |
| "loss": 2.2951, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.18076720966894377, | |
| "grad_norm": 0.38644114608168073, | |
| "learning_rate": 9.833592021345937e-07, | |
| "loss": 2.3567, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.18286915396741987, | |
| "grad_norm": 0.5360672745714498, | |
| "learning_rate": 9.824004123975434e-07, | |
| "loss": 2.3769, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.18497109826589594, | |
| "grad_norm": 0.3826183850679395, | |
| "learning_rate": 9.814152637425477e-07, | |
| "loss": 2.2676, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.18707304256437204, | |
| "grad_norm": 0.3874657198676833, | |
| "learning_rate": 9.804038099992716e-07, | |
| "loss": 2.2044, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.18917498686284814, | |
| "grad_norm": 0.42284650951618596, | |
| "learning_rate": 9.793661064347204e-07, | |
| "loss": 2.2791, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.19127693116132422, | |
| "grad_norm": 0.4012146632153047, | |
| "learning_rate": 9.783022097502203e-07, | |
| "loss": 2.2554, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.19337887545980031, | |
| "grad_norm": 0.37104574503246424, | |
| "learning_rate": 9.772121780783201e-07, | |
| "loss": 2.2696, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.19548081975827641, | |
| "grad_norm": 0.4115506199685101, | |
| "learning_rate": 9.76096070979614e-07, | |
| "loss": 2.292, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.19758276405675249, | |
| "grad_norm": 0.4949212386577297, | |
| "learning_rate": 9.749539494394885e-07, | |
| "loss": 2.3154, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.19968470835522859, | |
| "grad_norm": 0.5305093079330326, | |
| "learning_rate": 9.737858758647889e-07, | |
| "loss": 2.3967, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.20178665265370468, | |
| "grad_norm": 0.39802845026570083, | |
| "learning_rate": 9.725919140804098e-07, | |
| "loss": 2.3833, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.20388859695218076, | |
| "grad_norm": 0.392517153138478, | |
| "learning_rate": 9.713721293258078e-07, | |
| "loss": 2.3458, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.20599054125065686, | |
| "grad_norm": 0.38285765355194634, | |
| "learning_rate": 9.70126588251436e-07, | |
| "loss": 2.2321, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.20809248554913296, | |
| "grad_norm": 0.42890083185292094, | |
| "learning_rate": 9.688553589151037e-07, | |
| "loss": 2.2823, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.21019442984760903, | |
| "grad_norm": 0.3788992789108253, | |
| "learning_rate": 9.675585107782555e-07, | |
| "loss": 2.2955, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.21229637414608513, | |
| "grad_norm": 0.40025954957804155, | |
| "learning_rate": 9.66236114702178e-07, | |
| "loss": 2.3454, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.21439831844456123, | |
| "grad_norm": 0.4040329751371346, | |
| "learning_rate": 9.648882429441256e-07, | |
| "loss": 2.3362, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2165002627430373, | |
| "grad_norm": 0.35667806143435715, | |
| "learning_rate": 9.635149691533747e-07, | |
| "loss": 2.3089, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.2186022070415134, | |
| "grad_norm": 0.42503183804867145, | |
| "learning_rate": 9.621163683671978e-07, | |
| "loss": 2.3024, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2207041513399895, | |
| "grad_norm": 0.3833710476470682, | |
| "learning_rate": 9.606925170067636e-07, | |
| "loss": 2.2944, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.22280609563846557, | |
| "grad_norm": 0.38645757625412946, | |
| "learning_rate": 9.592434928729615e-07, | |
| "loss": 2.2595, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.22490803993694167, | |
| "grad_norm": 0.37424692884672933, | |
| "learning_rate": 9.577693751421505e-07, | |
| "loss": 2.3025, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.22700998423541777, | |
| "grad_norm": 0.3776781851494623, | |
| "learning_rate": 9.562702443618331e-07, | |
| "loss": 2.2724, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.22911192853389384, | |
| "grad_norm": 0.392564325121222, | |
| "learning_rate": 9.547461824462533e-07, | |
| "loss": 2.3737, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.23121387283236994, | |
| "grad_norm": 0.3480699195596026, | |
| "learning_rate": 9.531972726719215e-07, | |
| "loss": 2.2591, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.23331581713084604, | |
| "grad_norm": 0.3563697131151561, | |
| "learning_rate": 9.516235996730644e-07, | |
| "loss": 2.3639, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.23541776142932214, | |
| "grad_norm": 0.4943435915920374, | |
| "learning_rate": 9.500252494369991e-07, | |
| "loss": 2.3605, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2375197057277982, | |
| "grad_norm": 0.3975018845059572, | |
| "learning_rate": 9.484023092994364e-07, | |
| "loss": 2.4139, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.2396216500262743, | |
| "grad_norm": 0.37314820478206834, | |
| "learning_rate": 9.467548679397071e-07, | |
| "loss": 2.293, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2417235943247504, | |
| "grad_norm": 0.39240855452269136, | |
| "learning_rate": 9.450830153759176e-07, | |
| "loss": 2.3568, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.24382553862322648, | |
| "grad_norm": 0.36202032847414545, | |
| "learning_rate": 9.433868429600309e-07, | |
| "loss": 2.36, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.24592748292170258, | |
| "grad_norm": 0.3852500669591038, | |
| "learning_rate": 9.416664433728748e-07, | |
| "loss": 2.335, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.24802942722017868, | |
| "grad_norm": 0.35255828976101133, | |
| "learning_rate": 9.399219106190775e-07, | |
| "loss": 2.3367, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2501313715186548, | |
| "grad_norm": 0.4145689168519548, | |
| "learning_rate": 9.381533400219317e-07, | |
| "loss": 2.3807, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.2522333158171308, | |
| "grad_norm": 0.3638037446306906, | |
| "learning_rate": 9.363608282181861e-07, | |
| "loss": 2.2441, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2543352601156069, | |
| "grad_norm": 0.3892269635122991, | |
| "learning_rate": 9.345444731527641e-07, | |
| "loss": 2.3285, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.256437204414083, | |
| "grad_norm": 0.3848382071231666, | |
| "learning_rate": 9.327043740734128e-07, | |
| "loss": 2.2713, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2585391487125591, | |
| "grad_norm": 0.3602411460013298, | |
| "learning_rate": 9.308406315252798e-07, | |
| "loss": 2.32, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.2606410930110352, | |
| "grad_norm": 0.36833348877975325, | |
| "learning_rate": 9.289533473454192e-07, | |
| "loss": 2.1967, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2627430373095113, | |
| "grad_norm": 0.3585772049526573, | |
| "learning_rate": 9.270426246572272e-07, | |
| "loss": 2.3642, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.26484498160798736, | |
| "grad_norm": 0.34020805834208123, | |
| "learning_rate": 9.251085678648071e-07, | |
| "loss": 2.237, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.26694692590646346, | |
| "grad_norm": 0.38311234004852174, | |
| "learning_rate": 9.23151282647265e-07, | |
| "loss": 2.2439, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.26904887020493956, | |
| "grad_norm": 0.40490379874064303, | |
| "learning_rate": 9.211708759529346e-07, | |
| "loss": 2.3447, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.27115081450341566, | |
| "grad_norm": 0.38814226346705333, | |
| "learning_rate": 9.191674559935347e-07, | |
| "loss": 2.2642, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.27325275880189176, | |
| "grad_norm": 0.392535018684069, | |
| "learning_rate": 9.171411322382551e-07, | |
| "loss": 2.4222, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.27535470310036786, | |
| "grad_norm": 0.36293069595975763, | |
| "learning_rate": 9.150920154077753e-07, | |
| "loss": 2.2375, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.2774566473988439, | |
| "grad_norm": 0.3827224228744126, | |
| "learning_rate": 9.130202174682153e-07, | |
| "loss": 2.3121, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.27955859169732, | |
| "grad_norm": 0.39154739093650776, | |
| "learning_rate": 9.109258516250171e-07, | |
| "loss": 2.3246, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.2816605359957961, | |
| "grad_norm": 0.35430283896633147, | |
| "learning_rate": 9.08809032316759e-07, | |
| "loss": 2.2922, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2837624802942722, | |
| "grad_norm": 0.39840449294712393, | |
| "learning_rate": 9.066698752089028e-07, | |
| "loss": 2.34, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.2858644245927483, | |
| "grad_norm": 0.3657527770786503, | |
| "learning_rate": 9.045084971874737e-07, | |
| "loss": 2.3127, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.2879663688912244, | |
| "grad_norm": 0.40390275500061623, | |
| "learning_rate": 9.02325016352673e-07, | |
| "loss": 2.2761, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.2900683131897005, | |
| "grad_norm": 0.3486049947257035, | |
| "learning_rate": 9.001195520124255e-07, | |
| "loss": 2.2909, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.29217025748817654, | |
| "grad_norm": 0.369271223650673, | |
| "learning_rate": 8.978922246758606e-07, | |
| "loss": 2.3146, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.29427220178665264, | |
| "grad_norm": 0.34559908408986584, | |
| "learning_rate": 8.956431560467266e-07, | |
| "loss": 2.3861, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.29637414608512874, | |
| "grad_norm": 0.40663130251420515, | |
| "learning_rate": 8.933724690167416e-07, | |
| "loss": 2.3351, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.29847609038360484, | |
| "grad_norm": 0.36345254242299446, | |
| "learning_rate": 8.910802876588781e-07, | |
| "loss": 2.2782, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.30057803468208094, | |
| "grad_norm": 0.38393881395986873, | |
| "learning_rate": 8.887667372205838e-07, | |
| "loss": 2.2808, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.30267997898055704, | |
| "grad_norm": 0.35972360098945216, | |
| "learning_rate": 8.864319441169372e-07, | |
| "loss": 2.2753, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3047819232790331, | |
| "grad_norm": 0.4197014359486705, | |
| "learning_rate": 8.840760359237411e-07, | |
| "loss": 2.3163, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3068838675775092, | |
| "grad_norm": 0.3698464136493578, | |
| "learning_rate": 8.816991413705514e-07, | |
| "loss": 2.3585, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.3089858118759853, | |
| "grad_norm": 0.38628726944167563, | |
| "learning_rate": 8.793013903336427e-07, | |
| "loss": 2.2954, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.3110877561744614, | |
| "grad_norm": 0.33899721461114324, | |
| "learning_rate": 8.768829138289122e-07, | |
| "loss": 2.2799, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3131897004729375, | |
| "grad_norm": 0.39286568836433555, | |
| "learning_rate": 8.744438440047206e-07, | |
| "loss": 2.3867, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.3152916447714136, | |
| "grad_norm": 0.36680644419068636, | |
| "learning_rate": 8.719843141346717e-07, | |
| "loss": 2.2539, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3173935890698896, | |
| "grad_norm": 0.4226555891529418, | |
| "learning_rate": 8.695044586103295e-07, | |
| "loss": 2.4062, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.3194955333683657, | |
| "grad_norm": 0.343763121119237, | |
| "learning_rate": 8.67004412933876e-07, | |
| "loss": 2.2993, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.3215974776668418, | |
| "grad_norm": 0.34716852552812194, | |
| "learning_rate": 8.644843137107057e-07, | |
| "loss": 2.3404, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.3236994219653179, | |
| "grad_norm": 0.3968883598563259, | |
| "learning_rate": 8.619442986419629e-07, | |
| "loss": 2.3012, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.325801366263794, | |
| "grad_norm": 0.33889705699000894, | |
| "learning_rate": 8.593845065170163e-07, | |
| "loss": 2.2621, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.3279033105622701, | |
| "grad_norm": 0.351512969072057, | |
| "learning_rate": 8.568050772058761e-07, | |
| "loss": 2.357, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.33000525486074617, | |
| "grad_norm": 0.3668822383961036, | |
| "learning_rate": 8.542061516515511e-07, | |
| "loss": 2.3499, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.33210719915922227, | |
| "grad_norm": 0.3813104081247767, | |
| "learning_rate": 8.515878718623473e-07, | |
| "loss": 2.3762, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.33420914345769837, | |
| "grad_norm": 0.3555623840160132, | |
| "learning_rate": 8.489503809041087e-07, | |
| "loss": 2.2511, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.33631108775617446, | |
| "grad_norm": 0.3426684159571787, | |
| "learning_rate": 8.462938228923999e-07, | |
| "loss": 2.3354, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.33841303205465056, | |
| "grad_norm": 0.3491214060448838, | |
| "learning_rate": 8.436183429846313e-07, | |
| "loss": 2.2395, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.34051497635312666, | |
| "grad_norm": 0.4563165572220967, | |
| "learning_rate": 8.409240873721276e-07, | |
| "loss": 2.3872, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3426169206516027, | |
| "grad_norm": 0.33319192852547314, | |
| "learning_rate": 8.382112032721398e-07, | |
| "loss": 2.3122, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.3447188649500788, | |
| "grad_norm": 0.38388679911113793, | |
| "learning_rate": 8.354798389198012e-07, | |
| "loss": 2.3693, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.3468208092485549, | |
| "grad_norm": 0.39313380831692907, | |
| "learning_rate": 8.327301435600272e-07, | |
| "loss": 2.3085, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.348922753547031, | |
| "grad_norm": 0.41915800484281546, | |
| "learning_rate": 8.299622674393614e-07, | |
| "loss": 2.3851, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.3510246978455071, | |
| "grad_norm": 0.35317676640002343, | |
| "learning_rate": 8.271763617977641e-07, | |
| "loss": 2.2271, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.3531266421439832, | |
| "grad_norm": 0.3528294909167197, | |
| "learning_rate": 8.243725788603508e-07, | |
| "loss": 2.3087, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.35522858644245925, | |
| "grad_norm": 0.38320812537843413, | |
| "learning_rate": 8.215510718290723e-07, | |
| "loss": 2.2441, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.35733053074093535, | |
| "grad_norm": 0.403856066195845, | |
| "learning_rate": 8.187119948743449e-07, | |
| "loss": 2.3326, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.35943247503941145, | |
| "grad_norm": 0.35785694946938973, | |
| "learning_rate": 8.158555031266254e-07, | |
| "loss": 2.332, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.36153441933788755, | |
| "grad_norm": 0.34400894235353774, | |
| "learning_rate": 8.129817526679357e-07, | |
| "loss": 2.2897, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 0.4126959309133071, | |
| "learning_rate": 8.100909005233334e-07, | |
| "loss": 2.3507, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.36573830793483975, | |
| "grad_norm": 0.42717818377517935, | |
| "learning_rate": 8.071831046523318e-07, | |
| "loss": 2.3917, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3678402522333158, | |
| "grad_norm": 0.3579933408679328, | |
| "learning_rate": 8.042585239402697e-07, | |
| "loss": 2.2518, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.3699421965317919, | |
| "grad_norm": 0.39551576662619, | |
| "learning_rate": 8.013173181896282e-07, | |
| "loss": 2.4125, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.372044140830268, | |
| "grad_norm": 0.365049869078283, | |
| "learning_rate": 7.983596481113005e-07, | |
| "loss": 2.2727, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.3741460851287441, | |
| "grad_norm": 0.4011873410931577, | |
| "learning_rate": 7.953856753158094e-07, | |
| "loss": 2.3436, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.3762480294272202, | |
| "grad_norm": 0.3842765318105432, | |
| "learning_rate": 7.923955623044775e-07, | |
| "loss": 2.3529, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.3783499737256963, | |
| "grad_norm": 0.3554244239833299, | |
| "learning_rate": 7.893894724605468e-07, | |
| "loss": 2.2397, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3804519180241724, | |
| "grad_norm": 0.4463726239175773, | |
| "learning_rate": 7.863675700402526e-07, | |
| "loss": 2.3635, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.38255386232264843, | |
| "grad_norm": 0.36826384823166514, | |
| "learning_rate": 7.833300201638474e-07, | |
| "loss": 2.3262, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.38465580662112453, | |
| "grad_norm": 0.40131324496051124, | |
| "learning_rate": 7.802769888065789e-07, | |
| "loss": 2.3718, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.38675775091960063, | |
| "grad_norm": 0.3849897857523413, | |
| "learning_rate": 7.772086427896211e-07, | |
| "loss": 2.2332, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.38885969521807673, | |
| "grad_norm": 0.36493755016771345, | |
| "learning_rate": 7.741251497709583e-07, | |
| "loss": 2.3377, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.39096163951655283, | |
| "grad_norm": 0.35102308079227, | |
| "learning_rate": 7.710266782362247e-07, | |
| "loss": 2.3105, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.3930635838150289, | |
| "grad_norm": 0.38998005813653297, | |
| "learning_rate": 7.679133974894982e-07, | |
| "loss": 2.3349, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.39516552811350497, | |
| "grad_norm": 0.379125931962091, | |
| "learning_rate": 7.647854776440495e-07, | |
| "loss": 2.2724, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.39726747241198107, | |
| "grad_norm": 0.3947787843638888, | |
| "learning_rate": 7.616430896130455e-07, | |
| "loss": 2.337, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.39936941671045717, | |
| "grad_norm": 0.37487637035067606, | |
| "learning_rate": 7.584864051002126e-07, | |
| "loss": 2.3746, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.40147136100893327, | |
| "grad_norm": 0.366432821290813, | |
| "learning_rate": 7.553155965904534e-07, | |
| "loss": 2.3042, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.40357330530740937, | |
| "grad_norm": 0.34566325498775646, | |
| "learning_rate": 7.521308373404217e-07, | |
| "loss": 2.2799, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.40567524960588547, | |
| "grad_norm": 0.360571812024321, | |
| "learning_rate": 7.489323013690561e-07, | |
| "loss": 2.1848, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.4077771939043615, | |
| "grad_norm": 0.38102049467871574, | |
| "learning_rate": 7.457201634480712e-07, | |
| "loss": 2.3506, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.4098791382028376, | |
| "grad_norm": 0.4157458990557322, | |
| "learning_rate": 7.424945990924079e-07, | |
| "loss": 2.2602, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.4119810825013137, | |
| "grad_norm": 0.3815988927632132, | |
| "learning_rate": 7.392557845506432e-07, | |
| "loss": 2.39, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4140830267997898, | |
| "grad_norm": 0.3575553199145919, | |
| "learning_rate": 7.360038967953597e-07, | |
| "loss": 2.3257, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.4161849710982659, | |
| "grad_norm": 0.37179609481335857, | |
| "learning_rate": 7.327391135134749e-07, | |
| "loss": 2.3281, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.418286915396742, | |
| "grad_norm": 0.35686209920084183, | |
| "learning_rate": 7.294616130965336e-07, | |
| "loss": 2.2884, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.42038885969521805, | |
| "grad_norm": 0.3966932403444605, | |
| "learning_rate": 7.261715746309593e-07, | |
| "loss": 2.3668, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.42249080399369415, | |
| "grad_norm": 0.37119910362955255, | |
| "learning_rate": 7.228691778882692e-07, | |
| "loss": 2.216, | |
| "memory/device_mem_reserved(gib)": 69.0, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.42459274829217025, | |
| "grad_norm": 0.34835664991688975, | |
| "learning_rate": 7.195546033152506e-07, | |
| "loss": 2.3013, | |
| "memory/device_mem_reserved(gib)": 69.04, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.42669469259064635, | |
| "grad_norm": 0.36756486717782244, | |
| "learning_rate": 7.162280320241019e-07, | |
| "loss": 2.2983, | |
| "memory/device_mem_reserved(gib)": 69.04, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.42879663688912245, | |
| "grad_norm": 0.3580849549174155, | |
| "learning_rate": 7.128896457825363e-07, | |
| "loss": 2.2168, | |
| "memory/device_mem_reserved(gib)": 69.04, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.43089858118759855, | |
| "grad_norm": 0.38919198730377413, | |
| "learning_rate": 7.095396270038492e-07, | |
| "loss": 2.3673, | |
| "memory/device_mem_reserved(gib)": 69.04, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.4330005254860746, | |
| "grad_norm": 0.45321125836545045, | |
| "learning_rate": 7.061781587369518e-07, | |
| "loss": 2.2495, | |
| "memory/device_mem_reserved(gib)": 69.04, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4351024697845507, | |
| "grad_norm": 0.3390158068246942, | |
| "learning_rate": 7.028054246563678e-07, | |
| "loss": 2.2959, | |
| "memory/device_mem_reserved(gib)": 69.04, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.4372044140830268, | |
| "grad_norm": 0.3932391160329032, | |
| "learning_rate": 6.99421609052199e-07, | |
| "loss": 2.3348, | |
| "memory/device_mem_reserved(gib)": 69.04, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4393063583815029, | |
| "grad_norm": 0.35196191595880966, | |
| "learning_rate": 6.960268968200538e-07, | |
| "loss": 2.3416, | |
| "memory/device_mem_reserved(gib)": 69.04, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.441408302679979, | |
| "grad_norm": 0.3970691259787115, | |
| "learning_rate": 6.92621473450945e-07, | |
| "loss": 2.3328, | |
| "memory/device_mem_reserved(gib)": 69.04, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4435102469784551, | |
| "grad_norm": 0.3362178241906251, | |
| "learning_rate": 6.892055250211551e-07, | |
| "loss": 2.2666, | |
| "memory/device_mem_reserved(gib)": 69.04, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.44561219127693114, | |
| "grad_norm": 0.3370093871143424, | |
| "learning_rate": 6.857792381820672e-07, | |
| "loss": 2.3654, | |
| "memory/device_mem_reserved(gib)": 69.04, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.44771413557540723, | |
| "grad_norm": 0.3439137104265468, | |
| "learning_rate": 6.823428001499676e-07, | |
| "loss": 2.3236, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.44981607987388333, | |
| "grad_norm": 0.363363512278423, | |
| "learning_rate": 6.788963986958152e-07, | |
| "loss": 2.3153, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.45191802417235943, | |
| "grad_norm": 0.3550856155819428, | |
| "learning_rate": 6.754402221349825e-07, | |
| "loss": 2.3337, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.45401996847083553, | |
| "grad_norm": 0.43364470288014306, | |
| "learning_rate": 6.71974459316964e-07, | |
| "loss": 2.3817, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.45612191276931163, | |
| "grad_norm": 0.3594186404992842, | |
| "learning_rate": 6.684992996150598e-07, | |
| "loss": 2.282, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.4582238570677877, | |
| "grad_norm": 0.348193721582919, | |
| "learning_rate": 6.650149329160257e-07, | |
| "loss": 2.3266, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4603258013662638, | |
| "grad_norm": 0.36563818617010935, | |
| "learning_rate": 6.615215496096986e-07, | |
| "loss": 2.2706, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.4624277456647399, | |
| "grad_norm": 0.3766707141167757, | |
| "learning_rate": 6.580193405785938e-07, | |
| "loss": 2.2786, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.464529689963216, | |
| "grad_norm": 0.37040693778721345, | |
| "learning_rate": 6.545084971874736e-07, | |
| "loss": 2.3041, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.4666316342616921, | |
| "grad_norm": 0.3814375380964394, | |
| "learning_rate": 6.509892112728928e-07, | |
| "loss": 2.2896, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.4687335785601682, | |
| "grad_norm": 0.38809585401355357, | |
| "learning_rate": 6.474616751327142e-07, | |
| "loss": 2.407, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.4708355228586443, | |
| "grad_norm": 0.3450197035654617, | |
| "learning_rate": 6.439260815156038e-07, | |
| "loss": 2.3212, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.4729374671571203, | |
| "grad_norm": 0.3638524400528564, | |
| "learning_rate": 6.403826236104965e-07, | |
| "loss": 2.3958, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.4750394114555964, | |
| "grad_norm": 11.416588328761524, | |
| "learning_rate": 6.368314950360415e-07, | |
| "loss": 2.4091, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.4771413557540725, | |
| "grad_norm": 0.38009234188930396, | |
| "learning_rate": 6.33272889830022e-07, | |
| "loss": 2.3481, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.4792433000525486, | |
| "grad_norm": 0.3874884323158228, | |
| "learning_rate": 6.297070024387534e-07, | |
| "loss": 2.2936, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.4813452443510247, | |
| "grad_norm": 0.3875050817077963, | |
| "learning_rate": 6.261340277064578e-07, | |
| "loss": 2.2781, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.4834471886495008, | |
| "grad_norm": 0.35862524615310853, | |
| "learning_rate": 6.225541608646179e-07, | |
| "loss": 2.317, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.48554913294797686, | |
| "grad_norm": 0.3684856860526338, | |
| "learning_rate": 6.189675975213093e-07, | |
| "loss": 2.2496, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.48765107724645296, | |
| "grad_norm": 0.3592072791824982, | |
| "learning_rate": 6.153745336505124e-07, | |
| "loss": 2.3916, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.48975302154492906, | |
| "grad_norm": 0.3492976591005929, | |
| "learning_rate": 6.117751655814037e-07, | |
| "loss": 2.3432, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.49185496584340516, | |
| "grad_norm": 0.39657164130018213, | |
| "learning_rate": 6.081696899876281e-07, | |
| "loss": 2.2399, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.49395691014188126, | |
| "grad_norm": 0.33918396528061745, | |
| "learning_rate": 6.045583038765537e-07, | |
| "loss": 2.2886, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.49605885444035736, | |
| "grad_norm": 0.3812617838396709, | |
| "learning_rate": 6.009412045785051e-07, | |
| "loss": 2.3345, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.4981607987388334, | |
| "grad_norm": 0.35316898092715904, | |
| "learning_rate": 5.973185897359827e-07, | |
| "loss": 2.3495, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.5002627430373096, | |
| "grad_norm": 0.35687689358418945, | |
| "learning_rate": 5.936906572928624e-07, | |
| "loss": 2.3206, | |
| "memory/device_mem_reserved(gib)": 69.1, | |
| "memory/max_mem_active(gib)": 63.57, | |
| "memory/max_mem_allocated(gib)": 62.86, | |
| "step": 238 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 475, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 238, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.365676632322867e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |