| { | |
| "best_metric": 0.79888368, | |
| "best_model_checkpoint": "/mnt/nas1/daoze/code/swift/output/llava1_6-llama3_1-8b-instruct-my/v33-20240901-191352/checkpoint-4200", | |
| "epoch": 1.9995860070378804, | |
| "eval_steps": 300, | |
| "global_step": 4830, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00041399296211964395, | |
| "grad_norm": 68.31517175098004, | |
| "learning_rate": 0.0, | |
| "loss": 7.09566689, | |
| "memory(GiB)": 42.23, | |
| "step": 1, | |
| "train_speed(iter/s)": 0.013537 | |
| }, | |
| { | |
| "epoch": 0.0020699648105982197, | |
| "grad_norm": 75.27486731705963, | |
| "learning_rate": 3.2339240870284233e-06, | |
| "loss": 4.4965229, | |
| "memory(GiB)": 48.78, | |
| "step": 5, | |
| "train_speed(iter/s)": 0.018721 | |
| }, | |
| { | |
| "epoch": 0.004139929621196439, | |
| "grad_norm": 7.7589717582741615, | |
| "learning_rate": 4.626699381900465e-06, | |
| "loss": 1.79170246, | |
| "memory(GiB)": 52.1, | |
| "step": 10, | |
| "train_speed(iter/s)": 0.01954 | |
| }, | |
| { | |
| "epoch": 0.00620989443179466, | |
| "grad_norm": 4.8519342782653485, | |
| "learning_rate": 5.44142070133146e-06, | |
| "loss": 1.36023655, | |
| "memory(GiB)": 52.1, | |
| "step": 15, | |
| "train_speed(iter/s)": 0.019784 | |
| }, | |
| { | |
| "epoch": 0.008279859242392879, | |
| "grad_norm": 3.375818313593022, | |
| "learning_rate": 6.0194746767725065e-06, | |
| "loss": 1.18811779, | |
| "memory(GiB)": 52.1, | |
| "step": 20, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 0.0103498240529911, | |
| "grad_norm": 3.5284378077706813, | |
| "learning_rate": 6.467848174056847e-06, | |
| "loss": 1.1600296, | |
| "memory(GiB)": 52.1, | |
| "step": 25, | |
| "train_speed(iter/s)": 0.020192 | |
| }, | |
| { | |
| "epoch": 0.01241978886358932, | |
| "grad_norm": 3.163950589670924, | |
| "learning_rate": 6.834195996203502e-06, | |
| "loss": 1.11185207, | |
| "memory(GiB)": 52.1, | |
| "step": 30, | |
| "train_speed(iter/s)": 0.020268 | |
| }, | |
| { | |
| "epoch": 0.014489753674187538, | |
| "grad_norm": 2.212271768126557, | |
| "learning_rate": 7.143938666407679e-06, | |
| "loss": 1.0538393, | |
| "memory(GiB)": 52.1, | |
| "step": 35, | |
| "train_speed(iter/s)": 0.020328 | |
| }, | |
| { | |
| "epoch": 0.016559718484785758, | |
| "grad_norm": 3.493503288451801, | |
| "learning_rate": 7.412249971644547e-06, | |
| "loss": 1.04750004, | |
| "memory(GiB)": 52.1, | |
| "step": 40, | |
| "train_speed(iter/s)": 0.020393 | |
| }, | |
| { | |
| "epoch": 0.01862968329538398, | |
| "grad_norm": 2.6291679253427644, | |
| "learning_rate": 7.648917315634497e-06, | |
| "loss": 1.04756851, | |
| "memory(GiB)": 52.1, | |
| "step": 45, | |
| "train_speed(iter/s)": 0.020444 | |
| }, | |
| { | |
| "epoch": 0.0206996481059822, | |
| "grad_norm": 3.1377476353112312, | |
| "learning_rate": 7.860623468928888e-06, | |
| "loss": 1.00407467, | |
| "memory(GiB)": 52.1, | |
| "step": 50, | |
| "train_speed(iter/s)": 0.02043 | |
| }, | |
| { | |
| "epoch": 0.022769612916580417, | |
| "grad_norm": 2.6547100112946733, | |
| "learning_rate": 8.05213497976565e-06, | |
| "loss": 1.01097145, | |
| "memory(GiB)": 52.1, | |
| "step": 55, | |
| "train_speed(iter/s)": 0.020407 | |
| }, | |
| { | |
| "epoch": 0.02483957772717864, | |
| "grad_norm": 2.6383576740248555, | |
| "learning_rate": 8.226971291075542e-06, | |
| "loss": 0.98796005, | |
| "memory(GiB)": 52.1, | |
| "step": 60, | |
| "train_speed(iter/s)": 0.020419 | |
| }, | |
| { | |
| "epoch": 0.02690954253777686, | |
| "grad_norm": 2.542613928151568, | |
| "learning_rate": 8.387805106618597e-06, | |
| "loss": 0.98850031, | |
| "memory(GiB)": 52.1, | |
| "step": 65, | |
| "train_speed(iter/s)": 0.020424 | |
| }, | |
| { | |
| "epoch": 0.028979507348375077, | |
| "grad_norm": 2.877488922903144, | |
| "learning_rate": 8.536713961279723e-06, | |
| "loss": 0.98919926, | |
| "memory(GiB)": 52.1, | |
| "step": 70, | |
| "train_speed(iter/s)": 0.020439 | |
| }, | |
| { | |
| "epoch": 0.031049472158973298, | |
| "grad_norm": 1.8237010588157114, | |
| "learning_rate": 8.675344788359883e-06, | |
| "loss": 1.012813, | |
| "memory(GiB)": 52.1, | |
| "step": 75, | |
| "train_speed(iter/s)": 0.020466 | |
| }, | |
| { | |
| "epoch": 0.033119436969571515, | |
| "grad_norm": 2.5261639534973788, | |
| "learning_rate": 8.805025266516589e-06, | |
| "loss": 0.98800411, | |
| "memory(GiB)": 52.1, | |
| "step": 80, | |
| "train_speed(iter/s)": 0.02047 | |
| }, | |
| { | |
| "epoch": 0.035189401780169736, | |
| "grad_norm": 2.2711243603226032, | |
| "learning_rate": 8.926841351029377e-06, | |
| "loss": 1.00139637, | |
| "memory(GiB)": 55.52, | |
| "step": 85, | |
| "train_speed(iter/s)": 0.020479 | |
| }, | |
| { | |
| "epoch": 0.03725936659076796, | |
| "grad_norm": 2.145948654847171, | |
| "learning_rate": 9.041692610506539e-06, | |
| "loss": 0.99127426, | |
| "memory(GiB)": 55.52, | |
| "step": 90, | |
| "train_speed(iter/s)": 0.020491 | |
| }, | |
| { | |
| "epoch": 0.03932933140136618, | |
| "grad_norm": 2.836740011107602, | |
| "learning_rate": 9.150332582159872e-06, | |
| "loss": 0.96552677, | |
| "memory(GiB)": 55.52, | |
| "step": 95, | |
| "train_speed(iter/s)": 0.020506 | |
| }, | |
| { | |
| "epoch": 0.0413992962119644, | |
| "grad_norm": 2.2398611362766006, | |
| "learning_rate": 9.25339876380093e-06, | |
| "loss": 0.98038893, | |
| "memory(GiB)": 55.52, | |
| "step": 100, | |
| "train_speed(iter/s)": 0.020501 | |
| }, | |
| { | |
| "epoch": 0.043469261022562614, | |
| "grad_norm": 2.4986121497854192, | |
| "learning_rate": 9.351435280710716e-06, | |
| "loss": 0.9604641, | |
| "memory(GiB)": 63.35, | |
| "step": 105, | |
| "train_speed(iter/s)": 0.020504 | |
| }, | |
| { | |
| "epoch": 0.045539225833160835, | |
| "grad_norm": 2.1909523678523755, | |
| "learning_rate": 9.444910274637691e-06, | |
| "loss": 0.93479166, | |
| "memory(GiB)": 63.35, | |
| "step": 110, | |
| "train_speed(iter/s)": 0.020519 | |
| }, | |
| { | |
| "epoch": 0.047609190643759056, | |
| "grad_norm": 2.5301628247983663, | |
| "learning_rate": 9.534229424247679e-06, | |
| "loss": 0.97118149, | |
| "memory(GiB)": 63.35, | |
| "step": 115, | |
| "train_speed(iter/s)": 0.020522 | |
| }, | |
| { | |
| "epoch": 0.04967915545435728, | |
| "grad_norm": 1.92102667189958, | |
| "learning_rate": 9.619746585947584e-06, | |
| "loss": 0.97038708, | |
| "memory(GiB)": 63.35, | |
| "step": 120, | |
| "train_speed(iter/s)": 0.020545 | |
| }, | |
| { | |
| "epoch": 0.0517491202649555, | |
| "grad_norm": 2.2968578337197036, | |
| "learning_rate": 9.701772261085271e-06, | |
| "loss": 0.96241703, | |
| "memory(GiB)": 63.35, | |
| "step": 125, | |
| "train_speed(iter/s)": 0.020544 | |
| }, | |
| { | |
| "epoch": 0.05381908507555372, | |
| "grad_norm": 2.110866882726078, | |
| "learning_rate": 9.780580401490638e-06, | |
| "loss": 0.95122089, | |
| "memory(GiB)": 63.35, | |
| "step": 130, | |
| "train_speed(iter/s)": 0.020551 | |
| }, | |
| { | |
| "epoch": 0.05588904988615193, | |
| "grad_norm": 2.082289679777962, | |
| "learning_rate": 9.856413929937534e-06, | |
| "loss": 0.95173302, | |
| "memory(GiB)": 63.35, | |
| "step": 135, | |
| "train_speed(iter/s)": 0.020557 | |
| }, | |
| { | |
| "epoch": 0.057959014696750154, | |
| "grad_norm": 2.6318522567639784, | |
| "learning_rate": 9.929489256151762e-06, | |
| "loss": 0.94850836, | |
| "memory(GiB)": 63.35, | |
| "step": 140, | |
| "train_speed(iter/s)": 0.020559 | |
| }, | |
| { | |
| "epoch": 0.060028979507348375, | |
| "grad_norm": 2.091985474615185, | |
| "learning_rate": 1e-05, | |
| "loss": 0.91576376, | |
| "memory(GiB)": 63.35, | |
| "step": 145, | |
| "train_speed(iter/s)": 0.020557 | |
| }, | |
| { | |
| "epoch": 0.062098944317946596, | |
| "grad_norm": 2.3841665763842705, | |
| "learning_rate": 9.991462113127002e-06, | |
| "loss": 0.93616524, | |
| "memory(GiB)": 63.35, | |
| "step": 150, | |
| "train_speed(iter/s)": 0.020563 | |
| }, | |
| { | |
| "epoch": 0.06416890912854481, | |
| "grad_norm": 2.1042524174231505, | |
| "learning_rate": 9.980789754535753e-06, | |
| "loss": 0.9277298, | |
| "memory(GiB)": 63.35, | |
| "step": 155, | |
| "train_speed(iter/s)": 0.020558 | |
| }, | |
| { | |
| "epoch": 0.06623887393914303, | |
| "grad_norm": 2.276077986025351, | |
| "learning_rate": 9.970117395944504e-06, | |
| "loss": 0.95900288, | |
| "memory(GiB)": 63.35, | |
| "step": 160, | |
| "train_speed(iter/s)": 0.020556 | |
| }, | |
| { | |
| "epoch": 0.06830883874974125, | |
| "grad_norm": 2.2459536201915387, | |
| "learning_rate": 9.959445037353256e-06, | |
| "loss": 0.94394236, | |
| "memory(GiB)": 63.35, | |
| "step": 165, | |
| "train_speed(iter/s)": 0.020552 | |
| }, | |
| { | |
| "epoch": 0.07037880356033947, | |
| "grad_norm": 2.517051668755459, | |
| "learning_rate": 9.948772678762007e-06, | |
| "loss": 0.94310379, | |
| "memory(GiB)": 63.35, | |
| "step": 170, | |
| "train_speed(iter/s)": 0.020552 | |
| }, | |
| { | |
| "epoch": 0.0724487683709377, | |
| "grad_norm": 2.2026553318124313, | |
| "learning_rate": 9.938100320170759e-06, | |
| "loss": 0.95436573, | |
| "memory(GiB)": 63.35, | |
| "step": 175, | |
| "train_speed(iter/s)": 0.020554 | |
| }, | |
| { | |
| "epoch": 0.07451873318153591, | |
| "grad_norm": 2.2476130486911465, | |
| "learning_rate": 9.92742796157951e-06, | |
| "loss": 0.93582458, | |
| "memory(GiB)": 63.35, | |
| "step": 180, | |
| "train_speed(iter/s)": 0.020553 | |
| }, | |
| { | |
| "epoch": 0.07658869799213414, | |
| "grad_norm": 2.388512026493847, | |
| "learning_rate": 9.916755602988262e-06, | |
| "loss": 0.91813745, | |
| "memory(GiB)": 63.35, | |
| "step": 185, | |
| "train_speed(iter/s)": 0.020558 | |
| }, | |
| { | |
| "epoch": 0.07865866280273236, | |
| "grad_norm": 2.3220003726970204, | |
| "learning_rate": 9.906083244397012e-06, | |
| "loss": 0.92115765, | |
| "memory(GiB)": 63.35, | |
| "step": 190, | |
| "train_speed(iter/s)": 0.020561 | |
| }, | |
| { | |
| "epoch": 0.08072862761333058, | |
| "grad_norm": 2.3768930570990805, | |
| "learning_rate": 9.895410885805764e-06, | |
| "loss": 0.91676846, | |
| "memory(GiB)": 63.35, | |
| "step": 195, | |
| "train_speed(iter/s)": 0.020562 | |
| }, | |
| { | |
| "epoch": 0.0827985924239288, | |
| "grad_norm": 4.2054479128966165, | |
| "learning_rate": 9.884738527214515e-06, | |
| "loss": 0.91852398, | |
| "memory(GiB)": 63.35, | |
| "step": 200, | |
| "train_speed(iter/s)": 0.020566 | |
| }, | |
| { | |
| "epoch": 0.084868557234527, | |
| "grad_norm": 2.14133807915419, | |
| "learning_rate": 9.874066168623266e-06, | |
| "loss": 0.91718044, | |
| "memory(GiB)": 63.35, | |
| "step": 205, | |
| "train_speed(iter/s)": 0.020563 | |
| }, | |
| { | |
| "epoch": 0.08693852204512523, | |
| "grad_norm": 1.9390816379786193, | |
| "learning_rate": 9.863393810032017e-06, | |
| "loss": 0.92340775, | |
| "memory(GiB)": 63.35, | |
| "step": 210, | |
| "train_speed(iter/s)": 0.020559 | |
| }, | |
| { | |
| "epoch": 0.08900848685572345, | |
| "grad_norm": 2.1261095243289208, | |
| "learning_rate": 9.852721451440769e-06, | |
| "loss": 0.90159931, | |
| "memory(GiB)": 63.35, | |
| "step": 215, | |
| "train_speed(iter/s)": 0.020561 | |
| }, | |
| { | |
| "epoch": 0.09107845166632167, | |
| "grad_norm": 2.9284829291227243, | |
| "learning_rate": 9.842049092849521e-06, | |
| "loss": 0.91123104, | |
| "memory(GiB)": 63.35, | |
| "step": 220, | |
| "train_speed(iter/s)": 0.020557 | |
| }, | |
| { | |
| "epoch": 0.09314841647691989, | |
| "grad_norm": 2.7049739018013583, | |
| "learning_rate": 9.831376734258272e-06, | |
| "loss": 0.941084, | |
| "memory(GiB)": 63.35, | |
| "step": 225, | |
| "train_speed(iter/s)": 0.02056 | |
| }, | |
| { | |
| "epoch": 0.09521838128751811, | |
| "grad_norm": 2.0467249315549845, | |
| "learning_rate": 9.820704375667023e-06, | |
| "loss": 0.90574436, | |
| "memory(GiB)": 63.35, | |
| "step": 230, | |
| "train_speed(iter/s)": 0.020561 | |
| }, | |
| { | |
| "epoch": 0.09728834609811633, | |
| "grad_norm": 2.1061950654127006, | |
| "learning_rate": 9.810032017075774e-06, | |
| "loss": 0.92933855, | |
| "memory(GiB)": 63.35, | |
| "step": 235, | |
| "train_speed(iter/s)": 0.020568 | |
| }, | |
| { | |
| "epoch": 0.09935831090871455, | |
| "grad_norm": 2.390745868374031, | |
| "learning_rate": 9.799359658484527e-06, | |
| "loss": 0.9383173, | |
| "memory(GiB)": 63.35, | |
| "step": 240, | |
| "train_speed(iter/s)": 0.02057 | |
| }, | |
| { | |
| "epoch": 0.10142827571931277, | |
| "grad_norm": 1.9948725536279355, | |
| "learning_rate": 9.788687299893276e-06, | |
| "loss": 0.85975437, | |
| "memory(GiB)": 63.35, | |
| "step": 245, | |
| "train_speed(iter/s)": 0.020577 | |
| }, | |
| { | |
| "epoch": 0.103498240529911, | |
| "grad_norm": 2.29936461247775, | |
| "learning_rate": 9.77801494130203e-06, | |
| "loss": 0.90683613, | |
| "memory(GiB)": 63.47, | |
| "step": 250, | |
| "train_speed(iter/s)": 0.020575 | |
| }, | |
| { | |
| "epoch": 0.10556820534050922, | |
| "grad_norm": 1.8961456174283475, | |
| "learning_rate": 9.76734258271078e-06, | |
| "loss": 0.88947477, | |
| "memory(GiB)": 63.47, | |
| "step": 255, | |
| "train_speed(iter/s)": 0.020574 | |
| }, | |
| { | |
| "epoch": 0.10763817015110744, | |
| "grad_norm": 1.8558977060217532, | |
| "learning_rate": 9.756670224119531e-06, | |
| "loss": 0.90292645, | |
| "memory(GiB)": 63.47, | |
| "step": 260, | |
| "train_speed(iter/s)": 0.020581 | |
| }, | |
| { | |
| "epoch": 0.10970813496170564, | |
| "grad_norm": 2.1017797656853725, | |
| "learning_rate": 9.745997865528282e-06, | |
| "loss": 0.92776756, | |
| "memory(GiB)": 63.47, | |
| "step": 265, | |
| "train_speed(iter/s)": 0.020579 | |
| }, | |
| { | |
| "epoch": 0.11177809977230387, | |
| "grad_norm": 1.9051226867861688, | |
| "learning_rate": 9.735325506937033e-06, | |
| "loss": 0.90565796, | |
| "memory(GiB)": 63.59, | |
| "step": 270, | |
| "train_speed(iter/s)": 0.020578 | |
| }, | |
| { | |
| "epoch": 0.11384806458290209, | |
| "grad_norm": 1.932217719000402, | |
| "learning_rate": 9.724653148345784e-06, | |
| "loss": 0.90403481, | |
| "memory(GiB)": 63.59, | |
| "step": 275, | |
| "train_speed(iter/s)": 0.020575 | |
| }, | |
| { | |
| "epoch": 0.11591802939350031, | |
| "grad_norm": 3.033913485092789, | |
| "learning_rate": 9.713980789754537e-06, | |
| "loss": 0.86916351, | |
| "memory(GiB)": 63.59, | |
| "step": 280, | |
| "train_speed(iter/s)": 0.020575 | |
| }, | |
| { | |
| "epoch": 0.11798799420409853, | |
| "grad_norm": 1.9434774416915237, | |
| "learning_rate": 9.703308431163288e-06, | |
| "loss": 0.87491503, | |
| "memory(GiB)": 63.59, | |
| "step": 285, | |
| "train_speed(iter/s)": 0.020569 | |
| }, | |
| { | |
| "epoch": 0.12005795901469675, | |
| "grad_norm": 2.051766135268311, | |
| "learning_rate": 9.69263607257204e-06, | |
| "loss": 0.89817352, | |
| "memory(GiB)": 63.59, | |
| "step": 290, | |
| "train_speed(iter/s)": 0.020572 | |
| }, | |
| { | |
| "epoch": 0.12212792382529497, | |
| "grad_norm": 2.2615047465793796, | |
| "learning_rate": 9.68196371398079e-06, | |
| "loss": 0.89681797, | |
| "memory(GiB)": 63.59, | |
| "step": 295, | |
| "train_speed(iter/s)": 0.020574 | |
| }, | |
| { | |
| "epoch": 0.12419788863589319, | |
| "grad_norm": 2.3348971307519637, | |
| "learning_rate": 9.671291355389541e-06, | |
| "loss": 0.91658554, | |
| "memory(GiB)": 63.59, | |
| "step": 300, | |
| "train_speed(iter/s)": 0.020576 | |
| }, | |
| { | |
| "epoch": 0.12419788863589319, | |
| "eval_loss": 0.9112715721130371, | |
| "eval_runtime": 338.0523, | |
| "eval_samples_per_second": 18.476, | |
| "eval_steps_per_second": 1.157, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1262678534464914, | |
| "grad_norm": 2.0234892490546614, | |
| "learning_rate": 9.660618996798294e-06, | |
| "loss": 0.89021435, | |
| "memory(GiB)": 63.59, | |
| "step": 305, | |
| "train_speed(iter/s)": 0.02005 | |
| }, | |
| { | |
| "epoch": 0.12833781825708962, | |
| "grad_norm": 2.3363054707830195, | |
| "learning_rate": 9.649946638207045e-06, | |
| "loss": 0.87254162, | |
| "memory(GiB)": 63.59, | |
| "step": 310, | |
| "train_speed(iter/s)": 0.02006 | |
| }, | |
| { | |
| "epoch": 0.13040778306768785, | |
| "grad_norm": 1.820741232007416, | |
| "learning_rate": 9.639274279615796e-06, | |
| "loss": 0.90060663, | |
| "memory(GiB)": 63.59, | |
| "step": 315, | |
| "train_speed(iter/s)": 0.020066 | |
| }, | |
| { | |
| "epoch": 0.13247774787828606, | |
| "grad_norm": 1.9084755504752218, | |
| "learning_rate": 9.628601921024547e-06, | |
| "loss": 0.8869771, | |
| "memory(GiB)": 63.59, | |
| "step": 320, | |
| "train_speed(iter/s)": 0.020075 | |
| }, | |
| { | |
| "epoch": 0.1345477126888843, | |
| "grad_norm": 1.8856515103808584, | |
| "learning_rate": 9.617929562433298e-06, | |
| "loss": 0.87808056, | |
| "memory(GiB)": 63.59, | |
| "step": 325, | |
| "train_speed(iter/s)": 0.020084 | |
| }, | |
| { | |
| "epoch": 0.1366176774994825, | |
| "grad_norm": 1.9384672045466198, | |
| "learning_rate": 9.60725720384205e-06, | |
| "loss": 0.88911896, | |
| "memory(GiB)": 63.59, | |
| "step": 330, | |
| "train_speed(iter/s)": 0.020095 | |
| }, | |
| { | |
| "epoch": 0.13868764231008074, | |
| "grad_norm": 1.9948177737503383, | |
| "learning_rate": 9.596584845250802e-06, | |
| "loss": 0.8958828, | |
| "memory(GiB)": 63.59, | |
| "step": 335, | |
| "train_speed(iter/s)": 0.020101 | |
| }, | |
| { | |
| "epoch": 0.14075760712067895, | |
| "grad_norm": 2.2317608749634874, | |
| "learning_rate": 9.585912486659551e-06, | |
| "loss": 0.89190845, | |
| "memory(GiB)": 63.59, | |
| "step": 340, | |
| "train_speed(iter/s)": 0.020112 | |
| }, | |
| { | |
| "epoch": 0.14282757193127718, | |
| "grad_norm": 2.1298996429991908, | |
| "learning_rate": 9.575240128068304e-06, | |
| "loss": 0.87826462, | |
| "memory(GiB)": 63.59, | |
| "step": 345, | |
| "train_speed(iter/s)": 0.02012 | |
| }, | |
| { | |
| "epoch": 0.1448975367418754, | |
| "grad_norm": 1.9367356654913552, | |
| "learning_rate": 9.564567769477055e-06, | |
| "loss": 0.88935146, | |
| "memory(GiB)": 63.59, | |
| "step": 350, | |
| "train_speed(iter/s)": 0.020128 | |
| }, | |
| { | |
| "epoch": 0.1469675015524736, | |
| "grad_norm": 2.965286627679984, | |
| "learning_rate": 9.553895410885806e-06, | |
| "loss": 0.91815538, | |
| "memory(GiB)": 63.59, | |
| "step": 355, | |
| "train_speed(iter/s)": 0.020135 | |
| }, | |
| { | |
| "epoch": 0.14903746636307183, | |
| "grad_norm": 2.322705848413725, | |
| "learning_rate": 9.543223052294557e-06, | |
| "loss": 0.89534588, | |
| "memory(GiB)": 63.59, | |
| "step": 360, | |
| "train_speed(iter/s)": 0.020143 | |
| }, | |
| { | |
| "epoch": 0.15110743117367004, | |
| "grad_norm": 1.9467115924709606, | |
| "learning_rate": 9.53255069370331e-06, | |
| "loss": 0.87547607, | |
| "memory(GiB)": 63.59, | |
| "step": 365, | |
| "train_speed(iter/s)": 0.020146 | |
| }, | |
| { | |
| "epoch": 0.15317739598426827, | |
| "grad_norm": 2.056244423727218, | |
| "learning_rate": 9.521878335112061e-06, | |
| "loss": 0.87522736, | |
| "memory(GiB)": 63.59, | |
| "step": 370, | |
| "train_speed(iter/s)": 0.02015 | |
| }, | |
| { | |
| "epoch": 0.15524736079486648, | |
| "grad_norm": 1.9570323722204157, | |
| "learning_rate": 9.511205976520812e-06, | |
| "loss": 0.88565502, | |
| "memory(GiB)": 63.59, | |
| "step": 375, | |
| "train_speed(iter/s)": 0.020154 | |
| }, | |
| { | |
| "epoch": 0.15731732560546471, | |
| "grad_norm": 1.9075809920338722, | |
| "learning_rate": 9.500533617929563e-06, | |
| "loss": 0.87662697, | |
| "memory(GiB)": 63.59, | |
| "step": 380, | |
| "train_speed(iter/s)": 0.020159 | |
| }, | |
| { | |
| "epoch": 0.15938729041606292, | |
| "grad_norm": 2.3674969791857983, | |
| "learning_rate": 9.489861259338314e-06, | |
| "loss": 0.88602619, | |
| "memory(GiB)": 63.59, | |
| "step": 385, | |
| "train_speed(iter/s)": 0.020163 | |
| }, | |
| { | |
| "epoch": 0.16145725522666116, | |
| "grad_norm": 2.157304860709474, | |
| "learning_rate": 9.479188900747067e-06, | |
| "loss": 0.86379642, | |
| "memory(GiB)": 63.59, | |
| "step": 390, | |
| "train_speed(iter/s)": 0.02017 | |
| }, | |
| { | |
| "epoch": 0.16352722003725936, | |
| "grad_norm": 1.8795995948139297, | |
| "learning_rate": 9.468516542155816e-06, | |
| "loss": 0.88137684, | |
| "memory(GiB)": 63.59, | |
| "step": 395, | |
| "train_speed(iter/s)": 0.020176 | |
| }, | |
| { | |
| "epoch": 0.1655971848478576, | |
| "grad_norm": 1.8996836205094734, | |
| "learning_rate": 9.457844183564569e-06, | |
| "loss": 0.86949444, | |
| "memory(GiB)": 63.59, | |
| "step": 400, | |
| "train_speed(iter/s)": 0.020182 | |
| }, | |
| { | |
| "epoch": 0.1676671496584558, | |
| "grad_norm": 2.0977604679128854, | |
| "learning_rate": 9.44717182497332e-06, | |
| "loss": 0.85557442, | |
| "memory(GiB)": 63.59, | |
| "step": 405, | |
| "train_speed(iter/s)": 0.020184 | |
| }, | |
| { | |
| "epoch": 0.169737114469054, | |
| "grad_norm": 2.2253818762342155, | |
| "learning_rate": 9.436499466382071e-06, | |
| "loss": 0.85497751, | |
| "memory(GiB)": 63.59, | |
| "step": 410, | |
| "train_speed(iter/s)": 0.02019 | |
| }, | |
| { | |
| "epoch": 0.17180707927965225, | |
| "grad_norm": 1.9007105346828383, | |
| "learning_rate": 9.425827107790822e-06, | |
| "loss": 0.86072025, | |
| "memory(GiB)": 63.59, | |
| "step": 415, | |
| "train_speed(iter/s)": 0.020193 | |
| }, | |
| { | |
| "epoch": 0.17387704409025045, | |
| "grad_norm": 2.1940409564689656, | |
| "learning_rate": 9.415154749199575e-06, | |
| "loss": 0.89283857, | |
| "memory(GiB)": 63.59, | |
| "step": 420, | |
| "train_speed(iter/s)": 0.020198 | |
| }, | |
| { | |
| "epoch": 0.1759470089008487, | |
| "grad_norm": 2.2711317580338912, | |
| "learning_rate": 9.404482390608326e-06, | |
| "loss": 0.8422184, | |
| "memory(GiB)": 63.59, | |
| "step": 425, | |
| "train_speed(iter/s)": 0.020202 | |
| }, | |
| { | |
| "epoch": 0.1780169737114469, | |
| "grad_norm": 1.9734252369885248, | |
| "learning_rate": 9.393810032017077e-06, | |
| "loss": 0.90587616, | |
| "memory(GiB)": 63.59, | |
| "step": 430, | |
| "train_speed(iter/s)": 0.020208 | |
| }, | |
| { | |
| "epoch": 0.18008693852204513, | |
| "grad_norm": 2.1358019624149653, | |
| "learning_rate": 9.383137673425828e-06, | |
| "loss": 0.87137203, | |
| "memory(GiB)": 63.59, | |
| "step": 435, | |
| "train_speed(iter/s)": 0.020212 | |
| }, | |
| { | |
| "epoch": 0.18215690333264334, | |
| "grad_norm": 2.136663123639741, | |
| "learning_rate": 9.372465314834579e-06, | |
| "loss": 0.84276152, | |
| "memory(GiB)": 63.59, | |
| "step": 440, | |
| "train_speed(iter/s)": 0.020218 | |
| }, | |
| { | |
| "epoch": 0.18422686814324157, | |
| "grad_norm": 2.1730526677654005, | |
| "learning_rate": 9.361792956243332e-06, | |
| "loss": 0.86879997, | |
| "memory(GiB)": 63.59, | |
| "step": 445, | |
| "train_speed(iter/s)": 0.020222 | |
| }, | |
| { | |
| "epoch": 0.18629683295383978, | |
| "grad_norm": 1.7007840288725673, | |
| "learning_rate": 9.351120597652081e-06, | |
| "loss": 0.85356216, | |
| "memory(GiB)": 63.59, | |
| "step": 450, | |
| "train_speed(iter/s)": 0.020224 | |
| }, | |
| { | |
| "epoch": 0.18836679776443802, | |
| "grad_norm": 2.333224503644692, | |
| "learning_rate": 9.340448239060834e-06, | |
| "loss": 0.87298975, | |
| "memory(GiB)": 63.59, | |
| "step": 455, | |
| "train_speed(iter/s)": 0.02023 | |
| }, | |
| { | |
| "epoch": 0.19043676257503622, | |
| "grad_norm": 1.7497884985137717, | |
| "learning_rate": 9.329775880469585e-06, | |
| "loss": 0.89487724, | |
| "memory(GiB)": 63.59, | |
| "step": 460, | |
| "train_speed(iter/s)": 0.02023 | |
| }, | |
| { | |
| "epoch": 0.19250672738563446, | |
| "grad_norm": 1.7881559718064066, | |
| "learning_rate": 9.319103521878336e-06, | |
| "loss": 0.88939381, | |
| "memory(GiB)": 63.59, | |
| "step": 465, | |
| "train_speed(iter/s)": 0.020232 | |
| }, | |
| { | |
| "epoch": 0.19457669219623266, | |
| "grad_norm": 2.5056663267756605, | |
| "learning_rate": 9.308431163287087e-06, | |
| "loss": 0.85123787, | |
| "memory(GiB)": 63.59, | |
| "step": 470, | |
| "train_speed(iter/s)": 0.020236 | |
| }, | |
| { | |
| "epoch": 0.19664665700683087, | |
| "grad_norm": 2.2621024798210403, | |
| "learning_rate": 9.29775880469584e-06, | |
| "loss": 0.87380323, | |
| "memory(GiB)": 63.59, | |
| "step": 475, | |
| "train_speed(iter/s)": 0.020238 | |
| }, | |
| { | |
| "epoch": 0.1987166218174291, | |
| "grad_norm": 1.9008868824283842, | |
| "learning_rate": 9.287086446104589e-06, | |
| "loss": 0.82988033, | |
| "memory(GiB)": 63.59, | |
| "step": 480, | |
| "train_speed(iter/s)": 0.020241 | |
| }, | |
| { | |
| "epoch": 0.2007865866280273, | |
| "grad_norm": 1.6960491773696469, | |
| "learning_rate": 9.276414087513342e-06, | |
| "loss": 0.85842476, | |
| "memory(GiB)": 63.59, | |
| "step": 485, | |
| "train_speed(iter/s)": 0.020243 | |
| }, | |
| { | |
| "epoch": 0.20285655143862555, | |
| "grad_norm": 1.8755094068628242, | |
| "learning_rate": 9.265741728922093e-06, | |
| "loss": 0.86819458, | |
| "memory(GiB)": 63.59, | |
| "step": 490, | |
| "train_speed(iter/s)": 0.020249 | |
| }, | |
| { | |
| "epoch": 0.20492651624922376, | |
| "grad_norm": 1.6029255395235227, | |
| "learning_rate": 9.255069370330844e-06, | |
| "loss": 0.86832209, | |
| "memory(GiB)": 63.59, | |
| "step": 495, | |
| "train_speed(iter/s)": 0.020253 | |
| }, | |
| { | |
| "epoch": 0.206996481059822, | |
| "grad_norm": 1.8312986531673774, | |
| "learning_rate": 9.244397011739595e-06, | |
| "loss": 0.87804108, | |
| "memory(GiB)": 63.59, | |
| "step": 500, | |
| "train_speed(iter/s)": 0.020256 | |
| }, | |
| { | |
| "epoch": 0.2090664458704202, | |
| "grad_norm": 2.2159879518724686, | |
| "learning_rate": 9.233724653148346e-06, | |
| "loss": 0.84401827, | |
| "memory(GiB)": 63.59, | |
| "step": 505, | |
| "train_speed(iter/s)": 0.020258 | |
| }, | |
| { | |
| "epoch": 0.21113641068101843, | |
| "grad_norm": 1.9489190185976173, | |
| "learning_rate": 9.223052294557098e-06, | |
| "loss": 0.83081837, | |
| "memory(GiB)": 63.59, | |
| "step": 510, | |
| "train_speed(iter/s)": 0.020262 | |
| }, | |
| { | |
| "epoch": 0.21320637549161664, | |
| "grad_norm": 1.8621375658015202, | |
| "learning_rate": 9.21237993596585e-06, | |
| "loss": 0.84451389, | |
| "memory(GiB)": 63.59, | |
| "step": 515, | |
| "train_speed(iter/s)": 0.020265 | |
| }, | |
| { | |
| "epoch": 0.21527634030221487, | |
| "grad_norm": 2.657592267470185, | |
| "learning_rate": 9.2017075773746e-06, | |
| "loss": 0.85252399, | |
| "memory(GiB)": 63.59, | |
| "step": 520, | |
| "train_speed(iter/s)": 0.020272 | |
| }, | |
| { | |
| "epoch": 0.21734630511281308, | |
| "grad_norm": 3.2134734541192556, | |
| "learning_rate": 9.191035218783352e-06, | |
| "loss": 0.85981674, | |
| "memory(GiB)": 63.59, | |
| "step": 525, | |
| "train_speed(iter/s)": 0.020278 | |
| }, | |
| { | |
| "epoch": 0.2194162699234113, | |
| "grad_norm": 1.979014196110588, | |
| "learning_rate": 9.180362860192104e-06, | |
| "loss": 0.85360508, | |
| "memory(GiB)": 63.59, | |
| "step": 530, | |
| "train_speed(iter/s)": 0.020281 | |
| }, | |
| { | |
| "epoch": 0.22148623473400952, | |
| "grad_norm": 2.263346027010783, | |
| "learning_rate": 9.169690501600854e-06, | |
| "loss": 0.8649641, | |
| "memory(GiB)": 63.59, | |
| "step": 535, | |
| "train_speed(iter/s)": 0.020285 | |
| }, | |
| { | |
| "epoch": 0.22355619954460773, | |
| "grad_norm": 2.0257701801426786, | |
| "learning_rate": 9.159018143009606e-06, | |
| "loss": 0.85079117, | |
| "memory(GiB)": 63.59, | |
| "step": 540, | |
| "train_speed(iter/s)": 0.020291 | |
| }, | |
| { | |
| "epoch": 0.22562616435520597, | |
| "grad_norm": 2.125007231598407, | |
| "learning_rate": 9.148345784418357e-06, | |
| "loss": 0.83591347, | |
| "memory(GiB)": 63.59, | |
| "step": 545, | |
| "train_speed(iter/s)": 0.020292 | |
| }, | |
| { | |
| "epoch": 0.22769612916580417, | |
| "grad_norm": 1.830826573395782, | |
| "learning_rate": 9.137673425827108e-06, | |
| "loss": 0.8597187, | |
| "memory(GiB)": 63.59, | |
| "step": 550, | |
| "train_speed(iter/s)": 0.020297 | |
| }, | |
| { | |
| "epoch": 0.2297660939764024, | |
| "grad_norm": 2.145744349446719, | |
| "learning_rate": 9.12700106723586e-06, | |
| "loss": 0.82627001, | |
| "memory(GiB)": 63.59, | |
| "step": 555, | |
| "train_speed(iter/s)": 0.020299 | |
| }, | |
| { | |
| "epoch": 0.23183605878700062, | |
| "grad_norm": 1.895819945079046, | |
| "learning_rate": 9.116328708644612e-06, | |
| "loss": 0.83418722, | |
| "memory(GiB)": 63.59, | |
| "step": 560, | |
| "train_speed(iter/s)": 0.020301 | |
| }, | |
| { | |
| "epoch": 0.23390602359759885, | |
| "grad_norm": 1.7330277741570008, | |
| "learning_rate": 9.105656350053362e-06, | |
| "loss": 0.83898754, | |
| "memory(GiB)": 63.59, | |
| "step": 565, | |
| "train_speed(iter/s)": 0.020301 | |
| }, | |
| { | |
| "epoch": 0.23597598840819706, | |
| "grad_norm": 1.873262235276853, | |
| "learning_rate": 9.094983991462114e-06, | |
| "loss": 0.86104965, | |
| "memory(GiB)": 63.59, | |
| "step": 570, | |
| "train_speed(iter/s)": 0.020307 | |
| }, | |
| { | |
| "epoch": 0.2380459532187953, | |
| "grad_norm": 1.8133555971052358, | |
| "learning_rate": 9.084311632870865e-06, | |
| "loss": 0.83760166, | |
| "memory(GiB)": 63.59, | |
| "step": 575, | |
| "train_speed(iter/s)": 0.020309 | |
| }, | |
| { | |
| "epoch": 0.2401159180293935, | |
| "grad_norm": 2.1026280097135377, | |
| "learning_rate": 9.073639274279616e-06, | |
| "loss": 0.85497513, | |
| "memory(GiB)": 63.59, | |
| "step": 580, | |
| "train_speed(iter/s)": 0.02031 | |
| }, | |
| { | |
| "epoch": 0.2421858828399917, | |
| "grad_norm": 1.7385609176743078, | |
| "learning_rate": 9.062966915688367e-06, | |
| "loss": 0.83206367, | |
| "memory(GiB)": 63.59, | |
| "step": 585, | |
| "train_speed(iter/s)": 0.02031 | |
| }, | |
| { | |
| "epoch": 0.24425584765058994, | |
| "grad_norm": 2.310226819070514, | |
| "learning_rate": 9.052294557097118e-06, | |
| "loss": 0.82417412, | |
| "memory(GiB)": 63.59, | |
| "step": 590, | |
| "train_speed(iter/s)": 0.020312 | |
| }, | |
| { | |
| "epoch": 0.24632581246118815, | |
| "grad_norm": 2.1660135303280126, | |
| "learning_rate": 9.041622198505871e-06, | |
| "loss": 0.8371232, | |
| "memory(GiB)": 63.59, | |
| "step": 595, | |
| "train_speed(iter/s)": 0.020312 | |
| }, | |
| { | |
| "epoch": 0.24839577727178638, | |
| "grad_norm": 2.0932218548460493, | |
| "learning_rate": 9.030949839914622e-06, | |
| "loss": 0.86303692, | |
| "memory(GiB)": 63.59, | |
| "step": 600, | |
| "train_speed(iter/s)": 0.020315 | |
| }, | |
| { | |
| "epoch": 0.24839577727178638, | |
| "eval_loss": 0.8761223554611206, | |
| "eval_runtime": 333.7076, | |
| "eval_samples_per_second": 18.717, | |
| "eval_steps_per_second": 1.172, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2504657420823846, | |
| "grad_norm": 1.9850417129561133, | |
| "learning_rate": 9.020277481323373e-06, | |
| "loss": 0.84975281, | |
| "memory(GiB)": 63.59, | |
| "step": 605, | |
| "train_speed(iter/s)": 0.02006 | |
| }, | |
| { | |
| "epoch": 0.2525357068929828, | |
| "grad_norm": 1.8024791223082373, | |
| "learning_rate": 9.009605122732124e-06, | |
| "loss": 0.83602209, | |
| "memory(GiB)": 63.59, | |
| "step": 610, | |
| "train_speed(iter/s)": 0.020063 | |
| }, | |
| { | |
| "epoch": 0.25460567170358106, | |
| "grad_norm": 1.795807849269691, | |
| "learning_rate": 8.998932764140877e-06, | |
| "loss": 0.84287434, | |
| "memory(GiB)": 63.59, | |
| "step": 615, | |
| "train_speed(iter/s)": 0.020066 | |
| }, | |
| { | |
| "epoch": 0.25667563651417924, | |
| "grad_norm": 2.8462701485274855, | |
| "learning_rate": 8.988260405549626e-06, | |
| "loss": 0.86969414, | |
| "memory(GiB)": 63.59, | |
| "step": 620, | |
| "train_speed(iter/s)": 0.020069 | |
| }, | |
| { | |
| "epoch": 0.2587456013247775, | |
| "grad_norm": 1.9467464102992238, | |
| "learning_rate": 8.977588046958379e-06, | |
| "loss": 0.84205284, | |
| "memory(GiB)": 63.59, | |
| "step": 625, | |
| "train_speed(iter/s)": 0.020075 | |
| }, | |
| { | |
| "epoch": 0.2608155661353757, | |
| "grad_norm": 1.9359113111268293, | |
| "learning_rate": 8.96691568836713e-06, | |
| "loss": 0.83059912, | |
| "memory(GiB)": 63.59, | |
| "step": 630, | |
| "train_speed(iter/s)": 0.02008 | |
| }, | |
| { | |
| "epoch": 0.26288553094597394, | |
| "grad_norm": 2.269649830017561, | |
| "learning_rate": 8.956243329775881e-06, | |
| "loss": 0.85204124, | |
| "memory(GiB)": 63.59, | |
| "step": 635, | |
| "train_speed(iter/s)": 0.020082 | |
| }, | |
| { | |
| "epoch": 0.2649554957565721, | |
| "grad_norm": 2.0739328945699014, | |
| "learning_rate": 8.945570971184632e-06, | |
| "loss": 0.83141527, | |
| "memory(GiB)": 63.59, | |
| "step": 640, | |
| "train_speed(iter/s)": 0.020085 | |
| }, | |
| { | |
| "epoch": 0.26702546056717036, | |
| "grad_norm": 2.39308971265692, | |
| "learning_rate": 8.934898612593383e-06, | |
| "loss": 0.82764578, | |
| "memory(GiB)": 63.59, | |
| "step": 645, | |
| "train_speed(iter/s)": 0.020088 | |
| }, | |
| { | |
| "epoch": 0.2690954253777686, | |
| "grad_norm": 2.014414879589864, | |
| "learning_rate": 8.924226254002136e-06, | |
| "loss": 0.8550128, | |
| "memory(GiB)": 63.59, | |
| "step": 650, | |
| "train_speed(iter/s)": 0.020095 | |
| }, | |
| { | |
| "epoch": 0.2711653901883668, | |
| "grad_norm": 2.8374721965360887, | |
| "learning_rate": 8.913553895410887e-06, | |
| "loss": 0.86329079, | |
| "memory(GiB)": 63.59, | |
| "step": 655, | |
| "train_speed(iter/s)": 0.020098 | |
| }, | |
| { | |
| "epoch": 0.273235354998965, | |
| "grad_norm": 1.9583062241735367, | |
| "learning_rate": 8.902881536819638e-06, | |
| "loss": 0.83884621, | |
| "memory(GiB)": 63.59, | |
| "step": 660, | |
| "train_speed(iter/s)": 0.020101 | |
| }, | |
| { | |
| "epoch": 0.27530531980956324, | |
| "grad_norm": 1.765892011718539, | |
| "learning_rate": 8.892209178228389e-06, | |
| "loss": 0.86448555, | |
| "memory(GiB)": 63.59, | |
| "step": 665, | |
| "train_speed(iter/s)": 0.020105 | |
| }, | |
| { | |
| "epoch": 0.2773752846201615, | |
| "grad_norm": 1.7474590269235404, | |
| "learning_rate": 8.88153681963714e-06, | |
| "loss": 0.81313992, | |
| "memory(GiB)": 63.59, | |
| "step": 670, | |
| "train_speed(iter/s)": 0.020107 | |
| }, | |
| { | |
| "epoch": 0.27944524943075966, | |
| "grad_norm": 1.9301834126279824, | |
| "learning_rate": 8.870864461045891e-06, | |
| "loss": 0.8374301, | |
| "memory(GiB)": 63.59, | |
| "step": 675, | |
| "train_speed(iter/s)": 0.02011 | |
| }, | |
| { | |
| "epoch": 0.2815152142413579, | |
| "grad_norm": 1.876790685008959, | |
| "learning_rate": 8.860192102454644e-06, | |
| "loss": 0.82494678, | |
| "memory(GiB)": 63.59, | |
| "step": 680, | |
| "train_speed(iter/s)": 0.020115 | |
| }, | |
| { | |
| "epoch": 0.2835851790519561, | |
| "grad_norm": 1.7854143077330529, | |
| "learning_rate": 8.849519743863395e-06, | |
| "loss": 0.83442841, | |
| "memory(GiB)": 63.59, | |
| "step": 685, | |
| "train_speed(iter/s)": 0.020116 | |
| }, | |
| { | |
| "epoch": 0.28565514386255436, | |
| "grad_norm": 1.8923637542669056, | |
| "learning_rate": 8.838847385272146e-06, | |
| "loss": 0.82085514, | |
| "memory(GiB)": 63.72, | |
| "step": 690, | |
| "train_speed(iter/s)": 0.020118 | |
| }, | |
| { | |
| "epoch": 0.28772510867315254, | |
| "grad_norm": 2.0170179705017066, | |
| "learning_rate": 8.828175026680897e-06, | |
| "loss": 0.85945168, | |
| "memory(GiB)": 63.72, | |
| "step": 695, | |
| "train_speed(iter/s)": 0.020121 | |
| }, | |
| { | |
| "epoch": 0.2897950734837508, | |
| "grad_norm": 3.3823068740775755, | |
| "learning_rate": 8.817502668089648e-06, | |
| "loss": 0.82407131, | |
| "memory(GiB)": 63.72, | |
| "step": 700, | |
| "train_speed(iter/s)": 0.020123 | |
| }, | |
| { | |
| "epoch": 0.291865038294349, | |
| "grad_norm": 1.9439481100139924, | |
| "learning_rate": 8.806830309498399e-06, | |
| "loss": 0.82105274, | |
| "memory(GiB)": 63.72, | |
| "step": 705, | |
| "train_speed(iter/s)": 0.020126 | |
| }, | |
| { | |
| "epoch": 0.2939350031049472, | |
| "grad_norm": 1.8993570568677929, | |
| "learning_rate": 8.796157950907152e-06, | |
| "loss": 0.83391781, | |
| "memory(GiB)": 63.72, | |
| "step": 710, | |
| "train_speed(iter/s)": 0.020131 | |
| }, | |
| { | |
| "epoch": 0.2960049679155454, | |
| "grad_norm": 2.175929579998878, | |
| "learning_rate": 8.785485592315903e-06, | |
| "loss": 0.84003325, | |
| "memory(GiB)": 63.72, | |
| "step": 715, | |
| "train_speed(iter/s)": 0.020137 | |
| }, | |
| { | |
| "epoch": 0.29807493272614366, | |
| "grad_norm": 1.6910798969618672, | |
| "learning_rate": 8.774813233724654e-06, | |
| "loss": 0.82005548, | |
| "memory(GiB)": 63.72, | |
| "step": 720, | |
| "train_speed(iter/s)": 0.02014 | |
| }, | |
| { | |
| "epoch": 0.3001448975367419, | |
| "grad_norm": 1.7577054177826072, | |
| "learning_rate": 8.764140875133405e-06, | |
| "loss": 0.84406672, | |
| "memory(GiB)": 63.72, | |
| "step": 725, | |
| "train_speed(iter/s)": 0.020144 | |
| }, | |
| { | |
| "epoch": 0.3022148623473401, | |
| "grad_norm": 1.9343300270246129, | |
| "learning_rate": 8.753468516542156e-06, | |
| "loss": 0.81861668, | |
| "memory(GiB)": 63.72, | |
| "step": 730, | |
| "train_speed(iter/s)": 0.020144 | |
| }, | |
| { | |
| "epoch": 0.3042848271579383, | |
| "grad_norm": 1.8986244788103208, | |
| "learning_rate": 8.742796157950909e-06, | |
| "loss": 0.81786537, | |
| "memory(GiB)": 63.72, | |
| "step": 735, | |
| "train_speed(iter/s)": 0.020148 | |
| }, | |
| { | |
| "epoch": 0.30635479196853654, | |
| "grad_norm": 2.095799409220846, | |
| "learning_rate": 8.73212379935966e-06, | |
| "loss": 0.83321962, | |
| "memory(GiB)": 63.72, | |
| "step": 740, | |
| "train_speed(iter/s)": 0.020151 | |
| }, | |
| { | |
| "epoch": 0.3084247567791348, | |
| "grad_norm": 1.9094006901482394, | |
| "learning_rate": 8.72145144076841e-06, | |
| "loss": 0.82653723, | |
| "memory(GiB)": 63.72, | |
| "step": 745, | |
| "train_speed(iter/s)": 0.020152 | |
| }, | |
| { | |
| "epoch": 0.31049472158973296, | |
| "grad_norm": 2.126120113530993, | |
| "learning_rate": 8.710779082177162e-06, | |
| "loss": 0.85463696, | |
| "memory(GiB)": 63.72, | |
| "step": 750, | |
| "train_speed(iter/s)": 0.020156 | |
| }, | |
| { | |
| "epoch": 0.3125646864003312, | |
| "grad_norm": 1.766780713214732, | |
| "learning_rate": 8.700106723585913e-06, | |
| "loss": 0.83797083, | |
| "memory(GiB)": 63.72, | |
| "step": 755, | |
| "train_speed(iter/s)": 0.020157 | |
| }, | |
| { | |
| "epoch": 0.31463465121092943, | |
| "grad_norm": 1.8957319688723608, | |
| "learning_rate": 8.689434364994664e-06, | |
| "loss": 0.81888847, | |
| "memory(GiB)": 63.72, | |
| "step": 760, | |
| "train_speed(iter/s)": 0.020159 | |
| }, | |
| { | |
| "epoch": 0.3167046160215276, | |
| "grad_norm": 1.9661061189594824, | |
| "learning_rate": 8.678762006403417e-06, | |
| "loss": 0.78800874, | |
| "memory(GiB)": 63.72, | |
| "step": 765, | |
| "train_speed(iter/s)": 0.020164 | |
| }, | |
| { | |
| "epoch": 0.31877458083212584, | |
| "grad_norm": 1.8837863956075926, | |
| "learning_rate": 8.668089647812166e-06, | |
| "loss": 0.84463196, | |
| "memory(GiB)": 63.72, | |
| "step": 770, | |
| "train_speed(iter/s)": 0.020167 | |
| }, | |
| { | |
| "epoch": 0.3208445456427241, | |
| "grad_norm": 2.5248078655238326, | |
| "learning_rate": 8.657417289220919e-06, | |
| "loss": 0.83094559, | |
| "memory(GiB)": 63.72, | |
| "step": 775, | |
| "train_speed(iter/s)": 0.02017 | |
| }, | |
| { | |
| "epoch": 0.3229145104533223, | |
| "grad_norm": 1.8996595550385447, | |
| "learning_rate": 8.64674493062967e-06, | |
| "loss": 0.81705608, | |
| "memory(GiB)": 63.72, | |
| "step": 780, | |
| "train_speed(iter/s)": 0.020173 | |
| }, | |
| { | |
| "epoch": 0.3249844752639205, | |
| "grad_norm": 1.8243459235808355, | |
| "learning_rate": 8.63607257203842e-06, | |
| "loss": 0.82983418, | |
| "memory(GiB)": 63.72, | |
| "step": 785, | |
| "train_speed(iter/s)": 0.020177 | |
| }, | |
| { | |
| "epoch": 0.3270544400745187, | |
| "grad_norm": 2.125198435674726, | |
| "learning_rate": 8.625400213447172e-06, | |
| "loss": 0.85153885, | |
| "memory(GiB)": 63.72, | |
| "step": 790, | |
| "train_speed(iter/s)": 0.02018 | |
| }, | |
| { | |
| "epoch": 0.32912440488511696, | |
| "grad_norm": 1.822527258966965, | |
| "learning_rate": 8.614727854855925e-06, | |
| "loss": 0.7932189, | |
| "memory(GiB)": 63.72, | |
| "step": 795, | |
| "train_speed(iter/s)": 0.020184 | |
| }, | |
| { | |
| "epoch": 0.3311943696957152, | |
| "grad_norm": 1.9585269031801074, | |
| "learning_rate": 8.604055496264676e-06, | |
| "loss": 0.80502253, | |
| "memory(GiB)": 63.72, | |
| "step": 800, | |
| "train_speed(iter/s)": 0.020188 | |
| }, | |
| { | |
| "epoch": 0.3332643345063134, | |
| "grad_norm": 1.9244862407118186, | |
| "learning_rate": 8.593383137673427e-06, | |
| "loss": 0.81400661, | |
| "memory(GiB)": 63.72, | |
| "step": 805, | |
| "train_speed(iter/s)": 0.020192 | |
| }, | |
| { | |
| "epoch": 0.3353342993169116, | |
| "grad_norm": 1.8781928942945239, | |
| "learning_rate": 8.582710779082178e-06, | |
| "loss": 0.82624207, | |
| "memory(GiB)": 63.72, | |
| "step": 810, | |
| "train_speed(iter/s)": 0.020195 | |
| }, | |
| { | |
| "epoch": 0.33740426412750985, | |
| "grad_norm": 2.4821098212553108, | |
| "learning_rate": 8.572038420490929e-06, | |
| "loss": 0.81296177, | |
| "memory(GiB)": 63.72, | |
| "step": 815, | |
| "train_speed(iter/s)": 0.020197 | |
| }, | |
| { | |
| "epoch": 0.339474228938108, | |
| "grad_norm": 3.2468832100225877, | |
| "learning_rate": 8.561366061899681e-06, | |
| "loss": 0.81447935, | |
| "memory(GiB)": 63.72, | |
| "step": 820, | |
| "train_speed(iter/s)": 0.0202 | |
| }, | |
| { | |
| "epoch": 0.34154419374870626, | |
| "grad_norm": 1.726217016729622, | |
| "learning_rate": 8.55069370330843e-06, | |
| "loss": 0.82119083, | |
| "memory(GiB)": 63.72, | |
| "step": 825, | |
| "train_speed(iter/s)": 0.020203 | |
| }, | |
| { | |
| "epoch": 0.3436141585593045, | |
| "grad_norm": 1.8200397633087098, | |
| "learning_rate": 8.540021344717184e-06, | |
| "loss": 0.80688438, | |
| "memory(GiB)": 63.72, | |
| "step": 830, | |
| "train_speed(iter/s)": 0.020205 | |
| }, | |
| { | |
| "epoch": 0.34568412336990273, | |
| "grad_norm": 1.7077741062644576, | |
| "learning_rate": 8.529348986125935e-06, | |
| "loss": 0.83244801, | |
| "memory(GiB)": 63.72, | |
| "step": 835, | |
| "train_speed(iter/s)": 0.020205 | |
| }, | |
| { | |
| "epoch": 0.3477540881805009, | |
| "grad_norm": 2.582896676288874, | |
| "learning_rate": 8.518676627534686e-06, | |
| "loss": 0.81135302, | |
| "memory(GiB)": 63.72, | |
| "step": 840, | |
| "train_speed(iter/s)": 0.020208 | |
| }, | |
| { | |
| "epoch": 0.34982405299109914, | |
| "grad_norm": 3.4613638587514033, | |
| "learning_rate": 8.508004268943437e-06, | |
| "loss": 0.80561113, | |
| "memory(GiB)": 63.72, | |
| "step": 845, | |
| "train_speed(iter/s)": 0.020209 | |
| }, | |
| { | |
| "epoch": 0.3518940178016974, | |
| "grad_norm": 1.6179386547462884, | |
| "learning_rate": 8.49733191035219e-06, | |
| "loss": 0.82198238, | |
| "memory(GiB)": 63.72, | |
| "step": 850, | |
| "train_speed(iter/s)": 0.020211 | |
| }, | |
| { | |
| "epoch": 0.3539639826122956, | |
| "grad_norm": 2.202413903162471, | |
| "learning_rate": 8.48665955176094e-06, | |
| "loss": 0.78598285, | |
| "memory(GiB)": 63.72, | |
| "step": 855, | |
| "train_speed(iter/s)": 0.020214 | |
| }, | |
| { | |
| "epoch": 0.3560339474228938, | |
| "grad_norm": 1.9513315920239633, | |
| "learning_rate": 8.475987193169691e-06, | |
| "loss": 0.80893326, | |
| "memory(GiB)": 63.72, | |
| "step": 860, | |
| "train_speed(iter/s)": 0.020219 | |
| }, | |
| { | |
| "epoch": 0.358103912233492, | |
| "grad_norm": 1.9113374189570778, | |
| "learning_rate": 8.465314834578443e-06, | |
| "loss": 0.8136569, | |
| "memory(GiB)": 63.72, | |
| "step": 865, | |
| "train_speed(iter/s)": 0.020222 | |
| }, | |
| { | |
| "epoch": 0.36017387704409026, | |
| "grad_norm": 2.084935583050277, | |
| "learning_rate": 8.454642475987194e-06, | |
| "loss": 0.81384058, | |
| "memory(GiB)": 63.72, | |
| "step": 870, | |
| "train_speed(iter/s)": 0.020226 | |
| }, | |
| { | |
| "epoch": 0.3622438418546885, | |
| "grad_norm": 1.6048226105298027, | |
| "learning_rate": 8.443970117395945e-06, | |
| "loss": 0.81689348, | |
| "memory(GiB)": 63.72, | |
| "step": 875, | |
| "train_speed(iter/s)": 0.020228 | |
| }, | |
| { | |
| "epoch": 0.3643138066652867, | |
| "grad_norm": 1.8081549724032602, | |
| "learning_rate": 8.433297758804696e-06, | |
| "loss": 0.8224082, | |
| "memory(GiB)": 63.72, | |
| "step": 880, | |
| "train_speed(iter/s)": 0.02023 | |
| }, | |
| { | |
| "epoch": 0.3663837714758849, | |
| "grad_norm": 1.8184484923663322, | |
| "learning_rate": 8.422625400213448e-06, | |
| "loss": 0.78473282, | |
| "memory(GiB)": 63.72, | |
| "step": 885, | |
| "train_speed(iter/s)": 0.020232 | |
| }, | |
| { | |
| "epoch": 0.36845373628648315, | |
| "grad_norm": 2.010882441005616, | |
| "learning_rate": 8.4119530416222e-06, | |
| "loss": 0.81135426, | |
| "memory(GiB)": 63.72, | |
| "step": 890, | |
| "train_speed(iter/s)": 0.020232 | |
| }, | |
| { | |
| "epoch": 0.3705237010970813, | |
| "grad_norm": 2.363919887534564, | |
| "learning_rate": 8.40128068303095e-06, | |
| "loss": 0.80090466, | |
| "memory(GiB)": 63.72, | |
| "step": 895, | |
| "train_speed(iter/s)": 0.020234 | |
| }, | |
| { | |
| "epoch": 0.37259366590767956, | |
| "grad_norm": 1.6332844070852461, | |
| "learning_rate": 8.390608324439701e-06, | |
| "loss": 0.81239138, | |
| "memory(GiB)": 63.72, | |
| "step": 900, | |
| "train_speed(iter/s)": 0.020237 | |
| }, | |
| { | |
| "epoch": 0.37259366590767956, | |
| "eval_loss": 0.8537026047706604, | |
| "eval_runtime": 333.2325, | |
| "eval_samples_per_second": 18.744, | |
| "eval_steps_per_second": 1.173, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3746636307182778, | |
| "grad_norm": 2.4425379037284607, | |
| "learning_rate": 8.379935965848454e-06, | |
| "loss": 0.81980333, | |
| "memory(GiB)": 63.72, | |
| "step": 905, | |
| "train_speed(iter/s)": 0.02007 | |
| }, | |
| { | |
| "epoch": 0.37673359552887603, | |
| "grad_norm": 2.2417018048030575, | |
| "learning_rate": 8.369263607257204e-06, | |
| "loss": 0.82684288, | |
| "memory(GiB)": 63.72, | |
| "step": 910, | |
| "train_speed(iter/s)": 0.020071 | |
| }, | |
| { | |
| "epoch": 0.3788035603394742, | |
| "grad_norm": 1.7587001424417825, | |
| "learning_rate": 8.358591248665956e-06, | |
| "loss": 0.83183241, | |
| "memory(GiB)": 63.72, | |
| "step": 915, | |
| "train_speed(iter/s)": 0.020074 | |
| }, | |
| { | |
| "epoch": 0.38087352515007245, | |
| "grad_norm": 1.5739208911232379, | |
| "learning_rate": 8.347918890074707e-06, | |
| "loss": 0.76680841, | |
| "memory(GiB)": 63.72, | |
| "step": 920, | |
| "train_speed(iter/s)": 0.020076 | |
| }, | |
| { | |
| "epoch": 0.3829434899606707, | |
| "grad_norm": 1.8684171867799126, | |
| "learning_rate": 8.337246531483458e-06, | |
| "loss": 0.85463772, | |
| "memory(GiB)": 63.72, | |
| "step": 925, | |
| "train_speed(iter/s)": 0.020079 | |
| }, | |
| { | |
| "epoch": 0.3850134547712689, | |
| "grad_norm": 1.8550376770414303, | |
| "learning_rate": 8.32657417289221e-06, | |
| "loss": 0.81911144, | |
| "memory(GiB)": 63.72, | |
| "step": 930, | |
| "train_speed(iter/s)": 0.020083 | |
| }, | |
| { | |
| "epoch": 0.3870834195818671, | |
| "grad_norm": 1.8194471444369447, | |
| "learning_rate": 8.31590181430096e-06, | |
| "loss": 0.83044968, | |
| "memory(GiB)": 63.72, | |
| "step": 935, | |
| "train_speed(iter/s)": 0.020086 | |
| }, | |
| { | |
| "epoch": 0.38915338439246533, | |
| "grad_norm": 2.095169413730359, | |
| "learning_rate": 8.305229455709713e-06, | |
| "loss": 0.82608871, | |
| "memory(GiB)": 63.72, | |
| "step": 940, | |
| "train_speed(iter/s)": 0.020088 | |
| }, | |
| { | |
| "epoch": 0.39122334920306356, | |
| "grad_norm": 13.855393242400119, | |
| "learning_rate": 8.294557097118464e-06, | |
| "loss": 0.81676388, | |
| "memory(GiB)": 63.72, | |
| "step": 945, | |
| "train_speed(iter/s)": 0.02009 | |
| }, | |
| { | |
| "epoch": 0.39329331401366174, | |
| "grad_norm": 2.100046441650532, | |
| "learning_rate": 8.283884738527215e-06, | |
| "loss": 0.81071377, | |
| "memory(GiB)": 63.72, | |
| "step": 950, | |
| "train_speed(iter/s)": 0.020093 | |
| }, | |
| { | |
| "epoch": 0.39536327882426, | |
| "grad_norm": 2.1608254386705594, | |
| "learning_rate": 8.273212379935966e-06, | |
| "loss": 0.78902674, | |
| "memory(GiB)": 63.72, | |
| "step": 955, | |
| "train_speed(iter/s)": 0.020095 | |
| }, | |
| { | |
| "epoch": 0.3974332436348582, | |
| "grad_norm": 1.61490095503505, | |
| "learning_rate": 8.262540021344719e-06, | |
| "loss": 0.78527632, | |
| "memory(GiB)": 63.72, | |
| "step": 960, | |
| "train_speed(iter/s)": 0.020097 | |
| }, | |
| { | |
| "epoch": 0.39950320844545645, | |
| "grad_norm": 2.589460194979307, | |
| "learning_rate": 8.251867662753468e-06, | |
| "loss": 0.81925201, | |
| "memory(GiB)": 63.72, | |
| "step": 965, | |
| "train_speed(iter/s)": 0.020098 | |
| }, | |
| { | |
| "epoch": 0.4015731732560546, | |
| "grad_norm": 1.8550853889727008, | |
| "learning_rate": 8.241195304162221e-06, | |
| "loss": 0.80688972, | |
| "memory(GiB)": 63.72, | |
| "step": 970, | |
| "train_speed(iter/s)": 0.020101 | |
| }, | |
| { | |
| "epoch": 0.40364313806665286, | |
| "grad_norm": 2.4637090355397517, | |
| "learning_rate": 8.230522945570972e-06, | |
| "loss": 0.82061405, | |
| "memory(GiB)": 63.72, | |
| "step": 975, | |
| "train_speed(iter/s)": 0.020104 | |
| }, | |
| { | |
| "epoch": 0.4057131028772511, | |
| "grad_norm": 1.9846780448601058, | |
| "learning_rate": 8.219850586979723e-06, | |
| "loss": 0.80861397, | |
| "memory(GiB)": 63.72, | |
| "step": 980, | |
| "train_speed(iter/s)": 0.020106 | |
| }, | |
| { | |
| "epoch": 0.40778306768784933, | |
| "grad_norm": 1.9138045073506678, | |
| "learning_rate": 8.209178228388474e-06, | |
| "loss": 0.79171362, | |
| "memory(GiB)": 63.72, | |
| "step": 985, | |
| "train_speed(iter/s)": 0.020108 | |
| }, | |
| { | |
| "epoch": 0.4098530324984475, | |
| "grad_norm": 2.100897386160194, | |
| "learning_rate": 8.198505869797227e-06, | |
| "loss": 0.78543482, | |
| "memory(GiB)": 63.72, | |
| "step": 990, | |
| "train_speed(iter/s)": 0.020113 | |
| }, | |
| { | |
| "epoch": 0.41192299730904575, | |
| "grad_norm": 2.1398479736163667, | |
| "learning_rate": 8.187833511205976e-06, | |
| "loss": 0.79959226, | |
| "memory(GiB)": 63.72, | |
| "step": 995, | |
| "train_speed(iter/s)": 0.020114 | |
| }, | |
| { | |
| "epoch": 0.413992962119644, | |
| "grad_norm": 1.8543885416746075, | |
| "learning_rate": 8.177161152614729e-06, | |
| "loss": 0.80144148, | |
| "memory(GiB)": 63.72, | |
| "step": 1000, | |
| "train_speed(iter/s)": 0.020116 | |
| }, | |
| { | |
| "epoch": 0.41606292693024216, | |
| "grad_norm": 1.8895963845216188, | |
| "learning_rate": 8.16648879402348e-06, | |
| "loss": 0.78557086, | |
| "memory(GiB)": 63.72, | |
| "step": 1005, | |
| "train_speed(iter/s)": 0.020118 | |
| }, | |
| { | |
| "epoch": 0.4181328917408404, | |
| "grad_norm": 1.7583886118404264, | |
| "learning_rate": 8.155816435432231e-06, | |
| "loss": 0.81441746, | |
| "memory(GiB)": 63.72, | |
| "step": 1010, | |
| "train_speed(iter/s)": 0.02012 | |
| }, | |
| { | |
| "epoch": 0.42020285655143863, | |
| "grad_norm": 1.8640464710188405, | |
| "learning_rate": 8.145144076840982e-06, | |
| "loss": 0.76718016, | |
| "memory(GiB)": 63.72, | |
| "step": 1015, | |
| "train_speed(iter/s)": 0.020122 | |
| }, | |
| { | |
| "epoch": 0.42227282136203687, | |
| "grad_norm": 2.0754981449007084, | |
| "learning_rate": 8.134471718249733e-06, | |
| "loss": 0.78537526, | |
| "memory(GiB)": 63.72, | |
| "step": 1020, | |
| "train_speed(iter/s)": 0.020125 | |
| }, | |
| { | |
| "epoch": 0.42434278617263504, | |
| "grad_norm": 2.1358764475250105, | |
| "learning_rate": 8.123799359658486e-06, | |
| "loss": 0.82194328, | |
| "memory(GiB)": 63.72, | |
| "step": 1025, | |
| "train_speed(iter/s)": 0.020128 | |
| }, | |
| { | |
| "epoch": 0.4264127509832333, | |
| "grad_norm": 1.940572767867165, | |
| "learning_rate": 8.113127001067237e-06, | |
| "loss": 0.8162715, | |
| "memory(GiB)": 63.72, | |
| "step": 1030, | |
| "train_speed(iter/s)": 0.020131 | |
| }, | |
| { | |
| "epoch": 0.4284827157938315, | |
| "grad_norm": 1.7824953515185047, | |
| "learning_rate": 8.102454642475988e-06, | |
| "loss": 0.78834782, | |
| "memory(GiB)": 63.72, | |
| "step": 1035, | |
| "train_speed(iter/s)": 0.020133 | |
| }, | |
| { | |
| "epoch": 0.43055268060442975, | |
| "grad_norm": 1.9585541886433688, | |
| "learning_rate": 8.091782283884739e-06, | |
| "loss": 0.79206867, | |
| "memory(GiB)": 63.72, | |
| "step": 1040, | |
| "train_speed(iter/s)": 0.020136 | |
| }, | |
| { | |
| "epoch": 0.43262264541502793, | |
| "grad_norm": 1.6194935665114412, | |
| "learning_rate": 8.081109925293492e-06, | |
| "loss": 0.80889845, | |
| "memory(GiB)": 63.72, | |
| "step": 1045, | |
| "train_speed(iter/s)": 0.020139 | |
| }, | |
| { | |
| "epoch": 0.43469261022562616, | |
| "grad_norm": 1.5909296898104581, | |
| "learning_rate": 8.070437566702241e-06, | |
| "loss": 0.76998816, | |
| "memory(GiB)": 63.72, | |
| "step": 1050, | |
| "train_speed(iter/s)": 0.020141 | |
| }, | |
| { | |
| "epoch": 0.4367625750362244, | |
| "grad_norm": 1.582985265467202, | |
| "learning_rate": 8.059765208110994e-06, | |
| "loss": 0.79827099, | |
| "memory(GiB)": 63.72, | |
| "step": 1055, | |
| "train_speed(iter/s)": 0.020143 | |
| }, | |
| { | |
| "epoch": 0.4388325398468226, | |
| "grad_norm": 1.9696406410447012, | |
| "learning_rate": 8.049092849519743e-06, | |
| "loss": 0.79220991, | |
| "memory(GiB)": 63.72, | |
| "step": 1060, | |
| "train_speed(iter/s)": 0.020146 | |
| }, | |
| { | |
| "epoch": 0.4409025046574208, | |
| "grad_norm": 1.9479888997003834, | |
| "learning_rate": 8.038420490928496e-06, | |
| "loss": 0.79184585, | |
| "memory(GiB)": 63.72, | |
| "step": 1065, | |
| "train_speed(iter/s)": 0.020148 | |
| }, | |
| { | |
| "epoch": 0.44297246946801905, | |
| "grad_norm": 1.7883498032309324, | |
| "learning_rate": 8.027748132337247e-06, | |
| "loss": 0.78507504, | |
| "memory(GiB)": 63.72, | |
| "step": 1070, | |
| "train_speed(iter/s)": 0.02015 | |
| }, | |
| { | |
| "epoch": 0.4450424342786173, | |
| "grad_norm": 1.6985331753731079, | |
| "learning_rate": 8.017075773745998e-06, | |
| "loss": 0.81149197, | |
| "memory(GiB)": 63.72, | |
| "step": 1075, | |
| "train_speed(iter/s)": 0.020154 | |
| }, | |
| { | |
| "epoch": 0.44711239908921546, | |
| "grad_norm": 1.7646873640943033, | |
| "learning_rate": 8.006403415154749e-06, | |
| "loss": 0.77548814, | |
| "memory(GiB)": 63.72, | |
| "step": 1080, | |
| "train_speed(iter/s)": 0.020158 | |
| }, | |
| { | |
| "epoch": 0.4491823638998137, | |
| "grad_norm": 1.7739180508215708, | |
| "learning_rate": 7.995731056563502e-06, | |
| "loss": 0.77309542, | |
| "memory(GiB)": 63.72, | |
| "step": 1085, | |
| "train_speed(iter/s)": 0.02016 | |
| }, | |
| { | |
| "epoch": 0.45125232871041193, | |
| "grad_norm": 1.6366153166920923, | |
| "learning_rate": 7.985058697972253e-06, | |
| "loss": 0.80448093, | |
| "memory(GiB)": 63.72, | |
| "step": 1090, | |
| "train_speed(iter/s)": 0.020163 | |
| }, | |
| { | |
| "epoch": 0.45332229352101017, | |
| "grad_norm": 1.9520729703143727, | |
| "learning_rate": 7.974386339381004e-06, | |
| "loss": 0.78142538, | |
| "memory(GiB)": 63.72, | |
| "step": 1095, | |
| "train_speed(iter/s)": 0.020164 | |
| }, | |
| { | |
| "epoch": 0.45539225833160835, | |
| "grad_norm": 1.9086497373347489, | |
| "learning_rate": 7.963713980789755e-06, | |
| "loss": 0.7876678, | |
| "memory(GiB)": 63.72, | |
| "step": 1100, | |
| "train_speed(iter/s)": 0.020167 | |
| }, | |
| { | |
| "epoch": 0.4574622231422066, | |
| "grad_norm": 1.773963819363606, | |
| "learning_rate": 7.953041622198506e-06, | |
| "loss": 0.817309, | |
| "memory(GiB)": 63.72, | |
| "step": 1105, | |
| "train_speed(iter/s)": 0.020169 | |
| }, | |
| { | |
| "epoch": 0.4595321879528048, | |
| "grad_norm": 1.7610901122064158, | |
| "learning_rate": 7.942369263607259e-06, | |
| "loss": 0.81472855, | |
| "memory(GiB)": 63.72, | |
| "step": 1110, | |
| "train_speed(iter/s)": 0.020171 | |
| }, | |
| { | |
| "epoch": 0.461602152763403, | |
| "grad_norm": 1.8809865504177992, | |
| "learning_rate": 7.931696905016008e-06, | |
| "loss": 0.79487166, | |
| "memory(GiB)": 63.72, | |
| "step": 1115, | |
| "train_speed(iter/s)": 0.020174 | |
| }, | |
| { | |
| "epoch": 0.46367211757400123, | |
| "grad_norm": 1.8810868009507724, | |
| "learning_rate": 7.92102454642476e-06, | |
| "loss": 0.78505554, | |
| "memory(GiB)": 63.72, | |
| "step": 1120, | |
| "train_speed(iter/s)": 0.020176 | |
| }, | |
| { | |
| "epoch": 0.46574208238459947, | |
| "grad_norm": 1.7134608698183469, | |
| "learning_rate": 7.910352187833512e-06, | |
| "loss": 0.77789249, | |
| "memory(GiB)": 63.72, | |
| "step": 1125, | |
| "train_speed(iter/s)": 0.020179 | |
| }, | |
| { | |
| "epoch": 0.4678120471951977, | |
| "grad_norm": 1.9145455941813492, | |
| "learning_rate": 7.899679829242263e-06, | |
| "loss": 0.76669245, | |
| "memory(GiB)": 63.72, | |
| "step": 1130, | |
| "train_speed(iter/s)": 0.020182 | |
| }, | |
| { | |
| "epoch": 0.4698820120057959, | |
| "grad_norm": 1.9752917655252427, | |
| "learning_rate": 7.889007470651014e-06, | |
| "loss": 0.77915101, | |
| "memory(GiB)": 63.72, | |
| "step": 1135, | |
| "train_speed(iter/s)": 0.020184 | |
| }, | |
| { | |
| "epoch": 0.4719519768163941, | |
| "grad_norm": 1.8705706085741929, | |
| "learning_rate": 7.878335112059767e-06, | |
| "loss": 0.78053985, | |
| "memory(GiB)": 63.72, | |
| "step": 1140, | |
| "train_speed(iter/s)": 0.020187 | |
| }, | |
| { | |
| "epoch": 0.47402194162699235, | |
| "grad_norm": 1.8137417073497548, | |
| "learning_rate": 7.867662753468518e-06, | |
| "loss": 0.8304471, | |
| "memory(GiB)": 63.72, | |
| "step": 1145, | |
| "train_speed(iter/s)": 0.020189 | |
| }, | |
| { | |
| "epoch": 0.4760919064375906, | |
| "grad_norm": 1.7537064971860614, | |
| "learning_rate": 7.856990394877269e-06, | |
| "loss": 0.76652546, | |
| "memory(GiB)": 63.72, | |
| "step": 1150, | |
| "train_speed(iter/s)": 0.020192 | |
| }, | |
| { | |
| "epoch": 0.47816187124818876, | |
| "grad_norm": 1.8981437943138895, | |
| "learning_rate": 7.84631803628602e-06, | |
| "loss": 0.77384648, | |
| "memory(GiB)": 63.72, | |
| "step": 1155, | |
| "train_speed(iter/s)": 0.020194 | |
| }, | |
| { | |
| "epoch": 0.480231836058787, | |
| "grad_norm": 1.968718081590253, | |
| "learning_rate": 7.83564567769477e-06, | |
| "loss": 0.81451969, | |
| "memory(GiB)": 63.72, | |
| "step": 1160, | |
| "train_speed(iter/s)": 0.020196 | |
| }, | |
| { | |
| "epoch": 0.48230180086938523, | |
| "grad_norm": 1.9755371858466928, | |
| "learning_rate": 7.824973319103523e-06, | |
| "loss": 0.79220142, | |
| "memory(GiB)": 63.72, | |
| "step": 1165, | |
| "train_speed(iter/s)": 0.020198 | |
| }, | |
| { | |
| "epoch": 0.4843717656799834, | |
| "grad_norm": 1.8485795416766981, | |
| "learning_rate": 7.814300960512274e-06, | |
| "loss": 0.80019064, | |
| "memory(GiB)": 63.72, | |
| "step": 1170, | |
| "train_speed(iter/s)": 0.0202 | |
| }, | |
| { | |
| "epoch": 0.48644173049058165, | |
| "grad_norm": 2.388358446370589, | |
| "learning_rate": 7.803628601921026e-06, | |
| "loss": 0.8037425, | |
| "memory(GiB)": 63.72, | |
| "step": 1175, | |
| "train_speed(iter/s)": 0.020202 | |
| }, | |
| { | |
| "epoch": 0.4885116953011799, | |
| "grad_norm": 1.7963803355457697, | |
| "learning_rate": 7.792956243329777e-06, | |
| "loss": 0.76506805, | |
| "memory(GiB)": 63.72, | |
| "step": 1180, | |
| "train_speed(iter/s)": 0.020205 | |
| }, | |
| { | |
| "epoch": 0.4905816601117781, | |
| "grad_norm": 1.642582867995439, | |
| "learning_rate": 7.782283884738528e-06, | |
| "loss": 0.76571236, | |
| "memory(GiB)": 63.72, | |
| "step": 1185, | |
| "train_speed(iter/s)": 0.020206 | |
| }, | |
| { | |
| "epoch": 0.4926516249223763, | |
| "grad_norm": 1.8722199369588735, | |
| "learning_rate": 7.771611526147279e-06, | |
| "loss": 0.81547689, | |
| "memory(GiB)": 63.72, | |
| "step": 1190, | |
| "train_speed(iter/s)": 0.020209 | |
| }, | |
| { | |
| "epoch": 0.49472158973297453, | |
| "grad_norm": 1.6444393246271363, | |
| "learning_rate": 7.760939167556031e-06, | |
| "loss": 0.77196584, | |
| "memory(GiB)": 63.72, | |
| "step": 1195, | |
| "train_speed(iter/s)": 0.02021 | |
| }, | |
| { | |
| "epoch": 0.49679155454357277, | |
| "grad_norm": 1.7322851516861686, | |
| "learning_rate": 7.75026680896478e-06, | |
| "loss": 0.78245749, | |
| "memory(GiB)": 63.72, | |
| "step": 1200, | |
| "train_speed(iter/s)": 0.020211 | |
| }, | |
| { | |
| "epoch": 0.49679155454357277, | |
| "eval_loss": 0.8388283252716064, | |
| "eval_runtime": 333.9836, | |
| "eval_samples_per_second": 18.702, | |
| "eval_steps_per_second": 1.171, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.498861519354171, | |
| "grad_norm": 1.779853897918009, | |
| "learning_rate": 7.739594450373533e-06, | |
| "loss": 0.76194401, | |
| "memory(GiB)": 63.72, | |
| "step": 1205, | |
| "train_speed(iter/s)": 0.020085 | |
| }, | |
| { | |
| "epoch": 0.5009314841647692, | |
| "grad_norm": 1.6935101575699751, | |
| "learning_rate": 7.728922091782284e-06, | |
| "loss": 0.76538324, | |
| "memory(GiB)": 63.72, | |
| "step": 1210, | |
| "train_speed(iter/s)": 0.020088 | |
| }, | |
| { | |
| "epoch": 0.5030014489753675, | |
| "grad_norm": 2.1568052824101196, | |
| "learning_rate": 7.718249733191036e-06, | |
| "loss": 0.80119467, | |
| "memory(GiB)": 63.72, | |
| "step": 1215, | |
| "train_speed(iter/s)": 0.02009 | |
| }, | |
| { | |
| "epoch": 0.5050714137859657, | |
| "grad_norm": 2.0414007101619815, | |
| "learning_rate": 7.707577374599787e-06, | |
| "loss": 0.78779106, | |
| "memory(GiB)": 63.72, | |
| "step": 1220, | |
| "train_speed(iter/s)": 0.020091 | |
| }, | |
| { | |
| "epoch": 0.5071413785965638, | |
| "grad_norm": 1.6191663238961727, | |
| "learning_rate": 7.69690501600854e-06, | |
| "loss": 0.79252872, | |
| "memory(GiB)": 63.72, | |
| "step": 1225, | |
| "train_speed(iter/s)": 0.020092 | |
| }, | |
| { | |
| "epoch": 0.5092113434071621, | |
| "grad_norm": 1.8617354993121655, | |
| "learning_rate": 7.68623265741729e-06, | |
| "loss": 0.76940928, | |
| "memory(GiB)": 63.72, | |
| "step": 1230, | |
| "train_speed(iter/s)": 0.020095 | |
| }, | |
| { | |
| "epoch": 0.5112813082177603, | |
| "grad_norm": 2.1148307227706, | |
| "learning_rate": 7.675560298826041e-06, | |
| "loss": 0.81177235, | |
| "memory(GiB)": 63.72, | |
| "step": 1235, | |
| "train_speed(iter/s)": 0.020096 | |
| }, | |
| { | |
| "epoch": 0.5133512730283585, | |
| "grad_norm": 1.858296305288767, | |
| "learning_rate": 7.664887940234792e-06, | |
| "loss": 0.78712654, | |
| "memory(GiB)": 63.72, | |
| "step": 1240, | |
| "train_speed(iter/s)": 0.020097 | |
| }, | |
| { | |
| "epoch": 0.5154212378389568, | |
| "grad_norm": 2.040424767723149, | |
| "learning_rate": 7.654215581643543e-06, | |
| "loss": 0.7963089, | |
| "memory(GiB)": 63.72, | |
| "step": 1245, | |
| "train_speed(iter/s)": 0.020099 | |
| }, | |
| { | |
| "epoch": 0.517491202649555, | |
| "grad_norm": 1.7313703601186623, | |
| "learning_rate": 7.643543223052296e-06, | |
| "loss": 0.76896205, | |
| "memory(GiB)": 63.72, | |
| "step": 1250, | |
| "train_speed(iter/s)": 0.020102 | |
| }, | |
| { | |
| "epoch": 0.5195611674601531, | |
| "grad_norm": 1.6916331849372186, | |
| "learning_rate": 7.632870864461046e-06, | |
| "loss": 0.78258944, | |
| "memory(GiB)": 63.72, | |
| "step": 1255, | |
| "train_speed(iter/s)": 0.020104 | |
| }, | |
| { | |
| "epoch": 0.5216311322707514, | |
| "grad_norm": 2.1058096966812303, | |
| "learning_rate": 7.622198505869797e-06, | |
| "loss": 0.80921211, | |
| "memory(GiB)": 63.72, | |
| "step": 1260, | |
| "train_speed(iter/s)": 0.020105 | |
| }, | |
| { | |
| "epoch": 0.5237010970813496, | |
| "grad_norm": 1.7220759067410432, | |
| "learning_rate": 7.611526147278549e-06, | |
| "loss": 0.77246647, | |
| "memory(GiB)": 63.72, | |
| "step": 1265, | |
| "train_speed(iter/s)": 0.020106 | |
| }, | |
| { | |
| "epoch": 0.5257710618919479, | |
| "grad_norm": 2.084224319084108, | |
| "learning_rate": 7.6008537886873e-06, | |
| "loss": 0.7677907, | |
| "memory(GiB)": 63.72, | |
| "step": 1270, | |
| "train_speed(iter/s)": 0.020107 | |
| }, | |
| { | |
| "epoch": 0.5278410267025461, | |
| "grad_norm": 1.7928505615246706, | |
| "learning_rate": 7.590181430096052e-06, | |
| "loss": 0.78496704, | |
| "memory(GiB)": 63.72, | |
| "step": 1275, | |
| "train_speed(iter/s)": 0.020108 | |
| }, | |
| { | |
| "epoch": 0.5299109915131442, | |
| "grad_norm": 1.8397320603347174, | |
| "learning_rate": 7.579509071504803e-06, | |
| "loss": 0.77303753, | |
| "memory(GiB)": 63.72, | |
| "step": 1280, | |
| "train_speed(iter/s)": 0.02011 | |
| }, | |
| { | |
| "epoch": 0.5319809563237425, | |
| "grad_norm": 2.1479969295234187, | |
| "learning_rate": 7.568836712913554e-06, | |
| "loss": 0.75871119, | |
| "memory(GiB)": 63.72, | |
| "step": 1285, | |
| "train_speed(iter/s)": 0.020112 | |
| }, | |
| { | |
| "epoch": 0.5340509211343407, | |
| "grad_norm": 1.94767502078934, | |
| "learning_rate": 7.558164354322306e-06, | |
| "loss": 0.75106993, | |
| "memory(GiB)": 63.72, | |
| "step": 1290, | |
| "train_speed(iter/s)": 0.020114 | |
| }, | |
| { | |
| "epoch": 0.5361208859449389, | |
| "grad_norm": 1.5236425325852578, | |
| "learning_rate": 7.547491995731058e-06, | |
| "loss": 0.79110327, | |
| "memory(GiB)": 63.72, | |
| "step": 1295, | |
| "train_speed(iter/s)": 0.020116 | |
| }, | |
| { | |
| "epoch": 0.5381908507555372, | |
| "grad_norm": 1.8541149671409907, | |
| "learning_rate": 7.536819637139808e-06, | |
| "loss": 0.77403798, | |
| "memory(GiB)": 63.72, | |
| "step": 1300, | |
| "train_speed(iter/s)": 0.020116 | |
| }, | |
| { | |
| "epoch": 0.5402608155661354, | |
| "grad_norm": 1.8743174944996448, | |
| "learning_rate": 7.52614727854856e-06, | |
| "loss": 0.77032347, | |
| "memory(GiB)": 71.94, | |
| "step": 1305, | |
| "train_speed(iter/s)": 0.020118 | |
| }, | |
| { | |
| "epoch": 0.5423307803767335, | |
| "grad_norm": 2.579806479546849, | |
| "learning_rate": 7.51547491995731e-06, | |
| "loss": 0.76461482, | |
| "memory(GiB)": 71.94, | |
| "step": 1310, | |
| "train_speed(iter/s)": 0.020119 | |
| }, | |
| { | |
| "epoch": 0.5444007451873318, | |
| "grad_norm": 2.0039452129208035, | |
| "learning_rate": 7.504802561366062e-06, | |
| "loss": 0.77457762, | |
| "memory(GiB)": 71.94, | |
| "step": 1315, | |
| "train_speed(iter/s)": 0.020121 | |
| }, | |
| { | |
| "epoch": 0.54647070999793, | |
| "grad_norm": 2.060283685569936, | |
| "learning_rate": 7.494130202774814e-06, | |
| "loss": 0.77914829, | |
| "memory(GiB)": 71.94, | |
| "step": 1320, | |
| "train_speed(iter/s)": 0.020123 | |
| }, | |
| { | |
| "epoch": 0.5485406748085283, | |
| "grad_norm": 2.163132135636586, | |
| "learning_rate": 7.483457844183565e-06, | |
| "loss": 0.77322574, | |
| "memory(GiB)": 71.94, | |
| "step": 1325, | |
| "train_speed(iter/s)": 0.020125 | |
| }, | |
| { | |
| "epoch": 0.5506106396191265, | |
| "grad_norm": 1.842195467860799, | |
| "learning_rate": 7.472785485592316e-06, | |
| "loss": 0.77454052, | |
| "memory(GiB)": 71.94, | |
| "step": 1330, | |
| "train_speed(iter/s)": 0.020126 | |
| }, | |
| { | |
| "epoch": 0.5526806044297247, | |
| "grad_norm": 1.775275008552653, | |
| "learning_rate": 7.462113127001068e-06, | |
| "loss": 0.77025108, | |
| "memory(GiB)": 71.94, | |
| "step": 1335, | |
| "train_speed(iter/s)": 0.020128 | |
| }, | |
| { | |
| "epoch": 0.554750569240323, | |
| "grad_norm": 2.165651142341684, | |
| "learning_rate": 7.451440768409819e-06, | |
| "loss": 0.78470011, | |
| "memory(GiB)": 71.94, | |
| "step": 1340, | |
| "train_speed(iter/s)": 0.02013 | |
| }, | |
| { | |
| "epoch": 0.5568205340509211, | |
| "grad_norm": 1.6530168942960388, | |
| "learning_rate": 7.440768409818571e-06, | |
| "loss": 0.74261112, | |
| "memory(GiB)": 71.94, | |
| "step": 1345, | |
| "train_speed(iter/s)": 0.020131 | |
| }, | |
| { | |
| "epoch": 0.5588904988615193, | |
| "grad_norm": 2.1178890231616694, | |
| "learning_rate": 7.430096051227322e-06, | |
| "loss": 0.77076225, | |
| "memory(GiB)": 71.94, | |
| "step": 1350, | |
| "train_speed(iter/s)": 0.020132 | |
| }, | |
| { | |
| "epoch": 0.5609604636721176, | |
| "grad_norm": 1.6332209286889638, | |
| "learning_rate": 7.419423692636073e-06, | |
| "loss": 0.76129122, | |
| "memory(GiB)": 71.94, | |
| "step": 1355, | |
| "train_speed(iter/s)": 0.020134 | |
| }, | |
| { | |
| "epoch": 0.5630304284827158, | |
| "grad_norm": 1.9276105656674607, | |
| "learning_rate": 7.408751334044825e-06, | |
| "loss": 0.77616062, | |
| "memory(GiB)": 71.94, | |
| "step": 1360, | |
| "train_speed(iter/s)": 0.020136 | |
| }, | |
| { | |
| "epoch": 0.565100393293314, | |
| "grad_norm": 1.838664332126464, | |
| "learning_rate": 7.398078975453575e-06, | |
| "loss": 0.77545385, | |
| "memory(GiB)": 71.94, | |
| "step": 1365, | |
| "train_speed(iter/s)": 0.020137 | |
| }, | |
| { | |
| "epoch": 0.5671703581039123, | |
| "grad_norm": 2.090052030958157, | |
| "learning_rate": 7.387406616862327e-06, | |
| "loss": 0.7824297, | |
| "memory(GiB)": 71.94, | |
| "step": 1370, | |
| "train_speed(iter/s)": 0.020139 | |
| }, | |
| { | |
| "epoch": 0.5692403229145104, | |
| "grad_norm": 1.7799554116738177, | |
| "learning_rate": 7.376734258271079e-06, | |
| "loss": 0.77833185, | |
| "memory(GiB)": 71.94, | |
| "step": 1375, | |
| "train_speed(iter/s)": 0.020142 | |
| }, | |
| { | |
| "epoch": 0.5713102877251087, | |
| "grad_norm": 2.266691996975209, | |
| "learning_rate": 7.366061899679829e-06, | |
| "loss": 0.77535782, | |
| "memory(GiB)": 71.94, | |
| "step": 1380, | |
| "train_speed(iter/s)": 0.020144 | |
| }, | |
| { | |
| "epoch": 0.5733802525357069, | |
| "grad_norm": 1.8220471587007605, | |
| "learning_rate": 7.355389541088581e-06, | |
| "loss": 0.76158247, | |
| "memory(GiB)": 71.94, | |
| "step": 1385, | |
| "train_speed(iter/s)": 0.020146 | |
| }, | |
| { | |
| "epoch": 0.5754502173463051, | |
| "grad_norm": 1.7869060368578336, | |
| "learning_rate": 7.344717182497333e-06, | |
| "loss": 0.79457912, | |
| "memory(GiB)": 71.94, | |
| "step": 1390, | |
| "train_speed(iter/s)": 0.020147 | |
| }, | |
| { | |
| "epoch": 0.5775201821569034, | |
| "grad_norm": 2.730877403121895, | |
| "learning_rate": 7.334044823906084e-06, | |
| "loss": 0.75181475, | |
| "memory(GiB)": 71.94, | |
| "step": 1395, | |
| "train_speed(iter/s)": 0.020149 | |
| }, | |
| { | |
| "epoch": 0.5795901469675016, | |
| "grad_norm": 2.091944883020518, | |
| "learning_rate": 7.323372465314835e-06, | |
| "loss": 0.75992446, | |
| "memory(GiB)": 71.94, | |
| "step": 1400, | |
| "train_speed(iter/s)": 0.02015 | |
| }, | |
| { | |
| "epoch": 0.5816601117780997, | |
| "grad_norm": 1.5904822426334966, | |
| "learning_rate": 7.312700106723587e-06, | |
| "loss": 0.77254944, | |
| "memory(GiB)": 71.94, | |
| "step": 1405, | |
| "train_speed(iter/s)": 0.020152 | |
| }, | |
| { | |
| "epoch": 0.583730076588698, | |
| "grad_norm": 1.673083919686743, | |
| "learning_rate": 7.302027748132338e-06, | |
| "loss": 0.74836388, | |
| "memory(GiB)": 71.94, | |
| "step": 1410, | |
| "train_speed(iter/s)": 0.020152 | |
| }, | |
| { | |
| "epoch": 0.5858000413992962, | |
| "grad_norm": 2.05811523971159, | |
| "learning_rate": 7.29135538954109e-06, | |
| "loss": 0.74358282, | |
| "memory(GiB)": 71.94, | |
| "step": 1415, | |
| "train_speed(iter/s)": 0.020154 | |
| }, | |
| { | |
| "epoch": 0.5878700062098944, | |
| "grad_norm": 1.983632865952002, | |
| "learning_rate": 7.28068303094984e-06, | |
| "loss": 0.78234367, | |
| "memory(GiB)": 71.94, | |
| "step": 1420, | |
| "train_speed(iter/s)": 0.020156 | |
| }, | |
| { | |
| "epoch": 0.5899399710204927, | |
| "grad_norm": 1.6612296882759847, | |
| "learning_rate": 7.270010672358592e-06, | |
| "loss": 0.76740494, | |
| "memory(GiB)": 71.94, | |
| "step": 1425, | |
| "train_speed(iter/s)": 0.020157 | |
| }, | |
| { | |
| "epoch": 0.5920099358310908, | |
| "grad_norm": 1.8232818202515155, | |
| "learning_rate": 7.259338313767344e-06, | |
| "loss": 0.76410437, | |
| "memory(GiB)": 71.94, | |
| "step": 1430, | |
| "train_speed(iter/s)": 0.020159 | |
| }, | |
| { | |
| "epoch": 0.5940799006416891, | |
| "grad_norm": 1.6871789120586522, | |
| "learning_rate": 7.248665955176094e-06, | |
| "loss": 0.76673613, | |
| "memory(GiB)": 71.94, | |
| "step": 1435, | |
| "train_speed(iter/s)": 0.02016 | |
| }, | |
| { | |
| "epoch": 0.5961498654522873, | |
| "grad_norm": 1.9181669169557467, | |
| "learning_rate": 7.237993596584846e-06, | |
| "loss": 0.73530726, | |
| "memory(GiB)": 71.94, | |
| "step": 1440, | |
| "train_speed(iter/s)": 0.020161 | |
| }, | |
| { | |
| "epoch": 0.5982198302628855, | |
| "grad_norm": 2.0425311715534513, | |
| "learning_rate": 7.227321237993598e-06, | |
| "loss": 0.78409719, | |
| "memory(GiB)": 71.94, | |
| "step": 1445, | |
| "train_speed(iter/s)": 0.020163 | |
| }, | |
| { | |
| "epoch": 0.6002897950734838, | |
| "grad_norm": 1.725457162133973, | |
| "learning_rate": 7.216648879402348e-06, | |
| "loss": 0.74391842, | |
| "memory(GiB)": 71.94, | |
| "step": 1450, | |
| "train_speed(iter/s)": 0.020163 | |
| }, | |
| { | |
| "epoch": 0.602359759884082, | |
| "grad_norm": 2.2362927243629613, | |
| "learning_rate": 7.2059765208111e-06, | |
| "loss": 0.77035971, | |
| "memory(GiB)": 71.94, | |
| "step": 1455, | |
| "train_speed(iter/s)": 0.020166 | |
| }, | |
| { | |
| "epoch": 0.6044297246946801, | |
| "grad_norm": 2.379202645179455, | |
| "learning_rate": 7.195304162219852e-06, | |
| "loss": 0.74266062, | |
| "memory(GiB)": 71.94, | |
| "step": 1460, | |
| "train_speed(iter/s)": 0.020168 | |
| }, | |
| { | |
| "epoch": 0.6064996895052784, | |
| "grad_norm": 1.6006607749389805, | |
| "learning_rate": 7.184631803628602e-06, | |
| "loss": 0.76957574, | |
| "memory(GiB)": 71.94, | |
| "step": 1465, | |
| "train_speed(iter/s)": 0.020169 | |
| }, | |
| { | |
| "epoch": 0.6085696543158766, | |
| "grad_norm": 1.7633012594109296, | |
| "learning_rate": 7.173959445037354e-06, | |
| "loss": 0.77078071, | |
| "memory(GiB)": 71.94, | |
| "step": 1470, | |
| "train_speed(iter/s)": 0.02017 | |
| }, | |
| { | |
| "epoch": 0.6106396191264748, | |
| "grad_norm": 1.6009632285824897, | |
| "learning_rate": 7.163287086446106e-06, | |
| "loss": 0.7669549, | |
| "memory(GiB)": 71.94, | |
| "step": 1475, | |
| "train_speed(iter/s)": 0.020171 | |
| }, | |
| { | |
| "epoch": 0.6127095839370731, | |
| "grad_norm": 1.932344117154099, | |
| "learning_rate": 7.152614727854857e-06, | |
| "loss": 0.76528344, | |
| "memory(GiB)": 71.94, | |
| "step": 1480, | |
| "train_speed(iter/s)": 0.020172 | |
| }, | |
| { | |
| "epoch": 0.6147795487476713, | |
| "grad_norm": 2.02896587820159, | |
| "learning_rate": 7.141942369263608e-06, | |
| "loss": 0.75168095, | |
| "memory(GiB)": 71.94, | |
| "step": 1485, | |
| "train_speed(iter/s)": 0.020174 | |
| }, | |
| { | |
| "epoch": 0.6168495135582696, | |
| "grad_norm": 1.9974467066335662, | |
| "learning_rate": 7.131270010672359e-06, | |
| "loss": 0.7488194, | |
| "memory(GiB)": 71.94, | |
| "step": 1490, | |
| "train_speed(iter/s)": 0.020176 | |
| }, | |
| { | |
| "epoch": 0.6189194783688677, | |
| "grad_norm": 1.7902382164373858, | |
| "learning_rate": 7.120597652081111e-06, | |
| "loss": 0.75267982, | |
| "memory(GiB)": 71.94, | |
| "step": 1495, | |
| "train_speed(iter/s)": 0.020177 | |
| }, | |
| { | |
| "epoch": 0.6209894431794659, | |
| "grad_norm": 2.5929739472863838, | |
| "learning_rate": 7.1099252934898625e-06, | |
| "loss": 0.73765378, | |
| "memory(GiB)": 71.94, | |
| "step": 1500, | |
| "train_speed(iter/s)": 0.020178 | |
| }, | |
| { | |
| "epoch": 0.6209894431794659, | |
| "eval_loss": 0.8280953168869019, | |
| "eval_runtime": 333.1777, | |
| "eval_samples_per_second": 18.747, | |
| "eval_steps_per_second": 1.174, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6230594079900642, | |
| "grad_norm": 1.9847247838289337, | |
| "learning_rate": 7.099252934898613e-06, | |
| "loss": 0.75908709, | |
| "memory(GiB)": 71.94, | |
| "step": 1505, | |
| "train_speed(iter/s)": 0.020077 | |
| }, | |
| { | |
| "epoch": 0.6251293728006624, | |
| "grad_norm": 1.801176606187135, | |
| "learning_rate": 7.088580576307365e-06, | |
| "loss": 0.78817225, | |
| "memory(GiB)": 71.94, | |
| "step": 1510, | |
| "train_speed(iter/s)": 0.020079 | |
| }, | |
| { | |
| "epoch": 0.6271993376112606, | |
| "grad_norm": 1.8685868923530962, | |
| "learning_rate": 7.0779082177161165e-06, | |
| "loss": 0.74608173, | |
| "memory(GiB)": 71.94, | |
| "step": 1515, | |
| "train_speed(iter/s)": 0.02008 | |
| }, | |
| { | |
| "epoch": 0.6292693024218589, | |
| "grad_norm": 1.9112331285520685, | |
| "learning_rate": 7.067235859124867e-06, | |
| "loss": 0.78047667, | |
| "memory(GiB)": 71.94, | |
| "step": 1520, | |
| "train_speed(iter/s)": 0.020081 | |
| }, | |
| { | |
| "epoch": 0.631339267232457, | |
| "grad_norm": 1.69066034933461, | |
| "learning_rate": 7.0565635005336185e-06, | |
| "loss": 0.75149813, | |
| "memory(GiB)": 71.94, | |
| "step": 1525, | |
| "train_speed(iter/s)": 0.02008 | |
| }, | |
| { | |
| "epoch": 0.6334092320430552, | |
| "grad_norm": 1.5394308738884603, | |
| "learning_rate": 7.0458911419423704e-06, | |
| "loss": 0.76838903, | |
| "memory(GiB)": 71.94, | |
| "step": 1530, | |
| "train_speed(iter/s)": 0.020081 | |
| }, | |
| { | |
| "epoch": 0.6354791968536535, | |
| "grad_norm": 1.7690454856119193, | |
| "learning_rate": 7.035218783351121e-06, | |
| "loss": 0.7524828, | |
| "memory(GiB)": 71.94, | |
| "step": 1535, | |
| "train_speed(iter/s)": 0.020083 | |
| }, | |
| { | |
| "epoch": 0.6375491616642517, | |
| "grad_norm": 2.0548651954814154, | |
| "learning_rate": 7.0245464247598725e-06, | |
| "loss": 0.76242485, | |
| "memory(GiB)": 71.94, | |
| "step": 1540, | |
| "train_speed(iter/s)": 0.020083 | |
| }, | |
| { | |
| "epoch": 0.63961912647485, | |
| "grad_norm": 1.5706142249866388, | |
| "learning_rate": 7.0138740661686235e-06, | |
| "loss": 0.78198986, | |
| "memory(GiB)": 71.94, | |
| "step": 1545, | |
| "train_speed(iter/s)": 0.020084 | |
| }, | |
| { | |
| "epoch": 0.6416890912854482, | |
| "grad_norm": 2.9215645341915275, | |
| "learning_rate": 7.0032017075773754e-06, | |
| "loss": 0.74943571, | |
| "memory(GiB)": 71.94, | |
| "step": 1550, | |
| "train_speed(iter/s)": 0.020086 | |
| }, | |
| { | |
| "epoch": 0.6437590560960463, | |
| "grad_norm": 1.983762828992232, | |
| "learning_rate": 6.9925293489861265e-06, | |
| "loss": 0.75862083, | |
| "memory(GiB)": 71.94, | |
| "step": 1555, | |
| "train_speed(iter/s)": 0.020087 | |
| }, | |
| { | |
| "epoch": 0.6458290209066446, | |
| "grad_norm": 1.8256568832087245, | |
| "learning_rate": 6.9818569903948775e-06, | |
| "loss": 0.75860863, | |
| "memory(GiB)": 71.94, | |
| "step": 1560, | |
| "train_speed(iter/s)": 0.020089 | |
| }, | |
| { | |
| "epoch": 0.6478989857172428, | |
| "grad_norm": 1.7082999758601491, | |
| "learning_rate": 6.971184631803629e-06, | |
| "loss": 0.78126869, | |
| "memory(GiB)": 71.94, | |
| "step": 1565, | |
| "train_speed(iter/s)": 0.02009 | |
| }, | |
| { | |
| "epoch": 0.649968950527841, | |
| "grad_norm": 2.388449730753909, | |
| "learning_rate": 6.960512273212381e-06, | |
| "loss": 0.76754818, | |
| "memory(GiB)": 71.94, | |
| "step": 1570, | |
| "train_speed(iter/s)": 0.020091 | |
| }, | |
| { | |
| "epoch": 0.6520389153384393, | |
| "grad_norm": 2.570709108294184, | |
| "learning_rate": 6.9498399146211315e-06, | |
| "loss": 0.74245424, | |
| "memory(GiB)": 71.94, | |
| "step": 1575, | |
| "train_speed(iter/s)": 0.020093 | |
| }, | |
| { | |
| "epoch": 0.6541088801490375, | |
| "grad_norm": 1.942646301485773, | |
| "learning_rate": 6.939167556029883e-06, | |
| "loss": 0.77264175, | |
| "memory(GiB)": 71.94, | |
| "step": 1580, | |
| "train_speed(iter/s)": 0.020094 | |
| }, | |
| { | |
| "epoch": 0.6561788449596356, | |
| "grad_norm": 1.7902561992868253, | |
| "learning_rate": 6.928495197438635e-06, | |
| "loss": 0.76845627, | |
| "memory(GiB)": 71.94, | |
| "step": 1585, | |
| "train_speed(iter/s)": 0.020096 | |
| }, | |
| { | |
| "epoch": 0.6582488097702339, | |
| "grad_norm": 1.8253085296468832, | |
| "learning_rate": 6.9178228388473854e-06, | |
| "loss": 0.75771255, | |
| "memory(GiB)": 71.94, | |
| "step": 1590, | |
| "train_speed(iter/s)": 0.020097 | |
| }, | |
| { | |
| "epoch": 0.6603187745808321, | |
| "grad_norm": 1.8440321320283706, | |
| "learning_rate": 6.907150480256137e-06, | |
| "loss": 0.74345121, | |
| "memory(GiB)": 71.94, | |
| "step": 1595, | |
| "train_speed(iter/s)": 0.020096 | |
| }, | |
| { | |
| "epoch": 0.6623887393914304, | |
| "grad_norm": 1.8894245153228149, | |
| "learning_rate": 6.896478121664889e-06, | |
| "loss": 0.76188393, | |
| "memory(GiB)": 71.94, | |
| "step": 1600, | |
| "train_speed(iter/s)": 0.020098 | |
| }, | |
| { | |
| "epoch": 0.6644587042020286, | |
| "grad_norm": 2.055312059883184, | |
| "learning_rate": 6.885805763073639e-06, | |
| "loss": 0.75247483, | |
| "memory(GiB)": 71.94, | |
| "step": 1605, | |
| "train_speed(iter/s)": 0.020099 | |
| }, | |
| { | |
| "epoch": 0.6665286690126268, | |
| "grad_norm": 1.8131130404445874, | |
| "learning_rate": 6.875133404482391e-06, | |
| "loss": 0.77208357, | |
| "memory(GiB)": 71.94, | |
| "step": 1610, | |
| "train_speed(iter/s)": 0.0201 | |
| }, | |
| { | |
| "epoch": 0.668598633823225, | |
| "grad_norm": 1.721876122278255, | |
| "learning_rate": 6.864461045891142e-06, | |
| "loss": 0.7271523, | |
| "memory(GiB)": 71.94, | |
| "step": 1615, | |
| "train_speed(iter/s)": 0.020101 | |
| }, | |
| { | |
| "epoch": 0.6706685986338232, | |
| "grad_norm": 1.880195637198508, | |
| "learning_rate": 6.853788687299893e-06, | |
| "loss": 0.74756432, | |
| "memory(GiB)": 71.94, | |
| "step": 1620, | |
| "train_speed(iter/s)": 0.020101 | |
| }, | |
| { | |
| "epoch": 0.6727385634444214, | |
| "grad_norm": 1.5895741550527986, | |
| "learning_rate": 6.843116328708645e-06, | |
| "loss": 0.77960148, | |
| "memory(GiB)": 71.94, | |
| "step": 1625, | |
| "train_speed(iter/s)": 0.020102 | |
| }, | |
| { | |
| "epoch": 0.6748085282550197, | |
| "grad_norm": 1.762407790878705, | |
| "learning_rate": 6.832443970117396e-06, | |
| "loss": 0.76564293, | |
| "memory(GiB)": 71.94, | |
| "step": 1630, | |
| "train_speed(iter/s)": 0.020104 | |
| }, | |
| { | |
| "epoch": 0.6768784930656179, | |
| "grad_norm": 1.85152265865232, | |
| "learning_rate": 6.821771611526148e-06, | |
| "loss": 0.75755472, | |
| "memory(GiB)": 71.94, | |
| "step": 1635, | |
| "train_speed(iter/s)": 0.020104 | |
| }, | |
| { | |
| "epoch": 0.678948457876216, | |
| "grad_norm": 2.4048195292609464, | |
| "learning_rate": 6.811099252934899e-06, | |
| "loss": 0.75722828, | |
| "memory(GiB)": 71.94, | |
| "step": 1640, | |
| "train_speed(iter/s)": 0.020106 | |
| }, | |
| { | |
| "epoch": 0.6810184226868143, | |
| "grad_norm": 1.7461290969223273, | |
| "learning_rate": 6.80042689434365e-06, | |
| "loss": 0.74719772, | |
| "memory(GiB)": 71.94, | |
| "step": 1645, | |
| "train_speed(iter/s)": 0.020107 | |
| }, | |
| { | |
| "epoch": 0.6830883874974125, | |
| "grad_norm": 1.7606017366047548, | |
| "learning_rate": 6.789754535752402e-06, | |
| "loss": 0.74246426, | |
| "memory(GiB)": 71.94, | |
| "step": 1650, | |
| "train_speed(iter/s)": 0.020108 | |
| }, | |
| { | |
| "epoch": 0.6851583523080108, | |
| "grad_norm": 2.3484261163252884, | |
| "learning_rate": 6.779082177161154e-06, | |
| "loss": 0.7567915, | |
| "memory(GiB)": 71.94, | |
| "step": 1655, | |
| "train_speed(iter/s)": 0.020109 | |
| }, | |
| { | |
| "epoch": 0.687228317118609, | |
| "grad_norm": 1.686698632081635, | |
| "learning_rate": 6.768409818569904e-06, | |
| "loss": 0.73500414, | |
| "memory(GiB)": 71.94, | |
| "step": 1660, | |
| "train_speed(iter/s)": 0.020111 | |
| }, | |
| { | |
| "epoch": 0.6892982819292072, | |
| "grad_norm": 1.9785908023609375, | |
| "learning_rate": 6.757737459978656e-06, | |
| "loss": 0.7035881, | |
| "memory(GiB)": 71.94, | |
| "step": 1665, | |
| "train_speed(iter/s)": 0.020112 | |
| }, | |
| { | |
| "epoch": 0.6913682467398055, | |
| "grad_norm": 1.8288827641332985, | |
| "learning_rate": 6.747065101387406e-06, | |
| "loss": 0.74135156, | |
| "memory(GiB)": 71.94, | |
| "step": 1670, | |
| "train_speed(iter/s)": 0.020113 | |
| }, | |
| { | |
| "epoch": 0.6934382115504036, | |
| "grad_norm": 2.106219884662748, | |
| "learning_rate": 6.736392742796158e-06, | |
| "loss": 0.77649341, | |
| "memory(GiB)": 71.94, | |
| "step": 1675, | |
| "train_speed(iter/s)": 0.020115 | |
| }, | |
| { | |
| "epoch": 0.6955081763610018, | |
| "grad_norm": 1.857981089347382, | |
| "learning_rate": 6.72572038420491e-06, | |
| "loss": 0.73271251, | |
| "memory(GiB)": 71.94, | |
| "step": 1680, | |
| "train_speed(iter/s)": 0.020116 | |
| }, | |
| { | |
| "epoch": 0.6975781411716001, | |
| "grad_norm": 1.8252469751324223, | |
| "learning_rate": 6.715048025613661e-06, | |
| "loss": 0.76072979, | |
| "memory(GiB)": 71.94, | |
| "step": 1685, | |
| "train_speed(iter/s)": 0.020117 | |
| }, | |
| { | |
| "epoch": 0.6996481059821983, | |
| "grad_norm": 1.9875787155351985, | |
| "learning_rate": 6.704375667022412e-06, | |
| "loss": 0.73438239, | |
| "memory(GiB)": 71.94, | |
| "step": 1690, | |
| "train_speed(iter/s)": 0.020118 | |
| }, | |
| { | |
| "epoch": 0.7017180707927966, | |
| "grad_norm": 1.685302389303672, | |
| "learning_rate": 6.693703308431164e-06, | |
| "loss": 0.76429882, | |
| "memory(GiB)": 71.94, | |
| "step": 1695, | |
| "train_speed(iter/s)": 0.020119 | |
| }, | |
| { | |
| "epoch": 0.7037880356033948, | |
| "grad_norm": 2.30374573526697, | |
| "learning_rate": 6.683030949839915e-06, | |
| "loss": 0.7437336, | |
| "memory(GiB)": 71.94, | |
| "step": 1700, | |
| "train_speed(iter/s)": 0.020119 | |
| }, | |
| { | |
| "epoch": 0.7058580004139929, | |
| "grad_norm": 2.4577356463267104, | |
| "learning_rate": 6.672358591248667e-06, | |
| "loss": 0.79406719, | |
| "memory(GiB)": 71.94, | |
| "step": 1705, | |
| "train_speed(iter/s)": 0.02012 | |
| }, | |
| { | |
| "epoch": 0.7079279652245912, | |
| "grad_norm": 1.7489028068953179, | |
| "learning_rate": 6.661686232657418e-06, | |
| "loss": 0.75482893, | |
| "memory(GiB)": 71.94, | |
| "step": 1710, | |
| "train_speed(iter/s)": 0.020121 | |
| }, | |
| { | |
| "epoch": 0.7099979300351894, | |
| "grad_norm": 1.929474801980816, | |
| "learning_rate": 6.651013874066169e-06, | |
| "loss": 0.74136767, | |
| "memory(GiB)": 71.94, | |
| "step": 1715, | |
| "train_speed(iter/s)": 0.020122 | |
| }, | |
| { | |
| "epoch": 0.7120678948457876, | |
| "grad_norm": 1.6790168198096502, | |
| "learning_rate": 6.640341515474921e-06, | |
| "loss": 0.72392588, | |
| "memory(GiB)": 71.94, | |
| "step": 1720, | |
| "train_speed(iter/s)": 0.020122 | |
| }, | |
| { | |
| "epoch": 0.7141378596563859, | |
| "grad_norm": 2.2963610149429488, | |
| "learning_rate": 6.629669156883671e-06, | |
| "loss": 0.7462635, | |
| "memory(GiB)": 71.94, | |
| "step": 1725, | |
| "train_speed(iter/s)": 0.020123 | |
| }, | |
| { | |
| "epoch": 0.716207824466984, | |
| "grad_norm": 4.289784718847475, | |
| "learning_rate": 6.618996798292423e-06, | |
| "loss": 0.73541126, | |
| "memory(GiB)": 71.94, | |
| "step": 1730, | |
| "train_speed(iter/s)": 0.020124 | |
| }, | |
| { | |
| "epoch": 0.7182777892775822, | |
| "grad_norm": 2.1972884462976263, | |
| "learning_rate": 6.608324439701175e-06, | |
| "loss": 0.7353497, | |
| "memory(GiB)": 71.94, | |
| "step": 1735, | |
| "train_speed(iter/s)": 0.020124 | |
| }, | |
| { | |
| "epoch": 0.7203477540881805, | |
| "grad_norm": 2.1738189409828377, | |
| "learning_rate": 6.597652081109925e-06, | |
| "loss": 0.71738148, | |
| "memory(GiB)": 71.94, | |
| "step": 1740, | |
| "train_speed(iter/s)": 0.020126 | |
| }, | |
| { | |
| "epoch": 0.7224177188987787, | |
| "grad_norm": 1.6342074890059992, | |
| "learning_rate": 6.586979722518677e-06, | |
| "loss": 0.75047336, | |
| "memory(GiB)": 71.94, | |
| "step": 1745, | |
| "train_speed(iter/s)": 0.020126 | |
| }, | |
| { | |
| "epoch": 0.724487683709377, | |
| "grad_norm": 1.7007570391919413, | |
| "learning_rate": 6.576307363927429e-06, | |
| "loss": 0.73253298, | |
| "memory(GiB)": 71.94, | |
| "step": 1750, | |
| "train_speed(iter/s)": 0.020127 | |
| }, | |
| { | |
| "epoch": 0.7265576485199752, | |
| "grad_norm": 1.5323053950217638, | |
| "learning_rate": 6.56563500533618e-06, | |
| "loss": 0.74062099, | |
| "memory(GiB)": 71.94, | |
| "step": 1755, | |
| "train_speed(iter/s)": 0.020128 | |
| }, | |
| { | |
| "epoch": 0.7286276133305734, | |
| "grad_norm": 1.9624071404199714, | |
| "learning_rate": 6.554962646744931e-06, | |
| "loss": 0.76860294, | |
| "memory(GiB)": 71.94, | |
| "step": 1760, | |
| "train_speed(iter/s)": 0.02013 | |
| }, | |
| { | |
| "epoch": 0.7306975781411716, | |
| "grad_norm": 1.8145041855689747, | |
| "learning_rate": 6.544290288153683e-06, | |
| "loss": 0.72403975, | |
| "memory(GiB)": 71.94, | |
| "step": 1765, | |
| "train_speed(iter/s)": 0.020131 | |
| }, | |
| { | |
| "epoch": 0.7327675429517698, | |
| "grad_norm": 1.7793196071264126, | |
| "learning_rate": 6.533617929562434e-06, | |
| "loss": 0.76407566, | |
| "memory(GiB)": 71.94, | |
| "step": 1770, | |
| "train_speed(iter/s)": 0.020132 | |
| }, | |
| { | |
| "epoch": 0.734837507762368, | |
| "grad_norm": 1.8806974947113853, | |
| "learning_rate": 6.522945570971186e-06, | |
| "loss": 0.73674612, | |
| "memory(GiB)": 71.94, | |
| "step": 1775, | |
| "train_speed(iter/s)": 0.020133 | |
| }, | |
| { | |
| "epoch": 0.7369074725729663, | |
| "grad_norm": 1.6203999356727887, | |
| "learning_rate": 6.512273212379937e-06, | |
| "loss": 0.73720975, | |
| "memory(GiB)": 71.94, | |
| "step": 1780, | |
| "train_speed(iter/s)": 0.020135 | |
| }, | |
| { | |
| "epoch": 0.7389774373835645, | |
| "grad_norm": 1.8256501665131275, | |
| "learning_rate": 6.501600853788688e-06, | |
| "loss": 0.74560528, | |
| "memory(GiB)": 71.94, | |
| "step": 1785, | |
| "train_speed(iter/s)": 0.020136 | |
| }, | |
| { | |
| "epoch": 0.7410474021941627, | |
| "grad_norm": 2.3313828860511294, | |
| "learning_rate": 6.49092849519744e-06, | |
| "loss": 0.73726311, | |
| "memory(GiB)": 71.94, | |
| "step": 1790, | |
| "train_speed(iter/s)": 0.020137 | |
| }, | |
| { | |
| "epoch": 0.7431173670047609, | |
| "grad_norm": 2.01967250245603, | |
| "learning_rate": 6.48025613660619e-06, | |
| "loss": 0.72599983, | |
| "memory(GiB)": 71.94, | |
| "step": 1795, | |
| "train_speed(iter/s)": 0.020138 | |
| }, | |
| { | |
| "epoch": 0.7451873318153591, | |
| "grad_norm": 1.5873931113082191, | |
| "learning_rate": 6.469583778014942e-06, | |
| "loss": 0.72361288, | |
| "memory(GiB)": 71.94, | |
| "step": 1800, | |
| "train_speed(iter/s)": 0.020139 | |
| }, | |
| { | |
| "epoch": 0.7451873318153591, | |
| "eval_loss": 0.821691632270813, | |
| "eval_runtime": 333.5584, | |
| "eval_samples_per_second": 18.725, | |
| "eval_steps_per_second": 1.172, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.7472572966259574, | |
| "grad_norm": 1.6284031081935817, | |
| "learning_rate": 6.458911419423694e-06, | |
| "loss": 0.73620996, | |
| "memory(GiB)": 71.94, | |
| "step": 1805, | |
| "train_speed(iter/s)": 0.020055 | |
| }, | |
| { | |
| "epoch": 0.7493272614365556, | |
| "grad_norm": 1.8143587680892548, | |
| "learning_rate": 6.448239060832444e-06, | |
| "loss": 0.73436375, | |
| "memory(GiB)": 71.94, | |
| "step": 1810, | |
| "train_speed(iter/s)": 0.020056 | |
| }, | |
| { | |
| "epoch": 0.7513972262471538, | |
| "grad_norm": 1.9502300953167138, | |
| "learning_rate": 6.437566702241196e-06, | |
| "loss": 0.74698448, | |
| "memory(GiB)": 71.94, | |
| "step": 1815, | |
| "train_speed(iter/s)": 0.020057 | |
| }, | |
| { | |
| "epoch": 0.7534671910577521, | |
| "grad_norm": 1.72820315761644, | |
| "learning_rate": 6.426894343649948e-06, | |
| "loss": 0.75661831, | |
| "memory(GiB)": 71.94, | |
| "step": 1820, | |
| "train_speed(iter/s)": 0.020059 | |
| }, | |
| { | |
| "epoch": 0.7555371558683502, | |
| "grad_norm": 1.8039931329917855, | |
| "learning_rate": 6.416221985058698e-06, | |
| "loss": 0.74954405, | |
| "memory(GiB)": 71.94, | |
| "step": 1825, | |
| "train_speed(iter/s)": 0.020059 | |
| }, | |
| { | |
| "epoch": 0.7576071206789484, | |
| "grad_norm": 2.0711566925028433, | |
| "learning_rate": 6.40554962646745e-06, | |
| "loss": 0.73745756, | |
| "memory(GiB)": 71.94, | |
| "step": 1830, | |
| "train_speed(iter/s)": 0.020061 | |
| }, | |
| { | |
| "epoch": 0.7596770854895467, | |
| "grad_norm": 1.826394447351557, | |
| "learning_rate": 6.3948772678762016e-06, | |
| "loss": 0.73249888, | |
| "memory(GiB)": 71.94, | |
| "step": 1835, | |
| "train_speed(iter/s)": 0.020062 | |
| }, | |
| { | |
| "epoch": 0.7617470503001449, | |
| "grad_norm": 2.4929262063136175, | |
| "learning_rate": 6.384204909284953e-06, | |
| "loss": 0.72507439, | |
| "memory(GiB)": 71.94, | |
| "step": 1840, | |
| "train_speed(iter/s)": 0.020062 | |
| }, | |
| { | |
| "epoch": 0.7638170151107431, | |
| "grad_norm": 1.7606115982834467, | |
| "learning_rate": 6.373532550693704e-06, | |
| "loss": 0.72618227, | |
| "memory(GiB)": 71.94, | |
| "step": 1845, | |
| "train_speed(iter/s)": 0.020064 | |
| }, | |
| { | |
| "epoch": 0.7658869799213414, | |
| "grad_norm": 1.7780633999979434, | |
| "learning_rate": 6.362860192102455e-06, | |
| "loss": 0.73577065, | |
| "memory(GiB)": 71.94, | |
| "step": 1850, | |
| "train_speed(iter/s)": 0.020064 | |
| }, | |
| { | |
| "epoch": 0.7679569447319395, | |
| "grad_norm": 2.2901271183050294, | |
| "learning_rate": 6.3521878335112066e-06, | |
| "loss": 0.75972033, | |
| "memory(GiB)": 71.94, | |
| "step": 1855, | |
| "train_speed(iter/s)": 0.020065 | |
| }, | |
| { | |
| "epoch": 0.7700269095425378, | |
| "grad_norm": 1.974709118996213, | |
| "learning_rate": 6.3415154749199585e-06, | |
| "loss": 0.7513834, | |
| "memory(GiB)": 71.94, | |
| "step": 1860, | |
| "train_speed(iter/s)": 0.020065 | |
| }, | |
| { | |
| "epoch": 0.772096874353136, | |
| "grad_norm": 1.7795619053561953, | |
| "learning_rate": 6.330843116328709e-06, | |
| "loss": 0.70549402, | |
| "memory(GiB)": 71.94, | |
| "step": 1865, | |
| "train_speed(iter/s)": 0.020066 | |
| }, | |
| { | |
| "epoch": 0.7741668391637342, | |
| "grad_norm": 1.7925210555170064, | |
| "learning_rate": 6.3201707577374605e-06, | |
| "loss": 0.75538568, | |
| "memory(GiB)": 71.94, | |
| "step": 1870, | |
| "train_speed(iter/s)": 0.020067 | |
| }, | |
| { | |
| "epoch": 0.7762368039743325, | |
| "grad_norm": 1.7592519440523378, | |
| "learning_rate": 6.309498399146212e-06, | |
| "loss": 0.74328322, | |
| "memory(GiB)": 71.94, | |
| "step": 1875, | |
| "train_speed(iter/s)": 0.020068 | |
| }, | |
| { | |
| "epoch": 0.7783067687849307, | |
| "grad_norm": 1.7630568722667896, | |
| "learning_rate": 6.298826040554963e-06, | |
| "loss": 0.73995676, | |
| "memory(GiB)": 71.94, | |
| "step": 1880, | |
| "train_speed(iter/s)": 0.020069 | |
| }, | |
| { | |
| "epoch": 0.7803767335955288, | |
| "grad_norm": 1.732047211183728, | |
| "learning_rate": 6.2881536819637145e-06, | |
| "loss": 0.70023623, | |
| "memory(GiB)": 71.94, | |
| "step": 1885, | |
| "train_speed(iter/s)": 0.02007 | |
| }, | |
| { | |
| "epoch": 0.7824466984061271, | |
| "grad_norm": 1.7004814074017778, | |
| "learning_rate": 6.277481323372466e-06, | |
| "loss": 0.73256617, | |
| "memory(GiB)": 71.94, | |
| "step": 1890, | |
| "train_speed(iter/s)": 0.020071 | |
| }, | |
| { | |
| "epoch": 0.7845166632167253, | |
| "grad_norm": 1.7170761577962488, | |
| "learning_rate": 6.2668089647812166e-06, | |
| "loss": 0.71450982, | |
| "memory(GiB)": 71.94, | |
| "step": 1895, | |
| "train_speed(iter/s)": 0.020071 | |
| }, | |
| { | |
| "epoch": 0.7865866280273235, | |
| "grad_norm": 2.0086204171681565, | |
| "learning_rate": 6.2561366061899685e-06, | |
| "loss": 0.7096673, | |
| "memory(GiB)": 71.94, | |
| "step": 1900, | |
| "train_speed(iter/s)": 0.020072 | |
| }, | |
| { | |
| "epoch": 0.7886565928379218, | |
| "grad_norm": 1.6109075949007228, | |
| "learning_rate": 6.2454642475987195e-06, | |
| "loss": 0.7170536, | |
| "memory(GiB)": 71.94, | |
| "step": 1905, | |
| "train_speed(iter/s)": 0.020073 | |
| }, | |
| { | |
| "epoch": 0.79072655764852, | |
| "grad_norm": 1.6468982825229455, | |
| "learning_rate": 6.234791889007471e-06, | |
| "loss": 0.7817646, | |
| "memory(GiB)": 71.94, | |
| "step": 1910, | |
| "train_speed(iter/s)": 0.020073 | |
| }, | |
| { | |
| "epoch": 0.7927965224591182, | |
| "grad_norm": 1.8405361482523723, | |
| "learning_rate": 6.224119530416222e-06, | |
| "loss": 0.71389322, | |
| "memory(GiB)": 71.94, | |
| "step": 1915, | |
| "train_speed(iter/s)": 0.020075 | |
| }, | |
| { | |
| "epoch": 0.7948664872697164, | |
| "grad_norm": 1.7937559729338877, | |
| "learning_rate": 6.2134471718249735e-06, | |
| "loss": 0.72494421, | |
| "memory(GiB)": 71.94, | |
| "step": 1920, | |
| "train_speed(iter/s)": 0.020076 | |
| }, | |
| { | |
| "epoch": 0.7969364520803146, | |
| "grad_norm": 1.98762799360225, | |
| "learning_rate": 6.202774813233725e-06, | |
| "loss": 0.75637407, | |
| "memory(GiB)": 71.94, | |
| "step": 1925, | |
| "train_speed(iter/s)": 0.020077 | |
| }, | |
| { | |
| "epoch": 0.7990064168909129, | |
| "grad_norm": 2.469167716565665, | |
| "learning_rate": 6.192102454642477e-06, | |
| "loss": 0.71725979, | |
| "memory(GiB)": 71.94, | |
| "step": 1930, | |
| "train_speed(iter/s)": 0.020078 | |
| }, | |
| { | |
| "epoch": 0.8010763817015111, | |
| "grad_norm": 1.6526117746871118, | |
| "learning_rate": 6.181430096051227e-06, | |
| "loss": 0.73172369, | |
| "memory(GiB)": 71.94, | |
| "step": 1935, | |
| "train_speed(iter/s)": 0.020079 | |
| }, | |
| { | |
| "epoch": 0.8031463465121093, | |
| "grad_norm": 1.8881085526929478, | |
| "learning_rate": 6.170757737459979e-06, | |
| "loss": 0.68869176, | |
| "memory(GiB)": 71.94, | |
| "step": 1940, | |
| "train_speed(iter/s)": 0.02008 | |
| }, | |
| { | |
| "epoch": 0.8052163113227075, | |
| "grad_norm": 2.1341112552467107, | |
| "learning_rate": 6.160085378868731e-06, | |
| "loss": 0.74425526, | |
| "memory(GiB)": 71.94, | |
| "step": 1945, | |
| "train_speed(iter/s)": 0.020081 | |
| }, | |
| { | |
| "epoch": 0.8072862761333057, | |
| "grad_norm": 2.1587279161379906, | |
| "learning_rate": 6.149413020277481e-06, | |
| "loss": 0.72686901, | |
| "memory(GiB)": 71.94, | |
| "step": 1950, | |
| "train_speed(iter/s)": 0.020081 | |
| }, | |
| { | |
| "epoch": 0.8093562409439039, | |
| "grad_norm": 1.7183166805211196, | |
| "learning_rate": 6.138740661686233e-06, | |
| "loss": 0.70801015, | |
| "memory(GiB)": 71.94, | |
| "step": 1955, | |
| "train_speed(iter/s)": 0.020082 | |
| }, | |
| { | |
| "epoch": 0.8114262057545022, | |
| "grad_norm": 1.6918548000232365, | |
| "learning_rate": 6.128068303094985e-06, | |
| "loss": 0.71407743, | |
| "memory(GiB)": 71.94, | |
| "step": 1960, | |
| "train_speed(iter/s)": 0.020083 | |
| }, | |
| { | |
| "epoch": 0.8134961705651004, | |
| "grad_norm": 1.6783017839137153, | |
| "learning_rate": 6.117395944503735e-06, | |
| "loss": 0.74968853, | |
| "memory(GiB)": 71.94, | |
| "step": 1965, | |
| "train_speed(iter/s)": 0.020083 | |
| }, | |
| { | |
| "epoch": 0.8155661353756987, | |
| "grad_norm": 1.7117864547494115, | |
| "learning_rate": 6.106723585912487e-06, | |
| "loss": 0.71134882, | |
| "memory(GiB)": 71.94, | |
| "step": 1970, | |
| "train_speed(iter/s)": 0.020085 | |
| }, | |
| { | |
| "epoch": 0.8176361001862968, | |
| "grad_norm": 1.9043254368072582, | |
| "learning_rate": 6.096051227321238e-06, | |
| "loss": 0.74376335, | |
| "memory(GiB)": 71.94, | |
| "step": 1975, | |
| "train_speed(iter/s)": 0.020085 | |
| }, | |
| { | |
| "epoch": 0.819706064996895, | |
| "grad_norm": 1.8416766338299921, | |
| "learning_rate": 6.08537886872999e-06, | |
| "loss": 0.74048576, | |
| "memory(GiB)": 71.94, | |
| "step": 1980, | |
| "train_speed(iter/s)": 0.020086 | |
| }, | |
| { | |
| "epoch": 0.8217760298074933, | |
| "grad_norm": 1.9993092375152783, | |
| "learning_rate": 6.074706510138741e-06, | |
| "loss": 0.73070145, | |
| "memory(GiB)": 71.94, | |
| "step": 1985, | |
| "train_speed(iter/s)": 0.020087 | |
| }, | |
| { | |
| "epoch": 0.8238459946180915, | |
| "grad_norm": 1.9255287807426156, | |
| "learning_rate": 6.064034151547492e-06, | |
| "loss": 0.71732969, | |
| "memory(GiB)": 71.94, | |
| "step": 1990, | |
| "train_speed(iter/s)": 0.020088 | |
| }, | |
| { | |
| "epoch": 0.8259159594286897, | |
| "grad_norm": 1.699758287154301, | |
| "learning_rate": 6.053361792956244e-06, | |
| "loss": 0.70977745, | |
| "memory(GiB)": 71.94, | |
| "step": 1995, | |
| "train_speed(iter/s)": 0.020089 | |
| }, | |
| { | |
| "epoch": 0.827985924239288, | |
| "grad_norm": 1.7447876663920783, | |
| "learning_rate": 6.042689434364995e-06, | |
| "loss": 0.72701621, | |
| "memory(GiB)": 71.94, | |
| "step": 2000, | |
| "train_speed(iter/s)": 0.02009 | |
| }, | |
| { | |
| "epoch": 0.8300558890498861, | |
| "grad_norm": 1.8875191736470693, | |
| "learning_rate": 6.032017075773746e-06, | |
| "loss": 0.72644696, | |
| "memory(GiB)": 71.94, | |
| "step": 2005, | |
| "train_speed(iter/s)": 0.02009 | |
| }, | |
| { | |
| "epoch": 0.8321258538604843, | |
| "grad_norm": 1.6472530019204896, | |
| "learning_rate": 6.021344717182498e-06, | |
| "loss": 0.70005097, | |
| "memory(GiB)": 71.94, | |
| "step": 2010, | |
| "train_speed(iter/s)": 0.020091 | |
| }, | |
| { | |
| "epoch": 0.8341958186710826, | |
| "grad_norm": 2.2818513681921266, | |
| "learning_rate": 6.01067235859125e-06, | |
| "loss": 0.72365198, | |
| "memory(GiB)": 71.94, | |
| "step": 2015, | |
| "train_speed(iter/s)": 0.020092 | |
| }, | |
| { | |
| "epoch": 0.8362657834816808, | |
| "grad_norm": 1.8205665836409684, | |
| "learning_rate": 6e-06, | |
| "loss": 0.7322978, | |
| "memory(GiB)": 71.94, | |
| "step": 2020, | |
| "train_speed(iter/s)": 0.020092 | |
| }, | |
| { | |
| "epoch": 0.8383357482922791, | |
| "grad_norm": 1.980690292432822, | |
| "learning_rate": 5.989327641408752e-06, | |
| "loss": 0.73382425, | |
| "memory(GiB)": 71.94, | |
| "step": 2025, | |
| "train_speed(iter/s)": 0.020093 | |
| }, | |
| { | |
| "epoch": 0.8404057131028773, | |
| "grad_norm": 1.7748584169287815, | |
| "learning_rate": 5.978655282817502e-06, | |
| "loss": 0.7543438, | |
| "memory(GiB)": 71.94, | |
| "step": 2030, | |
| "train_speed(iter/s)": 0.020094 | |
| }, | |
| { | |
| "epoch": 0.8424756779134754, | |
| "grad_norm": 1.9477910790390784, | |
| "learning_rate": 5.967982924226254e-06, | |
| "loss": 0.73893185, | |
| "memory(GiB)": 71.94, | |
| "step": 2035, | |
| "train_speed(iter/s)": 0.020095 | |
| }, | |
| { | |
| "epoch": 0.8445456427240737, | |
| "grad_norm": 1.5695526526206898, | |
| "learning_rate": 5.957310565635006e-06, | |
| "loss": 0.71403108, | |
| "memory(GiB)": 71.94, | |
| "step": 2040, | |
| "train_speed(iter/s)": 0.020096 | |
| }, | |
| { | |
| "epoch": 0.8466156075346719, | |
| "grad_norm": 2.1517602299856557, | |
| "learning_rate": 5.946638207043757e-06, | |
| "loss": 0.71713848, | |
| "memory(GiB)": 71.94, | |
| "step": 2045, | |
| "train_speed(iter/s)": 0.020097 | |
| }, | |
| { | |
| "epoch": 0.8486855723452701, | |
| "grad_norm": 2.739525728221928, | |
| "learning_rate": 5.935965848452508e-06, | |
| "loss": 0.72253714, | |
| "memory(GiB)": 71.94, | |
| "step": 2050, | |
| "train_speed(iter/s)": 0.020099 | |
| }, | |
| { | |
| "epoch": 0.8507555371558684, | |
| "grad_norm": 1.9454548994868823, | |
| "learning_rate": 5.92529348986126e-06, | |
| "loss": 0.74796634, | |
| "memory(GiB)": 71.94, | |
| "step": 2055, | |
| "train_speed(iter/s)": 0.0201 | |
| }, | |
| { | |
| "epoch": 0.8528255019664666, | |
| "grad_norm": 1.7996528216814918, | |
| "learning_rate": 5.914621131270011e-06, | |
| "loss": 0.71262107, | |
| "memory(GiB)": 71.94, | |
| "step": 2060, | |
| "train_speed(iter/s)": 0.020101 | |
| }, | |
| { | |
| "epoch": 0.8548954667770647, | |
| "grad_norm": 1.9001952709840753, | |
| "learning_rate": 5.903948772678763e-06, | |
| "loss": 0.71814876, | |
| "memory(GiB)": 71.94, | |
| "step": 2065, | |
| "train_speed(iter/s)": 0.020101 | |
| }, | |
| { | |
| "epoch": 0.856965431587663, | |
| "grad_norm": 3.4631327121051036, | |
| "learning_rate": 5.893276414087514e-06, | |
| "loss": 0.70359154, | |
| "memory(GiB)": 71.94, | |
| "step": 2070, | |
| "train_speed(iter/s)": 0.020103 | |
| }, | |
| { | |
| "epoch": 0.8590353963982612, | |
| "grad_norm": 1.9078035163840932, | |
| "learning_rate": 5.882604055496265e-06, | |
| "loss": 0.74100571, | |
| "memory(GiB)": 71.94, | |
| "step": 2075, | |
| "train_speed(iter/s)": 0.020103 | |
| }, | |
| { | |
| "epoch": 0.8611053612088595, | |
| "grad_norm": 2.7698267455997576, | |
| "learning_rate": 5.871931696905017e-06, | |
| "loss": 0.71972656, | |
| "memory(GiB)": 71.94, | |
| "step": 2080, | |
| "train_speed(iter/s)": 0.020104 | |
| }, | |
| { | |
| "epoch": 0.8631753260194577, | |
| "grad_norm": 1.9640858230267009, | |
| "learning_rate": 5.861259338313769e-06, | |
| "loss": 0.71868258, | |
| "memory(GiB)": 71.94, | |
| "step": 2085, | |
| "train_speed(iter/s)": 0.020105 | |
| }, | |
| { | |
| "epoch": 0.8652452908300559, | |
| "grad_norm": 1.9593324236104832, | |
| "learning_rate": 5.850586979722519e-06, | |
| "loss": 0.70707326, | |
| "memory(GiB)": 71.94, | |
| "step": 2090, | |
| "train_speed(iter/s)": 0.020106 | |
| }, | |
| { | |
| "epoch": 0.8673152556406541, | |
| "grad_norm": 2.0337621679872946, | |
| "learning_rate": 5.839914621131271e-06, | |
| "loss": 0.7303226, | |
| "memory(GiB)": 71.94, | |
| "step": 2095, | |
| "train_speed(iter/s)": 0.020107 | |
| }, | |
| { | |
| "epoch": 0.8693852204512523, | |
| "grad_norm": 1.7464491411508778, | |
| "learning_rate": 5.829242262540021e-06, | |
| "loss": 0.70045071, | |
| "memory(GiB)": 71.94, | |
| "step": 2100, | |
| "train_speed(iter/s)": 0.020108 | |
| }, | |
| { | |
| "epoch": 0.8693852204512523, | |
| "eval_loss": 0.8155556321144104, | |
| "eval_runtime": 334.2741, | |
| "eval_samples_per_second": 18.685, | |
| "eval_steps_per_second": 1.17, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.8714551852618505, | |
| "grad_norm": 1.893698412703024, | |
| "learning_rate": 5.818569903948773e-06, | |
| "loss": 0.71434135, | |
| "memory(GiB)": 71.94, | |
| "step": 2105, | |
| "train_speed(iter/s)": 0.020036 | |
| }, | |
| { | |
| "epoch": 0.8735251500724488, | |
| "grad_norm": 1.8915863471870793, | |
| "learning_rate": 5.807897545357525e-06, | |
| "loss": 0.73264565, | |
| "memory(GiB)": 71.94, | |
| "step": 2110, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 0.875595114883047, | |
| "grad_norm": 1.5909748405215243, | |
| "learning_rate": 5.797225186766276e-06, | |
| "loss": 0.6809236, | |
| "memory(GiB)": 71.94, | |
| "step": 2115, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 0.8776650796936452, | |
| "grad_norm": 2.298029244846505, | |
| "learning_rate": 5.786552828175027e-06, | |
| "loss": 0.72660675, | |
| "memory(GiB)": 71.94, | |
| "step": 2120, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 0.8797350445042434, | |
| "grad_norm": 2.4471148736933634, | |
| "learning_rate": 5.775880469583779e-06, | |
| "loss": 0.71458502, | |
| "memory(GiB)": 71.94, | |
| "step": 2125, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 0.8818050093148416, | |
| "grad_norm": 2.237641446928337, | |
| "learning_rate": 5.76520811099253e-06, | |
| "loss": 0.72527971, | |
| "memory(GiB)": 71.94, | |
| "step": 2130, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 0.8838749741254399, | |
| "grad_norm": 2.1373854033173667, | |
| "learning_rate": 5.754535752401282e-06, | |
| "loss": 0.68969226, | |
| "memory(GiB)": 71.94, | |
| "step": 2135, | |
| "train_speed(iter/s)": 0.020041 | |
| }, | |
| { | |
| "epoch": 0.8859449389360381, | |
| "grad_norm": 2.4814505236104254, | |
| "learning_rate": 5.743863393810033e-06, | |
| "loss": 0.72110472, | |
| "memory(GiB)": 71.94, | |
| "step": 2140, | |
| "train_speed(iter/s)": 0.020042 | |
| }, | |
| { | |
| "epoch": 0.8880149037466363, | |
| "grad_norm": 1.8846949427239792, | |
| "learning_rate": 5.733191035218784e-06, | |
| "loss": 0.71526957, | |
| "memory(GiB)": 71.94, | |
| "step": 2145, | |
| "train_speed(iter/s)": 0.020043 | |
| }, | |
| { | |
| "epoch": 0.8900848685572346, | |
| "grad_norm": 1.6414510600406789, | |
| "learning_rate": 5.722518676627536e-06, | |
| "loss": 0.72875662, | |
| "memory(GiB)": 71.94, | |
| "step": 2150, | |
| "train_speed(iter/s)": 0.020043 | |
| }, | |
| { | |
| "epoch": 0.8921548333678327, | |
| "grad_norm": 2.027904279012944, | |
| "learning_rate": 5.711846318036286e-06, | |
| "loss": 0.73166924, | |
| "memory(GiB)": 71.94, | |
| "step": 2155, | |
| "train_speed(iter/s)": 0.020044 | |
| }, | |
| { | |
| "epoch": 0.8942247981784309, | |
| "grad_norm": 2.0007554119149136, | |
| "learning_rate": 5.701173959445038e-06, | |
| "loss": 0.69900818, | |
| "memory(GiB)": 71.94, | |
| "step": 2160, | |
| "train_speed(iter/s)": 0.020046 | |
| }, | |
| { | |
| "epoch": 0.8962947629890292, | |
| "grad_norm": 1.732929144774659, | |
| "learning_rate": 5.69050160085379e-06, | |
| "loss": 0.70091386, | |
| "memory(GiB)": 71.94, | |
| "step": 2165, | |
| "train_speed(iter/s)": 0.020046 | |
| }, | |
| { | |
| "epoch": 0.8983647277996274, | |
| "grad_norm": 1.6575807865879337, | |
| "learning_rate": 5.67982924226254e-06, | |
| "loss": 0.70530014, | |
| "memory(GiB)": 71.94, | |
| "step": 2170, | |
| "train_speed(iter/s)": 0.020047 | |
| }, | |
| { | |
| "epoch": 0.9004346926102256, | |
| "grad_norm": 1.8402505669080536, | |
| "learning_rate": 5.669156883671292e-06, | |
| "loss": 0.72022433, | |
| "memory(GiB)": 71.94, | |
| "step": 2175, | |
| "train_speed(iter/s)": 0.020048 | |
| }, | |
| { | |
| "epoch": 0.9025046574208239, | |
| "grad_norm": 1.8122270786550385, | |
| "learning_rate": 5.6584845250800435e-06, | |
| "loss": 0.67802505, | |
| "memory(GiB)": 71.94, | |
| "step": 2180, | |
| "train_speed(iter/s)": 0.020048 | |
| }, | |
| { | |
| "epoch": 0.904574622231422, | |
| "grad_norm": 1.7969445274298348, | |
| "learning_rate": 5.647812166488794e-06, | |
| "loss": 0.70636382, | |
| "memory(GiB)": 71.94, | |
| "step": 2185, | |
| "train_speed(iter/s)": 0.020049 | |
| }, | |
| { | |
| "epoch": 0.9066445870420203, | |
| "grad_norm": 2.1142537074713412, | |
| "learning_rate": 5.637139807897546e-06, | |
| "loss": 0.71558409, | |
| "memory(GiB)": 71.94, | |
| "step": 2190, | |
| "train_speed(iter/s)": 0.02005 | |
| }, | |
| { | |
| "epoch": 0.9087145518526185, | |
| "grad_norm": 1.944413383820558, | |
| "learning_rate": 5.6264674493062975e-06, | |
| "loss": 0.72004523, | |
| "memory(GiB)": 71.94, | |
| "step": 2195, | |
| "train_speed(iter/s)": 0.020051 | |
| }, | |
| { | |
| "epoch": 0.9107845166632167, | |
| "grad_norm": 2.3051719970776934, | |
| "learning_rate": 5.6157950907150485e-06, | |
| "loss": 0.7308382, | |
| "memory(GiB)": 71.94, | |
| "step": 2200, | |
| "train_speed(iter/s)": 0.020052 | |
| }, | |
| { | |
| "epoch": 0.912854481473815, | |
| "grad_norm": 2.27411033803044, | |
| "learning_rate": 5.6051227321238e-06, | |
| "loss": 0.74844613, | |
| "memory(GiB)": 71.94, | |
| "step": 2205, | |
| "train_speed(iter/s)": 0.020053 | |
| }, | |
| { | |
| "epoch": 0.9149244462844132, | |
| "grad_norm": 1.8444250783225764, | |
| "learning_rate": 5.594450373532551e-06, | |
| "loss": 0.68941135, | |
| "memory(GiB)": 71.94, | |
| "step": 2210, | |
| "train_speed(iter/s)": 0.020053 | |
| }, | |
| { | |
| "epoch": 0.9169944110950113, | |
| "grad_norm": 1.8662325411124825, | |
| "learning_rate": 5.5837780149413025e-06, | |
| "loss": 0.73066435, | |
| "memory(GiB)": 71.94, | |
| "step": 2215, | |
| "train_speed(iter/s)": 0.020054 | |
| }, | |
| { | |
| "epoch": 0.9190643759056096, | |
| "grad_norm": 1.6833532844662813, | |
| "learning_rate": 5.573105656350054e-06, | |
| "loss": 0.7062602, | |
| "memory(GiB)": 71.94, | |
| "step": 2220, | |
| "train_speed(iter/s)": 0.020055 | |
| }, | |
| { | |
| "epoch": 0.9211343407162078, | |
| "grad_norm": 1.6070808318678096, | |
| "learning_rate": 5.562433297758805e-06, | |
| "loss": 0.69585543, | |
| "memory(GiB)": 71.94, | |
| "step": 2225, | |
| "train_speed(iter/s)": 0.020055 | |
| }, | |
| { | |
| "epoch": 0.923204305526806, | |
| "grad_norm": 2.0016548598313024, | |
| "learning_rate": 5.5517609391675565e-06, | |
| "loss": 0.7085475, | |
| "memory(GiB)": 71.94, | |
| "step": 2230, | |
| "train_speed(iter/s)": 0.020056 | |
| }, | |
| { | |
| "epoch": 0.9252742703374043, | |
| "grad_norm": 1.9201243304059477, | |
| "learning_rate": 5.541088580576308e-06, | |
| "loss": 0.71516237, | |
| "memory(GiB)": 71.94, | |
| "step": 2235, | |
| "train_speed(iter/s)": 0.020057 | |
| }, | |
| { | |
| "epoch": 0.9273442351480025, | |
| "grad_norm": 1.9055608045022892, | |
| "learning_rate": 5.5304162219850586e-06, | |
| "loss": 0.69740834, | |
| "memory(GiB)": 71.94, | |
| "step": 2240, | |
| "train_speed(iter/s)": 0.020058 | |
| }, | |
| { | |
| "epoch": 0.9294141999586008, | |
| "grad_norm": 2.0041753291659026, | |
| "learning_rate": 5.5197438633938104e-06, | |
| "loss": 0.71469507, | |
| "memory(GiB)": 71.94, | |
| "step": 2245, | |
| "train_speed(iter/s)": 0.02006 | |
| }, | |
| { | |
| "epoch": 0.9314841647691989, | |
| "grad_norm": 2.025723530711083, | |
| "learning_rate": 5.509071504802562e-06, | |
| "loss": 0.69406614, | |
| "memory(GiB)": 71.94, | |
| "step": 2250, | |
| "train_speed(iter/s)": 0.02006 | |
| }, | |
| { | |
| "epoch": 0.9335541295797971, | |
| "grad_norm": 1.7845786824484478, | |
| "learning_rate": 5.4983991462113125e-06, | |
| "loss": 0.7137248, | |
| "memory(GiB)": 71.94, | |
| "step": 2255, | |
| "train_speed(iter/s)": 0.020061 | |
| }, | |
| { | |
| "epoch": 0.9356240943903954, | |
| "grad_norm": 2.3504717810438196, | |
| "learning_rate": 5.487726787620064e-06, | |
| "loss": 0.70752907, | |
| "memory(GiB)": 71.94, | |
| "step": 2260, | |
| "train_speed(iter/s)": 0.020062 | |
| }, | |
| { | |
| "epoch": 0.9376940592009936, | |
| "grad_norm": 2.0225261644141797, | |
| "learning_rate": 5.477054429028816e-06, | |
| "loss": 0.70490198, | |
| "memory(GiB)": 71.94, | |
| "step": 2265, | |
| "train_speed(iter/s)": 0.020063 | |
| }, | |
| { | |
| "epoch": 0.9397640240115918, | |
| "grad_norm": 2.2107863770119747, | |
| "learning_rate": 5.466382070437567e-06, | |
| "loss": 0.71501665, | |
| "memory(GiB)": 71.94, | |
| "step": 2270, | |
| "train_speed(iter/s)": 0.020064 | |
| }, | |
| { | |
| "epoch": 0.94183398882219, | |
| "grad_norm": 1.9030684437330223, | |
| "learning_rate": 5.455709711846318e-06, | |
| "loss": 0.70587921, | |
| "memory(GiB)": 71.94, | |
| "step": 2275, | |
| "train_speed(iter/s)": 0.020064 | |
| }, | |
| { | |
| "epoch": 0.9439039536327882, | |
| "grad_norm": 1.8981878877998872, | |
| "learning_rate": 5.445037353255069e-06, | |
| "loss": 0.70294933, | |
| "memory(GiB)": 71.94, | |
| "step": 2280, | |
| "train_speed(iter/s)": 0.020064 | |
| }, | |
| { | |
| "epoch": 0.9459739184433864, | |
| "grad_norm": 1.7195321236677932, | |
| "learning_rate": 5.434364994663821e-06, | |
| "loss": 0.71657829, | |
| "memory(GiB)": 71.94, | |
| "step": 2285, | |
| "train_speed(iter/s)": 0.020065 | |
| }, | |
| { | |
| "epoch": 0.9480438832539847, | |
| "grad_norm": 1.6695574824900545, | |
| "learning_rate": 5.423692636072573e-06, | |
| "loss": 0.70917149, | |
| "memory(GiB)": 71.94, | |
| "step": 2290, | |
| "train_speed(iter/s)": 0.020065 | |
| }, | |
| { | |
| "epoch": 0.9501138480645829, | |
| "grad_norm": 1.7410897548688689, | |
| "learning_rate": 5.413020277481323e-06, | |
| "loss": 0.7276587, | |
| "memory(GiB)": 71.94, | |
| "step": 2295, | |
| "train_speed(iter/s)": 0.020066 | |
| }, | |
| { | |
| "epoch": 0.9521838128751812, | |
| "grad_norm": 1.6737135024901502, | |
| "learning_rate": 5.402347918890075e-06, | |
| "loss": 0.70822001, | |
| "memory(GiB)": 71.94, | |
| "step": 2300, | |
| "train_speed(iter/s)": 0.020066 | |
| }, | |
| { | |
| "epoch": 0.9542537776857793, | |
| "grad_norm": 1.7876076111815575, | |
| "learning_rate": 5.391675560298827e-06, | |
| "loss": 0.72815857, | |
| "memory(GiB)": 71.94, | |
| "step": 2305, | |
| "train_speed(iter/s)": 0.020067 | |
| }, | |
| { | |
| "epoch": 0.9563237424963775, | |
| "grad_norm": 1.653921158054905, | |
| "learning_rate": 5.381003201707577e-06, | |
| "loss": 0.715973, | |
| "memory(GiB)": 71.94, | |
| "step": 2310, | |
| "train_speed(iter/s)": 0.020068 | |
| }, | |
| { | |
| "epoch": 0.9583937073069758, | |
| "grad_norm": 1.7830026539914627, | |
| "learning_rate": 5.370330843116329e-06, | |
| "loss": 0.70955439, | |
| "memory(GiB)": 71.94, | |
| "step": 2315, | |
| "train_speed(iter/s)": 0.020068 | |
| }, | |
| { | |
| "epoch": 0.960463672117574, | |
| "grad_norm": 2.0794293583699712, | |
| "learning_rate": 5.359658484525081e-06, | |
| "loss": 0.71212959, | |
| "memory(GiB)": 71.94, | |
| "step": 2320, | |
| "train_speed(iter/s)": 0.020049 | |
| }, | |
| { | |
| "epoch": 0.9625336369281722, | |
| "grad_norm": 1.62651854843721, | |
| "learning_rate": 5.348986125933831e-06, | |
| "loss": 0.69347043, | |
| "memory(GiB)": 71.94, | |
| "step": 2325, | |
| "train_speed(iter/s)": 0.02005 | |
| }, | |
| { | |
| "epoch": 0.9646036017387705, | |
| "grad_norm": 1.6345937025216515, | |
| "learning_rate": 5.338313767342583e-06, | |
| "loss": 0.68745365, | |
| "memory(GiB)": 71.94, | |
| "step": 2330, | |
| "train_speed(iter/s)": 0.02005 | |
| }, | |
| { | |
| "epoch": 0.9666735665493686, | |
| "grad_norm": 2.1460062258102504, | |
| "learning_rate": 5.327641408751334e-06, | |
| "loss": 0.69751196, | |
| "memory(GiB)": 71.94, | |
| "step": 2335, | |
| "train_speed(iter/s)": 0.020051 | |
| }, | |
| { | |
| "epoch": 0.9687435313599668, | |
| "grad_norm": 1.8428729242460318, | |
| "learning_rate": 5.316969050160086e-06, | |
| "loss": 0.70150051, | |
| "memory(GiB)": 71.94, | |
| "step": 2340, | |
| "train_speed(iter/s)": 0.020052 | |
| }, | |
| { | |
| "epoch": 0.9708134961705651, | |
| "grad_norm": 1.870750640547904, | |
| "learning_rate": 5.306296691568837e-06, | |
| "loss": 0.67915797, | |
| "memory(GiB)": 71.94, | |
| "step": 2345, | |
| "train_speed(iter/s)": 0.020053 | |
| }, | |
| { | |
| "epoch": 0.9728834609811633, | |
| "grad_norm": 1.6984421405387677, | |
| "learning_rate": 5.295624332977588e-06, | |
| "loss": 0.71915126, | |
| "memory(GiB)": 71.94, | |
| "step": 2350, | |
| "train_speed(iter/s)": 0.020054 | |
| }, | |
| { | |
| "epoch": 0.9749534257917616, | |
| "grad_norm": 1.7839025594515001, | |
| "learning_rate": 5.28495197438634e-06, | |
| "loss": 0.69594321, | |
| "memory(GiB)": 71.94, | |
| "step": 2355, | |
| "train_speed(iter/s)": 0.020055 | |
| }, | |
| { | |
| "epoch": 0.9770233906023598, | |
| "grad_norm": 1.666815065361009, | |
| "learning_rate": 5.274279615795091e-06, | |
| "loss": 0.68858194, | |
| "memory(GiB)": 71.94, | |
| "step": 2360, | |
| "train_speed(iter/s)": 0.020055 | |
| }, | |
| { | |
| "epoch": 0.9790933554129579, | |
| "grad_norm": 1.6613287141536495, | |
| "learning_rate": 5.263607257203842e-06, | |
| "loss": 0.65968986, | |
| "memory(GiB)": 71.94, | |
| "step": 2365, | |
| "train_speed(iter/s)": 0.020056 | |
| }, | |
| { | |
| "epoch": 0.9811633202235562, | |
| "grad_norm": 1.5579649689164343, | |
| "learning_rate": 5.252934898612594e-06, | |
| "loss": 0.69255476, | |
| "memory(GiB)": 71.94, | |
| "step": 2370, | |
| "train_speed(iter/s)": 0.020057 | |
| }, | |
| { | |
| "epoch": 0.9832332850341544, | |
| "grad_norm": 1.7564735837589676, | |
| "learning_rate": 5.242262540021346e-06, | |
| "loss": 0.71395578, | |
| "memory(GiB)": 71.94, | |
| "step": 2375, | |
| "train_speed(iter/s)": 0.020057 | |
| }, | |
| { | |
| "epoch": 0.9853032498447526, | |
| "grad_norm": 2.0952603393058076, | |
| "learning_rate": 5.231590181430096e-06, | |
| "loss": 0.69916964, | |
| "memory(GiB)": 71.94, | |
| "step": 2380, | |
| "train_speed(iter/s)": 0.020058 | |
| }, | |
| { | |
| "epoch": 0.9873732146553509, | |
| "grad_norm": 1.851124802207451, | |
| "learning_rate": 5.220917822838848e-06, | |
| "loss": 0.6992053, | |
| "memory(GiB)": 71.94, | |
| "step": 2385, | |
| "train_speed(iter/s)": 0.020059 | |
| }, | |
| { | |
| "epoch": 0.9894431794659491, | |
| "grad_norm": 1.977421778833197, | |
| "learning_rate": 5.2102454642476e-06, | |
| "loss": 0.70937605, | |
| "memory(GiB)": 71.94, | |
| "step": 2390, | |
| "train_speed(iter/s)": 0.020059 | |
| }, | |
| { | |
| "epoch": 0.9915131442765474, | |
| "grad_norm": 1.8125821116129224, | |
| "learning_rate": 5.19957310565635e-06, | |
| "loss": 0.70189705, | |
| "memory(GiB)": 71.94, | |
| "step": 2395, | |
| "train_speed(iter/s)": 0.02006 | |
| }, | |
| { | |
| "epoch": 0.9935831090871455, | |
| "grad_norm": 1.8812587805267227, | |
| "learning_rate": 5.188900747065102e-06, | |
| "loss": 0.68958111, | |
| "memory(GiB)": 71.94, | |
| "step": 2400, | |
| "train_speed(iter/s)": 0.02006 | |
| }, | |
| { | |
| "epoch": 0.9935831090871455, | |
| "eval_loss": 0.812356173992157, | |
| "eval_runtime": 333.5694, | |
| "eval_samples_per_second": 18.725, | |
| "eval_steps_per_second": 1.172, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9956530738977437, | |
| "grad_norm": 1.77525759977015, | |
| "learning_rate": 5.178228388473853e-06, | |
| "loss": 0.71421309, | |
| "memory(GiB)": 71.94, | |
| "step": 2405, | |
| "train_speed(iter/s)": 0.019998 | |
| }, | |
| { | |
| "epoch": 0.997723038708342, | |
| "grad_norm": 1.9847604199741764, | |
| "learning_rate": 5.167556029882604e-06, | |
| "loss": 0.67816806, | |
| "memory(GiB)": 71.94, | |
| "step": 2410, | |
| "train_speed(iter/s)": 0.019999 | |
| }, | |
| { | |
| "epoch": 0.9997930035189402, | |
| "grad_norm": 1.8831401901251863, | |
| "learning_rate": 5.156883671291356e-06, | |
| "loss": 0.67297964, | |
| "memory(GiB)": 71.94, | |
| "step": 2415, | |
| "train_speed(iter/s)": 0.02 | |
| }, | |
| { | |
| "epoch": 1.0018629683295384, | |
| "grad_norm": 1.902600390439468, | |
| "learning_rate": 5.146211312700107e-06, | |
| "loss": 0.65476379, | |
| "memory(GiB)": 71.94, | |
| "step": 2420, | |
| "train_speed(iter/s)": 0.020001 | |
| }, | |
| { | |
| "epoch": 1.0039329331401365, | |
| "grad_norm": 1.8958163712669238, | |
| "learning_rate": 5.135538954108859e-06, | |
| "loss": 0.69827843, | |
| "memory(GiB)": 71.94, | |
| "step": 2425, | |
| "train_speed(iter/s)": 0.020002 | |
| }, | |
| { | |
| "epoch": 1.006002897950735, | |
| "grad_norm": 2.0893567670048774, | |
| "learning_rate": 5.12486659551761e-06, | |
| "loss": 0.6843009, | |
| "memory(GiB)": 71.94, | |
| "step": 2430, | |
| "train_speed(iter/s)": 0.020004 | |
| }, | |
| { | |
| "epoch": 1.0080728627613331, | |
| "grad_norm": 1.7283831963515028, | |
| "learning_rate": 5.114194236926361e-06, | |
| "loss": 0.66953688, | |
| "memory(GiB)": 71.94, | |
| "step": 2435, | |
| "train_speed(iter/s)": 0.020005 | |
| }, | |
| { | |
| "epoch": 1.0101428275719313, | |
| "grad_norm": 2.4550295961951023, | |
| "learning_rate": 5.103521878335113e-06, | |
| "loss": 0.6826447, | |
| "memory(GiB)": 71.94, | |
| "step": 2440, | |
| "train_speed(iter/s)": 0.020006 | |
| }, | |
| { | |
| "epoch": 1.0122127923825295, | |
| "grad_norm": 2.065348792100011, | |
| "learning_rate": 5.092849519743865e-06, | |
| "loss": 0.71306543, | |
| "memory(GiB)": 71.94, | |
| "step": 2445, | |
| "train_speed(iter/s)": 0.020006 | |
| }, | |
| { | |
| "epoch": 1.0142827571931277, | |
| "grad_norm": 1.8461278616269987, | |
| "learning_rate": 5.082177161152615e-06, | |
| "loss": 0.70268006, | |
| "memory(GiB)": 71.94, | |
| "step": 2450, | |
| "train_speed(iter/s)": 0.020007 | |
| }, | |
| { | |
| "epoch": 1.0163527220037258, | |
| "grad_norm": 1.8474179385577107, | |
| "learning_rate": 5.071504802561367e-06, | |
| "loss": 0.68759146, | |
| "memory(GiB)": 71.94, | |
| "step": 2455, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 1.0184226868143242, | |
| "grad_norm": 2.1221766243412556, | |
| "learning_rate": 5.060832443970117e-06, | |
| "loss": 0.70161247, | |
| "memory(GiB)": 71.94, | |
| "step": 2460, | |
| "train_speed(iter/s)": 0.020009 | |
| }, | |
| { | |
| "epoch": 1.0204926516249224, | |
| "grad_norm": 1.7992664488684018, | |
| "learning_rate": 5.050160085378869e-06, | |
| "loss": 0.70210953, | |
| "memory(GiB)": 71.94, | |
| "step": 2465, | |
| "train_speed(iter/s)": 0.02001 | |
| }, | |
| { | |
| "epoch": 1.0225626164355206, | |
| "grad_norm": 2.010160051422228, | |
| "learning_rate": 5.039487726787621e-06, | |
| "loss": 0.70377779, | |
| "memory(GiB)": 71.94, | |
| "step": 2470, | |
| "train_speed(iter/s)": 0.020011 | |
| }, | |
| { | |
| "epoch": 1.0246325812461188, | |
| "grad_norm": 1.677895969704363, | |
| "learning_rate": 5.028815368196372e-06, | |
| "loss": 0.71084547, | |
| "memory(GiB)": 71.94, | |
| "step": 2475, | |
| "train_speed(iter/s)": 0.020012 | |
| }, | |
| { | |
| "epoch": 1.026702546056717, | |
| "grad_norm": 1.9057688334203953, | |
| "learning_rate": 5.018143009605123e-06, | |
| "loss": 0.67874904, | |
| "memory(GiB)": 71.94, | |
| "step": 2480, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.0287725108673154, | |
| "grad_norm": 1.826208938410727, | |
| "learning_rate": 5.007470651013875e-06, | |
| "loss": 0.67107067, | |
| "memory(GiB)": 71.94, | |
| "step": 2485, | |
| "train_speed(iter/s)": 0.020014 | |
| }, | |
| { | |
| "epoch": 1.0308424756779135, | |
| "grad_norm": 2.0686042207681425, | |
| "learning_rate": 4.996798292422626e-06, | |
| "loss": 0.70525031, | |
| "memory(GiB)": 71.94, | |
| "step": 2490, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.0329124404885117, | |
| "grad_norm": 1.7733232551181717, | |
| "learning_rate": 4.986125933831378e-06, | |
| "loss": 0.68992233, | |
| "memory(GiB)": 71.94, | |
| "step": 2495, | |
| "train_speed(iter/s)": 0.020017 | |
| }, | |
| { | |
| "epoch": 1.03498240529911, | |
| "grad_norm": 1.7950220673083168, | |
| "learning_rate": 4.975453575240129e-06, | |
| "loss": 0.67175922, | |
| "memory(GiB)": 71.94, | |
| "step": 2500, | |
| "train_speed(iter/s)": 0.020017 | |
| }, | |
| { | |
| "epoch": 1.037052370109708, | |
| "grad_norm": 1.9232030990454307, | |
| "learning_rate": 4.96478121664888e-06, | |
| "loss": 0.70407228, | |
| "memory(GiB)": 71.94, | |
| "step": 2505, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.0391223349203063, | |
| "grad_norm": 1.708121386060437, | |
| "learning_rate": 4.9541088580576316e-06, | |
| "loss": 0.70078506, | |
| "memory(GiB)": 71.94, | |
| "step": 2510, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.0411922997309047, | |
| "grad_norm": 1.5750528842945233, | |
| "learning_rate": 4.943436499466383e-06, | |
| "loss": 0.66830945, | |
| "memory(GiB)": 71.94, | |
| "step": 2515, | |
| "train_speed(iter/s)": 0.020019 | |
| }, | |
| { | |
| "epoch": 1.0432622645415028, | |
| "grad_norm": 1.8181518834205428, | |
| "learning_rate": 4.932764140875134e-06, | |
| "loss": 0.69259553, | |
| "memory(GiB)": 71.94, | |
| "step": 2520, | |
| "train_speed(iter/s)": 0.020019 | |
| }, | |
| { | |
| "epoch": 1.045332229352101, | |
| "grad_norm": 1.551131375424082, | |
| "learning_rate": 4.9220917822838855e-06, | |
| "loss": 0.69300728, | |
| "memory(GiB)": 71.94, | |
| "step": 2525, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.0474021941626992, | |
| "grad_norm": 1.9636419108578083, | |
| "learning_rate": 4.9114194236926366e-06, | |
| "loss": 0.69065495, | |
| "memory(GiB)": 71.94, | |
| "step": 2530, | |
| "train_speed(iter/s)": 0.020021 | |
| }, | |
| { | |
| "epoch": 1.0494721589732974, | |
| "grad_norm": 2.2096885596291918, | |
| "learning_rate": 4.900747065101388e-06, | |
| "loss": 0.72614369, | |
| "memory(GiB)": 71.94, | |
| "step": 2535, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.0515421237838958, | |
| "grad_norm": 1.8198162773587976, | |
| "learning_rate": 4.890074706510139e-06, | |
| "loss": 0.68779688, | |
| "memory(GiB)": 71.94, | |
| "step": 2540, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.053612088594494, | |
| "grad_norm": 1.8767827748337365, | |
| "learning_rate": 4.8794023479188905e-06, | |
| "loss": 0.65902538, | |
| "memory(GiB)": 71.94, | |
| "step": 2545, | |
| "train_speed(iter/s)": 0.020024 | |
| }, | |
| { | |
| "epoch": 1.0556820534050921, | |
| "grad_norm": 1.774731724427948, | |
| "learning_rate": 4.8687299893276416e-06, | |
| "loss": 0.6885426, | |
| "memory(GiB)": 71.94, | |
| "step": 2550, | |
| "train_speed(iter/s)": 0.020025 | |
| }, | |
| { | |
| "epoch": 1.0577520182156903, | |
| "grad_norm": 1.7292661960079105, | |
| "learning_rate": 4.858057630736393e-06, | |
| "loss": 0.67627816, | |
| "memory(GiB)": 71.94, | |
| "step": 2555, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.0598219830262885, | |
| "grad_norm": 2.022917994019762, | |
| "learning_rate": 4.8473852721451445e-06, | |
| "loss": 0.69524202, | |
| "memory(GiB)": 71.94, | |
| "step": 2560, | |
| "train_speed(iter/s)": 0.020027 | |
| }, | |
| { | |
| "epoch": 1.0618919478368867, | |
| "grad_norm": 1.7503339393851076, | |
| "learning_rate": 4.8367129135538955e-06, | |
| "loss": 0.69861703, | |
| "memory(GiB)": 71.94, | |
| "step": 2565, | |
| "train_speed(iter/s)": 0.020028 | |
| }, | |
| { | |
| "epoch": 1.063961912647485, | |
| "grad_norm": 2.3241272289849126, | |
| "learning_rate": 4.826040554962647e-06, | |
| "loss": 0.69416137, | |
| "memory(GiB)": 71.94, | |
| "step": 2570, | |
| "train_speed(iter/s)": 0.020029 | |
| }, | |
| { | |
| "epoch": 1.0660318774580833, | |
| "grad_norm": 1.9145774620065716, | |
| "learning_rate": 4.8153681963713985e-06, | |
| "loss": 0.707304, | |
| "memory(GiB)": 71.94, | |
| "step": 2575, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.0681018422686814, | |
| "grad_norm": 2.2205440096454363, | |
| "learning_rate": 4.80469583778015e-06, | |
| "loss": 0.68690495, | |
| "memory(GiB)": 71.94, | |
| "step": 2580, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.0701718070792796, | |
| "grad_norm": 1.907681817748529, | |
| "learning_rate": 4.794023479188901e-06, | |
| "loss": 0.67836595, | |
| "memory(GiB)": 71.94, | |
| "step": 2585, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.0722417718898778, | |
| "grad_norm": 1.812113935386319, | |
| "learning_rate": 4.783351120597652e-06, | |
| "loss": 0.69949398, | |
| "memory(GiB)": 71.94, | |
| "step": 2590, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.0743117367004762, | |
| "grad_norm": 1.9890056601624129, | |
| "learning_rate": 4.7726787620064035e-06, | |
| "loss": 0.68311234, | |
| "memory(GiB)": 71.94, | |
| "step": 2595, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.0763817015110744, | |
| "grad_norm": 1.6389803143854558, | |
| "learning_rate": 4.762006403415155e-06, | |
| "loss": 0.68783474, | |
| "memory(GiB)": 71.94, | |
| "step": 2600, | |
| "train_speed(iter/s)": 0.020034 | |
| }, | |
| { | |
| "epoch": 1.0784516663216726, | |
| "grad_norm": 1.7243055433624384, | |
| "learning_rate": 4.751334044823906e-06, | |
| "loss": 0.68109202, | |
| "memory(GiB)": 71.94, | |
| "step": 2605, | |
| "train_speed(iter/s)": 0.020035 | |
| }, | |
| { | |
| "epoch": 1.0805216311322707, | |
| "grad_norm": 2.0564605414653356, | |
| "learning_rate": 4.740661686232657e-06, | |
| "loss": 0.67778783, | |
| "memory(GiB)": 71.94, | |
| "step": 2610, | |
| "train_speed(iter/s)": 0.020036 | |
| }, | |
| { | |
| "epoch": 1.082591595942869, | |
| "grad_norm": 1.9673716530688552, | |
| "learning_rate": 4.729989327641409e-06, | |
| "loss": 0.67061253, | |
| "memory(GiB)": 71.94, | |
| "step": 2615, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 1.084661560753467, | |
| "grad_norm": 2.256366938059263, | |
| "learning_rate": 4.71931696905016e-06, | |
| "loss": 0.6971777, | |
| "memory(GiB)": 71.94, | |
| "step": 2620, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 1.0867315255640655, | |
| "grad_norm": 2.092500467589189, | |
| "learning_rate": 4.708644610458911e-06, | |
| "loss": 0.67864285, | |
| "memory(GiB)": 71.94, | |
| "step": 2625, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.0888014903746637, | |
| "grad_norm": 1.7063119566002638, | |
| "learning_rate": 4.697972251867663e-06, | |
| "loss": 0.66528416, | |
| "memory(GiB)": 71.94, | |
| "step": 2630, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 1.0908714551852619, | |
| "grad_norm": 1.7825515797938636, | |
| "learning_rate": 4.687299893276414e-06, | |
| "loss": 0.67014618, | |
| "memory(GiB)": 71.94, | |
| "step": 2635, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 1.09294141999586, | |
| "grad_norm": 1.7034629950636981, | |
| "learning_rate": 4.676627534685166e-06, | |
| "loss": 0.67851248, | |
| "memory(GiB)": 71.94, | |
| "step": 2640, | |
| "train_speed(iter/s)": 0.020041 | |
| }, | |
| { | |
| "epoch": 1.0950113848064582, | |
| "grad_norm": 1.6535356357438644, | |
| "learning_rate": 4.665955176093917e-06, | |
| "loss": 0.69617391, | |
| "memory(GiB)": 71.94, | |
| "step": 2645, | |
| "train_speed(iter/s)": 0.020042 | |
| }, | |
| { | |
| "epoch": 1.0970813496170566, | |
| "grad_norm": 1.790083800076922, | |
| "learning_rate": 4.655282817502668e-06, | |
| "loss": 0.68771133, | |
| "memory(GiB)": 71.94, | |
| "step": 2650, | |
| "train_speed(iter/s)": 0.020042 | |
| }, | |
| { | |
| "epoch": 1.0991513144276548, | |
| "grad_norm": 1.9625451394764908, | |
| "learning_rate": 4.64461045891142e-06, | |
| "loss": 0.68705645, | |
| "memory(GiB)": 71.94, | |
| "step": 2655, | |
| "train_speed(iter/s)": 0.020043 | |
| }, | |
| { | |
| "epoch": 1.101221279238253, | |
| "grad_norm": 2.41883263144616, | |
| "learning_rate": 4.633938100320171e-06, | |
| "loss": 0.66960602, | |
| "memory(GiB)": 71.94, | |
| "step": 2660, | |
| "train_speed(iter/s)": 0.020044 | |
| }, | |
| { | |
| "epoch": 1.1032912440488511, | |
| "grad_norm": 1.6833879907399165, | |
| "learning_rate": 4.623265741728922e-06, | |
| "loss": 0.67477846, | |
| "memory(GiB)": 71.94, | |
| "step": 2665, | |
| "train_speed(iter/s)": 0.020045 | |
| }, | |
| { | |
| "epoch": 1.1053612088594493, | |
| "grad_norm": 1.84460802825979, | |
| "learning_rate": 4.612593383137674e-06, | |
| "loss": 0.66959124, | |
| "memory(GiB)": 71.94, | |
| "step": 2670, | |
| "train_speed(iter/s)": 0.020046 | |
| }, | |
| { | |
| "epoch": 1.1074311736700475, | |
| "grad_norm": 1.8226080355090242, | |
| "learning_rate": 4.601921024546425e-06, | |
| "loss": 0.66996727, | |
| "memory(GiB)": 71.94, | |
| "step": 2675, | |
| "train_speed(iter/s)": 0.020047 | |
| }, | |
| { | |
| "epoch": 1.109501138480646, | |
| "grad_norm": 1.9273809027924724, | |
| "learning_rate": 4.591248665955176e-06, | |
| "loss": 0.69185095, | |
| "memory(GiB)": 71.94, | |
| "step": 2680, | |
| "train_speed(iter/s)": 0.020048 | |
| }, | |
| { | |
| "epoch": 1.111571103291244, | |
| "grad_norm": 2.2322742496415517, | |
| "learning_rate": 4.580576307363927e-06, | |
| "loss": 0.70724788, | |
| "memory(GiB)": 71.94, | |
| "step": 2685, | |
| "train_speed(iter/s)": 0.020049 | |
| }, | |
| { | |
| "epoch": 1.1136410681018423, | |
| "grad_norm": 2.1881226444266133, | |
| "learning_rate": 4.569903948772679e-06, | |
| "loss": 0.67986469, | |
| "memory(GiB)": 71.94, | |
| "step": 2690, | |
| "train_speed(iter/s)": 0.020049 | |
| }, | |
| { | |
| "epoch": 1.1157110329124404, | |
| "grad_norm": 1.9344503535315112, | |
| "learning_rate": 4.55923159018143e-06, | |
| "loss": 0.6932023, | |
| "memory(GiB)": 71.94, | |
| "step": 2695, | |
| "train_speed(iter/s)": 0.02005 | |
| }, | |
| { | |
| "epoch": 1.1177809977230386, | |
| "grad_norm": 2.0197719509697056, | |
| "learning_rate": 4.548559231590182e-06, | |
| "loss": 0.67475772, | |
| "memory(GiB)": 71.94, | |
| "step": 2700, | |
| "train_speed(iter/s)": 0.020051 | |
| }, | |
| { | |
| "epoch": 1.1177809977230386, | |
| "eval_loss": 0.8077359795570374, | |
| "eval_runtime": 332.9193, | |
| "eval_samples_per_second": 18.761, | |
| "eval_steps_per_second": 1.174, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.119850962533637, | |
| "grad_norm": 1.8305900241415576, | |
| "learning_rate": 4.537886872998933e-06, | |
| "loss": 0.71444244, | |
| "memory(GiB)": 71.94, | |
| "step": 2705, | |
| "train_speed(iter/s)": 0.019995 | |
| }, | |
| { | |
| "epoch": 1.1219209273442352, | |
| "grad_norm": 1.9866747607495971, | |
| "learning_rate": 4.527214514407685e-06, | |
| "loss": 0.68752618, | |
| "memory(GiB)": 71.94, | |
| "step": 2710, | |
| "train_speed(iter/s)": 0.019996 | |
| }, | |
| { | |
| "epoch": 1.1239908921548334, | |
| "grad_norm": 1.8866009316885002, | |
| "learning_rate": 4.516542155816436e-06, | |
| "loss": 0.67673769, | |
| "memory(GiB)": 71.94, | |
| "step": 2715, | |
| "train_speed(iter/s)": 0.019997 | |
| }, | |
| { | |
| "epoch": 1.1260608569654316, | |
| "grad_norm": 1.8693665648036668, | |
| "learning_rate": 4.505869797225187e-06, | |
| "loss": 0.68302212, | |
| "memory(GiB)": 71.94, | |
| "step": 2720, | |
| "train_speed(iter/s)": 0.019997 | |
| }, | |
| { | |
| "epoch": 1.1281308217760297, | |
| "grad_norm": 2.088485544417028, | |
| "learning_rate": 4.495197438633939e-06, | |
| "loss": 0.69513187, | |
| "memory(GiB)": 71.94, | |
| "step": 2725, | |
| "train_speed(iter/s)": 0.019999 | |
| }, | |
| { | |
| "epoch": 1.1302007865866281, | |
| "grad_norm": 1.83149183273408, | |
| "learning_rate": 4.48452508004269e-06, | |
| "loss": 0.67474871, | |
| "memory(GiB)": 71.94, | |
| "step": 2730, | |
| "train_speed(iter/s)": 0.02 | |
| }, | |
| { | |
| "epoch": 1.1322707513972263, | |
| "grad_norm": 1.9681727198174188, | |
| "learning_rate": 4.473852721451441e-06, | |
| "loss": 0.6727396, | |
| "memory(GiB)": 71.94, | |
| "step": 2735, | |
| "train_speed(iter/s)": 0.020001 | |
| }, | |
| { | |
| "epoch": 1.1343407162078245, | |
| "grad_norm": 3.0386548884501288, | |
| "learning_rate": 4.463180362860193e-06, | |
| "loss": 0.69408131, | |
| "memory(GiB)": 71.94, | |
| "step": 2740, | |
| "train_speed(iter/s)": 0.020002 | |
| }, | |
| { | |
| "epoch": 1.1364106810184227, | |
| "grad_norm": 3.156158661144904, | |
| "learning_rate": 4.452508004268944e-06, | |
| "loss": 0.6340518, | |
| "memory(GiB)": 71.94, | |
| "step": 2745, | |
| "train_speed(iter/s)": 0.020003 | |
| }, | |
| { | |
| "epoch": 1.1384806458290209, | |
| "grad_norm": 2.005947464562104, | |
| "learning_rate": 4.441835645677695e-06, | |
| "loss": 0.68589301, | |
| "memory(GiB)": 71.94, | |
| "step": 2750, | |
| "train_speed(iter/s)": 0.020004 | |
| }, | |
| { | |
| "epoch": 1.140550610639619, | |
| "grad_norm": 1.8910732500138312, | |
| "learning_rate": 4.431163287086446e-06, | |
| "loss": 0.68320475, | |
| "memory(GiB)": 71.94, | |
| "step": 2755, | |
| "train_speed(iter/s)": 0.020005 | |
| }, | |
| { | |
| "epoch": 1.1426205754502172, | |
| "grad_norm": 1.5980061705957451, | |
| "learning_rate": 4.420490928495198e-06, | |
| "loss": 0.68288679, | |
| "memory(GiB)": 71.94, | |
| "step": 2760, | |
| "train_speed(iter/s)": 0.020006 | |
| }, | |
| { | |
| "epoch": 1.1446905402608156, | |
| "grad_norm": 2.1893217377555367, | |
| "learning_rate": 4.409818569903949e-06, | |
| "loss": 0.67679596, | |
| "memory(GiB)": 71.94, | |
| "step": 2765, | |
| "train_speed(iter/s)": 0.020007 | |
| }, | |
| { | |
| "epoch": 1.1467605050714138, | |
| "grad_norm": 1.6144531687050387, | |
| "learning_rate": 4.3991462113127e-06, | |
| "loss": 0.67341776, | |
| "memory(GiB)": 71.94, | |
| "step": 2770, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 1.148830469882012, | |
| "grad_norm": 1.7848487108948465, | |
| "learning_rate": 4.388473852721452e-06, | |
| "loss": 0.65265465, | |
| "memory(GiB)": 71.94, | |
| "step": 2775, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 1.1509004346926102, | |
| "grad_norm": 1.9198684626115081, | |
| "learning_rate": 4.377801494130203e-06, | |
| "loss": 0.65432949, | |
| "memory(GiB)": 71.94, | |
| "step": 2780, | |
| "train_speed(iter/s)": 0.02001 | |
| }, | |
| { | |
| "epoch": 1.1529703995032086, | |
| "grad_norm": 1.8075401444295365, | |
| "learning_rate": 4.367129135538955e-06, | |
| "loss": 0.66166754, | |
| "memory(GiB)": 71.94, | |
| "step": 2785, | |
| "train_speed(iter/s)": 0.020011 | |
| }, | |
| { | |
| "epoch": 1.1550403643138067, | |
| "grad_norm": 2.093007547124246, | |
| "learning_rate": 4.356456776947706e-06, | |
| "loss": 0.671489, | |
| "memory(GiB)": 71.94, | |
| "step": 2790, | |
| "train_speed(iter/s)": 0.020011 | |
| }, | |
| { | |
| "epoch": 1.157110329124405, | |
| "grad_norm": 2.155911339314564, | |
| "learning_rate": 4.345784418356458e-06, | |
| "loss": 0.66719484, | |
| "memory(GiB)": 71.94, | |
| "step": 2795, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.159180293935003, | |
| "grad_norm": 1.7929721304219823, | |
| "learning_rate": 4.335112059765209e-06, | |
| "loss": 0.65916939, | |
| "memory(GiB)": 71.94, | |
| "step": 2800, | |
| "train_speed(iter/s)": 0.020014 | |
| }, | |
| { | |
| "epoch": 1.1612502587456013, | |
| "grad_norm": 2.3382800828112695, | |
| "learning_rate": 4.32443970117396e-06, | |
| "loss": 0.68535299, | |
| "memory(GiB)": 71.94, | |
| "step": 2805, | |
| "train_speed(iter/s)": 0.020014 | |
| }, | |
| { | |
| "epoch": 1.1633202235561995, | |
| "grad_norm": 1.9393370355158424, | |
| "learning_rate": 4.313767342582711e-06, | |
| "loss": 0.68975515, | |
| "memory(GiB)": 71.94, | |
| "step": 2810, | |
| "train_speed(iter/s)": 0.020015 | |
| }, | |
| { | |
| "epoch": 1.1653901883667976, | |
| "grad_norm": 2.1161711862572172, | |
| "learning_rate": 4.303094983991463e-06, | |
| "loss": 0.68595347, | |
| "memory(GiB)": 71.94, | |
| "step": 2815, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.167460153177396, | |
| "grad_norm": 1.89196514815735, | |
| "learning_rate": 4.292422625400214e-06, | |
| "loss": 0.66012936, | |
| "memory(GiB)": 71.94, | |
| "step": 2820, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.1695301179879942, | |
| "grad_norm": 1.8601131110854523, | |
| "learning_rate": 4.281750266808965e-06, | |
| "loss": 0.68001904, | |
| "memory(GiB)": 71.94, | |
| "step": 2825, | |
| "train_speed(iter/s)": 0.020019 | |
| }, | |
| { | |
| "epoch": 1.1716000827985924, | |
| "grad_norm": 1.8930363611249428, | |
| "learning_rate": 4.271077908217717e-06, | |
| "loss": 0.66193466, | |
| "memory(GiB)": 71.94, | |
| "step": 2830, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.1736700476091906, | |
| "grad_norm": 1.651757107446397, | |
| "learning_rate": 4.260405549626468e-06, | |
| "loss": 0.67280817, | |
| "memory(GiB)": 71.94, | |
| "step": 2835, | |
| "train_speed(iter/s)": 0.020021 | |
| }, | |
| { | |
| "epoch": 1.175740012419789, | |
| "grad_norm": 1.6748291349437752, | |
| "learning_rate": 4.249733191035219e-06, | |
| "loss": 0.70100274, | |
| "memory(GiB)": 71.94, | |
| "step": 2840, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.1778099772303872, | |
| "grad_norm": 1.7834078724205271, | |
| "learning_rate": 4.239060832443971e-06, | |
| "loss": 0.64067845, | |
| "memory(GiB)": 71.94, | |
| "step": 2845, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.1798799420409853, | |
| "grad_norm": 1.9357492367842137, | |
| "learning_rate": 4.228388473852722e-06, | |
| "loss": 0.68998647, | |
| "memory(GiB)": 71.94, | |
| "step": 2850, | |
| "train_speed(iter/s)": 0.020024 | |
| }, | |
| { | |
| "epoch": 1.1819499068515835, | |
| "grad_norm": 1.9186551723129406, | |
| "learning_rate": 4.2177161152614736e-06, | |
| "loss": 0.67889709, | |
| "memory(GiB)": 71.94, | |
| "step": 2855, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.1840198716621817, | |
| "grad_norm": 1.9166194791943714, | |
| "learning_rate": 4.207043756670225e-06, | |
| "loss": 0.67329493, | |
| "memory(GiB)": 71.94, | |
| "step": 2860, | |
| "train_speed(iter/s)": 0.020024 | |
| }, | |
| { | |
| "epoch": 1.1860898364727799, | |
| "grad_norm": 1.9439517613212347, | |
| "learning_rate": 4.196371398078976e-06, | |
| "loss": 0.6614254, | |
| "memory(GiB)": 71.94, | |
| "step": 2865, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.188159801283378, | |
| "grad_norm": 1.6420722807797328, | |
| "learning_rate": 4.1856990394877275e-06, | |
| "loss": 0.67023277, | |
| "memory(GiB)": 71.94, | |
| "step": 2870, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.1902297660939765, | |
| "grad_norm": 1.7445706716402636, | |
| "learning_rate": 4.1750266808964786e-06, | |
| "loss": 0.67275252, | |
| "memory(GiB)": 71.94, | |
| "step": 2875, | |
| "train_speed(iter/s)": 0.020027 | |
| }, | |
| { | |
| "epoch": 1.1922997309045746, | |
| "grad_norm": 1.8273002856280824, | |
| "learning_rate": 4.16435432230523e-06, | |
| "loss": 0.65841031, | |
| "memory(GiB)": 71.94, | |
| "step": 2880, | |
| "train_speed(iter/s)": 0.020028 | |
| }, | |
| { | |
| "epoch": 1.1943696957151728, | |
| "grad_norm": 1.7392597436189736, | |
| "learning_rate": 4.1536819637139815e-06, | |
| "loss": 0.66225605, | |
| "memory(GiB)": 71.94, | |
| "step": 2885, | |
| "train_speed(iter/s)": 0.020029 | |
| }, | |
| { | |
| "epoch": 1.196439660525771, | |
| "grad_norm": 1.639200530922351, | |
| "learning_rate": 4.1430096051227325e-06, | |
| "loss": 0.65032516, | |
| "memory(GiB)": 71.94, | |
| "step": 2890, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.1985096253363694, | |
| "grad_norm": 1.8960982589133293, | |
| "learning_rate": 4.1323372465314836e-06, | |
| "loss": 0.69994841, | |
| "memory(GiB)": 71.94, | |
| "step": 2895, | |
| "train_speed(iter/s)": 0.020031 | |
| }, | |
| { | |
| "epoch": 1.2005795901469676, | |
| "grad_norm": 2.1518082070491613, | |
| "learning_rate": 4.121664887940235e-06, | |
| "loss": 0.64357581, | |
| "memory(GiB)": 71.94, | |
| "step": 2900, | |
| "train_speed(iter/s)": 0.020031 | |
| }, | |
| { | |
| "epoch": 1.2026495549575658, | |
| "grad_norm": 2.450141804935637, | |
| "learning_rate": 4.1109925293489865e-06, | |
| "loss": 0.68345547, | |
| "memory(GiB)": 71.94, | |
| "step": 2905, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.204719519768164, | |
| "grad_norm": 1.8681725857258895, | |
| "learning_rate": 4.1003201707577375e-06, | |
| "loss": 0.66512461, | |
| "memory(GiB)": 71.94, | |
| "step": 2910, | |
| "train_speed(iter/s)": 0.020033 | |
| }, | |
| { | |
| "epoch": 1.2067894845787621, | |
| "grad_norm": 2.2592923799668774, | |
| "learning_rate": 4.089647812166489e-06, | |
| "loss": 0.68196335, | |
| "memory(GiB)": 71.94, | |
| "step": 2915, | |
| "train_speed(iter/s)": 0.020034 | |
| }, | |
| { | |
| "epoch": 1.2088594493893603, | |
| "grad_norm": 2.0216041168873775, | |
| "learning_rate": 4.0789754535752404e-06, | |
| "loss": 0.6808126, | |
| "memory(GiB)": 71.94, | |
| "step": 2920, | |
| "train_speed(iter/s)": 0.020035 | |
| }, | |
| { | |
| "epoch": 1.2109294141999585, | |
| "grad_norm": 1.7659468605949793, | |
| "learning_rate": 4.0683030949839915e-06, | |
| "loss": 0.64283953, | |
| "memory(GiB)": 71.94, | |
| "step": 2925, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 1.2129993790105569, | |
| "grad_norm": 1.970065027416205, | |
| "learning_rate": 4.057630736392743e-06, | |
| "loss": 0.6555728, | |
| "memory(GiB)": 71.94, | |
| "step": 2930, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 1.215069343821155, | |
| "grad_norm": 1.998737392424493, | |
| "learning_rate": 4.046958377801494e-06, | |
| "loss": 0.6660428, | |
| "memory(GiB)": 71.94, | |
| "step": 2935, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.2171393086317532, | |
| "grad_norm": 2.1589153805149173, | |
| "learning_rate": 4.036286019210246e-06, | |
| "loss": 0.65805073, | |
| "memory(GiB)": 71.94, | |
| "step": 2940, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 1.2192092734423514, | |
| "grad_norm": 1.990247509684529, | |
| "learning_rate": 4.025613660618997e-06, | |
| "loss": 0.6413794, | |
| "memory(GiB)": 71.94, | |
| "step": 2945, | |
| "train_speed(iter/s)": 0.020041 | |
| }, | |
| { | |
| "epoch": 1.2212792382529498, | |
| "grad_norm": 2.0438116812415625, | |
| "learning_rate": 4.014941302027748e-06, | |
| "loss": 0.65000477, | |
| "memory(GiB)": 71.94, | |
| "step": 2950, | |
| "train_speed(iter/s)": 0.020042 | |
| }, | |
| { | |
| "epoch": 1.223349203063548, | |
| "grad_norm": 1.6608044995599232, | |
| "learning_rate": 4.004268943436499e-06, | |
| "loss": 0.65562267, | |
| "memory(GiB)": 71.94, | |
| "step": 2955, | |
| "train_speed(iter/s)": 0.020042 | |
| }, | |
| { | |
| "epoch": 1.2254191678741462, | |
| "grad_norm": 1.9291845395844707, | |
| "learning_rate": 3.993596584845251e-06, | |
| "loss": 0.67264051, | |
| "memory(GiB)": 71.94, | |
| "step": 2960, | |
| "train_speed(iter/s)": 0.020043 | |
| }, | |
| { | |
| "epoch": 1.2274891326847444, | |
| "grad_norm": 2.3618922331006753, | |
| "learning_rate": 3.982924226254002e-06, | |
| "loss": 0.68117104, | |
| "memory(GiB)": 71.94, | |
| "step": 2965, | |
| "train_speed(iter/s)": 0.020044 | |
| }, | |
| { | |
| "epoch": 1.2295590974953425, | |
| "grad_norm": 1.9561928806095756, | |
| "learning_rate": 3.972251867662753e-06, | |
| "loss": 0.6778089, | |
| "memory(GiB)": 71.94, | |
| "step": 2970, | |
| "train_speed(iter/s)": 0.020045 | |
| }, | |
| { | |
| "epoch": 1.2316290623059407, | |
| "grad_norm": 2.103021206596317, | |
| "learning_rate": 3.961579509071505e-06, | |
| "loss": 0.66529841, | |
| "memory(GiB)": 71.94, | |
| "step": 2975, | |
| "train_speed(iter/s)": 0.020046 | |
| }, | |
| { | |
| "epoch": 1.2336990271165391, | |
| "grad_norm": 1.6317138464756236, | |
| "learning_rate": 3.950907150480256e-06, | |
| "loss": 0.63005896, | |
| "memory(GiB)": 71.94, | |
| "step": 2980, | |
| "train_speed(iter/s)": 0.020047 | |
| }, | |
| { | |
| "epoch": 1.2357689919271373, | |
| "grad_norm": 1.8763154321083348, | |
| "learning_rate": 3.940234791889007e-06, | |
| "loss": 0.69782147, | |
| "memory(GiB)": 71.94, | |
| "step": 2985, | |
| "train_speed(iter/s)": 0.020048 | |
| }, | |
| { | |
| "epoch": 1.2378389567377355, | |
| "grad_norm": 2.12720513001939, | |
| "learning_rate": 3.929562433297759e-06, | |
| "loss": 0.67128716, | |
| "memory(GiB)": 71.94, | |
| "step": 2990, | |
| "train_speed(iter/s)": 0.020049 | |
| }, | |
| { | |
| "epoch": 1.2399089215483337, | |
| "grad_norm": 2.0560360745042243, | |
| "learning_rate": 3.91889007470651e-06, | |
| "loss": 0.66187463, | |
| "memory(GiB)": 71.94, | |
| "step": 2995, | |
| "train_speed(iter/s)": 0.02005 | |
| }, | |
| { | |
| "epoch": 1.2419788863589318, | |
| "grad_norm": 1.9884557277874149, | |
| "learning_rate": 3.908217716115262e-06, | |
| "loss": 0.68540969, | |
| "memory(GiB)": 71.94, | |
| "step": 3000, | |
| "train_speed(iter/s)": 0.020051 | |
| }, | |
| { | |
| "epoch": 1.2419788863589318, | |
| "eval_loss": 0.8033931255340576, | |
| "eval_runtime": 334.3532, | |
| "eval_samples_per_second": 18.681, | |
| "eval_steps_per_second": 1.169, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.2440488511695302, | |
| "grad_norm": 1.8132336089456924, | |
| "learning_rate": 3.897545357524013e-06, | |
| "loss": 0.65594606, | |
| "memory(GiB)": 71.94, | |
| "step": 3005, | |
| "train_speed(iter/s)": 0.020001 | |
| }, | |
| { | |
| "epoch": 1.2461188159801284, | |
| "grad_norm": 1.6982582415846914, | |
| "learning_rate": 3.886872998932765e-06, | |
| "loss": 0.65645032, | |
| "memory(GiB)": 71.94, | |
| "step": 3010, | |
| "train_speed(iter/s)": 0.020003 | |
| }, | |
| { | |
| "epoch": 1.2481887807907266, | |
| "grad_norm": 1.9942609739954071, | |
| "learning_rate": 3.876200640341516e-06, | |
| "loss": 0.66572218, | |
| "memory(GiB)": 71.94, | |
| "step": 3015, | |
| "train_speed(iter/s)": 0.020004 | |
| }, | |
| { | |
| "epoch": 1.2502587456013248, | |
| "grad_norm": 1.7952303972411592, | |
| "learning_rate": 3.865528281750267e-06, | |
| "loss": 0.64628544, | |
| "memory(GiB)": 71.94, | |
| "step": 3020, | |
| "train_speed(iter/s)": 0.020005 | |
| }, | |
| { | |
| "epoch": 1.252328710411923, | |
| "grad_norm": 1.9058576079441742, | |
| "learning_rate": 3.854855923159018e-06, | |
| "loss": 0.68286681, | |
| "memory(GiB)": 71.94, | |
| "step": 3025, | |
| "train_speed(iter/s)": 0.020005 | |
| }, | |
| { | |
| "epoch": 1.2543986752225211, | |
| "grad_norm": 2.4990275874354357, | |
| "learning_rate": 3.84418356456777e-06, | |
| "loss": 0.67756252, | |
| "memory(GiB)": 71.94, | |
| "step": 3030, | |
| "train_speed(iter/s)": 0.020006 | |
| }, | |
| { | |
| "epoch": 1.2564686400331193, | |
| "grad_norm": 2.18800214487155, | |
| "learning_rate": 3.833511205976521e-06, | |
| "loss": 0.65295424, | |
| "memory(GiB)": 71.94, | |
| "step": 3035, | |
| "train_speed(iter/s)": 0.020006 | |
| }, | |
| { | |
| "epoch": 1.2585386048437177, | |
| "grad_norm": 2.1525831686251737, | |
| "learning_rate": 3.822838847385272e-06, | |
| "loss": 0.66122398, | |
| "memory(GiB)": 71.94, | |
| "step": 3040, | |
| "train_speed(iter/s)": 0.020007 | |
| }, | |
| { | |
| "epoch": 1.260608569654316, | |
| "grad_norm": 2.1001976709383374, | |
| "learning_rate": 3.812166488794024e-06, | |
| "loss": 0.64958653, | |
| "memory(GiB)": 71.94, | |
| "step": 3045, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 1.262678534464914, | |
| "grad_norm": 1.8005403174974608, | |
| "learning_rate": 3.801494130202775e-06, | |
| "loss": 0.66733065, | |
| "memory(GiB)": 71.94, | |
| "step": 3050, | |
| "train_speed(iter/s)": 0.020009 | |
| }, | |
| { | |
| "epoch": 1.2647484992755123, | |
| "grad_norm": 2.0392708573153255, | |
| "learning_rate": 3.7908217716115265e-06, | |
| "loss": 0.65150566, | |
| "memory(GiB)": 71.94, | |
| "step": 3055, | |
| "train_speed(iter/s)": 0.02001 | |
| }, | |
| { | |
| "epoch": 1.2668184640861107, | |
| "grad_norm": 1.9818256506070209, | |
| "learning_rate": 3.7801494130202776e-06, | |
| "loss": 0.65937147, | |
| "memory(GiB)": 71.94, | |
| "step": 3060, | |
| "train_speed(iter/s)": 0.02001 | |
| }, | |
| { | |
| "epoch": 1.2688884288967088, | |
| "grad_norm": 2.091473795655044, | |
| "learning_rate": 3.7694770544290294e-06, | |
| "loss": 0.65645385, | |
| "memory(GiB)": 71.94, | |
| "step": 3065, | |
| "train_speed(iter/s)": 0.020011 | |
| }, | |
| { | |
| "epoch": 1.270958393707307, | |
| "grad_norm": 1.9887693066644563, | |
| "learning_rate": 3.7588046958377805e-06, | |
| "loss": 0.6356863, | |
| "memory(GiB)": 71.94, | |
| "step": 3070, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.2730283585179052, | |
| "grad_norm": 1.849373670863495, | |
| "learning_rate": 3.7481323372465315e-06, | |
| "loss": 0.64913082, | |
| "memory(GiB)": 71.94, | |
| "step": 3075, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.2750983233285034, | |
| "grad_norm": 2.8796661699092776, | |
| "learning_rate": 3.737459978655283e-06, | |
| "loss": 0.66612153, | |
| "memory(GiB)": 71.94, | |
| "step": 3080, | |
| "train_speed(iter/s)": 0.020015 | |
| }, | |
| { | |
| "epoch": 1.2771682881391015, | |
| "grad_norm": 1.9206629854919683, | |
| "learning_rate": 3.7267876200640345e-06, | |
| "loss": 0.65432153, | |
| "memory(GiB)": 71.94, | |
| "step": 3085, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.2792382529496997, | |
| "grad_norm": 2.0649234898157562, | |
| "learning_rate": 3.716115261472786e-06, | |
| "loss": 0.63801923, | |
| "memory(GiB)": 71.94, | |
| "step": 3090, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.2813082177602981, | |
| "grad_norm": 1.859875670695032, | |
| "learning_rate": 3.705442902881537e-06, | |
| "loss": 0.6548945, | |
| "memory(GiB)": 71.94, | |
| "step": 3095, | |
| "train_speed(iter/s)": 0.020017 | |
| }, | |
| { | |
| "epoch": 1.2833781825708963, | |
| "grad_norm": 1.767661952337357, | |
| "learning_rate": 3.694770544290289e-06, | |
| "loss": 0.64956121, | |
| "memory(GiB)": 71.94, | |
| "step": 3100, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.2854481473814945, | |
| "grad_norm": 1.8007833500981838, | |
| "learning_rate": 3.68409818569904e-06, | |
| "loss": 0.669453, | |
| "memory(GiB)": 71.94, | |
| "step": 3105, | |
| "train_speed(iter/s)": 0.020019 | |
| }, | |
| { | |
| "epoch": 1.2875181121920927, | |
| "grad_norm": 2.0099069002485863, | |
| "learning_rate": 3.673425827107791e-06, | |
| "loss": 0.67178354, | |
| "memory(GiB)": 71.94, | |
| "step": 3110, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.289588077002691, | |
| "grad_norm": 2.0463019564128886, | |
| "learning_rate": 3.6627534685165424e-06, | |
| "loss": 0.65595369, | |
| "memory(GiB)": 71.94, | |
| "step": 3115, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.2916580418132892, | |
| "grad_norm": 1.9152750413831356, | |
| "learning_rate": 3.652081109925294e-06, | |
| "loss": 0.65940399, | |
| "memory(GiB)": 71.94, | |
| "step": 3120, | |
| "train_speed(iter/s)": 0.020021 | |
| }, | |
| { | |
| "epoch": 1.2937280066238874, | |
| "grad_norm": 2.9816950881544306, | |
| "learning_rate": 3.6414087513340453e-06, | |
| "loss": 0.66090908, | |
| "memory(GiB)": 71.94, | |
| "step": 3125, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.2957979714344856, | |
| "grad_norm": 1.8591409546980415, | |
| "learning_rate": 3.6307363927427963e-06, | |
| "loss": 0.66902189, | |
| "memory(GiB)": 71.94, | |
| "step": 3130, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.2978679362450838, | |
| "grad_norm": 2.078590180758969, | |
| "learning_rate": 3.6200640341515482e-06, | |
| "loss": 0.66616745, | |
| "memory(GiB)": 71.94, | |
| "step": 3135, | |
| "train_speed(iter/s)": 0.020024 | |
| }, | |
| { | |
| "epoch": 1.299937901055682, | |
| "grad_norm": 1.8683848031966166, | |
| "learning_rate": 3.6093916755602993e-06, | |
| "loss": 0.64836683, | |
| "memory(GiB)": 71.94, | |
| "step": 3140, | |
| "train_speed(iter/s)": 0.020025 | |
| }, | |
| { | |
| "epoch": 1.3020078658662801, | |
| "grad_norm": 1.7924123567589454, | |
| "learning_rate": 3.5987193169690503e-06, | |
| "loss": 0.66824627, | |
| "memory(GiB)": 71.94, | |
| "step": 3145, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.3040778306768785, | |
| "grad_norm": 1.94170729576761, | |
| "learning_rate": 3.5880469583778018e-06, | |
| "loss": 0.67116079, | |
| "memory(GiB)": 71.94, | |
| "step": 3150, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.3061477954874767, | |
| "grad_norm": 2.1208196521106357, | |
| "learning_rate": 3.5773745997865532e-06, | |
| "loss": 0.66398339, | |
| "memory(GiB)": 71.94, | |
| "step": 3155, | |
| "train_speed(iter/s)": 0.020027 | |
| }, | |
| { | |
| "epoch": 1.308217760298075, | |
| "grad_norm": 1.94319742259963, | |
| "learning_rate": 3.5667022411953047e-06, | |
| "loss": 0.66830492, | |
| "memory(GiB)": 71.94, | |
| "step": 3160, | |
| "train_speed(iter/s)": 0.020028 | |
| }, | |
| { | |
| "epoch": 1.310287725108673, | |
| "grad_norm": 1.8986704786653348, | |
| "learning_rate": 3.5560298826040557e-06, | |
| "loss": 0.64020205, | |
| "memory(GiB)": 71.94, | |
| "step": 3165, | |
| "train_speed(iter/s)": 0.020028 | |
| }, | |
| { | |
| "epoch": 1.3123576899192715, | |
| "grad_norm": 1.832166033780513, | |
| "learning_rate": 3.5453575240128068e-06, | |
| "loss": 0.65297813, | |
| "memory(GiB)": 71.94, | |
| "step": 3170, | |
| "train_speed(iter/s)": 0.020029 | |
| }, | |
| { | |
| "epoch": 1.3144276547298697, | |
| "grad_norm": 1.7524632564639653, | |
| "learning_rate": 3.5346851654215586e-06, | |
| "loss": 0.65833459, | |
| "memory(GiB)": 71.94, | |
| "step": 3175, | |
| "train_speed(iter/s)": 0.020029 | |
| }, | |
| { | |
| "epoch": 1.3164976195404678, | |
| "grad_norm": 1.784819778645554, | |
| "learning_rate": 3.5240128068303097e-06, | |
| "loss": 0.65319304, | |
| "memory(GiB)": 71.94, | |
| "step": 3180, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.318567584351066, | |
| "grad_norm": 1.7883657805218875, | |
| "learning_rate": 3.513340448239061e-06, | |
| "loss": 0.65240812, | |
| "memory(GiB)": 71.94, | |
| "step": 3185, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.3206375491616642, | |
| "grad_norm": 1.894144860689911, | |
| "learning_rate": 3.5026680896478126e-06, | |
| "loss": 0.6723794, | |
| "memory(GiB)": 71.94, | |
| "step": 3190, | |
| "train_speed(iter/s)": 0.020031 | |
| }, | |
| { | |
| "epoch": 1.3227075139722624, | |
| "grad_norm": 2.0320221849381195, | |
| "learning_rate": 3.491995731056564e-06, | |
| "loss": 0.64077559, | |
| "memory(GiB)": 71.94, | |
| "step": 3195, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.3247774787828606, | |
| "grad_norm": 1.727824740633838, | |
| "learning_rate": 3.481323372465315e-06, | |
| "loss": 0.63609524, | |
| "memory(GiB)": 71.94, | |
| "step": 3200, | |
| "train_speed(iter/s)": 0.020033 | |
| }, | |
| { | |
| "epoch": 1.326847443593459, | |
| "grad_norm": 1.7454884742520516, | |
| "learning_rate": 3.470651013874066e-06, | |
| "loss": 0.65425596, | |
| "memory(GiB)": 71.94, | |
| "step": 3205, | |
| "train_speed(iter/s)": 0.020033 | |
| }, | |
| { | |
| "epoch": 1.3289174084040571, | |
| "grad_norm": 1.8911130178984759, | |
| "learning_rate": 3.459978655282818e-06, | |
| "loss": 0.66199875, | |
| "memory(GiB)": 71.94, | |
| "step": 3210, | |
| "train_speed(iter/s)": 0.020034 | |
| }, | |
| { | |
| "epoch": 1.3309873732146553, | |
| "grad_norm": 2.3250937309778474, | |
| "learning_rate": 3.449306296691569e-06, | |
| "loss": 0.65385923, | |
| "memory(GiB)": 71.94, | |
| "step": 3215, | |
| "train_speed(iter/s)": 0.020035 | |
| }, | |
| { | |
| "epoch": 1.3330573380252535, | |
| "grad_norm": 2.1704453467921447, | |
| "learning_rate": 3.43863393810032e-06, | |
| "loss": 0.66229916, | |
| "memory(GiB)": 71.94, | |
| "step": 3220, | |
| "train_speed(iter/s)": 0.020036 | |
| }, | |
| { | |
| "epoch": 1.335127302835852, | |
| "grad_norm": 2.2209117698054768, | |
| "learning_rate": 3.427961579509072e-06, | |
| "loss": 0.65051212, | |
| "memory(GiB)": 71.94, | |
| "step": 3225, | |
| "train_speed(iter/s)": 0.020036 | |
| }, | |
| { | |
| "epoch": 1.33719726764645, | |
| "grad_norm": 1.6845784156480532, | |
| "learning_rate": 3.417289220917823e-06, | |
| "loss": 0.64576015, | |
| "memory(GiB)": 71.94, | |
| "step": 3230, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 1.3392672324570483, | |
| "grad_norm": 2.7721561024455332, | |
| "learning_rate": 3.4066168623265745e-06, | |
| "loss": 0.68257604, | |
| "memory(GiB)": 71.94, | |
| "step": 3235, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 1.3413371972676464, | |
| "grad_norm": 1.9362479296558788, | |
| "learning_rate": 3.3959445037353255e-06, | |
| "loss": 0.63483586, | |
| "memory(GiB)": 71.94, | |
| "step": 3240, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 1.3434071620782446, | |
| "grad_norm": 1.9956347191580364, | |
| "learning_rate": 3.3852721451440774e-06, | |
| "loss": 0.65959587, | |
| "memory(GiB)": 71.94, | |
| "step": 3245, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.3454771268888428, | |
| "grad_norm": 1.7321062675450865, | |
| "learning_rate": 3.3745997865528285e-06, | |
| "loss": 0.66381845, | |
| "memory(GiB)": 71.94, | |
| "step": 3250, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 1.347547091699441, | |
| "grad_norm": 1.9696403539230432, | |
| "learning_rate": 3.3639274279615795e-06, | |
| "loss": 0.64812822, | |
| "memory(GiB)": 71.94, | |
| "step": 3255, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 1.3496170565100394, | |
| "grad_norm": 2.1319401432279053, | |
| "learning_rate": 3.353255069370331e-06, | |
| "loss": 0.64005919, | |
| "memory(GiB)": 71.94, | |
| "step": 3260, | |
| "train_speed(iter/s)": 0.020041 | |
| }, | |
| { | |
| "epoch": 1.3516870213206376, | |
| "grad_norm": 1.8363706763408283, | |
| "learning_rate": 3.3425827107790824e-06, | |
| "loss": 0.64201698, | |
| "memory(GiB)": 71.94, | |
| "step": 3265, | |
| "train_speed(iter/s)": 0.020042 | |
| }, | |
| { | |
| "epoch": 1.3537569861312357, | |
| "grad_norm": 2.124129454637882, | |
| "learning_rate": 3.331910352187834e-06, | |
| "loss": 0.65025196, | |
| "memory(GiB)": 71.94, | |
| "step": 3270, | |
| "train_speed(iter/s)": 0.020043 | |
| }, | |
| { | |
| "epoch": 1.355826950941834, | |
| "grad_norm": 1.7896864503844883, | |
| "learning_rate": 3.321237993596585e-06, | |
| "loss": 0.63300438, | |
| "memory(GiB)": 71.94, | |
| "step": 3275, | |
| "train_speed(iter/s)": 0.020044 | |
| }, | |
| { | |
| "epoch": 1.3578969157524323, | |
| "grad_norm": 1.9837261215389441, | |
| "learning_rate": 3.310565635005337e-06, | |
| "loss": 0.63380709, | |
| "memory(GiB)": 71.94, | |
| "step": 3280, | |
| "train_speed(iter/s)": 0.020044 | |
| }, | |
| { | |
| "epoch": 1.3599668805630305, | |
| "grad_norm": 2.097355543637799, | |
| "learning_rate": 3.299893276414088e-06, | |
| "loss": 0.62883258, | |
| "memory(GiB)": 71.94, | |
| "step": 3285, | |
| "train_speed(iter/s)": 0.020044 | |
| }, | |
| { | |
| "epoch": 1.3620368453736287, | |
| "grad_norm": 1.9315070924851874, | |
| "learning_rate": 3.289220917822839e-06, | |
| "loss": 0.63195004, | |
| "memory(GiB)": 71.94, | |
| "step": 3290, | |
| "train_speed(iter/s)": 0.020045 | |
| }, | |
| { | |
| "epoch": 1.3641068101842269, | |
| "grad_norm": 1.7050134771385488, | |
| "learning_rate": 3.2785485592315903e-06, | |
| "loss": 0.65333614, | |
| "memory(GiB)": 71.94, | |
| "step": 3295, | |
| "train_speed(iter/s)": 0.020045 | |
| }, | |
| { | |
| "epoch": 1.366176774994825, | |
| "grad_norm": 3.6481119398862005, | |
| "learning_rate": 3.267876200640342e-06, | |
| "loss": 0.68793478, | |
| "memory(GiB)": 71.94, | |
| "step": 3300, | |
| "train_speed(iter/s)": 0.020046 | |
| }, | |
| { | |
| "epoch": 1.366176774994825, | |
| "eval_loss": 0.8023556470870972, | |
| "eval_runtime": 334.0894, | |
| "eval_samples_per_second": 18.696, | |
| "eval_steps_per_second": 1.17, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.3682467398054232, | |
| "grad_norm": 2.092330422962737, | |
| "learning_rate": 3.2572038420490933e-06, | |
| "loss": 0.63313837, | |
| "memory(GiB)": 71.94, | |
| "step": 3305, | |
| "train_speed(iter/s)": 0.02 | |
| }, | |
| { | |
| "epoch": 1.3703167046160214, | |
| "grad_norm": 1.9167879677265438, | |
| "learning_rate": 3.2465314834578443e-06, | |
| "loss": 0.65159397, | |
| "memory(GiB)": 71.94, | |
| "step": 3310, | |
| "train_speed(iter/s)": 0.020001 | |
| }, | |
| { | |
| "epoch": 1.3723866694266198, | |
| "grad_norm": 1.8761921604953933, | |
| "learning_rate": 3.235859124866596e-06, | |
| "loss": 0.62655239, | |
| "memory(GiB)": 71.94, | |
| "step": 3315, | |
| "train_speed(iter/s)": 0.020001 | |
| }, | |
| { | |
| "epoch": 1.374456634237218, | |
| "grad_norm": 2.178411039961288, | |
| "learning_rate": 3.2251867662753472e-06, | |
| "loss": 0.67218485, | |
| "memory(GiB)": 71.94, | |
| "step": 3320, | |
| "train_speed(iter/s)": 0.020002 | |
| }, | |
| { | |
| "epoch": 1.3765265990478162, | |
| "grad_norm": 1.776609105623676, | |
| "learning_rate": 3.2145144076840983e-06, | |
| "loss": 0.63943739, | |
| "memory(GiB)": 71.94, | |
| "step": 3325, | |
| "train_speed(iter/s)": 0.020003 | |
| }, | |
| { | |
| "epoch": 1.3785965638584143, | |
| "grad_norm": 1.8854963168848657, | |
| "learning_rate": 3.2038420490928497e-06, | |
| "loss": 0.65016818, | |
| "memory(GiB)": 71.94, | |
| "step": 3330, | |
| "train_speed(iter/s)": 0.020003 | |
| }, | |
| { | |
| "epoch": 1.3806665286690127, | |
| "grad_norm": 2.3494051096524915, | |
| "learning_rate": 3.193169690501601e-06, | |
| "loss": 0.6696517, | |
| "memory(GiB)": 71.94, | |
| "step": 3335, | |
| "train_speed(iter/s)": 0.020004 | |
| }, | |
| { | |
| "epoch": 1.382736493479611, | |
| "grad_norm": 2.390390518794705, | |
| "learning_rate": 3.1824973319103527e-06, | |
| "loss": 0.64389553, | |
| "memory(GiB)": 71.94, | |
| "step": 3340, | |
| "train_speed(iter/s)": 0.020005 | |
| }, | |
| { | |
| "epoch": 1.384806458290209, | |
| "grad_norm": 1.8195738414381504, | |
| "learning_rate": 3.1718249733191037e-06, | |
| "loss": 0.64606419, | |
| "memory(GiB)": 71.94, | |
| "step": 3345, | |
| "train_speed(iter/s)": 0.020006 | |
| }, | |
| { | |
| "epoch": 1.3868764231008073, | |
| "grad_norm": 2.3772901728441846, | |
| "learning_rate": 3.1611526147278547e-06, | |
| "loss": 0.66564407, | |
| "memory(GiB)": 71.94, | |
| "step": 3350, | |
| "train_speed(iter/s)": 0.020006 | |
| }, | |
| { | |
| "epoch": 1.3889463879114055, | |
| "grad_norm": 1.9865374550113573, | |
| "learning_rate": 3.1504802561366066e-06, | |
| "loss": 0.62708969, | |
| "memory(GiB)": 71.94, | |
| "step": 3355, | |
| "train_speed(iter/s)": 0.020007 | |
| }, | |
| { | |
| "epoch": 1.3910163527220036, | |
| "grad_norm": 1.8995100511755476, | |
| "learning_rate": 3.1398078975453577e-06, | |
| "loss": 0.64633865, | |
| "memory(GiB)": 71.94, | |
| "step": 3360, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 1.3930863175326018, | |
| "grad_norm": 2.3050512969189954, | |
| "learning_rate": 3.129135538954109e-06, | |
| "loss": 0.64496307, | |
| "memory(GiB)": 71.94, | |
| "step": 3365, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 1.3951562823432002, | |
| "grad_norm": 1.9056478303612696, | |
| "learning_rate": 3.1184631803628606e-06, | |
| "loss": 0.641465, | |
| "memory(GiB)": 71.94, | |
| "step": 3370, | |
| "train_speed(iter/s)": 0.020009 | |
| }, | |
| { | |
| "epoch": 1.3972262471537984, | |
| "grad_norm": 2.166573531526341, | |
| "learning_rate": 3.107790821771612e-06, | |
| "loss": 0.64069824, | |
| "memory(GiB)": 71.94, | |
| "step": 3375, | |
| "train_speed(iter/s)": 0.020009 | |
| }, | |
| { | |
| "epoch": 1.3992962119643966, | |
| "grad_norm": 1.9398811847394521, | |
| "learning_rate": 3.097118463180363e-06, | |
| "loss": 0.64301763, | |
| "memory(GiB)": 47.6, | |
| "step": 3380, | |
| "train_speed(iter/s)": 0.02001 | |
| }, | |
| { | |
| "epoch": 1.4013661767749948, | |
| "grad_norm": 1.959159982373063, | |
| "learning_rate": 3.086446104589114e-06, | |
| "loss": 0.6362546, | |
| "memory(GiB)": 47.6, | |
| "step": 3385, | |
| "train_speed(iter/s)": 0.020011 | |
| }, | |
| { | |
| "epoch": 1.4034361415855932, | |
| "grad_norm": 2.0530926805349083, | |
| "learning_rate": 3.075773745997866e-06, | |
| "loss": 0.63418798, | |
| "memory(GiB)": 47.6, | |
| "step": 3390, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.4055061063961913, | |
| "grad_norm": 1.9297510699914728, | |
| "learning_rate": 3.065101387406617e-06, | |
| "loss": 0.6311512, | |
| "memory(GiB)": 47.6, | |
| "step": 3395, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.4075760712067895, | |
| "grad_norm": 1.9271894163285872, | |
| "learning_rate": 3.054429028815368e-06, | |
| "loss": 0.63234644, | |
| "memory(GiB)": 47.6, | |
| "step": 3400, | |
| "train_speed(iter/s)": 0.020014 | |
| }, | |
| { | |
| "epoch": 1.4096460360173877, | |
| "grad_norm": 1.7395603251397769, | |
| "learning_rate": 3.04375667022412e-06, | |
| "loss": 0.63525658, | |
| "memory(GiB)": 47.6, | |
| "step": 3405, | |
| "train_speed(iter/s)": 0.020015 | |
| }, | |
| { | |
| "epoch": 1.4117160008279859, | |
| "grad_norm": 1.9592997788913435, | |
| "learning_rate": 3.033084311632871e-06, | |
| "loss": 0.62426691, | |
| "memory(GiB)": 47.6, | |
| "step": 3410, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.413785965638584, | |
| "grad_norm": 1.83244625095987, | |
| "learning_rate": 3.0224119530416225e-06, | |
| "loss": 0.64436603, | |
| "memory(GiB)": 47.6, | |
| "step": 3415, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.4158559304491822, | |
| "grad_norm": 1.9274094419667949, | |
| "learning_rate": 3.0117395944503735e-06, | |
| "loss": 0.65462785, | |
| "memory(GiB)": 47.6, | |
| "step": 3420, | |
| "train_speed(iter/s)": 0.020017 | |
| }, | |
| { | |
| "epoch": 1.4179258952597806, | |
| "grad_norm": 1.9910550366499922, | |
| "learning_rate": 3.0010672358591254e-06, | |
| "loss": 0.62345576, | |
| "memory(GiB)": 47.6, | |
| "step": 3425, | |
| "train_speed(iter/s)": 0.020017 | |
| }, | |
| { | |
| "epoch": 1.4199958600703788, | |
| "grad_norm": 1.9961790791051468, | |
| "learning_rate": 2.9903948772678764e-06, | |
| "loss": 0.62686663, | |
| "memory(GiB)": 48.58, | |
| "step": 3430, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.422065824880977, | |
| "grad_norm": 2.2471760043812767, | |
| "learning_rate": 2.9797225186766275e-06, | |
| "loss": 0.65531764, | |
| "memory(GiB)": 48.58, | |
| "step": 3435, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.4241357896915752, | |
| "grad_norm": 2.225238329922807, | |
| "learning_rate": 2.9690501600853794e-06, | |
| "loss": 0.64034252, | |
| "memory(GiB)": 48.58, | |
| "step": 3440, | |
| "train_speed(iter/s)": 0.020019 | |
| }, | |
| { | |
| "epoch": 1.4262057545021736, | |
| "grad_norm": 1.6663799088887756, | |
| "learning_rate": 2.9583778014941304e-06, | |
| "loss": 0.61355238, | |
| "memory(GiB)": 48.58, | |
| "step": 3445, | |
| "train_speed(iter/s)": 0.020019 | |
| }, | |
| { | |
| "epoch": 1.4282757193127718, | |
| "grad_norm": 1.9127892416039678, | |
| "learning_rate": 2.947705442902882e-06, | |
| "loss": 0.61548796, | |
| "memory(GiB)": 48.58, | |
| "step": 3450, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.43034568412337, | |
| "grad_norm": 1.7733393610398442, | |
| "learning_rate": 2.937033084311633e-06, | |
| "loss": 0.66395988, | |
| "memory(GiB)": 48.58, | |
| "step": 3455, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.432415648933968, | |
| "grad_norm": 2.1962868424551676, | |
| "learning_rate": 2.9263607257203848e-06, | |
| "loss": 0.61601906, | |
| "memory(GiB)": 48.58, | |
| "step": 3460, | |
| "train_speed(iter/s)": 0.020021 | |
| }, | |
| { | |
| "epoch": 1.4344856137445663, | |
| "grad_norm": 1.8134260024404016, | |
| "learning_rate": 2.915688367129136e-06, | |
| "loss": 0.64126596, | |
| "memory(GiB)": 48.58, | |
| "step": 3465, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.4365555785551645, | |
| "grad_norm": 2.136457991499728, | |
| "learning_rate": 2.905016008537887e-06, | |
| "loss": 0.63421082, | |
| "memory(GiB)": 48.58, | |
| "step": 3470, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.4386255433657626, | |
| "grad_norm": 1.988532670614334, | |
| "learning_rate": 2.8943436499466383e-06, | |
| "loss": 0.63867459, | |
| "memory(GiB)": 48.58, | |
| "step": 3475, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.440695508176361, | |
| "grad_norm": 1.9277626836086974, | |
| "learning_rate": 2.8836712913553898e-06, | |
| "loss": 0.65572329, | |
| "memory(GiB)": 48.58, | |
| "step": 3480, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.4427654729869592, | |
| "grad_norm": 2.1432613819151247, | |
| "learning_rate": 2.8729989327641412e-06, | |
| "loss": 0.64992175, | |
| "memory(GiB)": 48.58, | |
| "step": 3485, | |
| "train_speed(iter/s)": 0.020025 | |
| }, | |
| { | |
| "epoch": 1.4448354377975574, | |
| "grad_norm": 2.355567308350272, | |
| "learning_rate": 2.8623265741728923e-06, | |
| "loss": 0.66429825, | |
| "memory(GiB)": 48.58, | |
| "step": 3490, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.4469054026081556, | |
| "grad_norm": 2.045569698254085, | |
| "learning_rate": 2.851654215581644e-06, | |
| "loss": 0.65050926, | |
| "memory(GiB)": 48.58, | |
| "step": 3495, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.448975367418754, | |
| "grad_norm": 1.8389546498339602, | |
| "learning_rate": 2.840981856990395e-06, | |
| "loss": 0.63454714, | |
| "memory(GiB)": 48.58, | |
| "step": 3500, | |
| "train_speed(iter/s)": 0.020027 | |
| }, | |
| { | |
| "epoch": 1.4510453322293522, | |
| "grad_norm": 2.1282462505999518, | |
| "learning_rate": 2.8303094983991462e-06, | |
| "loss": 0.61026077, | |
| "memory(GiB)": 48.58, | |
| "step": 3505, | |
| "train_speed(iter/s)": 0.020027 | |
| }, | |
| { | |
| "epoch": 1.4531152970399503, | |
| "grad_norm": 1.9822876450283116, | |
| "learning_rate": 2.8196371398078977e-06, | |
| "loss": 0.63836317, | |
| "memory(GiB)": 48.58, | |
| "step": 3510, | |
| "train_speed(iter/s)": 0.020028 | |
| }, | |
| { | |
| "epoch": 1.4551852618505485, | |
| "grad_norm": 2.1201754460326603, | |
| "learning_rate": 2.808964781216649e-06, | |
| "loss": 0.65191045, | |
| "memory(GiB)": 48.58, | |
| "step": 3515, | |
| "train_speed(iter/s)": 0.020028 | |
| }, | |
| { | |
| "epoch": 1.4572552266611467, | |
| "grad_norm": 1.8520751525916404, | |
| "learning_rate": 2.7982924226254006e-06, | |
| "loss": 0.62120395, | |
| "memory(GiB)": 48.58, | |
| "step": 3520, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.4593251914717449, | |
| "grad_norm": 1.7983375554778653, | |
| "learning_rate": 2.7876200640341517e-06, | |
| "loss": 0.63227787, | |
| "memory(GiB)": 48.58, | |
| "step": 3525, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.461395156282343, | |
| "grad_norm": 2.340481282081477, | |
| "learning_rate": 2.7769477054429036e-06, | |
| "loss": 0.63372889, | |
| "memory(GiB)": 48.58, | |
| "step": 3530, | |
| "train_speed(iter/s)": 0.020031 | |
| }, | |
| { | |
| "epoch": 1.4634651210929415, | |
| "grad_norm": 1.9262925464363927, | |
| "learning_rate": 2.7662753468516546e-06, | |
| "loss": 0.63678517, | |
| "memory(GiB)": 48.58, | |
| "step": 3535, | |
| "train_speed(iter/s)": 0.020031 | |
| }, | |
| { | |
| "epoch": 1.4655350859035396, | |
| "grad_norm": 2.2093247947290164, | |
| "learning_rate": 2.7556029882604056e-06, | |
| "loss": 0.6558732, | |
| "memory(GiB)": 48.58, | |
| "step": 3540, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.4676050507141378, | |
| "grad_norm": 2.020333271689009, | |
| "learning_rate": 2.744930629669157e-06, | |
| "loss": 0.60733166, | |
| "memory(GiB)": 48.58, | |
| "step": 3545, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.469675015524736, | |
| "grad_norm": 2.0734276125917632, | |
| "learning_rate": 2.7342582710779086e-06, | |
| "loss": 0.66044526, | |
| "memory(GiB)": 48.58, | |
| "step": 3550, | |
| "train_speed(iter/s)": 0.020033 | |
| }, | |
| { | |
| "epoch": 1.4717449803353344, | |
| "grad_norm": 1.8332312440056042, | |
| "learning_rate": 2.72358591248666e-06, | |
| "loss": 0.64025326, | |
| "memory(GiB)": 48.58, | |
| "step": 3555, | |
| "train_speed(iter/s)": 0.020034 | |
| }, | |
| { | |
| "epoch": 1.4738149451459326, | |
| "grad_norm": 1.8856532947296074, | |
| "learning_rate": 2.712913553895411e-06, | |
| "loss": 0.63681345, | |
| "memory(GiB)": 48.58, | |
| "step": 3560, | |
| "train_speed(iter/s)": 0.020034 | |
| }, | |
| { | |
| "epoch": 1.4758849099565308, | |
| "grad_norm": 1.6710776220883121, | |
| "learning_rate": 2.702241195304162e-06, | |
| "loss": 0.64032116, | |
| "memory(GiB)": 48.58, | |
| "step": 3565, | |
| "train_speed(iter/s)": 0.020035 | |
| }, | |
| { | |
| "epoch": 1.477954874767129, | |
| "grad_norm": 2.095636589348373, | |
| "learning_rate": 2.691568836712914e-06, | |
| "loss": 0.63223238, | |
| "memory(GiB)": 48.58, | |
| "step": 3570, | |
| "train_speed(iter/s)": 0.020035 | |
| }, | |
| { | |
| "epoch": 1.4800248395777271, | |
| "grad_norm": 1.8927929015268774, | |
| "learning_rate": 2.680896478121665e-06, | |
| "loss": 0.63061609, | |
| "memory(GiB)": 48.58, | |
| "step": 3575, | |
| "train_speed(iter/s)": 0.020036 | |
| }, | |
| { | |
| "epoch": 1.4820948043883253, | |
| "grad_norm": 2.0929754486788137, | |
| "learning_rate": 2.670224119530416e-06, | |
| "loss": 0.62695656, | |
| "memory(GiB)": 48.58, | |
| "step": 3580, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 1.4841647691989235, | |
| "grad_norm": 1.7458918576306506, | |
| "learning_rate": 2.659551760939168e-06, | |
| "loss": 0.63322277, | |
| "memory(GiB)": 48.58, | |
| "step": 3585, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 1.4862347340095219, | |
| "grad_norm": 1.8907196532347343, | |
| "learning_rate": 2.648879402347919e-06, | |
| "loss": 0.63732347, | |
| "memory(GiB)": 48.58, | |
| "step": 3590, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 1.48830469882012, | |
| "grad_norm": 2.263161529685459, | |
| "learning_rate": 2.6382070437566704e-06, | |
| "loss": 0.6138607, | |
| "memory(GiB)": 48.58, | |
| "step": 3595, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.4903746636307182, | |
| "grad_norm": 2.572454412416673, | |
| "learning_rate": 2.6275346851654215e-06, | |
| "loss": 0.63049603, | |
| "memory(GiB)": 48.58, | |
| "step": 3600, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 1.4903746636307182, | |
| "eval_loss": 0.8007386922836304, | |
| "eval_runtime": 333.8857, | |
| "eval_samples_per_second": 18.707, | |
| "eval_steps_per_second": 1.171, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.4924446284413164, | |
| "grad_norm": 1.901029287627636, | |
| "learning_rate": 2.6168623265741734e-06, | |
| "loss": 0.65960011, | |
| "memory(GiB)": 48.58, | |
| "step": 3605, | |
| "train_speed(iter/s)": 0.019998 | |
| }, | |
| { | |
| "epoch": 1.4945145932519148, | |
| "grad_norm": 2.0108141055428423, | |
| "learning_rate": 2.6061899679829244e-06, | |
| "loss": 0.63098369, | |
| "memory(GiB)": 48.58, | |
| "step": 3610, | |
| "train_speed(iter/s)": 0.019999 | |
| }, | |
| { | |
| "epoch": 1.496584558062513, | |
| "grad_norm": 1.8385014047339334, | |
| "learning_rate": 2.5955176093916754e-06, | |
| "loss": 0.63654456, | |
| "memory(GiB)": 48.58, | |
| "step": 3615, | |
| "train_speed(iter/s)": 0.019999 | |
| }, | |
| { | |
| "epoch": 1.4986545228731112, | |
| "grad_norm": 2.502391600466987, | |
| "learning_rate": 2.5848452508004273e-06, | |
| "loss": 0.64129109, | |
| "memory(GiB)": 48.58, | |
| "step": 3620, | |
| "train_speed(iter/s)": 0.02 | |
| }, | |
| { | |
| "epoch": 1.5007244876837094, | |
| "grad_norm": 1.8078536683803752, | |
| "learning_rate": 2.5741728922091784e-06, | |
| "loss": 0.61762905, | |
| "memory(GiB)": 48.58, | |
| "step": 3625, | |
| "train_speed(iter/s)": 0.020001 | |
| }, | |
| { | |
| "epoch": 1.5027944524943075, | |
| "grad_norm": 2.4124021719958813, | |
| "learning_rate": 2.56350053361793e-06, | |
| "loss": 0.62886133, | |
| "memory(GiB)": 48.58, | |
| "step": 3630, | |
| "train_speed(iter/s)": 0.020002 | |
| }, | |
| { | |
| "epoch": 1.5048644173049057, | |
| "grad_norm": 2.14818078370575, | |
| "learning_rate": 2.552828175026681e-06, | |
| "loss": 0.64229774, | |
| "memory(GiB)": 48.58, | |
| "step": 3635, | |
| "train_speed(iter/s)": 0.020002 | |
| }, | |
| { | |
| "epoch": 1.506934382115504, | |
| "grad_norm": 2.0607592121598777, | |
| "learning_rate": 2.5421558164354328e-06, | |
| "loss": 0.62922821, | |
| "memory(GiB)": 48.58, | |
| "step": 3640, | |
| "train_speed(iter/s)": 0.020003 | |
| }, | |
| { | |
| "epoch": 1.5090043469261023, | |
| "grad_norm": 1.9537072105756903, | |
| "learning_rate": 2.531483457844184e-06, | |
| "loss": 0.6114254, | |
| "memory(GiB)": 48.58, | |
| "step": 3645, | |
| "train_speed(iter/s)": 0.020004 | |
| }, | |
| { | |
| "epoch": 1.5110743117367005, | |
| "grad_norm": 2.076894133768813, | |
| "learning_rate": 2.520811099252935e-06, | |
| "loss": 0.65702705, | |
| "memory(GiB)": 48.58, | |
| "step": 3650, | |
| "train_speed(iter/s)": 0.020005 | |
| }, | |
| { | |
| "epoch": 1.5131442765472987, | |
| "grad_norm": 1.8684495033846724, | |
| "learning_rate": 2.5101387406616863e-06, | |
| "loss": 0.65235605, | |
| "memory(GiB)": 48.58, | |
| "step": 3655, | |
| "train_speed(iter/s)": 0.020005 | |
| }, | |
| { | |
| "epoch": 1.515214241357897, | |
| "grad_norm": 2.099820461610625, | |
| "learning_rate": 2.4994663820704378e-06, | |
| "loss": 0.63383131, | |
| "memory(GiB)": 48.58, | |
| "step": 3660, | |
| "train_speed(iter/s)": 0.020005 | |
| }, | |
| { | |
| "epoch": 1.5172842061684952, | |
| "grad_norm": 2.0251264820061765, | |
| "learning_rate": 2.4887940234791892e-06, | |
| "loss": 0.63897676, | |
| "memory(GiB)": 48.58, | |
| "step": 3665, | |
| "train_speed(iter/s)": 0.020006 | |
| }, | |
| { | |
| "epoch": 1.5193541709790934, | |
| "grad_norm": 1.8916351044126378, | |
| "learning_rate": 2.4781216648879407e-06, | |
| "loss": 0.63116732, | |
| "memory(GiB)": 48.58, | |
| "step": 3670, | |
| "train_speed(iter/s)": 0.020007 | |
| }, | |
| { | |
| "epoch": 1.5214241357896916, | |
| "grad_norm": 1.758567214100863, | |
| "learning_rate": 2.4674493062966917e-06, | |
| "loss": 0.61664515, | |
| "memory(GiB)": 48.58, | |
| "step": 3675, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 1.5234941006002898, | |
| "grad_norm": 2.004985649591871, | |
| "learning_rate": 2.456776947705443e-06, | |
| "loss": 0.63878107, | |
| "memory(GiB)": 48.58, | |
| "step": 3680, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 1.525564065410888, | |
| "grad_norm": 2.0496876810492894, | |
| "learning_rate": 2.4461045891141942e-06, | |
| "loss": 0.62643003, | |
| "memory(GiB)": 48.58, | |
| "step": 3685, | |
| "train_speed(iter/s)": 0.020009 | |
| }, | |
| { | |
| "epoch": 1.5276340302214861, | |
| "grad_norm": 1.9829235096697742, | |
| "learning_rate": 2.4354322305229457e-06, | |
| "loss": 0.62055197, | |
| "memory(GiB)": 48.58, | |
| "step": 3690, | |
| "train_speed(iter/s)": 0.02001 | |
| }, | |
| { | |
| "epoch": 1.5297039950320843, | |
| "grad_norm": 1.7991062631513268, | |
| "learning_rate": 2.424759871931697e-06, | |
| "loss": 0.6605298, | |
| "memory(GiB)": 48.58, | |
| "step": 3695, | |
| "train_speed(iter/s)": 0.02001 | |
| }, | |
| { | |
| "epoch": 1.5317739598426827, | |
| "grad_norm": 2.526220221053364, | |
| "learning_rate": 2.4140875133404486e-06, | |
| "loss": 0.65973449, | |
| "memory(GiB)": 48.58, | |
| "step": 3700, | |
| "train_speed(iter/s)": 0.020011 | |
| }, | |
| { | |
| "epoch": 1.533843924653281, | |
| "grad_norm": 1.682540853202414, | |
| "learning_rate": 2.4034151547492e-06, | |
| "loss": 0.65421052, | |
| "memory(GiB)": 48.58, | |
| "step": 3705, | |
| "train_speed(iter/s)": 0.020012 | |
| }, | |
| { | |
| "epoch": 1.535913889463879, | |
| "grad_norm": 1.965734649630595, | |
| "learning_rate": 2.392742796157951e-06, | |
| "loss": 0.64042482, | |
| "memory(GiB)": 48.58, | |
| "step": 3710, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.5379838542744775, | |
| "grad_norm": 2.2061994907060005, | |
| "learning_rate": 2.3820704375667026e-06, | |
| "loss": 0.61186495, | |
| "memory(GiB)": 48.58, | |
| "step": 3715, | |
| "train_speed(iter/s)": 0.020014 | |
| }, | |
| { | |
| "epoch": 1.5400538190850757, | |
| "grad_norm": 2.2749623686643474, | |
| "learning_rate": 2.3713980789754536e-06, | |
| "loss": 0.63306904, | |
| "memory(GiB)": 48.58, | |
| "step": 3720, | |
| "train_speed(iter/s)": 0.020014 | |
| }, | |
| { | |
| "epoch": 1.5421237838956738, | |
| "grad_norm": 1.8676661262594305, | |
| "learning_rate": 2.360725720384205e-06, | |
| "loss": 0.64988294, | |
| "memory(GiB)": 48.58, | |
| "step": 3725, | |
| "train_speed(iter/s)": 0.020015 | |
| }, | |
| { | |
| "epoch": 1.544193748706272, | |
| "grad_norm": 2.069377222520985, | |
| "learning_rate": 2.3500533617929565e-06, | |
| "loss": 0.63958693, | |
| "memory(GiB)": 48.58, | |
| "step": 3730, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.5462637135168702, | |
| "grad_norm": 2.0148406212108574, | |
| "learning_rate": 2.339381003201708e-06, | |
| "loss": 0.60387516, | |
| "memory(GiB)": 48.58, | |
| "step": 3735, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.5483336783274684, | |
| "grad_norm": 1.8592072092273917, | |
| "learning_rate": 2.328708644610459e-06, | |
| "loss": 0.60533466, | |
| "memory(GiB)": 48.58, | |
| "step": 3740, | |
| "train_speed(iter/s)": 0.020017 | |
| }, | |
| { | |
| "epoch": 1.5504036431380666, | |
| "grad_norm": 1.9293967097992641, | |
| "learning_rate": 2.3180362860192105e-06, | |
| "loss": 0.64885559, | |
| "memory(GiB)": 48.58, | |
| "step": 3745, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.5524736079486647, | |
| "grad_norm": 2.105150298236883, | |
| "learning_rate": 2.307363927427962e-06, | |
| "loss": 0.6291214, | |
| "memory(GiB)": 48.58, | |
| "step": 3750, | |
| "train_speed(iter/s)": 0.020019 | |
| }, | |
| { | |
| "epoch": 1.5545435727592631, | |
| "grad_norm": 1.7706279189258218, | |
| "learning_rate": 2.296691568836713e-06, | |
| "loss": 0.62637095, | |
| "memory(GiB)": 48.58, | |
| "step": 3755, | |
| "train_speed(iter/s)": 0.020019 | |
| }, | |
| { | |
| "epoch": 1.5566135375698613, | |
| "grad_norm": 2.565565704557741, | |
| "learning_rate": 2.2860192102454645e-06, | |
| "loss": 0.61526871, | |
| "memory(GiB)": 48.58, | |
| "step": 3760, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.5586835023804595, | |
| "grad_norm": 2.07386908713523, | |
| "learning_rate": 2.2753468516542155e-06, | |
| "loss": 0.62105417, | |
| "memory(GiB)": 48.58, | |
| "step": 3765, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.560753467191058, | |
| "grad_norm": 2.0656516897338064, | |
| "learning_rate": 2.264674493062967e-06, | |
| "loss": 0.62474051, | |
| "memory(GiB)": 48.58, | |
| "step": 3770, | |
| "train_speed(iter/s)": 0.020021 | |
| }, | |
| { | |
| "epoch": 1.562823432001656, | |
| "grad_norm": 2.1881677769227075, | |
| "learning_rate": 2.2540021344717184e-06, | |
| "loss": 0.6300148, | |
| "memory(GiB)": 48.58, | |
| "step": 3775, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.5648933968122543, | |
| "grad_norm": 1.9586494376217993, | |
| "learning_rate": 2.24332977588047e-06, | |
| "loss": 0.59469123, | |
| "memory(GiB)": 48.58, | |
| "step": 3780, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.5669633616228524, | |
| "grad_norm": 2.440599800961732, | |
| "learning_rate": 2.232657417289221e-06, | |
| "loss": 0.62468157, | |
| "memory(GiB)": 48.58, | |
| "step": 3785, | |
| "train_speed(iter/s)": 0.020024 | |
| }, | |
| { | |
| "epoch": 1.5690333264334506, | |
| "grad_norm": 2.0016886010949797, | |
| "learning_rate": 2.2219850586979724e-06, | |
| "loss": 0.62775316, | |
| "memory(GiB)": 48.58, | |
| "step": 3790, | |
| "train_speed(iter/s)": 0.020025 | |
| }, | |
| { | |
| "epoch": 1.5711032912440488, | |
| "grad_norm": 1.9086880143876215, | |
| "learning_rate": 2.211312700106724e-06, | |
| "loss": 0.62862492, | |
| "memory(GiB)": 48.58, | |
| "step": 3795, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.573173256054647, | |
| "grad_norm": 2.0888610038198636, | |
| "learning_rate": 2.200640341515475e-06, | |
| "loss": 0.59845972, | |
| "memory(GiB)": 48.58, | |
| "step": 3800, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.5752432208652452, | |
| "grad_norm": 2.055969507055363, | |
| "learning_rate": 2.1899679829242263e-06, | |
| "loss": 0.61734905, | |
| "memory(GiB)": 48.58, | |
| "step": 3805, | |
| "train_speed(iter/s)": 0.020027 | |
| }, | |
| { | |
| "epoch": 1.5773131856758436, | |
| "grad_norm": 1.9707520677944765, | |
| "learning_rate": 2.179295624332978e-06, | |
| "loss": 0.62550197, | |
| "memory(GiB)": 48.58, | |
| "step": 3810, | |
| "train_speed(iter/s)": 0.020028 | |
| }, | |
| { | |
| "epoch": 1.5793831504864417, | |
| "grad_norm": 2.1321630269454617, | |
| "learning_rate": 2.1686232657417293e-06, | |
| "loss": 0.62156429, | |
| "memory(GiB)": 48.58, | |
| "step": 3815, | |
| "train_speed(iter/s)": 0.020029 | |
| }, | |
| { | |
| "epoch": 1.58145311529704, | |
| "grad_norm": 2.3313380056807373, | |
| "learning_rate": 2.1579509071504803e-06, | |
| "loss": 0.62489176, | |
| "memory(GiB)": 48.58, | |
| "step": 3820, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.5835230801076383, | |
| "grad_norm": 2.0579013534602044, | |
| "learning_rate": 2.1472785485592318e-06, | |
| "loss": 0.60487609, | |
| "memory(GiB)": 48.58, | |
| "step": 3825, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.5855930449182365, | |
| "grad_norm": 2.1449642041698267, | |
| "learning_rate": 2.136606189967983e-06, | |
| "loss": 0.63105164, | |
| "memory(GiB)": 48.58, | |
| "step": 3830, | |
| "train_speed(iter/s)": 0.020031 | |
| }, | |
| { | |
| "epoch": 1.5876630097288347, | |
| "grad_norm": 1.8838252023426636, | |
| "learning_rate": 2.1259338313767343e-06, | |
| "loss": 0.62664189, | |
| "memory(GiB)": 48.58, | |
| "step": 3835, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.5897329745394329, | |
| "grad_norm": 2.289021375023386, | |
| "learning_rate": 2.1152614727854857e-06, | |
| "loss": 0.60014114, | |
| "memory(GiB)": 48.58, | |
| "step": 3840, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.591802939350031, | |
| "grad_norm": 1.952623771952289, | |
| "learning_rate": 2.104589114194237e-06, | |
| "loss": 0.63038387, | |
| "memory(GiB)": 48.58, | |
| "step": 3845, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.5938729041606292, | |
| "grad_norm": 2.151088803547346, | |
| "learning_rate": 2.0939167556029887e-06, | |
| "loss": 0.64208837, | |
| "memory(GiB)": 48.58, | |
| "step": 3850, | |
| "train_speed(iter/s)": 0.020033 | |
| }, | |
| { | |
| "epoch": 1.5959428689712274, | |
| "grad_norm": 2.0667970850832025, | |
| "learning_rate": 2.0832443970117397e-06, | |
| "loss": 0.62941227, | |
| "memory(GiB)": 48.58, | |
| "step": 3855, | |
| "train_speed(iter/s)": 0.020034 | |
| }, | |
| { | |
| "epoch": 1.5980128337818256, | |
| "grad_norm": 1.8287162112237207, | |
| "learning_rate": 2.072572038420491e-06, | |
| "loss": 0.60888386, | |
| "memory(GiB)": 48.58, | |
| "step": 3860, | |
| "train_speed(iter/s)": 0.020035 | |
| }, | |
| { | |
| "epoch": 1.600082798592424, | |
| "grad_norm": 2.083482144562111, | |
| "learning_rate": 2.061899679829242e-06, | |
| "loss": 0.60378475, | |
| "memory(GiB)": 48.58, | |
| "step": 3865, | |
| "train_speed(iter/s)": 0.020035 | |
| }, | |
| { | |
| "epoch": 1.6021527634030222, | |
| "grad_norm": 2.3064863216709925, | |
| "learning_rate": 2.0512273212379937e-06, | |
| "loss": 0.60522232, | |
| "memory(GiB)": 48.58, | |
| "step": 3870, | |
| "train_speed(iter/s)": 0.020036 | |
| }, | |
| { | |
| "epoch": 1.6042227282136203, | |
| "grad_norm": 2.1700368490113844, | |
| "learning_rate": 2.040554962646745e-06, | |
| "loss": 0.61363611, | |
| "memory(GiB)": 48.58, | |
| "step": 3875, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 1.6062926930242187, | |
| "grad_norm": 2.2038759685376843, | |
| "learning_rate": 2.0298826040554966e-06, | |
| "loss": 0.60480423, | |
| "memory(GiB)": 48.58, | |
| "step": 3880, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 1.608362657834817, | |
| "grad_norm": 2.1947427482623914, | |
| "learning_rate": 2.019210245464248e-06, | |
| "loss": 0.61413918, | |
| "memory(GiB)": 48.58, | |
| "step": 3885, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 1.610432622645415, | |
| "grad_norm": 2.022770825774821, | |
| "learning_rate": 2.008537886872999e-06, | |
| "loss": 0.61320429, | |
| "memory(GiB)": 48.58, | |
| "step": 3890, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.6125025874560133, | |
| "grad_norm": 2.052392899511488, | |
| "learning_rate": 1.9978655282817505e-06, | |
| "loss": 0.6130487, | |
| "memory(GiB)": 48.58, | |
| "step": 3895, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.6145725522666114, | |
| "grad_norm": 2.0156586912928596, | |
| "learning_rate": 1.9871931696905016e-06, | |
| "loss": 0.64238014, | |
| "memory(GiB)": 48.58, | |
| "step": 3900, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 1.6145725522666114, | |
| "eval_loss": 0.79938143491745, | |
| "eval_runtime": 333.9342, | |
| "eval_samples_per_second": 18.704, | |
| "eval_steps_per_second": 1.171, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.6166425170772096, | |
| "grad_norm": 2.023426003459966, | |
| "learning_rate": 1.976520811099253e-06, | |
| "loss": 0.61180491, | |
| "memory(GiB)": 48.58, | |
| "step": 3905, | |
| "train_speed(iter/s)": 0.020001 | |
| }, | |
| { | |
| "epoch": 1.6187124818878078, | |
| "grad_norm": 2.053001775702235, | |
| "learning_rate": 1.9658484525080045e-06, | |
| "loss": 0.62299452, | |
| "memory(GiB)": 48.58, | |
| "step": 3910, | |
| "train_speed(iter/s)": 0.020002 | |
| }, | |
| { | |
| "epoch": 1.620782446698406, | |
| "grad_norm": 1.867769590995953, | |
| "learning_rate": 1.955176093916756e-06, | |
| "loss": 0.61298056, | |
| "memory(GiB)": 48.58, | |
| "step": 3915, | |
| "train_speed(iter/s)": 0.020002 | |
| }, | |
| { | |
| "epoch": 1.6228524115090044, | |
| "grad_norm": 2.3195386012179404, | |
| "learning_rate": 1.944503735325507e-06, | |
| "loss": 0.62579803, | |
| "memory(GiB)": 48.58, | |
| "step": 3920, | |
| "train_speed(iter/s)": 0.020003 | |
| }, | |
| { | |
| "epoch": 1.6249223763196026, | |
| "grad_norm": 2.0728629845134825, | |
| "learning_rate": 1.9338313767342585e-06, | |
| "loss": 0.60280285, | |
| "memory(GiB)": 48.58, | |
| "step": 3925, | |
| "train_speed(iter/s)": 0.020004 | |
| }, | |
| { | |
| "epoch": 1.6269923411302007, | |
| "grad_norm": 1.9603574858854882, | |
| "learning_rate": 1.92315901814301e-06, | |
| "loss": 0.61659031, | |
| "memory(GiB)": 48.58, | |
| "step": 3930, | |
| "train_speed(iter/s)": 0.020005 | |
| }, | |
| { | |
| "epoch": 1.6290623059407991, | |
| "grad_norm": 2.1782071337826014, | |
| "learning_rate": 1.912486659551761e-06, | |
| "loss": 0.60701981, | |
| "memory(GiB)": 48.58, | |
| "step": 3935, | |
| "train_speed(iter/s)": 0.020006 | |
| }, | |
| { | |
| "epoch": 1.6311322707513973, | |
| "grad_norm": 1.7195215240069541, | |
| "learning_rate": 1.9018143009605124e-06, | |
| "loss": 0.59961052, | |
| "memory(GiB)": 48.58, | |
| "step": 3940, | |
| "train_speed(iter/s)": 0.020007 | |
| }, | |
| { | |
| "epoch": 1.6332022355619955, | |
| "grad_norm": 2.076700307294715, | |
| "learning_rate": 1.8911419423692637e-06, | |
| "loss": 0.61397924, | |
| "memory(GiB)": 48.58, | |
| "step": 3945, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 1.6352722003725937, | |
| "grad_norm": 2.1171856192990615, | |
| "learning_rate": 1.8804695837780151e-06, | |
| "loss": 0.61544151, | |
| "memory(GiB)": 48.58, | |
| "step": 3950, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 1.6373421651831919, | |
| "grad_norm": 2.2158147695337838, | |
| "learning_rate": 1.8697972251867664e-06, | |
| "loss": 0.62159052, | |
| "memory(GiB)": 48.58, | |
| "step": 3955, | |
| "train_speed(iter/s)": 0.020009 | |
| }, | |
| { | |
| "epoch": 1.63941212999379, | |
| "grad_norm": 1.8635077630631116, | |
| "learning_rate": 1.8591248665955179e-06, | |
| "loss": 0.62834597, | |
| "memory(GiB)": 48.58, | |
| "step": 3960, | |
| "train_speed(iter/s)": 0.02001 | |
| }, | |
| { | |
| "epoch": 1.6414820948043882, | |
| "grad_norm": 1.9610933756803843, | |
| "learning_rate": 1.8484525080042693e-06, | |
| "loss": 0.61779599, | |
| "memory(GiB)": 48.58, | |
| "step": 3965, | |
| "train_speed(iter/s)": 0.020011 | |
| }, | |
| { | |
| "epoch": 1.6435520596149864, | |
| "grad_norm": 1.9029383953647434, | |
| "learning_rate": 1.8377801494130204e-06, | |
| "loss": 0.61077566, | |
| "memory(GiB)": 48.58, | |
| "step": 3970, | |
| "train_speed(iter/s)": 0.020011 | |
| }, | |
| { | |
| "epoch": 1.6456220244255848, | |
| "grad_norm": 2.0846014405355926, | |
| "learning_rate": 1.8271077908217718e-06, | |
| "loss": 0.61311092, | |
| "memory(GiB)": 48.58, | |
| "step": 3975, | |
| "train_speed(iter/s)": 0.020012 | |
| }, | |
| { | |
| "epoch": 1.647691989236183, | |
| "grad_norm": 2.390238831095919, | |
| "learning_rate": 1.816435432230523e-06, | |
| "loss": 0.62842617, | |
| "memory(GiB)": 48.58, | |
| "step": 3980, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.6497619540467812, | |
| "grad_norm": 1.8650578046674164, | |
| "learning_rate": 1.8057630736392745e-06, | |
| "loss": 0.62033787, | |
| "memory(GiB)": 48.58, | |
| "step": 3985, | |
| "train_speed(iter/s)": 0.020014 | |
| }, | |
| { | |
| "epoch": 1.6518319188573796, | |
| "grad_norm": 1.9997553186688102, | |
| "learning_rate": 1.7950907150480258e-06, | |
| "loss": 0.61835356, | |
| "memory(GiB)": 48.58, | |
| "step": 3990, | |
| "train_speed(iter/s)": 0.020015 | |
| }, | |
| { | |
| "epoch": 1.6539018836679777, | |
| "grad_norm": 2.0185275543308, | |
| "learning_rate": 1.7844183564567772e-06, | |
| "loss": 0.62564411, | |
| "memory(GiB)": 48.58, | |
| "step": 3995, | |
| "train_speed(iter/s)": 0.020015 | |
| }, | |
| { | |
| "epoch": 1.655971848478576, | |
| "grad_norm": 1.6493866482983615, | |
| "learning_rate": 1.7737459978655283e-06, | |
| "loss": 0.62476611, | |
| "memory(GiB)": 48.58, | |
| "step": 4000, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.658041813289174, | |
| "grad_norm": 2.1258878217039157, | |
| "learning_rate": 1.7630736392742797e-06, | |
| "loss": 0.61094875, | |
| "memory(GiB)": 48.58, | |
| "step": 4005, | |
| "train_speed(iter/s)": 0.020017 | |
| }, | |
| { | |
| "epoch": 1.6601117780997723, | |
| "grad_norm": 1.8264630776919117, | |
| "learning_rate": 1.7524012806830312e-06, | |
| "loss": 0.61455221, | |
| "memory(GiB)": 48.58, | |
| "step": 4010, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.6621817429103705, | |
| "grad_norm": 2.1834934664197148, | |
| "learning_rate": 1.7417289220917825e-06, | |
| "loss": 0.61525717, | |
| "memory(GiB)": 48.58, | |
| "step": 4015, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.6642517077209686, | |
| "grad_norm": 1.951531129714831, | |
| "learning_rate": 1.731056563500534e-06, | |
| "loss": 0.61558003, | |
| "memory(GiB)": 48.58, | |
| "step": 4020, | |
| "train_speed(iter/s)": 0.020019 | |
| }, | |
| { | |
| "epoch": 1.6663216725315668, | |
| "grad_norm": 2.4480446682769315, | |
| "learning_rate": 1.720384204909285e-06, | |
| "loss": 0.58283405, | |
| "memory(GiB)": 48.58, | |
| "step": 4025, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.6683916373421652, | |
| "grad_norm": 1.7243360581014375, | |
| "learning_rate": 1.7097118463180364e-06, | |
| "loss": 0.60816731, | |
| "memory(GiB)": 48.58, | |
| "step": 4030, | |
| "train_speed(iter/s)": 0.020021 | |
| }, | |
| { | |
| "epoch": 1.6704616021527634, | |
| "grad_norm": 1.935378370315751, | |
| "learning_rate": 1.6990394877267877e-06, | |
| "loss": 0.59363813, | |
| "memory(GiB)": 48.58, | |
| "step": 4035, | |
| "train_speed(iter/s)": 0.020021 | |
| }, | |
| { | |
| "epoch": 1.6725315669633616, | |
| "grad_norm": 2.343341523870396, | |
| "learning_rate": 1.6883671291355391e-06, | |
| "loss": 0.59670277, | |
| "memory(GiB)": 48.58, | |
| "step": 4040, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.67460153177396, | |
| "grad_norm": 2.002429630691932, | |
| "learning_rate": 1.6776947705442904e-06, | |
| "loss": 0.61318674, | |
| "memory(GiB)": 48.58, | |
| "step": 4045, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.6766714965845582, | |
| "grad_norm": 2.161836907625474, | |
| "learning_rate": 1.6670224119530418e-06, | |
| "loss": 0.63845253, | |
| "memory(GiB)": 48.58, | |
| "step": 4050, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.6787414613951563, | |
| "grad_norm": 2.3095783835557993, | |
| "learning_rate": 1.6563500533617933e-06, | |
| "loss": 0.61825991, | |
| "memory(GiB)": 48.58, | |
| "step": 4055, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.6808114262057545, | |
| "grad_norm": 2.0857994277393326, | |
| "learning_rate": 1.6456776947705443e-06, | |
| "loss": 0.58271861, | |
| "memory(GiB)": 48.58, | |
| "step": 4060, | |
| "train_speed(iter/s)": 0.020024 | |
| }, | |
| { | |
| "epoch": 1.6828813910163527, | |
| "grad_norm": 2.591242513945162, | |
| "learning_rate": 1.6350053361792958e-06, | |
| "loss": 0.59729037, | |
| "memory(GiB)": 48.58, | |
| "step": 4065, | |
| "train_speed(iter/s)": 0.020025 | |
| }, | |
| { | |
| "epoch": 1.6849513558269509, | |
| "grad_norm": 2.2408993824641836, | |
| "learning_rate": 1.624332977588047e-06, | |
| "loss": 0.61958027, | |
| "memory(GiB)": 48.58, | |
| "step": 4070, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.687021320637549, | |
| "grad_norm": 2.07836511660639, | |
| "learning_rate": 1.6136606189967985e-06, | |
| "loss": 0.61059999, | |
| "memory(GiB)": 48.58, | |
| "step": 4075, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.6890912854481472, | |
| "grad_norm": 2.1885333872092767, | |
| "learning_rate": 1.6029882604055498e-06, | |
| "loss": 0.61813364, | |
| "memory(GiB)": 48.58, | |
| "step": 4080, | |
| "train_speed(iter/s)": 0.020027 | |
| }, | |
| { | |
| "epoch": 1.6911612502587456, | |
| "grad_norm": 2.0563236890963075, | |
| "learning_rate": 1.5923159018143012e-06, | |
| "loss": 0.62983589, | |
| "memory(GiB)": 48.58, | |
| "step": 4085, | |
| "train_speed(iter/s)": 0.020027 | |
| }, | |
| { | |
| "epoch": 1.6932312150693438, | |
| "grad_norm": 2.205936525088634, | |
| "learning_rate": 1.5816435432230523e-06, | |
| "loss": 0.6022356, | |
| "memory(GiB)": 48.58, | |
| "step": 4090, | |
| "train_speed(iter/s)": 0.020028 | |
| }, | |
| { | |
| "epoch": 1.695301179879942, | |
| "grad_norm": 2.1192285983309262, | |
| "learning_rate": 1.5709711846318037e-06, | |
| "loss": 0.61405392, | |
| "memory(GiB)": 48.58, | |
| "step": 4095, | |
| "train_speed(iter/s)": 0.020029 | |
| }, | |
| { | |
| "epoch": 1.6973711446905404, | |
| "grad_norm": 2.2747714483339676, | |
| "learning_rate": 1.5602988260405552e-06, | |
| "loss": 0.59890566, | |
| "memory(GiB)": 48.58, | |
| "step": 4100, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.6994411095011386, | |
| "grad_norm": 1.7865480667421139, | |
| "learning_rate": 1.5496264674493064e-06, | |
| "loss": 0.59438276, | |
| "memory(GiB)": 48.58, | |
| "step": 4105, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.7015110743117368, | |
| "grad_norm": 2.1136132702931953, | |
| "learning_rate": 1.538954108858058e-06, | |
| "loss": 0.58430662, | |
| "memory(GiB)": 48.58, | |
| "step": 4110, | |
| "train_speed(iter/s)": 0.020031 | |
| }, | |
| { | |
| "epoch": 1.703581039122335, | |
| "grad_norm": 1.8888155653077559, | |
| "learning_rate": 1.528281750266809e-06, | |
| "loss": 0.61480141, | |
| "memory(GiB)": 48.58, | |
| "step": 4115, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.7056510039329331, | |
| "grad_norm": 2.1453583525948567, | |
| "learning_rate": 1.5176093916755604e-06, | |
| "loss": 0.60317545, | |
| "memory(GiB)": 48.58, | |
| "step": 4120, | |
| "train_speed(iter/s)": 0.020033 | |
| }, | |
| { | |
| "epoch": 1.7077209687435313, | |
| "grad_norm": 2.0869705530610174, | |
| "learning_rate": 1.5069370330843117e-06, | |
| "loss": 0.57888694, | |
| "memory(GiB)": 48.58, | |
| "step": 4125, | |
| "train_speed(iter/s)": 0.020033 | |
| }, | |
| { | |
| "epoch": 1.7097909335541295, | |
| "grad_norm": 2.0589261216950177, | |
| "learning_rate": 1.4962646744930631e-06, | |
| "loss": 0.63887987, | |
| "memory(GiB)": 48.58, | |
| "step": 4130, | |
| "train_speed(iter/s)": 0.020034 | |
| }, | |
| { | |
| "epoch": 1.7118608983647277, | |
| "grad_norm": 2.246554976018598, | |
| "learning_rate": 1.4855923159018144e-06, | |
| "loss": 0.60080147, | |
| "memory(GiB)": 48.58, | |
| "step": 4135, | |
| "train_speed(iter/s)": 0.020035 | |
| }, | |
| { | |
| "epoch": 1.713930863175326, | |
| "grad_norm": 2.2465140455191377, | |
| "learning_rate": 1.4749199573105658e-06, | |
| "loss": 0.62593145, | |
| "memory(GiB)": 48.58, | |
| "step": 4140, | |
| "train_speed(iter/s)": 0.020035 | |
| }, | |
| { | |
| "epoch": 1.7160008279859242, | |
| "grad_norm": 2.1975792886927135, | |
| "learning_rate": 1.4642475987193173e-06, | |
| "loss": 0.61860814, | |
| "memory(GiB)": 48.58, | |
| "step": 4145, | |
| "train_speed(iter/s)": 0.020036 | |
| }, | |
| { | |
| "epoch": 1.7180707927965224, | |
| "grad_norm": 1.8897839022485312, | |
| "learning_rate": 1.4535752401280683e-06, | |
| "loss": 0.62411423, | |
| "memory(GiB)": 48.58, | |
| "step": 4150, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 1.7201407576071208, | |
| "grad_norm": 2.4140521953157794, | |
| "learning_rate": 1.4429028815368198e-06, | |
| "loss": 0.60574412, | |
| "memory(GiB)": 48.58, | |
| "step": 4155, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 1.722210722417719, | |
| "grad_norm": 2.04528954072566, | |
| "learning_rate": 1.432230522945571e-06, | |
| "loss": 0.61065197, | |
| "memory(GiB)": 48.58, | |
| "step": 4160, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 1.7242806872283172, | |
| "grad_norm": 2.544468455409069, | |
| "learning_rate": 1.4215581643543225e-06, | |
| "loss": 0.59008269, | |
| "memory(GiB)": 48.58, | |
| "step": 4165, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.7263506520389154, | |
| "grad_norm": 1.9933726482621115, | |
| "learning_rate": 1.4108858057630738e-06, | |
| "loss": 0.61828232, | |
| "memory(GiB)": 48.58, | |
| "step": 4170, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.7284206168495135, | |
| "grad_norm": 2.0938798089462702, | |
| "learning_rate": 1.4002134471718252e-06, | |
| "loss": 0.63251686, | |
| "memory(GiB)": 48.58, | |
| "step": 4175, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.7304905816601117, | |
| "grad_norm": 2.586866443962492, | |
| "learning_rate": 1.3895410885805763e-06, | |
| "loss": 0.61843009, | |
| "memory(GiB)": 48.58, | |
| "step": 4180, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 1.73256054647071, | |
| "grad_norm": 2.157748885604068, | |
| "learning_rate": 1.3788687299893277e-06, | |
| "loss": 0.61311278, | |
| "memory(GiB)": 48.58, | |
| "step": 4185, | |
| "train_speed(iter/s)": 0.020041 | |
| }, | |
| { | |
| "epoch": 1.734630511281308, | |
| "grad_norm": 2.1925495515847944, | |
| "learning_rate": 1.3681963713980792e-06, | |
| "loss": 0.61852412, | |
| "memory(GiB)": 48.58, | |
| "step": 4190, | |
| "train_speed(iter/s)": 0.020041 | |
| }, | |
| { | |
| "epoch": 1.7367004760919065, | |
| "grad_norm": 2.0519214102963566, | |
| "learning_rate": 1.3575240128068304e-06, | |
| "loss": 0.60840869, | |
| "memory(GiB)": 48.58, | |
| "step": 4195, | |
| "train_speed(iter/s)": 0.020041 | |
| }, | |
| { | |
| "epoch": 1.7387704409025047, | |
| "grad_norm": 2.1138169542373335, | |
| "learning_rate": 1.3468516542155819e-06, | |
| "loss": 0.60299668, | |
| "memory(GiB)": 48.58, | |
| "step": 4200, | |
| "train_speed(iter/s)": 0.020042 | |
| }, | |
| { | |
| "epoch": 1.7387704409025047, | |
| "eval_loss": 0.7988836765289307, | |
| "eval_runtime": 333.377, | |
| "eval_samples_per_second": 18.736, | |
| "eval_steps_per_second": 1.173, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.7408404057131028, | |
| "grad_norm": 1.8176186505765208, | |
| "learning_rate": 1.336179295624333e-06, | |
| "loss": 0.59205551, | |
| "memory(GiB)": 48.58, | |
| "step": 4205, | |
| "train_speed(iter/s)": 0.020007 | |
| }, | |
| { | |
| "epoch": 1.7429103705237012, | |
| "grad_norm": 2.097301273083197, | |
| "learning_rate": 1.3255069370330844e-06, | |
| "loss": 0.59544134, | |
| "memory(GiB)": 48.58, | |
| "step": 4210, | |
| "train_speed(iter/s)": 0.020007 | |
| }, | |
| { | |
| "epoch": 1.7449803353342994, | |
| "grad_norm": 2.1754814306718093, | |
| "learning_rate": 1.3148345784418356e-06, | |
| "loss": 0.61374321, | |
| "memory(GiB)": 48.58, | |
| "step": 4215, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 1.7470503001448976, | |
| "grad_norm": 2.1091284971781543, | |
| "learning_rate": 1.304162219850587e-06, | |
| "loss": 0.58602142, | |
| "memory(GiB)": 48.58, | |
| "step": 4220, | |
| "train_speed(iter/s)": 0.020008 | |
| }, | |
| { | |
| "epoch": 1.7491202649554958, | |
| "grad_norm": 2.1153060430505177, | |
| "learning_rate": 1.2934898612593383e-06, | |
| "loss": 0.59698052, | |
| "memory(GiB)": 48.58, | |
| "step": 4225, | |
| "train_speed(iter/s)": 0.020009 | |
| }, | |
| { | |
| "epoch": 1.751190229766094, | |
| "grad_norm": 2.3476551963062193, | |
| "learning_rate": 1.2828175026680898e-06, | |
| "loss": 0.63100615, | |
| "memory(GiB)": 48.58, | |
| "step": 4230, | |
| "train_speed(iter/s)": 0.020009 | |
| }, | |
| { | |
| "epoch": 1.7532601945766921, | |
| "grad_norm": 2.1723309903381987, | |
| "learning_rate": 1.2721451440768413e-06, | |
| "loss": 0.61266661, | |
| "memory(GiB)": 48.58, | |
| "step": 4235, | |
| "train_speed(iter/s)": 0.02001 | |
| }, | |
| { | |
| "epoch": 1.7553301593872903, | |
| "grad_norm": 2.4855935431363267, | |
| "learning_rate": 1.2614727854855923e-06, | |
| "loss": 0.60998316, | |
| "memory(GiB)": 48.58, | |
| "step": 4240, | |
| "train_speed(iter/s)": 0.020011 | |
| }, | |
| { | |
| "epoch": 1.7574001241978885, | |
| "grad_norm": 2.5103101197105233, | |
| "learning_rate": 1.2508004268943438e-06, | |
| "loss": 0.61193109, | |
| "memory(GiB)": 48.58, | |
| "step": 4245, | |
| "train_speed(iter/s)": 0.020011 | |
| }, | |
| { | |
| "epoch": 1.759470089008487, | |
| "grad_norm": 2.2253279455991195, | |
| "learning_rate": 1.2401280683030952e-06, | |
| "loss": 0.58802786, | |
| "memory(GiB)": 48.58, | |
| "step": 4250, | |
| "train_speed(iter/s)": 0.020012 | |
| }, | |
| { | |
| "epoch": 1.761540053819085, | |
| "grad_norm": 2.412011728154561, | |
| "learning_rate": 1.2294557097118465e-06, | |
| "loss": 0.6127542, | |
| "memory(GiB)": 48.58, | |
| "step": 4255, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.7636100186296833, | |
| "grad_norm": 2.186989181036045, | |
| "learning_rate": 1.2187833511205977e-06, | |
| "loss": 0.59003277, | |
| "memory(GiB)": 48.58, | |
| "step": 4260, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.7656799834402817, | |
| "grad_norm": 2.528633630931418, | |
| "learning_rate": 1.2081109925293492e-06, | |
| "loss": 0.61563702, | |
| "memory(GiB)": 48.58, | |
| "step": 4265, | |
| "train_speed(iter/s)": 0.020014 | |
| }, | |
| { | |
| "epoch": 1.7677499482508798, | |
| "grad_norm": 1.9867035049680128, | |
| "learning_rate": 1.1974386339381004e-06, | |
| "loss": 0.60806894, | |
| "memory(GiB)": 48.58, | |
| "step": 4270, | |
| "train_speed(iter/s)": 0.020015 | |
| }, | |
| { | |
| "epoch": 1.769819913061478, | |
| "grad_norm": 2.2593003701423755, | |
| "learning_rate": 1.1867662753468517e-06, | |
| "loss": 0.61837912, | |
| "memory(GiB)": 48.58, | |
| "step": 4275, | |
| "train_speed(iter/s)": 0.020015 | |
| }, | |
| { | |
| "epoch": 1.7718898778720762, | |
| "grad_norm": 2.4545294029220797, | |
| "learning_rate": 1.176093916755603e-06, | |
| "loss": 0.61774817, | |
| "memory(GiB)": 48.58, | |
| "step": 4280, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.7739598426826744, | |
| "grad_norm": 1.9914997887032238, | |
| "learning_rate": 1.1654215581643544e-06, | |
| "loss": 0.58686681, | |
| "memory(GiB)": 48.58, | |
| "step": 4285, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.7760298074932725, | |
| "grad_norm": 2.0999532307692896, | |
| "learning_rate": 1.1547491995731057e-06, | |
| "loss": 0.60145164, | |
| "memory(GiB)": 48.58, | |
| "step": 4290, | |
| "train_speed(iter/s)": 0.020017 | |
| }, | |
| { | |
| "epoch": 1.7780997723038707, | |
| "grad_norm": 2.480798626827661, | |
| "learning_rate": 1.1440768409818571e-06, | |
| "loss": 0.61948671, | |
| "memory(GiB)": 48.58, | |
| "step": 4295, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.780169737114469, | |
| "grad_norm": 2.1964203822462527, | |
| "learning_rate": 1.1334044823906084e-06, | |
| "loss": 0.5903161, | |
| "memory(GiB)": 48.58, | |
| "step": 4300, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.7822397019250673, | |
| "grad_norm": 2.2986740177948914, | |
| "learning_rate": 1.1227321237993598e-06, | |
| "loss": 0.60875359, | |
| "memory(GiB)": 48.58, | |
| "step": 4305, | |
| "train_speed(iter/s)": 0.020019 | |
| }, | |
| { | |
| "epoch": 1.7843096667356655, | |
| "grad_norm": 1.994450780247921, | |
| "learning_rate": 1.112059765208111e-06, | |
| "loss": 0.61587105, | |
| "memory(GiB)": 48.58, | |
| "step": 4310, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.7863796315462637, | |
| "grad_norm": 2.096310617308162, | |
| "learning_rate": 1.1013874066168623e-06, | |
| "loss": 0.58633337, | |
| "memory(GiB)": 48.58, | |
| "step": 4315, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.788449596356862, | |
| "grad_norm": 1.9466363269467861, | |
| "learning_rate": 1.0907150480256138e-06, | |
| "loss": 0.60186481, | |
| "memory(GiB)": 48.58, | |
| "step": 4320, | |
| "train_speed(iter/s)": 0.020021 | |
| }, | |
| { | |
| "epoch": 1.7905195611674602, | |
| "grad_norm": 2.308498919041305, | |
| "learning_rate": 1.080042689434365e-06, | |
| "loss": 0.59321814, | |
| "memory(GiB)": 48.58, | |
| "step": 4325, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.7925895259780584, | |
| "grad_norm": 2.6206625429667874, | |
| "learning_rate": 1.0693703308431163e-06, | |
| "loss": 0.61461964, | |
| "memory(GiB)": 48.58, | |
| "step": 4330, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.7946594907886566, | |
| "grad_norm": 2.4886244412597094, | |
| "learning_rate": 1.0586979722518678e-06, | |
| "loss": 0.61646094, | |
| "memory(GiB)": 48.58, | |
| "step": 4335, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.7967294555992548, | |
| "grad_norm": 1.9987554252944004, | |
| "learning_rate": 1.0480256136606192e-06, | |
| "loss": 0.60513401, | |
| "memory(GiB)": 48.58, | |
| "step": 4340, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.798799420409853, | |
| "grad_norm": 2.909678280995542, | |
| "learning_rate": 1.0373532550693705e-06, | |
| "loss": 0.58900928, | |
| "memory(GiB)": 48.58, | |
| "step": 4345, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.8008693852204511, | |
| "grad_norm": 2.232043207476952, | |
| "learning_rate": 1.0266808964781217e-06, | |
| "loss": 0.61423898, | |
| "memory(GiB)": 48.58, | |
| "step": 4350, | |
| "train_speed(iter/s)": 0.020024 | |
| }, | |
| { | |
| "epoch": 1.8029393500310493, | |
| "grad_norm": 1.9536032593654695, | |
| "learning_rate": 1.0160085378868732e-06, | |
| "loss": 0.61663337, | |
| "memory(GiB)": 48.58, | |
| "step": 4355, | |
| "train_speed(iter/s)": 0.020025 | |
| }, | |
| { | |
| "epoch": 1.8050093148416477, | |
| "grad_norm": 2.5224030653638048, | |
| "learning_rate": 1.0053361792956244e-06, | |
| "loss": 0.61187458, | |
| "memory(GiB)": 48.58, | |
| "step": 4360, | |
| "train_speed(iter/s)": 0.020025 | |
| }, | |
| { | |
| "epoch": 1.807079279652246, | |
| "grad_norm": 1.7925531365832896, | |
| "learning_rate": 9.946638207043757e-07, | |
| "loss": 0.59958668, | |
| "memory(GiB)": 48.58, | |
| "step": 4365, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.809149244462844, | |
| "grad_norm": 2.000073758007796, | |
| "learning_rate": 9.839914621131271e-07, | |
| "loss": 0.62268171, | |
| "memory(GiB)": 48.58, | |
| "step": 4370, | |
| "train_speed(iter/s)": 0.020027 | |
| }, | |
| { | |
| "epoch": 1.8112192092734425, | |
| "grad_norm": 1.8927383998351053, | |
| "learning_rate": 9.733191035218784e-07, | |
| "loss": 0.61844292, | |
| "memory(GiB)": 48.58, | |
| "step": 4375, | |
| "train_speed(iter/s)": 0.020028 | |
| }, | |
| { | |
| "epoch": 1.8132891740840407, | |
| "grad_norm": 2.2603010222528708, | |
| "learning_rate": 9.626467449306296e-07, | |
| "loss": 0.60280704, | |
| "memory(GiB)": 48.58, | |
| "step": 4380, | |
| "train_speed(iter/s)": 0.020028 | |
| }, | |
| { | |
| "epoch": 1.8153591388946388, | |
| "grad_norm": 2.0743626326418494, | |
| "learning_rate": 9.519743863393811e-07, | |
| "loss": 0.59184837, | |
| "memory(GiB)": 48.58, | |
| "step": 4385, | |
| "train_speed(iter/s)": 0.020029 | |
| }, | |
| { | |
| "epoch": 1.817429103705237, | |
| "grad_norm": 2.5328803417607273, | |
| "learning_rate": 9.413020277481325e-07, | |
| "loss": 0.60617228, | |
| "memory(GiB)": 48.58, | |
| "step": 4390, | |
| "train_speed(iter/s)": 0.020029 | |
| }, | |
| { | |
| "epoch": 1.8194990685158352, | |
| "grad_norm": 2.157648955819526, | |
| "learning_rate": 9.306296691568837e-07, | |
| "loss": 0.57980437, | |
| "memory(GiB)": 48.58, | |
| "step": 4395, | |
| "train_speed(iter/s)": 0.020029 | |
| }, | |
| { | |
| "epoch": 1.8215690333264334, | |
| "grad_norm": 2.238654145184739, | |
| "learning_rate": 9.199573105656351e-07, | |
| "loss": 0.59073811, | |
| "memory(GiB)": 48.58, | |
| "step": 4400, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.8236389981370316, | |
| "grad_norm": 2.1560196555189677, | |
| "learning_rate": 9.092849519743864e-07, | |
| "loss": 0.60031624, | |
| "memory(GiB)": 48.58, | |
| "step": 4405, | |
| "train_speed(iter/s)": 0.020031 | |
| }, | |
| { | |
| "epoch": 1.8257089629476297, | |
| "grad_norm": 1.9778064514427567, | |
| "learning_rate": 8.986125933831377e-07, | |
| "loss": 0.59476948, | |
| "memory(GiB)": 48.58, | |
| "step": 4410, | |
| "train_speed(iter/s)": 0.020031 | |
| }, | |
| { | |
| "epoch": 1.8277789277582281, | |
| "grad_norm": 1.9062652252413357, | |
| "learning_rate": 8.87940234791889e-07, | |
| "loss": 0.60492353, | |
| "memory(GiB)": 48.58, | |
| "step": 4415, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.8298488925688263, | |
| "grad_norm": 2.20251758356039, | |
| "learning_rate": 8.772678762006404e-07, | |
| "loss": 0.57913284, | |
| "memory(GiB)": 48.58, | |
| "step": 4420, | |
| "train_speed(iter/s)": 0.020033 | |
| }, | |
| { | |
| "epoch": 1.8319188573794245, | |
| "grad_norm": 2.374670539942745, | |
| "learning_rate": 8.665955176093919e-07, | |
| "loss": 0.61436529, | |
| "memory(GiB)": 48.58, | |
| "step": 4425, | |
| "train_speed(iter/s)": 0.020033 | |
| }, | |
| { | |
| "epoch": 1.833988822190023, | |
| "grad_norm": 1.8014659618328237, | |
| "learning_rate": 8.559231590181431e-07, | |
| "loss": 0.58954339, | |
| "memory(GiB)": 48.58, | |
| "step": 4430, | |
| "train_speed(iter/s)": 0.020034 | |
| }, | |
| { | |
| "epoch": 1.836058787000621, | |
| "grad_norm": 2.5328263785548706, | |
| "learning_rate": 8.452508004268945e-07, | |
| "loss": 0.62326274, | |
| "memory(GiB)": 48.58, | |
| "step": 4435, | |
| "train_speed(iter/s)": 0.020034 | |
| }, | |
| { | |
| "epoch": 1.8381287518112193, | |
| "grad_norm": 2.201541673623922, | |
| "learning_rate": 8.345784418356458e-07, | |
| "loss": 0.61668777, | |
| "memory(GiB)": 48.58, | |
| "step": 4440, | |
| "train_speed(iter/s)": 0.020035 | |
| }, | |
| { | |
| "epoch": 1.8401987166218174, | |
| "grad_norm": 1.9916726147288757, | |
| "learning_rate": 8.239060832443971e-07, | |
| "loss": 0.60296612, | |
| "memory(GiB)": 48.58, | |
| "step": 4445, | |
| "train_speed(iter/s)": 0.020036 | |
| }, | |
| { | |
| "epoch": 1.8422686814324156, | |
| "grad_norm": 2.2611511913260167, | |
| "learning_rate": 8.132337246531484e-07, | |
| "loss": 0.59926748, | |
| "memory(GiB)": 48.58, | |
| "step": 4450, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 1.8443386462430138, | |
| "grad_norm": 1.8225104361277575, | |
| "learning_rate": 8.025613660618997e-07, | |
| "loss": 0.58590517, | |
| "memory(GiB)": 48.58, | |
| "step": 4455, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 1.846408611053612, | |
| "grad_norm": 2.350449897326385, | |
| "learning_rate": 7.91889007470651e-07, | |
| "loss": 0.60878654, | |
| "memory(GiB)": 48.58, | |
| "step": 4460, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 1.8484785758642102, | |
| "grad_norm": 2.4294512189314075, | |
| "learning_rate": 7.812166488794024e-07, | |
| "loss": 0.60740719, | |
| "memory(GiB)": 48.58, | |
| "step": 4465, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.8505485406748086, | |
| "grad_norm": 2.0078565564513413, | |
| "learning_rate": 7.705442902881538e-07, | |
| "loss": 0.61569843, | |
| "memory(GiB)": 48.58, | |
| "step": 4470, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.8526185054854067, | |
| "grad_norm": 2.416347350001525, | |
| "learning_rate": 7.598719316969051e-07, | |
| "loss": 0.59907522, | |
| "memory(GiB)": 48.58, | |
| "step": 4475, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 1.854688470296005, | |
| "grad_norm": 2.5546773910049523, | |
| "learning_rate": 7.491995731056565e-07, | |
| "loss": 0.58857327, | |
| "memory(GiB)": 48.58, | |
| "step": 4480, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 1.8567584351066033, | |
| "grad_norm": 2.1560857096799775, | |
| "learning_rate": 7.385272145144078e-07, | |
| "loss": 0.59972405, | |
| "memory(GiB)": 48.58, | |
| "step": 4485, | |
| "train_speed(iter/s)": 0.020041 | |
| }, | |
| { | |
| "epoch": 1.8588283999172015, | |
| "grad_norm": 2.8045450674344155, | |
| "learning_rate": 7.278548559231591e-07, | |
| "loss": 0.62519865, | |
| "memory(GiB)": 48.58, | |
| "step": 4490, | |
| "train_speed(iter/s)": 0.020042 | |
| }, | |
| { | |
| "epoch": 1.8608983647277997, | |
| "grad_norm": 1.88470841271018, | |
| "learning_rate": 7.171824973319104e-07, | |
| "loss": 0.59394321, | |
| "memory(GiB)": 48.58, | |
| "step": 4495, | |
| "train_speed(iter/s)": 0.020042 | |
| }, | |
| { | |
| "epoch": 1.8629683295383979, | |
| "grad_norm": 1.900323108929672, | |
| "learning_rate": 7.065101387406617e-07, | |
| "loss": 0.59171925, | |
| "memory(GiB)": 48.58, | |
| "step": 4500, | |
| "train_speed(iter/s)": 0.020043 | |
| }, | |
| { | |
| "epoch": 1.8629683295383979, | |
| "eval_loss": 0.7995001077651978, | |
| "eval_runtime": 333.5022, | |
| "eval_samples_per_second": 18.729, | |
| "eval_steps_per_second": 1.172, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.865038294348996, | |
| "grad_norm": 2.3418380686910854, | |
| "learning_rate": 6.95837780149413e-07, | |
| "loss": 0.62550597, | |
| "memory(GiB)": 48.58, | |
| "step": 4505, | |
| "train_speed(iter/s)": 0.020009 | |
| }, | |
| { | |
| "epoch": 1.8671082591595942, | |
| "grad_norm": 2.2011802954910826, | |
| "learning_rate": 6.851654215581644e-07, | |
| "loss": 0.60017891, | |
| "memory(GiB)": 48.58, | |
| "step": 4510, | |
| "train_speed(iter/s)": 0.02001 | |
| }, | |
| { | |
| "epoch": 1.8691782239701924, | |
| "grad_norm": 2.346777967941492, | |
| "learning_rate": 6.744930629669158e-07, | |
| "loss": 0.62277446, | |
| "memory(GiB)": 48.58, | |
| "step": 4515, | |
| "train_speed(iter/s)": 0.020011 | |
| }, | |
| { | |
| "epoch": 1.8712481887807906, | |
| "grad_norm": 2.3039363321887385, | |
| "learning_rate": 6.638207043756671e-07, | |
| "loss": 0.63189201, | |
| "memory(GiB)": 48.58, | |
| "step": 4520, | |
| "train_speed(iter/s)": 0.020012 | |
| }, | |
| { | |
| "epoch": 1.873318153591389, | |
| "grad_norm": 2.4254296862394753, | |
| "learning_rate": 6.531483457844184e-07, | |
| "loss": 0.59847736, | |
| "memory(GiB)": 48.58, | |
| "step": 4525, | |
| "train_speed(iter/s)": 0.020012 | |
| }, | |
| { | |
| "epoch": 1.8753881184019872, | |
| "grad_norm": 2.520645231015452, | |
| "learning_rate": 6.424759871931698e-07, | |
| "loss": 0.60772429, | |
| "memory(GiB)": 48.58, | |
| "step": 4530, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.8774580832125853, | |
| "grad_norm": 2.14246492812266, | |
| "learning_rate": 6.31803628601921e-07, | |
| "loss": 0.61204777, | |
| "memory(GiB)": 48.58, | |
| "step": 4535, | |
| "train_speed(iter/s)": 0.020014 | |
| }, | |
| { | |
| "epoch": 1.8795280480231837, | |
| "grad_norm": 1.9822250514442827, | |
| "learning_rate": 6.211312700106724e-07, | |
| "loss": 0.55947261, | |
| "memory(GiB)": 48.58, | |
| "step": 4540, | |
| "train_speed(iter/s)": 0.020014 | |
| }, | |
| { | |
| "epoch": 1.881598012833782, | |
| "grad_norm": 2.052778758307509, | |
| "learning_rate": 6.104589114194238e-07, | |
| "loss": 0.60927758, | |
| "memory(GiB)": 48.58, | |
| "step": 4545, | |
| "train_speed(iter/s)": 0.020015 | |
| }, | |
| { | |
| "epoch": 1.88366797764438, | |
| "grad_norm": 2.3908992672472222, | |
| "learning_rate": 5.997865528281751e-07, | |
| "loss": 0.61153603, | |
| "memory(GiB)": 48.58, | |
| "step": 4550, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.8857379424549783, | |
| "grad_norm": 1.9114509697538704, | |
| "learning_rate": 5.891141942369264e-07, | |
| "loss": 0.6099647, | |
| "memory(GiB)": 48.58, | |
| "step": 4555, | |
| "train_speed(iter/s)": 0.020016 | |
| }, | |
| { | |
| "epoch": 1.8878079072655765, | |
| "grad_norm": 2.063913038194293, | |
| "learning_rate": 5.784418356456777e-07, | |
| "loss": 0.61249609, | |
| "memory(GiB)": 48.58, | |
| "step": 4560, | |
| "train_speed(iter/s)": 0.020017 | |
| }, | |
| { | |
| "epoch": 1.8898778720761746, | |
| "grad_norm": 2.3274588748356404, | |
| "learning_rate": 5.677694770544291e-07, | |
| "loss": 0.60335112, | |
| "memory(GiB)": 48.58, | |
| "step": 4565, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.8919478368867728, | |
| "grad_norm": 2.5803919299014404, | |
| "learning_rate": 5.570971184631804e-07, | |
| "loss": 0.59715414, | |
| "memory(GiB)": 48.58, | |
| "step": 4570, | |
| "train_speed(iter/s)": 0.020018 | |
| }, | |
| { | |
| "epoch": 1.894017801697371, | |
| "grad_norm": 2.0982349697996727, | |
| "learning_rate": 5.464247598719318e-07, | |
| "loss": 0.59020104, | |
| "memory(GiB)": 48.58, | |
| "step": 4575, | |
| "train_speed(iter/s)": 0.020019 | |
| }, | |
| { | |
| "epoch": 1.8960877665079694, | |
| "grad_norm": 2.4156310349424492, | |
| "learning_rate": 5.35752401280683e-07, | |
| "loss": 0.58418913, | |
| "memory(GiB)": 48.58, | |
| "step": 4580, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.8981577313185676, | |
| "grad_norm": 2.1192384982329977, | |
| "learning_rate": 5.250800426894344e-07, | |
| "loss": 0.57976351, | |
| "memory(GiB)": 48.58, | |
| "step": 4585, | |
| "train_speed(iter/s)": 0.02002 | |
| }, | |
| { | |
| "epoch": 1.9002276961291658, | |
| "grad_norm": 2.363144438078418, | |
| "learning_rate": 5.144076840981858e-07, | |
| "loss": 0.59181528, | |
| "memory(GiB)": 48.58, | |
| "step": 4590, | |
| "train_speed(iter/s)": 0.020021 | |
| }, | |
| { | |
| "epoch": 1.9022976609397642, | |
| "grad_norm": 2.014916601780685, | |
| "learning_rate": 5.037353255069371e-07, | |
| "loss": 0.57993307, | |
| "memory(GiB)": 48.58, | |
| "step": 4595, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.9043676257503623, | |
| "grad_norm": 2.1798732452842615, | |
| "learning_rate": 4.930629669156884e-07, | |
| "loss": 0.61651163, | |
| "memory(GiB)": 48.58, | |
| "step": 4600, | |
| "train_speed(iter/s)": 0.020022 | |
| }, | |
| { | |
| "epoch": 1.9064375905609605, | |
| "grad_norm": 2.4067029242954847, | |
| "learning_rate": 4.823906083244397e-07, | |
| "loss": 0.60284195, | |
| "memory(GiB)": 48.58, | |
| "step": 4605, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.9085075553715587, | |
| "grad_norm": 2.1464043153264587, | |
| "learning_rate": 4.7171824973319113e-07, | |
| "loss": 0.58217282, | |
| "memory(GiB)": 48.58, | |
| "step": 4610, | |
| "train_speed(iter/s)": 0.020023 | |
| }, | |
| { | |
| "epoch": 1.9105775201821569, | |
| "grad_norm": 2.786031828765745, | |
| "learning_rate": 4.6104589114194243e-07, | |
| "loss": 0.59245019, | |
| "memory(GiB)": 48.58, | |
| "step": 4615, | |
| "train_speed(iter/s)": 0.020024 | |
| }, | |
| { | |
| "epoch": 1.912647484992755, | |
| "grad_norm": 1.971002889872401, | |
| "learning_rate": 4.5037353255069374e-07, | |
| "loss": 0.55735373, | |
| "memory(GiB)": 48.58, | |
| "step": 4620, | |
| "train_speed(iter/s)": 0.020025 | |
| }, | |
| { | |
| "epoch": 1.9147174498033532, | |
| "grad_norm": 2.6133112222197097, | |
| "learning_rate": 4.3970117395944504e-07, | |
| "loss": 0.58715906, | |
| "memory(GiB)": 48.58, | |
| "step": 4625, | |
| "train_speed(iter/s)": 0.020025 | |
| }, | |
| { | |
| "epoch": 1.9167874146139514, | |
| "grad_norm": 2.2493774425876496, | |
| "learning_rate": 4.290288153681964e-07, | |
| "loss": 0.61737943, | |
| "memory(GiB)": 48.58, | |
| "step": 4630, | |
| "train_speed(iter/s)": 0.020026 | |
| }, | |
| { | |
| "epoch": 1.9188573794245498, | |
| "grad_norm": 2.066157101262181, | |
| "learning_rate": 4.1835645677694775e-07, | |
| "loss": 0.58552856, | |
| "memory(GiB)": 48.58, | |
| "step": 4635, | |
| "train_speed(iter/s)": 0.020027 | |
| }, | |
| { | |
| "epoch": 1.920927344235148, | |
| "grad_norm": 2.2279403099524164, | |
| "learning_rate": 4.076840981856991e-07, | |
| "loss": 0.60430613, | |
| "memory(GiB)": 48.58, | |
| "step": 4640, | |
| "train_speed(iter/s)": 0.020027 | |
| }, | |
| { | |
| "epoch": 1.9229973090457462, | |
| "grad_norm": 2.21706254471347, | |
| "learning_rate": 3.970117395944504e-07, | |
| "loss": 0.60209589, | |
| "memory(GiB)": 48.58, | |
| "step": 4645, | |
| "train_speed(iter/s)": 0.020027 | |
| }, | |
| { | |
| "epoch": 1.9250672738563446, | |
| "grad_norm": 1.8175472634449323, | |
| "learning_rate": 3.863393810032017e-07, | |
| "loss": 0.57723808, | |
| "memory(GiB)": 48.58, | |
| "step": 4650, | |
| "train_speed(iter/s)": 0.020028 | |
| }, | |
| { | |
| "epoch": 1.9271372386669428, | |
| "grad_norm": 2.013736958312471, | |
| "learning_rate": 3.756670224119531e-07, | |
| "loss": 0.59187717, | |
| "memory(GiB)": 48.58, | |
| "step": 4655, | |
| "train_speed(iter/s)": 0.020028 | |
| }, | |
| { | |
| "epoch": 1.929207203477541, | |
| "grad_norm": 2.9147581447233883, | |
| "learning_rate": 3.649946638207044e-07, | |
| "loss": 0.62887087, | |
| "memory(GiB)": 48.58, | |
| "step": 4660, | |
| "train_speed(iter/s)": 0.020029 | |
| }, | |
| { | |
| "epoch": 1.9312771682881391, | |
| "grad_norm": 2.1113909828567605, | |
| "learning_rate": 3.5432230522945573e-07, | |
| "loss": 0.60369582, | |
| "memory(GiB)": 48.58, | |
| "step": 4665, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.9333471330987373, | |
| "grad_norm": 2.42442448807154, | |
| "learning_rate": 3.4364994663820703e-07, | |
| "loss": 0.62186384, | |
| "memory(GiB)": 48.58, | |
| "step": 4670, | |
| "train_speed(iter/s)": 0.02003 | |
| }, | |
| { | |
| "epoch": 1.9354170979093355, | |
| "grad_norm": 2.380814562965916, | |
| "learning_rate": 3.329775880469584e-07, | |
| "loss": 0.59275131, | |
| "memory(GiB)": 48.58, | |
| "step": 4675, | |
| "train_speed(iter/s)": 0.020031 | |
| }, | |
| { | |
| "epoch": 1.9374870627199337, | |
| "grad_norm": 2.6584440377132363, | |
| "learning_rate": 3.2230522945570974e-07, | |
| "loss": 0.60647793, | |
| "memory(GiB)": 48.58, | |
| "step": 4680, | |
| "train_speed(iter/s)": 0.020031 | |
| }, | |
| { | |
| "epoch": 1.9395570275305318, | |
| "grad_norm": 1.9567852417003078, | |
| "learning_rate": 3.116328708644611e-07, | |
| "loss": 0.60262537, | |
| "memory(GiB)": 48.58, | |
| "step": 4685, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.9416269923411302, | |
| "grad_norm": 2.3501175824898266, | |
| "learning_rate": 3.009605122732124e-07, | |
| "loss": 0.58797078, | |
| "memory(GiB)": 48.58, | |
| "step": 4690, | |
| "train_speed(iter/s)": 0.020032 | |
| }, | |
| { | |
| "epoch": 1.9436969571517284, | |
| "grad_norm": 2.237666059037871, | |
| "learning_rate": 2.9028815368196376e-07, | |
| "loss": 0.60981102, | |
| "memory(GiB)": 48.58, | |
| "step": 4695, | |
| "train_speed(iter/s)": 0.020033 | |
| }, | |
| { | |
| "epoch": 1.9457669219623266, | |
| "grad_norm": 2.4233069808853993, | |
| "learning_rate": 2.7961579509071506e-07, | |
| "loss": 0.60726156, | |
| "memory(GiB)": 48.58, | |
| "step": 4700, | |
| "train_speed(iter/s)": 0.020033 | |
| }, | |
| { | |
| "epoch": 1.947836886772925, | |
| "grad_norm": 2.260763775980905, | |
| "learning_rate": 2.689434364994664e-07, | |
| "loss": 0.60588284, | |
| "memory(GiB)": 48.58, | |
| "step": 4705, | |
| "train_speed(iter/s)": 0.020034 | |
| }, | |
| { | |
| "epoch": 1.9499068515835232, | |
| "grad_norm": 2.5081221538985625, | |
| "learning_rate": 2.582710779082177e-07, | |
| "loss": 0.59843764, | |
| "memory(GiB)": 48.58, | |
| "step": 4710, | |
| "train_speed(iter/s)": 0.020034 | |
| }, | |
| { | |
| "epoch": 1.9519768163941214, | |
| "grad_norm": 2.3881113527972304, | |
| "learning_rate": 2.475987193169691e-07, | |
| "loss": 0.58006935, | |
| "memory(GiB)": 48.58, | |
| "step": 4715, | |
| "train_speed(iter/s)": 0.020035 | |
| }, | |
| { | |
| "epoch": 1.9540467812047195, | |
| "grad_norm": 2.0678535427683564, | |
| "learning_rate": 2.369263607257204e-07, | |
| "loss": 0.60157442, | |
| "memory(GiB)": 48.58, | |
| "step": 4720, | |
| "train_speed(iter/s)": 0.020036 | |
| }, | |
| { | |
| "epoch": 1.9561167460153177, | |
| "grad_norm": 2.438506236548372, | |
| "learning_rate": 2.2625400213447176e-07, | |
| "loss": 0.61028309, | |
| "memory(GiB)": 48.58, | |
| "step": 4725, | |
| "train_speed(iter/s)": 0.020036 | |
| }, | |
| { | |
| "epoch": 1.9581867108259159, | |
| "grad_norm": 2.0110599859655482, | |
| "learning_rate": 2.1558164354322307e-07, | |
| "loss": 0.59143724, | |
| "memory(GiB)": 48.58, | |
| "step": 4730, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 1.960256675636514, | |
| "grad_norm": 2.0688837840068124, | |
| "learning_rate": 2.049092849519744e-07, | |
| "loss": 0.60122604, | |
| "memory(GiB)": 48.58, | |
| "step": 4735, | |
| "train_speed(iter/s)": 0.020037 | |
| }, | |
| { | |
| "epoch": 1.9623266404471122, | |
| "grad_norm": 2.1607611011042964, | |
| "learning_rate": 1.9423692636072575e-07, | |
| "loss": 0.59857554, | |
| "memory(GiB)": 48.58, | |
| "step": 4740, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 1.9643966052577106, | |
| "grad_norm": 1.9355873743150782, | |
| "learning_rate": 1.8356456776947706e-07, | |
| "loss": 0.58816404, | |
| "memory(GiB)": 48.58, | |
| "step": 4745, | |
| "train_speed(iter/s)": 0.020038 | |
| }, | |
| { | |
| "epoch": 1.9664665700683088, | |
| "grad_norm": 2.1028560489657915, | |
| "learning_rate": 1.728922091782284e-07, | |
| "loss": 0.59075899, | |
| "memory(GiB)": 48.58, | |
| "step": 4750, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.968536534878907, | |
| "grad_norm": 1.9716163219927025, | |
| "learning_rate": 1.6221985058697972e-07, | |
| "loss": 0.58181, | |
| "memory(GiB)": 48.58, | |
| "step": 4755, | |
| "train_speed(iter/s)": 0.020039 | |
| }, | |
| { | |
| "epoch": 1.9706064996895054, | |
| "grad_norm": 2.243337925247892, | |
| "learning_rate": 1.5154749199573107e-07, | |
| "loss": 0.58783703, | |
| "memory(GiB)": 48.58, | |
| "step": 4760, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 1.9726764645001036, | |
| "grad_norm": 2.4859440712948166, | |
| "learning_rate": 1.408751334044824e-07, | |
| "loss": 0.60655708, | |
| "memory(GiB)": 48.58, | |
| "step": 4765, | |
| "train_speed(iter/s)": 0.02004 | |
| }, | |
| { | |
| "epoch": 1.9747464293107018, | |
| "grad_norm": 1.9831273691126385, | |
| "learning_rate": 1.3020277481323373e-07, | |
| "loss": 0.61188507, | |
| "memory(GiB)": 48.58, | |
| "step": 4770, | |
| "train_speed(iter/s)": 0.020041 | |
| }, | |
| { | |
| "epoch": 1.9768163941213, | |
| "grad_norm": 2.2314049587449807, | |
| "learning_rate": 1.1953041622198506e-07, | |
| "loss": 0.58718634, | |
| "memory(GiB)": 48.58, | |
| "step": 4775, | |
| "train_speed(iter/s)": 0.020041 | |
| }, | |
| { | |
| "epoch": 1.9788863589318981, | |
| "grad_norm": 1.8150041254780722, | |
| "learning_rate": 1.088580576307364e-07, | |
| "loss": 0.58888893, | |
| "memory(GiB)": 48.58, | |
| "step": 4780, | |
| "train_speed(iter/s)": 0.020042 | |
| }, | |
| { | |
| "epoch": 1.9809563237424963, | |
| "grad_norm": 2.247396093692947, | |
| "learning_rate": 9.818569903948773e-08, | |
| "loss": 0.58732767, | |
| "memory(GiB)": 48.58, | |
| "step": 4785, | |
| "train_speed(iter/s)": 0.020042 | |
| }, | |
| { | |
| "epoch": 1.9830262885530945, | |
| "grad_norm": 2.2098523979831644, | |
| "learning_rate": 8.751334044823908e-08, | |
| "loss": 0.56731772, | |
| "memory(GiB)": 48.58, | |
| "step": 4790, | |
| "train_speed(iter/s)": 0.020043 | |
| }, | |
| { | |
| "epoch": 1.9850962533636927, | |
| "grad_norm": 2.097191818370972, | |
| "learning_rate": 7.68409818569904e-08, | |
| "loss": 0.60409393, | |
| "memory(GiB)": 48.58, | |
| "step": 4795, | |
| "train_speed(iter/s)": 0.020043 | |
| }, | |
| { | |
| "epoch": 1.987166218174291, | |
| "grad_norm": 2.228865758940409, | |
| "learning_rate": 6.616862326574174e-08, | |
| "loss": 0.58583736, | |
| "memory(GiB)": 48.58, | |
| "step": 4800, | |
| "train_speed(iter/s)": 0.020043 | |
| }, | |
| { | |
| "epoch": 1.987166218174291, | |
| "eval_loss": 0.7990086674690247, | |
| "eval_runtime": 335.524, | |
| "eval_samples_per_second": 18.616, | |
| "eval_steps_per_second": 1.165, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.9892361829848892, | |
| "grad_norm": 2.144126117644109, | |
| "learning_rate": 5.5496264674493065e-08, | |
| "loss": 0.59295273, | |
| "memory(GiB)": 48.58, | |
| "step": 4805, | |
| "train_speed(iter/s)": 0.020012 | |
| }, | |
| { | |
| "epoch": 1.9913061477954874, | |
| "grad_norm": 2.020692421672203, | |
| "learning_rate": 4.48239060832444e-08, | |
| "loss": 0.58215327, | |
| "memory(GiB)": 48.58, | |
| "step": 4810, | |
| "train_speed(iter/s)": 0.020012 | |
| }, | |
| { | |
| "epoch": 1.9933761126060858, | |
| "grad_norm": 2.560575104653987, | |
| "learning_rate": 3.415154749199574e-08, | |
| "loss": 0.59236603, | |
| "memory(GiB)": 48.58, | |
| "step": 4815, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.995446077416684, | |
| "grad_norm": 2.242799455811118, | |
| "learning_rate": 2.347918890074707e-08, | |
| "loss": 0.59284697, | |
| "memory(GiB)": 48.58, | |
| "step": 4820, | |
| "train_speed(iter/s)": 0.020013 | |
| }, | |
| { | |
| "epoch": 1.9975160422272822, | |
| "grad_norm": 2.496931570288639, | |
| "learning_rate": 1.28068303094984e-08, | |
| "loss": 0.58884125, | |
| "memory(GiB)": 48.58, | |
| "step": 4825, | |
| "train_speed(iter/s)": 0.020014 | |
| }, | |
| { | |
| "epoch": 1.9995860070378804, | |
| "grad_norm": 2.1504541545847133, | |
| "learning_rate": 2.1344717182497336e-09, | |
| "loss": 0.57651815, | |
| "memory(GiB)": 48.58, | |
| "step": 4830, | |
| "train_speed(iter/s)": 0.020014 | |
| }, | |
| { | |
| "epoch": 1.9995860070378804, | |
| "eval_loss": 0.7990483641624451, | |
| "eval_runtime": 333.2574, | |
| "eval_samples_per_second": 18.742, | |
| "eval_steps_per_second": 1.173, | |
| "step": 4830 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 4830, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0363792423256064e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |