| { |
| "best_global_step": 300, |
| "best_metric": 0.008146172389388084, |
| "best_model_checkpoint": "/teamspace/studios/this_studio/DATN/output/medgemma_finetuned/checkpoint-300", |
| "epoch": 1.556420233463035, |
| "eval_steps": 100, |
| "global_step": 400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.019455252918287938, |
| "grad_norm": 3.5695066452026367, |
| "learning_rate": 4.137931034482758e-06, |
| "loss": 0.8424, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.038910505836575876, |
| "grad_norm": 1.942628026008606, |
| "learning_rate": 9.310344827586207e-06, |
| "loss": 0.7525, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.058365758754863814, |
| "grad_norm": 1.2901023626327515, |
| "learning_rate": 1.4482758620689657e-05, |
| "loss": 0.5573, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.07782101167315175, |
| "grad_norm": 1.1915671825408936, |
| "learning_rate": 1.9655172413793102e-05, |
| "loss": 0.3637, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09727626459143969, |
| "grad_norm": 1.0968961715698242, |
| "learning_rate": 2.4827586206896553e-05, |
| "loss": 0.1735, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.11673151750972763, |
| "grad_norm": 0.4196433126926422, |
| "learning_rate": 3e-05, |
| "loss": 0.0496, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.13618677042801555, |
| "grad_norm": 0.1870017945766449, |
| "learning_rate": 3.517241379310345e-05, |
| "loss": 0.0232, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1556420233463035, |
| "grad_norm": 0.23363570868968964, |
| "learning_rate": 4.03448275862069e-05, |
| "loss": 0.0168, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.17509727626459143, |
| "grad_norm": 0.14438340067863464, |
| "learning_rate": 4.551724137931034e-05, |
| "loss": 0.0151, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.19455252918287938, |
| "grad_norm": 0.10051655769348145, |
| "learning_rate": 5.0689655172413794e-05, |
| "loss": 0.0107, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2140077821011673, |
| "grad_norm": 0.09743820875883102, |
| "learning_rate": 5.586206896551724e-05, |
| "loss": 0.008, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.23346303501945526, |
| "grad_norm": 0.06392025947570801, |
| "learning_rate": 6.103448275862069e-05, |
| "loss": 0.0085, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2529182879377432, |
| "grad_norm": 0.04592013731598854, |
| "learning_rate": 6.620689655172413e-05, |
| "loss": 0.0075, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.2723735408560311, |
| "grad_norm": 0.055331017822027206, |
| "learning_rate": 7.137931034482759e-05, |
| "loss": 0.0077, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2918287937743191, |
| "grad_norm": 0.048375148326158524, |
| "learning_rate": 7.655172413793105e-05, |
| "loss": 0.0072, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.311284046692607, |
| "grad_norm": 0.060988396406173706, |
| "learning_rate": 8.172413793103448e-05, |
| "loss": 0.0087, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.33073929961089493, |
| "grad_norm": 0.04453667998313904, |
| "learning_rate": 8.689655172413794e-05, |
| "loss": 0.008, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.35019455252918286, |
| "grad_norm": 0.06883223354816437, |
| "learning_rate": 9.206896551724138e-05, |
| "loss": 0.0083, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.36964980544747084, |
| "grad_norm": 0.08616536855697632, |
| "learning_rate": 9.724137931034482e-05, |
| "loss": 0.0084, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.38910505836575876, |
| "grad_norm": 0.03529098257422447, |
| "learning_rate": 0.00010241379310344828, |
| "loss": 0.0076, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.38910505836575876, |
| "eval_loss": 0.009547106921672821, |
| "eval_runtime": 157.0062, |
| "eval_samples_per_second": 3.248, |
| "eval_steps_per_second": 0.815, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4085603112840467, |
| "grad_norm": 0.03567972779273987, |
| "learning_rate": 0.00010758620689655173, |
| "loss": 0.0084, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.4280155642023346, |
| "grad_norm": 0.06252816319465637, |
| "learning_rate": 0.00011275862068965518, |
| "loss": 0.008, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4474708171206226, |
| "grad_norm": 0.044703833758831024, |
| "learning_rate": 0.00011793103448275861, |
| "loss": 0.0083, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.4669260700389105, |
| "grad_norm": 0.03152047470211983, |
| "learning_rate": 0.00011999378882999482, |
| "loss": 0.0074, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.48638132295719844, |
| "grad_norm": 0.03210924193263054, |
| "learning_rate": 0.00011995583633681744, |
| "loss": 0.0078, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5058365758754864, |
| "grad_norm": 0.04019011929631233, |
| "learning_rate": 0.00011988340380013058, |
| "loss": 0.0074, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5252918287937743, |
| "grad_norm": 0.04210692271590233, |
| "learning_rate": 0.00011977653287521201, |
| "loss": 0.0071, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.5447470817120622, |
| "grad_norm": 0.03734419122338295, |
| "learning_rate": 0.00011963528502253607, |
| "loss": 0.0075, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5642023346303502, |
| "grad_norm": 0.02421458251774311, |
| "learning_rate": 0.00011945974147242832, |
| "loss": 0.0075, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.5836575875486382, |
| "grad_norm": 0.035657692700624466, |
| "learning_rate": 0.0001192500031783508, |
| "loss": 0.008, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.603112840466926, |
| "grad_norm": 0.0272879209369421, |
| "learning_rate": 0.00011900619075884453, |
| "loss": 0.0077, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.622568093385214, |
| "grad_norm": 0.03400944918394089, |
| "learning_rate": 0.00011872844442816295, |
| "loss": 0.0073, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.642023346303502, |
| "grad_norm": 0.03923163563013077, |
| "learning_rate": 0.00011841692391563607, |
| "loss": 0.0076, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.6614785992217899, |
| "grad_norm": 0.03444487228989601, |
| "learning_rate": 0.00011807180837381154, |
| "loss": 0.0077, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6809338521400778, |
| "grad_norm": 0.03119409829378128, |
| "learning_rate": 0.00011769329627542567, |
| "loss": 0.008, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.7003891050583657, |
| "grad_norm": 0.0293661467730999, |
| "learning_rate": 0.00011728160529926373, |
| "loss": 0.008, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7198443579766537, |
| "grad_norm": 0.03529886156320572, |
| "learning_rate": 0.00011683697220497477, |
| "loss": 0.0075, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.7392996108949417, |
| "grad_norm": 0.0280179213732481, |
| "learning_rate": 0.00011635965269691342, |
| "loss": 0.0071, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7587548638132295, |
| "grad_norm": 0.047665953636169434, |
| "learning_rate": 0.00011584992127708669, |
| "loss": 0.0075, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.7782101167315175, |
| "grad_norm": 0.019554605707526207, |
| "learning_rate": 0.00011530807108729038, |
| "loss": 0.0073, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7782101167315175, |
| "eval_loss": 0.008479318581521511, |
| "eval_runtime": 133.9329, |
| "eval_samples_per_second": 3.808, |
| "eval_steps_per_second": 0.956, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7976653696498055, |
| "grad_norm": 0.021762819960713387, |
| "learning_rate": 0.00011473441374052603, |
| "loss": 0.0071, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.8171206225680934, |
| "grad_norm": 0.034537579864263535, |
| "learning_rate": 0.00011412927914179513, |
| "loss": 0.0073, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.8365758754863813, |
| "grad_norm": 0.024975109845399857, |
| "learning_rate": 0.00011349301529837383, |
| "loss": 0.0069, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.8560311284046692, |
| "grad_norm": 0.05370034649968147, |
| "learning_rate": 0.00011282598811967723, |
| "loss": 0.0073, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8754863813229572, |
| "grad_norm": 0.027759484946727753, |
| "learning_rate": 0.0001121285812068282, |
| "loss": 0.0069, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.8949416342412452, |
| "grad_norm": 0.024865204468369484, |
| "learning_rate": 0.00011140119563205206, |
| "loss": 0.0069, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.914396887159533, |
| "grad_norm": 0.03023722395300865, |
| "learning_rate": 0.00011064424970802358, |
| "loss": 0.0073, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.933852140077821, |
| "grad_norm": 0.02334648370742798, |
| "learning_rate": 0.00010985817874729929, |
| "loss": 0.0069, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.953307392996109, |
| "grad_norm": 0.026197293773293495, |
| "learning_rate": 0.00010904343481197327, |
| "loss": 0.0069, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.9727626459143969, |
| "grad_norm": 0.034592628479003906, |
| "learning_rate": 0.00010820048645370048, |
| "loss": 0.0073, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9922178988326849, |
| "grad_norm": 0.029151858761906624, |
| "learning_rate": 0.00010732981844423701, |
| "loss": 0.0074, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.0116731517509727, |
| "grad_norm": 0.017624910920858383, |
| "learning_rate": 0.00010643193149665239, |
| "loss": 0.007, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.0311284046692606, |
| "grad_norm": 0.023881293833255768, |
| "learning_rate": 0.00010550734197737418, |
| "loss": 0.0068, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.0505836575875487, |
| "grad_norm": 0.021148784086108208, |
| "learning_rate": 0.00010455658160923037, |
| "loss": 0.0076, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.0700389105058365, |
| "grad_norm": 0.028897982090711594, |
| "learning_rate": 0.00010358019716566062, |
| "loss": 0.0072, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.0894941634241244, |
| "grad_norm": 0.022528519853949547, |
| "learning_rate": 0.00010257875015627189, |
| "loss": 0.007, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.1089494163424125, |
| "grad_norm": 0.02779594622552395, |
| "learning_rate": 0.0001015528165039195, |
| "loss": 0.0074, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.1284046692607004, |
| "grad_norm": 0.024411482736468315, |
| "learning_rate": 0.00010050298621349925, |
| "loss": 0.0071, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.1478599221789882, |
| "grad_norm": 0.0831264927983284, |
| "learning_rate": 9.942986303264109e-05, |
| "loss": 0.007, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.1673151750972763, |
| "grad_norm": 0.020742744207382202, |
| "learning_rate": 9.833406410449954e-05, |
| "loss": 0.0073, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.1673151750972763, |
| "eval_loss": 0.008146172389388084, |
| "eval_runtime": 133.973, |
| "eval_samples_per_second": 3.807, |
| "eval_steps_per_second": 0.955, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.1867704280155642, |
| "grad_norm": 0.02054239809513092, |
| "learning_rate": 9.721621961284032e-05, |
| "loss": 0.0073, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.206225680933852, |
| "grad_norm": 0.024585654959082603, |
| "learning_rate": 9.607697241962756e-05, |
| "loss": 0.0073, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.2256809338521402, |
| "grad_norm": 0.03846452757716179, |
| "learning_rate": 9.491697769531991e-05, |
| "loss": 0.0076, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.245136186770428, |
| "grad_norm": 0.04766026884317398, |
| "learning_rate": 9.373690254208802e-05, |
| "loss": 0.0077, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.264591439688716, |
| "grad_norm": 0.02248929999768734, |
| "learning_rate": 9.253742561017038e-05, |
| "loss": 0.0072, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.2840466926070038, |
| "grad_norm": 0.024121012538671494, |
| "learning_rate": 9.131923670758781e-05, |
| "loss": 0.0072, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.3035019455252919, |
| "grad_norm": 0.028830286115407944, |
| "learning_rate": 9.008303640344135e-05, |
| "loss": 0.0074, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.3229571984435797, |
| "grad_norm": 0.0204729363322258, |
| "learning_rate": 8.882953562502145e-05, |
| "loss": 0.007, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.3424124513618678, |
| "grad_norm": 0.019391437992453575, |
| "learning_rate": 8.755945524896028e-05, |
| "loss": 0.0071, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.3618677042801557, |
| "grad_norm": 0.02011592499911785, |
| "learning_rate": 8.62735256866624e-05, |
| "loss": 0.0073, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.3813229571984436, |
| "grad_norm": 0.026530904695391655, |
| "learning_rate": 8.497248646425179e-05, |
| "loss": 0.0074, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.4007782101167314, |
| "grad_norm": 0.013384876772761345, |
| "learning_rate": 8.365708579727738e-05, |
| "loss": 0.0069, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.4202334630350195, |
| "grad_norm": 0.0255065206438303, |
| "learning_rate": 8.23280801604212e-05, |
| "loss": 0.0067, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.4396887159533074, |
| "grad_norm": 0.022361749783158302, |
| "learning_rate": 8.098623385245689e-05, |
| "loss": 0.007, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.4591439688715953, |
| "grad_norm": 0.023336444050073624, |
| "learning_rate": 7.963231855670848e-05, |
| "loss": 0.0073, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.4785992217898833, |
| "grad_norm": 0.019427774474024773, |
| "learning_rate": 7.826711289726267e-05, |
| "loss": 0.0071, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.4980544747081712, |
| "grad_norm": 0.014258882962167263, |
| "learning_rate": 7.689140199118908e-05, |
| "loss": 0.0066, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.517509727626459, |
| "grad_norm": 0.030942602083086967, |
| "learning_rate": 7.550597699702708e-05, |
| "loss": 0.0073, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.536964980544747, |
| "grad_norm": 0.023137860000133514, |
| "learning_rate": 7.411163465979756e-05, |
| "loss": 0.007, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.556420233463035, |
| "grad_norm": 0.03352006524801254, |
| "learning_rate": 7.270917685280247e-05, |
| "loss": 0.0072, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.556420233463035, |
| "eval_loss": 0.008275757543742657, |
| "eval_runtime": 134.1583, |
| "eval_samples_per_second": 3.801, |
| "eval_steps_per_second": 0.954, |
| "step": 400 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 771, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 30, |
| "early_stopping_threshold": 0.001 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 2 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.362961992244354e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|