| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9967335510965936, |
| "eval_steps": 500, |
| "global_step": 178, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0055996266915538965, |
| "grad_norm": 0.9287750653110117, |
| "learning_rate": 5.555555555555555e-07, |
| "loss": 0.7737, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.027998133457769483, |
| "grad_norm": 0.7051671404282305, |
| "learning_rate": 2.7777777777777783e-06, |
| "loss": 0.7341, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.05599626691553897, |
| "grad_norm": 0.2520214877481558, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.5258, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08399440037330845, |
| "grad_norm": 0.14052119101634014, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.2894, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.11199253383107793, |
| "grad_norm": 0.056427898284759595, |
| "learning_rate": 9.996145181203616e-06, |
| "loss": 0.1822, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1399906672888474, |
| "grad_norm": 0.03842870313192144, |
| "learning_rate": 9.952846702217886e-06, |
| "loss": 0.144, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1679888007466169, |
| "grad_norm": 0.03200746314332259, |
| "learning_rate": 9.861849601988384e-06, |
| "loss": 0.1211, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.19598693420438637, |
| "grad_norm": 0.02565149233539642, |
| "learning_rate": 9.72403023233439e-06, |
| "loss": 0.1101, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.22398506766215587, |
| "grad_norm": 0.024613549317984977, |
| "learning_rate": 9.540715869125407e-06, |
| "loss": 0.097, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.25198320111992534, |
| "grad_norm": 0.02144179428017716, |
| "learning_rate": 9.31367192988896e-06, |
| "loss": 0.0871, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2799813345776948, |
| "grad_norm": 0.023284769150725856, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.0817, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3079794680354643, |
| "grad_norm": 0.02269598632952473, |
| "learning_rate": 8.737541634312985e-06, |
| "loss": 0.0748, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.3359776014932338, |
| "grad_norm": 0.024675985934094376, |
| "learning_rate": 8.39400372766471e-06, |
| "loss": 0.0708, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.36397573495100327, |
| "grad_norm": 0.02363318129449854, |
| "learning_rate": 8.017779709767857e-06, |
| "loss": 0.0632, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.39197386840877274, |
| "grad_norm": 0.017744170199618194, |
| "learning_rate": 7.612492823579744e-06, |
| "loss": 0.0601, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4199720018665422, |
| "grad_norm": 0.02180319562108693, |
| "learning_rate": 7.18204620336671e-06, |
| "loss": 0.0557, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.44797013532431174, |
| "grad_norm": 0.021481125876423392, |
| "learning_rate": 6.730585285387465e-06, |
| "loss": 0.0612, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4759682687820812, |
| "grad_norm": 0.01901447075970861, |
| "learning_rate": 6.26245788507579e-06, |
| "loss": 0.0511, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.5039664022398507, |
| "grad_norm": 0.019624242818030622, |
| "learning_rate": 5.782172325201155e-06, |
| "loss": 0.0541, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5319645356976201, |
| "grad_norm": 0.021770565533330784, |
| "learning_rate": 5.294354018255945e-06, |
| "loss": 0.052, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5599626691553896, |
| "grad_norm": 0.019329162918422096, |
| "learning_rate": 4.803700921204659e-06, |
| "loss": 0.0494, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5879608026131591, |
| "grad_norm": 0.020279337189906023, |
| "learning_rate": 4.314938291590161e-06, |
| "loss": 0.0472, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.6159589360709286, |
| "grad_norm": 0.020370573885900606, |
| "learning_rate": 3.832773180720475e-06, |
| "loss": 0.0496, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6439570695286981, |
| "grad_norm": 0.01780890820627811, |
| "learning_rate": 3.3618491021915334e-06, |
| "loss": 0.0418, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6719552029864676, |
| "grad_norm": 0.01770442385609227, |
| "learning_rate": 2.906701312312861e-06, |
| "loss": 0.0463, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6999533364442371, |
| "grad_norm": 0.017517610950091075, |
| "learning_rate": 2.471713133110078e-06, |
| "loss": 0.0427, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7279514699020065, |
| "grad_norm": 0.017896600937746723, |
| "learning_rate": 2.061073738537635e-06, |
| "loss": 0.0436, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.755949603359776, |
| "grad_norm": 0.01501649892368704, |
| "learning_rate": 1.6787378104435931e-06, |
| "loss": 0.0428, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7839477368175455, |
| "grad_norm": 0.0163607070923351, |
| "learning_rate": 1.3283874528215735e-06, |
| "loss": 0.047, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.811945870275315, |
| "grad_norm": 0.019565825006344753, |
| "learning_rate": 1.013396731136465e-06, |
| "loss": 0.0441, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.8399440037330844, |
| "grad_norm": 0.017999287355466544, |
| "learning_rate": 7.367991782295392e-07, |
| "loss": 0.0454, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8679421371908539, |
| "grad_norm": 0.020504960252671952, |
| "learning_rate": 5.012585797388936e-07, |
| "loss": 0.0485, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.8959402706486235, |
| "grad_norm": 0.016379587248382697, |
| "learning_rate": 3.0904332038757977e-07, |
| "loss": 0.0434, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9239384041063929, |
| "grad_norm": 0.0174476169165481, |
| "learning_rate": 1.6200453819870122e-07, |
| "loss": 0.05, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.9519365375641624, |
| "grad_norm": 0.01783292237434629, |
| "learning_rate": 6.15582970243117e-08, |
| "loss": 0.0452, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9799346710219319, |
| "grad_norm": 0.01908510011245694, |
| "learning_rate": 8.671949076420883e-09, |
| "loss": 0.0427, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.9967335510965936, |
| "step": 178, |
| "total_flos": 9.035586631704248e+17, |
| "train_loss": 0.10327898384479994, |
| "train_runtime": 2928.2846, |
| "train_samples_per_second": 2.927, |
| "train_steps_per_second": 0.061 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 178, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.035586631704248e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|