| { | |
| "best_metric": 0.021110303699970245, | |
| "best_model_checkpoint": "/kaggle/working/output/checkpoint-56", | |
| "epoch": 20.857142857142858, | |
| "eval_steps": 500, | |
| "global_step": 73, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.8571428571428571, | |
| "eval_LCC": 0.19761129838843355, | |
| "eval_SROCC": 0.1086674669867947, | |
| "eval_loss": 0.17467159032821655, | |
| "eval_runtime": 35.5195, | |
| "eval_samples_per_second": 1.408, | |
| "eval_steps_per_second": 0.056, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_LCC": 0.1898407031739211, | |
| "eval_SROCC": 0.10424969987995197, | |
| "eval_loss": 0.056969162076711655, | |
| "eval_runtime": 35.417, | |
| "eval_samples_per_second": 1.412, | |
| "eval_steps_per_second": 0.056, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 3.666994094848633, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.1599, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "eval_LCC": 0.1686459812348507, | |
| "eval_SROCC": 0.11260504201680673, | |
| "eval_loss": 0.031988270580768585, | |
| "eval_runtime": 35.3188, | |
| "eval_samples_per_second": 1.416, | |
| "eval_steps_per_second": 0.057, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_LCC": 0.12351226835758868, | |
| "eval_SROCC": 0.02645858343337335, | |
| "eval_loss": 0.0510590560734272, | |
| "eval_runtime": 35.2516, | |
| "eval_samples_per_second": 1.418, | |
| "eval_steps_per_second": 0.057, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 4.857142857142857, | |
| "eval_LCC": 0.1029176675867897, | |
| "eval_SROCC": -0.00043217286914765904, | |
| "eval_loss": 0.02736870013177395, | |
| "eval_runtime": 35.3522, | |
| "eval_samples_per_second": 1.414, | |
| "eval_steps_per_second": 0.057, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 5.714285714285714, | |
| "grad_norm": 2.3310303688049316, | |
| "learning_rate": 9.966191788709716e-06, | |
| "loss": 0.0602, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_LCC": 0.08996101890143099, | |
| "eval_SROCC": -0.04057623049219687, | |
| "eval_loss": 0.03745032474398613, | |
| "eval_runtime": 35.2712, | |
| "eval_samples_per_second": 1.418, | |
| "eval_steps_per_second": 0.057, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 6.857142857142857, | |
| "eval_LCC": 0.08304152415159055, | |
| "eval_SROCC": -0.05334933973589436, | |
| "eval_loss": 0.03062591142952442, | |
| "eval_runtime": 35.089, | |
| "eval_samples_per_second": 1.425, | |
| "eval_steps_per_second": 0.057, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_LCC": 0.07141231150294015, | |
| "eval_SROCC": -0.07255702280912364, | |
| "eval_loss": 0.02552003413438797, | |
| "eval_runtime": 35.0759, | |
| "eval_samples_per_second": 1.425, | |
| "eval_steps_per_second": 0.057, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 8.571428571428571, | |
| "grad_norm": 1.1723262071609497, | |
| "learning_rate": 9.698463103929542e-06, | |
| "loss": 0.029, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 8.857142857142858, | |
| "eval_LCC": 0.07341013768251957, | |
| "eval_SROCC": -0.056806722689075634, | |
| "eval_loss": 0.024663101881742477, | |
| "eval_runtime": 35.4424, | |
| "eval_samples_per_second": 1.411, | |
| "eval_steps_per_second": 0.056, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_LCC": 0.09004418149637736, | |
| "eval_SROCC": -0.042881152460984395, | |
| "eval_loss": 0.02926880680024624, | |
| "eval_runtime": 35.3398, | |
| "eval_samples_per_second": 1.415, | |
| "eval_steps_per_second": 0.057, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 10.857142857142858, | |
| "eval_LCC": 0.09815228490732507, | |
| "eval_SROCC": -0.03174069627851141, | |
| "eval_loss": 0.025934694334864616, | |
| "eval_runtime": 35.299, | |
| "eval_samples_per_second": 1.416, | |
| "eval_steps_per_second": 0.057, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 11.428571428571429, | |
| "grad_norm": 0.726739764213562, | |
| "learning_rate": 9.177439057064684e-06, | |
| "loss": 0.0199, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_LCC": 0.12879803314702723, | |
| "eval_SROCC": -0.007250900360144057, | |
| "eval_loss": 0.02379768155515194, | |
| "eval_runtime": 35.6028, | |
| "eval_samples_per_second": 1.404, | |
| "eval_steps_per_second": 0.056, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 12.857142857142858, | |
| "eval_LCC": 0.15936463360358263, | |
| "eval_SROCC": 0.02156062424969988, | |
| "eval_loss": 0.02426682412624359, | |
| "eval_runtime": 35.4009, | |
| "eval_samples_per_second": 1.412, | |
| "eval_steps_per_second": 0.056, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_LCC": 0.18103273074335755, | |
| "eval_SROCC": 0.0453781512605042, | |
| "eval_loss": 0.02589680254459381, | |
| "eval_runtime": 35.429, | |
| "eval_samples_per_second": 1.411, | |
| "eval_steps_per_second": 0.056, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 14.285714285714286, | |
| "grad_norm": 0.8898158669471741, | |
| "learning_rate": 8.43120818934367e-06, | |
| "loss": 0.0161, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 14.857142857142858, | |
| "eval_LCC": 0.1954172655149359, | |
| "eval_SROCC": 0.056806722689075634, | |
| "eval_loss": 0.02237752452492714, | |
| "eval_runtime": 35.3401, | |
| "eval_samples_per_second": 1.415, | |
| "eval_steps_per_second": 0.057, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_LCC": 0.23162353354414347, | |
| "eval_SROCC": 0.08955582232893158, | |
| "eval_loss": 0.021110303699970245, | |
| "eval_runtime": 35.3825, | |
| "eval_samples_per_second": 1.413, | |
| "eval_steps_per_second": 0.057, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 16.857142857142858, | |
| "eval_LCC": 0.25437943511040334, | |
| "eval_SROCC": 0.10012004801920767, | |
| "eval_loss": 0.02233021892607212, | |
| "eval_runtime": 35.3868, | |
| "eval_samples_per_second": 1.413, | |
| "eval_steps_per_second": 0.057, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 17.142857142857142, | |
| "grad_norm": 1.1253899335861206, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.0132, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_LCC": 0.2680706396992355, | |
| "eval_SROCC": 0.0981032412965186, | |
| "eval_loss": 0.02166852541267872, | |
| "eval_runtime": 35.412, | |
| "eval_samples_per_second": 1.412, | |
| "eval_steps_per_second": 0.056, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 18.857142857142858, | |
| "eval_LCC": 0.27459457148225125, | |
| "eval_SROCC": 0.11548619447779111, | |
| "eval_loss": 0.022057028487324715, | |
| "eval_runtime": 35.3202, | |
| "eval_samples_per_second": 1.416, | |
| "eval_steps_per_second": 0.057, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.4893428087234497, | |
| "learning_rate": 6.434016163555452e-06, | |
| "loss": 0.0103, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_LCC": 0.28310987276640803, | |
| "eval_SROCC": 0.12297719087635053, | |
| "eval_loss": 0.022805728018283844, | |
| "eval_runtime": 35.2653, | |
| "eval_samples_per_second": 1.418, | |
| "eval_steps_per_second": 0.057, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 20.857142857142858, | |
| "eval_LCC": 0.2943832703125549, | |
| "eval_SROCC": 0.13267707082833133, | |
| "eval_loss": 0.024481065571308136, | |
| "eval_runtime": 35.4504, | |
| "eval_samples_per_second": 1.41, | |
| "eval_steps_per_second": 0.056, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 20.857142857142858, | |
| "step": 73, | |
| "total_flos": 5.848207823512535e+17, | |
| "train_loss": 0.042697800195788685, | |
| "train_runtime": 3517.7769, | |
| "train_samples_per_second": 3.042, | |
| "train_steps_per_second": 0.043 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 150, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.848207823512535e+17, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |