| { |
| "best_global_step": 380, |
| "best_metric": 0.7177177177177178, |
| "best_model_checkpoint": "./ner-archetype-model/checkpoint-380", |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 475, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.10526315789473684, |
| "grad_norm": 0.3689213991165161, |
| "learning_rate": 1.9578947368421055e-05, |
| "loss": 0.4426, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.21052631578947367, |
| "grad_norm": 0.2375708669424057, |
| "learning_rate": 1.9157894736842108e-05, |
| "loss": 0.0929, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.3157894736842105, |
| "grad_norm": 0.6364700198173523, |
| "learning_rate": 1.873684210526316e-05, |
| "loss": 0.0732, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "grad_norm": 0.3920805752277374, |
| "learning_rate": 1.831578947368421e-05, |
| "loss": 0.0639, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "grad_norm": 0.37786218523979187, |
| "learning_rate": 1.7894736842105264e-05, |
| "loss": 0.0678, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.631578947368421, |
| "grad_norm": 0.41838499903678894, |
| "learning_rate": 1.7473684210526317e-05, |
| "loss": 0.0649, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.7368421052631579, |
| "grad_norm": 0.44255176186561584, |
| "learning_rate": 1.705263157894737e-05, |
| "loss": 0.0469, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 0.44138917326927185, |
| "learning_rate": 1.6631578947368423e-05, |
| "loss": 0.0403, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.9473684210526315, |
| "grad_norm": 0.39505335688591003, |
| "learning_rate": 1.6210526315789473e-05, |
| "loss": 0.0447, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_ARCHETYPE": { |
| "f1": 0.6507177033492823, |
| "number": 338, |
| "precision": 0.7058823529411765, |
| "recall": 0.6035502958579881 |
| }, |
| "eval_loss": 0.034236546605825424, |
| "eval_overall_accuracy": 0.9891579152080903, |
| "eval_overall_f1": 0.6507177033492823, |
| "eval_overall_precision": 0.7058823529411765, |
| "eval_overall_recall": 0.6035502958579881, |
| "eval_runtime": 0.7373, |
| "eval_samples_per_second": 438.086, |
| "eval_steps_per_second": 28.482, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.0526315789473684, |
| "grad_norm": 0.43742474913597107, |
| "learning_rate": 1.578947368421053e-05, |
| "loss": 0.0402, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.1578947368421053, |
| "grad_norm": 0.44011735916137695, |
| "learning_rate": 1.536842105263158e-05, |
| "loss": 0.0325, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.263157894736842, |
| "grad_norm": 0.5089640021324158, |
| "learning_rate": 1.4947368421052632e-05, |
| "loss": 0.0274, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.368421052631579, |
| "grad_norm": 0.5979259610176086, |
| "learning_rate": 1.4526315789473687e-05, |
| "loss": 0.043, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.4736842105263157, |
| "grad_norm": 0.5081164240837097, |
| "learning_rate": 1.4105263157894738e-05, |
| "loss": 0.0316, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.5789473684210527, |
| "grad_norm": 0.29700031876564026, |
| "learning_rate": 1.3684210526315791e-05, |
| "loss": 0.0262, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.6842105263157894, |
| "grad_norm": 0.33268997073173523, |
| "learning_rate": 1.3263157894736843e-05, |
| "loss": 0.0226, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.7894736842105263, |
| "grad_norm": 0.5644997954368591, |
| "learning_rate": 1.2842105263157896e-05, |
| "loss": 0.0295, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.8947368421052633, |
| "grad_norm": 0.4996410608291626, |
| "learning_rate": 1.2421052631578949e-05, |
| "loss": 0.0325, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.8403948545455933, |
| "learning_rate": 1.2e-05, |
| "loss": 0.0304, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_ARCHETYPE": { |
| "f1": 0.6838487972508591, |
| "number": 338, |
| "precision": 0.8155737704918032, |
| "recall": 0.5887573964497042 |
| }, |
| "eval_loss": 0.02692514844238758, |
| "eval_overall_accuracy": 0.9907137300661222, |
| "eval_overall_f1": 0.6838487972508591, |
| "eval_overall_precision": 0.8155737704918032, |
| "eval_overall_recall": 0.5887573964497042, |
| "eval_runtime": 0.74, |
| "eval_samples_per_second": 436.489, |
| "eval_steps_per_second": 28.379, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.1052631578947367, |
| "grad_norm": 0.5079220533370972, |
| "learning_rate": 1.1578947368421053e-05, |
| "loss": 0.0249, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.2105263157894735, |
| "grad_norm": 0.3994927704334259, |
| "learning_rate": 1.1157894736842105e-05, |
| "loss": 0.0217, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.3157894736842106, |
| "grad_norm": 0.40168270468711853, |
| "learning_rate": 1.073684210526316e-05, |
| "loss": 0.0217, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.4210526315789473, |
| "grad_norm": 0.5740233063697815, |
| "learning_rate": 1.0315789473684213e-05, |
| "loss": 0.0209, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.526315789473684, |
| "grad_norm": 0.25694823265075684, |
| "learning_rate": 9.894736842105264e-06, |
| "loss": 0.0189, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.6315789473684212, |
| "grad_norm": 0.3877980411052704, |
| "learning_rate": 9.473684210526315e-06, |
| "loss": 0.019, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.736842105263158, |
| "grad_norm": 0.7972201704978943, |
| "learning_rate": 9.05263157894737e-06, |
| "loss": 0.0283, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.8421052631578947, |
| "grad_norm": 0.4060957431793213, |
| "learning_rate": 8.631578947368422e-06, |
| "loss": 0.0212, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.9473684210526314, |
| "grad_norm": 0.21649326384067535, |
| "learning_rate": 8.210526315789475e-06, |
| "loss": 0.0248, |
| "step": 280 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_ARCHETYPE": { |
| "f1": 0.690671031096563, |
| "number": 338, |
| "precision": 0.7728937728937729, |
| "recall": 0.6242603550295858 |
| }, |
| "eval_loss": 0.026027880609035492, |
| "eval_overall_accuracy": 0.9904706339945546, |
| "eval_overall_f1": 0.690671031096563, |
| "eval_overall_precision": 0.7728937728937729, |
| "eval_overall_recall": 0.6242603550295858, |
| "eval_runtime": 0.7334, |
| "eval_samples_per_second": 440.413, |
| "eval_steps_per_second": 28.634, |
| "step": 285 |
| }, |
| { |
| "epoch": 3.0526315789473686, |
| "grad_norm": 0.3869718909263611, |
| "learning_rate": 7.789473684210526e-06, |
| "loss": 0.0242, |
| "step": 290 |
| }, |
| { |
| "epoch": 3.1578947368421053, |
| "grad_norm": 0.6291412115097046, |
| "learning_rate": 7.368421052631579e-06, |
| "loss": 0.0192, |
| "step": 300 |
| }, |
| { |
| "epoch": 3.263157894736842, |
| "grad_norm": 0.4284729063510895, |
| "learning_rate": 6.947368421052632e-06, |
| "loss": 0.0166, |
| "step": 310 |
| }, |
| { |
| "epoch": 3.3684210526315788, |
| "grad_norm": 0.17491641640663147, |
| "learning_rate": 6.526315789473685e-06, |
| "loss": 0.0222, |
| "step": 320 |
| }, |
| { |
| "epoch": 3.473684210526316, |
| "grad_norm": 0.3707456588745117, |
| "learning_rate": 6.105263157894738e-06, |
| "loss": 0.0152, |
| "step": 330 |
| }, |
| { |
| "epoch": 3.5789473684210527, |
| "grad_norm": 0.41764163970947266, |
| "learning_rate": 5.68421052631579e-06, |
| "loss": 0.0204, |
| "step": 340 |
| }, |
| { |
| "epoch": 3.6842105263157894, |
| "grad_norm": 0.5528035759925842, |
| "learning_rate": 5.263157894736842e-06, |
| "loss": 0.0182, |
| "step": 350 |
| }, |
| { |
| "epoch": 3.7894736842105265, |
| "grad_norm": 0.3283008635044098, |
| "learning_rate": 4.842105263157895e-06, |
| "loss": 0.0144, |
| "step": 360 |
| }, |
| { |
| "epoch": 3.8947368421052633, |
| "grad_norm": 0.39311137795448303, |
| "learning_rate": 4.4210526315789476e-06, |
| "loss": 0.0191, |
| "step": 370 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.7367917895317078, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0132, |
| "step": 380 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_ARCHETYPE": { |
| "f1": 0.7177177177177178, |
| "number": 338, |
| "precision": 0.7286585365853658, |
| "recall": 0.7071005917159763 |
| }, |
| "eval_loss": 0.025751134380698204, |
| "eval_overall_accuracy": 0.9905192532088681, |
| "eval_overall_f1": 0.7177177177177178, |
| "eval_overall_precision": 0.7286585365853658, |
| "eval_overall_recall": 0.7071005917159763, |
| "eval_runtime": 0.7269, |
| "eval_samples_per_second": 444.353, |
| "eval_steps_per_second": 28.89, |
| "step": 380 |
| }, |
| { |
| "epoch": 4.105263157894737, |
| "grad_norm": 0.34780532121658325, |
| "learning_rate": 3.578947368421053e-06, |
| "loss": 0.0149, |
| "step": 390 |
| }, |
| { |
| "epoch": 4.2105263157894735, |
| "grad_norm": 0.9173750877380371, |
| "learning_rate": 3.157894736842105e-06, |
| "loss": 0.015, |
| "step": 400 |
| }, |
| { |
| "epoch": 4.315789473684211, |
| "grad_norm": 0.5133023858070374, |
| "learning_rate": 2.7368421052631583e-06, |
| "loss": 0.0135, |
| "step": 410 |
| }, |
| { |
| "epoch": 4.421052631578947, |
| "grad_norm": 0.611457109451294, |
| "learning_rate": 2.3157894736842105e-06, |
| "loss": 0.0174, |
| "step": 420 |
| }, |
| { |
| "epoch": 4.526315789473684, |
| "grad_norm": 0.3382570743560791, |
| "learning_rate": 1.8947368421052634e-06, |
| "loss": 0.0173, |
| "step": 430 |
| }, |
| { |
| "epoch": 4.631578947368421, |
| "grad_norm": 0.3094813823699951, |
| "learning_rate": 1.4736842105263159e-06, |
| "loss": 0.0122, |
| "step": 440 |
| }, |
| { |
| "epoch": 4.7368421052631575, |
| "grad_norm": 0.3610962927341461, |
| "learning_rate": 1.0526315789473685e-06, |
| "loss": 0.0135, |
| "step": 450 |
| }, |
| { |
| "epoch": 4.842105263157895, |
| "grad_norm": 0.7427480816841125, |
| "learning_rate": 6.315789473684211e-07, |
| "loss": 0.0118, |
| "step": 460 |
| }, |
| { |
| "epoch": 4.947368421052632, |
| "grad_norm": 0.4106050729751587, |
| "learning_rate": 2.105263157894737e-07, |
| "loss": 0.0153, |
| "step": 470 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_ARCHETYPE": { |
| "f1": 0.7125382262996942, |
| "number": 338, |
| "precision": 0.7373417721518988, |
| "recall": 0.6893491124260355 |
| }, |
| "eval_loss": 0.025944961234927177, |
| "eval_overall_accuracy": 0.9906164916374951, |
| "eval_overall_f1": 0.7125382262996942, |
| "eval_overall_precision": 0.7373417721518988, |
| "eval_overall_recall": 0.6893491124260355, |
| "eval_runtime": 0.7347, |
| "eval_samples_per_second": 439.643, |
| "eval_steps_per_second": 28.584, |
| "step": 475 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 475, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 492222215612160.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|