Text Generation
Transformers
Safetensors
French
llama_syncabel
BEL
retrieval
entity-retrieval
named-entity-disambiguation
entity-disambiguation
named-entity-linking
entity-linking
text2text-generation
biomedical
healthcare
synthetic-data
causal-lm
llm
conversational
custom_code
Eval Results (legacy)
{
  "best_global_step": 14000,
  "best_metric": 0.5805,
  "best_model_checkpoint": "/lustre/fsn1/projects/rech/ssq/usk98ia/expe_data_ratio/MEDLINE_full_upsampled_tfidf/Meta-Llama-3-8B-Instruct/checkpoint-14000",
  "epoch": 3.0,
  "eval_steps": 2000,
  "global_step": 19056,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "entropy": 1.969329814195633,
      "epoch": 0.3148614609571788,
      "grad_norm": 4.3125,
      "learning_rate": 2.7683942869508766e-05,
      "loss": 0.5794,
      "mean_token_accuracy": 0.8737598845213652,
      "num_tokens": 15559602.0,
      "step": 2000
    },
    {
      "epoch": 0.3148614609571788,
      "eval_entropy": 2.0184345861409336,
      "eval_loss": 1.1819707155227661,
      "eval_mean_token_accuracy": 0.8292109202718575,
      "eval_num_gold": 298,
      "eval_num_guess": 298,
      "eval_num_tokens": 15559602.0,
      "eval_recall": 0.5268,
      "eval_runtime": 5.3876,
      "eval_samples_per_second": 55.312,
      "eval_steps_per_second": 55.312,
      "step": 2000
    },
    {
      "entropy": 1.9614297151565552,
      "epoch": 0.6297229219143576,
      "grad_norm": 0.8828125,
      "learning_rate": 2.4437892231118805e-05,
      "loss": 0.3147,
      "mean_token_accuracy": 0.9210941952764988,
      "num_tokens": 31152472.0,
      "step": 4000
    },
    {
      "epoch": 0.6297229219143576,
      "eval_entropy": 1.9975530157153238,
      "eval_loss": 1.255624532699585,
      "eval_mean_token_accuracy": 0.8376039716621373,
      "eval_num_gold": 298,
      "eval_num_guess": 298,
      "eval_num_tokens": 31152472.0,
      "eval_recall": 0.557,
      "eval_runtime": 5.1227,
      "eval_samples_per_second": 58.173,
      "eval_steps_per_second": 58.173,
      "step": 4000
    },
    {
      "entropy": 1.8552323149442673,
      "epoch": 0.9445843828715366,
      "grad_norm": 5.84375,
      "learning_rate": 2.1191841592728847e-05,
      "loss": 0.2448,
      "mean_token_accuracy": 0.9366229450702668,
      "num_tokens": 46755525.0,
      "step": 6000
    },
    {
      "epoch": 0.9445843828715366,
      "eval_entropy": 1.7975720761206326,
      "eval_loss": 1.4483306407928467,
      "eval_mean_token_accuracy": 0.8389281419499609,
      "eval_num_gold": 298,
      "eval_num_guess": 298,
      "eval_num_tokens": 46755525.0,
      "eval_recall": 0.5537,
      "eval_runtime": 5.1544,
      "eval_samples_per_second": 57.815,
      "eval_steps_per_second": 57.815,
      "step": 6000
    },
    {
      "entropy": 1.5045488215088845,
      "epoch": 1.2594458438287153,
      "grad_norm": 6.25,
      "learning_rate": 1.794579095433889e-05,
      "loss": 0.1197,
      "mean_token_accuracy": 0.9654132520854473,
      "num_tokens": 62322213.0,
      "step": 8000
    },
    {
      "epoch": 1.2594458438287153,
      "eval_entropy": 1.6012752000117463,
      "eval_loss": 1.4600104093551636,
      "eval_mean_token_accuracy": 0.8446898946426059,
      "eval_num_gold": 298,
      "eval_num_guess": 298,
      "eval_num_tokens": 62322213.0,
      "eval_recall": 0.5638,
      "eval_runtime": 5.1609,
      "eval_samples_per_second": 57.742,
      "eval_steps_per_second": 57.742,
      "step": 8000
    },
    {
      "entropy": 1.3820615062713624,
      "epoch": 1.5743073047858942,
      "grad_norm": 2.734375,
      "learning_rate": 1.469974031594893e-05,
      "loss": 0.0943,
      "mean_token_accuracy": 0.9713528198897838,
      "num_tokens": 77897553.0,
      "step": 10000
    },
    {
      "epoch": 1.5743073047858942,
      "eval_entropy": 1.4945057386519924,
      "eval_loss": 1.4899256229400635,
      "eval_mean_token_accuracy": 0.8435980430585426,
      "eval_num_gold": 298,
      "eval_num_guess": 298,
      "eval_num_tokens": 77897553.0,
      "eval_recall": 0.5638,
      "eval_runtime": 5.1773,
      "eval_samples_per_second": 57.558,
      "eval_steps_per_second": 57.558,
      "step": 10000
    },
    {
      "entropy": 1.3741806761026383,
      "epoch": 1.8891687657430731,
      "grad_norm": 4.65625,
      "learning_rate": 1.1453689677558971e-05,
      "loss": 0.0877,
      "mean_token_accuracy": 0.9735741688013076,
      "num_tokens": 93526033.0,
      "step": 12000
    },
    {
      "epoch": 1.8891687657430731,
      "eval_entropy": 1.4978553134722998,
      "eval_loss": 1.519768476486206,
      "eval_mean_token_accuracy": 0.8461721192470333,
      "eval_num_gold": 298,
      "eval_num_guess": 298,
      "eval_num_tokens": 93526033.0,
      "eval_recall": 0.5772,
      "eval_runtime": 5.1186,
      "eval_samples_per_second": 58.219,
      "eval_steps_per_second": 58.219,
      "step": 12000
    },
    {
      "entropy": 1.3528405362963676,
      "epoch": 2.204030226700252,
      "grad_norm": 2.15625,
      "learning_rate": 8.207639039169012e-06,
      "loss": 0.0503,
      "mean_token_accuracy": 0.9846821752786636,
      "num_tokens": 109130930.0,
      "step": 14000
    },
    {
      "epoch": 2.204030226700252,
      "eval_entropy": 1.4673509179745745,
      "eval_loss": 1.6728711128234863,
      "eval_mean_token_accuracy": 0.8439136392518178,
      "eval_num_gold": 298,
      "eval_num_guess": 298,
      "eval_num_tokens": 109130930.0,
      "eval_recall": 0.5805,
      "eval_runtime": 5.1395,
      "eval_samples_per_second": 57.982,
      "eval_steps_per_second": 57.982,
      "step": 14000
    },
    {
      "entropy": 1.3303052598834038,
      "epoch": 2.5188916876574305,
      "grad_norm": 2.25,
      "learning_rate": 4.961588400779053e-06,
      "loss": 0.0296,
      "mean_token_accuracy": 0.9909585509300232,
      "num_tokens": 124740845.0,
      "step": 16000
    },
    {
      "epoch": 2.5188916876574305,
      "eval_entropy": 1.4869463125731321,
      "eval_loss": 1.6893718242645264,
      "eval_mean_token_accuracy": 0.8412250805421163,
      "eval_num_gold": 298,
      "eval_num_guess": 298,
      "eval_num_tokens": 124740845.0,
      "eval_recall": 0.5705,
      "eval_runtime": 5.1055,
      "eval_samples_per_second": 58.368,
      "eval_steps_per_second": 58.368,
      "step": 16000
    },
    {
      "entropy": 1.3306579428315162,
      "epoch": 2.8337531486146097,
      "grad_norm": 0.0537109375,
      "learning_rate": 1.7155377623890934e-06,
      "loss": 0.0295,
      "mean_token_accuracy": 0.9908962353467942,
      "num_tokens": 140357465.0,
      "step": 18000
    },
    {
      "epoch": 2.8337531486146097,
      "eval_entropy": 1.4714410711054835,
      "eval_loss": 1.6864608526229858,
      "eval_mean_token_accuracy": 0.8417843647251193,
      "eval_num_gold": 298,
      "eval_num_guess": 298,
      "eval_num_tokens": 140357465.0,
      "eval_recall": 0.5738,
      "eval_runtime": 5.2683,
      "eval_samples_per_second": 56.564,
      "eval_steps_per_second": 56.564,
      "step": 18000
    }
  ],
  "logging_steps": 2000,
  "max_steps": 19056,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.689385209600672e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}