Transformers
TensorBoard
Safetensors
t5
text2text-generation
Generated from Trainer
text-generation-inference
Instructions to use enriquesaou/debug_seq2seq_squad with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use enriquesaou/debug_seq2seq_squad with Transformers:
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("enriquesaou/debug_seq2seq_squad")
model = AutoModelForSeq2SeqLM.from_pretrained("enriquesaou/debug_seq2seq_squad")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 21720, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04604051565377532, | |
| "grad_norm": 4.105458736419678, | |
| "learning_rate": 2.930939226519337e-05, | |
| "loss": 0.7338, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.09208103130755065, | |
| "grad_norm": 6.632126331329346, | |
| "learning_rate": 2.861878453038674e-05, | |
| "loss": 0.6214, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.13812154696132597, | |
| "grad_norm": 5.542463302612305, | |
| "learning_rate": 2.7928176795580113e-05, | |
| "loss": 0.6065, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.1841620626151013, | |
| "grad_norm": 8.155370712280273, | |
| "learning_rate": 2.7237569060773482e-05, | |
| "loss": 0.5881, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2302025782688766, | |
| "grad_norm": 3.674407720565796, | |
| "learning_rate": 2.6546961325966852e-05, | |
| "loss": 0.582, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.27624309392265195, | |
| "grad_norm": 3.3256947994232178, | |
| "learning_rate": 2.585635359116022e-05, | |
| "loss": 0.577, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.32228360957642727, | |
| "grad_norm": 4.420624732971191, | |
| "learning_rate": 2.516574585635359e-05, | |
| "loss": 0.5748, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.3683241252302026, | |
| "grad_norm": 5.609716892242432, | |
| "learning_rate": 2.4475138121546964e-05, | |
| "loss": 0.5532, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.4143646408839779, | |
| "grad_norm": 10.64582633972168, | |
| "learning_rate": 2.3784530386740334e-05, | |
| "loss": 0.5705, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.4604051565377532, | |
| "grad_norm": 4.55027437210083, | |
| "learning_rate": 2.3093922651933703e-05, | |
| "loss": 0.5542, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.5064456721915286, | |
| "grad_norm": 4.849785327911377, | |
| "learning_rate": 2.2403314917127073e-05, | |
| "loss": 0.5561, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.5524861878453039, | |
| "grad_norm": 7.6427154541015625, | |
| "learning_rate": 2.1712707182320442e-05, | |
| "loss": 0.5427, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.5985267034990792, | |
| "grad_norm": 4.943839073181152, | |
| "learning_rate": 2.1022099447513815e-05, | |
| "loss": 0.548, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.6445672191528545, | |
| "grad_norm": 3.1242663860321045, | |
| "learning_rate": 2.0331491712707185e-05, | |
| "loss": 0.5401, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.6906077348066298, | |
| "grad_norm": 4.478808403015137, | |
| "learning_rate": 1.9640883977900554e-05, | |
| "loss": 0.5444, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.7366482504604052, | |
| "grad_norm": 5.4113054275512695, | |
| "learning_rate": 1.8950276243093924e-05, | |
| "loss": 0.545, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.7826887661141805, | |
| "grad_norm": 3.117663860321045, | |
| "learning_rate": 1.825966850828729e-05, | |
| "loss": 0.5268, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.8287292817679558, | |
| "grad_norm": 4.178927421569824, | |
| "learning_rate": 1.7569060773480663e-05, | |
| "loss": 0.5441, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.8747697974217311, | |
| "grad_norm": 8.61349868774414, | |
| "learning_rate": 1.6878453038674033e-05, | |
| "loss": 0.5383, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.9208103130755064, | |
| "grad_norm": 5.246815204620361, | |
| "learning_rate": 1.6187845303867402e-05, | |
| "loss": 0.5386, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.9668508287292817, | |
| "grad_norm": 5.026543140411377, | |
| "learning_rate": 1.5497237569060772e-05, | |
| "loss": 0.5313, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.0128913443830572, | |
| "grad_norm": 3.600374937057495, | |
| "learning_rate": 1.4806629834254145e-05, | |
| "loss": 0.5252, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.0589318600368325, | |
| "grad_norm": 3.5238852500915527, | |
| "learning_rate": 1.4116022099447514e-05, | |
| "loss": 0.4928, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.1049723756906078, | |
| "grad_norm": 4.74073600769043, | |
| "learning_rate": 1.3425414364640886e-05, | |
| "loss": 0.5087, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.1510128913443831, | |
| "grad_norm": 8.856585502624512, | |
| "learning_rate": 1.2734806629834255e-05, | |
| "loss": 0.4898, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.1970534069981584, | |
| "grad_norm": 8.898761749267578, | |
| "learning_rate": 1.2044198895027623e-05, | |
| "loss": 0.5066, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.2430939226519337, | |
| "grad_norm": 5.918560028076172, | |
| "learning_rate": 1.1353591160220994e-05, | |
| "loss": 0.5149, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.289134438305709, | |
| "grad_norm": 4.796153545379639, | |
| "learning_rate": 1.0662983425414364e-05, | |
| "loss": 0.5006, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.3351749539594844, | |
| "grad_norm": 5.584662914276123, | |
| "learning_rate": 9.972375690607735e-06, | |
| "loss": 0.5058, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.3812154696132597, | |
| "grad_norm": 9.456035614013672, | |
| "learning_rate": 9.281767955801105e-06, | |
| "loss": 0.4885, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.427255985267035, | |
| "grad_norm": 3.6488196849823, | |
| "learning_rate": 8.591160220994474e-06, | |
| "loss": 0.4914, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.4732965009208103, | |
| "grad_norm": 3.0951404571533203, | |
| "learning_rate": 7.900552486187846e-06, | |
| "loss": 0.5016, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.5193370165745856, | |
| "grad_norm": 5.87822151184082, | |
| "learning_rate": 7.209944751381215e-06, | |
| "loss": 0.5117, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.565377532228361, | |
| "grad_norm": 5.016966819763184, | |
| "learning_rate": 6.519337016574586e-06, | |
| "loss": 0.5134, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.6114180478821363, | |
| "grad_norm": 8.312193870544434, | |
| "learning_rate": 5.828729281767956e-06, | |
| "loss": 0.4934, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.6574585635359116, | |
| "grad_norm": 5.323014736175537, | |
| "learning_rate": 5.1381215469613265e-06, | |
| "loss": 0.5025, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.703499079189687, | |
| "grad_norm": 4.238588809967041, | |
| "learning_rate": 4.447513812154697e-06, | |
| "loss": 0.4948, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.7495395948434622, | |
| "grad_norm": 3.3375260829925537, | |
| "learning_rate": 3.756906077348067e-06, | |
| "loss": 0.5086, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.7955801104972375, | |
| "grad_norm": 6.620930194854736, | |
| "learning_rate": 3.0662983425414365e-06, | |
| "loss": 0.4931, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.8416206261510129, | |
| "grad_norm": 4.153687000274658, | |
| "learning_rate": 2.375690607734807e-06, | |
| "loss": 0.4853, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.8876611418047882, | |
| "grad_norm": 5.683996677398682, | |
| "learning_rate": 1.6850828729281769e-06, | |
| "loss": 0.492, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.9337016574585635, | |
| "grad_norm": 3.776688575744629, | |
| "learning_rate": 9.944751381215469e-07, | |
| "loss": 0.4876, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.979742173112339, | |
| "grad_norm": 2.931673526763916, | |
| "learning_rate": 3.0386740331491715e-07, | |
| "loss": 0.5069, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 21720, | |
| "total_flos": 2.645641233904435e+16, | |
| "train_loss": 0.5331895410246172, | |
| "train_runtime": 7144.6567, | |
| "train_samples_per_second": 36.48, | |
| "train_steps_per_second": 3.04 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 21720, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.645641233904435e+16, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |