mt5-gigatrue-tpb / coreconfig.json
Plasmoxy's picture
End of training
8423bfd verified
{
  "out_name": "mt5-gigatrue-tpb",
  "remove_layers": false,
  "model_archetype": "mt5",
  "model_name": "../txprun/pruned_models/mt5-gigatrue-tpb-base",
  "tokenizer_name": "../txprun/pruned_models/mt5-gigatrue-tpb-base",
  "model_torch_dtype": "bfloat16",
  "dataset_lang": "en",
  "dataset_name": "Plasmoxy/gigatrue",
  "dataset_tokenized_cache_name": "gigatrue_tokenized_mt5_tpb_110-35",
  "use_half_val_dataset": true,
  "max_input_length": 110,
  "max_target_length": 35,
  "batch_size": 128,
  "learning_rate": 0.0003,
  "num_train_epochs": 3,
  "pkg_versions": {
    "optimum": "1.23.3",
    "transformers": "4.45.2",
    "openvino": "2024.6.0",
    "nncf": "2.14.1",
    "torch": "2.5.1",
    "datasets": "3.2.0",
    "peft": "0.13.3.dev0"
  }
}