TallyAssistant / training_config.json
DivyaRani's picture
Upload folder using huggingface_hub
3b3bb07 verified
{
"teacher_model": "gpt2-large",
"student_model": "distilgpt2",
"tokenizer_model": "gpt2",
"batch_size": 4,
"max_length": 256,
"num_epochs": 3,
"learning_rate": 5e-05,
"distillation_alpha": 0.7,
"student_lm_beta": 0.3,
"dataset_file": "/home/divyarani.k/Internship-2025-Team1/Distillation/Model_Exploration/Distillation-Gpt2-DistilGpt/F-Tallyqa.csv",
"train_samples": 106414,
"val_samples": 11824,
"eval_samples": 100,
"best_validation_loss": 1.8084159012737713,
"evaluation_metrics": {
"rouge1_teacher_student_before": 0.3164868777314132,
"rouge2_teacher_student_before": 0.07499568529627544,
"rougeL_teacher_student_before": 0.18821303864954625,
"cosine_similarity_teacher_student_before": 0.36006441712379456,
"rouge1_teacher_student_after": 0.3048857745421187,
"rouge2_teacher_student_after": 0.06675678541680866,
"rougeL_teacher_student_after": 0.18748940481226373,
"cosine_similarity_teacher_student_after": 0.4749462604522705,
"rouge1_student_before_student_after": 0.2562567617366068,
"rouge2_student_before_student_after": 0.0373565907888051,
"rougeL_student_before_student_after": 0.14784319087594555,
"cosine_similarity_student_before_student_after": 0.33844634890556335,
"avg_teacher_time": 0.9765390038490296,
"avg_student_before_time": 0.27398345947265623,
"avg_student_after_time": 0.29969048976898194
}
}