{ "teacher_model": "gpt2-large", "student_model": "distilgpt2", "tokenizer_model": "gpt2", "batch_size": 4, "max_length": 256, "num_epochs": 3, "learning_rate": 5e-05, "distillation_alpha": 0.7, "student_lm_beta": 0.3, "dataset_file": "/home/divyarani.k/Internship-2025-Team1/Distillation/Model_Exploration/Distillation-Gpt2-DistilGpt/F-Tallyqa.csv", "train_samples": 106414, "val_samples": 11824, "eval_samples": 100, "best_validation_loss": 1.8084159012737713, "evaluation_metrics": { "rouge1_teacher_student_before": 0.3164868777314132, "rouge2_teacher_student_before": 0.07499568529627544, "rougeL_teacher_student_before": 0.18821303864954625, "cosine_similarity_teacher_student_before": 0.36006441712379456, "rouge1_teacher_student_after": 0.3048857745421187, "rouge2_teacher_student_after": 0.06675678541680866, "rougeL_teacher_student_after": 0.18748940481226373, "cosine_similarity_teacher_student_after": 0.4749462604522705, "rouge1_student_before_student_after": 0.2562567617366068, "rouge2_student_before_student_after": 0.0373565907888051, "rougeL_student_before_student_after": 0.14784319087594555, "cosine_similarity_student_before_student_after": 0.33844634890556335, "avg_teacher_time": 0.9765390038490296, "avg_student_before_time": 0.27398345947265623, "avg_student_after_time": 0.29969048976898194 } }