{
  "teacher_model": "gpt2-large",
  "student_model": "distilgpt2",
  "tokenizer_model": "gpt2",
  "batch_size": 4,
  "max_length": 256,
  "num_epochs": 3,
  "learning_rate": 5e-05,
  "distillation_alpha": 0.7,
  "student_lm_beta": 0.3,
  "dataset_file": "/home/divyarani.k/Internship-2025-Team1/Distillation/Model_Exploration/Distillation-Gpt2-DistilGpt/F-Tallyqa.csv",
  "train_samples": 106414,
  "val_samples": 11824,
  "eval_samples": 100,
  "best_validation_loss": 1.8084159012737713,
  "evaluation_metrics": {
    "rouge1_teacher_student_before": 0.3164868777314132,
    "rouge2_teacher_student_before": 0.07499568529627544,
    "rougeL_teacher_student_before": 0.18821303864954625,
    "cosine_similarity_teacher_student_before": 0.36006441712379456,
    "rouge1_teacher_student_after": 0.3048857745421187,
    "rouge2_teacher_student_after": 0.06675678541680866,
    "rougeL_teacher_student_after": 0.18748940481226373,
    "cosine_similarity_teacher_student_after": 0.4749462604522705,
    "rouge1_student_before_student_after": 0.2562567617366068,
    "rouge2_student_before_student_after": 0.0373565907888051,
    "rougeL_student_before_student_after": 0.14784319087594555,
    "cosine_similarity_student_before_student_after": 0.33844634890556335,
    "avg_teacher_time": 0.9765390038490296,
    "avg_student_before_time": 0.27398345947265623,
    "avg_student_after_time": 0.29969048976898194
  }
}