| { | |
| "teacher_model": "gpt2-large", | |
| "student_model": "distilgpt2", | |
| "tokenizer_model": "gpt2", | |
| "batch_size": 4, | |
| "max_length": 256, | |
| "num_epochs": 1, | |
| "learning_rate": 5e-05, | |
| "distillation_alpha": 0.7, | |
| "student_lm_beta": 0.3, | |
| "dataset_file": "/home/jayanthram.k/Distillation/clean_dataset_with_no_outliers.csv", | |
| "train_samples": 43572, | |
| "val_samples": 4842, | |
| "eval_samples": 100, | |
| "best_validation_loss": 2.3524647863515638, | |
| "evaluation_metrics": { | |
| "rouge1_teacher_student_before": 0.32189833105454313, | |
| "rouge2_teacher_student_before": 0.07253675599508494, | |
| "rougeL_teacher_student_before": 0.19562937213425652, | |
| "cosine_similarity_teacher_student_before": 0.44240760803222656, | |
| "rouge1_teacher_student_after": 0.3346180933711459, | |
| "rouge2_teacher_student_after": 0.08527329173516719, | |
| "rougeL_teacher_student_after": 0.20919089238473532, | |
| "cosine_similarity_teacher_student_after": 0.5256437659263611, | |
| "rouge1_student_before_student_after": 0.3137800181943293, | |
| "rouge2_student_before_student_after": 0.07712439299277635, | |
| "rougeL_student_before_student_after": 0.18683832802096778, | |
| "cosine_similarity_student_before_student_after": 0.4463849663734436, | |
| "avg_teacher_time": 1.2837520718574524, | |
| "avg_student_before_time": 0.4636122870445252, | |
| "avg_student_after_time": 0.46685707569122314 | |
| } | |
| } |