NisargUpadhyay's picture
Add Assignment 4 summary
2663ef8 verified
{
"baseline": {
"epochs": 100,
"training_time_seconds": 3139.0,
"training_time_minutes": 52.32,
"final_loss": 0.0974,
"bleu": 0.5247,
"bleu_percent": 52.47,
"source": "Extracted from en_to_hi.ipynb recorded outputs on 2026-03-13"
},
"tuning": {
"best_config": {
"learning_rate": 8.884411309500873e-05,
"batch_size": 16,
"num_heads": 4,
"d_ff": 1536,
"dropout": 0.19016462895143443,
"num_layers": 4,
"weight_decay": 1.149540832915101e-06,
"d_model": 512,
"max_len": 50,
"seed": 42,
"force_cpu": false,
"epochs": 20
},
"best_metrics": {
"epoch": 20,
"loss": 0.5256124382669275,
"bleu": 0.7698028731485314,
"bleu_percent": 76.98028731485313,
"timestamp": 1773401641,
"checkpoint_dir_name": null,
"done": true,
"training_iteration": 20,
"trial_id": "28792536",
"date": "2026-03-13_17-04-01",
"time_this_iter_s": 42.827330112457275,
"time_total_s": 857.8043477535248,
"pid": 3948130,
"hostname": "user-SSI100C3A-48",
"node_ip": "10.6.0.87",
"config": {
"learning_rate": 8.884411309500873e-05,
"batch_size": 16,
"num_heads": 4,
"d_ff": 1536,
"dropout": 0.19016462895143443,
"num_layers": 4,
"weight_decay": 1.149540832915101e-06,
"d_model": 512,
"max_len": 50,
"seed": 42,
"force_cpu": false,
"epochs": 20
},
"time_since_restore": 857.8043477535248,
"iterations_since_restore": 20,
"experiment_tag": "18_batch_size=16,d_ff=1536,d_model=512,dropout=0.1902,epochs=20,force_cpu=False,learning_rate=0.0001,max_len=50,num_heads=4,num_layers=4,seed=42,weight_decay=0.0000"
},
"num_samples": 20,
"tune_epochs": 20,
"search_space": {
"learning_rate": "loguniform(1e-5, 1e-3)",
"batch_size": "choice([16, 32, 64])",
"num_heads": "choice([4, 8])",
"d_ff": "choice([1024, 1536, 2048])",
"dropout": "uniform(0.10, 0.40)",
"num_layers": "choice([4, 6])",
"weight_decay": "loguniform(1e-6, 1e-3)"
},
"metric": "bleu",
"mode": "max",
"scheduler": "ASHAScheduler",
"search_algorithm": "OptunaSearch"
},
"best_model": {
"epochs": 20,
"training_time_seconds": 853.0576362609863,
"training_time_minutes": 14.217627271016438,
"final_loss": 0.5256124382669275,
"best_bleu": 0.8471540338276414,
"best_bleu_percent": 84.71540338276414,
"best_epoch": 15,
"epochs_to_target": 6,
"model_path": "b23cs1075_ass_4_best_model.pth",
"config": {
"learning_rate": 8.884411309500873e-05,
"batch_size": 16,
"num_heads": 4,
"d_ff": 1536,
"dropout": 0.19016462895143443,
"num_layers": 4,
"weight_decay": 1.149540832915101e-06,
"d_model": 512,
"max_len": 50,
"seed": 42,
"force_cpu": false,
"epochs": 20
}
},
"search_space": {
"learning_rate": "loguniform(1e-5, 1e-3)",
"batch_size": "choice([16, 32, 64])",
"num_heads": "choice([4, 8])",
"d_ff": "choice([1024, 1536, 2048])",
"dropout": "uniform(0.10, 0.40)",
"num_layers": "choice([4, 6])",
"weight_decay": "loguniform(1e-6, 1e-3)"
},
"artifacts": {
"baseline_model": "transformer_translation_final.pth",
"best_model": "b23cs1075_ass_4_best_model.pth",
"report_markdown": "b23cs1075_ass_4_report.md",
"summary_json": "artifacts/assignment4/summary.json"
}
}