{ "baseline": { "epochs": 100, "training_time_seconds": 3139.0, "training_time_minutes": 52.32, "final_loss": 0.0974, "bleu": 0.5247, "bleu_percent": 52.47, "source": "Extracted from en_to_hi.ipynb recorded outputs on 2026-03-13" }, "tuning": { "best_config": { "learning_rate": 8.884411309500873e-05, "batch_size": 16, "num_heads": 4, "d_ff": 1536, "dropout": 0.19016462895143443, "num_layers": 4, "weight_decay": 1.149540832915101e-06, "d_model": 512, "max_len": 50, "seed": 42, "force_cpu": false, "epochs": 20 }, "best_metrics": { "epoch": 20, "loss": 0.5256124382669275, "bleu": 0.7698028731485314, "bleu_percent": 76.98028731485313, "timestamp": 1773401641, "checkpoint_dir_name": null, "done": true, "training_iteration": 20, "trial_id": "28792536", "date": "2026-03-13_17-04-01", "time_this_iter_s": 42.827330112457275, "time_total_s": 857.8043477535248, "pid": 3948130, "hostname": "user-SSI100C3A-48", "node_ip": "10.6.0.87", "config": { "learning_rate": 8.884411309500873e-05, "batch_size": 16, "num_heads": 4, "d_ff": 1536, "dropout": 0.19016462895143443, "num_layers": 4, "weight_decay": 1.149540832915101e-06, "d_model": 512, "max_len": 50, "seed": 42, "force_cpu": false, "epochs": 20 }, "time_since_restore": 857.8043477535248, "iterations_since_restore": 20, "experiment_tag": "18_batch_size=16,d_ff=1536,d_model=512,dropout=0.1902,epochs=20,force_cpu=False,learning_rate=0.0001,max_len=50,num_heads=4,num_layers=4,seed=42,weight_decay=0.0000" }, "num_samples": 20, "tune_epochs": 20, "search_space": { "learning_rate": "loguniform(1e-5, 1e-3)", "batch_size": "choice([16, 32, 64])", "num_heads": "choice([4, 8])", "d_ff": "choice([1024, 1536, 2048])", "dropout": "uniform(0.10, 0.40)", "num_layers": "choice([4, 6])", "weight_decay": "loguniform(1e-6, 1e-3)" }, "metric": "bleu", "mode": "max", "scheduler": "ASHAScheduler", "search_algorithm": "OptunaSearch" }, "best_model": { "epochs": 20, "training_time_seconds": 853.0576362609863, "training_time_minutes": 14.217627271016438, "final_loss": 0.5256124382669275, "best_bleu": 0.8471540338276414, "best_bleu_percent": 84.71540338276414, "best_epoch": 15, "epochs_to_target": 6, "model_path": "b23cs1075_ass_4_best_model.pth", "config": { "learning_rate": 8.884411309500873e-05, "batch_size": 16, "num_heads": 4, "d_ff": 1536, "dropout": 0.19016462895143443, "num_layers": 4, "weight_decay": 1.149540832915101e-06, "d_model": 512, "max_len": 50, "seed": 42, "force_cpu": false, "epochs": 20 } }, "search_space": { "learning_rate": "loguniform(1e-5, 1e-3)", "batch_size": "choice([16, 32, 64])", "num_heads": "choice([4, 8])", "d_ff": "choice([1024, 1536, 2048])", "dropout": "uniform(0.10, 0.40)", "num_layers": "choice([4, 6])", "weight_decay": "loguniform(1e-6, 1e-3)" }, "artifacts": { "baseline_model": "transformer_translation_final.pth", "best_model": "b23cs1075_ass_4_best_model.pth", "report_markdown": "b23cs1075_ass_4_report.md", "summary_json": "artifacts/assignment4/summary.json" } }