| { |
| "baseline": { |
| "epochs": 100, |
| "training_time_seconds": 3139.0, |
| "training_time_minutes": 52.32, |
| "final_loss": 0.0974, |
| "bleu": 0.5247, |
| "bleu_percent": 52.47, |
| "source": "Extracted from en_to_hi.ipynb recorded outputs on 2026-03-13" |
| }, |
| "tuning": { |
| "best_config": { |
| "learning_rate": 8.884411309500873e-05, |
| "batch_size": 16, |
| "num_heads": 4, |
| "d_ff": 1536, |
| "dropout": 0.19016462895143443, |
| "num_layers": 4, |
| "weight_decay": 1.149540832915101e-06, |
| "d_model": 512, |
| "max_len": 50, |
| "seed": 42, |
| "force_cpu": false, |
| "epochs": 20 |
| }, |
| "best_metrics": { |
| "epoch": 20, |
| "loss": 0.5256124382669275, |
| "bleu": 0.7698028731485314, |
| "bleu_percent": 76.98028731485313, |
| "timestamp": 1773401641, |
| "checkpoint_dir_name": null, |
| "done": true, |
| "training_iteration": 20, |
| "trial_id": "28792536", |
| "date": "2026-03-13_17-04-01", |
| "time_this_iter_s": 42.827330112457275, |
| "time_total_s": 857.8043477535248, |
| "pid": 3948130, |
| "hostname": "user-SSI100C3A-48", |
| "node_ip": "10.6.0.87", |
| "config": { |
| "learning_rate": 8.884411309500873e-05, |
| "batch_size": 16, |
| "num_heads": 4, |
| "d_ff": 1536, |
| "dropout": 0.19016462895143443, |
| "num_layers": 4, |
| "weight_decay": 1.149540832915101e-06, |
| "d_model": 512, |
| "max_len": 50, |
| "seed": 42, |
| "force_cpu": false, |
| "epochs": 20 |
| }, |
| "time_since_restore": 857.8043477535248, |
| "iterations_since_restore": 20, |
| "experiment_tag": "18_batch_size=16,d_ff=1536,d_model=512,dropout=0.1902,epochs=20,force_cpu=False,learning_rate=0.0001,max_len=50,num_heads=4,num_layers=4,seed=42,weight_decay=0.0000" |
| }, |
| "num_samples": 20, |
| "tune_epochs": 20, |
| "search_space": { |
| "learning_rate": "loguniform(1e-5, 1e-3)", |
| "batch_size": "choice([16, 32, 64])", |
| "num_heads": "choice([4, 8])", |
| "d_ff": "choice([1024, 1536, 2048])", |
| "dropout": "uniform(0.10, 0.40)", |
| "num_layers": "choice([4, 6])", |
| "weight_decay": "loguniform(1e-6, 1e-3)" |
| }, |
| "metric": "bleu", |
| "mode": "max", |
| "scheduler": "ASHAScheduler", |
| "search_algorithm": "OptunaSearch" |
| }, |
| "best_model": { |
| "epochs": 20, |
| "training_time_seconds": 853.0576362609863, |
| "training_time_minutes": 14.217627271016438, |
| "final_loss": 0.5256124382669275, |
| "best_bleu": 0.8471540338276414, |
| "best_bleu_percent": 84.71540338276414, |
| "best_epoch": 15, |
| "epochs_to_target": 6, |
| "model_path": "b23cs1075_ass_4_best_model.pth", |
| "config": { |
| "learning_rate": 8.884411309500873e-05, |
| "batch_size": 16, |
| "num_heads": 4, |
| "d_ff": 1536, |
| "dropout": 0.19016462895143443, |
| "num_layers": 4, |
| "weight_decay": 1.149540832915101e-06, |
| "d_model": 512, |
| "max_len": 50, |
| "seed": 42, |
| "force_cpu": false, |
| "epochs": 20 |
| } |
| }, |
| "search_space": { |
| "learning_rate": "loguniform(1e-5, 1e-3)", |
| "batch_size": "choice([16, 32, 64])", |
| "num_heads": "choice([4, 8])", |
| "d_ff": "choice([1024, 1536, 2048])", |
| "dropout": "uniform(0.10, 0.40)", |
| "num_layers": "choice([4, 6])", |
| "weight_decay": "loguniform(1e-6, 1e-3)" |
| }, |
| "artifacts": { |
| "baseline_model": "transformer_translation_final.pth", |
| "best_model": "b23cs1075_ass_4_best_model.pth", |
| "report_markdown": "b23cs1075_ass_4_report.md", |
| "summary_json": "artifacts/assignment4/summary.json" |
| } |
| } |