File size: 3,551 Bytes
2663ef8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
{
  "baseline": {
    "epochs": 100,
    "training_time_seconds": 3139.0,
    "training_time_minutes": 52.32,
    "final_loss": 0.0974,
    "bleu": 0.5247,
    "bleu_percent": 52.47,
    "source": "Extracted from en_to_hi.ipynb recorded outputs on 2026-03-13"
  },
  "tuning": {
    "best_config": {
      "learning_rate": 8.884411309500873e-05,
      "batch_size": 16,
      "num_heads": 4,
      "d_ff": 1536,
      "dropout": 0.19016462895143443,
      "num_layers": 4,
      "weight_decay": 1.149540832915101e-06,
      "d_model": 512,
      "max_len": 50,
      "seed": 42,
      "force_cpu": false,
      "epochs": 20
    },
    "best_metrics": {
      "epoch": 20,
      "loss": 0.5256124382669275,
      "bleu": 0.7698028731485314,
      "bleu_percent": 76.98028731485313,
      "timestamp": 1773401641,
      "checkpoint_dir_name": null,
      "done": true,
      "training_iteration": 20,
      "trial_id": "28792536",
      "date": "2026-03-13_17-04-01",
      "time_this_iter_s": 42.827330112457275,
      "time_total_s": 857.8043477535248,
      "pid": 3948130,
      "hostname": "user-SSI100C3A-48",
      "node_ip": "10.6.0.87",
      "config": {
        "learning_rate": 8.884411309500873e-05,
        "batch_size": 16,
        "num_heads": 4,
        "d_ff": 1536,
        "dropout": 0.19016462895143443,
        "num_layers": 4,
        "weight_decay": 1.149540832915101e-06,
        "d_model": 512,
        "max_len": 50,
        "seed": 42,
        "force_cpu": false,
        "epochs": 20
      },
      "time_since_restore": 857.8043477535248,
      "iterations_since_restore": 20,
      "experiment_tag": "18_batch_size=16,d_ff=1536,d_model=512,dropout=0.1902,epochs=20,force_cpu=False,learning_rate=0.0001,max_len=50,num_heads=4,num_layers=4,seed=42,weight_decay=0.0000"
    },
    "num_samples": 20,
    "tune_epochs": 20,
    "search_space": {
      "learning_rate": "loguniform(1e-5, 1e-3)",
      "batch_size": "choice([16, 32, 64])",
      "num_heads": "choice([4, 8])",
      "d_ff": "choice([1024, 1536, 2048])",
      "dropout": "uniform(0.10, 0.40)",
      "num_layers": "choice([4, 6])",
      "weight_decay": "loguniform(1e-6, 1e-3)"
    },
    "metric": "bleu",
    "mode": "max",
    "scheduler": "ASHAScheduler",
    "search_algorithm": "OptunaSearch"
  },
  "best_model": {
    "epochs": 20,
    "training_time_seconds": 853.0576362609863,
    "training_time_minutes": 14.217627271016438,
    "final_loss": 0.5256124382669275,
    "best_bleu": 0.8471540338276414,
    "best_bleu_percent": 84.71540338276414,
    "best_epoch": 15,
    "epochs_to_target": 6,
    "model_path": "b23cs1075_ass_4_best_model.pth",
    "config": {
      "learning_rate": 8.884411309500873e-05,
      "batch_size": 16,
      "num_heads": 4,
      "d_ff": 1536,
      "dropout": 0.19016462895143443,
      "num_layers": 4,
      "weight_decay": 1.149540832915101e-06,
      "d_model": 512,
      "max_len": 50,
      "seed": 42,
      "force_cpu": false,
      "epochs": 20
    }
  },
  "search_space": {
    "learning_rate": "loguniform(1e-5, 1e-3)",
    "batch_size": "choice([16, 32, 64])",
    "num_heads": "choice([4, 8])",
    "d_ff": "choice([1024, 1536, 2048])",
    "dropout": "uniform(0.10, 0.40)",
    "num_layers": "choice([4, 6])",
    "weight_decay": "loguniform(1e-6, 1e-3)"
  },
  "artifacts": {
    "baseline_model": "transformer_translation_final.pth",
    "best_model": "b23cs1075_ass_4_best_model.pth",
    "report_markdown": "b23cs1075_ass_4_report.md",
    "summary_json": "artifacts/assignment4/summary.json"
  }
}