inlp-assignment3 / task2_ssm /tuning /tuning_results.json
samvaran's picture
Upload task2_ssm/tuning/tuning_results.json with huggingface_hub
834be77 verified
[
{
"trial": 2,
"name": "ssm_ss64_nl4_emb64_do0.1_lr0.001_bs32_ep30",
"params": {
"model.state_size": 64,
"model.num_layers": 4,
"model.input_size": 64,
"model.dropout": 0.1,
"training.learning_rate": 0.001,
"training.batch_size": 32,
"training.epochs": 30
},
"best_val_loss": 5.375143,
"best_val_acc": 0.188612,
"best_epoch": 27,
"epochs_run": 30,
"param_count": 1407632,
"model_path": "models/ssm_ss64_nl4_emb64_do0.1_lr0.001_bs32_ep30.pt"
},
{
"trial": 4,
"name": "ssm_ss128_nl4_emb64_do0.1_lr0.001_bs32_ep30",
"params": {
"model.state_size": 128,
"model.num_layers": 4,
"model.input_size": 64,
"model.dropout": 0.1,
"training.learning_rate": 0.001,
"training.batch_size": 32,
"training.epochs": 30
},
"best_val_loss": 5.369389,
"best_val_acc": 0.188401,
"best_epoch": 29,
"epochs_run": 30,
"param_count": 1440656,
"model_path": "models/ssm_ss128_nl4_emb64_do0.1_lr0.001_bs32_ep30.pt"
},
{
"trial": 6,
"name": "ssm_ss256_nl4_emb64_do0.1_lr0.001_bs32_ep30",
"params": {
"model.state_size": 256,
"model.num_layers": 4,
"model.input_size": 64,
"model.dropout": 0.1,
"training.learning_rate": 0.001,
"training.batch_size": 32,
"training.epochs": 30
},
"best_val_loss": 5.367797,
"best_val_acc": 0.188095,
"best_epoch": 29,
"epochs_run": 30,
"param_count": 1506704,
"model_path": "models/ssm_ss256_nl4_emb64_do0.1_lr0.001_bs32_ep30.pt"
},
{
"trial": 3,
"name": "ssm_ss128_nl2_emb64_do0.1_lr0.001_bs32_ep30",
"params": {
"model.state_size": 128,
"model.num_layers": 2,
"model.input_size": 64,
"model.dropout": 0.1,
"training.learning_rate": 0.001,
"training.batch_size": 32,
"training.epochs": 30
},
"best_val_loss": 5.470606,
"best_val_acc": 0.184723,
"best_epoch": 23,
"epochs_run": 30,
"param_count": 1365392,
"model_path": "models/ssm_ss128_nl2_emb64_do0.1_lr0.001_bs32_ep30.pt"
},
{
"trial": 5,
"name": "ssm_ss256_nl2_emb64_do0.1_lr0.001_bs32_ep30",
"params": {
"model.state_size": 256,
"model.num_layers": 2,
"model.input_size": 64,
"model.dropout": 0.1,
"training.learning_rate": 0.001,
"training.batch_size": 32,
"training.epochs": 30
},
"best_val_loss": 5.459764,
"best_val_acc": 0.184554,
"best_epoch": 18,
"epochs_run": 28,
"param_count": 1398416,
"model_path": "models/ssm_ss256_nl2_emb64_do0.1_lr0.001_bs32_ep30.pt"
},
{
"trial": 1,
"name": "ssm_ss64_nl2_emb64_do0.1_lr0.001_bs32_ep30",
"params": {
"model.state_size": 64,
"model.num_layers": 2,
"model.input_size": 64,
"model.dropout": 0.1,
"training.learning_rate": 0.001,
"training.batch_size": 32,
"training.epochs": 30
},
"best_val_loss": 5.472807,
"best_val_acc": 0.1843,
"best_epoch": 21,
"epochs_run": 30,
"param_count": 1348880,
"model_path": "models/ssm_ss64_nl2_emb64_do0.1_lr0.001_bs32_ep30.pt"
}
]