{
  "prev_hidden_dims": 16,
  "curr_hidden_dims": 64,
  "time_embedding_dims": 8,
  "lstm_embedding_dims": 128,
  "dropout": 0.2,
  "num_layers": 2,
  "num_heads": 8,
  "batch_size": 8,
  "learning_rate": "1e-4",
  "weight_decay": "1e-4",
  "grad_accum_steps": 16
}