{
  "model_name": "Hyformer",
  "embedding_dim": 512,
  "embedding_hidden_dim": 2048,
  "num_heads": 8,
  "num_local_heads": 8,
  "head_dim": 64,
  "num_layers": 12,
  "bias": false,
  "attention_dropout": 0.0,
  "feed_forward_dropout": 0.0,
  "prediction_dropout": null,
  "layer_norm_eps": 1e-05,
  "vocab_size": 596,
  "max_seq_len": 128,
  "prediction_task_type": null,
  "num_prediction_tasks": null,
  "num_physchem_tasks": 200,
  "pretrained_filepath": null,
  "predictor_hidden_size": null,
  "predictor_dropout": null,
  "predictor_num_heads": null,
  "prediction_hidden_dim": 512,
  "set_separate_task_tokens": null,
  "flash_attention": true,
  "dropout": null,
  "lambda_hparam": null,
  "pooler_dropout": null,
  "pooler_hidden_dim": null
}