{ "model_name": "Hyformer", "embedding_dim": 512, "embedding_hidden_dim": 2048, "num_heads": 8, "num_local_heads": 8, "head_dim": 64, "num_layers": 12, "bias": false, "attention_dropout": 0.0, "feed_forward_dropout": 0.0, "prediction_dropout": null, "layer_norm_eps": 1e-05, "vocab_size": 596, "max_seq_len": 128, "prediction_task_type": null, "num_prediction_tasks": null, "num_physchem_tasks": 200, "pretrained_filepath": null, "predictor_hidden_size": null, "predictor_dropout": null, "predictor_num_heads": null, "prediction_hidden_dim": 512, "set_separate_task_tokens": null, "flash_attention": true, "dropout": null, "lambda_hparam": null, "pooler_dropout": null, "pooler_hidden_dim": null }