{
  "architectures": [
    "TeddyGModel"
  ],
  "cls_loss": false,
  "annotation_loss_weight": null,
  "modeling_loss_weight": null,
  "d_hid": 4096,
  "d_model": 1024,
  "dropout": 0.02,
  "gradient_checkpointing": false,
  "initializer_range": 0.02,
  "layer_activation": "gelu",
  "mask_token": "<mask>",
  "mask_token_id": 1,
  "masking_loss": false,
  "max_position_embeddings": 2048,
  "n_cls": 0,
  "n_layers_cls": 0,
  "nheads": 16,
  "nlayers": 24,
  "ntoken": 43840,
  "pad_token_id": -100,
  "pre_norm": false,
  "torch_dtype": "float32"
}