{
  "_name_or_path": "None",
  "architectures": [
    "NetFoundLanguageModelling"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 1024,
  "encoder_layout": null,
  "flat": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "limit_bursts": false,
  "max_burst_length": 109,
  "max_bursts": 12,
  "max_position_embeddings": 109,
  "metaFeatures": 4,
  "model_max_length": 1308,
  "model_type": "NetFound",
  "no_direction_loss": false,
  "no_meta": false,
  "no_metadata_loss": false,
  "no_mlm": false,
  "no_swapped_bursts": false,
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "p": 0,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "pretraining": true,
  "rep_output_path": null,
  "roformer": false,
  "rotary_value": false,
  "subflow_bursts": 3,
  "subflow_len": -1,
  "torch_dtype": "float32",
  "transformers_version": "4.47.0.dev0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 65539
}