{
  "context_length": 2048,
  "dropout": 0.0,
  "embedding_dimensions": 384,
  "hidden_ratio": 2,
  "kv_heads": 4,
  "num_encoder_layers": 10,
  "padding_index": 1,
  "q_heads": 16,
  "teacher_dimensions": 960,
  "vocabulary_size": 33
}
{
  "context_length": 2048,
  "dropout": 0.0,
  "embedding_dimensions": 384,
  "hidden_ratio": 2,
  "kv_heads": 4,
  "num_encoder_layers": 10,
  "padding_index": 1,
  "q_heads": 16,
  "teacher_dimensions": 960,
  "vocabulary_size": 33
}