{
  "model_type": "VerySmollGPT",
  "architectures": [
    "VerySmollGPT"
  ],
  "vocab_size": 104,
  "d_model": 256,
  "n_layers": 6,
  "n_heads": 8,
  "d_ff": 1024,
  "max_seq_len": 128,
  "dropout": 0.1,
  "block_size": 128,
  "tie_word_embeddings": true,
  "training_config": {
    "num_epochs": 3,
    "batch_size": 16,
    "learning_rate": 0.0003,
    "weight_decay": 0.01
  }
}