{ "architectures": [ "GPT2LMHeadModel" ], "vocab_size": 50257, "n_embd": 768, "n_layer": 12, "n_head": 12, "tool_masking": true, "schema_first": true, "schema_format": "json", "mask_ratio": 0.8, "dpo": false, "dpo_beta": 0.1, "uncertainty_threshold": 0.7, "rag": false, "rag_topk": 3, "rag_chunk_size": 256, "semantic_cache_size": 128, "semantic_cache_threshold": 0.85, "quantization_bits": 4, "quantization_backend": "autogptq", "pruning_ratio": 0, "flash_attention": false, "fused_kernels": false }