{ "architectures": [ "GPT2LMHeadModel" ], "vocab_size": 50257, "n_embd": 768, "n_layer": 12, "n_head": 12, "tool_masking": true, "schema_first": true, "schema_format": "json", "mask_ratio": 0.8, "dpo": false, "dpo_beta": 0.1, "uncertainty_threshold": 0.7, "rag": false, "rag_topk": 3, "rag_chunk_size": 256, "semantic_cache_size": 128, "semantic_cache_threshold": 0.85, "quantization_bits": 4, "quantization_backend": "autogptq", "pruning_ratio": 0, "flash_attention": false, "fused_kernels": false }