{
  "architectures": [
    "LlamaForCausalLM"
  ],
  "model_type": "llama",
  "torch_dtype": "float16",
  "use_cache": true,
  "vocab_size": 128000
}