raddicl2-demo-model / model_4bit_config.json
Christopher Denq
model push
b64620a
raw
history blame contribute delete
769 Bytes
{
  "model_name": "Intel/neural-chat-7b-v3-3",
  "quantization_config": {
    "use_quantization": true,
    "quantization_mode": "4bit"
  },
  "device_config": {
    "device_type": "cpu",
    "max_memory": {
      "0": "15GB",
      "cpu": "9GB"
    },
    "low_cpu_mem_usage": false
  },
  "model_params": {
    "attn_implementation": "sdpa",
    "pad_token_id": 0,
    "trust_remote_code": true
  },
  "model_info": {
    "total_params": 3752071168,
    "trainable_params": 262410240,
    "dtype": "torch.float16",
    "estimated_memory_gb": 3.7387773990631104,
    "quantization_mode": "4bit",
    "dtype_variants": [
      "torch.float16",
      "torch.uint8"
    ],
    "gpu_memory_allocated_gb": 0.1322178840637207,
    "gpu_memory_reserved_gb": 0.158203125
  }
}