{
  "model_name": "Intel/neural-chat-7b-v3-3",
  "quantization_config": {
    "use_quantization": true,
    "quantization_mode": "4bit"
  },
  "device_config": {
    "device_type": "cpu",
    "max_memory": {
      "0": "15GB",
      "cpu": "9GB"
    },
    "low_cpu_mem_usage": false
  },
  "model_params": {
    "attn_implementation": "sdpa",
    "pad_token_id": 0,
    "trust_remote_code": true
  },
  "model_info": {
    "total_params": 3752071168,
    "trainable_params": 262410240,
    "dtype": "torch.float16",
    "estimated_memory_gb": 3.7387773990631104,
    "quantization_mode": "4bit",
    "dtype_variants": [
      "torch.float16",
      "torch.uint8"
    ],
    "gpu_memory_allocated_gb": 0.1322178840637207,
    "gpu_memory_reserved_gb": 0.158203125
  }
}