redasers
/

raddicl2-demo-model

Text Generation

text-classification

deception-detection

few-shot-learning

retrieval-augmented-generation

in-context-learning

4-bit precision

Model card Files Files and versions

raddicl2-demo-model / model_4bit_config.json

Christopher Denq

model push

b64620a about 2 months ago

history blame contribute delete

769 Bytes

	{
	"model_name": "Intel/neural-chat-7b-v3-3",
	"quantization_config": {
	"use_quantization": true,
	"quantization_mode": "4bit"
	},
	"device_config": {
	"device_type": "cpu",
	"max_memory": {
	"0": "15GB",
	"cpu": "9GB"
	},
	"low_cpu_mem_usage": false
	},
	"model_params": {
	"attn_implementation": "sdpa",
	"pad_token_id": 0,
	"trust_remote_code": true
	},
	"model_info": {
	"total_params": 3752071168,
	"trainable_params": 262410240,
	"dtype": "torch.float16",
	"estimated_memory_gb": 3.7387773990631104,
	"quantization_mode": "4bit",
	"dtype_variants": [
	"torch.float16",
	"torch.uint8"
	],
	"gpu_memory_allocated_gb": 0.1322178840637207,
	"gpu_memory_reserved_gb": 0.158203125
	}
	}