WhoIsShe's picture
Add MNN 4-bit quantized model with Model Card
52855b8 verified
{
"llm_model": "llm.mnn",
"llm_weight": "llm.mnn.weight",
"backend_type": "cpu",
"thread_num": 4,
"precision": "low",
"memory": "low",
"sampler_type": "penalty",
"penalty": 1.1
}