File size: 332 Bytes
af3b3f9
b2730db
13752a4
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13

```
# Start a disposable (--rm) text-generation-inference container that loads the
# GPTQ model stored under ./data/WizardCoder-Python-34B-V1.0-GPTQ.
#
#   -p 8080:80      publish container port 80 on host port 8080
#   -e GPTQ_BITS=4, GPTQ_GROUPSIZE=128
#                   GPTQ parameters handed to the container as environment
#                   variables (presumably must match how the checkpoint was
#                   quantized -- confirm against the model card)
#   -e MAX_BEST_OF, MAX_BATCH_PREFILL_TOKENS
#                   server limits passed as environment variables
#   --gpus '"device=0"'
#                   expose only GPU 0; the inner double quotes are part of the
#                   value Docker receives
#   -v "$PWD/data:/data"
#                   bind-mount ./data at /data; quoted so a working directory
#                   containing spaces or glob characters is not word-split
#
# Everything after the image name is passed to the container's entrypoint.
sudo docker run --rm \
  -p 8080:80 \
  -e GPTQ_BITS=4 \
  -e GPTQ_GROUPSIZE=128 \
  -e MAX_BEST_OF=1 \
  -e MAX_BATCH_PREFILL_TOKENS=2048 \
  --gpus '"device=0"' \
  -v "$PWD/data:/data" \
  ghcr.io/huggingface/text-generation-inference:sha-bce5e22 \
  --model-id /data/WizardCoder-Python-34B-V1.0-GPTQ \
  --quantize gptq
```