Update README.md
Browse files
README.md
CHANGED
|
@@ -38,7 +38,7 @@ export VLLM_USE_FLASHINFER_SAMPLER=0
|
|
| 38 |
export OMP_NUM_THREADS=4
|
| 39 |
|
| 40 |
vllm serve \
|
| 41 |
-
__YOUR_PATH__/
|
| 42 |
--served-model-name MY_MODEL_NAME \
|
| 43 |
--swap-space 16 \
|
| 44 |
--max-num-seqs 32 \
|
|
@@ -69,8 +69,8 @@ vllm serve \
|
|
| 69 |
|
| 70 |
### 【Model Download】
|
| 71 |
```python
|
| 72 |
-
from
|
| 73 |
-
snapshot_download('
|
| 74 |
```
|
| 75 |
|
| 76 |
### 【Overview】
|
|
|
|
| 38 |
export OMP_NUM_THREADS=4
|
| 39 |
|
| 40 |
vllm serve \
|
| 41 |
+
__YOUR_PATH__/QuantTrio/GLM-4.7-GPTQ-Int4-Int8Mix \
|
| 42 |
--served-model-name MY_MODEL_NAME \
|
| 43 |
--swap-space 16 \
|
| 44 |
--max-num-seqs 32 \
|
|
|
|
| 69 |
|
| 70 |
### 【Model Download】
|
| 71 |
```python
|
| 72 |
+
from huggingface_hub import snapshot_download
|
| 73 |
+
snapshot_download('QuantTrio/GLM-4.7-GPTQ-Int4-Int8Mix', cache_dir="your_local_path")
|
| 74 |
```
|
| 75 |
|
| 76 |
### 【Overview】
|