Update README.md
Browse files
README.md
CHANGED
|
@@ -41,7 +41,7 @@ export VLLM_USE_FLASHINFER_SAMPLER=0
|
|
| 41 |
export OMP_NUM_THREADS=4
|
| 42 |
|
| 43 |
vllm serve \
|
| 44 |
-
__YOUR_PATH__/
|
| 45 |
--served-model-name MY_MODEL \
|
| 46 |
--swap-space 16 \
|
| 47 |
--max-num-seqs 32 \
|
|
@@ -70,8 +70,8 @@ vllm serve \
|
|
| 70 |
|
| 71 |
### 【Model Download】
|
| 72 |
```python
|
| 73 |
-
from
|
| 74 |
-
snapshot_download('
|
| 75 |
```
|
| 76 |
|
| 77 |
### 【Overview】
|
|
|
|
| 41 |
export OMP_NUM_THREADS=4
|
| 42 |
|
| 43 |
vllm serve \
|
| 44 |
+
__YOUR_PATH__/QuantTrio/MiniMax-M2.1-AWQ \
|
| 45 |
--served-model-name MY_MODEL \
|
| 46 |
--swap-space 16 \
|
| 47 |
--max-num-seqs 32 \
|
|
|
|
| 70 |
|
| 71 |
### 【Model Download】
|
| 72 |
```python
|
| 73 |
+
from huggingface_hub import snapshot_download
|
| 74 |
+
snapshot_download('QuantTrio/MiniMax-M2.1-AWQ', cache_dir="your_local_path")
|
| 75 |
```
|
| 76 |
|
| 77 |
### 【Overview】
|