Update README.md
Browse files
README.md
CHANGED
|
@@ -152,17 +152,14 @@ To achieve optimal results, we recommend always including a system prompt that c
|
|
| 152 |
|
| 153 |
### Basic Instruct Template (V7)
|
| 154 |
|
| 155 |
-
Without vision:
|
| 156 |
```
|
| 157 |
-
<s>[SYSTEM_PROMPT]
|
| 158 |
```
|
| 159 |
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
```
|
| 164 |
|
| 165 |
-
*For more information about the tokenizer please refer to [mistral-common](https://github.com/mistralai/mistral-common)*
|
| 166 |
|
| 167 |
## Metrics
|
| 168 |
|
|
@@ -187,7 +184,7 @@ to implement production-ready inference pipelines with Pixtral-Large-Instruct-24
|
|
| 187 |
|
| 188 |
**_Installation_**
|
| 189 |
|
| 190 |
-
Make sure you install `vLLM >=
|
| 191 |
|
| 192 |
```
|
| 193 |
pip install --upgrade vllm
|
|
@@ -201,26 +198,28 @@ pip install --upgrade mistral_common
|
|
| 201 |
|
| 202 |
You can also make use of a ready-to-go [docker image](https://github.com/vllm-project/vllm/blob/main/Dockerfile).
|
| 203 |
|
| 204 |
-
**_Example_**
|
| 205 |
-
|
| 206 |
```py
|
| 207 |
from vllm import LLM
|
| 208 |
from vllm.sampling_params import SamplingParams
|
|
|
|
|
|
|
| 209 |
|
| 210 |
-
model_name = "mistralai/Pixtral-
|
| 211 |
-
max_img_per_msg = 5
|
| 212 |
|
| 213 |
-
llm = LLM(model=model_name, tokenizer_mode="mistral", limit_mm_per_prompt={"image": max_img_per_msg}, max_model_len=32768)
|
| 214 |
|
| 215 |
def load_system_prompt(repo_id: str, filename: str) -> str:
|
| 216 |
-
file_path = hf_hub_download(repo_id, filename)
|
| 217 |
-
with open(file_path,
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
-
SYSTEM_PROMPT = load_system_prompt(model_name, "vision_system_prompt.txt")
|
| 222 |
|
| 223 |
-
|
|
|
|
|
|
|
| 224 |
|
| 225 |
messages = [
|
| 226 |
{
|
|
@@ -229,11 +228,18 @@ messages = [
|
|
| 229 |
},
|
| 230 |
{
|
| 231 |
"role": "user",
|
| 232 |
-
"content":
|
| 233 |
},
|
| 234 |
]
|
| 235 |
|
| 236 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
```
|
| 238 |
|
| 239 |
|
|
|
|
| 152 |
|
| 153 |
### Basic Instruct Template (V7)
|
| 154 |
|
|
|
|
| 155 |
```
|
| 156 |
+
<s>[SYSTEM_PROMPT]<system prompt>[/SYSTEM_PROMPT][INST]<user message>[/INST]<assistant response></s>[INST]<user message>[/INST]
|
| 157 |
```
|
| 158 |
|
| 159 |
+
**Be careful with subtle missing or trailing white spaces!**
|
| 160 |
+
|
| 161 |
+
*Please make sure to use [mistral-common](https://github.com/mistralai/mistral-common) as the source of truth*
|
|
|
|
| 162 |
|
|
|
|
| 163 |
|
| 164 |
## Metrics
|
| 165 |
|
|
|
|
| 184 |
|
| 185 |
**_Installation_**
|
| 186 |
|
| 187 |
+
Make sure you install `vLLM >= v0.6.4`:
|
| 188 |
|
| 189 |
```
|
| 190 |
pip install --upgrade vllm
|
|
|
|
| 198 |
|
| 199 |
You can also make use of a ready-to-go [docker image](https://github.com/vllm-project/vllm/blob/main/Dockerfile).
|
| 200 |
|
|
|
|
|
|
|
| 201 |
```py
|
| 202 |
from vllm import LLM
|
| 203 |
from vllm.sampling_params import SamplingParams
|
| 204 |
+
from huggingface_hub import hf_hub_download
|
| 205 |
+
from datetime import datetime, timedelta
|
| 206 |
|
| 207 |
+
model_name = "mistralai/Pixtral-Large-Instruct-2411"
|
|
|
|
| 208 |
|
|
|
|
| 209 |
|
| 210 |
def load_system_prompt(repo_id: str, filename: str) -> str:
|
| 211 |
+
file_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
| 212 |
+
with open(file_path, 'r') as file:
|
| 213 |
+
SYSTEM_PROMPT = file.read()
|
| 214 |
+
today = datetime.today().strftime('%Y-%m-%d')
|
| 215 |
+
yesterday = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')
|
| 216 |
+
model_name = repo_id.split("/")[-1]
|
| 217 |
+
return SYSTEM_PROMPT.format(name=model_name, today=today, yesterday=yesterday)
|
| 218 |
|
|
|
|
| 219 |
|
| 220 |
+
system_prompt = load_system_prompt(model_name, "SYSTEM_PROMPT.txt")
|
| 221 |
+
|
| 222 |
+
user_prompt = "How many days ago was Mistral founded?"
|
| 223 |
|
| 224 |
messages = [
|
| 225 |
{
|
|
|
|
| 228 |
},
|
| 229 |
{
|
| 230 |
"role": "user",
|
| 231 |
+
"content": user_prompt
|
| 232 |
},
|
| 233 |
]
|
| 234 |
|
| 235 |
+
sampling_params = SamplingParams(max_tokens=128_000)
|
| 236 |
+
|
| 237 |
+
# note that running this model on GPU requires over 300 GB of GPU RAM
|
| 238 |
+
llm = LLM(model=model_name, tokenizer_mode="mistral", tensor_parallel_size=8, limit_mm_per_prompt={"image": 4})
|
| 239 |
+
|
| 240 |
+
outputs = llm.chat(messages, sampling_params=sampling_params)
|
| 241 |
+
|
| 242 |
+
print(outputs[0].outputs[0].text)
|
| 243 |
```
|
| 244 |
|
| 245 |
|