Update README.md
Browse files
README.md
CHANGED
|
@@ -77,26 +77,6 @@ response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_token
|
|
| 77 |
print(response)
|
| 78 |
```
|
| 79 |
|
| 80 |
-
### With vLLM
|
| 81 |
-
|
| 82 |
-
```python
|
| 83 |
-
from vllm import LLM, SamplingParams
|
| 84 |
-
|
| 85 |
-
llm = LLM(model="Phind/Phind-70B", tensor_parallel_size=4)
|
| 86 |
-
sampling_params = SamplingParams(temperature=0.7, top_p=0.9, max_tokens=1024)
|
| 87 |
-
|
| 88 |
-
prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
|
| 89 |
-
|
| 90 |
-
You are Phind, an intelligent assistant that helps with programming and technical questions.<|eot_id|><|start_header_id|>user<|end_header_id|>
|
| 91 |
-
|
| 92 |
-
Write a Python function to find the longest palindromic substring.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
| 93 |
-
|
| 94 |
-
"""
|
| 95 |
-
|
| 96 |
-
outputs = llm.generate([prompt], sampling_params)
|
| 97 |
-
print(outputs[0].outputs[0].text)
|
| 98 |
-
```
|
| 99 |
-
|
| 100 |
## Chat Template
|
| 101 |
|
| 102 |
This model uses the Llama 3 chat format:
|
|
|
|
| 77 |
print(response)
|
| 78 |
```
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
## Chat Template
|
| 81 |
|
| 82 |
This model uses the Llama 3 chat format:
|