Update README.md
Browse files
README.md
CHANGED
|
@@ -20,7 +20,7 @@ from huggingface_hub import snapshot_download
|
|
| 20 |
from vllm import LLM, SamplingParams
|
| 21 |
|
| 22 |
# Consider toggling "enforce_eager" to False if you want more tokens per second, at the expense of a slower model load (CUDA graphs get captured at startup).
|
| 23 |
-
repo = snapshot_download(repo_id="Staticaliza/Reya-Reasoning
|
| 24 |
llm = LLM(model=repo, dtype="auto", tensor_parallel_size=torch.cuda.device_count(), enforce_eager=True, trust_remote_code=True)
|
| 25 |
|
| 26 |
params = SamplingParams(
|
|
|
|
| 20 |
from vllm import LLM, SamplingParams
|
| 21 |
|
| 22 |
# Consider toggling "enforce_eager" to False if you want more tokens per second, at the expense of a slower model load (CUDA graphs get captured at startup).
|
| 23 |
+
repo = snapshot_download(repo_id="Staticaliza/Reya-Reasoning", allow_patterns=["*.json", "*.bin", "*.safetensors"])
|
| 24 |
llm = LLM(model=repo, dtype="auto", tensor_parallel_size=torch.cuda.device_count(), enforce_eager=True, trust_remote_code=True)
|
| 25 |
|
| 26 |
params = SamplingParams(
|