Staticaliza committed on
Commit
b023e4e
·
verified ·
1 Parent(s): a687303

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -1
README.md CHANGED
@@ -20,7 +20,7 @@ from huggingface_hub import snapshot_download
20
  from vllm import LLM, SamplingParams
21
 
22
  # Consider toggling "enforce_eager" to False if you want to load the model quicker, at the expense of tokens per second.
23
- repo = snapshot_download(repo_id="Staticaliza/Reya-Reasoning-8B-Distilled-GPTQ-Int4", allow_patterns=["*.json", "*.bin", "*.safetensors"])
24
  llm = LLM(model=repo, dtype="auto", tensor_parallel_size=torch.cuda.device_count(), enforce_eager=True, trust_remote_code=True)
25
 
26
  params = SamplingParams(
 
20
  from vllm import LLM, SamplingParams
21
 
22
  # Consider toggling "enforce_eager" to False if you want to load the model quicker, at the expense of tokens per second.
23
+ repo = snapshot_download(repo_id="Staticaliza/Reya-Reasoning", allow_patterns=["*.json", "*.bin", "*.safetensors"])
24
  llm = LLM(model=repo, dtype="auto", tensor_parallel_size=torch.cuda.device_count(), enforce_eager=True, trust_remote_code=True)
25
 
26
  params = SamplingParams(