Staticaliza
/

ReyaChat-Reasoning

Text Generation

compressed-tensors

Model card Files Files and versions

Staticaliza committed on Jun 23, 2025

Commit

b023e4e

·

verified ·

1 Parent(s): a687303

Update README.md

Files changed (1) hide show

README.md +1 -1

README.md CHANGED Viewed

@@ -20,7 +20,7 @@ from huggingface_hub import snapshot_download
 from vllm import LLM, SamplingParams
 # Consider setting "enforce_eager" to False if you want more tokens per second, at the expense of slower model loading (CUDA graph capture).
-repo = snapshot_download(repo_id="Staticaliza/Reya-Reasoning-8B-Distilled-GPTQ-Int4", allow_patterns=["*.json", "*.bin", "*.safetensors"])
 llm = LLM(model=repo, dtype="auto", tensor_parallel_size=torch.cuda.device_count(), enforce_eager=True, trust_remote_code=True)
 params = SamplingParams(

 from vllm import LLM, SamplingParams
 # Consider setting "enforce_eager" to False if you want more tokens per second, at the expense of slower model loading (CUDA graph capture).
+repo = snapshot_download(repo_id="Staticaliza/Reya-Reasoning", allow_patterns=["*.json", "*.bin", "*.safetensors"])
 llm = LLM(model=repo, dtype="auto", tensor_parallel_size=torch.cuda.device_count(), enforce_eager=True, trust_remote_code=True)
 params = SamplingParams(