import os
from huggingface_hub import InferenceClient
# Hugging Face API token, read from the "forRAG" environment variable.
# NOTE(review): raises KeyError at import time if the variable is unset — confirm
# that is the desired fail-fast behavior for this deployment.
token = os.environ["forRAG"]
# Module-level inference client, shared by all calls in this file; pinned to the
# Qwen2.5-7B instruct model on the Hugging Face Inference API.
client = InferenceClient(
model="Qwen/Qwen2.5-7B-Instruct",
token=token
)
def get_llm_answer(prompt):
    """Stream the model's reply to *prompt*, yielding text fragments.

    Sends a single user message to the module-level ``client`` with
    streaming enabled and yields each non-empty delta chunk as it
    arrives, so callers can render the answer incrementally.
    """
    response = client.chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=5000,
        stream=True,
    )
    for event in response:
        fragment = event.choices[0].delta.content
        # Streams may emit empty/None deltas (e.g. role-only frames); skip them.
        if fragment:
            yield fragment
|