File size: 454 Bytes
a9d8ae1
 
 
5939c1f
a9d8ae1
6ec75fd
6d6e365
a9d8ae1
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import os
from huggingface_hub import InferenceClient

# Model served via the Hugging Face Inference API.
_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"

# "forRAG" is the env-var name holding the HF access token; a missing
# variable raises KeyError at import time, which surfaces misconfiguration early.
token = os.environ["forRAG"]

# Shared, module-level client reused by every request.
client = InferenceClient(model=_MODEL_ID, token=token)

def get_llm_answer(prompt):
    """Stream the model's answer to *prompt*, yielding text fragments.

    Sends a single user message to the shared module-level ``client`` and
    yields each non-empty content delta as it arrives, so callers can
    render the response incrementally.

    Args:
        prompt: The user's question / instruction, sent as one chat message.

    Yields:
        str: Successive chunks of the generated answer.
    """
    response = client.chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=5000,
        stream=True,
    )

    for event in response:
        piece = event.choices[0].delta.content
        # Skip keep-alive / empty deltas (None or "") so consumers only
        # ever receive actual text.
        if piece:
            yield piece