File size: 454 Bytes
a9d8ae1
 
 
5939c1f
a9d8ae1
6ec75fd
6d6e365
a9d8ae1
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import os
from huggingface_hub import InferenceClient

# Model served via the Hugging Face Inference API.
_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"

# "forRAG" is the env-var name holding the HF access token; a missing
# variable raises KeyError at import time, which surfaces misconfiguration early.
token = os.environ["forRAG"]

# Shared, module-level client reused by every request.
client = InferenceClient(model=_MODEL_ID, token=token)

def get_llm_answer(prompt):
    """Stream the model's answer to *prompt*, yielding text fragments.

    Sends a single user message to the shared module-level ``client`` and
    yields each non-empty content delta as it arrives, so callers can
    render the response incrementally.

    Args:
        prompt: The user's question / instruction, sent as one chat message.

    Yields:
        str: Successive chunks of the generated answer.
    """
    response = client.chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=5000,
        stream=True,
    )

    for event in response:
        piece = event.choices[0].delta.content
        # Skip keep-alive / empty deltas (None or "") so consumers only
        # ever receive actual text.
        if piece:
            yield piece