File size: 779 Bytes
88b5c49
fbe83b1
 
12df875
d697806
 
57b6c4f
 
12df875
88b5c49
fbe83b1
12df875
 
 
 
 
 
 
 
d697806
 
 
12df875
 
d697806
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from llama_cpp import Llama
import gradio as gr

# Load the GGUF model once at module import time (blocking disk I/O; all
# requests served by the Gradio app below share this single instance).
# IMPORTANT: Use chat_format="qwen" (qwen2 is NOT supported)
model = Llama(
    model_path="qwen2.5-1.5B-q4.gguf",  # NOTE(review): relative path — resolved against the CWD at launch, confirm
    n_ctx=4096,  # context window size in tokens
    n_gpu_layers=0,  # 0 = CPU-only inference
    chat_format="qwen",  # prompt template; must match the model family (see note above)
)

def chat(user_input):
    """Return the model's reply to a single user message.

    The conversation is rebuilt from scratch on every call — a fixed
    system prompt plus the new user message — so no chat history is
    carried between calls (matching the stateless Gradio interface).
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant. Answer ONLY the question. Do NOT continue, do NOT ask questions, do NOT add extra text."},
        {"role": "user", "content": user_input},
    ]

    completion = model.create_chat_completion(
        messages=conversation,
        max_tokens=256,  # cap the reply length
        temperature=0.7,
    )

    # llama-cpp-python returns an OpenAI-style completion dict.
    return completion["choices"][0]["message"]["content"]

# Build the web UI and start the server (blocks until shut down).
demo = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Qwen2.5-1.5B Q4 Chatbot",
)
demo.launch()