File size: 2,343 Bytes
603fd5d
b06a3b7
78fbc2b
603fd5d
9d483b1
b06a3b7
603fd5d
9f3abbc
 
8d0811d
 
 
 
78fbc2b
8d0811d
 
 
 
b06a3b7
9d483b1
08b3aaa
 
 
 
 
 
 
 
 
 
b06a3b7
9d483b1
 
 
 
 
 
 
 
 
 
 
 
603fd5d
 
b06a3b7
 
8d0811d
9d483b1
8d0811d
 
 
 
 
 
603fd5d
9d483b1
 
b06a3b7
8d0811d
9d483b1
b06a3b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d0811d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import gradio as gr
from openai import AsyncOpenAI  #Use the Async client
from dotenv import load_dotenv
import re

# Load variables from a local .env file into the process environment.
load_dotenv()

# Global client initialization (Prevents connection overhead)
# Fail fast at import time if the Groq credential is missing, rather
# than erroring on the first chat request.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    raise RuntimeError("GROQ_API_KEY is not set")

# One shared async client reused by every request. Groq exposes an
# OpenAI-compatible endpoint, hence the custom base_url; the generous
# 120 s timeout covers long generations.
client = AsyncOpenAI(
    api_key=api_key,
    base_url="https://api.groq.com/openai/v1",
    timeout=120.0
)

def sanitize_messages(history):
    """Normalize Gradio chat history into OpenAI-style message dicts.

    Accepts both the modern "messages" format (dicts with ``role`` and
    ``content``) and the legacy pair format (``[user_text, bot_text]``).
    Entries with a missing role/content (e.g. a pending or metadata-only
    Gradio message) are skipped instead of raising, so a malformed turn
    never reaches the API call.

    Args:
        history: List of message dicts and/or 2-item user/bot pairs.

    Returns:
        A new list of ``{"role": ..., "content": ...}`` dicts.
    """
    sanitized = []
    for m in history:
        # New dict format: keep only the two keys the API accepts;
        # drop entries whose role or content is absent/None.
        if isinstance(m, dict):
            role = m.get("role")
            content = m.get("content")
            if role is not None and content is not None:
                sanitized.append({"role": role, "content": content})
        # Old [user, bot] pair format; either side may be empty.
        # Length guard avoids IndexError on short sequences.
        elif isinstance(m, (list, tuple)) and len(m) >= 2:
            if m[0]:
                sanitized.append({"role": "user", "content": m[0]})
            if m[1]:
                sanitized.append({"role": "assistant", "content": m[1]})
    return sanitized

def strip_think(text: str) -> str:
    """Drop any <think>...</think> reasoning spans and trim whitespace."""
    # DOTALL lets the span cover multi-line chain-of-thought blocks;
    # the lazy quantifier keeps multiple spans from merging into one.
    pattern = re.compile(r"<think>.*?</think>", re.DOTALL)
    cleaned = pattern.sub("", text)
    return cleaned.strip()

async def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Produce one assistant reply for gr.ChatInterface.

    Assembles system prompt + sanitized history + the new user turn,
    sends it to the Groq-hosted model, and returns the reply with any
    <think>...</think> spans stripped. Failures are surfaced to the UI
    as an "Error: ..." string rather than raised.
    """
    convo = (
        [{"role": "system", "content": system_message}]
        + sanitize_messages(history)
        + [{"role": "user", "content": message}]
    )

    try:
        completion = await client.chat.completions.create(
            model="qwen/qwen3-32b",
            messages=convo,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        reply = completion.choices[0].message.content
        return strip_think(reply)
    except Exception as e:
        return f"Error: {str(e)}"


# Gradio chat UI: `respond` is the async handler; the additional inputs
# map positionally to its system_message / max_tokens / temperature /
# top_p parameters.
chatbot = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

if __name__ == "__main__":
    # Cap concurrent handler runs so simultaneous users share the single
    # async client without unbounded parallel API requests.
    chatbot.queue(default_concurrency_limit=5).launch()