File size: 2,343 Bytes
603fd5d b06a3b7 78fbc2b 603fd5d 9d483b1 b06a3b7 603fd5d 9f3abbc 8d0811d 78fbc2b 8d0811d b06a3b7 9d483b1 08b3aaa b06a3b7 9d483b1 603fd5d b06a3b7 8d0811d 9d483b1 8d0811d 603fd5d 9d483b1 b06a3b7 8d0811d 9d483b1 b06a3b7 8d0811d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | import os
import gradio as gr
from openai import AsyncOpenAI #Use the Async client
from dotenv import load_dotenv
import re
# Pull GROQ_API_KEY (and any other settings) from a local .env file into os.environ.
load_dotenv()
# Global client initialization (Prevents connection overhead)
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    # Fail fast at import time rather than surfacing auth errors per-request.
    raise RuntimeError("GROQ_API_KEY is not set")
# Async OpenAI-compatible client pointed at Groq's endpoint; shared by all
# requests so the underlying HTTP connection pool is reused.
client = AsyncOpenAI(
    api_key=api_key,
    base_url="https://api.groq.com/openai/v1",
    timeout=120.0  # generous per-request timeout (seconds) for long generations
)
def sanitize_messages(history):
    """Normalize Gradio chat history into OpenAI-style message dicts.

    Supports both the newer "messages" history format (dicts carrying
    ``role``/``content``) and the legacy pair format (``[user, bot]``).
    Entries of any other type are ignored.
    """
    cleaned = []
    for entry in history:
        if isinstance(entry, dict):
            # Messages format: keep only the two keys the API expects.
            cleaned.append({"role": entry["role"], "content": entry["content"]})
        elif isinstance(entry, (list, tuple)):
            # Legacy pair format; empty/None halves are skipped.
            user_text, bot_text = entry[0], entry[1]
            if user_text:
                cleaned.append({"role": "user", "content": user_text})
            if bot_text:
                cleaned.append({"role": "assistant", "content": bot_text})
    return cleaned
def strip_think(text: str) -> str:
    """Drop any ``<think>...</think>`` reasoning blocks and trim whitespace."""
    # DOTALL lets '.' cross newlines so multi-line reasoning is removed too.
    think_block = re.compile(r"<think>.*?</think>", re.DOTALL)
    without_thoughts = think_block.sub("", text)
    return without_thoughts.strip()
async def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Gradio ChatInterface handler: send the conversation to Groq and
    return the assistant's reply with any <think> blocks removed.

    Parameters match ChatInterface's calling convention: the new user
    ``message``, the chat ``history``, then the additional inputs
    (``system_message``, ``max_tokens``, ``temperature``, ``top_p``).
    Returns the reply text, or an ``"Error: ..."`` string on failure so
    the UI shows the problem instead of crashing.
    """
    # Build the full prompt: system message, prior turns, new user turn.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(sanitize_messages(history))
    messages.append({"role": "user", "content": message})
    try:
        completion = await client.chat.completions.create(
            model="qwen/qwen3-32b",
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        # content is Optional in the SDK (e.g. refusals/tool calls); guard
        # against None so strip_think doesn't raise a masked TypeError.
        text = completion.choices[0].message.content or ""
        return strip_think(text)
    except Exception as e:
        # Broad catch is deliberate: any API/network failure becomes a
        # visible chat message rather than an unhandled server error.
        return f"Error: {str(e)}"
# Chat UI: `respond` receives (message, history) plus each additional input
# below, in order — system prompt, then the three sampling controls.
chatbot = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    # Queue caps concurrent generations at 5 so the async client isn't flooded.
    chatbot.queue(default_concurrency_limit=5).launch()
|