# Gemma / app.py
# Uploaded by R-Kentaren via huggingface_hub (commit 124e943, verified)
import os
import gradio as gr
from huggingface_hub import InferenceClient
MODEL_ID = "VIDraft/Gemma-3-R1984-12B"
SYSTEM_PROMPT = "You are Gemma-3-R1984-12B, a helpful AI assistant."

# Resolve the Inference API token from either supported environment variable;
# fail fast at import time if neither is set.
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if HF_TOKEN is None or HF_TOKEN == "":
    raise EnvironmentError("Please set HF_TOKEN environment variable.")

# Shared client used by every chat turn.
client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
def respond(history, user_input):
    """Stream the assistant's reply for the latest user message.

    Called by Gradio after ``user_fn`` has already staged the pending turn
    ``[user_message, ""]`` onto *history* and cleared the textbox, so by the
    time this runs *user_input* is usually the empty string.  The original
    code ignored that and prompted the model with an empty user message
    (and appended a duplicate ``["", partial]`` row); here we recover the
    real message from the last history entry in that case.

    Args:
        history: list of ``[user, assistant]`` string pairs.
        user_input: raw textbox value (normally "" once ``user_fn`` ran).

    Yields:
        The full updated history, with the assistant reply growing as
        tokens stream in.
    """
    if not user_input and history and not history[-1][1]:
        # The turn was already staged by user_fn: pull the message back out
        # and drop the placeholder row so it isn't duplicated below.
        user_input = history[-1][0]
        history = history[:-1]

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for human, ai in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": user_input})

    # Stream response tokens from the Inference API.
    stream = client.chat.completions.create(
        messages=messages,
        stream=True,
        max_tokens=1024,
        temperature=0.7,
        top_p=0.9,
    )
    partial = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content or ""
        partial += delta
        yield history + [[user_input, partial]]
# Build and launch the chat UI.
with gr.Blocks(title="Gemma-3-R1984-12B Chat") as demo:
    gr.Markdown("### Chat with VIDraft/Gemma-3-R1984-12B")
    chatbot = gr.Chatbot(label="History")

    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Type your message and press Enter...",
            scale=4,
        )
        clear = gr.Button("Clear")

    def stage_user_turn(user_message, history):
        # Clear the textbox and append the new turn with an empty reply slot.
        return "", history + [[user_message, ""]]

    # On Enter: stage the message synchronously, then stream the reply.
    submit_event = msg.submit(
        stage_user_turn, [msg, chatbot], [msg, chatbot], queue=False
    )
    submit_event.then(
        respond,
        [chatbot, msg],
        chatbot,
    )

    # "Clear" wipes the conversation history.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue().launch(server_name="0.0.0.0", share=False)