# Hugging Face Space: streaming chat demo for VIDraft/Gemma-3-R1984-12B
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Model served through the Hugging Face Inference API.
MODEL_ID = "VIDraft/Gemma-3-R1984-12B"
SYSTEM_PROMPT = "You are Gemma-3-R1984-12B, a helpful AI assistant."

# Accept either of the two common environment-variable names for the token.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not HF_TOKEN:
    raise EnvironmentError("Please set HF_TOKEN environment variable.")

# Single shared client; reused by every chat request.
client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
def respond(history, user_input):
    """Stream the model's reply for the latest user turn.

    Args:
        history: Chat pairs ``[[user, bot], ...]``. When invoked via the
            ``msg.submit(user_fn, ...).then(respond, ...)`` chain,
            ``user_fn`` has already appended ``[user_message, ""]`` and
            cleared the textbox, so ``user_input`` arrives as ``""``.
        user_input: Raw textbox value (empty in the chained case above).

    Yields:
        The updated history list with the assistant reply filled in as
        tokens stream back from the Inference API.
    """
    # BUG FIX: the original unconditionally appended `user_input` as a new
    # user turn. In the wired event chain the textbox is cleared *before*
    # this runs, so the model was asked to answer an empty message while
    # the real message sat in `history` as a duplicate pair with no reply.
    if history and not user_input and history[-1][1] == "":
        # Chained call: the last pair is the pending [user_message, ""].
        prior, pending = history[:-1], history[-1][0]
        history = [list(pair) for pair in history]
    else:
        # Direct call: stage the pending pair ourselves.
        prior, pending = history, user_input
        history = [list(pair) for pair in history] + [[user_input, ""]]

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for human, ai in prior:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": pending})

    # Stream response tokens.
    stream = client.chat.completions.create(
        messages=messages,
        stream=True,
        max_tokens=1024,
        temperature=0.7,
        top_p=0.9,
    )
    partial = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content or ""
        partial += delta
        history[-1][1] = partial
        yield history
with gr.Blocks(title="Gemma-3-R1984-12B Chat") as demo:
    gr.Markdown("### Chat with VIDraft/Gemma-3-R1984-12B")
    chatbot = gr.Chatbot(label="History")

    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Type your message and press Enter...",
            scale=4,
        )
        clear = gr.Button("Clear")

    def queue_user_turn(text, chat):
        # Empty the textbox and stage the pending [user, ""] pair.
        return "", chat + [[text, ""]]

    # NOTE(review): queue_user_turn empties the textbox first, so the `msg`
    # value forwarded to `respond` by `.then(...)` is already "".
    submit_event = msg.submit(
        queue_user_turn, [msg, chatbot], [msg, chatbot], queue=False
    )
    submit_event.then(respond, [chatbot, msg], chatbot)

    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue().launch(server_name="0.0.0.0", share=False)