# Gemma / app.py
# Uploaded by R-Kentaren via huggingface_hub (commit 124e943, verified)
import os
import gradio as gr
from huggingface_hub import InferenceClient
MODEL_ID = "VIDraft/Gemma-3-R1984-12B"
SYSTEM_PROMPT = "You are Gemma-3-R1984-12B, a helpful AI assistant."

# Resolve the Inference API token from either supported environment variable;
# fail fast at import time if neither is set.
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if HF_TOKEN is None or HF_TOKEN == "":
    raise EnvironmentError("Please set HF_TOKEN environment variable.")

# Shared client used by every chat turn.
client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
def respond(history, user_input):
    """Stream the assistant's reply for the latest user message.

    Called by Gradio after ``user_fn`` has already staged the pending turn
    ``[user_message, ""]`` onto *history* and cleared the textbox, so by the
    time this runs *user_input* is usually the empty string.  The original
    code ignored that and prompted the model with an empty user message
    (and appended a duplicate ``["", partial]`` row); here we recover the
    real message from the last history entry in that case.

    Args:
        history: list of ``[user, assistant]`` string pairs.
        user_input: raw textbox value (normally "" once ``user_fn`` ran).

    Yields:
        The full updated history, with the assistant reply growing as
        tokens stream in.
    """
    if not user_input and history and not history[-1][1]:
        # The turn was already staged by user_fn: pull the message back out
        # and drop the placeholder row so it isn't duplicated below.
        user_input = history[-1][0]
        history = history[:-1]

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for human, ai in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": user_input})

    # Stream response tokens from the Inference API.
    stream = client.chat.completions.create(
        messages=messages,
        stream=True,
        max_tokens=1024,
        temperature=0.7,
        top_p=0.9,
    )
    partial = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content or ""
        partial += delta
        yield history + [[user_input, partial]]
# Build and launch the chat UI.
with gr.Blocks(title="Gemma-3-R1984-12B Chat") as demo:
    gr.Markdown("### Chat with VIDraft/Gemma-3-R1984-12B")
    chatbot = gr.Chatbot(label="History")

    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Type your message and press Enter...",
            scale=4,
        )
        clear = gr.Button("Clear")

    def stage_user_turn(user_message, history):
        # Clear the textbox and append the new turn with an empty reply slot.
        return "", history + [[user_message, ""]]

    # On Enter: stage the message synchronously, then stream the reply.
    submit_event = msg.submit(
        stage_user_turn, [msg, chatbot], [msg, chatbot], queue=False
    )
    submit_event.then(
        respond,
        [chatbot, msg],
        chatbot,
    )

    # "Clear" wipes the conversation history.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue().launch(server_name="0.0.0.0", share=False)