gemma4-e2b

Runtime error

Update app.py

60af1cd verified 17 days ago

1.65 kB

	import gradio as gr
	import torch
	from threading import Thread
	from transformers import pipeline, TextIteratorStreamer

	# Build the shared text-generation pipeline once at import time; the chat
	# handler reuses it (and its tokenizer) for every request.
	# NOTE(review): the repository name suggests GGUF-quantized weights, and
	# transformers' documented GGUF path passes a `gguf_file` argument when
	# loading. Confirm this call can actually resolve a checkpoint without one.
	pipe = pipeline(
	    task="text-generation",
	    model="MaziyarPanahi/gemma-2b-it-GGUF",
	    device_map="cpu",  # request CPU placement for the model
	)

	def _content_to_text(content):
	    """Return the plain-text part of a chat message's content.

	    Strings are returned unchanged. For a list/tuple of parts, the ``"text"``
	    values of dict parts are joined with newlines. Anything else (including
	    ``None``) yields an empty string.
	    """
	    if isinstance(content, str):
	        return content
	    if isinstance(content, (list, tuple)):
	        text_parts = [
	            part["text"]
	            for part in content
	            if isinstance(part, dict) and isinstance(part.get("text"), str)
	        ]
	        return "\n".join(text_parts)
	    return ""


	def _history_to_messages(history):
	    """Convert chat history into a list of ``{"role", "content"}`` dicts.

	    Two history shapes are accepted:

	    * a sequence of ``(user_msg, bot_msg)`` pairs, and
	    * a sequence of dicts carrying ``"role"`` and ``"content"`` keys.

	    Turns without usable text are dropped instead of being forwarded with
	    empty or ``None`` content.
	    """
	    messages = []
	    for entry in history or []:
	        if isinstance(entry, dict):
	            role = entry.get("role")
	            text = _content_to_text(entry.get("content"))
	            if role in ("user", "assistant") and text:
	                messages.append({"role": role, "content": text})
	            continue

	        user_msg, bot_msg = entry
	        user_text = _content_to_text(user_msg)
	        bot_text = _content_to_text(bot_msg)
	        # Keep only complete exchanges so user and assistant turns stay
	        # paired; a pair with a missing side is skipped entirely.
	        if user_text and bot_text:
	            messages.append({"role": "user", "content": user_text})
	            messages.append({"role": "assistant", "content": bot_text})
	    return messages


	def generate_response(message, history):
	    """Stream a reply to ``message`` given the prior conversation.

	    Args:
	        message: The new user message.
	        history: Previous turns, either as ``(user, assistant)`` pairs or as
	            dicts with ``"role"`` and ``"content"`` keys. May be empty or
	            ``None``.

	    Yields:
	        The reply text accumulated so far, growing with each streamed chunk.
	        If generation raises, ``"\\n[Error: ...]"`` is appended to the stream
	        instead of propagating the exception.
	    """
	    messages = _history_to_messages(history)
	    messages.append({"role": "user", "content": message})

	    streamer = TextIteratorStreamer(
	        pipe.tokenizer,
	        skip_prompt=True,
	        skip_special_tokens=True,
	    )

	    generation_kwargs = dict(
	        text_inputs=messages,
	        streamer=streamer,
	        max_new_tokens=256,
	        temperature=0.7,
	        top_p=0.9,
	        do_sample=True,
	    )

	    def run_generation():
	        try:
	            with torch.no_grad():
	                pipe(**generation_kwargs)
	        except Exception as e:
	            print("Error:", e)
	            # Surface the failure in the chat and close the stream so the
	            # consuming loop below does not wait forever.
	            streamer.text_queue.put(f"\n[Error: {e}]")
	            streamer.end()

	    # Generate on a worker thread so this generator can yield text while
	    # the pipeline is still producing it.
	    Thread(target=run_generation).start()

	    partial_text = ""
	    for new_text in streamer:
	        partial_text += new_text
	        yield partial_text


	# Chat front end: wires the streaming handler into a Gradio chat interface
	# with a few starter prompts; example caching is switched off.
	demo = gr.ChatInterface(
	    fn=generate_response,
	    examples=["Explain AI simply", "Write Python hello world", "What is IoT?"],
	    cache_examples=False,
	    title="Gemma 2B GGUF Chatbot",
	    description="🚀 Running GGUF quantized Gemma on Hugging Face Spaces",
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()