# Page header captured together with the file (not part of the program):
# gemma4-e2b / app.py
# sourav520's picture
# Update app.py
# 60af1cd verified
import gradio as gr
import torch
from threading import Thread
from transformers import pipeline, TextIteratorStreamer
# ✅ Load GGUF model
# Build the text-generation pipeline once, at import time; generate_response
# reuses this single module-level instance for every chat request.
# NOTE(review): the repository name indicates GGUF weights, and no explicit
# `gguf_file` is passed here -- confirm that this call can actually load them.
pipe = pipeline(
    task="text-generation",
    device_map="cpu",
    model="MaziyarPanahi/gemma-2b-it-GGUF",
)
def _turn_text(content):
    """Return the plain text carried by one chat turn, or None if it has none.

    A string is returned unchanged.  A list/tuple is treated as a sequence of
    content parts: the ``"text"`` values of its dict parts are concatenated.
    Anything else (``None``, file tuples, UI components) carries no text.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        pieces = [
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        return "".join(pieces) if pieces else None
    return None


def _history_to_messages(history):
    """Convert chat history into a list of ``{"role", "content"}`` dicts.

    Two history shapes are accepted:

    * pair style     -- ``[[user_msg, bot_msg], ...]``
    * messages style -- ``[{"role": ..., "content": ...}, ...]``

    Turns without usable text are dropped.  For pair-style history the whole
    exchange is dropped when either side is missing, so that a half-finished
    exchange does not leave an unanswered user turn in the prompt.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            text = _turn_text(turn.get("content"))
            if role in ("user", "assistant") and text is not None:
                messages.append({"role": role, "content": text})
            continue

        user_msg, bot_msg = turn
        user_text = _turn_text(user_msg)
        bot_text = _turn_text(bot_msg)
        if user_text is None or bot_text is None:
            continue
        messages.append({"role": "user", "content": user_text})
        messages.append({"role": "assistant", "content": bot_text})
    return messages


def generate_response(message, history):
    """Stream the model's reply to ``message`` as a progressively longer string.

    Args:
        message: The new user message.
        history: Previous turns, either as ``[user, assistant]`` pairs or as
            ``{"role", "content"}`` dicts; ``None`` is treated as empty.

    Yields:
        The reply accumulated so far, once per chunk produced by the streamer.
        If generation fails, the text ``"\n[Error: ...]"`` is appended to the
        stream instead of raising into the caller.
    """
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    streamer = TextIteratorStreamer(
        pipe.tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generation_kwargs = dict(
        text_inputs=messages,
        streamer=streamer,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    def run_generation():
        # Runs on a worker thread so the loop below can consume the streamer
        # while the pipeline is still generating.
        try:
            with torch.no_grad():
                pipe(**generation_kwargs)
        except Exception as e:
            print("Error:", e)
            # Surface the failure in the chat, then close the stream so the
            # consuming loop below is not left waiting for more text.
            streamer.text_queue.put(f"\n[Error: {e}]")
            streamer.end()

    Thread(target=run_generation).start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text
# 🎨 Gradio UI
# Chat front end: wires generate_response into a Gradio ChatInterface.
# The description previously contained a mis-encoded emoji (mojibake); it is
# restored to the intended rocket character so the UI text renders correctly.
demo = gr.ChatInterface(
    fn=generate_response,
    title="Gemma 2B GGUF Chatbot",
    description="🚀 Running GGUF quantized Gemma on Hugging Face Spaces",
    examples=[
        "Explain AI simply",
        "Write Python hello world",
        "What is IoT?",
    ],
    # Do not pre-compute outputs for the example prompts.
    cache_examples=False,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()