# Hugging Face Spaces status at capture time: Runtime error
# File size: 1,652 Bytes
import gradio as gr
import torch
from threading import Thread
from transformers import pipeline, TextIteratorStreamer
# Build the text-generation pipeline once at import time; generate_response
# reuses this single instance for every chat request.
# NOTE(review): the repository name indicates GGUF weights. transformers
# documents a `gguf_file` argument for loading GGUF checkpoints -- confirm this
# call can resolve a loadable checkpoint (the Space reports a runtime error).
pipe = pipeline(
    task="text-generation",
    model="MaziyarPanahi/gemma-2b-it-GGUF",
    device_map="cpu",
)
def _content_to_text(content):
    """Flatten a list of content parts into plain text; pass other values through."""
    if isinstance(content, (list, tuple)):
        # NOTE(review): some Gradio versions may deliver message content as a
        # list of parts such as {"type": "text", "text": ...} -- confirm
        # against the installed version. Only the text parts are kept.
        texts = [
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        if texts:
            return "\n".join(texts)
    return content


def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content dicts."""
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # "messages" format: each entry already carries its own role.
            entries = [(turn.get("role"), turn.get("content"))]
        else:
            # "tuples" format: each entry is a (user_message, bot_message) pair.
            user_msg, bot_msg = turn
            entries = [("user", user_msg), ("assistant", bot_msg)]
        for role, content in entries:
            content = _content_to_text(content)
            # Drop incomplete entries instead of forwarding None to the pipeline.
            if role is not None and content is not None:
                messages.append({"role": role, "content": content})
    return messages


def generate_response(message, history):
    """Stream the model's reply to ``message`` as progressively longer text.

    Args:
        message: The newest user message.
        history: Earlier turns of the conversation, either as
            ``(user_message, bot_message)`` pairs or as dicts with ``"role"``
            and ``"content"`` keys. May be empty or ``None``.

    Yields:
        The reply accumulated so far, once per chunk produced by the streamer.
        If generation fails, an ``[Error: ...]`` line is appended to the
        streamed text instead of raising in the caller.
    """
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    streamer = TextIteratorStreamer(
        pipe.tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )
    generation_kwargs = dict(
        text_inputs=messages,
        streamer=streamer,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    def run_generation():
        # Runs in a worker thread so this generator can consume the streamer
        # while the pipeline is still producing tokens.
        try:
            with torch.no_grad():
                pipe(**generation_kwargs)
        except Exception as e:
            # Thread boundary: report the failure through the stream and close
            # it, otherwise the consuming loop below would wait forever.
            print("Error:", e)
            streamer.text_queue.put(f"\n[Error: {e}]")
            streamer.end()

    # Daemon thread: an abandoned generation must not keep the process alive.
    Thread(target=run_generation, daemon=True).start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text
# 🎨 Gradio UI
# Chat front end wired to generate_response, with three example prompts.
demo = gr.ChatInterface(
    fn=generate_response,
    title="Gemma 2B GGUF Chatbot",
    description="🚀 Running GGUF quantized Gemma on Hugging Face Spaces",
    examples=[
        "Explain AI simply",
        "Write Python hello world",
        "What is IoT?",
    ],
    cache_examples=False,
)

if __name__ == "__main__":
    # Launch the app only when run as a script, not when imported.
    demo.launch()