Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| from threading import Thread | |
| from transformers import pipeline, TextIteratorStreamer | |
# Build the text-generation pipeline once at import time; the chat handler
# below reuses this single module-level object for every request.
# NOTE(review): the repo id names a GGUF checkpoint. transformers usually
# needs an explicit `gguf_file=...` to load GGUF weights -- confirm that this
# call actually succeeds on the target runtime.
pipe = pipeline(
    task="text-generation",
    model="MaziyarPanahi/gemma-2b-it-GGUF",
    device_map="cpu",
)
def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content dicts.

    Two history shapes are accepted:

    * "tuples" style: each entry is a ``(user_msg, bot_msg)`` pair.
    * "messages" style: each entry is a dict carrying ``role`` and
      ``content`` keys (any other keys are dropped).

    Entries or halves of a pair whose content is ``None`` are skipped so
    that no empty turn is forwarded to the model.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            # Unpacking a dict would yield its *keys*, not its values, so
            # dict entries must be read by key.
            role = entry.get("role")
            content = entry.get("content")
            if role is not None and content is not None:
                messages.append({"role": role, "content": content})
            continue

        user_msg, bot_msg = entry
        if user_msg is not None:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg is not None:
            messages.append({"role": "assistant", "content": bot_msg})
    return messages


def generate_response(message, history):
    """Stream the model's reply to ``message``.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation, either as
            ``(user_msg, bot_msg)`` pairs or as role/content dicts.

    Yields:
        The reply text accumulated so far; each yielded string extends the
        previous one.
    """
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    streamer = TextIteratorStreamer(
        pipe.tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generation_kwargs = dict(
        text_inputs=messages,
        streamer=streamer,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    def run_generation():
        try:
            with torch.no_grad():
                pipe(**generation_kwargs)
        except Exception as e:
            # Report the failure in the chat itself, then end the stream so
            # the consuming loop below does not wait forever.
            print("Error:", e)
            streamer.text_queue.put(f"\n[Error: {e}]")
            streamer.end()

    # Generation runs on a worker thread so this generator can read from the
    # streamer while text is still being produced.
    Thread(target=run_generation).start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text
# Gradio UI: a chat interface backed by generate_response.
demo = gr.ChatInterface(
    fn=generate_response,
    title="Gemma 2B GGUF Chatbot",
    # NOTE(review): the leading glyph of this string was a mis-decoded
    # character in the source; it is restored here as a rocket emoji --
    # confirm against the intended original.
    description="🚀 Running GGUF quantized Gemma on Hugging Face Spaces",
    examples=[
        "Explain AI simply",
        "Write Python hello world",
        "What is IoT?",
    ],
    cache_examples=False,
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()