File size: 1,652 Bytes
41d057e
 
 
60af1cd
41d057e
60af1cd
 
 
 
 
41d057e
 
 
60af1cd
41d057e
8b3cdf4
60af1cd
41d057e
 
 
8b3cdf4
41d057e
 
 
60af1cd
8b3cdf4
41d057e
 
8b3cdf4
60af1cd
 
41d057e
60af1cd
41d057e
8b3cdf4
 
41d057e
8b3cdf4
166c408
 
8b3cdf4
60af1cd
166c408
60af1cd
8b3cdf4
166c408
 
8b3cdf4
 
41d057e
60af1cd
41d057e
 
 
 
8b3cdf4
4fb5a88
41d057e
 
60af1cd
 
8b3cdf4
60af1cd
 
 
8b3cdf4
166c408
41d057e
 
 
166c408
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import gradio as gr
import torch
from threading import Thread
from transformers import pipeline, TextIteratorStreamer

# Build the shared text-generation pipeline once, at import time, on CPU.
# NOTE(review): the repository name indicates GGUF weights; the transformers
# GGUF guide describes passing a `gguf_file=` argument when loading such
# checkpoints -- confirm this call resolves the intended file.
pipe = pipeline(
    task="text-generation",
    model="MaziyarPanahi/gemma-2b-it-GGUF",
    device_map="cpu",
)

def _content_to_text(content):
    """Return the plain-text part of a chat message's content, or None.

    Strings are returned unchanged. For a list/tuple of blocks, the string
    ``"text"`` fields of dict blocks are concatenated. Anything else (``None``,
    file tuples, components) has no usable text and yields ``None``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        # NOTE(review): assumes structured content is a sequence of blocks
        # shaped like {"type": "text", "text": ...} -- confirm against the
        # Gradio version actually installed.
        parts = [
            block["text"]
            for block in content
            if isinstance(block, dict) and isinstance(block.get("text"), str)
        ]
        return "".join(parts) if parts else None
    return None


def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content dicts.

    Two history shapes are accepted:

    * pair style: ``[(user_msg, bot_msg), ...]``
    * messages style: ``[{"role": ..., "content": ...}, ...]``

    Entries without usable text are skipped instead of being forwarded with
    a ``None`` or non-string content.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Messages style. Unpacking a dict as a pair would iterate its
            # KEYS, so this case must be handled before the pair branch.
            role = turn.get("role")
            text = _content_to_text(turn.get("content"))
            if role in ("user", "assistant") and text is not None:
                messages.append({"role": role, "content": text})
            continue

        # Pair style. Drop the whole pair when either side has no text so a
        # lone user or assistant turn is never emitted from a broken pair.
        user_msg, bot_msg = turn
        user_text = _content_to_text(user_msg)
        bot_text = _content_to_text(bot_msg)
        if user_text is None or bot_text is None:
            continue
        messages.append({"role": "user", "content": user_text})
        messages.append({"role": "assistant", "content": bot_text})
    return messages


def generate_response(message, history):
    """Stream the model's reply to ``message`` given the prior chat ``history``.

    Args:
        message: The new user message.
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            ``{"role", "content"}`` dicts. ``None`` or empty means no history.

    Yields:
        The reply accumulated so far, growing as new text arrives from the
        streamer. If generation raises, the yielded text ends with an
        ``[Error: ...]`` marker instead of the exception propagating.
    """
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    streamer = TextIteratorStreamer(
        pipe.tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generation_kwargs = dict(
        text_inputs=messages,
        streamer=streamer,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    def run_generation():
        try:
            with torch.no_grad():
                pipe(**generation_kwargs)
        except Exception as e:
            # Thread boundary: report the failure through the stream and
            # close it, so the consuming loop below can finish rather than
            # wait on a stream that will never be ended.
            print("Error:", e)
            streamer.text_queue.put(f"\n[Error: {e}]")
            streamer.end()

    # The pipeline call runs in a worker thread while this generator reads
    # the streamer, so partial text can be yielded as it is produced.
    Thread(target=run_generation).start()

    partial_text = ""

    for new_text in streamer:
        partial_text += new_text
        yield partial_text


# Chat front end: exposes generate_response through a Gradio ChatInterface.
demo = gr.ChatInterface(
    generate_response,
    examples=["Explain AI simply", "Write Python hello world", "What is IoT?"],
    cache_examples=False,
    title="Gemma 2B GGUF Chatbot",
    description="🚀 Running GGUF quantized Gemma on Hugging Face Spaces",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()