import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# 🔥 Only open models that load in HF Spaces without gated access
MODEL_OPTIONS = {
    "Mistral-7B-Instruct": "mistralai/Mistral-7B-Instruct-v0.1",
    "Qwen2.5-3B-Instruct": "Qwen/Qwen2.5-3B-Instruct",
    "Qwen2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
    "StableLM2-1.6B": "stabilityai/stablelm-2-zephyr-1_6b",
    "SmolLM3-3B": "HuggingFaceTB/SmolLM3-3B",
    "BTLM-3B-8k-base": "cerebras/btlm-3b-8k-base"
}

loaded = {}
SYSTEM_PROMPT = "You are HugginGPT — helpful, friendly, and clear with memory."

def load_model(model_key):
    # Return the cached (tokenizer, model) pair if this model was already loaded.
    if model_key in loaded:
        return loaded[model_key]

    model_id = MODEL_OPTIONS[model_key]
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        # float16 needs a GPU; fall back to float32 on CPU-only Spaces
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
    )

    loaded[model_key] = (tokenizer, model)
    return tokenizer, model

def generate_response(message, history, model_choice):
    tokenizer, model = load_model(model_choice)

    # Build a plain-text prompt: system prompt, then the chat history, then the new turn.
    context = f"system: {SYSTEM_PROMPT}\n"
    for turn in history or []:
        if isinstance(turn, dict):
            # "messages"-style history: {"role": ..., "content": ...}
            context += f"{turn['role']}: {turn['content']}\n"
        else:
            # tuple-style history: (user_message, assistant_message)
            u, a = turn
            context += f"user: {u}\nassistant: {a}\n"
    context += f"user: {message}\nassistant:"

    inputs = tokenizer(context, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        top_p=0.9,
        temperature=0.8,
        pad_token_id=tokenizer.eos_token_id
    )
    # Decode only the newly generated tokens so earlier turns are not echoed back.
    reply = tokenizer.decode(
        output[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=True
    ).strip()
    # Trim anything the model generates past its own turn (e.g. a hallucinated "user:" line).
    reply = reply.split("user:")[0].strip()
    return reply
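
# Optional sketch (not wired in above): most of these instruct-tuned checkpoints
# ship a chat template, so the prompt could also be built with
# tokenizer.apply_chat_template instead of the hand-rolled "user:/assistant:"
# format. This is an illustrative alternative, assuming tuple-style history and
# a tokenizer that defines a chat template; the app does not call it by default.
def build_prompt_with_template(tokenizer, message, history):
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for u, a in history or []:
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": message})
    # Returns a single prompt string with the model's own chat formatting applied.
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )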

with gr.Blocks() as demo:
    gr.Markdown("## HugginGPT")

    model_selector = gr.Dropdown(
        choices=list(MODEL_OPTIONS.keys()),
        value="Mistral-7B-Instruct",
        label="Select model"
    )

    # Pass the dropdown as an extra input so each request uses the currently
    # selected model; reading model_selector.value inside a lambda would only
    # ever see the initial default, never the user's selection.
    chat = gr.ChatInterface(
        fn=generate_response,
        additional_inputs=[model_selector],
        title="HugginGPT"
    )

demo.launch()