import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# 🔥 Open models that load in HF Spaces without requiring gated access
MODEL_OPTIONS = {
    "Mistral-7B-Instruct": "mistralai/Mistral-7B-Instruct-v0.1",
    "Qwen2.5-3B-Instruct": "Qwen/Qwen2.5-3B-Instruct",
    "Qwen2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
    "StableLM2-1.6B": "stabilityai/stablelm-2-zephyr-1_6b",
    "SmolLM3-3B": "HuggingFaceTB/SmolLM3-3B",
    "BTLM-3B-8k-base": "cerebras/btlm-3b-8k-base",
}
loaded = {}  # cache of (tokenizer, model) pairs, keyed by display name
SYSTEM_PROMPT = "You are HugginGPT, a helpful, friendly assistant that remembers the conversation."
def load_model(model_key):
    # return the cached pair if this model was already loaded
    if model_key in loaded:
        return loaded[model_key]
    model_id = MODEL_OPTIONS[model_key]
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.float16
    )
    loaded[model_key] = (tokenizer, model)
    return tokenizer, model
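
# Sketch of an alternative load path (assumption: hardware with limited GPU
# memory). A 7B model needs roughly 14 GB in float16; 8-bit quantization via
# bitsandbytes roughly halves that, at some quality cost:
#
#     from transformers import BitsAndBytesConfig
#     model = AutoModelForCausalLM.from_pretrained(
#         model_id,
#         device_map="auto",
#         quantization_config=BitsAndBytesConfig(load_in_8bit=True),
#     )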
def generate_response(message, history, model_choice):
    tokenizer, model = load_model(model_choice)
    # build context: system prompt + conversation memory + new message
    context = f"system: {SYSTEM_PROMPT}\n"
    if history:
        for u, a in history:
            context += f"user: {u}\nassistant: {a}\n"
    context += f"user: {message}\nassistant:"
    inputs = tokenizer(context, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        top_p=0.9,
        temperature=0.8
    )
    text = tokenizer.decode(output[0], skip_special_tokens=True)
    # generate() echoes the prompt, so keep only what follows the last "assistant:"
    reply = text.split("assistant:")[-1].strip()
    return reply
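
# Alternative sketch (not wired into the UI): the instruct models above ship
# their own chat templates, and transformers' apply_chat_template usually
# produces better-formed prompts than the hand-rolled "user:/assistant:"
# transcript. Assumptions: the selected model has a chat template (base models
# such as btlm-3b-8k-base do not) and the template accepts a "system" role
# (Mistral-7B-Instruct-v0.1's template rejects one).
def generate_response_with_template(message, history, model_choice):
    tokenizer, model = load_model(model_choice)
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for u, a in history or []:
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": message})
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    output = model.generate(
        input_ids,
        max_new_tokens=200,
        do_sample=True,
        top_p=0.9,
        temperature=0.8
    )
    # decode only the newly generated tokens, skipping the echoed prompt
    return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()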
with gr.Blocks() as demo:
    gr.Markdown("## HugginGPT")
    model_selector = gr.Dropdown(
        choices=list(MODEL_OPTIONS.keys()),
        value="Mistral-7B-Instruct",
        label="Select model"
    )
    # Pass the dropdown via additional_inputs so the handler receives the
    # current selection; model_selector.value only holds the initial default
    # and never reflects what the user picks at runtime.
    chat = gr.ChatInterface(
        fn=generate_response,
        additional_inputs=[model_selector],
        title="HugginGPT"
    )
demo.launch()
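
# Optional sketch (assumption: deployed on a shared Space): enabling Gradio's
# queue keeps long generations from hitting HTTP request timeouts.
# demo.queue().launch()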