Spaces:
Sleeping
Sleeping
File size: 2,908 Bytes
e179439 e82baba 7291080 e179439 e82baba e179439 e82baba e179439 018b8a9 e82baba e179439 e82baba e179439 e82baba e179439 e82baba e179439 e82baba a28ffcf 018b8a9 a28ffcf e82baba a28ffcf e82baba a28ffcf e82baba e179439 a28ffcf e82baba 7291080 e82baba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
# Dropdown label -> model identifier handed to ``from_pretrained``
# (text-only models for now).
MODEL_OPTIONS = {
    "Phi-3.5 Mini Instruct": "microsoft/Phi-3.5-mini-instruct",
    "Phi-3.5 MoE Instruct": "microsoft/Phi-3.5-MoE-instruct",
    "Phi-3 Mini 4K Instruct": "microsoft/Phi-3-mini-4k-instruct",
    "Phi-3 Mini 128K Instruct": "microsoft/Phi-3-mini-128k-instruct",
}

# model identifier -> (tokenizer, model); filled lazily by load_model().
loaded_models = {}

# Sample prompts offered as one-click examples in the UI.
EXAMPLES = [
    "Write a short story about a robot who learns to paint.",
    "Summarize this paragraph: The Basque coast is known for its rugged cliffs, rich maritime history, and vibrant local culture.",
    "Explain how solar panels work in simple terms.",
    "Translate this sentence into Basque: 'The sea is calm today.'",
    "Write a noir-style intro for a detective in Gros.",
]
# Load model/tokenizer on demand
def load_model(model_id):
    """Return the ``(tokenizer, model)`` pair for ``model_id``, loading it once.

    The pair is stored in the module-level ``loaded_models`` dict, so later
    calls with the same ``model_id`` reuse the objects instead of loading them
    again. Nothing in this function ever removes an entry from that dict.

    Args:
        model_id: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple; the model has been put in eval mode.
    """
    # Fast path: this model was already loaded by an earlier call.
    if model_id in loaded_models:
        return loaded_models[model_id]

    # NOTE: trust_remote_code=True lets transformers run Python code shipped
    # with the model repository.
    tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    net = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        torch_dtype=torch.float32,
    )
    net.eval()

    entry = (tok, net)
    loaded_models[model_id] = entry
    return entry
# Chat function
def chat_with_model(user_input, model_choice):
    """Generate a single-turn reply to ``user_input`` with the selected model.

    Args:
        user_input: The user's message; sent as the only ``user`` turn.
        model_choice: A key of ``MODEL_OPTIONS`` (the label picked in the UI).

    Returns:
        The newly generated text (prompt tokens and special tokens removed,
        surrounding whitespace stripped). Returns ``""`` without loading any
        model when ``user_input`` is empty or whitespace only.

    Raises:
        KeyError: If ``user_input`` is non-empty and ``model_choice`` is not a
            key of ``MODEL_OPTIONS``.
    """
    # Nothing to answer: skip the expensive model load and generation.
    if not user_input or not user_input.strip():
        return ""

    model_id = MODEL_OPTIONS[model_choice]
    tokenizer, model = load_model(model_id)

    messages = [{"role": "user", "content": user_input}]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to("cpu")

    with torch.no_grad():
        # Greedy decoding. temperature/top_p are deliberately not passed:
        # they only take effect when do_sample=True, and transformers warns
        # about them when they are combined with do_sample=False.
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=False,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    )
    return response.strip()
# Gradio UI
# NOTE(review): the original indentation was lost; the nesting below is
# reconstructed from syntax. The examples section is assumed to sit at Blocks
# level rather than inside the last Row -- confirm against the deployed app.
with gr.Blocks(title="Phi-3 Instruct Explorer") as demo:
    # Page header.
    gr.Markdown("## 🧠 Phi-3 Instruct Explorer\nSwitch between Phi-3 instruct models and test responses on CPU.")

    # One row per widget: model selector, prompt, response, trigger button.
    with gr.Row():
        model_choice = gr.Dropdown(
            choices=list(MODEL_OPTIONS),
            value="Phi-3.5 Mini Instruct",
            label="Choose a model",
        )

    with gr.Row():
        user_input = gr.Textbox(
            label="Your message",
            placeholder="Ask me anything...",
        )

    with gr.Row():
        output = gr.Textbox(label="Model response")

    with gr.Row():
        submit = gr.Button("Generate")

    # Clickable sample prompts that fill the message box.
    gr.Markdown("### 🧪 Try an example prompt:")
    gr.Examples(examples=EXAMPLES, inputs=user_input)

    # Run the selected model on the message when the button is pressed.
    submit.click(
        fn=chat_with_model,
        inputs=[user_input, model_choice],
        outputs=output,
    )

demo.launch()
|