import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Supported models (text-only for now)
MODEL_OPTIONS = {
    "Phi-3.5 Mini Instruct": "microsoft/Phi-3.5-mini-instruct",
    "Phi-3.5 MoE Instruct": "microsoft/Phi-3.5-MoE-instruct",
    "Phi-3 Mini 4K Instruct": "microsoft/Phi-3-mini-4k-instruct",
    "Phi-3 Mini 128K Instruct": "microsoft/Phi-3-mini-128k-instruct"
}
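# Note: the MoE model is far larger than the Mini variants; CPU-only inference may be impractical.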

# Cache for loaded models
loaded_models = {}

# Load model/tokenizer on demand
def load_model(model_id):
    if model_id not in loaded_models:
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            trust_remote_code=True,    # Phi-3 repos ship custom modeling code
            torch_dtype=torch.float32  # full precision is the safe default for CPU inference
        )
        model.eval()
        loaded_models[model_id] = (tokenizer, model)
    return loaded_models[model_id]

# Chat function
def chat_with_model(user_input, model_choice):
    model_id = MODEL_OPTIONS[model_choice]
    tokenizer, model = load_model(model_id)

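    # Wrap the prompt as a single-turn chat and tokenize it with the model's chat template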
    messages = [{"role": "user", "content": user_input}]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt"
    ).to("cpu")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=False  # greedy decoding; temperature/top_p only take effect when do_sample=True
        )

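    # Decode only the newly generated tokens, slicing off the prompt portion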
    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    return response.strip()

# Gradio UI
with gr.Blocks(title="Phi-3 Instruct Explorer") as demo:
    gr.Markdown("## 🧠 Phi-3 Instruct Explorer\nSwitch between Phi-3 instruct models and test responses on CPU.")
    with gr.Row():
        model_choice = gr.Dropdown(label="Choose a model", choices=list(MODEL_OPTIONS.keys()), value="Phi-3.5 Mini Instruct")
    with gr.Row():
        user_input = gr.Textbox(label="Your message", placeholder="Ask me anything...")
    with gr.Row():
        output = gr.Textbox(label="Model response")
    with gr.Row():
        submit = gr.Button("Generate")

    submit.click(fn=chat_with_model, inputs=[user_input, model_choice], outputs=output)

demo.launch()