"""Gradio demo that serves text completions from a Phi-2 causal language model."""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "AlexKitipov/phi-2"

# NOTE(security): trust_remote_code=True executes Python code shipped inside the
# model repository. Keep it only while MODEL_ID points at a repository you trust.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    trust_remote_code=True,
    device_map="cpu",
)


def chat(prompt: str) -> str:
    """Generate a completion for *prompt* and return the decoded text.

    Args:
        prompt: Text entered by the user in the "Prompt" textbox.

    Returns:
        The decoded first output sequence with special tokens removed. The
        whole sequence returned by ``generate`` is decoded, which for a causal
        LM includes the prompt followed by up to 256 newly generated tokens.
        An empty prompt yields an empty string without running the model.
    """
    # Nothing to condition on: skip the expensive generate() call (and a
    # possible failure on an empty input tensor) for an empty submission.
    if not prompt:
        return ""

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        # temperature/top_p only take effect when sampling is enabled; without
        # do_sample=True generate() decodes greedily and ignores both values.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


demo = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(
        lines=18,
        label="Prompt",
        placeholder="Type your message here...",
    ),
    outputs=gr.Textbox(
        lines=18,
        label="Response",
        placeholder="The model's response will appear here...",
    ),
    title="Phi‑2 App — by AlexKitipov",
    description="Interactive Phi‑2 demo with larger side‑by‑side text areas.",
)

# Launched at import time on purpose: hosting environments execute this file
# directly as the application entry point.
demo.launch()