"""Gradio chat demo for microsoft/phi-3-mini-4k-instruct.

Runs on Hugging Face Spaces (ZeroGPU) or locally; the ``spaces.GPU``
decorator degrades to a no-op identity decorator outside a Space.
"""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Safe import of the ZeroGPU decorator: outside a HF Space the `spaces`
# package is absent, so fall back to a pass-through decorator.
try:
    from spaces import GPU
except ImportError:
    def GPU(func):
        return func

# Load Phi-3 Mini once at startup (model download/placement is I/O-heavy).
model_id = "microsoft/phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


@GPU  # ensures ZeroGPU allocates a GPU for the duration of each call
def chat_fn(message, history):
    """Generate an assistant reply for *message* given the chat *history*.

    Parameters
    ----------
    message : str
        The user's latest input.
    history : list[dict]
        Gradio "messages"-style history: dicts with "role" and "content"
        keys (roles other than user/assistant are ignored).

    Returns
    -------
    str
        The assistant reply; replies that look like bare code are wrapped
        in a Markdown code fence so Gradio renders them monospaced.
    """
    # Rebuild the conversation in Phi-3's chat format. Each turn must be
    # terminated with <|end|> per the model card's prompt template.
    parts = []
    for item in history:
        role = item.get("role")
        if role in ("user", "assistant"):
            parts.append(f"<|{role}|>\n{item['content']}<|end|>\n")
    parts.append(f"<|user|>\n{message}<|end|>\n<|assistant|>\n")
    prompt = "".join(parts)

    # return_full_text=False yields only the newly generated text, which
    # is more robust than splitting the echoed prompt on "<|assistant|>".
    result = pipe(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )[0]["generated_text"]
    # The model may emit its end-of-turn marker; keep only the text
    # before it so the marker never leaks into the UI.
    reply = result.split("<|end|>")[0].strip()

    # Heuristic auto-format: wrap unfenced code-looking replies in a
    # Markdown code block.
    keywords = [
        "def ", "class ", "import ",
        "function ", "console.log", "public static void",
    ]
    if "```" not in reply and any(k in reply for k in keywords):
        reply = f"```\n{reply}\n```"
    return reply


# Gradio UI: a soft-themed chat interface with a few example prompts.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 💬 Chat with Phi-3 Mini")
    gr.Markdown(
        "Welcome to your AI Assistant powered by Phi-3 Mini. "
        "Ask me anything or request code examples!"
    )
    gr.ChatInterface(
        fn=chat_fn,
        title="",
        examples=[
            "What is Python?",
            "Write a JavaScript function to reverse a string.",
            "Explain how transformers work.",
        ],
        # "messages" type matches the role/content dicts chat_fn expects.
        chatbot=gr.Chatbot(type="messages"),
    )

demo.launch()