"""Gradio chat demo backed by Microsoft Phi-2 via a transformers pipeline."""
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

MODEL_NAME = "microsoft/phi-2"  # small model: fast inference, decent quality

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


def chat(user_input, history=None):
    """Generate a reply to *user_input* and return it as a string.

    gr.ChatInterface calls fn(message, history) and expects the reply
    string back; it manages the conversation history itself, so we must
    not append to *history* or return it. (The previous version used the
    old Interface-with-state pattern — mutable [] default, returning
    (history, history) — which breaks under ChatInterface.)

    Args:
        user_input: The user's latest message.
        history: Chat history supplied by ChatInterface; unused here
            because each turn is generated from the message alone.

    Returns:
        The model's continuation text, with the echoed prompt removed.
    """
    result = pipe(
        user_input,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.8,
    )
    text = result[0]["generated_text"]
    # text-generation pipelines prepend the prompt to the output; strip
    # it so the user only sees the model's continuation.
    if text.startswith(user_input):
        text = text[len(user_input):]
    return text.strip()


gr.ChatInterface(
    fn=chat,
    title="⚡ Mobe 2.0 – Super Fast AI",
    description="Using Microsoft Phi-2, blazing fast!",
    theme="soft",
).launch()