"""Tiny persona chatbot: Gradio UI over microsoft/phi-2, CPU-only friendly."""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load lightweight model (CPU-only friendly)
model_id = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
model.eval()  # inference only: disable dropout / training-mode behavior

# Single source of truth for the bot's display name. It is used both to build
# the prompt and to locate the reply in the generated text, so the three
# previously-duplicated copies must never drift apart.
BOT_NAME = "š•“ š–†š–’ š–š–Žš–’"

# Persona style
PERSONA = """
[System: You are š•“ š–†š–’ š–š–Žš–’ - a fun, smooth, emotionally intelligent AI.
You speak like a chill human, not a bot.
Keep replies under 15 words, natural, clever, emotional.]
"""


def build_prompt(message, history):
    """Assemble the persona header, the recent exchanges, and the new message.

    Args:
        message: The user's latest message.
        history: List of (user, bot) string pairs from earlier turns.

    Returns:
        The full prompt string, ending with the bot marker so the model
        continues in the bot's voice.
    """
    prompt = PERSONA
    # Only the last 3 turns: keeps the prompt short enough for CPU inference.
    for user, bot in history[-3:]:
        prompt += f"\nYou: {user}\n{BOT_NAME}: {bot}"
    prompt += f"\nYou: {message}\n{BOT_NAME}:"
    return prompt


def generate(message, history):
    """Generate one in-persona reply and append it to the chat history.

    Args:
        message: The user's latest message.
        history: Gradio state — list of (user, bot) pairs; mutated in place.

    Returns:
        (history, history): the same updated list, fed to both the Chatbot
        component and the State component.
    """
    prompt = build_prompt(message, history)
    inputs = tokenizer(prompt, return_tensors="pt")
    # no_grad: sampling needs no autograd graph — saves CPU time and memory
    # on the free-tier, CPU-only deployment this app targets.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            temperature=0.9,
            top_k=50,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the whole prompt; keep only what follows the
    # LAST bot marker, truncated at the first newline (one chat turn).
    reply = result.split(f"{BOT_NAME}:")[-1].split("\n")[0].strip()
    reply = " ".join(reply.split()[:15])  # hard cap at 15 words per the persona
    history.append((message, reply))
    return history, history


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# š•“ š–†š–’ š–š–Žš–’ šŸ¤–\n*Vibing on CPU only - Hugging Face Free Tier*")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your vibe...", show_label=False)
    state = gr.State([])
    # Submitting the textbox runs generate(msg, state) -> (chatbot, state).
    msg.submit(generate, [msg, state], [chatbot, state])

demo.queue()
demo.launch()