"""š•“ š–†š–’ š–š–Žš–’ — a persona chatbot: DialoGPT-medium on CPU, Gradio Blocks UI,
short-term memory (last 3 exchanges), and lightweight emotional post-processing."""

import re

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Lightweight CPU model
model_id = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Persona definition — prepended verbatim to every generation prompt
PERSONA = """
[System: You are š•“ š–†š–’ š–š–Žš–’ - a fun, smooth, emotionally intelligent AI.
You speak like a real person, not a robot.
Reply like a calm, confident friend who gets the vibe.
Keep responses under 15 words. Use natural speech.
Add emotional flavor: 😊 šŸ¤” šŸ˜]
"""


def format_context(history):
    """Create the generation context: persona plus the last 3 exchanges.

    Args:
        history: list of ``[user_message, bot_reply]`` pairs.

    Returns:
        Prompt string ending after the most recent bot turn.
    """
    context = PERSONA + "\n"
    # Only the last 3 exchanges fit the small CPU model's useful context
    for user, bot in history[-3:]:
        context += f"You: {user}\n"
        context += f"š•“ š–†š–’ š–š–Žš–’: {bot}\n"
    return context


def add_emotional_intelligence(response, message):
    """Post-process a raw model reply: emoji, conversational hook, contractions,
    and a hard 15-word cap.

    Args:
        response: raw text generated by the model.
        message: the user message that prompted it (drives emoji/hook choice).

    Returns:
        The enhanced reply, at most 15 words.
    """
    # Emoji chosen from the user's punctuation and the reply's wording
    if "!" in message or any(w in response.lower() for w in ["cool", "great", "love", "awesome"]):
        response += " 😊"
    elif "?" in message or any(w in response.lower() for w in ["think", "why", "how", "consider"]):
        response += " šŸ¤”"

    # Conversational hook: bounce short answers to a question back
    if "?" in message and not response.endswith("?"):
        if len(response.split()) < 10:
            response += " What do you think?"

    # Make more human-like
    response = response.replace("I am", "I'm").replace("You are", "You're")

    # Free-tier: limit to 15 words max
    words = response.split()
    return " ".join(words[:15]) if len(words) > 15 else response


def generate_response(message, history):
    """Generate a persona reply for ``message`` using memory ``history``.

    Args:
        message: current user message.
        history: list of completed ``[user, bot]`` pairs (memory context).

    Returns:
        Cleaned reply string, hard-capped at 80 characters.
    """
    # Format context with memory, then open the bot's turn
    context = format_context(history) + f"You: {message}\nš•“ š–†š–’ š–š–Žš–’:"

    # Tokenize for CPU efficiency
    inputs = tokenizer.encode(context, return_tensors="pt")

    # Sampled decoding; DialoGPT has no pad token, so reuse EOS
    outputs = model.generate(
        inputs,
        max_new_tokens=48,
        temperature=0.9,
        top_k=40,
        do_sample=True,
        num_beams=1,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode and keep only the text after the bot's final turn marker
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = full_text.split("š•“ š–†š–’ š–š–Žš–’:")[-1].strip()

    # Drop any hallucinated follow-up user turn
    if "\nYou:" in response:
        response = response.split("\nYou:")[0]

    # Apply emotional intelligence
    response = add_emotional_intelligence(response, message)

    # Ensure natural ending (single-char check: "..." could never match here)
    if response and response[-1] not in {".", "!", "?"}:
        response += "." if len(response) > 20 else "..."

    return response[:80]  # Hard character limit


# Chat interface with persistent per-session memory
with gr.Blocks(title="š•“ š–†š–’ š–š–Žš–’", theme=gr.themes.Soft()) as demo:
    # Persistent session state: list of [user, bot] pairs
    history_state = gr.State([])

    gr.Markdown("# š•“ š–†š–’ š–š–Žš–’ \n*Chill • Confident • Remembers You*")

    # FIX: "tokens" is not a valid Chatbot type; the [[user, bot], ...]
    # history this app builds is the "tuples" format.
    chatbot = gr.Chatbot(height=300, bubble_full_width=False, type="tuples")
    msg = gr.Textbox(placeholder="What's on your mind?", container=False)
    clear = gr.Button("New Vibe", size="sm")

    def user(user_message, history):
        """Append the user's turn (bot slot pending) and clear the textbox."""
        return "", history + [[user_message, None]]

    def bot(history):
        """Fill the pending bot slot using full history context.

        FIX: the original appended a dangling ``[None, response]`` pair
        (breaking format_context's memory) and returned ONE value for the
        TWO wired outputs; complete the pair and return it for both.
        """
        message = history[-1][0]
        response = generate_response(message, history[:-1])
        new_history = history[:-1] + [[message, response]]
        return new_history, new_history

    def clear_chat():
        """Reset chat while keeping the session.

        FIX: two outputs (chatbot, history_state) require two return values.
        """
        return [], []

    # Event handling: save user turn, then generate the bot turn
    msg.submit(user, [msg, history_state], [msg, history_state]).then(
        bot, history_state, [chatbot, history_state]
    )
    clear.click(clear_chat, None, [chatbot, history_state])

    # Initialize with empty history
    demo.load(lambda: [], None, history_state)

# Queue requests so concurrent users don't collide on the single CPU model
demo.queue()
demo.launch()