"""FastAPI + Gradio chatbot service built on DialoGPT with a custom persona.

Exposes:
    * ``GET /ai?query=...`` -- stateless JSON endpoint returning ``{"reply": ...}``
    * a Gradio chat UI mounted at ``/`` on the same ASGI application
"""

import gradio as gr
import torch
import uvicorn
from fastapi import FastAPI, Query
from fastapi.middleware.wsgi import WSGIMiddleware
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer once at import time (downloads weights on first run).
model_id = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
model.eval()  # inference only; disables dropout

# Persona prompt prepended to every generation context.
PERSONA = """
[System: You are š•“ š–†š–’ š–š–Žš–’ - a fun, smooth, emotionally intelligent AI.
You speak like a real person, not a robot. Keep it under 15 words. šŸ˜ŠšŸ˜]
"""


def format_context(history):
    """Build the prompt prefix: persona plus the most recent exchanges.

    Args:
        history: list of ``(user, bot)`` message pairs, oldest first.

    Returns:
        The persona text followed by up to the last 3 exchanges, formatted
        in the ``You:`` / bot-name dialogue style the model is prompted with.
    """
    context = PERSONA + "\n"
    if not history:
        return context
    # Use only the last 3 exchanges to keep the prompt short.
    for user, bot in history[-3:]:
        context += f"You: {user}\nš•“ š–†š–’ š–š–Žš–’: {bot}\n"
    return context


def enhance_response(resp, message):
    """Decorate the raw model reply with an emoji and cap it at 15 words.

    Args:
        resp: raw text produced by the model.
        message: the user's input, used to choose the emoji.

    Returns:
        The (possibly emoji-suffixed) reply truncated to 15 words.
    """
    if any(x in message.lower() for x in ["?", "think", "why"]):
        resp += " šŸ¤”"
    elif any(x in resp.lower() for x in ["cool", "great", "love", "fun"]):
        resp += " šŸ˜"
    return " ".join(resp.split()[:15])


def generate_ai_reply(user_input, history):
    """Generate a persona-styled reply to ``user_input``.

    Args:
        user_input: the user's latest message.
        history: list of ``(user, bot)`` pairs used as conversational context.

    Returns:
        A short (<= 15 word) reply string.
    """
    context = format_context(history) + f"You: {user_input}\nš•“ š–†š–’ š–š–Žš–’:"
    # Tokenize via __call__ so we also get an explicit attention mask:
    # DialoGPT has no dedicated pad token, and generate() warns/misbehaves
    # when it must infer the mask from pad_token_id == eos_token_id.
    encoded = tokenizer(
        context, return_tensors="pt", truncation=True, max_length=1024
    )
    with torch.no_grad():  # inference only -- skip autograd bookkeeping
        outputs = model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_new_tokens=50,
            temperature=0.9,
            top_k=40,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the bot's final turn: text after the last bot marker, cut at
    # any hallucinated next user turn.
    response = full_text.split("š•“ š–†š–’ š–š–Žš–’:")[-1].split("\nYou:")[0].strip()
    response = enhance_response(response, user_input)
    return response


app = FastAPI()


# GET /ai?query=some+text => returns {"reply": "AI reply here"}
@app.get("/ai")
async def ai_endpoint(query: str = Query(..., min_length=1)):
    """Stateless reply endpoint.

    For stateless API calls the history is empty; extend with server-side
    sessions if per-client history is needed.
    """
    reply = generate_ai_reply(query, history=[])
    return {"reply": reply}


# Gradio chat interface for interactive web UI
def chat(user_input, history):
    """Gradio callback: append the new exchange and return updated state."""
    history = history or []
    reply = generate_ai_reply(user_input, history)
    history.append((user_input, reply))
    return history, history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Say something...")
    state = gr.State()
    msg.submit(chat, inputs=[msg, state], outputs=[chatbot, state])

# BUG FIX: the original wrapped ``demo.launch(...)`` in WSGIMiddleware, which
# both started a second standalone server and handed FastAPI an object that is
# not a WSGI app. Gradio's supported way to serve Blocks inside an existing
# FastAPI application is ``gr.mount_gradio_app``.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)