# API / app.py
# Trigger82's picture
# Update app.py
# d28821f verified
# raw
# history blame
# 2.72 kB
import gradio as gr
from fastapi import FastAPI, Query
from fastapi.middleware.wsgi import WSGIMiddleware
import uvicorn
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load model and tokenizer once at import time (module-level singletons shared
# by every request; avoids reloading the weights per call).
model_id = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# Persona system prompt prepended to every generation context.
# NOTE: the styled-unicode name below is also used as a parsing marker in
# generate_ai_reply, so the exact characters matter.
PERSONA = """
[System: You are 𝕴 𝖆𝖒 π–π–Žπ–’ - a fun, smooth, emotionally intelligent AI.
You speak like a real person, not a robot. Keep it under 15 words. 😊😏]
"""
def format_context(history):
    """Build the prompt prefix: PERSONA followed by recent (user, bot) turns.

    Only the last 3 exchanges are included so the prompt stays short.
    Returns just the persona header when history is empty/None.
    """
    if not history:
        return PERSONA + "\n"
    recent_turns = history[-3:]
    transcript = "".join(
        f"You: {user_msg}\n𝕴 𝖆𝖒 π–π–Žπ–’: {bot_msg}\n"
        for user_msg, bot_msg in recent_turns
    )
    return PERSONA + "\n" + transcript
def enhance_response(resp, message):
    """Decorate the reply with a mood emoji, then cap it at 15 words.

    A thinking emoji is added when the user's message looks like a question;
    otherwise a smirk is added when the reply itself sounds enthusiastic.
    """
    message_lc = message.lower()
    if any(token in message_lc for token in ("?", "think", "why")):
        resp = f"{resp} πŸ€”"
    else:
        resp_lc = resp.lower()
        if any(token in resp_lc for token in ("cool", "great", "love", "fun")):
            resp = f"{resp} 😏"
    # Enforce the persona's "under 15 words" rule.
    words = resp.split()
    return " ".join(words[:15])
def generate_ai_reply(user_input, history):
    """Generate one persona reply for user_input given (user, bot) history.

    Builds the prompt from format_context, samples up to 50 new tokens from
    the model, and extracts the text between the persona marker and the
    next user turn. The result is post-processed by enhance_response.
    """
    prompt = format_context(history) + f"You: {user_input}\n𝕴 𝖆𝖒 π–π–Žπ–’:"
    # Truncate to 1024 tokens so we never exceed the model's context window.
    input_ids = tokenizer.encode(
        prompt, return_tensors="pt", truncation=True, max_length=1024
    )
    generated = model.generate(
        input_ids,
        max_new_tokens=50,
        temperature=0.9,
        top_k=40,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Keep only the text after the last persona marker, up to the next user turn.
    reply = decoded.split("𝕴 𝖆𝖒 π–π–Žπ–’:")[-1].split("\nYou:")[0].strip()
    return enhance_response(reply, user_input)
app = FastAPI()


# GET /ai?query=some+text => returns {"reply": "AI reply here"}
@app.get("/ai")
async def ai_endpoint(query: str = Query(..., min_length=1)):
    """Stateless generation endpoint: one reply per call, no memory.

    For stateless API calls, history is empty (or you can extend to save history).
    """
    return {"reply": generate_ai_reply(query, history=[])}
# Gradio chat interface for interactive web UI
def chat(user_input, history):
    """Handle one chat turn: generate a reply and record the exchange.

    Returns the updated history twice — once for the Chatbot display and
    once for the State component.
    """
    conversation = history or []
    bot_reply = generate_ai_reply(user_input, conversation)
    conversation.append((user_input, bot_reply))
    return conversation, conversation
with gr.Blocks() as demo:
    # Conversation display; shows the (user, bot) tuple history from chat().
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Say something...")
    # Persists the history list between turns.
    state = gr.State()
    # On Enter: chat() appends the new exchange and updates both outputs.
    msg.submit(chat, inputs=[msg, state], outputs=[chatbot, state])
# Mount Gradio UI at root.
# BUG FIX: demo.launch() returns an (app, local_url, share_url) tuple — not a
# WSGI application — and Gradio serves an ASGI app anyway, so wrapping the
# launch() result in WSGIMiddleware cannot work (and needlessly started a
# second server). gr.mount_gradio_app is the supported way to attach a
# Blocks UI to an existing FastAPI app.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # 7860 is the standard Hugging Face Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)