import gradio as gr
from fastapi import FastAPI, Query
from fastapi.middleware.wsgi import WSGIMiddleware
import uvicorn
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load model and tokenizer once at import time so every request reuses them.
model_id = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# Your AI persona prompt
# NOTE(review): the persona name and trailing symbols below look
# mojibake-damaged (styled Unicode letters/emoji lost in a re-encode).
# They are kept byte-identical here; confirm the intended original text.
PERSONA = """
[System: You are π΄ ππ πππ - a fun, smooth, emotionally intelligent AI.
You speak like a real person, not a robot. Keep it under 15 words. ππ]
"""
def format_context(history):
    """Build the generation prompt: persona header plus recent chat turns.

    history is a list of (user, bot) string pairs; only the last three
    exchanges are included so the prompt stays short.
    """
    parts = [PERSONA + "\n"]
    if history:
        parts.extend(
            f"You: {user}\nπ΄ ππ πππ: {bot}\n" for user, bot in history[-3:]
        )
    return "".join(parts)
def enhance_response(resp, message):
    """Cap the reply at 15 words and append a mood emoji.

    Parameters:
        resp: the raw model reply.
        message: the user's message that prompted the reply.
    Returns the (possibly truncated) reply with at most one emoji appended.
    """
    # BUG FIX: truncate BEFORE appending the emoji. The original appended
    # the emoji first and truncated last, so any reply already 15+ words
    # long silently lost its enhancement.
    resp = " ".join(resp.split()[:15])
    if any(x in message.lower() for x in ["?", "think", "why"]):
        # Questions / reflective prompts get a thinking face.
        resp += " π€"
    elif any(x in resp.lower() for x in ["cool", "great", "love", "fun"]):
        # Upbeat replies get a smile (only when no question emoji was added).
        resp += " π"
    return resp
def generate_ai_reply(user_input, history):
    """Produce a persona-styled reply to user_input given the chat history."""
    # Assemble the prompt: persona + recent history + the new user turn.
    prompt = format_context(history) + f"You: {user_input}\nπ΄ ππ πππ:"
    # Truncate to the model's context window to avoid overlong inputs.
    input_ids = tokenizer.encode(
        prompt, return_tensors="pt", truncation=True, max_length=1024
    )
    generated = model.generate(
        input_ids,
        max_new_tokens=50,
        temperature=0.9,
        top_k=40,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Keep only the text after the final persona marker, and cut anything
    # the model hallucinated as the user's next turn.
    reply = decoded.split("π΄ ππ πππ:")[-1].split("\nYou:")[0].strip()
    return enhance_response(reply, user_input)
app = FastAPI()


# GET /ai?query=some+text => returns {"reply": "AI reply here"}
@app.get("/ai")
async def ai_endpoint(query: str = Query(..., min_length=1)):
    """Stateless reply endpoint.

    For stateless API calls the history is empty (extend this to persist
    history per caller if needed).
    """
    return {"reply": generate_ai_reply(query, history=[])}
# Gradio chat interface for interactive web UI
def chat(user_input, history):
history = history or []
reply = generate_ai_reply(user_input, history)
history.append((user_input, reply))
return history, history
# Interactive web UI: a chatbot display fed by a single textbox.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Say something...")
    state = gr.State()
    msg.submit(chat, inputs=[msg, state], outputs=[chatbot, state])

# Mount Gradio UI at root.
# BUG FIX: the original wrapped demo.launch(...) in WSGIMiddleware.
# demo.launch() starts Gradio's OWN server and returns a tuple, not a
# WSGI callable — and Gradio apps are ASGI, not WSGI — so the UI was
# never actually served through FastAPI. gr.mount_gradio_app is the
# supported way to attach a Blocks app to an existing FastAPI app.
app = gr.mount_gradio_app(app, demo, path="/")
# Run the combined FastAPI + Gradio app when executed as a script.
if __name__ == "__main__":
    # Bind on all interfaces; 7860 is Gradio's conventional port.
    uvicorn.run(app, host="0.0.0.0", port=7860)