File size: 2,715 Bytes
917601d
d28821f
 
 
06caece
74c9bed
8e2859c
d28821f
1f4abcb
d2b430b
 
16ce850
d28821f
74c9bed
1f4abcb
 
74c9bed
 
1f4abcb
 
d28821f
 
 
06caece
57b43da
1f4abcb
 
 
d28821f
1f4abcb
 
 
57b43da
1f4abcb
d28821f
1f4abcb
d28821f
3fcb18f
06caece
1f4abcb
d2b430b
342a40c
1f4abcb
74d6030
06caece
74d6030
1f4abcb
 
 
 
d28821f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c13009b
d28821f
 
 
 
 
c13009b
d28821f
 
d2b430b
d28821f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gradio as gr
from fastapi import FastAPI, Query
from fastapi.middleware.wsgi import WSGIMiddleware
import uvicorn
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load model and tokenizer once at import time so every request reuses them.
model_id = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# This service only does inference: eval() disables dropout and other
# training-mode layers (the original left the model in training mode).
model.eval()

# Persona header prepended to every generation context by format_context().
PERSONA = """
[System: You are 𝕴 𝖆𝖒 π–π–Žπ–’ - a fun, smooth, emotionally intelligent AI.
You speak like a real person, not a robot. Keep it under 15 words. 😊😏]
"""

def format_context(history):
    """Return the persona header followed by up to the last 3 chat exchanges.

    history: list of (user, bot) string pairs, or a falsy value for a fresh
    conversation. Older turns are dropped to keep the prompt short.
    """
    parts = [PERSONA + "\n"]
    # Keep only the three most recent exchanges.
    for user_msg, bot_msg in (history or [])[-3:]:
        parts.append(f"You: {user_msg}\n𝕴 𝖆𝖒 π–π–Žπ–’: {bot_msg}\n")
    return "".join(parts)

def enhance_response(resp, message):
    """Decorate *resp* with a mood emoji based on keyword cues, then cap it.

    A thinking emoji is added when the user's message looks like a question;
    otherwise a smirk is added when the reply sounds upbeat. The result is
    truncated to at most 15 whitespace-separated words.
    """
    question_cues = ("?", "think", "why")
    upbeat_cues = ("cool", "great", "love", "fun")

    lowered_message = message.lower()
    if any(cue in lowered_message for cue in question_cues):
        resp = resp + " πŸ€”"
    else:
        lowered_resp = resp.lower()
        if any(cue in lowered_resp for cue in upbeat_cues):
            resp = resp + " 😏"

    words = resp.split()
    return " ".join(words[:15])

def generate_ai_reply(user_input, history):
    """Generate one persona reply to *user_input* given the chat *history*.

    history: list of (user, bot) pairs; format_context keeps only the last 3.
    Returns the reply string, emoji-decorated and capped at 15 words by
    enhance_response.
    """
    context = format_context(history) + f"You: {user_input}\n𝕴 𝖆𝖒 π–π–Žπ–’:"
    inputs = tokenizer.encode(context, return_tensors="pt", truncation=True, max_length=1024)

    # Inference only: disable autograd so generate() does not build a
    # gradient graph (the original tracked gradients needlessly, wasting
    # memory and time).
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=50,
            temperature=0.9,
            top_k=40,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the whole prompt; keep only what follows the
    # last persona tag, and drop any follow-up "You:" turn the model invents.
    response = full_text.split("𝕴 𝖆𝖒 π–π–Žπ–’:")[-1].split("\nYou:")[0].strip()
    response = enhance_response(response, user_input)
    return response

app = FastAPI()

# GET /ai?query=some+text  => returns {"reply": "AI reply here"}
@app.get("/ai")
async def ai_endpoint(query: str = Query(..., min_length=1)):
    """Stateless JSON endpoint: generate a single reply for *query*."""
    # For stateless API calls, history is empty (or you can extend to save history)
    reply = generate_ai_reply(query, history=[])
    return {"reply": reply}

# Gradio chat interface for interactive web UI
def chat(user_input, history):
    """Gradio callback: append the new (user, bot) turn and return the state.

    Returns the history twice — once for the Chatbot display and once for
    the State component that carries it into the next turn.
    """
    conversation = history or []
    bot_reply = generate_ai_reply(user_input, conversation)
    conversation.append((user_input, bot_reply))
    return conversation, conversation

# Interactive web UI: a minimal Gradio chat assembled from Blocks components.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Say something...")
    # Carries the (user, bot) history list between submissions.
    state = gr.State()
    msg.submit(chat, inputs=[msg, state], outputs=[chatbot, state])

# Mount Gradio UI at root.
# BUG FIX: demo.launch() starts a standalone server and does not return a
# WSGI application, so wrapping it in WSGIMiddleware is incorrect (Gradio is
# ASGI-based). gr.mount_gradio_app is the supported way to serve a Gradio
# Blocks app inside an existing FastAPI app.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)