File size: 1,525 Bytes
917601d
74c9bed
e44d7d1
08aad81
 
e44d7d1
8e2859c
08aad81
e44d7d1
d2b430b
 
16ce850
08aad81
e44d7d1
74c9bed
08aad81
1f4abcb
08aad81
 
 
 
d28821f
08aad81
 
 
 
 
 
 
 
 
d28821f
08aad81
 
e44d7d1
 
d28821f
e44d7d1
d28821f
e44d7d1
c13009b
08aad81
c13009b
08aad81
 
d2b430b
08aad81
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import urllib.parse

# Load model: microsoft/phi-2, a causal LM served via transformers.
# NOTE(review): from_pretrained downloads/caches weights on first run —
# this does network and disk I/O at import time.
model_id = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Memory for users: maps user_id (str) -> list of (user_message, bot_reply)
# tuples. In-process only — lost on restart and not shared across workers.
chat_history = {}

# Format history
def format_context(history):
    """Render the last three (user, bot) turns as a prompt transcript.

    Each turn becomes "You: <user>\n𝕴 𝖆𝖒 π–π–Žπ–’: <bot>\n"; the pieces are
    concatenated in order. An empty history yields an empty string.
    """
    turns = []
    for user_msg, bot_msg in history[-3:]:
        turns.append(f"You: {user_msg}\n𝕴 𝖆𝖒 π–π–Žπ–’: {bot_msg}\n")
    return "".join(turns)

# FastAPI app — the ASGI application the /ai route below is registered on.
app = FastAPI()

@app.get("/ai")
async def ai_chat(request: Request):
    """Chat endpoint: GET /ai?query=...&user_id=...

    Generates a reply with the phi-2 model, conditioned on this user's
    last three turns, and returns JSON of the form {"reply": "..."}.
    Missing params default to an empty query and user_id "default".
    """
    query_params = dict(request.query_params)
    user_input = query_params.get("query", "")
    user_id = query_params.get("user_id", "default")

    # Get this user's prior turns and build the transcript-style prompt,
    # ending on the bot marker so the model completes the bot's line.
    history = chat_history.get(user_id, [])
    prompt = format_context(history) + f"You: {user_input}\n𝕴 𝖆𝖒 π–π–Žπ–’:"

    # Tokenize & run model. Inference only — disable autograd bookkeeping
    # to cut memory use and speed up generation.
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text after the final bot marker, and truncate at the
    # first "You:" the model may hallucinate as it continues the dialogue —
    # otherwise fabricated user turns leak into the reply.
    reply = decoded.split("𝕴 𝖆𝖒 π–π–Žπ–’:")[-1].split("You:")[0].strip()

    # Save memory: append this turn and keep only the 10 most recent
    # (user, reply) pairs per user to bound prompt growth.
    history.append((user_input, reply))
    chat_history[user_id] = history[-10:]

    return JSONResponse({"reply": reply})

# Wrap with Gradio to serve: mount a minimal Gradio UI onto the FastAPI app.
# BUG FIX: mount_gradio_app requires the mount path argument — without it the
# original call raised a TypeError at startup.
app = gr.mount_gradio_app(app, gr.Interface(lambda x: x, "textbox", "textbox"), path="/")

# Launch it by pointing an ASGI server at `app`, e.g. `uvicorn module:app`.
# BUG FIX: the original `gr.FastAPI(app)` raised AttributeError — gradio
# exposes no `FastAPI` class. The name is kept for backward compatibility
# with anything that imports `gradio_app` from this module.
gradio_app = app