import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import urllib.parse
# Load model
# Checkpoint is pulled from the Hugging Face hub at import time
# (downloads weights on first run — needs network access).
model_id = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# Memory for users
# In-process chat memory: user_id -> list of (user_message, bot_reply) tuples.
# NOTE(review): not persisted and not shared across worker processes —
# confirm a single-worker deployment is intended.
chat_history = {}
# Format history
def format_context(history, max_turns=3):
    """Render the most recent chat turns as a prompt prefix.

    Args:
        history: list of (user_message, bot_reply) tuples, oldest first.
        max_turns: number of most-recent turns to include (default 3,
            matching the previously hard-coded window).

    Returns:
        One "You: ...\\n<bot>: ...\\n" pair per included turn, concatenated;
        the empty string when *history* is empty.
    """
    return "".join(
        f"You: {u}\nπ΄ ππ πππ: {b}\n" for u, b in history[-max_turns:]
    )
# FastAPI app
# ASGI application; the /ai route below and the Gradio UI are mounted on it.
app = FastAPI()
@app.get("/ai")
async def ai_chat(request: Request):
    """Answer `?query=...` with a model reply, keeping per-user memory.

    Query params: `query` (user message, defaults to "") and `user_id`
    (memory key, defaults to "default"). Returns JSON {"reply": ...}.
    """
    params = dict(request.query_params)
    message = params.get("query", "")
    uid = params.get("user_id", "default")

    # Recent turns for this user; first-time users start with no history.
    turns = chat_history.get(uid, [])
    prompt = format_context(turns) + f"You: {message}\nπ΄ ππ πππ:"

    # Run the model on the assembled prompt.
    encoded = tokenizer(prompt, return_tensors="pt", return_attention_mask=True)
    generated = model.generate(
        **encoded, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Keep only the text after the final bot marker.
    reply = decoded.split("π΄ ππ πππ:")[-1].strip()

    # Remember the exchange, capped at the 10 most recent turns.
    turns.append((message, reply))
    chat_history[uid] = turns[-10:]
    return JSONResponse({"reply": reply})
# Wrap with Gradio to serve a minimal echo UI alongside the /ai endpoint.
# BUG FIX: `gr.FastAPI` does not exist in gradio — the original last line
# raised AttributeError at import time. `gr.mount_gradio_app` also requires
# an explicit mount path. The mounted FastAPI `app` IS the ASGI application;
# run it with e.g. `uvicorn thisfile:app`.
app = gr.mount_gradio_app(
    app,
    gr.Interface(lambda x: x, "textbox", "textbox"),
    path="/",
)
# Backward-compatible alias so references to `gradio_app` keep working.
gradio_app = app