import asyncio
import urllib.parse

import gradio as gr
import torch
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from transformers import AutoTokenizer, AutoModelForCausalLM
| |
|
| | |
# Load the chat model once at import time. from_pretrained downloads and
# caches the weights on first run, so startup can take a while.
# NOTE(review): no device/dtype arguments — the model runs on CPU in
# default precision unless moved elsewhere; confirm that is intended.
model_id = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
| |
|
| | |
| | chat_history = {} |
| |
|
| | |
def format_context(history, window=3):
    """Render the most recent conversation turns as prompt context.

    Args:
        history: list of (user_text, bot_text) tuples, oldest first.
        window: number of most recent turns to include. Defaults to 3,
            matching the previously hard-coded behavior; 0 yields "".

    Returns:
        One "You: ...\\n<bot>: ...\\n" line pair per included turn,
        concatenated ("" for an empty history).
    """
    # Guard window <= 0 explicitly: history[-0:] would return the WHOLE
    # list, not an empty one.
    recent = history[-window:] if window > 0 else []
    return "".join(f"You: {u}\nπ΄ ππ πππ: {b}\n" for u, b in recent)
| |
|
| | |
| | app = FastAPI() |
| |
|
@app.get("/ai")
async def ai_chat(request: Request):
    """Chat endpoint: GET /ai?query=...&user_id=...

    Builds a prompt from the caller's recent history, generates a reply
    with the module-level phi-2 model, records the exchange, and returns
    JSON: {"reply": <text>}. Unknown/missing user_id falls back to a
    shared "default" history.
    """
    query_params = dict(request.query_params)
    user_input = query_params.get("query", "")
    user_id = query_params.get("user_id", "default")

    history = chat_history.get(user_id, [])
    prompt = format_context(history) + f"You: {user_input}\nπ΄ ππ πππ:"

    def _generate() -> str:
        # Tokenize + generate synchronously. This is dispatched off the
        # event loop below: model.generate() is a long blocking call, and
        # running it inline in an `async def` endpoint would freeze every
        # other request served by this process.
        inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True)
        with torch.no_grad():  # inference only — skip autograd bookkeeping
            outputs = model.generate(
                **inputs, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id
            )
        decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # The decoded text echoes the prompt; keep only what follows the
        # last bot marker.
        return decoded.split("π΄ ππ πππ:")[-1].strip()

    reply = await asyncio.to_thread(_generate)

    # Persist the exchange, keeping at most the last 10 turns per user.
    # NOTE(review): plain dict with no locking — fine for one process,
    # not safe across multiple workers.
    history.append((user_input, reply))
    chat_history[user_id] = history[-10:]

    return JSONResponse({"reply": reply})
| |
|
| | |
| | app = gr.mount_gradio_app(app, gr.Interface(lambda x: x, "textbox", "textbox")) |
| |
|
| | |
| | gradio_app = gr.FastAPI(app) |