|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
import urllib.parse |
|
|
|
|
|
|
|
|
# Hugging Face model id for the base LLM used by this demo.
model_id = "microsoft/phi-2"


# Tokenizer and model are loaded once at import time (downloads on first run).
tokenizer = AutoTokenizer.from_pretrained(model_id)


# NOTE(review): loaded with default dtype on CPU unless transformers picks a
# device — no device_map/dtype is specified here; confirm deployment target.
model = AutoModelForCausalLM.from_pretrained(model_id)


# Per-user conversation memory: maps user_id -> list of (user_text, bot_reply)
# tuples. In-process only; state is lost on restart and is not thread-safe.
chat_history = {}
|
|
|
|
|
|
|
|
def format_context(history):
    """Render the most recent turns of *history* as a prompt prefix.

    Only the last three (user, bot) pairs are included; each becomes a
    "You: ...\\n<marker>: ...\\n" pair of lines.
    """
    recent_turns = history[-3:]
    rendered = [f"You: {user}\nπ΄ ππ πππ: {bot}\n" for user, bot in recent_turns]
    return "".join(rendered)
|
|
|
|
|
|
|
|
def chat_with_memory(query_string):
    """Handle one chat turn for a query-string-encoded request.

    Parameters
    ----------
    query_string : str
        URL query string, e.g. ``"query=hello&user_id=alice"``. Missing
        ``query`` defaults to ``""``; missing ``user_id`` defaults to
        ``"default"``.

    Returns
    -------
    dict
        ``{"reply": <generated assistant text>}``.

    Side effects: appends the turn to the module-level ``chat_history`` for
    this user, keeping only the last 10 turns.
    """
    parsed = urllib.parse.parse_qs(query_string)
    user_input = parsed.get("query", [""])[0]
    user_id = parsed.get("user_id", ["default"])[0]

    history = chat_history.get(user_id, [])

    # Prompt = last few turns of context plus the new user line, ending on the
    # assistant marker so the model continues as the assistant.
    prompt = format_context(history) + f"You: {user_input}\nπ΄ ππ πππ:"

    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True)

    # Inference only — no_grad avoids building an autograd graph per request.
    with torch.no_grad():
        outputs = model.generate(
            **inputs, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id
        )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Take the text after the LAST assistant marker, then truncate at any
    # model-invented next user turn. (Bug fix: previously the entire tail was
    # returned, so hallucinated "You: ..." follow-up turns leaked into the
    # reply and were stored in history.)
    reply = decoded.split("π΄ ππ πππ:")[-1].split("You:")[0].strip()

    history.append((user_input, reply))
    chat_history[user_id] = history[-10:]  # cap memory at 10 turns per user

    return {"reply": reply}
|
|
|
|
|
|
|
|
# Minimal web front end: one text box in (raw query string), JSON reply out.
iface = gr.Interface(fn=chat_with_memory, inputs="text", outputs="json")

# Start the Gradio server (blocks until shut down).
iface.launch()