# NOTE(review): removed non-Python scrape residue that preceded the code
# (a "File size" line, git-blame commit hashes, and a line-number gutter).
# It was not part of the program and made the file a SyntaxError.
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import urllib.parse
# Load model and tokenizer
# NOTE(review): downloads/loads the full model at import time — slow first run.
model_id = "microsoft/phi-2"  # Hugging Face model repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# Global memory for all users
# Maps user_id -> list of (user_message, bot_reply) tuples; each user's list
# is capped at 10 exchanges below, but user_ids themselves are never evicted.
chat_history: dict = {}
# Format past messages into a prompt prefix
def format_context(history, max_exchanges=3):
    """Build a prompt prefix from the most recent chat exchanges.

    Args:
        history: list of (user_message, bot_reply) tuples, oldest first.
        max_exchanges: number of most-recent exchanges to include
            (default 3, matching the original hard-coded window).

    Returns:
        One "You: ...\\n<bot marker>: ...\\n" pair per included exchange;
        empty string for an empty history or max_exchanges == 0.
    """
    # Guard: history[-0:] would return the WHOLE list, not an empty one.
    recent = history[-max_exchanges:] if max_exchanges > 0 else []
    # str.join instead of repeated += (avoids quadratic concatenation).
    return "".join(
        f"You: {user}\nπ΄ ππ πππ: {bot}\n" for user, bot in recent
    )
# Main chat function with memory per user
def chat_with_memory(query_string):
    """Answer one chat turn, keeping per-user conversation memory.

    Args:
        query_string: raw URL query string carrying "query" (the user's
            message) and "user_id" (memory bucket; defaults to "default").

    Returns:
        dict with a single "reply" key holding the model's answer.

    Side effects:
        Updates module-level chat_history, keeping only the 10 most
        recent exchanges for this user.
    """
    params = urllib.parse.parse_qs(query_string)
    message = params.get("query", [""])[0]
    uid = params.get("user_id", ["default"])[0]

    # Previous exchanges for this user (empty list for a new user).
    past = chat_history.get(uid, [])

    # Recent context + current turn, ending with the bot marker so the
    # model completes the reply after it.
    prompt = format_context(past) + f"You: {message}\nπ΄ ππ πππ:"

    encoded = tokenizer(prompt, return_tensors="pt", return_attention_mask=True)
    generated = model.generate(
        **encoded, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Everything after the LAST bot marker is the new reply.
    answer = decoded.split("π΄ ππ πππ:")[-1].strip()

    # Remember this exchange, capped at the last 10 per user.
    past.append((message, answer))
    chat_history[uid] = past[-10:]
    return {"reply": answer}
# Expose a public endpoint: /ai?query=...&user_id=...
# The raw URL query string is passed straight through to chat_with_memory.
# (Fixed: the original final line ended with a stray "|" scrape artifact,
# which was a SyntaxError.)
iface = gr.Interface(
    fn=chat_with_memory,
    inputs="text",   # raw query string, e.g. "query=hi&user_id=alice"
    outputs="json",  # {"reply": ...}
)
iface.launch()