Avatar-chat / app.py
Ephraimmm's picture
Upload 16 files
06e6988 verified
raw
history blame
6.83 kB
import os
import traceback
import numpy as np
import gradio as gr
from openai import AsyncOpenAI
from langsmith import traceable
from sklearn.metrics.pairwise import cosine_similarity
from src.prompts import system_prompt
# from src.name_extractor import extract_name_gliner
from src.models import CacheEntry
from src.config import Config
from src.utils import FileReader
# ---------------------------------------------------------------------
# CHAT CLASS
# ---------------------------------------------------------------------
class MyProfileAvatarChat(Config, FileReader):
    """Profile avatar chatbot backed by OpenAI chat completions.

    The system prompt is assembled once from the imported ``system_prompt``
    plus ``self.linkedin_profile`` and ``self.additional_info`` (presumably
    populated by ``Config`` / ``FileReader`` -- confirm against those classes).
    Answered questions are kept in an in-memory list together with their
    embeddings so that repeated or semantically similar questions can reuse
    earlier answers.
    """

    _CHAT_MODEL = "gpt-4o-mini"
    _EMBEDDING_MODEL = "text-embedding-3-small"
    # Returned when the chat completion fails and no earlier answer can be reused.
    _FALLBACK_REPLY = "Sorry, I could not generate a response right now. Please try again."

    def __init__(self, max_history_turns: int = 10, similarity_thresh: float = 0.80):
        """Set up the OpenAI client, the system prompt and the QA cache.

        Args:
            max_history_turns: Number of most recent messages (the new user
                message included) sent to the model on a cache miss.
            similarity_thresh: Minimum cosine similarity for a cached question
                to count as "similar" to a new one.
        """
        Config.__init__(self)
        FileReader.__init__(self)

        # The persona name is read from the environment. A GLiNER-based
        # fallback that extracted it from the LinkedIn profile is disabled.
        # NOTE(review): the variable is spelled "PROFIL_NAME"; when it is unset
        # the prompt below ends with "in character as None" -- confirm the
        # deployment always defines it.
        self.name = os.getenv("PROFIL_NAME")

        self.openai = AsyncOpenAI(api_key=self.openai_api_key)

        # Build the system prompt once; it is reused for every request.
        self.system_prompt = (
            system_prompt
            + f"## Linkedin Profile:\n{self.linkedin_profile}\n\n"
            + f"## Addidional Information:\n{self.additional_info}\n\n"
            + f"With this context, please chat with user, always staying in character as {self.name}."
        )

        # Settings
        self.max_history_turns = max_history_turns
        self.similarity_threshold = similarity_thresh

        # QA cache: list of dicts
        # {"question": str, "answer": str, "embedding": np.ndarray or None}.
        # The embedding is None when the embeddings call failed for that entry.
        self.qa_cache = []

    def format_history(self, history):
        """Render a list of ``{"role", "content"}`` dicts as ``ROLE: content`` lines."""
        return "\n".join(f"{turn['role'].upper()}: {turn['content']}" for turn in history)

    async def embed(self, text: str) -> np.ndarray:
        """Return the embedding vector for ``text`` (uses OpenAI embeddings).

        Errors raised by the OpenAI client propagate to the caller.
        """
        resp = await self.openai.embeddings.create(
            model=self._EMBEDDING_MODEL,
            input=text,
        )
        return np.array(resp.data[0].embedding)

    def cosine_sim(self, a: np.ndarray, b: np.ndarray) -> float:
        """Return the cosine similarity of two 1-D vectors as a plain float."""
        return float(cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0])

    async def find_similar_question(self, new_question: str, new_emb=None):
        """Find the cached entry most similar to ``new_question``.

        Args:
            new_question: The question to look up.
            new_emb: Optional precomputed embedding of ``new_question``. When
                omitted, the embedding is requested here (and any error from
                that request propagates).

        Returns:
            ``(entry, similarity)`` when the best match reaches
            ``self.similarity_threshold``, otherwise ``(None, best_similarity)``.
            An empty cache yields ``(None, 0.0)`` without calling the API.
        """
        if not self.qa_cache:
            return None, 0.0

        if new_emb is None:
            new_emb = await self.embed(new_question)

        best, best_sim = None, 0.0
        for item in self.qa_cache:
            cached_emb = item["embedding"]
            # Entries cached after a failed embedding call carry None; they can
            # still serve exact matches but cannot be compared semantically.
            if cached_emb is None:
                continue
            sim = self.cosine_sim(new_emb, cached_emb)
            if sim > best_sim:
                best, best_sim = item, sim

        if best is not None and best_sim >= self.similarity_threshold:
            return best, best_sim
        return None, best_sim

    async def _complete(self, messages: list) -> str:
        """Run one chat completion and return the text of the first choice."""
        response = await self.openai.chat.completions.create(
            model=self._CHAT_MODEL,
            messages=messages,
        )
        return response.choices[0].message.content

    async def chat(self, message: str, history: list, **kwargs):
        """Main chat. Uses the semantic QA cache and a sliding history window.

        Lookup order: exact cached question, then semantically similar cached
        question (the old answer is refined by the model), then a regular
        completion over the most recent messages.

        Args:
            message: user message string
            history: existing list of dicts [{"role":...., "content":....}];
                it is read but never modified. ``None`` is treated as empty.
            **kwargs: accepted for call-site compatibility and ignored.

        Returns:
            reply string. If the completion fails and no earlier answer can be
            reused, a generic apology is returned and nothing is cached.
        """
        history = history or []

        # Exact-match short-circuit: a single pass over the cache.
        cached = next((qa for qa in self.qa_cache if qa["question"] == message), None)
        if cached is not None:
            print("Using exact cached reply")
            return cached["answer"]

        # Embed the message once; the vector serves both the similarity lookup
        # and the cache entry stored at the end. A failure here must not take
        # the whole chat down, so fall back to "no embedding".
        try:
            emb = await self.embed(message)
        except Exception as e:
            print(f"Embedding Error: {e}")
            traceback.print_exc()
            emb = None

        similar, sim_score = None, 0.0
        if emb is not None:
            similar, sim_score = await self.find_similar_question(message, emb)

        if similar:
            print(f"Reusing past answer (similarity={sim_score:.2%})")
            refine_prompt = (
                "The user previously asked a similar question:\n"
                f"Old question: {similar['question']}\n"
                f"Old answer: {similar['answer']}\n\n"
                f"Now user asks: {message}\n\n"
                "Please update or refine the old answer to match the new question."
            )
            messages = [
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": refine_prompt},
            ]
            try:
                reply = await self._complete(messages)
            except Exception as e:
                print(f"Error calling OpenAI for refinement: {e}")
                # The old answer is still a reasonable reply to a similar question.
                reply = similar["answer"]
        else:
            # Build token-efficient context (sliding window). The window counts
            # messages, not user/assistant pairs, and always keeps at least the
            # new user message (a plain [-0:] slice would return everything).
            window = max(1, self.max_history_turns)
            context_for_api = (history + [{"role": "user", "content": message}])[-window:]
            messages = [{"role": "system", "content": self.system_prompt}] + context_for_api
            try:
                reply = await self._complete(messages)
            except Exception as e:
                print(f"Error calling OpenAI: {e}")
                # Nothing to reuse: answer with the fallback and do not cache
                # it, so the next attempt goes back to the model.
                return self._FALLBACK_REPLY

        self.qa_cache.append({
            "question": message,
            "answer": reply,
            "embedding": emb,
        })
        return reply

    @traceable(run_type="chain", name="ProfileChat")
    async def chat_traced(self, *args, **kwargs):
        """Wrapper for LangSmith tracing. Accepts any extra arguments
        (like from Gradio) and passes only message/history to chat().

        ``message`` and ``history`` are taken from the first two positional
        arguments when present, otherwise from the keyword arguments of the
        same names.
        """
        message = args[0] if len(args) >= 1 else kwargs.get("message")
        history = args[1] if len(args) >= 2 else kwargs.get("history")
        return await self.chat(message, history)
if __name__ == "__main__":
    # One shared avatar instance (and therefore one shared QA cache) serves
    # every session of this process.
    my_profile = MyProfileAvatarChat()

    with gr.Blocks() as demo:
        # Chat interface; the traced wrapper receives (message, history).
        gr.ChatInterface(my_profile.chat_traced)

    # show_error is a boolean flag; the previous value 8000 only worked because
    # it is truthy. No port is set here, so Gradio's default port applies.
    # NOTE(review): share=True requests a public share link in addition to
    # binding on all interfaces -- confirm that exposure is intended.
    demo.queue(max_size=10).launch(
        server_name="0.0.0.0",
        show_error=True,
        share=True,
    )