# Avatar-chat / app.py
import os
import traceback
import numpy as np
import gradio as gr
from openai import AsyncOpenAI
from langsmith import traceable
from sklearn.metrics.pairwise import cosine_similarity
from src.prompts import system_prompt
# from src.name_extractor import extract_name_gliner
from src.models import CacheEntry
from src.config import Config
from src.utils import FileReader
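
# Assumed from usage below: Config supplies `openai_api_key`, and FileReader
# loads `linkedin_profile` and `additional_info` (see src/config.py and
# src/utils.py); MyProfileAvatarChat inherits both to pick these up.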
# ---------------------------------------------------------------------
# CHAT CLASS
# ---------------------------------------------------------------------
class MyProfileAvatarChat(Config, FileReader):
def __init__(self, max_history_turns: int = 10, similarity_thresh: float = 0.80):
Config.__init__(self)
FileReader.__init__(self)
        # Resolve the persona name from the environment
        self.name = os.getenv("PROFIL_NAME")
        # Fallback (disabled): extract the name from the LinkedIn profile with GLiNER
        # if not self.name:
        #     name = extract_name_gliner(self.linkedin_profile)
        #     self.name = name["person"][0]
        #     print(f"Name found on Linkedin profile: {self.name}")
self.openai = AsyncOpenAI(api_key=self.openai_api_key)
        # Build the system prompt once, enriched with profile context
        self.system_prompt = system_prompt
        self.system_prompt += f"## Linkedin Profile:\n{self.linkedin_profile}\n\n"
        self.system_prompt += f"## Additional Information:\n{self.additional_info}\n\n"
        self.system_prompt += f"With this context, please chat with the user, always staying in character as {self.name}."
# Settings
self.max_history_turns = max_history_turns
self.similarity_threshold = similarity_thresh
        # Semantic QA cache: list of dicts {"question": str, "answer": str, "embedding": np.ndarray}
        self.qa_cache = []
    def format_history(self, history):
        """Render history as 'ROLE: content' lines (unused below; kept as a
        debugging/logging helper)."""
        return "\n".join(f"{turn['role'].upper()}: {turn['content']}" for turn in history)
async def embed(self, text: str):
"""Return embedding vector for text (uses OpenAI embeddings)."""
resp = await self.openai.embeddings.create(
model="text-embedding-3-small",
input=text
)
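        # resp.data[0].embedding is a plain list of floats (1536 dims for
        # text-embedding-3-small); wrap it in np.array for the similarity math.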
return np.array(resp.data[0].embedding)
def cosine_sim(self, a: np.ndarray, b: np.ndarray) -> float:
return float(cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0])
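
    # Sketch of a dependency-free alternative to cosine_sim above: cosine
    # similarity is just the normalized dot product, so sklearn could be
    # dropped in favor of
    #   float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))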
async def find_similar_question(self, new_question: str):
if not self.qa_cache:
return None, 0.0
new_emb = await self.embed(new_question)
best = None
best_sim = 0.0
for item in self.qa_cache:
sim = self.cosine_sim(new_emb, item["embedding"])
if sim > best_sim:
best_sim = sim
best = item
if best and best_sim >= self.similarity_threshold:
return best, best_sim
return None, best_sim
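
    # NOTE: each lookup above costs one embeddings API call plus a linear scan
    # over qa_cache. That is fine for a demo; a vector index (e.g. FAISS) would
    # be the usual upgrade if the cache grows large.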
    async def chat(self, message: str, history: list, **kwargs):
        """Main chat entry point. Uses the semantic QA cache and a sliding
        window over history to bound token usage.

        Args:
            message: user message string
            history: existing list of dicts [{"role": ..., "content": ...}]

        Returns:
            reply string
        """
        # Exact-match short-circuit against the QA cache (single scan)
        for qa in self.qa_cache:
            if qa["question"] == message:
                print("Using exact cached reply")
                history.append({"role": "user", "content": message})
                history.append({"role": "assistant", "content": qa["answer"]})
                return qa["answer"]
# Check for semantically similar previous question
similar, sim_score = await self.find_similar_question(message)
if similar:
print(f"Reusing past answer (similarity={sim_score:.2%})")
            refine_prompt = (
                "The user previously asked a similar question:\n"
                f"Old question: {similar['question']}\n"
                f"Old answer: {similar['answer']}\n\n"
                f"Now the user asks: {message}\n\n"
                "Please update or refine the old answer to match the new question."
            )
messages = [{"role": "system", "content": self.system_prompt},
{"role": "user", "content": refine_prompt}]
try:
response = await self.openai.chat.completions.create(
model="gpt-4o-mini",
messages=messages
)
reply = response.choices[0].message.content
except Exception as e:
print(f"Error calling OpenAI for refinement: {e}")
reply = similar["answer"]
else:
            # Token-efficient context: keep only the most recent
            # max_history_turns messages (sliding window)
temp_history = history + [{"role": "user", "content": message}]
context_for_api = temp_history[-self.max_history_turns:]
messages = [{"role": "system", "content": self.system_prompt}] + context_for_api
try:
response = await self.openai.chat.completions.create(
model="gpt-4o-mini",
messages=messages
)
reply = response.choices[0].message.content
            except Exception as e:
                print(f"Error calling OpenAI: {e}")
                # Without this fallback, `reply` would be unbound and the
                # cache append / return below would raise a NameError.
                reply = "Sorry, something went wrong while generating a reply. Please try again."
        # Cache the new QA pair. Skip caching when embedding fails, so the
        # cache never holds entries that would break similarity search later.
        # (The cache also grows without bound; capping it would be sensible.)
        try:
            emb = await self.embed(message)
        except Exception as e:
            print(f"Embedding Error: {e}")
            traceback.print_exc()
            emb = None
        if emb is not None:
            self.qa_cache.append({
                "question": message,
                "answer": reply,
                "embedding": emb
            })
        return reply
@traceable(run_type="chain", name="ProfileChat")
async def chat_traced(self, *args, **kwargs):
"""Wrapper for LangSmith tracing. Accepts any extra arguments
(like from Gradio) and passes only message/history to chat()."""
        if len(args) >= 2:
message, history = args[0], args[1]
else:
message = kwargs.get("message")
history = kwargs.get("history")
return await self.chat(message, history)
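
# Quick manual test outside Gradio (a sketch; assumes the same env/config the
# app itself needs, e.g. an OpenAI API key):
#
#   import asyncio
#   bot = MyProfileAvatarChat()
#   print(asyncio.run(bot.chat("What do you do for work?", history=[])))
#   # A rephrased follow-up should hit the semantic cache:
#   print(asyncio.run(bot.chat("What's your job?", history=[])))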
if __name__ == "__main__":
    my_profile = MyProfileAvatarChat()
    with gr.Blocks() as demo:
        # Chat interface. type="messages" (recent Gradio) makes the history
        # arrive as the {"role", "content"} dicts chat() expects; ChatInterface
        # manages per-user history itself, so no separate gr.State is needed.
        chat = gr.ChatInterface(
            my_profile.chat_traced,
            type="messages"
        )
    demo.queue(max_size=10).launch(
        server_name="0.0.0.0",
        server_port=8000,  # show_error takes a bool; 8000 was presumably the intended port
        show_error=True,
        share=True
    )
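
# ---------------------------------------------------------------------
# RUNNING THE APP (sketch)
# ---------------------------------------------------------------------
# With the expected environment in place (the OpenAI key read by Config, and
# optionally PROFIL_NAME for the persona name), the app starts with:
#
#   python app.py
#
# LangSmith tracing via @traceable typically also wants LANGSMITH_API_KEY
# (and tracing enabled) in the environment; without it the decorator is
# effectively a pass-through.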