Spaces:
Running
Running
File size: 9,551 Bytes
f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 3f9cf3f f5f1397 f3c5f38 f5f1397 f3c5f38 3f9cf3f f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 3a27bad f3c5f38 f5f1397 f3c5f38 b7ba4c4 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 87e3fac f3c5f38 f5f1397 f3c5f38 f5f1397 f3c5f38 21736a1 87e3fac f3c5f38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 |
import os
import traceback
import numpy as np
import gradio as gr
from openai import AsyncOpenAI
from langsmith import traceable
from sklearn.metrics.pairwise import cosine_similarity
from src.prompts import system_prompt, evaluator_system_prompt
# from src.name_extractor import extract_name_gliner
from src.models import Evaluation, CacheEntry
from src.config import Config
from src.utils import FileReader
# ---------------------------------------------------------------------
# CHAT CLASS
# ---------------------------------------------------------------------
class MyProfileAvatarChat(Config, FileReader):
    """Chat agent that answers questions in character as a profile owner.

    A reply is produced by an OpenAI chat model, checked by a second model
    (Gemini, reached through its OpenAI-compatible endpoint) and regenerated
    once if the check rejects it. Answered questions are kept in an in-memory
    cache and reused for identical or semantically similar questions.

    The attributes ``openai_api_key``, ``google_api_key``, ``linkedin_profile``
    and ``additional_info`` are read here but not set here; presumably they are
    provided by ``Config`` / ``FileReader`` -- verify against those classes.
    """

    # Model identifiers, kept in one place so every call site agrees.
    _CHAT_MODEL = "gpt-4o-mini"
    _EMBEDDING_MODEL = "text-embedding-3-small"
    _EVALUATOR_MODEL = "gemini-2.0-flash"

    # Returned when no reply could be generated at all.
    _FALLBACK_REPLY = (
        "Sorry, I ran into a problem generating a reply. "
        "Please try again in a moment."
    )

    def __init__(self, max_history_turns: int = 10, similarity_thresh: float = 0.80):
        """Create the API clients, the system prompts and an empty QA cache.

        Args:
            max_history_turns: number of most recent messages (including the
                new user message) sent to the chat model.
            similarity_thresh: minimum cosine similarity for a cached question
                to count as "similar" to a new one.
        """
        Config.__init__(self)
        FileReader.__init__(self)

        # 1. Try to load from env
        self.name = os.getenv("PROFIL_NAME")
        # if not self.name:
        #     name = extract_name_gliner(self.linkedin_profile)
        #     self.name = name["person"][0]
        #     print(f"Name found on Linkedin profile: {self.name}")
        if not self.name:
            # Without this guard the prompt would literally say "as None".
            print("Warning: PROFIL_NAME is not set; using a generic persona description.")
        persona = self.name or "the person described in the profile above"

        self.openai = AsyncOpenAI(api_key=self.openai_api_key)
        # gemini (evaluator) uses google_api_key via OpenAI wrapper
        self.gemini = AsyncOpenAI(
            api_key=self.google_api_key,
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
        )

        # Build system prompt once
        self.system_prompt = system_prompt
        self.system_prompt += f"## Linkedin Profile:\n{self.linkedin_profile}\n\n"
        self.system_prompt += f"## Addidional Information:\n{self.additional_info}\n\n"
        self.system_prompt += (
            f"With this context, please chat with user, always staying in character as {persona}."
        )
        self.evaluator_system_prompt = evaluator_system_prompt

        # Settings
        self.max_history_turns = max_history_turns
        self.similarity_threshold = similarity_thresh

        # QA cache: list of dicts
        # {"question": str, "answer": str, "embedding": np.ndarray or None}
        # NOTE: the cache lives on the instance, so it is shared by every
        # conversation served by this object and grows without bound.
        self.qa_cache = []

    def format_history(self, history):
        """Render ``history`` as one ``ROLE: content`` line per message."""
        return "\n".join(f"{turn['role'].upper()}: {turn['content']}" for turn in history)

    async def embed(self, text: str):
        """Return embedding vector for text (uses OpenAI embeddings)."""
        resp = await self.openai.embeddings.create(
            model=self._EMBEDDING_MODEL,
            input=text,
        )
        return np.array(resp.data[0].embedding)

    async def _safe_embed(self, text: str):
        """Return the embedding for ``text``, or ``None`` if the request fails."""
        try:
            return await self.embed(text)
        except Exception as e:
            print(f"Embedding Error: {e}")
            traceback.print_exc()
            return None

    async def _complete(self, messages):
        """Send ``messages`` to the chat model and return the reply text."""
        response = await self.openai.chat.completions.create(
            model=self._CHAT_MODEL,
            messages=messages,
        )
        return response.choices[0].message.content

    def _windowed_context(self, message, history):
        """Return the last ``max_history_turns`` messages, new user message included."""
        turns = history + [{"role": "user", "content": message}]
        return turns[-self.max_history_turns:]

    def cosine_sim(self, a: np.ndarray, b: np.ndarray) -> float:
        """Return the cosine similarity of two 1-D vectors as a float."""
        return float(cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0])

    async def find_similar_question(self, new_question: str, embedding=None):
        """Find the cached entry most similar to ``new_question``.

        Args:
            new_question: the question to look up.
            embedding: optional precomputed embedding of ``new_question``;
                when omitted it is requested via :meth:`embed`.

        Returns:
            ``(entry, similarity)`` when the best similarity reaches
            ``similarity_threshold``, otherwise ``(None, best_similarity)``.
            ``(None, 0.0)`` when the cache is empty.
        """
        if not self.qa_cache:
            return None, 0.0

        new_emb = embedding if embedding is not None else await self.embed(new_question)

        best = None
        best_sim = 0.0
        for item in self.qa_cache:
            cached_emb = item["embedding"]
            if cached_emb is None:
                # Entries whose embedding request failed are stored with
                # None and can only be reused through an exact match.
                continue
            sim = self.cosine_sim(new_emb, cached_emb)
            if sim > best_sim:
                best_sim = sim
                best = item

        if best and best_sim >= self.similarity_threshold:
            return best, best_sim
        return None, best_sim

    def evaluator_user_prompt(self, reply, message, history):
        """Build the user prompt shown to the evaluator model."""
        formatted_history = self.format_history(history)
        user_prompt = f"Here's the conversation between the User and the Agent: \n\n{formatted_history}\n\n"
        user_prompt += f"Here's the latest message from the User: \n\n{message}\n\n"
        user_prompt += f"Here's the latest response from the Agent: \n\n{reply}\n\n"
        user_prompt += "Please evaluate the response, replying with whether it is acceptable and your feedback."
        return user_prompt

    @traceable(run_type="tool", name="EvaluateReply")
    async def evaluate(self, reply, message, history, **kwargs) -> Evaluation:
        """Ask the evaluator model to judge ``reply``; returns the parsed ``Evaluation``."""
        messages = [
            {"role": "system", "content": self.evaluator_system_prompt},
            {"role": "user", "content": self.evaluator_user_prompt(reply, message, history)},
        ]
        response = await self.gemini.chat.completions.parse(
            model=self._EVALUATOR_MODEL,
            messages=messages,
            response_format=Evaluation,
        )
        return response.choices[0].message.parsed

    @traceable(run_type="llm", name="RerunRejectedAnswer")
    async def rerun(self, reply, message, history, feedback, **kwargs):
        """Regenerate a rejected reply, telling the model why it was rejected.

        Returns the new reply, or the original ``reply`` if the request fails.
        """
        updated_system_prompt = (
            self.system_prompt
            + "\n\n## Previous answer rejected\n"
            + "You just tried to reply, but the quality control rejected your reply\n"
            + f"## Your attempted answer:\n{reply}\n\n"
            + f"## Reason for rejection:\n{feedback}\n\n"
        )
        # Same sliding window as chat(), so a retry cannot send more context
        # than the first attempt did.
        messages = [{"role": "system", "content": updated_system_prompt}]
        messages += self._windowed_context(message, history)
        try:
            return await self._complete(messages)
        except Exception as e:
            print(f"Error during rerun: {e}")
            return reply

    async def chat(self, message: str, history: list, **kwargs):
        """Main chat. Uses semantic QA cache, sliding window for tokens, evaluation and rerun

        Args:
            message: user message string
            history: existing list of dicts [{"role":...., "content":....}]

        Returns:
            reply string
        """
        history = history or []

        # Cache exact-match short-circuit. ``history`` is deliberately not
        # modified: no other path touches it, and a caller that records the
        # turn itself would otherwise end up with it twice.
        cached = next((qa for qa in self.qa_cache if qa["question"] == message), None)
        if cached is not None:
            print("Using exact cached reply")
            return cached["answer"]

        # Embed once; the vector serves both the similarity lookup and the
        # cache entry written at the end. A failed embedding only disables
        # the similarity lookup instead of aborting the chat.
        query_emb = await self._safe_embed(message)

        # Check for semantically similar previous question
        similar, sim_score = None, 0.0
        if query_emb is not None:
            similar, sim_score = await self.find_similar_question(message, embedding=query_emb)

        if similar:
            print(f"Reusing past answer (similarity={sim_score:.2%})")
            refine_prompt = (
                "The user previously asked a similar question:\n"
                + f"Old question: {similar['question']}\n"
                + f"Old answer: {similar['answer']}\n\n"
                + f"Now user asks: {message}\n\n"
                + "Please update or refine the old answer to match the new question."
            )
            messages = [
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": refine_prompt},
            ]
            try:
                reply = await self._complete(messages)
            except Exception as e:
                print(f"Error calling OpenAI for refinement: {e}")
                reply = similar["answer"]
        else:
            # Build token-efficient context (sliding window)
            messages = [{"role": "system", "content": self.system_prompt}]
            messages += self._windowed_context(message, history)
            try:
                reply = await self._complete(messages)
            except Exception as e:
                print(f"Error calling OpenAI: {e}")
                # There is nothing to evaluate or cache; caching the apology
                # would replay it for every later identical question.
                return self._FALLBACK_REPLY

        # Evaluate the reply
        try:
            evaluation = await self.evaluate(reply, message, history)
        except Exception as e:
            print(f"Error during evaluation: {e}")
            evaluation = Evaluation(is_acceptable=True, feedback="Evaluation failed, accepting reply")

        if evaluation:
            print(f"Feedback from Evaluation:\n{evaluation.feedback}\n\n")
            if not evaluation.is_acceptable:
                reply = await self.rerun(reply, message, history, evaluation.feedback)

        # Stored even when the embedding is None so that an exact repeat of
        # the question can still be served from the cache.
        self.qa_cache.append({
            "question": message,
            "answer": reply,
            "embedding": query_emb,
        })
        return reply

    @traceable(run_type="chain", name="ProfileChat")
    async def chat_traced(self, *args, **kwargs):
        """Wrapper for LangSmith tracing. Accepts any extra arguments
        (like from Gradio) and passes only message/history to chat()."""
        # Resolve each argument independently so a positional message with a
        # keyword history (or the reverse) is not silently dropped.
        message = args[0] if len(args) >= 1 else kwargs.get("message")
        history = args[1] if len(args) >= 2 else kwargs.get("history")
        return await self.chat(message, history if history is not None else [])
if __name__ == "__main__":
    # Script entry point: build the chat agent and serve it through Gradio.
    my_profile = MyProfileAvatarChat()

    with gr.Blocks() as demo:
        # Per-user chat history state
        # NOTE(review): this state is created but never wired to the chat
        # interface below -- confirm whether it is still needed.
        state = gr.State([])

        # Chat interface
        # NOTE(review): chat() reads history turns by their "role"/"content"
        # keys, so the interface must deliver messages-style history; on
        # Gradio versions where tuple pairs are the default this may need
        # type="messages" -- confirm against the installed version.
        chat = gr.ChatInterface(
            my_profile.chat_traced
        )

    demo.queue(max_size=10).launch(
        server_name="0.0.0.0",
        # The port number was previously passed as show_error=7860; it
        # belongs in server_port. show_error stays enabled, as the truthy
        # value effectively did before.
        server_port=7860,
        show_error=True,
        share=False
    )
|