import os
import traceback
import numpy as np
import gradio as gr

from openai import AsyncOpenAI
from langsmith import traceable
from sklearn.metrics.pairwise import cosine_similarity

from src.prompts import system_prompt, evaluator_system_prompt
# from src.name_extractor import extract_name_gliner
from src.models import Evaluation, CacheEntry
from src.config import Config
from src.utils import FileReader
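
# The evaluator relies on `Evaluation` (imported from src.models) being a
# Pydantic model with the two fields read in chat(). A minimal sketch of the
# assumed shape (the actual definition lives in src/models.py):
#
#     class Evaluation(BaseModel):
#         is_acceptable: bool
#         feedback: str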

# ---------------------------------------------------------------------
# CHAT CLASS
# ---------------------------------------------------------------------
class MyProfileAvatarChat(Config, FileReader):
    def __init__(self, max_history_turns: int = 10, similarity_thresh: float = 0.80):
        Config.__init__(self)
        FileReader.__init__(self)

        # Load the profile name from the environment; the GLiNER-based
        # extraction fallback below is currently disabled.
        self.name = os.getenv("PROFIL_NAME")
        # if not self.name:
        #     name = extract_name_gliner(self.linkedin_profile)
        #     self.name = name["person"][0]
        #     print(f"Name found on LinkedIn profile: {self.name}")

        self.openai = AsyncOpenAI(api_key=self.openai_api_key)
        # Gemini (evaluator) uses the Google API key through the OpenAI-compatible endpoint
        self.gemini = AsyncOpenAI(
            api_key=self.google_api_key,
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
        )
        
        # Build system prompt once
        self.system_prompt = system_prompt
        self.system_prompt += f"## LinkedIn Profile:\n{self.linkedin_profile}\n\n"
        self.system_prompt += f"## Additional Information:\n{self.additional_info}\n\n"
        self.system_prompt += f"With this context, please chat with the user, always staying in character as {self.name}."

        self.evaluator_system_prompt = evaluator_system_prompt

        # Settings
        self.max_history_turns = max_history_turns
        self.similarity_threshold = similarity_thresh

        # Semantic QA cache: list of {"question": str, "answer": str, "embedding": np.ndarray | None}
        self.qa_cache = []
        
    
    def format_history(self, history):
        return "\n".join(f"{turn['role'].upper()}: {turn['content']}" for turn in history)
    
    async def embed(self, text: str):
        """Return embedding vector for text (uses OpenAI embeddings)."""
        resp = await self.openai.embeddings.create(
            model="text-embedding-3-small",
            input=text
        )
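        # text-embedding-3-small returns a 1536-dimensional vector by default.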
        return np.array(resp.data[0].embedding)
    
    def cosine_sim(self, a: np.ndarray, b: np.ndarray) -> float:
        return float(cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0])
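        # Worked example: identical vectors score 1.0, orthogonal vectors 0.0;
        # e.g. cosine_sim(np.array([1.0, 0.0]), np.array([1.0, 0.0])) -> 1.0.
        # Anything at or above similarity_threshold (0.80) counts as "the same question".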
    
    async def find_similar_question(self, new_question: str):
        if not self.qa_cache:
            return None, 0.0
        new_emb = await self.embed(new_question)
        best = None
        best_sim = 0.0
        for item in self.qa_cache:
            # Skip entries whose embedding failed to compute (stored as None)
            if item["embedding"] is None:
                continue
            sim = self.cosine_sim(new_emb, item["embedding"])
            if sim > best_sim:
                best_sim = sim
                best = item
        if best and best_sim >= self.similarity_threshold:
            return best, best_sim
        return None, best_sim
    
    def evaluator_user_prompt(self, reply, message, history):
        formatted_history = self.format_history(history)
        user_prompt = f"Here's the conversation between the User and the Agent: \n\n{formatted_history}\n\n"
        user_prompt += f"Here's the latest message from the User: \n\n{message}\n\n"
        user_prompt += f"Here's the latest response from the Agent: \n\n{reply}\n\n"
        user_prompt += "Please evaluate the response, replying with whether it is acceptable and your feedback."
        return user_prompt

    @traceable(run_type="tool", name="EvaluateReply")
    async def evaluate(self, reply, message, history, **kwargs) -> Evaluation:
        messages = [{"role": "system", "content": self.evaluator_system_prompt}] + \
                    [{"role": "user", "content": self.evaluator_user_prompt(reply, message, history)}]
        response = await self.gemini.chat.completions.parse(
            model="gemini-2.0-flash",
            messages=messages,
            response_format=Evaluation
        )
        return response.choices[0].message.parsed
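    # NOTE: `chat.completions.parse(..., response_format=<Pydantic model>)` is the
    # structured-output helper in recent openai-python releases; on older SDKs the
    # same call lives under `client.beta.chat.completions.parse`.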
    
    @traceable(run_type="llm", name="RerunRejectedAnswer")
    async def rerun(self, reply, message, history, feedback, **kwargs):
        updated_system_prompt = (
            self.system_prompt 
            + "\n\n## Previous answer rejected\n"
            + "You just tried to reply, but quality control rejected your reply.\n"
            + f"## Your attempted answer:\n{reply}\n\n"
            + f"## Reason for rejection:\n{feedback}\n\n"
        )
        messages = [{"role": "system", "content": updated_system_prompt}] + history + \
                    [{"role": "user", "content": message}]
        try:
            response = await self.openai.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"Error during rerun: {e}")
            return reply
        
    async def chat(self, message: str, history: list, **kwargs):
        """Main chat entry point: semantic QA cache, sliding-window context,
        evaluation, and rerun on rejection.

        Args:
            message: user message string
            history: existing list of dicts [{"role": ..., "content": ...}]
        Returns:
            reply string
        """
        # Exact-match cache short-circuit
        for qa in self.qa_cache:
            if qa["question"] == message:
                print("Using exact cached reply")
                history.append({"role": "user", "content": message})
                history.append({"role": "assistant", "content": qa["answer"]})
                return qa["answer"]

        # Check for semantically similar previous question
        similar, sim_score = await self.find_similar_question(message)
        if similar:
            print(f"Reusing past answer (similarity={sim_score:.2%})")
            refine_prompt = (
                "The user previously asked a similar question:\n"
                f"Old question: {similar['question']}\n"
                f"Old answer: {similar['answer']}\n\n"
                f"Now the user asks: {message}\n\n"
                "Please update or refine the old answer to match the new question."
            )
            messages = [{"role": "system", "content": self.system_prompt},
                        {"role": "user", "content": refine_prompt}]
            try:
                response = await self.openai.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=messages
                )
                reply = response.choices[0].message.content
            except Exception as e:
                print(f"Error calling OpenAI for refinement: {e}")
                reply = similar["answer"]  
        else:
            # Build a token-efficient context (sliding window over recent messages)
            temp_history = history + [{"role": "user", "content": message}]
            context_for_api = temp_history[-self.max_history_turns:]
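            # e.g. with max_history_turns=10 and 24 prior messages, only the 10 most
            # recent messages (including the new one) are sent along with the system
            # prompt; note the window counts individual messages, not full turns.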
            messages = [{"role": "system", "content": self.system_prompt}] + context_for_api

            try:
                response = await self.openai.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=messages
                )
                reply = response.choices[0].message.content
            except Exception as e:
                print(f"Error calling OpenAI: {e}")
                reply = "Sorry, I ran into an error while generating a reply. Please try again."
        # Evaluate the reply
        try:
            evaluation = await self.evaluate(reply, message, history)
        except Exception as e:
            print(f"Error during evaluation: {e}")
            evaluation = Evaluation(is_acceptable=True, feedback="Evaluation failed, accepting reply")

        if evaluation:
            print(f"Feedback from Evaluation:\n{evaluation.feedback}\n\n")
        if not evaluation.is_acceptable:
            reply = await self.rerun(reply, message, history, evaluation.feedback)

        try:
            emb = await self.embed(message)
        except Exception as e:
            print(f"Embedding Error: {e}")
            traceback.print_exc()
            emb = None
        
        self.qa_cache.append({
            "question": message,
            "answer": reply,
            "embedding": emb
        })

        return reply
    
    @traceable(run_type="chain", name="ProfileChat")
    async def chat_traced(self, *args, **kwargs):
        """Wrapper for LangSmith tracing. Accepts any extra arguments
        (like from Gradio) and passes only message/history to chat()."""

        if len(args) >= 2:
            message, history = args[0], args[1]
        else:
            message = kwargs.get("message")
            history = kwargs.get("history")
        return await self.chat(message, history)
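
# A minimal programmatic usage sketch (assumes the same env/config files the
# class loads; names here mirror the code above, nothing new is required):
#
#     import asyncio
#
#     chat = MyProfileAvatarChat()
#     history: list = []
#     reply = asyncio.run(chat.chat("What do you do for work?", history))
#     print(reply)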
        
if __name__ == "__main__":

    my_profile = MyProfileAvatarChat()
    with gr.Blocks() as demo:
        # Chat interface; gr.ChatInterface keeps per-session history itself, and
        # type="messages" passes it as a list of {"role": ..., "content": ...}
        # dicts, matching what chat() expects.
        chat = gr.ChatInterface(
            my_profile.chat_traced,
            type="messages"
        )

    demo.queue(max_size=10).launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        share=False
    )