File size: 6,827 Bytes
06e6988
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import os
import traceback
import numpy as np
import gradio as gr

from openai import AsyncOpenAI
from langsmith import traceable
from sklearn.metrics.pairwise import cosine_similarity

from src.prompts import system_prompt
# from src.name_extractor import extract_name_gliner
from src.models import CacheEntry
from src.config import Config
from src.utils import FileReader

# ---------------------------------------------------------------------
# CHAT CLASS
# ---------------------------------------------------------------------
class MyProfileAvatarChat(Config, FileReader):
    """Chat avatar that answers questions in character as the profile owner.

    The system prompt is built once from the imported ``system_prompt``
    template plus ``self.linkedin_profile`` and ``self.additional_info``
    (presumably provided by ``Config``/``FileReader`` -- confirm there).

    Replies are stored in an in-memory question/answer cache that is used for
    exact-match reuse and, through embeddings, for refining the answer to a
    semantically similar earlier question.

    NOTE: the cache lives on the instance, so every caller sharing one
    instance also shares the cached answers.
    """

    # Model identifiers used for every OpenAI request made by this class.
    _CHAT_MODEL = "gpt-4o-mini"
    _EMBEDDING_MODEL = "text-embedding-3-small"
    # Returned (and never cached) when no reply could be produced at all.
    _FALLBACK_REPLY = (
        "Sorry, I could not generate a response right now. Please try again."
    )

    def __init__(
        self,
        max_history_turns: int = 10,
        similarity_thresh: float = 0.80,
        max_cache_entries: int = 500,
    ):
        """Set up the OpenAI client, the system prompt and the QA cache.

        Args:
            max_history_turns: number of most recent messages (including the
                new user message) sent to the chat model.
            similarity_thresh: minimum cosine similarity for a cached question
                to count as "similar" to a new one.
            max_cache_entries: upper bound on cached question/answer pairs;
                the oldest entries are dropped first. ``0`` or less disables
                caching.
        """
        Config.__init__(self)
        FileReader.__init__(self)

        # The persona name is read from the environment only (a GLiNER-based
        # extraction from the LinkedIn profile is currently disabled).
        self.name = os.getenv("PROFIL_NAME")
        # Avoid telling the model to stay in character as the literal "None".
        persona = self.name or "the person described in the profile above"

        self.openai = AsyncOpenAI(api_key=self.openai_api_key)

        # Build system prompt once
        self.system_prompt = system_prompt + (
            f"## Linkedin Profile:\n{self.linkedin_profile}\n\n"
            f"## Additional Information:\n{self.additional_info}\n\n"
            f"With this context, please chat with user, "
            f"always staying in character as {persona}."
        )

        # Settings
        self.max_history_turns = max_history_turns
        self.similarity_threshold = similarity_thresh
        self.max_cache_entries = max_cache_entries

        # QA cache: list of dicts
        # {"question": str, "answer": str, "embedding": np.ndarray or None}
        self.qa_cache = []

    def format_history(self, history):
        """Render ``history`` as one ``ROLE: content`` line per message.

        Each entry must be a mapping with ``"role"`` and ``"content"`` keys.
        """
        return "\n".join(
            f"{turn['role'].upper()}: {turn['content']}" for turn in history
        )

    async def embed(self, text: str):
        """Return the embedding vector for ``text`` as a NumPy array.

        Errors raised by the OpenAI client propagate to the caller.
        """
        resp = await self.openai.embeddings.create(
            model=self._EMBEDDING_MODEL,
            input=text,
        )
        return np.array(resp.data[0].embedding)

    def cosine_sim(self, a: np.ndarray, b: np.ndarray) -> float:
        """Return the cosine similarity of two vectors as a plain float."""
        # cosine_similarity works on 2-D inputs, so each vector becomes one row.
        return float(cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0])

    async def find_similar_question(self, new_question: str, embedding=None):
        """Find the cached entry most similar to ``new_question``.

        Args:
            new_question: the incoming question text.
            embedding: optional precomputed embedding of ``new_question``;
                when omitted the text is embedded here.

        Returns:
            ``(entry, similarity)`` when the best match reaches
            ``self.similarity_threshold``, otherwise ``(None, best_similarity)``.
        """
        # Entries whose embedding failed are stored with None and cannot be
        # compared, so they are left out of the search.
        candidates = [
            item for item in self.qa_cache if item.get("embedding") is not None
        ]
        if not candidates:
            return None, 0.0

        if embedding is None:
            embedding = await self.embed(new_question)

        best, best_sim = None, 0.0
        for item in candidates:
            sim = self.cosine_sim(embedding, item["embedding"])
            if sim > best_sim:
                best, best_sim = item, sim

        if best is not None and best_sim >= self.similarity_threshold:
            return best, best_sim
        return None, best_sim

    async def _complete(self, messages: list, error_label: str):
        """Call the chat model; return the reply text, or None on failure."""
        try:
            response = await self.openai.chat.completions.create(
                model=self._CHAT_MODEL,
                messages=messages,
            )
            # An empty/None content is treated like a failed call.
            return response.choices[0].message.content or None
        except Exception as e:
            print(f"{error_label}: {e}")
            return None

    def _remember(self, question: str, answer: str, embedding) -> None:
        """Append a QA pair to the cache, evicting the oldest on overflow."""
        if self.max_cache_entries <= 0:
            return
        self.qa_cache.append({
            "question": question,
            "answer": answer,
            "embedding": embedding,
        })
        overflow = len(self.qa_cache) - self.max_cache_entries
        if overflow > 0:
            del self.qa_cache[:overflow]

    async def chat(self, message: str, history: list, **kwargs):
        """Main chat. Uses the semantic QA cache and a sliding history window.

        ``history`` is never modified; the caller owns it.

        Args:
            message: user message string
            history: existing list of dicts [{"role": ..., "content": ...}];
                ``None`` is treated as an empty history
            **kwargs: accepted for call compatibility and ignored

        Returns:
            reply string
        """
        # Exact-match short-circuit (single pass over the cache).
        cached = next(
            (qa for qa in self.qa_cache if qa["question"] == message), None
        )
        if cached is not None:
            print("Using exact cached reply")
            return cached["answer"]

        # Embed once; the vector serves both the similarity search and the
        # cache entry. A failure only disables the similarity lookup.
        try:
            emb = await self.embed(message)
        except Exception as e:
            print(f"Embedding Error: {e}")
            traceback.print_exc()
            emb = None

        similar, sim_score = None, 0.0
        if emb is not None:
            similar, sim_score = await self.find_similar_question(
                message, embedding=emb
            )

        if similar:
            print(f"Reusing past answer (similarity={sim_score:.2%})")
            refine_prompt = (
                f"The user previously asked a similar question:\n"
                + f"Old question: {similar['question']}\n"
                + f"Old answer: {similar['answer']}\n\n"
                + f"Now user asks: {message}\n\n"
                + f"Please update or refine the old answer to match the new question."
            )
            messages = [
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": refine_prompt},
            ]
            reply = await self._complete(
                messages, "Error calling OpenAI for refinement"
            )
            if reply is None:
                # Best effort: reuse the old answer, but do not cache it under
                # the new question so a later retry can still be refined.
                return similar["answer"]
        else:
            # Build token-efficient context (sliding window). The window is at
            # least 1 so the new user message is always included; a plain
            # [-0:] slice would send the entire history instead.
            temp_history = list(history or []) + [
                {"role": "user", "content": message}
            ]
            window = max(1, self.max_history_turns)
            messages = [
                {"role": "system", "content": self.system_prompt}
            ] + temp_history[-window:]
            reply = await self._complete(messages, "Error calling OpenAI")
            if reply is None:
                # Nothing to fall back on; answer politely and cache nothing.
                return self._FALLBACK_REPLY

        self._remember(message, reply, emb)
        return reply

    @traceable(run_type="chain", name="ProfileChat")
    async def chat_traced(self, *args, **kwargs):
        """Wrapper for LangSmith tracing.

        Accepts any extra arguments (like from Gradio) and passes only
        message/history to chat(). Each of the two values is taken from the
        positional arguments when present, otherwise from the keyword
        arguments ``message`` / ``history``.
        """
        message = args[0] if len(args) >= 1 else kwargs.get("message")
        history = args[1] if len(args) >= 2 else kwargs.get("history")
        return await self.chat(message, history)
        
if __name__ == "__main__":
    # Script entry point: build the avatar once and serve it through a Gradio
    # chat UI.
    my_profile = MyProfileAvatarChat()

    with gr.Blocks() as demo:
        # The chat interface calls the traced wrapper for every user message.
        gr.ChatInterface(my_profile.chat_traced)

    demo.queue(max_size=10).launch(
        server_name="0.0.0.0",
        # show_error is an on/off flag; the value 8000 previously passed here
        # only worked because it is truthy.
        # NOTE(review): 8000 looks like a port number -- if a fixed port was
        # intended, pass it as server_port; confirm against the deployment.
        show_error=True,
        share=True,
    )