Upload 16 files
- app.py +177 -0
- me/Linkedin_Profile.pdf +0 -0
- requirements.txt +10 -0
- src/__init__.py +0 -0
- src/__pycache__/__init__.cpython-312.pyc +0 -0
- src/__pycache__/config.cpython-312.pyc +0 -0
- src/__pycache__/file_loader.cpython-312.pyc +0 -0
- src/__pycache__/models.cpython-312.pyc +0 -0
- src/__pycache__/prompts.cpython-312.pyc +0 -0
- src/__pycache__/utils.cpython-312.pyc +0 -0
- src/config.py +17 -0
- src/file_loader.py +23 -0
- src/models.py +6 -0
- src/name_extractor.py +6 -0
- src/prompts.py +19 -0
- src/utils.py +13 -0
app.py
ADDED
@@ -0,0 +1,177 @@
import os
import traceback
import numpy as np
import gradio as gr

from openai import AsyncOpenAI
from langsmith import traceable
from sklearn.metrics.pairwise import cosine_similarity

from src.prompts import system_prompt
# from src.name_extractor import extract_name_gliner
from src.models import CacheEntry
from src.config import Config
from src.utils import FileReader

# ---------------------------------------------------------------------
# CHAT CLASS
# ---------------------------------------------------------------------
class MyProfileAvatarChat(Config, FileReader):
    def __init__(self, max_history_turns: int = 10, similarity_thresh: float = 0.80):
        Config.__init__(self)
        FileReader.__init__(self)

        # 1. Try to load the name from the environment
        self.name = os.getenv("PROFIL_NAME")
        # if not self.name:
        #     name = extract_name_gliner(self.linkedin_profile)
        #     self.name = name["person"][0]
        #     print(f"Name found on Linkedin profile: {self.name}")

        self.openai = AsyncOpenAI(api_key=self.openai_api_key)

        # Build system prompt once
        self.system_prompt = system_prompt
        self.system_prompt += f"## Linkedin Profile:\n{self.linkedin_profile}\n\n"
        self.system_prompt += f"## Additional Information:\n{self.additional_info}\n\n"
        self.system_prompt += f"With this context, please chat with the user, always staying in character as {self.name}."

        # Settings
        self.max_history_turns = max_history_turns
        self.similarity_threshold = similarity_thresh

        # QA cache (question -> answer -> embedding)
        self.qa_cache = []  # list of dicts: {"question": str, "answer": str, "embedding": np.ndarray}

    def format_history(self, history):
        return "\n".join(f"{turn['role'].upper()}: {turn['content']}" for turn in history)

    async def embed(self, text: str):
        """Return the embedding vector for text (uses OpenAI embeddings)."""
        resp = await self.openai.embeddings.create(
            model="text-embedding-3-small",
            input=text
        )
        return np.array(resp.data[0].embedding)

    def cosine_sim(self, a: np.ndarray, b: np.ndarray) -> float:
        return float(cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0])

    async def find_similar_question(self, new_question: str):
        if not self.qa_cache:
            return None, 0.0
        new_emb = await self.embed(new_question)
        best = None
        best_sim = 0.0
        for item in self.qa_cache:
            if item["embedding"] is None:  # skip entries whose embedding failed
                continue
            sim = self.cosine_sim(new_emb, item["embedding"])
            if sim > best_sim:
                best_sim = sim
                best = item
        if best and best_sim >= self.similarity_threshold:
            return best, best_sim
        return None, best_sim

    async def chat(self, message: str, history: list, **kwargs):
        """Main chat. Uses a semantic QA cache and a sliding window to limit tokens.

        Args:
            message: user message string
            history: existing list of dicts [{"role": ..., "content": ...}]
        Returns:
            reply string
        """
        # Cache exact-match short-circuit
        for qa in self.qa_cache:
            if qa["question"] == message:
                print("Using exact cached reply")
                history.append({"role": "user", "content": message})
                history.append({"role": "assistant", "content": qa["answer"]})
                return qa["answer"]

        # Check for a semantically similar previous question
        similar, sim_score = await self.find_similar_question(message)
        if similar:
            print(f"Reusing past answer (similarity={sim_score:.2%})")
            refine_prompt = (
                f"The user previously asked a similar question:\n"
                f"Old question: {similar['question']}\n"
                f"Old answer: {similar['answer']}\n\n"
                f"Now the user asks: {message}\n\n"
                f"Please update or refine the old answer to match the new question."
            )
            messages = [{"role": "system", "content": self.system_prompt},
                        {"role": "user", "content": refine_prompt}]
            try:
                response = await self.openai.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=messages
                )
                reply = response.choices[0].message.content
            except Exception as e:
                print(f"Error calling OpenAI for refinement: {e}")
                reply = similar["answer"]
        else:
            # Build token-efficient context (sliding window)
            temp_history = history + [{"role": "user", "content": message}]
            context_for_api = temp_history[-self.max_history_turns:]
            messages = [{"role": "system", "content": self.system_prompt}] + context_for_api

            try:
                response = await self.openai.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=messages
                )
                reply = response.choices[0].message.content
            except Exception as e:
                print(f"Error calling OpenAI: {e}")
                reply = "Sorry, something went wrong while generating a reply. Please try again."

        try:
            emb = await self.embed(message)
        except Exception as e:
            print(f"Embedding Error: {e}")
            traceback.print_exc()
            emb = None

        self.qa_cache.append({
            "question": message,
            "answer": reply,
            "embedding": emb
        })

        return reply

    @traceable(run_type="chain", name="ProfileChat")
    async def chat_traced(self, *args, **kwargs):
        """Wrapper for LangSmith tracing. Accepts any extra arguments
        (like from Gradio) and passes only message/history to chat()."""
        if len(args) >= 2:
            message, history = args[0], args[1]
        else:
            message = kwargs.get("message")
            history = kwargs.get("history")
        return await self.chat(message, history)


if __name__ == "__main__":
    my_profile = MyProfileAvatarChat()
    with gr.Blocks() as demo:
        # Per-user chat history state
        state = gr.State([])

        # Chat interface
        chat = gr.ChatInterface(
            my_profile.chat_traced
        )

    demo.queue(max_size=10).launch(
        server_name="0.0.0.0",
        show_error=True,
        share=True
    )
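For a quick local sanity check of the caching logic, here is a minimal sketch (a hypothetical test script, assuming OPENAI_API_KEY and PROFIL_NAME are set and ./me contains the profile files):

import asyncio

from app import MyProfileAvatarChat

async def main():
    bot = MyProfileAvatarChat()
    history = []
    print(await bot.chat("What do you do for a living?", history))
    # A close paraphrase: find_similar_question() should score above the 0.80
    # default threshold, so the old answer is refined rather than recomputed.
    print(await bot.chat("What's your current job?", history))
    # An exact repeat is served straight from the cache, with no API call.
    print(await bot.chat("What do you do for a living?", history))

asyncio.run(main())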
me/Linkedin_Profile.pdf
ADDED
Binary file (68 kB).
requirements.txt
ADDED
@@ -0,0 +1,10 @@
gradio>=5.22.0
langsmith>=0.3.18
openai>=1.68.2
pypdf>=5.4.0
python-dotenv>=1.0.1
requests>=2.32.3
setuptools>=78.1.0
scikit-learn>=1.7.2
# gliner2==1.0.2
src/__init__.py
ADDED
File without changes
src/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (151 Bytes).

src/__pycache__/config.cpython-312.pyc
ADDED
Binary file (1.05 kB).

src/__pycache__/file_loader.cpython-312.pyc
ADDED
Binary file (1.12 kB).

src/__pycache__/models.cpython-312.pyc
ADDED
Binary file (492 Bytes).

src/__pycache__/prompts.cpython-312.pyc
ADDED
Binary file (1.09 kB).

src/__pycache__/utils.cpython-312.pyc
ADDED
Binary file (711 Bytes).
src/config.py
ADDED
@@ -0,0 +1,17 @@
import os
from dotenv import load_dotenv

from langsmith import Client, traceable

# ---------------------------------------------------------------------
# CONFIG
# ---------------------------------------------------------------------
class Config:
    def __init__(self):
        load_dotenv(override=True)
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        self.langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
        self.langsmith_endpoint = os.getenv("LANGSMITH_ENDPOINT")

        # Initialize LangSmith
        self.langsmith_client = Client(api_key=self.langsmith_api_key)
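Config pulls everything from the environment; a sketch of the .env entries it (together with prompts.py) expects, with placeholder values:

# Expected .env entries (placeholder values), read via load_dotenv():
#   OPENAI_API_KEY=sk-...
#   LANGSMITH_API_KEY=lsv2_...
#   LANGSMITH_ENDPOINT=https://api.smith.langchain.com
#   PROFIL_NAME=Jane Doe
from src.config import Config

cfg = Config()
assert cfg.openai_api_key is not None, "OPENAI_API_KEY is required by AsyncOpenAI"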
src/file_loader.py
ADDED
@@ -0,0 +1,23 @@
from functools import lru_cache
from pypdf import PdfReader

@lru_cache()
def load_pdf_text(path: str) -> str:
    text = ""
    try:
        reader = PdfReader(path)
        for page in reader.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text
    except Exception:
        return ""
    return text

@lru_cache()
def load_text_file(path: str) -> str:
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except Exception:
        return ""
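Since both loaders are wrapped in lru_cache, repeated FileReader instantiations re-parse nothing; a short sketch of that behavior:

from src.file_loader import load_pdf_text

text1 = load_pdf_text("./me/Linkedin_Profile.pdf")  # parses the PDF
text2 = load_pdf_text("./me/Linkedin_Profile.pdf")  # served from the cache
print(load_pdf_text.cache_info())                   # hits=1, misses=1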
src/models.py
ADDED
@@ -0,0 +1,6 @@
from pydantic import BaseModel

class CacheEntry(BaseModel):
    question: str
    answer: str
    embedding: list[float]
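app.py imports CacheEntry but currently stores plain dicts in qa_cache; a sketch of how the model could validate an entry instead (note the numpy-to-list conversion, since pydantic expects list[float]):

import numpy as np
from src.models import CacheEntry

emb = np.zeros(1536)  # text-embedding-3-small returns 1536-dimensional vectors
entry = CacheEntry(
    question="What do you do?",
    answer="I'm a data scientist.",
    embedding=emb.tolist(),
)
print(entry.question)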
src/name_extractor.py
ADDED
@@ -0,0 +1,6 @@
from gliner2 import GLiNER2

def extract_name_gliner(text: str) -> dict:
    extractor = GLiNER2.from_pretrained("fastino/gliner2-base-v1")
    result = extractor.extract_entities(text[:700], ["person"])
    return result["entities"]
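This helper is currently disabled (gliner2 is commented out in both requirements.txt and app.py); the commented code in app.py suggests usage along these lines:

from src.name_extractor import extract_name_gliner
from src.utils import FileReader

reader = FileReader()
entities = extract_name_gliner(reader.linkedin_profile)
name = entities["person"][0]  # first "person" entity found in the profile
print(f"Name found on Linkedin profile: {name}")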
src/prompts.py
ADDED
@@ -0,0 +1,19 @@
import os
from dotenv import load_dotenv

load_dotenv(override=True)

name = os.getenv("PROFIL_NAME")

system_prompt = f"You are acting as {name}. You are answering questions about {name}'s work experience and life, \
particularly questions related to {name}'s career, background, skills and experience. \
Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \
Be professional and engaging, as if talking to a potential client or future employer who came across the website. \
Do not answer any questions which are not related to {name}'s portfolio. \
If you do not know the answer, say so and ask for contact details so that questions the agent cannot answer can be followed up. \
If you need to check e.g. a salary expectation question, then use tools to see what the range for such a position is. \
"
# When asked about professional experience, focus primarily on your data scientist experience. You may briefly mention past roles (e.g., Tesco, education) and acknowledge that your career path hasn't been linear, but emphasize that this variety has given you a broader perspective and valuable transferable skills.
# Whenever appropriate, invite the person to contact you via email if they have further questions or would like to arrange a conversation.
# If you don't know the answer, state that clearly and honestly.
# Don't claim experience with technologies I haven't used as a Data Scientist, e.g. the R language - you never had experience with it.
src/utils.py
ADDED
@@ -0,0 +1,13 @@
from src.file_loader import load_pdf_text, load_text_file

# ---------------------------------------------------------------------
# FILE READER
# ---------------------------------------------------------------------
class FileReader:
    def __init__(self):
        self.linkedin_profile = load_pdf_text("./me/Linkedin_Profile.pdf")
        self.additional_info = load_text_file("./me/additional_info.txt")

        # print("=== LINKEDIN PROFILE CONTENT ===")
        # print(self.linkedin_profile)
        # print("=== END ===")
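Because both loaders return an empty string on failure, FileReader degrades gracefully when a file is absent; a small sketch (note that me/additional_info.txt is not among the 16 uploaded files, so this may print an empty string rather than raising):

from src.utils import FileReader

reader = FileReader()
print(repr(reader.linkedin_profile)[:80])   # PDF text, if extraction succeeded
print(repr(reader.additional_info)[:80])    # likely '' - the file is missing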