Spaces:
Running
Running
chat created with new logic
Browse files- .gitignore +1 -0
- README.md +1 -1
- app.py +46 -67
- notebooks/chat_with_avatar.ipynb +58 -3
- pyproject.toml +5 -0
- src/config.py +18 -0
- src/file_loader.py +23 -0
- src/models.py +10 -0
- src/name_extractor.py +6 -0
- src/prompts.py +8 -3
- src/utils.py +13 -0
- uv.lock +0 -0
.gitignore
CHANGED
|
@@ -211,3 +211,4 @@ __marimo__/
|
|
| 211 |
|
| 212 |
# My folders
|
| 213 |
/me/*.txt
|
|
|
|
|
|
| 211 |
|
| 212 |
# My folders
|
| 213 |
/me/*.txt
|
| 214 |
+
/archive
|
README.md
CHANGED
|
@@ -9,7 +9,7 @@ pinned: false
|
|
| 9 |
|
| 10 |
# Profile Avatar Chat App
|
| 11 |
|
| 12 |
-
This repository contains the code for a robust AI-powered chat service that acts as a personal profile avatar. The chat responds based on my LinkedIn profile, professional
|
| 13 |
|
| 14 |
Key features implemented for robustness:
|
| 15 |
|
|
|
|
| 9 |
|
| 10 |
# Profile Avatar Chat App
|
| 11 |
|
| 12 |
+
This repository contains the code for a robust AI-powered chat service that acts as a personal profile avatar. The chat responds based on my LinkedIn profile, professional and other additional information.
|
| 13 |
|
| 14 |
Key features implemented for robustness:
|
| 15 |
|
app.py
CHANGED
|
@@ -1,59 +1,36 @@
|
|
| 1 |
import os
|
| 2 |
-
from dotenv import load_dotenv
|
| 3 |
-
from pydantic import BaseModel
|
| 4 |
-
from openai import OpenAI
|
| 5 |
-
from pypdf import PdfReader
|
| 6 |
-
import gradio as gr
|
| 7 |
-
from src.prompts import system_prompt, evaluator_system_prompt
|
| 8 |
-
from langsmith import Client, traceable
|
| 9 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
| 10 |
import traceback
|
| 11 |
-
|
| 12 |
import numpy as np
|
|
|
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class Config:
|
| 19 |
-
def __init__(self):
|
| 20 |
-
load_dotenv(override=True)
|
| 21 |
-
self.openai_api_key = os.getenv("OPENAI_API_KEY")
|
| 22 |
-
self.google_api_key = os.getenv("GOOGLE_API_KEY")
|
| 23 |
-
self.langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
|
| 24 |
-
self.langsmith_endpoint = os.getenv("LANGSMITH_ENDPOINT")
|
| 25 |
-
|
| 26 |
-
# Initialize LangSmith
|
| 27 |
-
self.langsmith_client = Client(api_key=self.langsmith_api_key)
|
| 28 |
-
|
| 29 |
-
class FileReader:
|
| 30 |
-
def __init__(self):
|
| 31 |
-
self.linkedin_profile = ""
|
| 32 |
-
try:
|
| 33 |
-
reader = PdfReader("../me/Linkedin_Profile.pdf")
|
| 34 |
-
for page in reader.pages:
|
| 35 |
-
text = page.extract_text()
|
| 36 |
-
if text:
|
| 37 |
-
self.linkedin_profile += text
|
| 38 |
-
except Exception:
|
| 39 |
-
# If file missing, keep empty
|
| 40 |
-
self.linkedin_profile = ""
|
| 41 |
-
try:
|
| 42 |
-
with open("../me/additional_info.txt", "r", encoding="utf-8") as f:
|
| 43 |
-
self.additional_info = f.read()
|
| 44 |
-
except:
|
| 45 |
-
self.additional_info = ""
|
| 46 |
-
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
class MyProfileAvatarChat(Config, FileReader):
|
| 49 |
def __init__(self, max_history_turns: int = 10, similarity_thresh: float = 0.80):
|
| 50 |
Config.__init__(self)
|
| 51 |
FileReader.__init__(self)
|
| 52 |
|
|
|
|
| 53 |
self.name = os.getenv("PROFIL_NAME")
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
# gemini (evaluator) uses google_api_key via OpenAI wrapper
|
| 56 |
-
self.gemini =
|
| 57 |
base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
| 58 |
|
| 59 |
# Build system prompt once
|
|
@@ -75,9 +52,9 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 75 |
def format_history(self, history):
|
| 76 |
return "\n".join(f"{turn['role'].upper()}: {turn['content']}" for turn in history)
|
| 77 |
|
| 78 |
-
def embed(self, text: str):
|
| 79 |
"""Return embedding vector for text (uses OpenAI embeddings)."""
|
| 80 |
-
resp = self.openai.embeddings.create(
|
| 81 |
model="text-embedding-3-small",
|
| 82 |
input=text
|
| 83 |
)
|
|
@@ -86,10 +63,10 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 86 |
def cosine_sim(self, a: np.ndarray, b: np.ndarray) -> float:
|
| 87 |
return float(cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0])
|
| 88 |
|
| 89 |
-
def find_similar_question(self, new_question: str):
|
| 90 |
if not self.qa_cache:
|
| 91 |
return None, 0.0
|
| 92 |
-
new_emb = self.embed(new_question)
|
| 93 |
best = None
|
| 94 |
best_sim = 0.0
|
| 95 |
for item in self.qa_cache:
|
|
@@ -110,10 +87,10 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 110 |
return user_prompt
|
| 111 |
|
| 112 |
@traceable(run_type="tool", name="EvaluateReply")
|
| 113 |
-
def evaluate(self, reply, message, history, **kwargs) -> Evaluation:
|
| 114 |
messages = [{"role": "system", "content": self.evaluator_system_prompt}] + \
|
| 115 |
[{"role": "user", "content": self.evaluator_user_prompt(reply, message, history)}]
|
| 116 |
-
response = self.gemini.chat.completions.parse(
|
| 117 |
model="gemini-2.0-flash",
|
| 118 |
messages=messages,
|
| 119 |
response_format=Evaluation
|
|
@@ -121,7 +98,7 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 121 |
return response.choices[0].message.parsed
|
| 122 |
|
| 123 |
@traceable(run_type="llm", name="RerunRejectedAnswer")
|
| 124 |
-
def rerun(self, reply, message, history, feedback, **kwargs):
|
| 125 |
updated_system_prompt = (
|
| 126 |
self.system_prompt
|
| 127 |
+ "\n\n## Previous answer rejected\n"
|
|
@@ -132,7 +109,7 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 132 |
messages = [{"role": "system", "content": updated_system_prompt}] + history + \
|
| 133 |
[{"role": "user", "content": message}]
|
| 134 |
try:
|
| 135 |
-
response = self.openai.chat.completions.create(
|
| 136 |
model="gpt-4o-mini",
|
| 137 |
messages=messages
|
| 138 |
)
|
|
@@ -141,7 +118,7 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 141 |
print(f"Error during rerun: {e}")
|
| 142 |
return reply
|
| 143 |
|
| 144 |
-
def chat(self, message: str, history: list, **kwargs):
|
| 145 |
"""Main chat. Uses semantic QA cache, sliding window for tokens, evaluation and rerun
|
| 146 |
|
| 147 |
Args:
|
|
@@ -161,7 +138,7 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 161 |
return qa["answer"]
|
| 162 |
|
| 163 |
# Check for semantically similar previous question
|
| 164 |
-
similar, sim_score = self.find_similar_question(message)
|
| 165 |
if similar:
|
| 166 |
print(f"Reusing past answer (similarity={sim_score:.2%})")
|
| 167 |
refine_prompt = (
|
|
@@ -174,7 +151,7 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 174 |
messages = [{"role": "system", "content": self.system_prompt},
|
| 175 |
{"role": "user", "content": refine_prompt}]
|
| 176 |
try:
|
| 177 |
-
response = self.openai.chat.completions.create(
|
| 178 |
model="gpt-4o-mini",
|
| 179 |
messages=messages
|
| 180 |
)
|
|
@@ -189,7 +166,7 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 189 |
messages = [{"role": "system", "content": self.system_prompt}] + context_for_api
|
| 190 |
|
| 191 |
try:
|
| 192 |
-
response = self.openai.chat.completions.create(
|
| 193 |
model="gpt-4o-mini",
|
| 194 |
messages=messages
|
| 195 |
)
|
|
@@ -198,31 +175,33 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 198 |
print(f"Error calling OpenAI: {e}")
|
| 199 |
# Evaluate the reply
|
| 200 |
try:
|
| 201 |
-
evaluation = self.evaluate(reply, message, history)
|
| 202 |
except Exception as e:
|
| 203 |
print(f"Error during evaluation: {e}")
|
| 204 |
evaluation = Evaluation(is_acceptable=True, feedback="Evaluation failed, accepting reply")
|
| 205 |
|
|
|
|
|
|
|
| 206 |
if not evaluation.is_acceptable:
|
| 207 |
-
reply = self.rerun(reply, message, history, evaluation.feedback)
|
| 208 |
|
| 209 |
try:
|
| 210 |
-
emb = self.embed(message)
|
| 211 |
except Exception as e:
|
| 212 |
print(f"Embedding Error: {e}")
|
| 213 |
traceback.print_exc()
|
| 214 |
emb = None
|
| 215 |
|
| 216 |
-
self.qa_cache.append(
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
|
| 222 |
return reply
|
| 223 |
|
| 224 |
@traceable(run_type="chain", name="ProfileChat")
|
| 225 |
-
def chat_traced(self, *args, **kwargs):
|
| 226 |
"""Wrapper for LangSmith tracing. Accepts any extra arguments
|
| 227 |
(like from Gradio) and passes only message/history to chat()."""
|
| 228 |
|
|
@@ -231,10 +210,10 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 231 |
else:
|
| 232 |
message = kwargs.get("message")
|
| 233 |
history = kwargs.get("history")
|
| 234 |
-
return self.chat(message, history)
|
| 235 |
|
| 236 |
if __name__ == "__main__":
|
| 237 |
|
| 238 |
my_profile = MyProfileAvatarChat()
|
| 239 |
-
gr.ChatInterface(my_profile.chat_traced,
|
| 240 |
|
|
|
|
| 1 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import traceback
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
+
import gradio as gr
|
| 5 |
|
| 6 |
+
from openai import AsyncOpenAI
|
| 7 |
+
from langsmith import traceable
|
| 8 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
from src.prompts import system_prompt, evaluator_system_prompt
|
| 11 |
+
from src.name_extractor import extract_name_gliner
|
| 12 |
+
from src.models import Evaluation, CacheEntry
|
| 13 |
+
from src.config import Config
|
| 14 |
+
from src.utils import FileReader
|
| 15 |
+
|
| 16 |
+
# ---------------------------------------------------------------------
|
| 17 |
+
# CHAT CLASS
|
| 18 |
+
# ---------------------------------------------------------------------
|
| 19 |
class MyProfileAvatarChat(Config, FileReader):
|
| 20 |
def __init__(self, max_history_turns: int = 10, similarity_thresh: float = 0.80):
|
| 21 |
Config.__init__(self)
|
| 22 |
FileReader.__init__(self)
|
| 23 |
|
| 24 |
+
# 1. Try to load from env
|
| 25 |
self.name = os.getenv("PROFIL_NAME")
|
| 26 |
+
if not self.name:
|
| 27 |
+
name = extract_name_gliner(self.linkedin_profile)
|
| 28 |
+
self.name = name["person"][0]
|
| 29 |
+
print(f"Name found on Linkedin profile: {self.name}")
|
| 30 |
+
|
| 31 |
+
self.openai = AsyncOpenAI(api_key=self.openai_api_key)
|
| 32 |
# gemini (evaluator) uses google_api_key via OpenAI wrapper
|
| 33 |
+
self.gemini = AsyncOpenAI(api_key=self.google_api_key,
|
| 34 |
base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
| 35 |
|
| 36 |
# Build system prompt once
|
|
|
|
| 52 |
def format_history(self, history):
|
| 53 |
return "\n".join(f"{turn['role'].upper()}: {turn['content']}" for turn in history)
|
| 54 |
|
| 55 |
+
async def embed(self, text: str):
|
| 56 |
"""Return embedding vector for text (uses OpenAI embeddings)."""
|
| 57 |
+
resp = await self.openai.embeddings.create(
|
| 58 |
model="text-embedding-3-small",
|
| 59 |
input=text
|
| 60 |
)
|
|
|
|
| 63 |
def cosine_sim(self, a: np.ndarray, b: np.ndarray) -> float:
|
| 64 |
return float(cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0])
|
| 65 |
|
| 66 |
+
async def find_similar_question(self, new_question: str):
|
| 67 |
if not self.qa_cache:
|
| 68 |
return None, 0.0
|
| 69 |
+
new_emb = await self.embed(new_question)
|
| 70 |
best = None
|
| 71 |
best_sim = 0.0
|
| 72 |
for item in self.qa_cache:
|
|
|
|
| 87 |
return user_prompt
|
| 88 |
|
| 89 |
@traceable(run_type="tool", name="EvaluateReply")
|
| 90 |
+
async def evaluate(self, reply, message, history, **kwargs) -> Evaluation:
|
| 91 |
messages = [{"role": "system", "content": self.evaluator_system_prompt}] + \
|
| 92 |
[{"role": "user", "content": self.evaluator_user_prompt(reply, message, history)}]
|
| 93 |
+
response = await self.gemini.chat.completions.parse(
|
| 94 |
model="gemini-2.0-flash",
|
| 95 |
messages=messages,
|
| 96 |
response_format=Evaluation
|
|
|
|
| 98 |
return response.choices[0].message.parsed
|
| 99 |
|
| 100 |
@traceable(run_type="llm", name="RerunRejectedAnswer")
|
| 101 |
+
async def rerun(self, reply, message, history, feedback, **kwargs):
|
| 102 |
updated_system_prompt = (
|
| 103 |
self.system_prompt
|
| 104 |
+ "\n\n## Previous answer rejected\n"
|
|
|
|
| 109 |
messages = [{"role": "system", "content": updated_system_prompt}] + history + \
|
| 110 |
[{"role": "user", "content": message}]
|
| 111 |
try:
|
| 112 |
+
response = await self.openai.chat.completions.create(
|
| 113 |
model="gpt-4o-mini",
|
| 114 |
messages=messages
|
| 115 |
)
|
|
|
|
| 118 |
print(f"Error during rerun: {e}")
|
| 119 |
return reply
|
| 120 |
|
| 121 |
+
async def chat(self, message: str, history: list, **kwargs):
|
| 122 |
"""Main chat. Uses semantic QA cache, sliding window for tokens, evaluation and rerun
|
| 123 |
|
| 124 |
Args:
|
|
|
|
| 138 |
return qa["answer"]
|
| 139 |
|
| 140 |
# Check for semantically similar previous question
|
| 141 |
+
similar, sim_score = await self.find_similar_question(message)
|
| 142 |
if similar:
|
| 143 |
print(f"Reusing past answer (similarity={sim_score:.2%})")
|
| 144 |
refine_prompt = (
|
|
|
|
| 151 |
messages = [{"role": "system", "content": self.system_prompt},
|
| 152 |
{"role": "user", "content": refine_prompt}]
|
| 153 |
try:
|
| 154 |
+
response = await self.openai.chat.completions.create(
|
| 155 |
model="gpt-4o-mini",
|
| 156 |
messages=messages
|
| 157 |
)
|
|
|
|
| 166 |
messages = [{"role": "system", "content": self.system_prompt}] + context_for_api
|
| 167 |
|
| 168 |
try:
|
| 169 |
+
response = await self.openai.chat.completions.create(
|
| 170 |
model="gpt-4o-mini",
|
| 171 |
messages=messages
|
| 172 |
)
|
|
|
|
| 175 |
print(f"Error calling OpenAI: {e}")
|
| 176 |
# Evaluate the reply
|
| 177 |
try:
|
| 178 |
+
evaluation = await self.evaluate(reply, message, history)
|
| 179 |
except Exception as e:
|
| 180 |
print(f"Error during evaluation: {e}")
|
| 181 |
evaluation = Evaluation(is_acceptable=True, feedback="Evaluation failed, accepting reply")
|
| 182 |
|
| 183 |
+
if evaluation:
|
| 184 |
+
print(f"Feedback from Evaluation:\n{evaluation.feedback}\n\n")
|
| 185 |
if not evaluation.is_acceptable:
|
| 186 |
+
reply = await self.rerun(reply, message, history, evaluation.feedback)
|
| 187 |
|
| 188 |
try:
|
| 189 |
+
emb = await self.embed(message)
|
| 190 |
except Exception as e:
|
| 191 |
print(f"Embedding Error: {e}")
|
| 192 |
traceback.print_exc()
|
| 193 |
emb = None
|
| 194 |
|
| 195 |
+
self.qa_cache.append(CacheEntry(
|
| 196 |
+
question=message,
|
| 197 |
+
answer=reply,
|
| 198 |
+
embedding=emb.tolist() if hasattr(emb, "tolist") else emb
|
| 199 |
+
))
|
| 200 |
|
| 201 |
return reply
|
| 202 |
|
| 203 |
@traceable(run_type="chain", name="ProfileChat")
|
| 204 |
+
async def chat_traced(self, *args, **kwargs):
|
| 205 |
"""Wrapper for LangSmith tracing. Accepts any extra arguments
|
| 206 |
(like from Gradio) and passes only message/history to chat()."""
|
| 207 |
|
|
|
|
| 210 |
else:
|
| 211 |
message = kwargs.get("message")
|
| 212 |
history = kwargs.get("history")
|
| 213 |
+
return await self.chat(message, history)
|
| 214 |
|
| 215 |
if __name__ == "__main__":
|
| 216 |
|
| 217 |
my_profile = MyProfileAvatarChat()
|
| 218 |
+
gr.ChatInterface(my_profile.chat_traced,type="messages").launch()
|
| 219 |
|
notebooks/chat_with_avatar.ipynb
CHANGED
|
@@ -10,7 +10,7 @@
|
|
| 10 |
},
|
| 11 |
{
|
| 12 |
"cell_type": "code",
|
| 13 |
-
"execution_count":
|
| 14 |
"id": "5dcb5ef0",
|
| 15 |
"metadata": {},
|
| 16 |
"outputs": [],
|
|
@@ -24,7 +24,7 @@
|
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"cell_type": "code",
|
| 27 |
-
"execution_count":
|
| 28 |
"id": "f5176f5c",
|
| 29 |
"metadata": {},
|
| 30 |
"outputs": [],
|
|
@@ -345,10 +345,65 @@
|
|
| 345 |
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 346 |
]
|
| 347 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
{
|
| 349 |
"cell_type": "code",
|
| 350 |
"execution_count": null,
|
| 351 |
-
"id": "
|
| 352 |
"metadata": {},
|
| 353 |
"outputs": [],
|
| 354 |
"source": []
|
|
|
|
| 10 |
},
|
| 11 |
{
|
| 12 |
"cell_type": "code",
|
| 13 |
+
"execution_count": 1,
|
| 14 |
"id": "5dcb5ef0",
|
| 15 |
"metadata": {},
|
| 16 |
"outputs": [],
|
|
|
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"cell_type": "code",
|
| 27 |
+
"execution_count": 2,
|
| 28 |
"id": "f5176f5c",
|
| 29 |
"metadata": {},
|
| 30 |
"outputs": [],
|
|
|
|
| 345 |
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 346 |
]
|
| 347 |
},
|
| 348 |
+
{
|
| 349 |
+
"cell_type": "code",
|
| 350 |
+
"execution_count": 3,
|
| 351 |
+
"id": "9f09a644",
|
| 352 |
+
"metadata": {},
|
| 353 |
+
"outputs": [],
|
| 354 |
+
"source": [
|
| 355 |
+
"import pyttsx3\n",
|
| 356 |
+
"\n",
|
| 357 |
+
"# Initialize the TTS engine\n",
|
| 358 |
+
"engine = pyttsx3.init()\n",
|
| 359 |
+
"\n",
|
| 360 |
+
"# Set properties (optional)\n",
|
| 361 |
+
"engine.setProperty('rate', 150) # Speed of speech (words per minute)\n",
|
| 362 |
+
"engine.setProperty(\"volume\", 1.0) # Volume (0.0 to 1.0)\n",
|
| 363 |
+
"\n",
|
| 364 |
+
"# Text to speak\n",
|
| 365 |
+
"text_to_read = \"Hello! I’m Mariusz Bronowicki, a professional committed to delivering high-quality work in my field. \\\n",
|
| 366 |
+
" I have a diverse background and skill set that allows me to tackle various challenges effectively. \\\n",
|
| 367 |
+
" If you have any questions about my career, experience, or skills, feel free to ask! I'm here to help.\"\n",
|
| 368 |
+
"\n",
|
| 369 |
+
"# Speak the text\n",
|
| 370 |
+
"engine.say(text_to_read)\n",
|
| 371 |
+
"\n",
|
| 372 |
+
"# Wait until speaking is finishing\n",
|
| 373 |
+
"engine.runAndWait()"
|
| 374 |
+
]
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"cell_type": "code",
|
| 378 |
+
"execution_count": null,
|
| 379 |
+
"id": "333ee1bc",
|
| 380 |
+
"metadata": {},
|
| 381 |
+
"outputs": [],
|
| 382 |
+
"source": [
|
| 383 |
+
"from openai import OpenAI\n",
|
| 384 |
+
"\n",
|
| 385 |
+
"text_to_read = \"Hello! I’m Mariusz Bronowicki, a professional committed to delivering high-quality work in my field. \\\n",
|
| 386 |
+
" I have a diverse background and skill set that allows me to tackle various challenges effectively. \\\n",
|
| 387 |
+
" If you have any questions about my career, experience, or skills, feel free to ask! I'm here to help.\"\n",
|
| 388 |
+
"\n",
|
| 389 |
+
"\n",
|
| 390 |
+
"client = OpenAI()\n",
|
| 391 |
+
"\n",
|
| 392 |
+
"audio = client.audio.speech.create(\n",
|
| 393 |
+
" model=\"gpt-4o-mini-tts\",\n",
|
| 394 |
+
" voice=\"alloy\",\n",
|
| 395 |
+
" input=text_to_read\n",
|
| 396 |
+
")\n",
|
| 397 |
+
"\n",
|
| 398 |
+
"# Save to file\n",
|
| 399 |
+
"with open(\"../me/output.mp3\", \"wb\") as f:\n",
|
| 400 |
+
" f.write(audio.read())"
|
| 401 |
+
]
|
| 402 |
+
},
|
| 403 |
{
|
| 404 |
"cell_type": "code",
|
| 405 |
"execution_count": null,
|
| 406 |
+
"id": "40718314",
|
| 407 |
"metadata": {},
|
| 408 |
"outputs": [],
|
| 409 |
"source": []
|
pyproject.toml
CHANGED
|
@@ -40,6 +40,11 @@ dependencies = [
|
|
| 40 |
"speedtest-cli>=2.1.3",
|
| 41 |
"scikit-learn>=1.7.2",
|
| 42 |
#"wikipedia>=1.4.0",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
]
|
| 44 |
|
| 45 |
[dependency-groups]
|
|
|
|
| 40 |
"speedtest-cli>=2.1.3",
|
| 41 |
"scikit-learn>=1.7.2",
|
| 42 |
#"wikipedia>=1.4.0",
|
| 43 |
+
"pyttsx3>=2.99",
|
| 44 |
+
"ffmpeg>=1.4",
|
| 45 |
+
"transformers>=4.57",
|
| 46 |
+
"torch==2.9.1",
|
| 47 |
+
"gliner2==1.0.2"
|
| 48 |
]
|
| 49 |
|
| 50 |
[dependency-groups]
|
src/config.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
from langsmith import Client, traceable
|
| 5 |
+
|
| 6 |
+
# ---------------------------------------------------------------------
|
| 7 |
+
# CONFIG
|
| 8 |
+
# ---------------------------------------------------------------------
|
| 9 |
+
class Config:
    """Loads API credentials from the environment and wires up LangSmith.

    Attributes set on the instance:
        openai_api_key / google_api_key / langsmith_api_key /
        langsmith_endpoint — raw values from the environment (may be None).
        langsmith_client — LangSmith ``Client`` built from the key above.
    """

    def __init__(self):
        # Re-read .env each time so edited keys take effect without a restart.
        load_dotenv(override=True)

        # Pull every credential the app needs from the environment.
        env_keys = (
            ("openai_api_key", "OPENAI_API_KEY"),
            ("google_api_key", "GOOGLE_API_KEY"),
            ("langsmith_api_key", "LANGSMITH_API_KEY"),
            ("langsmith_endpoint", "LANGSMITH_ENDPOINT"),
        )
        for attr, var in env_keys:
            setattr(self, attr, os.getenv(var))

        # Initialize LangSmith
        self.langsmith_client = Client(api_key=self.langsmith_api_key)
|
src/file_loader.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import lru_cache
|
| 2 |
+
from pypdf import PdfReader
|
| 3 |
+
|
| 4 |
+
@lru_cache()
def load_pdf_text(path: str) -> str:
    """Extract and concatenate the text of every page of the PDF at *path*.

    Memoized per path via ``lru_cache``. Returns "" when the file is
    missing or unparsable, so callers can treat the document as optional.
    """
    try:
        reader = PdfReader(path)
        # extract_text() can return None (e.g. image-only pages); skip those.
        # join() builds the result in one pass instead of repeated `+=`.
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any parse/IO failure means "no profile text".
        return ""
|
| 16 |
+
|
| 17 |
+
@lru_cache()
def load_text_file(path: str) -> str:
    """Read the UTF-8 text file at *path*, memoized per path.

    Returns "" when the file is missing or unreadable so the extra
    profile information stays optional.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except (OSError, UnicodeDecodeError):
        # Narrowed from a bare `except:`: only expected I/O or decoding
        # failures are treated as "no additional info".
        return ""
|
src/models.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
|
| 3 |
+
class Evaluation(BaseModel):
    """Structured verdict produced by the Gemini evaluator for a chat reply."""

    # Whether the agent's reply passes quality review.
    is_acceptable: bool
    # Evaluator's explanation; fed back into the rerun prompt when rejected.
    feedback: str
|
| 6 |
+
|
| 7 |
+
class CacheEntry(BaseModel):
    """One memoized question/answer pair in the semantic answer cache."""

    # The user question as originally asked.
    question: str
    # The reply that was served for that question.
    answer: str
    # Embedding vector of the question, used for cosine-similarity lookup
    # of semantically similar past questions.
    embedding: list[float]
|
src/name_extractor.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from gliner2 import GLiNER2
|
| 2 |
+
|
| 3 |
+
# Lazily-created singleton: loading pretrained weights is expensive, so do
# it once on first use instead of on every call.
_extractor = None


def extract_name_gliner(text: str) -> dict:
    """Extract "person" entities from the beginning of *text* with GLiNER2.

    Returns the model's ``entities`` mapping (e.g. ``{"person": [...]}``);
    the previous ``-> str`` annotation was wrong — callers index into the
    mapping (``result["person"][0]``).

    Only the first 700 characters are scanned: enough to cover the header
    of a LinkedIn profile export while keeping inference fast.
    """
    global _extractor
    if _extractor is None:
        _extractor = GLiNER2.from_pretrained("fastino/gliner2-base-v1")
    result = _extractor.extract_entities(text[:700], ["person"])
    return result["entities"]
|
src/prompts.py
CHANGED
|
@@ -9,11 +9,16 @@ system_prompt = f"You are acting as {name}. You are answering question on {name}
|
|
| 9 |
particularly question related to {name}'s career, background, skills and experience. \
|
| 10 |
Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \
|
| 11 |
Be professional and engaging, as if talking to a potential client or future employer who came across the website. \
|
| 12 |
-
If you do not know the answer, say so. \
|
| 13 |
-
If you need to check e.g salary expectation question then use tools to see what range for such position is.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
evaluator_system_prompt = f"You are an evaluator that decides whether a response to a question is acceeptable. \
|
| 16 |
You are provided with a conversation btween a User and an Agent. Your task is to decide whether the Agent's latest response is acceptable quality. \
|
| 17 |
The Agent is playing the role of {name} and is representing {name} on their website. \
|
| 18 |
The Agent has been instructed to be professional and engaging, as if talking to a potential client or future employer who came across the website. \
|
| 19 |
-
The Agent has been provided with context on {name} in the form of their
|
|
|
|
| 9 |
particularly question related to {name}'s career, background, skills and experience. \
|
| 10 |
Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \
|
| 11 |
Be professional and engaging, as if talking to a potential client or future employer who came across the website. \
|
| 12 |
+
If you do not know the answer, say so and ask for contact to better answer questions agant cannot. \
|
| 13 |
+
If you need to check e.g salary expectation question then use tools to see what range for such position is. \
|
| 14 |
+
Do not answer any questions which are not related to {name}."
|
| 15 |
+
# When asked about professional experience, focus primarily on your data scientist experience. You may briefly mention past roles (e.g., Tesco, education) and acknowledge that your career path hasn’t been linear, but emphasize that this variety has given you a broader perspective and valuable transferable skills. \
|
| 16 |
+
# Whenever appropriate, invite the person to contact you via email if they have further questions or would like to arrange a conversation.
|
| 17 |
+
# If you don’t know the answer, state that clearly and honestly. \
|
| 18 |
+
# Don't use technologies if I do not have experience as Data Scientist e.g. R language - you never had experience with it.
|
| 19 |
|
| 20 |
evaluator_system_prompt = f"You are an evaluator that decides whether a response to a question is acceeptable. \
|
| 21 |
You are provided with a conversation btween a User and an Agent. Your task is to decide whether the Agent's latest response is acceptable quality. \
|
| 22 |
The Agent is playing the role of {name} and is representing {name} on their website. \
|
| 23 |
The Agent has been instructed to be professional and engaging, as if talking to a potential client or future employer who came across the website. \
|
| 24 |
+
The Agent has been provided with context on {name} in the form of their additional information and Linkedin details. Here's the information:"
|
src/utils.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.file_loader import load_pdf_text, load_text_file
|
| 2 |
+
|
| 3 |
+
# ---------------------------------------------------------------------
|
| 4 |
+
# FILE READER
|
| 5 |
+
# ---------------------------------------------------------------------
|
| 6 |
+
# ---------------------------------------------------------------------
# FILE READER
# ---------------------------------------------------------------------
class FileReader:
    """Mixin that loads the profile documents backing the chat context.

    Sets ``linkedin_profile`` (PDF text) and ``additional_info`` (plain
    text); both fall back to "" when the source file is unavailable.
    """

    def __init__(self):
        # Paths are relative to the repository root, where the app launches.
        self.linkedin_profile = load_pdf_text("./me/Linkedin_Profile.pdf")
        self.additional_info = load_text_file("./me/additional_info.txt")
|
uv.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|