Spaces:
Running
Running
chat created with new logic
Browse files- .gitignore +1 -0
- README.md +1 -1
- app.py +46 -67
- notebooks/chat_with_avatar.ipynb +58 -3
- pyproject.toml +5 -0
- src/config.py +18 -0
- src/file_loader.py +23 -0
- src/models.py +10 -0
- src/name_extractor.py +6 -0
- src/prompts.py +8 -3
- src/utils.py +13 -0
- uv.lock +0 -0
.gitignore
CHANGED
|
@@ -211,3 +211,4 @@ __marimo__/
|
|
| 211 |
|
| 212 |
# My folders
|
| 213 |
/me/*.txt
|
|
|
|
|
|
| 211 |
|
| 212 |
# My folders
|
| 213 |
/me/*.txt
|
| 214 |
+
/archive
|
README.md
CHANGED
|
@@ -9,7 +9,7 @@ pinned: false
|
|
| 9 |
|
| 10 |
# Profile Avatar Chat App
|
| 11 |
|
| 12 |
-
This repository contains the code for a robust AI-powered chat service that acts as a personal profile avatar. The chat responds based on my LinkedIn profile, professional
|
| 13 |
|
| 14 |
Key features implemented for robustness:
|
| 15 |
|
|
|
|
| 9 |
|
| 10 |
# Profile Avatar Chat App
|
| 11 |
|
| 12 |
+
This repository contains the code for a robust AI-powered chat service that acts as a personal profile avatar. The chat responds based on my LinkedIn profile, professional and other additional information.
|
| 13 |
|
| 14 |
Key features implemented for robustness:
|
| 15 |
|
app.py
CHANGED
|
@@ -1,59 +1,36 @@
|
|
| 1 |
import os
|
| 2 |
-
from dotenv import load_dotenv
|
| 3 |
-
from pydantic import BaseModel
|
| 4 |
-
from openai import OpenAI
|
| 5 |
-
from pypdf import PdfReader
|
| 6 |
-
import gradio as gr
|
| 7 |
-
from src.prompts import system_prompt, evaluator_system_prompt
|
| 8 |
-
from langsmith import Client, traceable
|
| 9 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
| 10 |
import traceback
|
| 11 |
-
|
| 12 |
import numpy as np
|
|
|
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class Config:
|
| 19 |
-
def __init__(self):
|
| 20 |
-
load_dotenv(override=True)
|
| 21 |
-
self.openai_api_key = os.getenv("OPENAI_API_KEY")
|
| 22 |
-
self.google_api_key = os.getenv("GOOGLE_API_KEY")
|
| 23 |
-
self.langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
|
| 24 |
-
self.langsmith_endpoint = os.getenv("LANGSMITH_ENDPOINT")
|
| 25 |
-
|
| 26 |
-
# Initialize LangSmith
|
| 27 |
-
self.langsmith_client = Client(api_key=self.langsmith_api_key)
|
| 28 |
-
|
| 29 |
-
class FileReader:
|
| 30 |
-
def __init__(self):
|
| 31 |
-
self.linkedin_profile = ""
|
| 32 |
-
try:
|
| 33 |
-
reader = PdfReader("../me/Linkedin_Profile.pdf")
|
| 34 |
-
for page in reader.pages:
|
| 35 |
-
text = page.extract_text()
|
| 36 |
-
if text:
|
| 37 |
-
self.linkedin_profile += text
|
| 38 |
-
except Exception:
|
| 39 |
-
# If file missing, keep empty
|
| 40 |
-
self.linkedin_profile = ""
|
| 41 |
-
try:
|
| 42 |
-
with open("../me/additional_info.txt", "r", encoding="utf-8") as f:
|
| 43 |
-
self.additional_info = f.read()
|
| 44 |
-
except:
|
| 45 |
-
self.additional_info = ""
|
| 46 |
-
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
class MyProfileAvatarChat(Config, FileReader):
|
| 49 |
def __init__(self, max_history_turns: int = 10, similarity_thresh: float = 0.80):
|
| 50 |
Config.__init__(self)
|
| 51 |
FileReader.__init__(self)
|
| 52 |
|
|
|
|
| 53 |
self.name = os.getenv("PROFIL_NAME")
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
# gemini (evaluator) uses google_api_key via OpenAI wrapper
|
| 56 |
-
self.gemini =
|
| 57 |
base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
| 58 |
|
| 59 |
# Build system prompt once
|
|
@@ -75,9 +52,9 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 75 |
def format_history(self, history):
|
| 76 |
return "\n".join(f"{turn['role'].upper()}: {turn['content']}" for turn in history)
|
| 77 |
|
| 78 |
-
def embed(self, text: str):
|
| 79 |
"""Return embedding vector for text (uses OpenAI embeddings)."""
|
| 80 |
-
resp = self.openai.embeddings.create(
|
| 81 |
model="text-embedding-3-small",
|
| 82 |
input=text
|
| 83 |
)
|
|
@@ -86,10 +63,10 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 86 |
def cosine_sim(self, a: np.ndarray, b: np.ndarray) -> float:
|
| 87 |
return float(cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0])
|
| 88 |
|
| 89 |
-
def find_similar_question(self, new_question: str):
|
| 90 |
if not self.qa_cache:
|
| 91 |
return None, 0.0
|
| 92 |
-
new_emb = self.embed(new_question)
|
| 93 |
best = None
|
| 94 |
best_sim = 0.0
|
| 95 |
for item in self.qa_cache:
|
|
@@ -110,10 +87,10 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 110 |
return user_prompt
|
| 111 |
|
| 112 |
@traceable(run_type="tool", name="EvaluateReply")
|
| 113 |
-
def evaluate(self, reply, message, history, **kwargs) -> Evaluation:
|
| 114 |
messages = [{"role": "system", "content": self.evaluator_system_prompt}] + \
|
| 115 |
[{"role": "user", "content": self.evaluator_user_prompt(reply, message, history)}]
|
| 116 |
-
response = self.gemini.chat.completions.parse(
|
| 117 |
model="gemini-2.0-flash",
|
| 118 |
messages=messages,
|
| 119 |
response_format=Evaluation
|
|
@@ -121,7 +98,7 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 121 |
return response.choices[0].message.parsed
|
| 122 |
|
| 123 |
@traceable(run_type="llm", name="RerunRejectedAnswer")
|
| 124 |
-
def rerun(self, reply, message, history, feedback, **kwargs):
|
| 125 |
updated_system_prompt = (
|
| 126 |
self.system_prompt
|
| 127 |
+ "\n\n## Previous answer rejected\n"
|
|
@@ -132,7 +109,7 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 132 |
messages = [{"role": "system", "content": updated_system_prompt}] + history + \
|
| 133 |
[{"role": "user", "content": message}]
|
| 134 |
try:
|
| 135 |
-
response = self.openai.chat.completions.create(
|
| 136 |
model="gpt-4o-mini",
|
| 137 |
messages=messages
|
| 138 |
)
|
|
@@ -141,7 +118,7 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 141 |
print(f"Error during rerun: {e}")
|
| 142 |
return reply
|
| 143 |
|
| 144 |
-
def chat(self, message: str, history: list, **kwargs):
|
| 145 |
"""Main chat. Uses semantic QA cache, sliding window for tokens, evaluation and rerun
|
| 146 |
|
| 147 |
Args:
|
|
@@ -161,7 +138,7 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 161 |
return qa["answer"]
|
| 162 |
|
| 163 |
# Check for semantically similar previous question
|
| 164 |
-
similar, sim_score = self.find_similar_question(message)
|
| 165 |
if similar:
|
| 166 |
print(f"Reusing past answer (similarity={sim_score:.2%})")
|
| 167 |
refine_prompt = (
|
|
@@ -174,7 +151,7 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 174 |
messages = [{"role": "system", "content": self.system_prompt},
|
| 175 |
{"role": "user", "content": refine_prompt}]
|
| 176 |
try:
|
| 177 |
-
response = self.openai.chat.completions.create(
|
| 178 |
model="gpt-4o-mini",
|
| 179 |
messages=messages
|
| 180 |
)
|
|
@@ -189,7 +166,7 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 189 |
messages = [{"role": "system", "content": self.system_prompt}] + context_for_api
|
| 190 |
|
| 191 |
try:
|
| 192 |
-
response = self.openai.chat.completions.create(
|
| 193 |
model="gpt-4o-mini",
|
| 194 |
messages=messages
|
| 195 |
)
|
|
@@ -198,31 +175,33 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 198 |
print(f"Error calling OpenAI: {e}")
|
| 199 |
# Evaluate the reply
|
| 200 |
try:
|
| 201 |
-
evaluation = self.evaluate(reply, message, history)
|
| 202 |
except Exception as e:
|
| 203 |
print(f"Error during evaluation: {e}")
|
| 204 |
evaluation = Evaluation(is_acceptable=True, feedback="Evaluation failed, accepting reply")
|
| 205 |
|
|
|
|
|
|
|
| 206 |
if not evaluation.is_acceptable:
|
| 207 |
-
reply = self.rerun(reply, message, history, evaluation.feedback)
|
| 208 |
|
| 209 |
try:
|
| 210 |
-
emb = self.embed(message)
|
| 211 |
except Exception as e:
|
| 212 |
print(f"Embedding Error: {e}")
|
| 213 |
traceback.print_exc()
|
| 214 |
emb = None
|
| 215 |
|
| 216 |
-
self.qa_cache.append(
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
|
| 222 |
return reply
|
| 223 |
|
| 224 |
@traceable(run_type="chain", name="ProfileChat")
|
| 225 |
-
def chat_traced(self, *args, **kwargs):
|
| 226 |
"""Wrapper for LangSmith tracing. Accepts any extra arguments
|
| 227 |
(like from Gradio) and passes only message/history to chat()."""
|
| 228 |
|
|
@@ -231,10 +210,10 @@ class MyProfileAvatarChat(Config, FileReader):
|
|
| 231 |
else:
|
| 232 |
message = kwargs.get("message")
|
| 233 |
history = kwargs.get("history")
|
| 234 |
-
return self.chat(message, history)
|
| 235 |
|
| 236 |
if __name__ == "__main__":
|
| 237 |
|
| 238 |
my_profile = MyProfileAvatarChat()
|
| 239 |
-
gr.ChatInterface(my_profile.chat_traced,
|
| 240 |
|
|
|
|
| 1 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import traceback
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
+
import gradio as gr
|
| 5 |
|
| 6 |
+
from openai import AsyncOpenAI
|
| 7 |
+
from langsmith import traceable
|
| 8 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
from src.prompts import system_prompt, evaluator_system_prompt
|
| 11 |
+
from src.name_extractor import extract_name_gliner
|
| 12 |
+
from src.models import Evaluation, CacheEntry
|
| 13 |
+
from src.config import Config
|
| 14 |
+
from src.utils import FileReader
|
| 15 |
+
|
| 16 |
+
# ---------------------------------------------------------------------
|
| 17 |
+
# CHAT CLASS
|
| 18 |
+
# ---------------------------------------------------------------------
|
| 19 |
class MyProfileAvatarChat(Config, FileReader):
|
| 20 |
def __init__(self, max_history_turns: int = 10, similarity_thresh: float = 0.80):
|
| 21 |
Config.__init__(self)
|
| 22 |
FileReader.__init__(self)
|
| 23 |
|
| 24 |
+
# 1. Try to load from env
|
| 25 |
self.name = os.getenv("PROFIL_NAME")
|
| 26 |
+
if not self.name:
|
| 27 |
+
name = extract_name_gliner(self.linkedin_profile)
|
| 28 |
+
self.name = name["person"][0]
|
| 29 |
+
print(f"Name found on Linkedin profile: {self.name}")
|
| 30 |
+
|
| 31 |
+
self.openai = AsyncOpenAI(api_key=self.openai_api_key)
|
| 32 |
# gemini (evaluator) uses google_api_key via OpenAI wrapper
|
| 33 |
+
self.gemini = AsyncOpenAI(api_key=self.google_api_key,
|
| 34 |
base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
| 35 |
|
| 36 |
# Build system prompt once
|
|
|
|
| 52 |
def format_history(self, history):
|
| 53 |
return "\n".join(f"{turn['role'].upper()}: {turn['content']}" for turn in history)
|
| 54 |
|
| 55 |
+
async def embed(self, text: str):
|
| 56 |
"""Return embedding vector for text (uses OpenAI embeddings)."""
|
| 57 |
+
resp = await self.openai.embeddings.create(
|
| 58 |
model="text-embedding-3-small",
|
| 59 |
input=text
|
| 60 |
)
|
|
|
|
| 63 |
def cosine_sim(self, a: np.ndarray, b: np.ndarray) -> float:
|
| 64 |
return float(cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0])
|
| 65 |
|
| 66 |
+
async def find_similar_question(self, new_question: str):
|
| 67 |
if not self.qa_cache:
|
| 68 |
return None, 0.0
|
| 69 |
+
new_emb = await self.embed(new_question)
|
| 70 |
best = None
|
| 71 |
best_sim = 0.0
|
| 72 |
for item in self.qa_cache:
|
|
|
|
| 87 |
return user_prompt
|
| 88 |
|
| 89 |
@traceable(run_type="tool", name="EvaluateReply")
|
| 90 |
+
async def evaluate(self, reply, message, history, **kwargs) -> Evaluation:
|
| 91 |
messages = [{"role": "system", "content": self.evaluator_system_prompt}] + \
|
| 92 |
[{"role": "user", "content": self.evaluator_user_prompt(reply, message, history)}]
|
| 93 |
+
response = await self.gemini.chat.completions.parse(
|
| 94 |
model="gemini-2.0-flash",
|
| 95 |
messages=messages,
|
| 96 |
response_format=Evaluation
|
|
|
|
| 98 |
return response.choices[0].message.parsed
|
| 99 |
|
| 100 |
@traceable(run_type="llm", name="RerunRejectedAnswer")
|
| 101 |
+
async def rerun(self, reply, message, history, feedback, **kwargs):
|
| 102 |
updated_system_prompt = (
|
| 103 |
self.system_prompt
|
| 104 |
+ "\n\n## Previous answer rejected\n"
|
|
|
|
| 109 |
messages = [{"role": "system", "content": updated_system_prompt}] + history + \
|
| 110 |
[{"role": "user", "content": message}]
|
| 111 |
try:
|
| 112 |
+
response = await self.openai.chat.completions.create(
|
| 113 |
model="gpt-4o-mini",
|
| 114 |
messages=messages
|
| 115 |
)
|
|
|
|
| 118 |
print(f"Error during rerun: {e}")
|
| 119 |
return reply
|
| 120 |
|
| 121 |
+
async def chat(self, message: str, history: list, **kwargs):
|
| 122 |
"""Main chat. Uses semantic QA cache, sliding window for tokens, evaluation and rerun
|
| 123 |
|
| 124 |
Args:
|
|
|
|
| 138 |
return qa["answer"]
|
| 139 |
|
| 140 |
# Check for semantically similar previous question
|
| 141 |
+
similar, sim_score = await self.find_similar_question(message)
|
| 142 |
if similar:
|
| 143 |
print(f"Reusing past answer (similarity={sim_score:.2%})")
|
| 144 |
refine_prompt = (
|
|
|
|
| 151 |
messages = [{"role": "system", "content": self.system_prompt},
|
| 152 |
{"role": "user", "content": refine_prompt}]
|
| 153 |
try:
|
| 154 |
+
response = await self.openai.chat.completions.create(
|
| 155 |
model="gpt-4o-mini",
|
| 156 |
messages=messages
|
| 157 |
)
|
|
|
|
| 166 |
messages = [{"role": "system", "content": self.system_prompt}] + context_for_api
|
| 167 |
|
| 168 |
try:
|
| 169 |
+
response = await self.openai.chat.completions.create(
|
| 170 |
model="gpt-4o-mini",
|
| 171 |
messages=messages
|
| 172 |
)
|
|
|
|
| 175 |
print(f"Error calling OpenAI: {e}")
|
| 176 |
# Evaluate the reply
|
| 177 |
try:
|
| 178 |
+
evaluation = await self.evaluate(reply, message, history)
|
| 179 |
except Exception as e:
|
| 180 |
print(f"Error during evaluation: {e}")
|
| 181 |
evaluation = Evaluation(is_acceptable=True, feedback="Evaluation failed, accepting reply")
|
| 182 |
|
| 183 |
+
if evaluation:
|
| 184 |
+
print(f"Feedback from Evaluation:\n{evaluation.feedback}\n\n")
|
| 185 |
if not evaluation.is_acceptable:
|
| 186 |
+
reply = await self.rerun(reply, message, history, evaluation.feedback)
|
| 187 |
|
| 188 |
try:
|
| 189 |
+
emb = await self.embed(message)
|
| 190 |
except Exception as e:
|
| 191 |
print(f"Embedding Error: {e}")
|
| 192 |
traceback.print_exc()
|
| 193 |
emb = None
|
| 194 |
|
| 195 |
+
self.qa_cache.append(CacheEntry(
|
| 196 |
+
question=message,
|
| 197 |
+
answer=reply,
|
| 198 |
+
embedding=emb.tolist() if hasattr(emb, "tolist") else emb
|
| 199 |
+
))
|
| 200 |
|
| 201 |
return reply
|
| 202 |
|
| 203 |
@traceable(run_type="chain", name="ProfileChat")
|
| 204 |
+
async def chat_traced(self, *args, **kwargs):
|
| 205 |
"""Wrapper for LangSmith tracing. Accepts any extra arguments
|
| 206 |
(like from Gradio) and passes only message/history to chat()."""
|
| 207 |
|
|
|
|
| 210 |
else:
|
| 211 |
message = kwargs.get("message")
|
| 212 |
history = kwargs.get("history")
|
| 213 |
+
return await self.chat(message, history)
|
| 214 |
|
| 215 |
if __name__ == "__main__":
|
| 216 |
|
| 217 |
my_profile = MyProfileAvatarChat()
|
| 218 |
+
gr.ChatInterface(my_profile.chat_traced,type="messages").launch()
|
| 219 |
|
notebooks/chat_with_avatar.ipynb
CHANGED
|
@@ -10,7 +10,7 @@
|
|
| 10 |
},
|
| 11 |
{
|
| 12 |
"cell_type": "code",
|
| 13 |
-
"execution_count":
|
| 14 |
"id": "5dcb5ef0",
|
| 15 |
"metadata": {},
|
| 16 |
"outputs": [],
|
|
@@ -24,7 +24,7 @@
|
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"cell_type": "code",
|
| 27 |
-
"execution_count":
|
| 28 |
"id": "f5176f5c",
|
| 29 |
"metadata": {},
|
| 30 |
"outputs": [],
|
|
@@ -345,10 +345,65 @@
|
|
| 345 |
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 346 |
]
|
| 347 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
{
|
| 349 |
"cell_type": "code",
|
| 350 |
"execution_count": null,
|
| 351 |
-
"id": "
|
| 352 |
"metadata": {},
|
| 353 |
"outputs": [],
|
| 354 |
"source": []
|
|
|
|
| 10 |
},
|
| 11 |
{
|
| 12 |
"cell_type": "code",
|
| 13 |
+
"execution_count": 1,
|
| 14 |
"id": "5dcb5ef0",
|
| 15 |
"metadata": {},
|
| 16 |
"outputs": [],
|
|
|
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"cell_type": "code",
|
| 27 |
+
"execution_count": 2,
|
| 28 |
"id": "f5176f5c",
|
| 29 |
"metadata": {},
|
| 30 |
"outputs": [],
|
|
|
|
| 345 |
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 346 |
]
|
| 347 |
},
|
| 348 |
+
{
|
| 349 |
+
"cell_type": "code",
|
| 350 |
+
"execution_count": 3,
|
| 351 |
+
"id": "9f09a644",
|
| 352 |
+
"metadata": {},
|
| 353 |
+
"outputs": [],
|
| 354 |
+
"source": [
|
| 355 |
+
"import pyttsx3\n",
|
| 356 |
+
"\n",
|
| 357 |
+
"# Initialize the TTS engine\n",
|
| 358 |
+
"engine = pyttsx3.init()\n",
|
| 359 |
+
"\n",
|
| 360 |
+
"# Set properties (optional)\n",
|
| 361 |
+
"engine.setProperty('rate', 150) # Speed of speech (words per minute)\n",
|
| 362 |
+
"engine.setProperty(\"volume\", 1.0) # Volume (0.0 to 1.0)\n",
|
| 363 |
+
"\n",
|
| 364 |
+
"# Text to speak\n",
|
| 365 |
+
"text_to_read = \"Hello! I’m Mariusz Bronowicki, a professional committed to delivering high-quality work in my field. \\\n",
|
| 366 |
+
" I have a diverse background and skill set that allows me to tackle various challenges effectively. \\\n",
|
| 367 |
+
" If you have any questions about my career, experience, or skills, feel free to ask! I'm here to help.\"\n",
|
| 368 |
+
"\n",
|
| 369 |
+
"# Speak the text\n",
|
| 370 |
+
"engine.say(text_to_read)\n",
|
| 371 |
+
"\n",
|
| 372 |
+
"# Wait until speaking is finishing\n",
|
| 373 |
+
"engine.runAndWait()"
|
| 374 |
+
]
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"cell_type": "code",
|
| 378 |
+
"execution_count": null,
|
| 379 |
+
"id": "333ee1bc",
|
| 380 |
+
"metadata": {},
|
| 381 |
+
"outputs": [],
|
| 382 |
+
"source": [
|
| 383 |
+
"from openai import OpenAI\n",
|
| 384 |
+
"\n",
|
| 385 |
+
"text_to_read = \"Hello! I’m Mariusz Bronowicki, a professional committed to delivering high-quality work in my field. \\\n",
|
| 386 |
+
" I have a diverse background and skill set that allows me to tackle various challenges effectively. \\\n",
|
| 387 |
+
" If you have any questions about my career, experience, or skills, feel free to ask! I'm here to help.\"\n",
|
| 388 |
+
"\n",
|
| 389 |
+
"\n",
|
| 390 |
+
"client = OpenAI()\n",
|
| 391 |
+
"\n",
|
| 392 |
+
"audio = client.audio.speech.create(\n",
|
| 393 |
+
" model=\"gpt-4o-mini-tts\",\n",
|
| 394 |
+
" voice=\"alloy\",\n",
|
| 395 |
+
" input=text_to_read\n",
|
| 396 |
+
")\n",
|
| 397 |
+
"\n",
|
| 398 |
+
"# Save to file\n",
|
| 399 |
+
"with open(\"../me/output.mp3\", \"wb\") as f:\n",
|
| 400 |
+
" f.write(audio.read())"
|
| 401 |
+
]
|
| 402 |
+
},
|
| 403 |
{
|
| 404 |
"cell_type": "code",
|
| 405 |
"execution_count": null,
|
| 406 |
+
"id": "40718314",
|
| 407 |
"metadata": {},
|
| 408 |
"outputs": [],
|
| 409 |
"source": []
|
pyproject.toml
CHANGED
|
@@ -40,6 +40,11 @@ dependencies = [
|
|
| 40 |
"speedtest-cli>=2.1.3",
|
| 41 |
"scikit-learn>=1.7.2",
|
| 42 |
#"wikipedia>=1.4.0",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
]
|
| 44 |
|
| 45 |
[dependency-groups]
|
|
|
|
| 40 |
"speedtest-cli>=2.1.3",
|
| 41 |
"scikit-learn>=1.7.2",
|
| 42 |
#"wikipedia>=1.4.0",
|
| 43 |
+
"pyttsx3>=2.99",
|
| 44 |
+
"ffmpeg>=1.4",
|
| 45 |
+
"transformers>=4.57",
|
| 46 |
+
"torch==2.9.1",
|
| 47 |
+
"gliner2==1.0.2"
|
| 48 |
]
|
| 49 |
|
| 50 |
[dependency-groups]
|
src/config.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
from langsmith import Client, traceable
|
| 5 |
+
|
| 6 |
+
# ---------------------------------------------------------------------
|
| 7 |
+
# CONFIG
|
| 8 |
+
# ---------------------------------------------------------------------
|
| 9 |
+
class Config:
    """Loads API credentials from the environment and wires up LangSmith.

    Attributes set on the instance:
        openai_api_key / google_api_key / langsmith_api_key /
        langsmith_endpoint — raw values from the environment (may be None).
        langsmith_client — LangSmith ``Client`` built from the key above.
    """

    def __init__(self):
        # Re-read .env each time so edited keys take effect without a restart.
        load_dotenv(override=True)

        # Pull every credential the app needs from the environment.
        env_keys = (
            ("openai_api_key", "OPENAI_API_KEY"),
            ("google_api_key", "GOOGLE_API_KEY"),
            ("langsmith_api_key", "LANGSMITH_API_KEY"),
            ("langsmith_endpoint", "LANGSMITH_ENDPOINT"),
        )
        for attr, var in env_keys:
            setattr(self, attr, os.getenv(var))

        # Initialize LangSmith
        self.langsmith_client = Client(api_key=self.langsmith_api_key)
|
src/file_loader.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import lru_cache
|
| 2 |
+
from pypdf import PdfReader
|
| 3 |
+
|
| 4 |
+
@lru_cache()
def load_pdf_text(path: str) -> str:
    """Extract and concatenate the text of every page of the PDF at *path*.

    Memoized per path via ``lru_cache``. Returns "" when the file is
    missing or unparsable, so callers can treat the document as optional.
    """
    try:
        reader = PdfReader(path)
        # extract_text() can return None (e.g. image-only pages); skip those.
        # join() builds the result in one pass instead of repeated `+=`.
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any parse/IO failure means "no profile text".
        return ""
|
| 16 |
+
|
| 17 |
+
@lru_cache()
def load_text_file(path: str) -> str:
    """Read the UTF-8 text file at *path*, memoized per path.

    Returns "" when the file is missing or unreadable so the extra
    profile information stays optional.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except (OSError, UnicodeDecodeError):
        # Narrowed from a bare `except:`: only expected I/O or decoding
        # failures are treated as "no additional info".
        return ""
|
src/models.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
|
| 3 |
+
class Evaluation(BaseModel):
    """Structured verdict produced by the Gemini evaluator for a chat reply."""

    # Whether the agent's reply passes quality review.
    is_acceptable: bool
    # Evaluator's explanation; fed back into the rerun prompt when rejected.
    feedback: str
|
| 6 |
+
|
| 7 |
+
class CacheEntry(BaseModel):
    """One memoized question/answer pair in the semantic answer cache."""

    # The user question as originally asked.
    question: str
    # The reply that was served for that question.
    answer: str
    # Embedding vector of the question, used for cosine-similarity lookup
    # of semantically similar past questions.
    embedding: list[float]
|
src/name_extractor.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from gliner2 import GLiNER2
|
| 2 |
+
|
| 3 |
+
# Lazily-created singleton: loading pretrained weights is expensive, so do
# it once on first use instead of on every call.
_extractor = None


def extract_name_gliner(text: str) -> dict:
    """Extract "person" entities from the beginning of *text* with GLiNER2.

    Returns the model's ``entities`` mapping (e.g. ``{"person": [...]}``);
    the previous ``-> str`` annotation was wrong — callers index into the
    mapping (``result["person"][0]``).

    Only the first 700 characters are scanned: enough to cover the header
    of a LinkedIn profile export while keeping inference fast.
    """
    global _extractor
    if _extractor is None:
        _extractor = GLiNER2.from_pretrained("fastino/gliner2-base-v1")
    result = _extractor.extract_entities(text[:700], ["person"])
    return result["entities"]
|
src/prompts.py
CHANGED
|
@@ -9,11 +9,16 @@ system_prompt = f"You are acting as {name}. You are answering question on {name}
|
|
| 9 |
particularly question related to {name}'s career, background, skills and experience. \
|
| 10 |
Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \
|
| 11 |
Be professional and engaging, as if talking to a potential client or future employer who came across the website. \
|
| 12 |
-
If you do not know the answer, say so. \
|
| 13 |
-
If you need to check e.g salary expectation question then use tools to see what range for such position is.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
evaluator_system_prompt = f"You are an evaluator that decides whether a response to a question is acceeptable. \
|
| 16 |
You are provided with a conversation btween a User and an Agent. Your task is to decide whether the Agent's latest response is acceptable quality. \
|
| 17 |
The Agent is playing the role of {name} and is representing {name} on their website. \
|
| 18 |
The Agent has been instructed to be professional and engaging, as if talking to a potential client or future employer who came across the website. \
|
| 19 |
-
The Agent has been provided with context on {name} in the form of their
|
|
|
|
| 9 |
particularly question related to {name}'s career, background, skills and experience. \
|
| 10 |
Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \
|
| 11 |
Be professional and engaging, as if talking to a potential client or future employer who came across the website. \
|
| 12 |
+
If you do not know the answer, say so and ask for contact to better answer questions agant cannot. \
|
| 13 |
+
If you need to check e.g salary expectation question then use tools to see what range for such position is. \
|
| 14 |
+
Do not answer any questions which are not related to {name}."
|
| 15 |
+
# When asked about professional experience, focus primarily on your data scientist experience. You may briefly mention past roles (e.g., Tesco, education) and acknowledge that your career path hasn’t been linear, but emphasize that this variety has given you a broader perspective and valuable transferable skills. \
|
| 16 |
+
# Whenever appropriate, invite the person to contact you via email if they have further questions or would like to arrange a conversation.
|
| 17 |
+
# If you don’t know the answer, state that clearly and honestly. \
|
| 18 |
+
# Don't use technologies if I do not have experience as Data Scientist e.g. R language - you never had experience with it.
|
| 19 |
|
| 20 |
evaluator_system_prompt = f"You are an evaluator that decides whether a response to a question is acceeptable. \
|
| 21 |
You are provided with a conversation btween a User and an Agent. Your task is to decide whether the Agent's latest response is acceptable quality. \
|
| 22 |
The Agent is playing the role of {name} and is representing {name} on their website. \
|
| 23 |
The Agent has been instructed to be professional and engaging, as if talking to a potential client or future employer who came across the website. \
|
| 24 |
+
The Agent has been provided with context on {name} in the form of their additional information and Linkedin details. Here's the information:"
|
src/utils.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.file_loader import load_pdf_text, load_text_file
|
| 2 |
+
|
| 3 |
+
# ---------------------------------------------------------------------
|
| 4 |
+
# FILE READER
|
| 5 |
+
# ---------------------------------------------------------------------
|
| 6 |
+
# ---------------------------------------------------------------------
# FILE READER
# ---------------------------------------------------------------------
class FileReader:
    """Mixin that loads the profile documents backing the chat context.

    Sets ``linkedin_profile`` (PDF text) and ``additional_info`` (plain
    text); both fall back to "" when the source file is unavailable.
    """

    def __init__(self):
        # Paths are relative to the repository root, where the app launches.
        self.linkedin_profile = load_pdf_text("./me/Linkedin_Profile.pdf")
        self.additional_info = load_text_file("./me/additional_info.txt")
|
uv.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|