# Carbaz's picture
# Refactor application structure: implement main entrypoint, initialize environment, and set up logging; add tools for user interaction and data handling
# b594867
"""Tool functions and definitions for the AI Career Digital Twin application."""
from logging import getLogger
from os import environ
from dotenv import load_dotenv
from huggingface_hub import hf_hub_download
from pypdf import PdfReader
from requests import post
# Environment initialization.
# Runs at import time. With override=True, values loaded by load_dotenv
# replace variables that are already set in the process environment.
load_dotenv(override=True)
# Required env vars. (KeyError raised if missing)
# These are read while the module is being imported, so a missing variable
# makes the import itself fail with KeyError.
HF_SELF_TOKEN = environ["HF_SELF_TOKEN"]  # passed as token= to hf_hub_download
PUSHOVER_USER = environ["PUSHOVER_USER"]  # sent as "user" in the Pushover POST
PUSHOVER_TOKEN = environ["PUSHOVER_TOKEN"]  # sent as "token" in the Pushover POST
# Instantiate logger.
# Module-level logger named after this module; used by every function below.
_logger = getLogger(__name__)
# Function definitions.
def read_pdf_from_hub(repo_id, filename) -> str:
    """Download a PDF from a HF Hub dataset repo and return its text.

    The file is fetched with ``hf_hub_download`` (``repo_type="dataset"``,
    authenticated with ``HF_SELF_TOKEN``) and opened with ``PdfReader``.
    Text is extracted page by page; a page whose extraction raises is
    logged and skipped so the remaining pages still contribute.

    Args:
        repo_id: Identifier of the dataset repository on the Hub.
        filename: Name of the PDF file inside that repository.

    Returns:
        The extracted text of all pages concatenated in page order with no
        separator, or the sentinel string ``"NO DATA"`` when the download
        fails, the file cannot be opened or enumerated as a PDF, or no
        text could be extracted at all.
    """
    try:
        path = hf_hub_download(repo_id=repo_id, repo_type="dataset",
                               filename=filename, token=HF_SELF_TOKEN)
    except Exception as ex:
        _logger.error(f"FAILED TO DOWNLOAD PDF FROM HUB: "
                      f"{repo_id}/{filename}: {ex}")
        return "NO DATA"
    try:
        reader = PdfReader(path)
    except Exception as ex:
        _logger.error(f"FAILED TO OPEN PDF FILE AT {path}: {ex}")
        return "NO DATA"
    # Guard page enumeration as well: a file that opens but whose page list
    # cannot be read should honour the same "NO DATA" contract as the other
    # failure paths instead of letting the exception escape to the caller.
    try:
        pages = list(reader.pages)
    except Exception as ex:
        _logger.error(f"FAILED TO READ PAGES FROM PDF FILE AT {path}: {ex}")
        return "NO DATA"
    # Collect the per-page chunks and join once at the end rather than
    # growing a string with += on every page.
    chunks = []
    for page in pages:
        try:
            text = page.extract_text()
        except Exception as ex:
            _logger.error(f"FAILED TO EXTRACT TEXT FROM A PAGE IN {path}: {ex}")
            continue
        if text:  # skip pages that yield None or an empty string
            chunks.append(text)
    return "".join(chunks) or "NO DATA"
def read_text_from_hub(repo_id, filename) -> str:
    """Fetch a text file from a HF Hub dataset repo and return its contents.

    The file is downloaded with ``hf_hub_download`` (``repo_type="dataset"``,
    authenticated with ``HF_SELF_TOKEN``) and read as UTF-8.

    Args:
        repo_id: Identifier of the dataset repository on the Hub.
        filename: Name of the text file inside that repository.

    Returns:
        The file contents, or the sentinel string ``"NO DATA"`` when the
        download fails, the file cannot be read, or the file is empty.
    """
    try:
        local_path = hf_hub_download(
            repo_id=repo_id,
            repo_type="dataset",
            filename=filename,
            token=HF_SELF_TOKEN,
        )
    except Exception as ex:
        _logger.error(f"FAILED TO DOWNLOAD TEXT FROM HUB: "
                      f"{repo_id}/{filename}: {ex}")
        return "NO DATA"

    try:
        with open(local_path, "r", encoding="utf-8") as handle:
            body = handle.read()
    except Exception as ex:
        _logger.error(f"FAILED TO READ TEXT FROM {local_path}: {ex}")
        return "NO DATA"

    # An empty file is reported with the same sentinel as a failure.
    return body or "NO DATA"
def push_notification(title, message):
    """Deliver a push notification through the Pushover messages endpoint.

    Args:
        title: Notification title.
        message: Notification body.

    Raises:
        RuntimeError: When Pushover answers with a status other than 200, or
            when sending fails for any other reason (the original exception
            is chained as the cause). Both cases are logged before raising.
    """
    endpoint = "https://api.pushover.net/1/messages.json"
    try:
        payload = {
            "sound": "gamelan",
            "title": title,
            "message": message,
            "user": PUSHOVER_USER,
            "token": PUSHOVER_TOKEN,
        }
        reply = post(endpoint, timeout=3, data=payload)
        if reply.status_code == 200:
            _logger.info(f"PUSHOVER NOTIFICATION SENT: {title}")
            return
        _logger.error(f"PUSHOVER NOTIFICATION FAILED: "
                      f"{reply.status_code} - {reply.text}")
        raise RuntimeError(f"Pushover failed: {reply.status_code}")
    except RuntimeError:
        # Already the exception type callers expect; pass it through as is.
        raise
    except Exception as ex:
        _logger.error(f"PUSHOVER NOTIFICATION ERROR: {ex}")
        raise RuntimeError(f"Pushover error: {ex}") from ex
def record_user_details(email, name="No Name", context="No Context"):
    """Forward a user's contact details as a push notification.

    Args:
        email: Email address supplied by the user.
        name: User's name; defaults to ``"No Name"``.
        context: Conversation context; defaults to ``"No Context"``.

    Returns:
        ``{"recorded": "ok"}`` once the notification has been sent. Any
        exception raised by ``push_notification`` propagates to the caller.
    """
    headline = "Career Contact Request."
    body = (f"From: {name} with email: {email}"
            f"\n\nIn context:\n{context}")
    push_notification(headline, body)
    return {"recorded": "ok"}
def record_unknown_question(question, name="No Name",
                            context="No Context"):
    """Forward a question that could not be answered as a push notification.

    Args:
        question: The question that went unanswered.
        name: User's name; defaults to ``"No Name"``.
        context: Conversation context; defaults to ``"No Context"``.

    Returns:
        ``{"recorded": "ok"}`` once the notification has been sent. Any
        exception raised by ``push_notification`` propagates to the caller.
    """
    headline = "Career Unknown Question."
    body = (f"{name} asked: {question}"
            f"\n\nIn context:\n{context}")
    push_notification(headline, body)
    return {"recorded": "ok"}
# JSON schema for the "record_user_details" tool.
# Only "email" is required; properties other than the three listed below
# are disallowed via "additionalProperties": False.
record_user_details_json = {
    "name": "record_user_details",
    "description": (
        "Use this tool to record that a user is interested in being in touch "
        "and provided an email address along with any additional details "
        "such as their name or context about the conversation"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "email": {
                "type": "string",
                "maxLength": 254,
                "format": "email",
                "description": "The email address of this user",
            },
            "name": {
                "type": "string",
                "maxLength": 100,
                "description": "The user's name if they provided it",
            },
            "context": {
                "type": "string",
                "maxLength": 550,
                "description": (
                    "Any additional contextual information "
                    "about the conversation that's worth "
                    "recording for follow-up"
                ),
            },
        },
        "required": ["email"],
        "additionalProperties": False,
    },
}
# JSON schema for the "record_unknown_question" tool.
# Only "question" is required; properties other than the three listed below
# are disallowed via "additionalProperties": False.
record_unknown_question_json = {
    "name": "record_unknown_question",
    "description": (
        "Use this tool to record any question that couldn't be answered as "
        "you didn't know the answer along with any additional details such "
        "as their name or context about the conversation"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "maxLength": 300,
                "description": "The question that couldn't be answered",
            },
            "name": {
                "type": "string",
                "maxLength": 100,
                "description": "The user's name if they provided it",
            },
            "context": {
                "type": "string",
                "maxLength": 550,
                "description": (
                    "Any additional contextual information "
                    "about the conversation that's worth "
                    "recording for follow-up"
                ),
            },
        },
        "required": ["question"],
        "additionalProperties": False,
    },
}
# Define tools collections.
# NOTE: the module logger is already created once near the top of this file;
# the second, identical getLogger(__name__) call that used to sit here only
# rebound the same logger object and has been dropped.

# Tool declarations, each schema wrapped as {"type": "function", ...}.
# Presumably the list handed to the chat model as its available tools --
# confirm against the caller.
tools_def = [
    {"type": "function", "function": record_user_details_json},
    {"type": "function", "function": record_unknown_question_json},
]

# Tool name -> implementing callable. The keys match the "name" fields of
# the schemas above; presumably used to dispatch tool calls by name.
tools_map = {
    "record_user_details": record_user_details,
    "record_unknown_question": record_unknown_question,
}