File size: 6,476 Bytes
b594867
 
60c843a
 
b594867
60c843a
 
 
 
 
 
 
 
 
 
 
b594867
 
 
60c843a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
"""Tool functions and definitions for the AI Career Digital Twin application."""


from logging import getLogger
from os import environ

from dotenv import load_dotenv
from huggingface_hub import hf_hub_download
from pypdf import PdfReader
from requests import post


# Environment initialization.
# Runs at import time. override=True asks python-dotenv to let values from the
# dotenv file take precedence over variables already set in the environment.
load_dotenv(override=True)

# Required env vars. (KeyError raised if missing)
# These are read at import time, so importing this module fails immediately
# when any of them is unset.
HF_SELF_TOKEN = environ["HF_SELF_TOKEN"]    # passed as `token` to hf_hub_download
PUSHOVER_USER = environ["PUSHOVER_USER"]    # sent as the "user" form field
PUSHOVER_TOKEN = environ["PUSHOVER_TOKEN"]  # sent as the "token" form field

# Instantiate logger.
# Module-level logger named after this module; used by every function below.
_logger = getLogger(__name__)


# Function definitions.
def read_pdf_from_hub(repo_id, filename) -> str:
    """Fetch a PDF from a HF Hub dataset repo and return its text.

    The text of every page that yields some is concatenated in page order
    with no separator.  Failures are logged instead of raised: a failed
    download or a file that cannot be opened gives "NO DATA", and a page
    whose extraction fails is skipped.  "NO DATA" is also returned when no
    page produced any text.
    """
    # Step 1: download the file from the dataset repository.
    try:
        pdf_path = hf_hub_download(repo_id=repo_id, repo_type="dataset",
                                   filename=filename, token=HF_SELF_TOKEN)
    except Exception as err:
        _logger.error(f"FAILED TO DOWNLOAD PDF FROM HUB: "
                      f"{repo_id}/{filename}: {err}")
        return "NO DATA"

    # Step 2: open it as a PDF.
    try:
        pdf = PdfReader(pdf_path)
    except Exception as err:
        _logger.error(f"FAILED TO OPEN PDF FILE AT {pdf_path}: {err}")
        return "NO DATA"

    # Step 3: gather page texts; one bad page must not discard the rest.
    chunks = []
    for page in pdf.pages:
        try:
            page_text = page.extract_text()
        except Exception as err:
            _logger.error(
                f"FAILED TO EXTRACT TEXT FROM A PAGE IN {pdf_path}: {err}")
            continue
        if page_text:
            chunks.append(page_text)

    combined = "".join(chunks)
    return combined or "NO DATA"


def read_text_from_hub(repo_id, filename) -> str:
    """Fetch a UTF-8 text file from a HF Hub dataset repo and return it.

    Failures are logged instead of raised.  "NO DATA" is returned when the
    download fails, when the file cannot be read, or when it is empty.
    """
    # Step 1: download the file from the dataset repository.
    try:
        local_path = hf_hub_download(repo_id=repo_id, repo_type="dataset",
                                     filename=filename, token=HF_SELF_TOKEN)
    except Exception as err:
        _logger.error(f"FAILED TO DOWNLOAD TEXT FROM HUB: "
                      f"{repo_id}/{filename}: {err}")
        return "NO DATA"

    # Step 2: read the whole file as UTF-8 text.
    try:
        with open(local_path, "r", encoding="utf-8") as handle:
            body = handle.read()
    except Exception as err:
        _logger.error(f"FAILED TO READ TEXT FROM {local_path}: {err}")
        return "NO DATA"

    # An empty file is reported the same way as a failure.
    return body or "NO DATA"


def push_notification(title, message):
    """Send `message` under `title` as a Pushover push notification.

    Posts to the Pushover messages endpoint with a 3 second timeout and
    logs the outcome.

    Raises:
        RuntimeError: if the response status is not 200, or if anything
            else goes wrong while sending (the underlying exception is
            chained as the cause).
    """
    payload = {"sound": "gamelan", "title": title,
               "message": message, "user": PUSHOVER_USER,
               "token": PUSHOVER_TOKEN}
    try:
        reply = post("https://api.pushover.net/1/messages.json", timeout=3,
                     data=payload)
        status = reply.status_code
        if status == 200:
            _logger.info(f"PUSHOVER NOTIFICATION SENT: {title}")
            return
        _logger.error(f"PUSHOVER NOTIFICATION FAILED: "
                      f"{status} - {reply.text}")
        raise RuntimeError(f"Pushover failed: {status}")
    except RuntimeError:
        # Already the exception type callers see; pass it through unwrapped.
        raise
    except Exception as err:
        _logger.error(f"PUSHOVER NOTIFICATION ERROR: {err}")
        raise RuntimeError(f"Pushover error: {err}") from err


def record_user_details(email, name="No Name", context="No Context"):
    """Forward a user's contact details as a push notification.

    Returns {"recorded": "ok"} once the notification has been sent; a
    RuntimeError raised by push_notification propagates to the caller.
    """
    subject = "Career Contact Request."
    body = f"From: {name} with email: {email}\n\nIn context:\n{context}"
    push_notification(subject, body)
    return {"recorded": "ok"}


def record_unknown_question(question, name="No Name",
                            context="No Context"):
    """Forward a question that could not be answered as a push notification.

    Returns {"recorded": "ok"} once the notification has been sent; a
    RuntimeError raised by push_notification propagates to the caller.
    """
    subject = "Career Unknown Question."
    body = f"{name} asked: {question}\n\nIn context:\n{context}"
    push_notification(subject, body)
    return {"recorded": "ok"}


# JSON schema describing the "record_user_details" tool and its arguments.
# Only "email" is required; "name" and "context" are optional, and no other
# properties are accepted.
record_user_details_json = {
    "name": "record_user_details",
    "description": (
        "Use this tool to record that a user is interested in being in touch "
        "and provided an email address along with any additional details "
        "such as their name or context about the conversation"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "email": {"type": "string", "maxLength": 254, "format": "email",
                      "description": "The email address of this user"},
            "name": {"type": "string", "maxLength": 100,
                     "description": "The user's name if they provided it"},
            "context": {
                "type": "string",
                "maxLength": 550,
                "description": (
                    "Any additional contextual information about the "
                    "conversation that's worth recording for follow-up"
                ),
            },
        },
        "required": ["email"],
        "additionalProperties": False,
    },
}

# JSON schema describing the "record_unknown_question" tool and its arguments.
# Only "question" is required; "name" and "context" are optional, and no other
# properties are accepted.
record_unknown_question_json = {
    "name": "record_unknown_question",
    "description": (
        "Use this tool to record any question that couldn't be answered as "
        "you didn't know the answer along with any additional details such "
        "as their name or context about the conversation"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "maxLength": 300,
                "description": "The question that couldn't be answered",
            },
            "name": {"type": "string", "maxLength": 100,
                     "description": "The user's name if they provided it"},
            "context": {
                "type": "string",
                "maxLength": 550,
                "description": (
                    "Any additional contextual information about the "
                    "conversation that's worth recording for follow-up"
                ),
            },
        },
        "required": ["question"],
        "additionalProperties": False,
    },
}

# Define tools collections.
# tools_def wraps each tool schema in a {"type": "function", ...} entry.
# NOTE(review): presumably passed to the LLM API as its tool list; confirm
# against the caller.
tools_def = [{"type": "function", "function": record_user_details_json},
             {"type": "function", "function": record_unknown_question_json}]

# tools_map resolves a tool name (the schema's "name" value) to the Python
# function that implements it.
tools_map = {"record_user_details": record_user_details,
             "record_unknown_question": record_unknown_question}