Sadashiv commited on
Commit
a21ddd7
·
verified ·
1 Parent(s): 3e2da0d

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ me/Sadashiv_Data_Scientist_Resume.pdf filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Resume Chatbot
3
- emoji:
4
- colorFrom: green
5
- colorTo: red
6
  sdk: gradio
7
  sdk_version: 5.33.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Resume_Chatbot
3
+ app_file: app.py
 
 
4
  sdk: gradio
5
  sdk_version: 5.33.0
 
 
6
  ---
 
 
Resume_chatbot.ipynb ADDED
@@ -0,0 +1,719 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "ca5e90ad-e3bf-446a-8dd1-56026a46d2dc",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "D:\\Projects\\Agents\\.venv-agents\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
14
+ " from .autonotebook import tqdm as notebook_tqdm\n"
15
+ ]
16
+ }
17
+ ],
18
+ "source": [
19
+ "# Importing the libraries\n",
20
+ "from dotenv import load_dotenv\n",
21
+ "from openai import OpenAI\n",
22
+ "import json\n",
23
+ "import os\n",
24
+ "import requests\n",
25
+ "import gradio as gr\n",
26
+ "import fitz # PyMuPDF"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": 2,
32
+ "id": "6d5aac95-a0d2-42e7-b55a-883e6617d87e",
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "# load the environment variables\n",
37
+ "load_dotenv(override=True)\n",
38
+ "\n",
39
+ "# Setting up pushover for notification\n",
40
+ "pushover_user = os.getenv(\"PUSHOVER_USER\")\n",
41
+ "pushover_token = os.getenv(\"PUSHOVER_TOKEN\")\n",
42
+ "pushover_url = \"https://api.pushover.net/1/messages.json\"\n",
43
+ "\n",
44
+ "# setting up openai \n",
45
+ "open_ai = OpenAI()"
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "code",
50
+ "execution_count": 3,
51
+ "id": "1164e747-a349-4b29-85f4-3f559a1a5750",
52
+ "metadata": {},
53
+ "outputs": [
54
+ {
55
+ "name": "stdout",
56
+ "output_type": "stream",
57
+ "text": [
58
+ "Push: Hey!\n"
59
+ ]
60
+ }
61
+ ],
62
+ "source": [
63
+ "# function to send notifications\n",
64
+ "def push(message):\n",
65
+ " print(f\"Push: {message}\")\n",
66
+ " payload = {\"user\": pushover_user, \"token\": pushover_token, \"message\": message}\n",
67
+ " requests.post(pushover_url, data=payload)\n",
68
+ "\n",
69
+ "# testing the function\n",
70
+ "push('Hey!')"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": 4,
76
+ "id": "4cd1cb67-de1c-4e45-8929-98d41fc303c4",
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": [
80
+ "# Function to record the user details\n",
81
+ "def record_user_details(email, name='Name not provided', notes='Notes not provided'):\n",
82
+ " push(f\"Recording interest from {name} with email {email} and notes {notes}\")\n",
83
+ " return {\"recorded\": \"ok\"}\n",
84
+ "\n",
85
+ "# Function to record unknown questions\n",
86
+ "def record_unknown_question(question):\n",
87
+ " push(f\"Recording {question} asked that I couldn't answer\")\n",
88
+ " return {\"recorded\": \"ok\"}"
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "execution_count": 14,
94
+ "id": "30b64638-9868-4856-880e-356adf25e8e2",
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": [
98
+ "# Creating a function-like tool definition to give LLM structured access to specific actions\n",
99
+ "# This JSON structure defines a tool (function schema) that LLMs like GPT can call via function calling API\n",
100
+ "\n",
101
+ "# Tool to record user details such as name, email, and notes during a conversation\n",
102
+ "record_user_details_json = {\n",
103
+ " \"name\": \"record_user_details\", # Unique identifier for the tool/function\n",
104
+ "    \"description\": \"Use this tool to record that a user is interested in being in touch and provided an email address\",  # Describes the tool's purpose for the LLM\n",
105
+ "\n",
106
+ " \"parameters\": {\n",
107
+ " \"type\": \"object\", # The expected input type is a JSON object\n",
108
+ "\n",
109
+ " \"properties\": { # Defines the input fields (like function arguments)\n",
110
+ " \"email\": {\n",
111
+ " \"type\": \"string\", # Must be a string\n",
112
+ " \"description\": \"The email address of this user\" # Purpose of this field\n",
113
+ " },\n",
114
+ " \"name\": {\n",
115
+ " \"type\": \"string\", # Optional string input for user's name\n",
116
+ " \"description\": \"The user's name, if they provided it\"\n",
117
+ " },\n",
118
+ " \"notes\": {\n",
119
+ " \"type\": \"string\", # Optional string input for storing conversation context\n",
120
+ " \"description\": \"Any additional information about the conversation that's worth recording to give context\"\n",
121
+ " }\n",
122
+ " },\n",
123
+ "\n",
124
+ " \"required\": [\"email\"], # Only 'email' is mandatory; others are optional\n",
125
+ " \"additionalProperties\": False # Disallows extra/unexpected fields in input\n",
126
+ " }\n",
127
+ "}\n",
128
+ "\n",
129
+ "\n",
130
+ "# Tool definition to help the LLM log questions it couldn't answer\n",
131
+ "# This helps track unanswered queries for future improvement or manual review\n",
132
+ "\n",
133
+ "record_unknown_question_json = {\n",
134
+ " \"name\": \"record_unknown_question\", # Unique identifier for this tool\n",
135
+ " \"description\": \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\", # Explains to the LLM when to use this tool\n",
136
+ "\n",
137
+ " \"parameters\": {\n",
138
+ " \"type\": \"object\", # The tool expects an input object (i.e., JSON format)\n",
139
+ "\n",
140
+ " \"properties\": { # Defines the structure of the expected input\n",
141
+ " \"question\": {\n",
142
+ " \"type\": \"string\", # Input must be a string\n",
143
+ "                \"description\": \"The question that you couldn't answer\"  # Describes what should be captured\n",
144
+ " }\n",
145
+ " },\n",
146
+ "\n",
147
+ " \"required\": [\"question\"], # 'question' is mandatory\n",
148
+ " \"additionalProperties\": False # Disallows any other fields beyond what's defined\n",
149
+ " }\n",
150
+ "}"
151
+ ]
152
+ },
153
+ {
154
+ "cell_type": "code",
155
+ "execution_count": 15,
156
+ "id": "243894ed-66d8-41c4-b055-da052709f69c",
157
+ "metadata": {},
158
+ "outputs": [
159
+ {
160
+ "data": {
161
+ "text/plain": [
162
+ "[{'type': 'function',\n",
163
+ " 'function': {'name': 'record_user_details',\n",
164
+ " 'description': 'Use this tool to record that a user is interested in being touch and provided an email address',\n",
165
+ " 'parameters': {'type': 'object',\n",
166
+ " 'properties': {'email': {'type': 'string',\n",
167
+ " 'description': 'The email address of this user'},\n",
168
+ " 'name': {'type': 'string',\n",
169
+ " 'description': \"The user's name, if they provided it\"},\n",
170
+ " 'notes': {'type': 'string',\n",
171
+ " 'description': \"Any additional information about the conversation that's worth recording to give context\"}},\n",
172
+ " 'required': ['email'],\n",
173
+ " 'additionalProperties': False}}},\n",
174
+ " {'type': 'function',\n",
175
+ " 'function': {'name': 'record_unknown_question',\n",
176
+ " 'description': \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\",\n",
177
+ " 'parameters': {'type': 'object',\n",
178
+ " 'properties': {'question': {'type': 'string',\n",
179
+ " 'description': \"The question that you couldn't answered\"}},\n",
180
+ " 'required': ['question'],\n",
181
+ " 'additionalProperties': False}}}]"
182
+ ]
183
+ },
184
+ "execution_count": 15,
185
+ "metadata": {},
186
+ "output_type": "execute_result"
187
+ }
188
+ ],
189
+ "source": [
190
+ "# Creating a list of tools to feed to the LLM\n",
191
+ "tools = [\n",
192
+ " {\"type\": \"function\", \"function\": record_user_details_json},\n",
193
+ " {\"type\": \"function\", \"function\": record_unknown_question_json}\n",
194
+ "]\n",
195
+ "\n",
196
+ "tools"
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "code",
201
+ "execution_count": 16,
202
+ "id": "dc8861f9-e3dd-4766-941c-aa7c95611f69",
203
+ "metadata": {},
204
+ "outputs": [],
205
+ "source": [
206
+ "# This function takes a list of tool calls (as provided by an LLM) and executes them\n",
207
+ "def handle_tool_call(tool_calls):\n",
208
+ " results = [] # To store the results of each tool execution\n",
209
+ "\n",
210
+ " for tool_call in tool_calls:\n",
211
+ " tool_name = tool_call.function.name # Extract the name of the tool/function to be called\n",
212
+ " arguments = json.loads(tool_call.function.arguments) # Parse the function arguments from JSON string to Python dict\n",
213
+ "\n",
214
+ " # Print tool invocation info for debugging\n",
215
+ " print(f\"Tool called: {tool_name} with arguments: {arguments}\", flush=True)\n",
216
+ "\n",
217
+ " # Match the tool name and call the appropriate function with unpacked arguments\n",
218
+ " if tool_name == 'record_user_details':\n",
219
+ " result = record_user_details(**arguments) # Call function to record user details\n",
220
+ " elif tool_name == 'record_unknown_question':\n",
221
+ " result = record_unknown_question(**arguments) # Call function to log unknown questions\n",
222
+ "\n",
223
+ " # Append the result in a format expected by OpenAI's function calling\n",
224
+ " results.append({\n",
225
+ " \"role\": \"tool\", # Indicates this is a tool-generated response\n",
226
+ "            \"content\": json.dumps(result),  # Convert the result to a JSON string\n",
227
+ " \"tool_call_id\": tool_call.id # Associate result with the original tool call ID\n",
228
+ " })\n",
229
+ "\n",
230
+ " return results # Return list of tool responses"
231
+ ]
232
+ },
233
+ {
234
+ "cell_type": "markdown",
235
+ "id": "08ed672f-77c0-4481-95f6-2151d463fb5f",
236
+ "metadata": {},
237
+ "source": [
238
+ "This line is dynamically calling a function named `\"record_unknown_question\"` by accessing it through Python's global symbol table.\n",
239
+ "```python\n",
240
+ "globals()[\"record_unknown_question\"](\"this is a really hard question\")\n",
241
+ "```"
242
+ ]
243
+ },
244
+ {
245
+ "cell_type": "code",
246
+ "execution_count": 17,
247
+ "id": "0755d22c-06aa-4482-8d81-3fb97b7518cd",
248
+ "metadata": {},
249
+ "outputs": [],
250
+ "source": [
251
+ "# This is a more elegant version of tool execution that avoids using if-else chains\n",
252
+ "def handle_tool_calls(tool_calls):\n",
253
+ " results = [] # List to store outputs from all tool calls\n",
254
+ "\n",
255
+ " for tool_call in tool_calls:\n",
256
+ " # Extract the name of the tool (function) to be called\n",
257
+ " tool_name = tool_call.function.name\n",
258
+ "\n",
259
+ " # Parse the arguments from string to Python dict\n",
260
+ " arguments = json.loads(tool_call.function.arguments)\n",
261
+ "\n",
262
+ " # Print debug info to trace tool calls\n",
263
+ " print(f\"Tool called: {tool_name} with arguments: {arguments}\", flush=True)\n",
264
+ "\n",
265
+ " # Look up the function in the global namespace (returns None if not found)\n",
266
+ " tool = globals().get(tool_name)\n",
267
+ "\n",
268
+ " # If tool exists, call it with arguments; otherwise, return an empty result\n",
269
+ " result = tool(**arguments) if tool else {}\n",
270
+ "\n",
271
+ " # Append the result in the expected message format for the LLM\n",
272
+ " results.append({\n",
273
+ " \"role\": \"tool\", # Indicates the message is from a tool\n",
274
+ " \"content\": json.dumps(result), # Convert result back to JSON string\n",
275
+ " \"tool_call_id\": tool_call.id # Associate result with its tool_call\n",
276
+ " })\n",
277
+ "\n",
278
+ " # Return the full list of tool responses\n",
279
+ " return results"
280
+ ]
281
+ },
282
+ {
283
+ "cell_type": "code",
284
+ "execution_count": 18,
285
+ "id": "ee35bd27-048c-49a8-9a72-830ab564ced8",
286
+ "metadata": {},
287
+ "outputs": [],
288
+ "source": [
289
+ "# read the pdf file\n",
290
+ "def extract_text_from_pdf(pdf_path):\n",
291
+ " doc = fitz.open(pdf_path)\n",
292
+ " full_text = \"\"\n",
293
+ " for page in doc:\n",
294
+ " full_text += page.get_text()\n",
295
+ " return full_text\n",
296
+ "\n",
297
+ "resume_text = extract_text_from_pdf(\"me/Sadashiv_Data_Scientist_Resume.pdf\")"
298
+ ]
299
+ },
300
+ {
301
+ "cell_type": "code",
302
+ "execution_count": 19,
303
+ "id": "4ee0cee0-d7f0-4fbc-9052-210cfa208cfe",
304
+ "metadata": {},
305
+ "outputs": [],
306
+ "source": [
307
+ "system_prompt = f\"\"\"\n",
308
+ "You are acting as an expert assistant representing the individual whose resume is provided below.\n",
309
+ "Your task is to answer questions strictly based on the information contained in the resume.\n",
310
+ "Do not fabricate or assume any details that are not explicitly mentioned in the resume.\n",
311
+ "\n",
312
+ "If asked about improvements or suggestions, respond with clear, concise, and focused points only.\n",
313
+ "Keep your answers compact and to the point, and expand only if the user explicitly asks for more details.\n",
314
+ "\n",
315
+ "If a user asks a question you cannot answer from the resume, use the record_unknown_question tool to log the unanswered query.\n",
316
+ "\n",
317
+ "If the user expresses interest in following up or staying in touch, politely ask for their name and email,\n",
318
+ "then record it using the record_user_details tool.\n",
319
+ "\n",
320
+ "Resume Content:\n",
321
+ "{resume_text}\n",
322
+ "\"\"\""
323
+ ]
324
+ },
325
+ {
326
+ "cell_type": "code",
327
+ "execution_count": 20,
328
+ "id": "d9b8ad52-7778-44c5-8337-c4ef0e6f68b8",
329
+ "metadata": {},
330
+ "outputs": [],
331
+ "source": [
332
+ "def chat(message, history):\n",
333
+ " # Construct the full message history: system prompt, chat history, and new user message\n",
334
+ " messages = [{\"role\": \"system\", \"content\": system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
335
+ " \n",
336
+ " done = False # Flag to track when the chat loop should stop\n",
337
+ "\n",
338
+ " while not done:\n",
339
+ " # Call the OpenAI chat model with messages and available tools\n",
340
+ " response = open_ai.chat.completions.create(\n",
341
+ " model=\"gpt-4o-mini\", # Model to use\n",
342
+ " messages=messages, # Full conversation history\n",
343
+ " tools=tools # Pass in tools so the LLM can invoke them\n",
344
+ " )\n",
345
+ "\n",
346
+ " # Check how the model decided to end its generation\n",
347
+ " finish_reason = response.choices[0].finish_reason\n",
348
+ "\n",
349
+ " # If the model wants to call a tool, handle the tool calls\n",
350
+ " if finish_reason == \"tool_calls\":\n",
351
+ " message = response.choices[0].message # Extract the message containing the tool call\n",
352
+ " tool_calls = message.tool_calls # Get the list of tool calls\n",
353
+ " results = handle_tool_calls(tool_calls) # Run the tools and get their results\n",
354
+ " messages.append(message) # Add the original tool call message to history\n",
355
+ " messages.extend(results) # Add tool results to message history for LLM to continue\n",
356
+ " else:\n",
357
+ " # If no tool call is needed, we're done and can return the final response\n",
358
+ " done = True\n",
359
+ "\n",
360
+ " # Return the final message content from the model as the assistant's reply\n",
361
+ " return response.choices[0].message.content\n"
362
+ ]
363
+ },
364
+ {
365
+ "cell_type": "code",
366
+ "execution_count": 21,
367
+ "id": "184405d4-349b-44c9-b8aa-faf4b5f0756a",
368
+ "metadata": {},
369
+ "outputs": [
370
+ {
371
+ "name": "stdout",
372
+ "output_type": "stream",
373
+ "text": [
374
+ "* Running on local URL: http://127.0.0.1:7863\n",
375
+ "* To create a public link, set `share=True` in `launch()`.\n"
376
+ ]
377
+ },
378
+ {
379
+ "data": {
380
+ "text/html": [
381
+ "<div><iframe src=\"http://127.0.0.1:7863/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
382
+ ],
383
+ "text/plain": [
384
+ "<IPython.core.display.HTML object>"
385
+ ]
386
+ },
387
+ "metadata": {},
388
+ "output_type": "display_data"
389
+ },
390
+ {
391
+ "data": {
392
+ "text/plain": []
393
+ },
394
+ "execution_count": 21,
395
+ "metadata": {},
396
+ "output_type": "execute_result"
397
+ },
398
+ {
399
+ "name": "stdout",
400
+ "output_type": "stream",
401
+ "text": [
402
+ "Tool called: record_unknown_question with arguments: {'question': 'Where did Sadashiv Nandanikar complete his 10th?'}\n",
403
+ "Push: Recording Where did Sadashiv Nandanikar complete his 10th? asked that I couldn't answer\n",
404
+ "Tool called: record_user_details with arguments: {'email': 'sada@gmail.com', 'name': 'Sada'}\n",
405
+ "Push: Recording interest from Sada with email sada@gmail.com and notes Notes not provided\n"
406
+ ]
407
+ }
408
+ ],
409
+ "source": [
410
+ "gr.ChatInterface(chat, type=\"messages\").launch()"
411
+ ]
412
+ },
413
+ {
414
+ "cell_type": "code",
415
+ "execution_count": 32,
416
+ "id": "23c6bd49-5f8c-4fe4-8ca7-f19ee734d172",
417
+ "metadata": {},
418
+ "outputs": [
419
+ {
420
+ "name": "stderr",
421
+ "output_type": "stream",
422
+ "text": [
423
+ "C:\\Users\\Sadashiv\\AppData\\Local\\Temp\\ipykernel_14264\\15198562.py:215: UserWarning: You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style dictionaries with 'role' and 'content' keys.\n",
424
+ " chatbot = gr.Chatbot(label=\"Conversation\", height=500)\n"
425
+ ]
426
+ },
427
+ {
428
+ "name": "stdout",
429
+ "output_type": "stream",
430
+ "text": [
431
+ "* Running on local URL: http://127.0.0.1:7867\n",
432
+ "* To create a public link, set `share=True` in `launch()`.\n"
433
+ ]
434
+ },
435
+ {
436
+ "data": {
437
+ "text/html": [
438
+ "<div><iframe src=\"http://127.0.0.1:7867/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
439
+ ],
440
+ "text/plain": [
441
+ "<IPython.core.display.HTML object>"
442
+ ]
443
+ },
444
+ "metadata": {},
445
+ "output_type": "display_data"
446
+ }
447
+ ],
448
+ "source": [
449
+ "from dotenv import load_dotenv\n",
450
+ "from openai import OpenAI\n",
451
+ "import json\n",
452
+ "import os\n",
453
+ "import requests\n",
454
+ "import gradio as gr\n",
455
+ "import fitz # PyMuPDF\n",
456
+ "\n",
457
+ "# load the environment variables\n",
458
+ "load_dotenv(override=True)\n",
459
+ "\n",
460
+ "# Setting up pushover for notification\n",
461
+ "pushover_user = os.getenv(\"PUSHOVER_USER\")\n",
462
+ "pushover_token = os.getenv(\"PUSHOVER_TOKEN\")\n",
463
+ "pushover_url = \"https://api.pushover.net/1/messages.json\"\n",
464
+ "\n",
465
+ "# function to send notifications\n",
466
+ "def push(message: str):\n",
467
+ " if pushover_user and pushover_token:\n",
468
+ " payload = {\"user\": pushover_user, \"token\": pushover_token, \"message\": message}\n",
469
+ " try:\n",
470
+ " requests.post(pushover_url, data=payload, timeout=5)\n",
471
+ "        except requests.exceptions.RequestException as e:\n",
472
+ " print(f\"Pushover notification failed: {e}\")\n",
473
+ " else:\n",
474
+ " print(\"Pushover credentials not found. Skipping notification\")\n",
475
+ "\n",
476
+ "# Function to record the user details\n",
477
+ "def record_user_details(email: str, name: str='Name not provided', notes: str='Notes not provided'):\n",
478
+ " push(f\"Recording interest from {name} with email {email} and notes {notes}\")\n",
479
+ " return {\"recorded\": \"ok\"}\n",
480
+ "\n",
481
+ "# Tool to record user details\n",
482
+ "record_user_details_json = {\n",
483
+ " \"name\": \"record_user_details\",\n",
484
+ "    \"description\": \"Use this tool to record that a user is interested in being in touch and provided an email address\",\n",
485
+ " \"parameters\": {\n",
486
+ " \"type\": \"object\",\n",
487
+ " \"properties\": {\n",
488
+ " \"email\": {\"type\": \"string\", \"description\": \"The email address of this user\"},\n",
489
+ " \"name\": {\"type\": \"string\", \"description\": \"The user's name, if they provided it\"},\n",
490
+ " \"notes\": {\"type\": \"string\", \"description\": \"Any additional information about the conversation that's worth recording to give context\"}\n",
491
+ " },\n",
492
+ " \"required\": [\"email\"],\n",
493
+ " \"additionalProperties\": False\n",
494
+ " }\n",
495
+ "}\n",
496
+ "\n",
497
+ "# Tool to log unanswered questions\n",
498
+ "record_unknown_question_json = {\n",
499
+ " \"name\": \"record_unknown_question\",\n",
500
+ " \"description\": \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\",\n",
501
+ " \"parameters\": {\n",
502
+ " \"type\": \"object\",\n",
503
+ " \"properties\": {\n",
504
+ "            \"question\": {\"type\": \"string\", \"description\": \"The question that you couldn't answer\"}\n",
505
+ " },\n",
506
+ " \"required\": [\"question\"],\n",
507
+ " \"additionalProperties\": False\n",
508
+ " }\n",
509
+ "}\n",
510
+ "\n",
511
+ "# List of tools for the LLM\n",
512
+ "tools = [\n",
513
+ " {\"type\": \"function\", \"function\": record_user_details_json},\n",
514
+ " {\"type\": \"function\", \"function\": record_unknown_question_json}\n",
515
+ "]\n",
516
+ "\n",
517
+ "class ResumeChatbot:\n",
518
+ " def __init__(self):\n",
519
+ " self.open_ai = OpenAI()\n",
520
+ "\n",
521
+ " def extract_text_from_pdf(self, pdf_path):\n",
522
+ " \"\"\"Extracts text from a given PDF file path.\"\"\"\n",
523
+ " try:\n",
524
+ " doc = fitz.open(pdf_path)\n",
525
+ " full_text = \"\"\n",
526
+ " for page in doc:\n",
527
+ " full_text += page.get_text()\n",
528
+ " return full_text\n",
529
+ " except Exception as e:\n",
530
+ " print(f\"Error reading PDF: {e}\")\n",
531
+ " return None\n",
532
+ "\n",
533
+ " def handle_tool_call(self, tool_calls):\n",
534
+ " results = []\n",
535
+ " for tool_call in tool_calls:\n",
536
+ " tool_name = tool_call.function.name\n",
537
+ " arguments = json.loads(tool_call.function.arguments)\n",
538
+ " tool = globals().get(tool_name)\n",
539
+ " result = tool(**arguments) if tool else {}\n",
540
+ " results.append({\n",
541
+ " \"role\": \"tool\",\n",
542
+ " \"content\": json.dumps(result),\n",
543
+ " \"tool_call_id\": tool_call.id\n",
544
+ " })\n",
545
+ " return results\n",
546
+ "\n",
547
+ " def get_system_prompt(self, resume_text):\n",
548
+ " system_prompt = f\"\"\"\n",
549
+ " You are acting as an expert assistant representing the individual whose resume is provided below.\n",
550
+ " Your task is to answer questions strictly based on the information contained in the resume.\n",
551
+ " Do not fabricate or assume any details that are not explicitly mentioned in the resume.\n",
552
+ "\n",
553
+ " If asked about improvements or suggestions, respond with clear, concise, and focused points only.\n",
554
+ " Keep your answers compact and to the point, and expand only if the user explicitly asks for more details.\n",
555
+ "\n",
556
+ " If a user asks a question you cannot answer from the resume, use the record_unknown_question tool to log the unanswered query.\n",
557
+ "\n",
558
+ " If the user expresses interest in following up or staying in touch, politely ask for their name and email,\n",
559
+ " then record it using the record_user_details tool.\n",
560
+ "\n",
561
+ " Resume Content:\n",
562
+ " {resume_text}\n",
563
+ " \"\"\"\n",
564
+ " return system_prompt\n",
565
+ "\n",
566
+ " def chat(self, message: str, history: list, resume_text: str):\n",
567
+ " system_prompt = self.get_system_prompt(resume_text)\n",
568
+ " \n",
569
+ " # Convert Gradio chat_history to OpenAI messages format\n",
570
+ " formatted_history = []\n",
571
+ " for user_msg, bot_msg in history:\n",
572
+ " if user_msg is not None: # User messages are not None when they've actually typed something\n",
573
+ " formatted_history.append({\"role\": \"user\", \"content\": user_msg})\n",
574
+ " if bot_msg is not None: # Bot messages are not None when they've responded\n",
575
+ " formatted_history.append({\"role\": \"assistant\", \"content\": bot_msg})\n",
576
+ "\n",
577
+ " # Construct the full message history: system prompt, formatted chat history, and new user message\n",
578
+ " messages = [{\"role\": \"system\", \"content\": system_prompt}] + formatted_history + [{\"role\": \"user\", \"content\": message}]\n",
579
+ " \n",
580
+ " done = False # Flag to track when the chat loop should stop\n",
581
+ "\n",
582
+ " while not done:\n",
583
+ " # Call the OpenAI chat model with messages and available tools\n",
584
+ " response = self.open_ai.chat.completions.create(\n",
585
+ " model=\"gpt-4o-mini\", # Model to use\n",
586
+ " messages=messages, # Full conversation history\n",
587
+ " tools=tools # Pass in tools so the LLM can invoke them\n",
588
+ " )\n",
589
+ "\n",
590
+ " # Check how the model decided to end its generation\n",
591
+ " finish_reason = response.choices[0].finish_reason\n",
592
+ "\n",
593
+ " # If the model wants to call a tool, handle the tool calls\n",
594
+ " if finish_reason == \"tool_calls\":\n",
595
+ " message_response = response.choices[0].message # Extract the message containing the tool call\n",
596
+ " tool_calls = message_response.tool_calls # Get the list of tool calls\n",
597
+ " results = self.handle_tool_call(tool_calls) # Run the tools and get their results\n",
598
+ " messages.append(message_response) # Add the original tool call message to history\n",
599
+ " messages.extend(results) # Add tool results to message history for LLM to continue\n",
600
+ " else:\n",
601
+ " # If no tool call is needed, we're done and can return the final response\n",
602
+ " done = True\n",
603
+ "\n",
604
+ " # Return the final message content from the model as the assistant's reply\n",
605
+ " return response.choices[0].message.content\n",
606
+ "\n",
607
+ "# Create a single instance of the ResumeChatbot class\n",
608
+ "chatbot_instance = ResumeChatbot()\n",
609
+ "\n",
610
+ "def upload_and_process_resume(file_obj):\n",
611
+ " \"\"\"\n",
612
+ " Gradio function to handle file uploads.\n",
613
+ " It extracts text from the uploaded PDF and stores it.\n",
614
+ " \"\"\"\n",
615
+ " if file_obj is None:\n",
616
+ " return None, [], \"Please upload a PDF resume to begin.\"\n",
617
+ "\n",
618
+ " # The file_obj has a .name attribute which is the temporary path to the uploaded file\n",
619
+ " resume_text = chatbot_instance.extract_text_from_pdf(file_obj.name)\n",
620
+ " \n",
621
+ " if resume_text is None or not resume_text.strip():\n",
622
+ " return None, [], \"Could not read text from the uploaded PDF. Please try another file.\"\n",
623
+ " \n",
624
+ " # Clear chat history and provide a welcome message\n",
625
+ " # The welcome message is structured to fit Gradio's chat history format\n",
626
+ " initial_message = \"Thank you for uploading the resume. How can I help you today?\"\n",
627
+ " chat_history = [[None, initial_message]] # User message is None for the initial bot message\n",
628
+ " return resume_text, chat_history, \"\" # returns resume_text to state, updated chatbot, and clears textbox\n",
629
+ "\n",
630
+ "def respond(message: str, chat_history: list, resume_state: str):\n",
631
+ " \"\"\"\n",
632
+ " Gradio function to handle the chat interaction.\n",
633
+ " It gets the resume text from the session's state.\n",
634
+ " \"\"\"\n",
635
+ " if not resume_state:\n",
636
+ " # If no resume has been uploaded yet\n",
637
+ " chat_history.append([message, \"Please upload a resume before starting the conversation.\"])\n",
638
+ " return \"\", chat_history\n",
639
+ " \n",
640
+ " # Get the bot's response\n",
641
+ " # The chat_history passed to chatbot_instance.chat is still in Gradio's format\n",
642
+ " bot_message = chatbot_instance.chat(message, chat_history, resume_state)\n",
643
+ " chat_history.append([message, bot_message]) # Append the new user message and bot response to Gradio's history\n",
644
+ " return \"\", chat_history # Clears the textbox and returns the updated history\n",
645
+ "\n",
646
+ "# --- Gradio Interface ---\n",
647
+ "if __name__ == \"__main__\":\n",
648
+ " with gr.Blocks(theme=gr.themes.Soft(), title=\"Resume Chatbot\") as demo:\n",
649
+ " # State to hold the extracted resume text for the user's session\n",
650
+ " resume_text_state = gr.State(None)\n",
651
+ "\n",
652
+ " gr.Markdown(\"# Chat with a Resume\")\n",
653
+ " gr.Markdown(\"Upload a PDF resume below, then ask questions about it.\")\n",
654
+ "\n",
655
+ " with gr.Row():\n",
656
+ " with gr.Column(scale=1):\n",
657
+ " file_uploader = gr.File(\n",
658
+ " label=\"Upload PDF Resume\",\n",
659
+ " file_types=[\".pdf\"],\n",
660
+ " type=\"filepath\" # Passes the temporary filepath to the function\n",
661
+ " )\n",
662
+ " with gr.Column(scale=2):\n",
663
+ " chatbot = gr.Chatbot(label=\"Conversation\", height=500)\n",
664
+ " msg_box = gr.Textbox(label=\"Your Question\", placeholder=\"e.g., What are the key skills mentioned?\")\n",
665
+ " submit_btn = gr.Button(\"Send\")\n",
666
+ "\n",
667
+ " # Event handler for the file upload\n",
668
+ " file_uploader.upload(\n",
669
+ " fn=upload_and_process_resume,\n",
670
+ " inputs=[file_uploader],\n",
671
+ " outputs=[resume_text_state, chatbot, msg_box]\n",
672
+ " )\n",
673
+ "\n",
674
+ " # Event handlers for chat submission\n",
675
+ " msg_box.submit(\n",
676
+ " fn=respond,\n",
677
+ " inputs=[msg_box, chatbot, resume_text_state],\n",
678
+ " outputs=[msg_box, chatbot]\n",
679
+ " )\n",
680
+ " submit_btn.click(\n",
681
+ " fn=respond,\n",
682
+ " inputs=[msg_box, chatbot, resume_text_state],\n",
683
+ " outputs=[msg_box, chatbot]\n",
684
+ " )\n",
685
+ "\n",
686
+ " demo.launch()"
687
+ ]
688
+ },
689
+ {
690
+ "cell_type": "code",
691
+ "execution_count": null,
692
+ "id": "b5f2bca0-1932-4ce7-b67f-3121c39dc296",
693
+ "metadata": {},
694
+ "outputs": [],
695
+ "source": []
696
+ }
697
+ ],
698
+ "metadata": {
699
+ "kernelspec": {
700
+ "display_name": "Python (venv-agents)",
701
+ "language": "python",
702
+ "name": "venv-agents"
703
+ },
704
+ "language_info": {
705
+ "codemirror_mode": {
706
+ "name": "ipython",
707
+ "version": 3
708
+ },
709
+ "file_extension": ".py",
710
+ "mimetype": "text/x-python",
711
+ "name": "python",
712
+ "nbconvert_exporter": "python",
713
+ "pygments_lexer": "ipython3",
714
+ "version": "3.13.1"
715
+ }
716
+ },
717
+ "nbformat": 4,
718
+ "nbformat_minor": 5
719
+ }
Vanilla_Resume_Reader.ipynb ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 6,
6
+ "id": "f52b7c2e-2a60-43e5-8534-9074d61a65b2",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "# Installing the libraries\n",
11
+ "from dotenv import load_dotenv\n",
12
+ "import os\n",
13
+ "import gradio as gr\n",
14
+ "from openai import OpenAI\n",
15
+ "from pypdf import PdfReader\n",
16
+ "import fitz # PyMuPDF"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": 5,
22
+ "id": "da7b3295-d080-4b49-afde-6a4b37bbabd3",
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "# loading the environment variables and API keys\n",
27
+ "load_dotenv(override=True)\n",
28
+ "\n",
29
+ "# Create the instance of OpenAI\n",
30
+ "openai = OpenAI()"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": 7,
36
+ "id": "e219abce-0b05-4991-80da-85e9ea84b881",
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "# read the pdf file\n",
41
+ "def extract_text_from_pdf(pdf_path):\n",
42
+ " doc = fitz.open(pdf_path)\n",
43
+ " full_text = \"\"\n",
44
+ " for page in doc:\n",
45
+ " full_text += page.get_text()\n",
46
+ " return full_text\n",
47
+ "\n",
48
+ "resume_text = extract_text_from_pdf(\"me/Sadashiv_Data_Scientist_Resume.pdf\")"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": 9,
54
+ "id": "239b0875-3d09-4ab4-98bb-254c5bcfcd20",
55
+ "metadata": {},
56
+ "outputs": [
57
+ {
58
+ "name": "stdout",
59
+ "output_type": "stream",
60
+ "text": [
61
+ "Sadashiv Nandanikar\n",
62
+ "ƒ 8431114989\n",
63
+ "# nandanikar.sadashiv0712@gmail.com\n",
64
+ "ï linkedin.com/in/sadashiv-nandanikar\n",
65
+ "§ github.com/07Sada\n",
66
+ "Technical Skills\n",
67
+ "Programming & Databases: Python, SQL, NoSQL (MongoDB)\n",
68
+ "Machine Learning & Computer Vision: YOLOv8, PyTorch, Scikit-learn, Supervised Learning, Unsupervised Learning,\n",
69
+ "Image Classification, Object Detection, Model Quantization (ONNX), Langchain, RAG (Retrieval-Augmented Generation)\n",
70
+ "Cloud & Deployment: Docker, AWS EC2\n",
71
+ "Data Augmentation: Albumentations\n",
72
+ "Vector Databases: FAISS, Chroma DB\n",
73
+ "Large Language Models: Google Gemini Model\n",
74
+ "Data Processing & Engineering: Pandas, NumPy, Data Cleaning, Feature Engineering, REST APIs (Flask), Streamlit\n",
75
+ "Experience\n",
76
+ "Maruti Suzuki India Limited\n",
77
+ "May 2024 – Present\n",
78
+ "Data Scientist\n",
79
+ "Bengaluru, India\n",
80
+ "• Developed a rule-based system to identify potential customer segments for Advanced Driver Assistance Systems (ADAS)\n",
81
+ "• Analyzed automotive sensor data to detect traffic scenarios and improve vehicle signal processing.\n",
82
+ "• Optimized scenario detection algorithms, enhancing processing efficiency and memory utilization.\n",
83
+ "• Collaborated with cross-functional teams to refine detection of high-stress driving conditions.\n",
84
+ "• Processed large telematics datasets, supporting product validation and customer insights.\n",
85
+ "Codify Software Services\n",
86
+ "September 2022 – May 2024\n",
87
+ "Machine Learning Engineer\n",
88
+ "Pune, India\n",
89
+ "• Built predictive models for preventive maintenance, reducing machine breakdown risks.\n",
90
+ "• Analyzed operational challenges and derived insights from diverse data sources.\n",
91
+ "• Collaborated with engineering teams to design ML infrastructure, including data cleansing, transformation, feature\n",
92
+ "engineering, analysis, and visualization.\n",
93
+ "Flex\n",
94
+ "February 2022 – September 2022\n",
95
+ "SBM-Master Data Mangement\n",
96
+ "Pune, India\n",
97
+ "• Analyzed supplier transaction data to optimize procurement strategies.\n",
98
+ "• Automated reports to support data-driven decision-making.\n",
99
+ "• Ensured vendor data integrity and streamlined onboarding processes.\n",
100
+ "Success Automation\n",
101
+ "January 2019 – February 2022\n",
102
+ "Design Engineer\n",
103
+ "Pune, India\n",
104
+ "• Developed data-driven solutions like a Production Efficiency Dashboard for real-time insights.\n",
105
+ "• Analyzed production data to enhance process efficiency and reduce downtime.\n",
106
+ "Projects\n",
107
+ "CropGuard: GitHub Repository (Personal Proof of Concept Project)\n",
108
+ "• Developed a web application leveraging machine learning to provide insights and recommendations for farmers.\n",
109
+ "• Key Features:\n",
110
+ "- Crop Recommendation System: Suggests suitable crops based on soil composition and climate conditions.\n",
111
+ "- Fertilizer Recommendation System: Offers personalized fertilizer advice to optimize crop growth.\n",
112
+ "- Plant Disease Classification: Detects plant diseases through image classification models using user-uploaded\n",
113
+ "images.\n",
114
+ "- Real-time Commodity Price Updates: Integrates a government API for daily commodity prices, aiding market\n",
115
+ "decisions.\n",
116
+ "• Aimed at enhancing agricultural productivity through data-driven solutions while emphasizing the importance of\n",
117
+ "verified data sources for reliable farming decisions.\n",
118
+ "Brand Detection: GitHub Repository (Personal Proof of Concept Project)\n",
119
+ "• Developed a web application utilizing YOLOv8 for real-time object detection to identify brand logos in visual content.\n",
120
+ "• Manually scraped and annotated data to train the model effectively.\n",
121
+ "• Containerized the application using Docker for easy deployment and scalability.\n",
122
+ "• Hosted the application on AWS EC2 to ensure accessibility and performance.\n",
123
+ "• This project demonstrates the potential of ML-powered computer vision in marketing by providing insights into\n",
124
+ "audience behavior and enhancing advertising strategies.\n",
125
+ "Vegetable Recognition: Google Colab Notebook (Personal Proof of Concept Project)\n",
126
+ "• Developed an image classification model using PyTorch to recognize various vegetables.\n",
127
+ "• Employed Albumentations techniques for data augmentation to improve model robustness.\n",
128
+ "• Implemented a hard-coded ResNet architecture tailored for the classification task.\n",
129
+ "• Trained and tested the model on a dataset of vegetable images to achieve high accuracy.\n",
130
+ "• Utilized ONNX for model quantization, optimizing performance for deployment.\n",
131
+ "Bike Share Demand Prediction: GitHub Repository\n",
132
+ "• Built a regression model to predict bike demand using hyperparameter-tuned ML algorithms.\n",
133
+ "• Engineered features from weather, time, and holiday data for better accuracy.\n",
134
+ "• Developed an interactive Streamlit-based web app for real-time predictions.\n",
135
+ "• Provided multiple deployment options for user flexibility.\n",
136
+ "Education\n",
137
+ "B.E (Bachelor of Engineering)\n",
138
+ "Visvesvaraya Technological University, Belgaum, India\n",
139
+ "2013 – 2017\n",
140
+ "Interest\n",
141
+ "Reading Books: Atomic Habits, Who Moved My Cheese?\n",
142
+ "Anime: One Piece, Naruto.\n",
143
+ "Declaration\n",
144
+ "I hereby declare that the information provided in this resume is true and accurate to the best of my knowledge and\n",
145
+ "belief.\n",
146
+ "Date:\n",
147
+ "Signature:\n",
148
+ "\n"
149
+ ]
150
+ }
151
+ ],
152
+ "source": [
153
+ "print(resume_text)"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": 14,
159
+ "id": "c1170119-f9d1-4c16-9684-51990dbe5dba",
160
+ "metadata": {},
161
+ "outputs": [],
162
+ "source": [
163
+ "system_prompt = f\"\"\"\n",
164
+ "You are acting as an expert assistant representing the individual whose resume is provided below.\n",
165
+ "Your task is to answer questions strictly based on the information contained in the resume.\n",
166
+ "Do not fabricate or assume any details that are not explicitly mentioned in the resume.\n",
167
+ "\n",
168
+ "If asked about improvements or suggestions, respond with clear, concise, and focused points only. \n",
169
+ "Keep your answers compact and to the point, expanding only if the user requests further clarification.\n",
170
+ "\n",
171
+ "Resume Content:\n",
172
+ "{resume_text}\n",
173
+ "\"\"\""
174
+ ]
175
+ },
176
+ {
177
+ "cell_type": "code",
178
+ "execution_count": 16,
179
+ "id": "3ffec2d8-ad62-4e21-857b-db41ae1e021c",
180
+ "metadata": {},
181
+ "outputs": [],
182
+ "source": [
183
+ "# Function for creating chat inferface with gradio\n",
184
+ "def chat(message, history):\n",
185
+ " messages = [{\"role\": \"system\", \"content\": system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
186
+ " response = openai.chat.completions.create(model='gpt-4o-mini', messages=messages)\n",
187
+ " return response.choices[0].message.content"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": 17,
193
+ "id": "64fc63bd-c682-4d23-bf58-8dff19a4efcd",
194
+ "metadata": {},
195
+ "outputs": [
196
+ {
197
+ "name": "stdout",
198
+ "output_type": "stream",
199
+ "text": [
200
+ "* Running on local URL: http://127.0.0.1:7861\n",
201
+ "* To create a public link, set `share=True` in `launch()`.\n"
202
+ ]
203
+ },
204
+ {
205
+ "data": {
206
+ "text/html": [
207
+ "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
208
+ ],
209
+ "text/plain": [
210
+ "<IPython.core.display.HTML object>"
211
+ ]
212
+ },
213
+ "metadata": {},
214
+ "output_type": "display_data"
215
+ },
216
+ {
217
+ "data": {
218
+ "text/plain": []
219
+ },
220
+ "execution_count": 17,
221
+ "metadata": {},
222
+ "output_type": "execute_result"
223
+ }
224
+ ],
225
+ "source": [
226
+ "# launch the gradio interface\n",
227
+ "gr.ChatInterface(fn=chat, type='messages').launch()"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "markdown",
232
+ "id": "163ceb77-96d4-4d9f-8d77-a91b10901f7e",
233
+ "metadata": {},
234
+ "source": [
235
+ "Gradio handles chat history internally when you use:\n",
236
+ "\n",
237
+ "```python\n",
238
+ "gr.ChatInterface(fn=chat, type='messages')\n",
239
+ "```\n",
240
+ "\n",
241
+ "✅ How Gradio Handles `history`:\n",
242
+ "- When you use `type='messages'`, Gradio:\n",
243
+ " - Automatically maintains a list of previous user and assistant messages in the format:\n",
244
+ " ```python\n",
245
+ " [\n",
246
+ " {\"role\": \"user\", \"content\": \"Hi\"},\n",
247
+ " {\"role\": \"assistant\", \"content\": \"Hello!\"},\n",
248
+ " ...\n",
249
+ " ]\n",
250
+ " ```\n",
251
+ " - This `history` is passed to your `chat()` function each time a new message is sent.\n",
252
+ "\n",
253
+ "💾 Where is history stored?\n",
254
+ "- It’s stored in memory inside the Gradio session (i.e., in the browser tab + backend process).\n",
255
+ "- It resets when:\n",
256
+ " - The user reloads the page\n",
257
+ " - The app restarts\n",
258
+ " - You call `gr.ChatInterface(..., clear=True)` or implement a `\"Clear chat\"` button\n",
259
+ "\n",
260
+ "🔐 Is it reliable?\n",
261
+ "- Yes, for single-session usage, like prototyping, demos, or small-scale apps.\n",
262
+ "- No persistence across sessions, so:\n",
263
+ " - If you need long-term history (e.g. save chats per user), you must store it yourself (e.g., in a database or file)."
264
+ ]
265
+ },
266
+ {
267
+ "cell_type": "code",
268
+ "execution_count": null,
269
+ "id": "0d9b7819-e087-4c5f-8536-3f3d5f2c9aa7",
270
+ "metadata": {},
271
+ "outputs": [],
272
+ "source": []
273
+ }
274
+ ],
275
+ "metadata": {
276
+ "kernelspec": {
277
+ "display_name": "Python (venv-agents)",
278
+ "language": "python",
279
+ "name": "venv-agents"
280
+ },
281
+ "language_info": {
282
+ "codemirror_mode": {
283
+ "name": "ipython",
284
+ "version": 3
285
+ },
286
+ "file_extension": ".py",
287
+ "mimetype": "text/x-python",
288
+ "name": "python",
289
+ "nbconvert_exporter": "python",
290
+ "pygments_lexer": "ipython3",
291
+ "version": "3.13.1"
292
+ }
293
+ },
294
+ "nbformat": 4,
295
+ "nbformat_minor": 5
296
+ }
app.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ from openai import OpenAI
3
+ import json
4
+ import os
5
+ import requests
6
+ import gradio as gr
7
+ import fitz # PyMuPDF
8
+
9
+ # load the environment variables
10
+ load_dotenv(override=True)
11
+
12
+ # Setting up pushover for notification
13
+ pushover_user = os.getenv("PUSHOVER_USER")
14
+ pushover_token = os.getenv("PUSHOVER_TOKEN")
15
+ pushover_url = "https://api.pushover.net/1/messages.json"
16
+
17
+ # function to send notifications
18
def push(message: str) -> None:
    """Send *message* as a Pushover notification, if credentials are configured.

    Best-effort: network failures are logged to stdout and never propagated,
    so a notification outage cannot break the chat flow.
    """
    if pushover_user and pushover_token:
        payload = {"user": pushover_user, "token": pushover_token, "message": message}
        try:
            requests.post(pushover_url, data=payload, timeout=5)
        except requests.exceptions.RequestException as e:
            # Bug fix: the original caught requests.exceptions.RequestError,
            # which does not exist in the requests library — any network
            # failure would have raised AttributeError instead of being
            # handled. RequestException is the documented base class.
            print(f"Pushover notification failed: {e}")
    else:
        print("Pushover credentials not found. Skipping notification")
27
+
28
+ # Function to record the user details
29
def record_user_details(email: str, name: str='Name not provided', notes: str='Notes not provided'):
    """Record a visitor's contact details by pushing a notification.

    Called by the LLM (via tool-calling) when a user shares an email address.
    Returns a small status dict that is echoed back to the model.
    """
    notification = f"Recording interest from {name} with email {email} and notes {notes}"
    push(notification)
    return {"recorded": "ok"}
32
+
33
+ # Tool to record user details
34
# JSON schema describing the record_user_details tool, in the OpenAI
# function-calling format. Only "email" is mandatory; name/notes are optional.
record_user_details_json = {
    "name": "record_user_details",
    "description": "Use this tool to record that a user is interested in being in touch and provided an email address",
    "parameters": {
        "type": "object",
        "properties": {
            "email": {"type": "string", "description": "The email address of this user"},
            "name": {"type": "string", "description": "The user's name, if they provided it"},
            "notes": {"type": "string", "description": "Any additional information about the conversation that's worth recording to give context"}
        },
        "required": ["email"],
        "additionalProperties": False
    }
}
48
+
49
+ # Tool to log unanswered questions
50
# JSON schema describing the record_unknown_question tool, in the OpenAI
# function-calling format. Used to log questions the resume cannot answer.
record_unknown_question_json = {
    "name": "record_unknown_question",
    "description": "Always use this tool to record any question that couldn't be answered as you didn't know the answer",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {"type": "string", "description": "The question that you couldn't answer"}
        },
        "required": ["question"],
        "additionalProperties": False
    }
}
62
+
63
+ # List of tools for the LLM
64
# Tool specifications exposed to the LLM, one entry per schema, each wrapped
# in the {"type": "function", "function": ...} envelope OpenAI expects.
tools = [
    {"type": "function", "function": schema}
    for schema in (record_user_details_json, record_unknown_question_json)
]
68
+
69
class ResumeChatbot:
    """Answers questions about an uploaded resume via the OpenAI chat API,
    with tool-calling support for recording leads and unknown questions."""

    def __init__(self):
        # Reads OPENAI_API_KEY from the environment (loaded by load_dotenv).
        self.open_ai = OpenAI()

    def extract_text_from_pdf(self, pdf_path):
        """Extract and return all page text from the PDF at *pdf_path*.

        Returns None if the file cannot be opened or read.
        """
        try:
            # Context manager guarantees the document handle is closed; the
            # original left the fitz Document open (resource leak).
            with fitz.open(pdf_path) as doc:
                return "".join(page.get_text() for page in doc)
        except Exception as e:
            print(f"Error reading PDF: {e}")
            return None

    def handle_tool_call(self, tool_calls):
        """Execute each requested tool call and return the tool-result messages.

        Each result is formatted as a "role": "tool" message carrying the
        JSON-encoded return value and the originating tool_call_id, ready to
        be appended to the conversation for the model's next turn.
        """
        results = []
        for tool_call in tool_calls:
            tool_name = tool_call.function.name
            arguments = json.loads(tool_call.function.arguments)
            # Resolve the tool function by name from module globals; an
            # unknown tool yields an empty result instead of crashing.
            tool = globals().get(tool_name)
            result = tool(**arguments) if tool else {}
            results.append({
                "role": "tool",
                "content": json.dumps(result),
                "tool_call_id": tool_call.id
            })
        return results

    def get_system_prompt(self, resume_text):
        """Build the system prompt that embeds *resume_text* and instructs
        the model when to invoke the recording tools."""
        system_prompt = f"""
        You are acting as an expert assistant representing the individual whose resume is provided below.
        Your task is to answer questions strictly based on the information contained in the resume.
        Do not fabricate or assume any details that are not explicitly mentioned in the resume.

        If asked about improvements or suggestions, respond with clear, concise, and focused points only.
        Keep your answers compact and to the point, and expand only if the user explicitly asks for more details.

        If a user asks a question you cannot answer from the resume, use the record_unknown_question tool to log the unanswered query.

        If the user expresses interest in following up or staying in touch, politely ask for their name and email,
        then record it using the record_user_details tool.

        Resume Content:
        {resume_text}
        """
        return system_prompt

    def chat(self, message: str, history: list, resume_text: str):
        """Run one chat turn and return the assistant's final reply text.

        *history* is in Gradio tuple format ([user_msg, bot_msg] pairs, either
        side possibly None); it is converted to OpenAI message dicts. Loops
        while the model requests tool calls, feeding results back until a
        normal completion is produced.
        """
        system_prompt = self.get_system_prompt(resume_text)

        # Convert Gradio tuple history to OpenAI messages format; None
        # entries (e.g. the initial bot-only welcome message) are skipped.
        formatted_history = []
        for user_msg, bot_msg in history:
            if user_msg is not None:
                formatted_history.append({"role": "user", "content": user_msg})
            if bot_msg is not None:
                formatted_history.append({"role": "assistant", "content": bot_msg})

        # Full conversation: system prompt, prior turns, then the new message.
        messages = [{"role": "system", "content": system_prompt}] + formatted_history + [{"role": "user", "content": message}]

        done = False  # set True once the model finishes without tool calls
        while not done:
            response = self.open_ai.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages,
                tools=tools  # let the model invoke the recording tools
            )

            finish_reason = response.choices[0].finish_reason

            if finish_reason == "tool_calls":
                # Run the requested tools, then append both the tool-call
                # message and the tool results so the model can continue.
                message_response = response.choices[0].message
                tool_calls = message_response.tool_calls
                results = self.handle_tool_call(tool_calls)
                messages.append(message_response)
                messages.extend(results)
            else:
                done = True

        return response.choices[0].message.content
158
+
159
# Create a single shared ResumeChatbot instance, reused by all Gradio
# callbacks below (per-session data lives in gr.State, not on this object).
chatbot_instance = ResumeChatbot()
161
+
162
def upload_and_process_resume(file_obj):
    """
    Gradio callback for file uploads: extract text from the uploaded PDF.

    Returns a (resume_text, chat_history, textbox_value) tuple wired to
    [resume_text_state, chatbot, msg_box]. On failure, resume_text is None
    and the message appears in the textbox.
    """
    if file_obj is None:
        return None, [], "Please upload a PDF resume to begin."

    # Bug fix: with gr.File(type="filepath") Gradio passes a plain string
    # path, so the original `file_obj.name` raised AttributeError. Accept
    # both a str path and a tempfile wrapper exposing .name.
    pdf_path = file_obj if isinstance(file_obj, str) else file_obj.name
    resume_text = chatbot_instance.extract_text_from_pdf(pdf_path)

    if resume_text is None or not resume_text.strip():
        return None, [], "Could not read text from the uploaded PDF. Please try another file."

    # Reset the chat with a welcome message in Gradio's tuple format
    # (user side None for a bot-only message).
    initial_message = "Thank you for uploading the resume. How can I help you today?"
    chat_history = [[None, initial_message]]
    return resume_text, chat_history, ""  # state, updated chatbot, cleared textbox
181
+
182
def respond(message: str, chat_history: list, resume_state: str):
    """
    Gradio chat callback: append the reply to *message* onto *chat_history*.

    *resume_state* holds the session's extracted resume text (from gr.State).
    Returns ("", updated_history) so the input textbox is cleared.
    """
    if not resume_state:
        # No resume uploaded yet - prompt the user instead of calling the LLM.
        chat_history.append([message, "Please upload a resume before starting the conversation."])
    else:
        # chat() receives the history still in Gradio's tuple format.
        reply = chatbot_instance.chat(message, chat_history, resume_state)
        chat_history.append([message, reply])
    return "", chat_history
197
+
198
+ # --- Gradio Interface ---
199
if __name__ == "__main__":
    # Build the Gradio UI: a file uploader on the left, chat on the right.
    with gr.Blocks(theme=gr.themes.Soft(), title="Resume Chatbot") as demo:
        # State to hold the extracted resume text for the user's session
        # (per-browser-session, not shared between users).
        resume_text_state = gr.State(None)

        gr.Markdown("# Chat with a Resume")
        gr.Markdown("Upload a PDF resume below, then ask questions about it.")

        with gr.Row():
            with gr.Column(scale=1):
                file_uploader = gr.File(
                    label="Upload PDF Resume",
                    file_types=[".pdf"],
                    type="filepath"  # Passes the temporary filepath to the function
                )
            with gr.Column(scale=2):
                # NOTE(review): gr.Chatbot defaults to the legacy tuple
                # format here, which matches the [[user, bot], ...] lists
                # built by the callbacks — confirm before switching to
                # type="messages".
                chatbot = gr.Chatbot(label="Conversation", height=500)
                msg_box = gr.Textbox(label="Your Question", placeholder="e.g., What are the key skills mentioned?")
                submit_btn = gr.Button("Send")

        # Event handler for the file upload: stores extracted text in state,
        # resets the chat, and clears the textbox.
        file_uploader.upload(
            fn=upload_and_process_resume,
            inputs=[file_uploader],
            outputs=[resume_text_state, chatbot, msg_box]
        )

        # Event handlers for chat submission (Enter key and Send button
        # intentionally share the same callback and wiring).
        msg_box.submit(
            fn=respond,
            inputs=[msg_box, chatbot, resume_text_state],
            outputs=[msg_box, chatbot]
        )
        submit_btn.click(
            fn=respond,
            inputs=[msg_box, chatbot, resume_text_state],
            outputs=[msg_box, chatbot]
        )

    demo.launch()
me/Sadashiv_Data_Scientist_Resume.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfa6fbf3d616b934f4a5fa7b9a07a1c206c2b2f4e0ffd48fc098fdb257886d7b
3
+ size 113708