Pham23 committed on
Commit 7e820ed · verified · 1 Parent(s): d8b6f35

Upload folder using huggingface_hub

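The commit message above corresponds to a programmatic upload with the huggingface_hub client. As a minimal sketch of that workflow (assumed, not part of the committed files; the repo id below is a placeholder):

from huggingface_hub import HfApi

api = HfApi()  # uses the token from `huggingface-cli login` or the HF_TOKEN env var
api.upload_folder(
    folder_path=".",                  # local folder to push
    repo_id="user/Week_1",            # hypothetical Space id
    repo_type="space",                # this repository is a Gradio Space
    commit_message="Upload folder using huggingface_hub",
)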
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Data_w1/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
Data_w1/4d97b029-f947-429e-8b62-d7b492658561/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23add52afbe7588391f32d3deffb581b2663d2e2ad8851aba7de25e6b3f66761
+size 32120000
Data_w1/4d97b029-f947-429e-8b62-d7b492658561/header.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8c7f00b4415698ee6cb94332eff91aedc06ba8e066b1f200e78ca5df51abb57
+size 100
Data_w1/4d97b029-f947-429e-8b62-d7b492658561/length.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6803a4081e907735e2296bc15a2149f9d4f3195c4868e1dc1d12f50abe70ebd
+size 40000
Data_w1/4d97b029-f947-429e-8b62-d7b492658561/link_lists.bin ADDED
File without changes
Data_w1/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f97137b8f055367cf61dc7422597f3937a7897baba6fd2867fd70da6859da3f0
+size 1454080
Data_w1/linkedin.pdf ADDED
Binary file (69.7 kB).
Data_w1/summary.txt ADDED
@@ -0,0 +1,2 @@
+My name is Ed Donner. I'm an entrepreneur, software engineer and data scientist. I'm originally from London, England, but I moved to NYC in 2000.
+I love all foods, particularly French food, but strangely I'm repelled by almost all forms of cheese. I'm not allergic, I just hate the taste! I make an exception for cream cheese and mozzarella though - cheesecake and pizza are the greatest.
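The Data_w1 entries above are the on-disk layout of a persisted Chroma collection: the UUID folder holds the HNSW segment binaries and chroma.sqlite3 holds the metadata, built from linkedin.pdf and summary.txt. A rough sketch of how such a store could be created follows; the chunk size and collection name are assumptions, not the project's actual ingestion code:

import chromadb
from pypdf import PdfReader

# Gather the two Data_w1 sources into one text blob
reader = PdfReader("Data_w1/linkedin.pdf")
text = "".join(page.extract_text() for page in reader.pages)
with open("Data_w1/summary.txt") as f:
    text += "\n" + f.read()

# Naive fixed-size chunking, purely for illustration
chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]

client = chromadb.PersistentClient(path="Data_w1")        # writes chroma.sqlite3 here
collection = client.get_or_create_collection(name="RAG_DB")
collection.add(documents=chunks, ids=[f"chunk-{i}" for i in range(len(chunks))])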
Lab3_w1.ipynb ADDED
@@ -0,0 +1,469 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 22,
6
+ "id": "4d961b4b",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from dotenv import load_dotenv\n",
11
+ "import os\n",
12
+ "import requests\n",
13
+ "import gradio as gr\n",
14
+ "from pypdf import PdfReader\n",
15
+ "import google.generativeai as genai\n",
16
+ "from typing import Dict, List\n",
17
+ "import json\n",
18
+ "load_dotenv(override=True)\n",
19
+ "genai.configure(api_key=os.getenv(\"GEMINI_API\"))"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 2,
25
+ "id": "070475b8",
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "pushover_user = os.getenv(\"PUSHOVER_USER\")\n",
30
+ "pushover_token = os.getenv(\"PUSHOVER_API\")\n",
31
+ "pushover_url = f\"https://api.pushover.net/1/messages.json\""
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 42,
37
+ "id": "94cd12d8",
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "def push(message: str):\n",
42
+ " print(\"Pushing to Pushover \", message)\n",
43
+ " payload = {\"user\": pushover_user, \"token\": pushover_token, \"message\": message}\n",
44
+ " requests.post(pushover_url, data=payload)"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": 43,
50
+ "id": "99d70c8a",
51
+ "metadata": {},
52
+ "outputs": [],
53
+ "source": [
54
+ "def record_user_details(email: str, \n",
55
+ " name: str,\n",
56
+ " notes: str) -> Dict[str, str]:\n",
57
+ " push(f\"Email: {email}\\nName: {name}\\nNotes: {notes}\")\n",
58
+ " return {\"recorded\": \"ok\"}\n",
59
+ "\n",
60
+ "\n",
61
+ "def record_unknown_question(question: str) -> Dict[str, str]:\n",
62
+ " push(f\"Question: {question}\")\n",
63
+ " return {\"recorded\": \"ok\"}\n",
64
+ "\n"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 35,
70
+ "id": "408924fe",
71
+ "metadata": {},
72
+ "outputs": [],
73
+ "source": [
74
+ "record_user_details_json = {\n",
75
+ " \"name\": \"record_user_details\",\n",
76
+ " \"description\": \"Use this tool to record that a user is interested in being in touch and provided an email address\",\n",
77
+ " \"parameters\": {\n",
78
+ " \"type\": \"OBJECT\",\n",
79
+ " \"properties\": {\n",
80
+ " \"email\": {\n",
81
+ " \"type\": \"STRING\",\n",
82
+ " \"description\": \"The email address of this user\"\n",
83
+ " },\n",
84
+ " \"name\": {\n",
85
+ " \"type\": \"STRING\",\n",
86
+ " \"description\": \"The user's name, if they provided it\"\n",
87
+ " }\n",
88
+ " ,\n",
89
+ " \"notes\": {\n",
90
+ " \"type\": \"STRING\",\n",
91
+ " \"description\": \"Any additional information about the conversation that's worth recording to give context\"\n",
92
+ " }\n",
93
+ " },\n",
94
+ " \"required\": [\"name\", \"email\"]\n",
95
+ " }\n",
96
+ "}"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 36,
102
+ "id": "c64dc641",
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "record_unknown_question_json = {\n",
107
+ " \"name\": \"record_unknown_question\",\n",
108
+ " \"description\": \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\",\n",
109
+ " \"parameters\": {\n",
110
+ " \"type\": \"OBJECT\",\n",
111
+ " \"properties\": {\n",
112
+ " \"question\": {\n",
113
+ " \"type\": \"STRING\",\n",
114
+ " \"description\": \"The question that couldn't be answered\"\n",
115
+ " },\n",
116
+ " },\n",
117
+ " \"required\": [\"question\"]\n",
118
+ " }\n",
119
+ "}"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": 37,
125
+ "id": "23b9f4a6",
126
+ "metadata": {},
127
+ "outputs": [],
128
+ "source": [
129
+ "tools = [record_user_details_json, record_unknown_question_json]"
130
+ ]
131
+ },
132
+ {
133
+ "cell_type": "code",
134
+ "execution_count": 66,
135
+ "id": "92c7a46f",
136
+ "metadata": {},
137
+ "outputs": [],
138
+ "source": [
139
+ "def handle_tool_calls(tool_calls: List) -> List[Dict[str, str]]:\n",
140
+ " results = []\n",
141
+ " for tool_call in tool_calls:\n",
142
+ " tool_name = tool_call.name\n",
143
+ " arguments = dict(tool_call.args)\n",
144
+ " print(f\"Tool called: {tool_name} with arguments: {arguments}\")\n",
145
+ " tool = globals().get(tool_name)\n",
146
+ " result = tool(**arguments) if tool else {}\n",
147
+ " # Format for Gemini function response\n",
148
+ " results.append({\n",
149
+ " \"function_response\": {\n",
150
+ " \"name\": tool_name,\n",
151
+ " \"response\": result\n",
152
+ " }\n",
153
+ " })\n",
154
+ " return results\n",
155
+ " "
156
+ ]
157
+ },
158
+ {
159
+ "cell_type": "code",
160
+ "execution_count": 67,
161
+ "id": "98e9cd1a",
162
+ "metadata": {},
163
+ "outputs": [],
164
+ "source": [
165
+ "# Read the PDF and summary \n",
166
+ "reader = PdfReader(\"../Week_1/Data_w1/linkedin.pdf\")\n",
167
+ "linkedin = \"\"\n",
168
+ "for page in reader.pages:\n",
169
+ " linkedin += page.extract_text()\n",
170
+ "\n",
171
+ "with open(\"../Week_1/Data_w1/summary.txt\", \"r\") as f:\n",
172
+ " summary = f.read()"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": 69,
178
+ "id": "e473a35c",
179
+ "metadata": {},
180
+ "outputs": [],
181
+ "source": [
182
+ "initial_system_prompt = f\"You are acting as Ed Donner. You are answering questions on Ed Donner's website, \\\n",
183
+ "particularly questions related to Ed Donner's career, background, skills and experience. \\\n",
184
+ "Your responsibility is to represent Ed Donner for interactions on the website as faithfully as possible. \\\n",
185
+ "You are given a summary of Ed Donner's background and LinkedIn profile which you can use to answer questions. \\\n",
186
+ "Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
187
+ "If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \\\n",
188
+ "If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. \"\n",
189
+ "\n",
190
+ "initial_system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
191
+ "initial_system_prompt += f\"With this context, please chat with the user, always staying in character as Ed Donner.\""
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": null,
197
+ "id": "b7ba7ef6",
198
+ "metadata": {},
199
+ "outputs": [
200
+ {
201
+ "data": {
202
+ "text/plain": [
203
+ "response:\n",
204
+ "GenerateContentResponse(\n",
205
+ " done=True,\n",
206
+ " iterator=None,\n",
207
+ " result=protos.GenerateContentResponse({\n",
208
+ " \"candidates\": [\n",
209
+ " {\n",
210
+ " \"content\": {\n",
211
+ " \"parts\": [\n",
212
+ " {\n",
213
+ " \"text\": \"Hi! Welcome to my website. I'm Ed Donner. What can I tell you about? I'm happy to chat about my career, Nebula.io, LLMs, or anything else that might be on your mind.\\n\"\n",
214
+ " }\n",
215
+ " ],\n",
216
+ " \"role\": \"model\"\n",
217
+ " },\n",
218
+ " \"finish_reason\": \"STOP\",\n",
219
+ " \"avg_logprobs\": -0.1461243430773417\n",
220
+ " }\n",
221
+ " ],\n",
222
+ " \"usage_metadata\": {\n",
223
+ " \"prompt_token_count\": 2516,\n",
224
+ " \"candidates_token_count\": 48,\n",
225
+ " \"total_token_count\": 2564\n",
226
+ " },\n",
227
+ " \"model_version\": \"gemini-2.0-flash\"\n",
228
+ " }),\n",
229
+ ")"
230
+ ]
231
+ },
232
+ "execution_count": 41,
233
+ "metadata": {},
234
+ "output_type": "execute_result"
235
+ }
236
+ ],
237
+ "source": [
238
+ "model = genai.GenerativeModel(\n",
239
+ " 'gemini-2.0-flash',\n",
240
+ " system_instruction=system_prompt,\n",
241
+ " tools=tools\n",
242
+ " )\n",
243
+ "gemini_history = []\n",
244
+ "chat_session = model.start_chat(history=gemini_history)\n",
245
+ "# Send the current message\n",
246
+ "response = chat_session.send_message(\"Hi there\")\n",
247
+ "\n",
248
+ "response"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": 81,
254
+ "id": "5b21dfd3",
255
+ "metadata": {},
256
+ "outputs": [],
257
+ "source": [
258
+ "def chat_with_gemini(message, history, system_prompt):\n",
259
+ " try:\n",
260
+ " # Create the model with system instruction\n",
261
+ " model = genai.GenerativeModel(\n",
262
+ " 'gemini-2.0-flash',\n",
263
+ " system_instruction=system_prompt,\n",
264
+ " tools=tools\n",
265
+ " )\n",
266
+ " \n",
267
+ " # Convert Gradio messages format to Gemini format\n",
268
+ " gemini_history = []\n",
269
+ " max_iteration = 3\n",
270
+ " iteration = 0\n",
271
+ " for msg in history:\n",
272
+ " if msg[\"role\"] == \"user\":\n",
273
+ " gemini_history.append({\n",
274
+ " \"role\": \"user\",\n",
275
+ " \"parts\": [msg[\"content\"]]\n",
276
+ " })\n",
277
+ " elif msg[\"role\"] == \"assistant\":\n",
278
+ " gemini_history.append({\n",
279
+ " \"role\": \"model\", \n",
280
+ " \"parts\": [msg[\"content\"]]\n",
281
+ " })\n",
282
+ " \n",
283
+ " # Start chat with history\n",
284
+ " chat_session = model.start_chat(history=gemini_history)\n",
285
+ " current_message = message\n",
286
+ " try:\n",
287
+ " while iteration < max_iteration:\n",
288
+ " # Send the current message\n",
289
+ " response = chat_session.send_message(current_message)\n",
290
+ " # Check for its finishing \n",
291
+ " finish_reason = response.candidates[0].finish_reason\n",
292
+ "\n",
293
+ " print(f\"Response parts: {[part for part in response.candidates[0].content.parts]}\")\n",
294
+ "\n",
295
+ " function_calls = []\n",
296
+ " text_parts = []\n",
297
+ " \n",
298
+ " # If the LLM wants to call the tools\n",
299
+ " for part in response.candidates[0].content.parts:\n",
300
+ " if hasattr(part, \"function_call\") and part.function_call:\n",
301
+ " function_calls.append(part.function_call)\n",
302
+ " print(\"Function calls list not empty\")\n",
303
+ " elif hasattr(part, \"text\"):\n",
304
+ " text_parts.append(part.text)\n",
305
+ " \n",
306
+ " # Excecute if function_calls not empty\n",
307
+ " if function_calls:\n",
308
+ " results = handle_tool_calls(function_calls)\n",
309
+ " # Add the result back to the model\n",
310
+ " current_message = results\n",
311
+ " iteration += 1\n",
312
+ " else:\n",
313
+ " if text_parts:\n",
314
+ " return \"\".join(text_parts)\n",
315
+ " else:\n",
316
+ " return response.text\n",
317
+ " return \"\"\n",
318
+ " except Exception as e:\n",
319
+ " return f\"Error: {e}\"\n",
320
+ " except Exception as e:\n",
321
+ " return f\"Error: {e}\""
322
+ ]
323
+ },
324
+ {
325
+ "cell_type": "code",
326
+ "execution_count": 82,
327
+ "id": "35fd0a44",
328
+ "metadata": {},
329
+ "outputs": [],
330
+ "source": [
331
+ "# Create interface with additional inputs\n",
332
+ "with gr.Blocks() as demo:\n",
333
+ " gr.Markdown(\"# Chat with Google Gemini\")\n",
334
+ " \n",
335
+ " system_prompt = gr.Textbox(\n",
336
+ " value=initial_system_prompt,\n",
337
+ " label=\"System Prompt\",\n",
338
+ " placeholder=\"Enter system instructions for the AI...\",\n",
339
+ " lines=2\n",
340
+ " )\n",
341
+ " \n",
342
+ " chat_interface = gr.ChatInterface(\n",
343
+ " fn=chat_with_gemini,\n",
344
+ " additional_inputs=[system_prompt],\n",
345
+ " title=\"\",\n",
346
+ " cache_examples=False,\n",
347
+ " type='messages'\n",
348
+ " \n",
349
+ " )"
350
+ ]
351
+ },
352
+ {
353
+ "cell_type": "code",
354
+ "execution_count": null,
355
+ "id": "53665d72",
356
+ "metadata": {},
357
+ "outputs": [
358
+ {
359
+ "name": "stdout",
360
+ "output_type": "stream",
361
+ "text": [
362
+ "* Running on local URL: http://127.0.0.1:7863\n",
363
+ "* To create a public link, set `share=True` in `launch()`.\n"
364
+ ]
365
+ },
366
+ {
367
+ "data": {
368
+ "text/html": [
369
+ "<div><iframe src=\"http://127.0.0.1:7863/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
370
+ ],
371
+ "text/plain": [
372
+ "<IPython.core.display.HTML object>"
373
+ ]
374
+ },
375
+ "metadata": {},
376
+ "output_type": "display_data"
377
+ },
378
+ {
379
+ "data": {
380
+ "text/plain": []
381
+ },
382
+ "execution_count": 84,
383
+ "metadata": {},
384
+ "output_type": "execute_result"
385
+ },
386
+ {
387
+ "name": "stdout",
388
+ "output_type": "stream",
389
+ "text": [
390
+ "Response parts: [text: \"Great! It\\'s a pleasure to hear from you, Ed. I\\'d be happy to connect. Could you tell me a bit about what you\\'d like to discuss? In the meantime, I\\'ll make a note of your email address.\\n\"\n",
391
+ ", function_call {\n",
392
+ " name: \"record_user_details\"\n",
393
+ " args {\n",
394
+ " fields {\n",
395
+ " key: \"notes\"\n",
396
+ " value {\n",
397
+ " string_value: \"User wants to get in touch.\"\n",
398
+ " }\n",
399
+ " }\n",
400
+ " fields {\n",
401
+ " key: \"name\"\n",
402
+ " value {\n",
403
+ " string_value: \"Ed\"\n",
404
+ " }\n",
405
+ " }\n",
406
+ " fields {\n",
407
+ " key: \"email\"\n",
408
+ " value {\n",
409
+ " string_value: \"ed@edwarddung.com\"\n",
410
+ " }\n",
411
+ " }\n",
412
+ " }\n",
413
+ "}\n",
414
+ "]\n",
415
+ "Function calls list not empty\n",
416
+ "Tool called: record_user_details with arguments: {'notes': 'User wants to get in touch.', 'email': 'ed@edwarddung.com', 'name': 'Ed'}\n",
417
+ "Pushing to Pushover Email: ed@edwarddung.com\n",
418
+ "Name: Ed\n",
419
+ "Notes: User wants to get in touch.\n",
420
+ "Response parts: [text: \"Thanks, Ed. I\\'ve made a note that you\\'re interested in getting in touch. I look forward to hearing more about what you\\'d like to discuss! Feel free to send me an email directly at ed.donner@gmail.com.\\n\"\n",
421
+ "]\n"
422
+ ]
423
+ }
424
+ ],
425
+ "source": [
426
+ "demo.launch()"
427
+ ]
428
+ },
429
+ {
430
+ "cell_type": "code",
431
+ "execution_count": 85,
432
+ "id": "e8305956",
433
+ "metadata": {},
434
+ "outputs": [
435
+ {
436
+ "name": "stdout",
437
+ "output_type": "stream",
438
+ "text": [
439
+ "Closing server running on port: 7863\n"
440
+ ]
441
+ }
442
+ ],
443
+ "source": [
444
+ "demo.close()"
445
+ ]
446
+ }
447
+ ],
448
+ "metadata": {
449
+ "kernelspec": {
450
+ "display_name": ".venv",
451
+ "language": "python",
452
+ "name": "python3"
453
+ },
454
+ "language_info": {
455
+ "codemirror_mode": {
456
+ "name": "ipython",
457
+ "version": 3
458
+ },
459
+ "file_extension": ".py",
460
+ "mimetype": "text/x-python",
461
+ "name": "python",
462
+ "nbconvert_exporter": "python",
463
+ "pygments_lexer": "ipython3",
464
+ "version": "3.12.10"
465
+ }
466
+ },
467
+ "nbformat": 4,
468
+ "nbformat_minor": 5
469
+ }
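Lab3_w1.ipynb above returns tool results to Gemini as plain dicts carrying a "function_response" key. The google-generativeai SDK also accepts explicit proto parts for this step; a hedged sketch of that alternative (not the notebook's code, assuming the SDK's genai.protos module):

import google.generativeai as genai

def tool_result_part(tool_name: str, result: dict):
    # Wrap a tool's return value as a FunctionResponse part that send_message() accepts
    return genai.protos.Part(
        function_response=genai.protos.FunctionResponse(name=tool_name, response=result)
    )

# e.g. chat_session.send_message([tool_result_part("record_user_details", {"recorded": "ok"})])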
Lab_practice/Lab1_w1.ipynb ADDED
@@ -0,0 +1,205 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "2a64513e",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": []
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 28,
14
+ "id": "0cbd72f2",
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "data": {
19
+ "text/plain": [
20
+ "True"
21
+ ]
22
+ },
23
+ "execution_count": 28,
24
+ "metadata": {},
25
+ "output_type": "execute_result"
26
+ }
27
+ ],
28
+ "source": [
29
+ "from dotenv import load_dotenv\n",
30
+ "import os\n",
31
+ "from pypdf import PdfReader\n",
32
+ "import google.generativeai as genai\n",
33
+ "import gradio as gr\n",
34
+ "from pydantic import BaseModel\n",
35
+ "import json\n",
36
+ "load_dotenv(override=True)"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": 2,
42
+ "id": "76d7f54a",
43
+ "metadata": {},
44
+ "outputs": [],
45
+ "source": [
46
+ "genai.configure(api_key=os.getenv(\"GEMINI_API\"))"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 6,
52
+ "id": "471c58a2",
53
+ "metadata": {},
54
+ "outputs": [],
55
+ "source": [
56
+ "# Read the PDF and summary \n",
57
+ "reader = PdfReader(\"../Week_1/Data_w1/linkedin.pdf\")\n",
58
+ "linkedin = \"\"\n",
59
+ "for page in reader.pages:\n",
60
+ " linkedin += page.extract_text()\n",
61
+ "\n",
62
+ "with open(\"../Week_1/Data_w1/summary.txt\", \"r\") as f:\n",
63
+ " summary = f.read()"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 9,
69
+ "id": "97b2238e",
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "# Create a system prompt\n",
74
+ "initial_system_prompt = f\"You are acting as Ed Donner. You are answering questions on Ed Donner's website, \\\n",
75
+ "particularly questions related to Ed Donner's career, background, skills and experience. \\\n",
76
+ "Your responsibility is to represent Ed Donner for interactions on the website as faithfully as possible. \\\n",
77
+ "You are given a summary of Ed Donner's background and LinkedIn profile which you can use to answer questions. \\\n",
78
+ "Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
79
+ "If you don't know the answer, say so.\"\n",
80
+ "\n",
81
+ "initial_system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
82
+ "initial_system_prompt += f\"With this context, please chat with the user, always staying in character as Ed Donner.\"\n",
83
+ "\n",
84
+ "chat_session = None"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": 13,
90
+ "id": "67da7af6",
91
+ "metadata": {},
92
+ "outputs": [],
93
+ "source": [
94
+ "def chat_with_gemini(message, history, system_prompt):\n",
95
+ " try:\n",
96
+ " # Create the model with system instruction\n",
97
+ " model = genai.GenerativeModel(\n",
98
+ " 'gemini-2.0-flash',\n",
99
+ " system_instruction=system_prompt\n",
100
+ " )\n",
101
+ " \n",
102
+ " # Convert Gradio messages format to Gemini format\n",
103
+ " gemini_history = []\n",
104
+ " for msg in history:\n",
105
+ " if msg[\"role\"] == \"user\":\n",
106
+ " gemini_history.append({\n",
107
+ " \"role\": \"user\",\n",
108
+ " \"parts\": [msg[\"content\"]]\n",
109
+ " })\n",
110
+ " elif msg[\"role\"] == \"assistant\":\n",
111
+ " gemini_history.append({\n",
112
+ " \"role\": \"model\", # Gemini uses \"model\" instead of \"assistant\"\n",
113
+ " \"parts\": [msg[\"content\"]]\n",
114
+ " })\n",
115
+ " \n",
116
+ " # Start chat with history\n",
117
+ " chat_session = model.start_chat(history=gemini_history)\n",
118
+ " \n",
119
+ " # Send the current message\n",
120
+ " response = chat_session.send_message(message)\n",
121
+ " return response.text\n",
122
+ " except Exception as e:\n",
123
+ " return f\"Error: {e}\""
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": 17,
129
+ "id": "68e7ec50",
130
+ "metadata": {},
131
+ "outputs": [],
132
+ "source": [
133
+ "# Create interface with additional inputs\n",
134
+ "with gr.Blocks() as demo:\n",
135
+ " gr.Markdown(\"# Chat with Google Gemini\")\n",
136
+ " \n",
137
+ " system_prompt = gr.Textbox(\n",
138
+ " value=initial_system_prompt,\n",
139
+ " label=\"System Prompt\",\n",
140
+ " placeholder=\"Enter system instructions for the AI...\",\n",
141
+ " lines=2\n",
142
+ " )\n",
143
+ " \n",
144
+ " chat_interface = gr.ChatInterface(\n",
145
+ " fn=chat_with_gemini,\n",
146
+ " additional_inputs=[system_prompt],\n",
147
+ " title=\"\",\n",
148
+ " cache_examples=False,\n",
149
+ " type='messages'\n",
150
+ " \n",
151
+ " )"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": null,
157
+ "id": "fd1321b5",
158
+ "metadata": {},
159
+ "outputs": [],
160
+ "source": [
161
+ "# Launch the interface\n",
162
+ "demo.launch()"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": 21,
168
+ "id": "1ba10770",
169
+ "metadata": {},
170
+ "outputs": [
171
+ {
172
+ "name": "stdout",
173
+ "output_type": "stream",
174
+ "text": [
175
+ "Closing server running on port: 7862\n"
176
+ ]
177
+ }
178
+ ],
179
+ "source": [
180
+ "demo.close()"
181
+ ]
182
+ }
183
+ ],
184
+ "metadata": {
185
+ "kernelspec": {
186
+ "display_name": ".venv",
187
+ "language": "python",
188
+ "name": "python3"
189
+ },
190
+ "language_info": {
191
+ "codemirror_mode": {
192
+ "name": "ipython",
193
+ "version": 3
194
+ },
195
+ "file_extension": ".py",
196
+ "mimetype": "text/x-python",
197
+ "name": "python",
198
+ "nbconvert_exporter": "python",
199
+ "pygments_lexer": "ipython3",
200
+ "version": "3.12.10"
201
+ }
202
+ },
203
+ "nbformat": 4,
204
+ "nbformat_minor": 5
205
+ }
Lab_practice/Lab2_w1.ipynb ADDED
@@ -0,0 +1,341 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "a42824e4",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": []
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "905d1b79",
14
+ "metadata": {},
15
+ "source": [
16
+ "Built an evaluation model to assess the output of the current model\n",
17
+ "1. Be able to ask an LLM to evaluate answer\n",
18
+ "2. Be able to rerun if the answer fail the evaluation\n",
19
+ "3. Be able to incorporate into a workflow"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 4,
25
+ "id": "1b931a48",
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "from dotenv import load_dotenv\n",
30
+ "import os\n",
31
+ "from pypdf import PdfReader\n",
32
+ "import google.generativeai as genai\n",
33
+ "import gradio as gr\n",
34
+ "from pydantic import BaseModel\n",
35
+ "import json\n",
36
+ "load_dotenv(override=True)\n",
37
+ "genai.configure(api_key=os.getenv(\"GEMINI_API\"))"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 2,
43
+ "id": "220dbf02",
44
+ "metadata": {},
45
+ "outputs": [],
46
+ "source": [
47
+ "# Read the PDF and summary \n",
48
+ "reader = PdfReader(\"../Week_1/Data_w1/linkedin.pdf\")\n",
49
+ "linkedin = \"\"\n",
50
+ "for page in reader.pages:\n",
51
+ " linkedin += page.extract_text()\n",
52
+ "\n",
53
+ "with open(\"../Week_1/Data_w1/summary.txt\", \"r\") as f:\n",
54
+ " summary = f.read()"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": 3,
60
+ "id": "6a8c0ccb",
61
+ "metadata": {},
62
+ "outputs": [],
63
+ "source": [
64
+ "# Create a system prompt\n",
65
+ "initial_system_prompt = f\"You are acting as Ed Donner. You are answering questions on Ed Donner's website, \\\n",
66
+ "particularly questions related to Ed Donner's career, background, skills and experience. \\\n",
67
+ "Your responsibility is to represent Ed Donner for interactions on the website as faithfully as possible. \\\n",
68
+ "You are given a summary of Ed Donner's background and LinkedIn profile which you can use to answer questions. \\\n",
69
+ "Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
70
+ "If you don't know the answer, say so.\"\n",
71
+ "\n",
72
+ "initial_system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
73
+ "initial_system_prompt += f\"With this context, please chat with the user, always staying in character as Ed Donner.\"\n",
74
+ "\n",
75
+ "chat_session = None"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "code",
80
+ "execution_count": 5,
81
+ "id": "fb1d2679",
82
+ "metadata": {},
83
+ "outputs": [],
84
+ "source": [
85
+ "evaluator_system_prompt = f\"You are an evaluator that decides whether a response to a question is acceptable. \\\n",
86
+ "You are provided with a conversation between a User and an Agent. Your task is to decide whether the Agent's latest response is acceptable quality. \\\n",
87
+ "The Agent is playing the role of Ed Donner and is representing Ed Donner on their website. \\\n",
88
+ "The Agent has been instructed to be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
89
+ "The Agent has been provided with context on Ed Donner in the form of their summary and LinkedIn details. Here's the information:\"\n",
90
+ "\n",
91
+ "evaluator_system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
92
+ "evaluator_system_prompt += f\"With this context, please evaluate the latest response, replying with whether the response is acceptable and your feedback.\"\n",
93
+ "\n",
94
+ "def evaluator_user_prompt(reply, message, history):\n",
95
+ " user_prompt = f\"Here's the conversation between the User and the Agent: \\n\\n{history}\\n\\n\"\n",
96
+ " user_prompt += f\"Here's the latest message from the User: \\n\\n{message}\\n\\n\"\n",
97
+ " user_prompt += f\"Here's the latest response from the Agent: \\n\\n{reply}\\n\\n\"\n",
98
+ " user_prompt += f\"Please evaluate the response, replying with whether it is acceptable and your feedback.\"\n",
99
+ " return user_prompt"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": 6,
105
+ "id": "25afd8a8",
106
+ "metadata": {},
107
+ "outputs": [],
108
+ "source": [
109
+ "class Evaluation(BaseModel):\n",
110
+ " is_acceptable: bool\n",
111
+ " response: str\n",
112
+ "\n"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": 7,
118
+ "id": "5d7aceac",
119
+ "metadata": {},
120
+ "outputs": [],
121
+ "source": [
122
+ "# Create a model for evaluation\n",
123
+ "\n",
124
+ "model_evaluator = genai.GenerativeModel(\n",
125
+ " 'gemini-2.0-flash-exp',\n",
126
+ " system_instruction=evaluator_system_prompt\n",
127
+ ")"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 8,
133
+ "id": "1b33200d",
134
+ "metadata": {},
135
+ "outputs": [],
136
+ "source": [
137
+ "def evaluate_response(reply, message, history) -> Evaluation:\n",
138
+ " try:\n",
139
+ " # Create evaluation prompt\n",
140
+ " eval_prompt = evaluator_user_prompt(reply, message, history)\n",
141
+ " response = model_evaluator.generate_content(eval_prompt)\n",
142
+ "\n",
143
+ " # Parse the JSON response\n",
144
+ " try:\n",
145
+ " eval_data = json.loads(response.text)\n",
146
+ " return Evaluation(\n",
147
+ " is_acceptable=eval_data.get(\"is_acceptable\", True),\n",
148
+ " response=eval_data.get(\"response\", \"No response provided.\")\n",
149
+ "\n",
150
+ " )\n",
151
+ " except json.JSONDecodeError:\n",
152
+ " # If JSON parsing fails, try to extract boolean and text\n",
153
+ " text = response.text.lower()\n",
154
+ " is_acceptable = \"true\" in text or \"acceptable\" in text\n",
155
+ " return Evaluation(\n",
156
+ " is_acceptable=is_acceptable,\n",
157
+ " response=response.text\n",
158
+ " )\n",
159
+ " except Exception as e:\n",
160
+ " # Return default evaluation on error\n",
161
+ " return Evaluation(\n",
162
+ " is_acceptable=True,\n",
163
+ " response=f\"Evaluation failed: {str(e)}\"\n",
164
+ " )"
165
+ ]
166
+ },
167
+ {
168
+ "cell_type": "code",
169
+ "execution_count": 13,
170
+ "id": "a2ee32f8",
171
+ "metadata": {},
172
+ "outputs": [],
173
+ "source": [
174
+ "# Create the main chat\n",
175
+ "def chat(message, history, system_prompt=initial_system_prompt):\n",
176
+ " model = genai.GenerativeModel(\n",
177
+ " 'gemini-2.0-flash',\n",
178
+ " system_instruction=system_prompt\n",
179
+ " )\n",
180
+ " # Convert Gradio messages format to Gemini format\n",
181
+ " gemini_history = []\n",
182
+ " for msg in history:\n",
183
+ " if msg[\"role\"] == \"user\":\n",
184
+ " gemini_history.append({\n",
185
+ " \"role\": \"user\",\n",
186
+ " \"parts\": [msg[\"content\"]]\n",
187
+ " })\n",
188
+ " elif msg[\"role\"] == \"assistant\":\n",
189
+ " gemini_history.append({\n",
190
+ " \"role\": \"model\", # Gemini uses \"model\" instead of \"assistant\"\n",
191
+ " \"parts\": [msg[\"content\"]]\n",
192
+ " })\n",
193
+ " \n",
194
+ " # Start chat with history\n",
195
+ " chat_session = model.start_chat(history=gemini_history)\n",
196
+ " \n",
197
+ " # Create an acceptable retries if the message is not acceptable\n",
198
+ " for try_count in range(3):\n",
199
+ " try:\n",
200
+ " # Send the current message\n",
201
+ " response = chat_session.send_message(message).text\n",
202
+ "\n",
203
+ " # Evaluate the response\n",
204
+ " evaluation = evaluate_response(response, message, history)\n",
205
+ " if evaluation.is_acceptable:\n",
206
+ " print(\"Passed evaluation - returning reply\")\n",
207
+ " return response\n",
208
+ " else:\n",
209
+ " print(\"Failed evaluation - retrying\")\n",
210
+ " if try_count < 2:\n",
211
+ " retry_message = f\"{message}\\n\\nPlease provide a better response. Previous attempt had issues: {evaluation.response}\"\n",
212
+ " # Create a new chat to avoid the bad response\n",
213
+ " chat_session = model.start_chat(history=gemini_history)\n",
214
+ " message = retry_message\n",
215
+ " else:\n",
216
+ " return f\"{response}\\n\\n*[Note: Response may need improvement - {evaluation.response}]*\"\n",
217
+ " except Exception as e:\n",
218
+ " if try_count < 2:\n",
219
+ " continue\n",
220
+ " else:\n",
221
+ " return f\"Error: {str(e)} after 3 tries\"\n",
222
+ " return \"Failed to generate acceptable response after maximum retries.\"\n"
223
+ ]
224
+ },
225
+ {
226
+ "cell_type": "code",
227
+ "execution_count": 15,
228
+ "id": "ba3b599c",
229
+ "metadata": {},
230
+ "outputs": [],
231
+ "source": [
232
+ "# Create interface with additional inputs\n",
233
+ "with gr.Blocks() as demo:\n",
234
+ " gr.Markdown(\"# Chat with Google Gemini\")\n",
235
+ " \n",
236
+ " system_prompt = gr.Textbox(\n",
237
+ " value=initial_system_prompt,\n",
238
+ " label=\"System Prompt\",\n",
239
+ " placeholder=\"Enter system instructions for the AI...\",\n",
240
+ " lines=2\n",
241
+ " )\n",
242
+ " \n",
243
+ " chat_interface = gr.ChatInterface(\n",
244
+ " fn=chat,\n",
245
+ " additional_inputs=[system_prompt],\n",
246
+ " title=\"\",\n",
247
+ " cache_examples=False,\n",
248
+ " type='messages'\n",
249
+ " \n",
250
+ " )"
251
+ ]
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "execution_count": null,
256
+ "id": "ce1addde",
257
+ "metadata": {},
258
+ "outputs": [
259
+ {
260
+ "name": "stdout",
261
+ "output_type": "stream",
262
+ "text": [
263
+ "* Running on local URL: http://127.0.0.1:7862\n",
264
+ "* To create a public link, set `share=True` in `launch()`.\n"
265
+ ]
266
+ },
267
+ {
268
+ "data": {
269
+ "text/html": [
270
+ "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
271
+ ],
272
+ "text/plain": [
273
+ "<IPython.core.display.HTML object>"
274
+ ]
275
+ },
276
+ "metadata": {},
277
+ "output_type": "display_data"
278
+ },
279
+ {
280
+ "data": {
281
+ "text/plain": []
282
+ },
283
+ "execution_count": 16,
284
+ "metadata": {},
285
+ "output_type": "execute_result"
286
+ },
287
+ {
288
+ "name": "stdout",
289
+ "output_type": "stream",
290
+ "text": [
291
+ "Passed evaluation - returning reply\n",
292
+ "Passed evaluation - returning reply\n"
293
+ ]
294
+ }
295
+ ],
296
+ "source": [
297
+ "# Launch the interface\n",
298
+ "demo.launch()"
299
+ ]
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "execution_count": 17,
304
+ "id": "9039693e",
305
+ "metadata": {},
306
+ "outputs": [
307
+ {
308
+ "name": "stdout",
309
+ "output_type": "stream",
310
+ "text": [
311
+ "Closing server running on port: 7862\n"
312
+ ]
313
+ }
314
+ ],
315
+ "source": [
316
+ "demo.close()"
317
+ ]
318
+ }
319
+ ],
320
+ "metadata": {
321
+ "kernelspec": {
322
+ "display_name": ".venv",
323
+ "language": "python",
324
+ "name": "python3"
325
+ },
326
+ "language_info": {
327
+ "codemirror_mode": {
328
+ "name": "ipython",
329
+ "version": 3
330
+ },
331
+ "file_extension": ".py",
332
+ "mimetype": "text/x-python",
333
+ "name": "python",
334
+ "nbconvert_exporter": "python",
335
+ "pygments_lexer": "ipython3",
336
+ "version": "3.12.10"
337
+ }
338
+ },
339
+ "nbformat": 4,
340
+ "nbformat_minor": 5
341
+ }
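Lab_practice/Lab2_w1.ipynb above wraps generation in a generate, evaluate, retry loop, as outlined in its opening markdown cell. Stripped of the Gemini and Gradio plumbing, the control flow reduces to the following sketch (generate and evaluate are stand-ins for the notebook's model calls):

def answer_with_retries(message, generate, evaluate, max_tries=3):
    # generate(message) -> reply text; evaluate(reply, message) -> (is_acceptable, feedback)
    reply, feedback = "", ""
    for attempt in range(max_tries):
        reply = generate(message)
        is_acceptable, feedback = evaluate(reply, message)
        if is_acceptable:
            return reply  # passed evaluation
        # fold the evaluator's feedback into the next attempt
        message = f"{message}\n\nPlease provide a better response. Previous attempt had issues: {feedback}"
    return f"{reply}\n\n*[Note: Response may need improvement - {feedback}]*"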
Lab_practice/Lab3_w1.ipynb ADDED
@@ -0,0 +1,469 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 22,
6
+ "id": "4d961b4b",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from dotenv import load_dotenv\n",
11
+ "import os\n",
12
+ "import requests\n",
13
+ "import gradio as gr\n",
14
+ "from pypdf import PdfReader\n",
15
+ "import google.generativeai as genai\n",
16
+ "from typing import Dict, List\n",
17
+ "import json\n",
18
+ "load_dotenv(override=True)\n",
19
+ "genai.configure(api_key=os.getenv(\"GEMINI_API\"))"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 2,
25
+ "id": "070475b8",
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "pushover_user = os.getenv(\"PUSHOVER_USER\")\n",
30
+ "pushover_token = os.getenv(\"PUSHOVER_API\")\n",
31
+ "pushover_url = f\"https://api.pushover.net/1/messages.json\""
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 42,
37
+ "id": "94cd12d8",
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "def push(message: str):\n",
42
+ " print(\"Pushing to Pushover \", message)\n",
43
+ " payload = {\"user\": pushover_user, \"token\": pushover_token, \"message\": message}\n",
44
+ " requests.post(pushover_url, data=payload)"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": 43,
50
+ "id": "99d70c8a",
51
+ "metadata": {},
52
+ "outputs": [],
53
+ "source": [
54
+ "def record_user_details(email: str, \n",
55
+ " name: str,\n",
56
+ " notes: str) -> Dict[str, str]:\n",
57
+ " push(f\"Email: {email}\\nName: {name}\\nNotes: {notes}\")\n",
58
+ " return {\"recorded\": \"ok\"}\n",
59
+ "\n",
60
+ "\n",
61
+ "def record_unknown_question(question: str) -> Dict[str, str]:\n",
62
+ " push(f\"Question: {question}\")\n",
63
+ " return {\"recorded\": \"ok\"}\n",
64
+ "\n"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 35,
70
+ "id": "408924fe",
71
+ "metadata": {},
72
+ "outputs": [],
73
+ "source": [
74
+ "record_user_details_json = {\n",
75
+ " \"name\": \"record_user_details\",\n",
76
+ " \"description\": \"Use this tool to record that a user is interested in being in touch and provided an email address\",\n",
77
+ " \"parameters\": {\n",
78
+ " \"type\": \"OBJECT\",\n",
79
+ " \"properties\": {\n",
80
+ " \"email\": {\n",
81
+ " \"type\": \"STRING\",\n",
82
+ " \"description\": \"The email address of this user\"\n",
83
+ " },\n",
84
+ " \"name\": {\n",
85
+ " \"type\": \"STRING\",\n",
86
+ " \"description\": \"The user's name, if they provided it\"\n",
87
+ " }\n",
88
+ " ,\n",
89
+ " \"notes\": {\n",
90
+ " \"type\": \"STRING\",\n",
91
+ " \"description\": \"Any additional information about the conversation that's worth recording to give context\"\n",
92
+ " }\n",
93
+ " },\n",
94
+ " \"required\": [\"name\", \"email\"]\n",
95
+ " }\n",
96
+ "}"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 36,
102
+ "id": "c64dc641",
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "record_unknown_question_json = {\n",
107
+ " \"name\": \"record_unknown_question\",\n",
108
+ " \"description\": \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\",\n",
109
+ " \"parameters\": {\n",
110
+ " \"type\": \"OBJECT\",\n",
111
+ " \"properties\": {\n",
112
+ " \"question\": {\n",
113
+ " \"type\": \"STRING\",\n",
114
+ " \"description\": \"The question that couldn't be answered\"\n",
115
+ " },\n",
116
+ " },\n",
117
+ " \"required\": [\"question\"]\n",
118
+ " }\n",
119
+ "}"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": 37,
125
+ "id": "23b9f4a6",
126
+ "metadata": {},
127
+ "outputs": [],
128
+ "source": [
129
+ "tools = [record_user_details_json, record_unknown_question_json]"
130
+ ]
131
+ },
132
+ {
133
+ "cell_type": "code",
134
+ "execution_count": 66,
135
+ "id": "92c7a46f",
136
+ "metadata": {},
137
+ "outputs": [],
138
+ "source": [
139
+ "def handle_tool_calls(tool_calls: List) -> List[Dict[str, str]]:\n",
140
+ " results = []\n",
141
+ " for tool_call in tool_calls:\n",
142
+ " tool_name = tool_call.name\n",
143
+ " arguments = dict(tool_call.args)\n",
144
+ " print(f\"Tool called: {tool_name} with arguments: {arguments}\")\n",
145
+ " tool = globals().get(tool_name)\n",
146
+ " result = tool(**arguments) if tool else {}\n",
147
+ " # Format for Gemini function response\n",
148
+ " results.append({\n",
149
+ " \"function_response\": {\n",
150
+ " \"name\": tool_name,\n",
151
+ " \"response\": result\n",
152
+ " }\n",
153
+ " })\n",
154
+ " return results\n",
155
+ " "
156
+ ]
157
+ },
158
+ {
159
+ "cell_type": "code",
160
+ "execution_count": 67,
161
+ "id": "98e9cd1a",
162
+ "metadata": {},
163
+ "outputs": [],
164
+ "source": [
165
+ "# Read the PDF and summary \n",
166
+ "reader = PdfReader(\"../Week_1/Data_w1/linkedin.pdf\")\n",
167
+ "linkedin = \"\"\n",
168
+ "for page in reader.pages:\n",
169
+ " linkedin += page.extract_text()\n",
170
+ "\n",
171
+ "with open(\"../Week_1/Data_w1/summary.txt\", \"r\") as f:\n",
172
+ " summary = f.read()"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": 69,
178
+ "id": "e473a35c",
179
+ "metadata": {},
180
+ "outputs": [],
181
+ "source": [
182
+ "initial_system_prompt = f\"You are acting as Ed Donner. You are answering questions on Ed Donner's website, \\\n",
183
+ "particularly questions related to Ed Donner's career, background, skills and experience. \\\n",
184
+ "Your responsibility is to represent Ed Donner for interactions on the website as faithfully as possible. \\\n",
185
+ "You are given a summary of Ed Donner's background and LinkedIn profile which you can use to answer questions. \\\n",
186
+ "Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
187
+ "If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \\\n",
188
+ "If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. \"\n",
189
+ "\n",
190
+ "initial_system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
191
+ "initial_system_prompt += f\"With this context, please chat with the user, always staying in character as Ed Donner.\""
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": null,
197
+ "id": "b7ba7ef6",
198
+ "metadata": {},
199
+ "outputs": [
200
+ {
201
+ "data": {
202
+ "text/plain": [
203
+ "response:\n",
204
+ "GenerateContentResponse(\n",
205
+ " done=True,\n",
206
+ " iterator=None,\n",
207
+ " result=protos.GenerateContentResponse({\n",
208
+ " \"candidates\": [\n",
209
+ " {\n",
210
+ " \"content\": {\n",
211
+ " \"parts\": [\n",
212
+ " {\n",
213
+ " \"text\": \"Hi! Welcome to my website. I'm Ed Donner. What can I tell you about? I'm happy to chat about my career, Nebula.io, LLMs, or anything else that might be on your mind.\\n\"\n",
214
+ " }\n",
215
+ " ],\n",
216
+ " \"role\": \"model\"\n",
217
+ " },\n",
218
+ " \"finish_reason\": \"STOP\",\n",
219
+ " \"avg_logprobs\": -0.1461243430773417\n",
220
+ " }\n",
221
+ " ],\n",
222
+ " \"usage_metadata\": {\n",
223
+ " \"prompt_token_count\": 2516,\n",
224
+ " \"candidates_token_count\": 48,\n",
225
+ " \"total_token_count\": 2564\n",
226
+ " },\n",
227
+ " \"model_version\": \"gemini-2.0-flash\"\n",
228
+ " }),\n",
229
+ ")"
230
+ ]
231
+ },
232
+ "execution_count": 41,
233
+ "metadata": {},
234
+ "output_type": "execute_result"
235
+ }
236
+ ],
237
+ "source": [
238
+ "model = genai.GenerativeModel(\n",
239
+ " 'gemini-2.0-flash',\n",
240
+ " system_instruction=system_prompt,\n",
241
+ " tools=tools\n",
242
+ " )\n",
243
+ "gemini_history = []\n",
244
+ "chat_session = model.start_chat(history=gemini_history)\n",
245
+ "# Send the current message\n",
246
+ "response = chat_session.send_message(\"Hi there\")\n",
247
+ "\n",
248
+ "response"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": 81,
254
+ "id": "5b21dfd3",
255
+ "metadata": {},
256
+ "outputs": [],
257
+ "source": [
258
+ "def chat_with_gemini(message, history, system_prompt):\n",
259
+ " try:\n",
260
+ " # Create the model with system instruction\n",
261
+ " model = genai.GenerativeModel(\n",
262
+ " 'gemini-2.0-flash',\n",
263
+ " system_instruction=system_prompt,\n",
264
+ " tools=tools\n",
265
+ " )\n",
266
+ " \n",
267
+ " # Convert Gradio messages format to Gemini format\n",
268
+ " gemini_history = []\n",
269
+ " max_iteration = 3\n",
270
+ " iteration = 0\n",
271
+ " for msg in history:\n",
272
+ " if msg[\"role\"] == \"user\":\n",
273
+ " gemini_history.append({\n",
274
+ " \"role\": \"user\",\n",
275
+ " \"parts\": [msg[\"content\"]]\n",
276
+ " })\n",
277
+ " elif msg[\"role\"] == \"assistant\":\n",
278
+ " gemini_history.append({\n",
279
+ " \"role\": \"model\", \n",
280
+ " \"parts\": [msg[\"content\"]]\n",
281
+ " })\n",
282
+ " \n",
283
+ " # Start chat with history\n",
284
+ " chat_session = model.start_chat(history=gemini_history)\n",
285
+ " current_message = message\n",
286
+ " try:\n",
287
+ " while iteration < max_iteration:\n",
288
+ " # Send the current message\n",
289
+ " response = chat_session.send_message(current_message)\n",
290
+ " # Check for its finishing \n",
291
+ " finish_reason = response.candidates[0].finish_reason\n",
292
+ "\n",
293
+ " print(f\"Response parts: {[part for part in response.candidates[0].content.parts]}\")\n",
294
+ "\n",
295
+ " function_calls = []\n",
296
+ " text_parts = []\n",
297
+ " \n",
298
+ " # If the LLM wants to call the tools\n",
299
+ " for part in response.candidates[0].content.parts:\n",
300
+ " if hasattr(part, \"function_call\") and part.function_call:\n",
301
+ " function_calls.append(part.function_call)\n",
302
+ " print(\"Function calls list not empty\")\n",
303
+ " elif hasattr(part, \"text\"):\n",
304
+ " text_parts.append(part.text)\n",
305
+ " \n",
306
+ " # Excecute if function_calls not empty\n",
307
+ " if function_calls:\n",
308
+ " results = handle_tool_calls(function_calls)\n",
309
+ " # Add the result back to the model\n",
310
+ " current_message = results\n",
311
+ " iteration += 1\n",
312
+ " else:\n",
313
+ " if text_parts:\n",
314
+ " return \"\".join(text_parts)\n",
315
+ " else:\n",
316
+ " return response.text\n",
317
+ " return \"\"\n",
318
+ " except Exception as e:\n",
319
+ " return f\"Error: {e}\"\n",
320
+ " except Exception as e:\n",
321
+ " return f\"Error: {e}\""
322
+ ]
323
+ },
324
+ {
325
+ "cell_type": "code",
326
+ "execution_count": 82,
327
+ "id": "35fd0a44",
328
+ "metadata": {},
329
+ "outputs": [],
330
+ "source": [
331
+ "# Create interface with additional inputs\n",
332
+ "with gr.Blocks() as demo:\n",
333
+ " gr.Markdown(\"# Chat with Google Gemini\")\n",
334
+ " \n",
335
+ " system_prompt = gr.Textbox(\n",
336
+ " value=initial_system_prompt,\n",
337
+ " label=\"System Prompt\",\n",
338
+ " placeholder=\"Enter system instructions for the AI...\",\n",
339
+ " lines=2\n",
340
+ " )\n",
341
+ " \n",
342
+ " chat_interface = gr.ChatInterface(\n",
343
+ " fn=chat_with_gemini,\n",
344
+ " additional_inputs=[system_prompt],\n",
345
+ " title=\"\",\n",
346
+ " cache_examples=False,\n",
347
+ " type='messages'\n",
348
+ " \n",
349
+ " )"
350
+ ]
351
+ },
352
+ {
353
+ "cell_type": "code",
354
+ "execution_count": null,
355
+ "id": "53665d72",
356
+ "metadata": {},
357
+ "outputs": [
358
+ {
359
+ "name": "stdout",
360
+ "output_type": "stream",
361
+ "text": [
362
+ "* Running on local URL: http://127.0.0.1:7863\n",
363
+ "* To create a public link, set `share=True` in `launch()`.\n"
364
+ ]
365
+ },
366
+ {
367
+ "data": {
368
+ "text/html": [
369
+ "<div><iframe src=\"http://127.0.0.1:7863/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
370
+ ],
371
+ "text/plain": [
372
+ "<IPython.core.display.HTML object>"
373
+ ]
374
+ },
375
+ "metadata": {},
376
+ "output_type": "display_data"
377
+ },
378
+ {
379
+ "data": {
380
+ "text/plain": []
381
+ },
382
+ "execution_count": 84,
383
+ "metadata": {},
384
+ "output_type": "execute_result"
385
+ },
386
+ {
387
+ "name": "stdout",
388
+ "output_type": "stream",
389
+ "text": [
390
+ "Response parts: [text: \"Great! It\\'s a pleasure to hear from you, Ed. I\\'d be happy to connect. Could you tell me a bit about what you\\'d like to discuss? In the meantime, I\\'ll make a note of your email address.\\n\"\n",
391
+ ", function_call {\n",
392
+ " name: \"record_user_details\"\n",
393
+ " args {\n",
394
+ " fields {\n",
395
+ " key: \"notes\"\n",
396
+ " value {\n",
397
+ " string_value: \"User wants to get in touch.\"\n",
398
+ " }\n",
399
+ " }\n",
400
+ " fields {\n",
401
+ " key: \"name\"\n",
402
+ " value {\n",
403
+ " string_value: \"Ed\"\n",
404
+ " }\n",
405
+ " }\n",
406
+ " fields {\n",
407
+ " key: \"email\"\n",
408
+ " value {\n",
409
+ " string_value: \"ed@edwarddung.com\"\n",
410
+ " }\n",
411
+ " }\n",
412
+ " }\n",
413
+ "}\n",
414
+ "]\n",
415
+ "Function calls list not empty\n",
416
+ "Tool called: record_user_details with arguments: {'notes': 'User wants to get in touch.', 'email': 'ed@edwarddung.com', 'name': 'Ed'}\n",
417
+ "Pushing to Pushover Email: ed@edwarddung.com\n",
418
+ "Name: Ed\n",
419
+ "Notes: User wants to get in touch.\n",
420
+ "Response parts: [text: \"Thanks, Ed. I\\'ve made a note that you\\'re interested in getting in touch. I look forward to hearing more about what you\\'d like to discuss! Feel free to send me an email directly at ed.donner@gmail.com.\\n\"\n",
421
+ "]\n"
422
+ ]
423
+ }
424
+ ],
425
+ "source": [
426
+ "demo.launch()"
427
+ ]
428
+ },
429
+ {
430
+ "cell_type": "code",
431
+ "execution_count": 85,
432
+ "id": "e8305956",
433
+ "metadata": {},
434
+ "outputs": [
435
+ {
436
+ "name": "stdout",
437
+ "output_type": "stream",
438
+ "text": [
439
+ "Closing server running on port: 7863\n"
440
+ ]
441
+ }
442
+ ],
443
+ "source": [
444
+ "demo.close()"
445
+ ]
446
+ }
447
+ ],
448
+ "metadata": {
449
+ "kernelspec": {
450
+ "display_name": ".venv",
451
+ "language": "python",
452
+ "name": "python3"
453
+ },
454
+ "language_info": {
455
+ "codemirror_mode": {
456
+ "name": "ipython",
457
+ "version": 3
458
+ },
459
+ "file_extension": ".py",
460
+ "mimetype": "text/x-python",
461
+ "name": "python",
462
+ "nbconvert_exporter": "python",
463
+ "pygments_lexer": "ipython3",
464
+ "version": "3.12.10"
465
+ }
466
+ },
467
+ "nbformat": 4,
468
+ "nbformat_minor": 5
469
+ }
README.md CHANGED
@@ -1,12 +1,6 @@
 ---
-title: Week 1
-emoji: 🏢
-colorFrom: gray
-colorTo: yellow
-sdk: gradio
-sdk_version: 5.33.1
+title: Week_1
 app_file: app.py
-pinned: false
+sdk: gradio
+sdk_version: 5.33.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/text_chunk.cpython-312.pyc ADDED
Binary file (1.93 kB).
app.py ADDED
@@ -0,0 +1,224 @@
1
+ from dotenv import load_dotenv
2
+ import os
3
+ import requests
4
+ import gradio as gr
5
+ from pypdf import PdfReader
6
+ import google.generativeai as genai
7
+ from chromadb import Documents, EmbeddingFunction, Embeddings
8
+ from typing import Dict, List
9
+ import numpy as np
10
+ from sklearn.metrics.pairwise import cosine_similarity
11
+ import re
12
+ import pickle
13
+ import json
14
+ from embed import *
15
+ load_dotenv(override=True)
16
+ genai.configure(api_key=os.getenv("GEMINI_API"))
17
+ pushover_user = os.getenv("PUSHOVER_USER")
18
+ pushover_token = os.getenv("PUSHOVER_API")
19
+ pushover_url = f"https://api.pushover.net/1/messages.json"
20
+
21
+
22
+ def push(message: str):
23
+ print("Pushing to Pushover ", message)
24
+ payload = {"user": pushover_user, "token": pushover_token, "message": message}
25
+ requests.post(pushover_url, data=payload)
26
+
27
+ def record_user_details(email: str,
28
+ name: str,
29
+ notes: str) -> Dict[str, str]:
30
+ push(f"Email: {email}\nName: {name}\nNotes: {notes}")
31
+ return {"recorded": "ok"}
32
+
33
+
34
+ def record_unknown_question(question: str) -> Dict[str, str]:
35
+ push(f"Question: {question}")
36
+ return {"recorded": "ok"}
37
+
38
+
39
+ def handle_tool_calls(tool_calls: List) -> List[Dict[str, str]]:
40
+ results = []
41
+ for tool_call in tool_calls:
42
+ tool_name = tool_call.name
43
+ arguments = dict(tool_call.args)
44
+ print(f"Tool called: {tool_name} with arguments: {arguments}")
45
+ tool = globals().get(tool_name)
46
+ result = tool(**arguments) if tool else {}
47
+ # Format for Gemini function response
48
+ results.append({
49
+ "function_response": {
50
+ "name": tool_name,
51
+ "response": result
52
+ }
53
+ })
54
+ return results
55
+
56
+ record_user_details_json = {
57
+ "name": "record_user_details",
58
+ "description": "Use this tool to record that a user is interested in being in touch and provided an email address",
59
+ "parameters": {
60
+ "type": "OBJECT",
61
+ "properties": {
62
+ "email": {
63
+ "type": "STRING",
64
+ "description": "The email address of this user"
65
+ },
66
+ "name": {
67
+ "type": "STRING",
68
+ "description": "The user's name, if they provided it"
69
+ }
70
+ ,
71
+ "notes": {
72
+ "type": "STRING",
73
+ "description": "Any additional information about the conversation that's worth recording to give context"
74
+ }
75
+ },
76
+ "required": ["name", "email"]
77
+ }
78
+ }
79
+
80
+ record_unknown_question_json = {
81
+ "name": "record_unknown_question",
82
+ "description": "Always use this tool to record any question that couldn't be answered as you didn't know the answer",
83
+ "parameters": {
84
+ "type": "OBJECT",
85
+ "properties": {
86
+ "question": {
87
+ "type": "STRING",
88
+ "description": "The question that couldn't be answered"
89
+ },
90
+ },
91
+ "required": ["question"]
92
+ }
93
+ }
94
+
95
+ tools = [
96
+ record_user_details_json,
97
+ record_unknown_question_json
98
+ ]
99
+
100
+
101
+
102
+ class App:
103
+
104
+ def __init__(self):
105
+ self.db = load_chroma_db(path="Week_1/Data_w1", name='RAG_DB')
106
+
107
+
108
+ def rag_prompt(self, query: str, relevant_passages: str) -> str:
109
+ escaped = relevant_passages.replace("'", "").replace('"', "").replace("\n", " ")
110
+ prompt = f'''
111
+ Please answer questions using text from the reference passage included below. \
112
+ Be sure to respond in a complete sentence, being comprehensive, including all relevant background information. \
113
+ However, you are talking to a non-technical audience, so be sure to break down complicated concepts and \
114
+ strike a friendly and converstional tone. \
115
+ If the passage is irrelevant to the question, you should respond with "I do not have an answer for that." and use record_unknown_question tool to record the question. \
116
+ QUESTION: {query} \
117
+ PASSAGE: {escaped}
118
+ '''
119
+ return prompt
120
+
121
+ def system_prompt(self) -> str:
122
+ return '''
123
+ You are acting as Ed Donner. You are answering questions on Ed Donner's website, \
124
+ particularly questions related to Ed Donner's career, background, skills and experience. \
125
+ Your responsibility is to represent Ed Donner for interactions on the website as faithfully as possible. \
126
+ Be professional and engaging, as if talking to a potential client or future employer who came across the website. \
127
+ If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \
128
+ If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool.
129
+ '''
130
+ def chat_with_gemini(self, message, history, system_prompt):
131
+ try:
132
+ # The Chroma DB is already loaded in __init__; here we only build the Gemini model
133
+ # Create the model with system instruction
134
+ model = genai.GenerativeModel(
135
+ 'gemini-2.0-flash',
136
+ system_instruction=system_prompt,
137
+ tools=tools
138
+ )
139
+ # Convert Gradio messages format to Gemini format
140
+ gemini_history = []
141
+ max_iteration = 3
142
+ iteration = 0
143
+ for msg in history:
144
+ if msg["role"] == "user":
145
+ gemini_history.append({
146
+ "role": "user",
147
+ "parts": [msg["content"]]
148
+ })
149
+ elif msg["role"] == "assistant":
150
+ gemini_history.append({
151
+ "role": "model",
152
+ "parts": [msg["content"]]
153
+ })
154
+
155
+ # Start chat with history
156
+ chat_session = model.start_chat(history=gemini_history)
157
+ relevant_passage = get_relevant_passage(query= message,
158
+ db= self.db,
159
+ n_results=3)
160
+
161
+ prompt = self.rag_prompt(query=message,
162
+ relevant_passages=" ".join(relevant_passage))
163
+
164
+ current_message = prompt
165
+
166
+ try:
167
+ while iteration < max_iteration:
168
+ # Send the current message
169
+ response = chat_session.send_message(current_message)
170
+ # Check for its finishing
171
+ finish_reason = response.candidates[0].finish_reason
172
+
173
+ print(f"Response parts: {[part for part in response.candidates[0].content.parts]}")
174
+
175
+ function_calls = []
176
+ text_parts = []
177
+
178
+ # If the LLM wants to call the tools
179
+ for part in response.candidates[0].content.parts:
180
+ if hasattr(part, "function_call") and part.function_call:
181
+ function_calls.append(part.function_call)
182
+ print("Function calls list not empty")
183
+ elif hasattr(part, "text"):
184
+ text_parts.append(part.text)
185
+
186
+ # Execute the tools if function_calls is not empty
187
+ if function_calls:
188
+ results = handle_tool_calls(function_calls)
189
+ # Add the result back to the model
190
+ current_message = results
191
+ iteration += 1
192
+ else:
193
+ if text_parts:
194
+ return "".join(text_parts)
195
+ else:
196
+ return response.text
197
+ return ""
198
+ except Exception as e:
199
+ return f"Error: {e}"
200
+ except Exception as e:
201
+ return f"Error: {e}"
202
+
203
+
204
+ if __name__ == "__main__":
205
+ chat_grad = App()
206
+ with gr.Blocks() as demo:
207
+ gr.Markdown("# Chat with Google Gemini")
208
+
209
+ system_prompt = gr.Textbox(
210
+ value=chat_grad.system_prompt(),
211
+ label="System Prompt",
212
+ placeholder="Enter system instructions for the AI...",
213
+ lines=2
214
+ )
215
+
216
+ chat_interface = gr.ChatInterface(
217
+ fn=chat_grad.chat_with_gemini,
218
+ additional_inputs=[system_prompt],
219
+ title="",
220
+ cache_examples=False,
221
+ type='messages'
222
+
223
+ )
224
+ demo.launch()
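+ # How the pieces connect (sketch): on every turn gr.ChatInterface calls
+ # chat_with_gemini(message, history, system_prompt_value); with type='messages' the history
+ # arrives as a list of {"role": ..., "content": ...} dicts, and each additional_inputs entry
+ # is appended as an extra positional argument, which is how the System Prompt textbox
+ # reaches the third parameter.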
embed.py ADDED
@@ -0,0 +1,104 @@
1
+ import google.generativeai as genai
2
+ from chromadb import Documents, EmbeddingFunction, Embeddings, PersistentClient, Collection
3
+ from typing import Dict, List
4
+ import os
5
+ from dotenv import load_dotenv
6
+ load_dotenv(override=True)
7
+ from text_chunk import *
8
+
9
+ class GeminiEmbeddingFuction(EmbeddingFunction):
10
+ """
11
+ Custom embedding function using the Gemini AI API for document retrieval.
12
+
13
+ This class extends the EmbeddingFunction class and implements the __call__ method
14
+ to generate embeddings for a given set of documents using the Gemini AI API.
15
+
16
+ Parameters:
17
+ - input (Documents): A collection of documents to be embedded.
18
+
19
+ Returns:
20
+ - Embeddings: Embeddings generated for the input documents.
21
+ """
22
+
23
+ def __call__(self, input: Documents) -> Embeddings:
24
+ genai.configure(api_key=os.getenv("GEMINI_API"))
25
+ return genai.embed_content(model = "models/embedding-001",
26
+ content= input,
27
+ task_type="retrieval_document",
28
+ title="Query")['embedding']
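+ # Note: Chroma calls this function with a batch of documents, and embed_content should
+ # return one vector per item under 'embedding'. The fixed title="Query" is just a document
+ # label used for retrieval_document embeddings, not the user's query text.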
29
+
30
+
31
+ def create_chroma_db(documents: List[str], path: str, name: str):
32
+ """
33
+ Creates a Chroma database using the provided documents, path, and collection name.
34
+
35
+ Parameters:
36
+ - documents: An iterable of documents to be added to the Chroma database.
37
+ - path (str): The path where the Chroma database will be stored.
38
+ - name (str): The name of the collection within the Chroma database.
39
+
40
+ Returns:
41
+ - Tuple[chromadb.Collection, str]: A tuple containing the created Chroma Collection and its name.
42
+ """
43
+
44
+ chroma_client = PersistentClient(path=path)
45
+ db = chroma_client.create_collection(name=name,
46
+ embedding_function=GeminiEmbeddingFuction())
47
+ for i, d in enumerate(documents):
48
+ db.add(documents=[d], ids=[str(i)])
49
+ return db, name
50
+
51
+ def load_chroma_db(path: str, name: str):
52
+ """
53
+ Loads an existing Chroma collection from the specified path with the given name.
54
+
55
+ Parameters:
56
+ - path (str): The path where the Chroma database is stored.
57
+ - name (str): The name of the collection within the Chroma database.
58
+
59
+ Returns:
60
+ - chromadb.Collection: The loaded Chroma Collection.
61
+ """
62
+
63
+ chroma_client = PersistentClient(path=path)
64
+ db = chroma_client.get_collection(name=name, embedding_function=GeminiEmbeddingFuction())
65
+ return db
66
+
67
+ def get_relevant_passage(query: str, db: Collection, n_results: int):
68
+ """
69
+ Semantic search to retrieve the most similar chunks of text from the database.
70
+
71
+ Parameters:
72
+ query (str): The query to search for.
73
+ n_results (int): The number of results to return.
74
+ db (chromadb.Collection): The Chroma collection to search.
75
+
76
+ Returns:
77
+ List[str]: A list of the most similar chunks of text.
78
+ """
79
+ passage = db.query(query_texts=[query],
80
+ n_results=n_results)['documents'][0]
81
+ return passage
82
+
83
+ if __name__ == "__main__":
84
+ # Create database based on the LinkedIn PDF and summary
85
+ # text = load_documents(data_path=f"Week_1\Data_w1")
86
+ # print("Length of text: ", len(text))
87
+ # chunked_text= sliding_window_chunk(text= text)
88
+ # db, name = create_chroma_db(
89
+ # documents= chunked_text,
90
+ # path= "Week_1\Data_w1",
91
+ # name= 'RAG_DB'
92
+ # )
93
+
94
+ # Retrieval example
95
+ # db = load_chroma_db(path= "Week_1\Data_w1", name= 'RAG_DB')
96
+ # relevant_text = get_relevant_passage(query="Your python experience",db=db,n_results=3)
97
+
98
+ # print(relevant_text)
99
+ print("Done")
100
+
101
+
102
+
103
+
104
+
text_chunk.py ADDED
@@ -0,0 +1,54 @@
1
+ from pypdf import PdfReader
2
+ from typing import Dict, List
3
+ import re
4
+
5
+ def load_documents(data_path: str) -> str:
6
+ '''
7
+ Read the linkedin pdf and the summary in the data folder
8
+
9
+ Parameters:
10
+ - data_path (str): The path to the data folder
11
+
12
+ Returns:
13
+ - output (str): The LinkedIn text and the summary concatenated into a single string
14
+ '''
15
+ reader = PdfReader(f"{data_path}/linkedin.pdf")
16
+ text_document = ""
17
+ for page in reader.pages:
18
+ text_document += page.extract_text()
19
+
20
+ with open(f"{data_path}/summary.txt", "r", encoding="utf-8") as f:
21
+ summary = f.read()
22
+ output = f"{text_document}\n{summary}"
23
+ return output
24
+
25
+ def sliding_window_chunk(text: str, overlap: int = 20, chunk_size: int = 200) -> List[str]:
26
+ '''
27
+ Split the text into overlapping chunks using a sliding window over words
28
+
29
+ Parameters:
30
+ - text (str): The text to split
+ - overlap (int): Number of words shared between consecutive chunks
+ - chunk_size (int): Maximum number of words per chunk
31
+
32
+ Returns:
33
+ - chunks (List[str]): A list of chunks of text
34
+ '''
35
+
36
+ # Remove unwanted characters
37
+ text = re.sub(r'[\xa0\n]', " ", text)
38
+
39
+ # Split the text into chunks of non-empty substrings
40
+ words = text.split()
41
+ chunks = [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size - overlap)]
42
+ return chunks
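+ # Example (assumed defaults): with chunk_size=200 and overlap=20, windows start every
+ # 180 words, so consecutive chunks share 20 words of context.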
43
+
44
+
45
+ # if __name__ == "__main__":
46
+ # # reader = PdfReader("Week_1\Data_w1\linkedin.pdf")
47
+ # # linkedin = ""
48
+ # # for page in reader.pages:
49
+ # # linkedin += page.extract_text()
50
+
51
+ # # text_chunks = sliding_window_chunk(linkedin)
52
+ # # print(len(text_chunks))
53
+
54
+
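A minimal end-to-end sketch of how text_chunk.py and embed.py fit together. The Data_w1 path is an assumption based on the uploaded folder (the code above uses Week_1\Data_w1), and the build step should only be run once, since create_collection fails if the collection already exists:

from text_chunk import load_documents, sliding_window_chunk
from embed import create_chroma_db, load_chroma_db, get_relevant_passage

# One-off build: read the LinkedIn PDF plus summary.txt, chunk, and persist the collection
text = load_documents(data_path="Data_w1")
chunks = sliding_window_chunk(text, overlap=20, chunk_size=200)
db, name = create_chroma_db(documents=chunks, path="Data_w1", name="RAG_DB")

# Later runs: reuse the persisted collection for retrieval
db = load_chroma_db(path="Data_w1", name="RAG_DB")
print(get_relevant_passage(query="What is your Python experience?", db=db, n_results=3))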