Upload folder using huggingface_hub

Files changed:
- .gitattributes +1 -0
- Data_w1/4d97b029-f947-429e-8b62-d7b492658561/data_level0.bin +3 -0
- Data_w1/4d97b029-f947-429e-8b62-d7b492658561/header.bin +3 -0
- Data_w1/4d97b029-f947-429e-8b62-d7b492658561/length.bin +3 -0
- Data_w1/4d97b029-f947-429e-8b62-d7b492658561/link_lists.bin +0 -0
- Data_w1/chroma.sqlite3 +3 -0
- Data_w1/linkedin.pdf +0 -0
- Data_w1/summary.txt +2 -0
- Lab3_w1.ipynb +469 -0
- Lab_practice/Lab1_w1.ipynb +205 -0
- Lab_practice/Lab2_w1.ipynb +341 -0
- Lab_practice/Lab3_w1.ipynb +469 -0
- README.md +3 -9
- __pycache__/text_chunk.cpython-312.pyc +0 -0
- app.py +224 -0
- embed.py +104 -0
- text_chunk.py +54 -0
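
For context on the commit title: a commit like this is typically produced by the upload_folder helper in the huggingface_hub Python package. A minimal sketch, assuming a placeholder repo_id and a token available via the HF_TOKEN environment variable (neither appears in the original commit):

    # Sketch of the kind of call that generates an "Upload folder using huggingface_hub" commit.
    # "your-username/your-repo" is a placeholder, not the actual target repo of this commit.
    from huggingface_hub import HfApi

    api = HfApi()  # resolves the token from HF_TOKEN / the local HF cache
    api.upload_folder(
        folder_path=".",                    # local folder whose contents are committed
        repo_id="your-username/your-repo",
        repo_type="space",                  # assumption: this looks like a Space; use "model" or "dataset" otherwise
        commit_message="Upload folder using huggingface_hub",
    )

Every file under folder_path is committed in one go, which is why build artifacts like __pycache__/text_chunk.cpython-312.pyc ended up in the listing above.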
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Data_w1/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
Data_w1/4d97b029-f947-429e-8b62-d7b492658561/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23add52afbe7588391f32d3deffb581b2663d2e2ad8851aba7de25e6b3f66761
+size 32120000
Data_w1/4d97b029-f947-429e-8b62-d7b492658561/header.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8c7f00b4415698ee6cb94332eff91aedc06ba8e066b1f200e78ca5df51abb57
+size 100
Data_w1/4d97b029-f947-429e-8b62-d7b492658561/length.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6803a4081e907735e2296bc15a2149f9d4f3195c4868e1dc1d12f50abe70ebd
+size 40000
Data_w1/4d97b029-f947-429e-8b62-d7b492658561/link_lists.bin ADDED
File without changes (empty file)
Data_w1/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f97137b8f055367cf61dc7422597f3937a7897baba6fd2867fd70da6859da3f0
+size 1454080
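
The four +3 -0 entries above are stored through Git LFS, so the diff shows three-line pointer files instead of the payloads: the pointer spec version, the payload's SHA-256 (oid), and its size in bytes. A minimal sketch of checking a downloaded file against its pointer; the pointer text is the chroma.sqlite3 one shown above, while the local path is an assumption:

    import hashlib
    from pathlib import Path

    def matches_lfs_pointer(pointer_text: str, payload: Path) -> bool:
        # A Git LFS pointer is three "key value" lines: version, oid, size.
        fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
        expected_oid = fields["oid"].removeprefix("sha256:")
        data = payload.read_bytes()
        return len(data) == int(fields["size"]) and hashlib.sha256(data).hexdigest() == expected_oid

    pointer = """version https://git-lfs.github.com/spec/v1
    oid sha256:f97137b8f055367cf61dc7422597f3937a7897baba6fd2867fd70da6859da3f0
    size 1454080"""
    print(matches_lfs_pointer(pointer, Path("Data_w1/chroma.sqlite3")))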
Data_w1/linkedin.pdf ADDED
Binary file (69.7 kB)
Data_w1/summary.txt ADDED
@@ -0,0 +1,2 @@
+My name is Ed Donner. I'm an entrepreneur, software engineer and data scientist. I'm originally from London, England, but I moved to NYC in 2000.
+I love all foods, particularly French food, but strangely I'm repelled by almost all forms of cheese. I'm not allergic, I just hate the taste! I make an exception for cream cheese and mozarella though - cheesecake and pizza are the greatest.
Lab3_w1.ipynb ADDED
@@ -0,0 +1,469 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "4d961b4b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dotenv import load_dotenv\n",
+    "import os\n",
+    "import requests\n",
+    "import gradio as gr\n",
+    "from pypdf import PdfReader\n",
+    "import google.generativeai as genai\n",
+    "from typing import Dict, List\n",
+    "import json\n",
+    "load_dotenv(override=True)\n",
+    "genai.configure(api_key=os.getenv(\"GEMINI_API\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "070475b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pushover_user = os.getenv(\"PUSHOVER_USER\")\n",
+    "pushover_token = os.getenv(\"PUSHOVER_API\")\n",
+    "pushover_url = f\"https://api.pushover.net/1/messages.json\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "id": "94cd12d8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def push(message: str):\n",
+    "    print(\"Pushing to Pushover \", message)\n",
+    "    payload = {\"user\": pushover_user, \"token\": pushover_token, \"message\": message}\n",
+    "    requests.post(pushover_url, data=payload)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "99d70c8a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def record_user_details(email: str, \n",
+    "                        name: str,\n",
+    "                        notes: str) -> Dict[str, str]:\n",
+    "    push(f\"Email: {email}\\nName: {name}\\nNotes: {notes}\")\n",
+    "    return {\"recorded\": \"ok\"}\n",
+    "\n",
+    "\n",
+    "def record_unknown_question(question: str) -> Dict[str, str]:\n",
+    "    push(f\"Question: {question}\")\n",
+    "    return {\"recorded\": \"ok\"}\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "408924fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "record_user_details_json = {\n",
+    "    \"name\": \"record_user_details\",\n",
+    "    \"description\": \"Use this tool to record that a user is interested in being in touch and provided an email address\",\n",
+    "    \"parameters\": {\n",
+    "        \"type\": \"OBJECT\",\n",
+    "        \"properties\": {\n",
+    "            \"email\": {\n",
+    "                \"type\": \"STRING\",\n",
+    "                \"description\": \"The email address of this user\"\n",
+    "            },\n",
+    "            \"name\": {\n",
+    "                \"type\": \"STRING\",\n",
+    "                \"description\": \"The user's name, if they provided it\"\n",
+    "            }\n",
+    "            ,\n",
+    "            \"notes\": {\n",
+    "                \"type\": \"STRING\",\n",
+    "                \"description\": \"Any additional information about the conversation that's worth recording to give context\"\n",
+    "            }\n",
+    "        },\n",
+    "        \"required\": [\"name\", \"email\"]\n",
+    "    }\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "c64dc641",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "record_unknown_question_json = {\n",
+    "    \"name\": \"record_unknown_question\",\n",
+    "    \"description\": \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\",\n",
+    "    \"parameters\": {\n",
+    "        \"type\": \"OBJECT\",\n",
+    "        \"properties\": {\n",
+    "            \"question\": {\n",
+    "                \"type\": \"STRING\",\n",
+    "                \"description\": \"The question that couldn't be answered\"\n",
+    "            },\n",
+    "        },\n",
+    "        \"required\": [\"question\"]\n",
+    "    }\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "23b9f4a6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tools = [record_user_details_json, record_unknown_question_json]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "id": "92c7a46f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def handle_tool_calls(tool_calls: List) -> List[Dict[str, str]]:\n",
+    "    results = []\n",
+    "    for tool_call in tool_calls:\n",
+    "        tool_name = tool_call.name\n",
+    "        arguments = dict(tool_call.args)\n",
+    "        print(f\"Tool called: {tool_name} with arguments: {arguments}\")\n",
+    "        tool = globals().get(tool_name)\n",
+    "        result = tool(**arguments) if tool else {}\n",
+    "        # Format for Gemini function response\n",
+    "        results.append({\n",
+    "            \"function_response\": {\n",
+    "                \"name\": tool_name,\n",
+    "                \"response\": result\n",
+    "            }\n",
+    "        })\n",
+    "    return results\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "id": "98e9cd1a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the PDF and summary \n",
+    "reader = PdfReader(\"../Week_1/Data_w1/linkedin.pdf\")\n",
+    "linkedin = \"\"\n",
+    "for page in reader.pages:\n",
+    "    linkedin += page.extract_text()\n",
+    "\n",
+    "with open(\"../Week_1/Data_w1/summary.txt\", \"r\") as f:\n",
+    "    summary = f.read()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "id": "e473a35c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "initial_system_prompt = f\"You are acting as Ed Donner. You are answering questions on Ed Donner's website, \\\n",
+    "particularly questions related to Ed Donner's career, background, skills and experience. \\\n",
+    "Your responsibility is to represent Ed Donner for interactions on the website as faithfully as possible. \\\n",
+    "You are given a summary of Ed Donner's background and LinkedIn profile which you can use to answer questions. \\\n",
+    "Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
+    "If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \\\n",
+    "If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. \"\n",
+    "\n",
+    "initial_system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
+    "initial_system_prompt += f\"With this context, please chat with the user, always staying in character as Ed Donner.\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b7ba7ef6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "response:\n",
+       "GenerateContentResponse(\n",
+       "    done=True,\n",
+       "    iterator=None,\n",
+       "    result=protos.GenerateContentResponse({\n",
+       "      \"candidates\": [\n",
+       "        {\n",
+       "          \"content\": {\n",
+       "            \"parts\": [\n",
+       "              {\n",
+       "                \"text\": \"Hi! Welcome to my website. I'm Ed Donner. What can I tell you about? I'm happy to chat about my career, Nebula.io, LLMs, or anything else that might be on your mind.\\n\"\n",
+       "              }\n",
+       "            ],\n",
+       "            \"role\": \"model\"\n",
+       "          },\n",
+       "          \"finish_reason\": \"STOP\",\n",
+       "          \"avg_logprobs\": -0.1461243430773417\n",
+       "        }\n",
+       "      ],\n",
+       "      \"usage_metadata\": {\n",
+       "        \"prompt_token_count\": 2516,\n",
+       "        \"candidates_token_count\": 48,\n",
+       "        \"total_token_count\": 2564\n",
+       "      },\n",
+       "      \"model_version\": \"gemini-2.0-flash\"\n",
+       "    }),\n",
+       ")"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = genai.GenerativeModel(\n",
+    "    'gemini-2.0-flash',\n",
+    "    system_instruction=system_prompt,\n",
+    "    tools=tools\n",
+    "    )\n",
+    "gemini_history = []\n",
+    "chat_session = model.start_chat(history=gemini_history)\n",
+    "# Send the current message\n",
+    "response = chat_session.send_message(\"Hi there\")\n",
+    "\n",
+    "response"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "id": "5b21dfd3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def chat_with_gemini(message, history, system_prompt):\n",
+    "    try:\n",
+    "        # Create the model with system instruction\n",
+    "        model = genai.GenerativeModel(\n",
+    "            'gemini-2.0-flash',\n",
+    "            system_instruction=system_prompt,\n",
+    "            tools=tools\n",
+    "        )\n",
+    "        \n",
+    "        # Convert Gradio messages format to Gemini format\n",
+    "        gemini_history = []\n",
+    "        max_iteration = 3\n",
+    "        iteration = 0\n",
+    "        for msg in history:\n",
+    "            if msg[\"role\"] == \"user\":\n",
+    "                gemini_history.append({\n",
+    "                    \"role\": \"user\",\n",
+    "                    \"parts\": [msg[\"content\"]]\n",
+    "                })\n",
+    "            elif msg[\"role\"] == \"assistant\":\n",
+    "                gemini_history.append({\n",
+    "                    \"role\": \"model\", \n",
+    "                    \"parts\": [msg[\"content\"]]\n",
+    "                })\n",
+    "        \n",
+    "        # Start chat with history\n",
+    "        chat_session = model.start_chat(history=gemini_history)\n",
+    "        current_message = message\n",
+    "        try:\n",
+    "            while iteration < max_iteration:\n",
+    "                # Send the current message\n",
+    "                response = chat_session.send_message(current_message)\n",
+    "                # Check for its finishing \n",
+    "                finish_reason = response.candidates[0].finish_reason\n",
+    "\n",
+    "                print(f\"Response parts: {[part for part in response.candidates[0].content.parts]}\")\n",
+    "\n",
+    "                function_calls = []\n",
+    "                text_parts = []\n",
+    "                \n",
+    "                # If the LLM wants to call the tools\n",
+    "                for part in response.candidates[0].content.parts:\n",
+    "                    if hasattr(part, \"function_call\") and part.function_call:\n",
+    "                        function_calls.append(part.function_call)\n",
+    "                        print(\"Function calls list not empty\")\n",
+    "                    elif hasattr(part, \"text\"):\n",
+    "                        text_parts.append(part.text)\n",
+    "                \n",
+    "                # Excecute if function_calls not empty\n",
+    "                if function_calls:\n",
+    "                    results = handle_tool_calls(function_calls)\n",
+    "                    # Add the result back to the model\n",
+    "                    current_message = results\n",
+    "                    iteration += 1\n",
+    "                else:\n",
+    "                    if text_parts:\n",
+    "                        return \"\".join(text_parts)\n",
+    "                    else:\n",
+    "                        return response.text\n",
+    "            return \"\"\n",
+    "        except Exception as e:\n",
+    "            return f\"Error: {e}\"\n",
+    "    except Exception as e:\n",
+    "        return f\"Error: {e}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "id": "35fd0a44",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create interface with additional inputs\n",
+    "with gr.Blocks() as demo:\n",
+    "    gr.Markdown(\"# Chat with Google Gemini\")\n",
+    "    \n",
+    "    system_prompt = gr.Textbox(\n",
+    "        value=initial_system_prompt,\n",
+    "        label=\"System Prompt\",\n",
+    "        placeholder=\"Enter system instructions for the AI...\",\n",
+    "        lines=2\n",
+    "    )\n",
+    "    \n",
+    "    chat_interface = gr.ChatInterface(\n",
+    "        fn=chat_with_gemini,\n",
+    "        additional_inputs=[system_prompt],\n",
+    "        title=\"\",\n",
+    "        cache_examples=False,\n",
+    "        type='messages'\n",
+    "        \n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "53665d72",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL: http://127.0.0.1:7863\n",
+      "* To create a public link, set `share=True` in `launch()`.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7863/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 84,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Response parts: [text: \"Great! It\\'s a pleasure to hear from you, Ed. I\\'d be happy to connect. Could you tell me a bit about what you\\'d like to discuss? In the meantime, I\\'ll make a note of your email address.\\n\"\n",
+      ", function_call {\n",
+      "  name: \"record_user_details\"\n",
+      "  args {\n",
+      "    fields {\n",
+      "      key: \"notes\"\n",
+      "      value {\n",
+      "        string_value: \"User wants to get in touch.\"\n",
+      "      }\n",
+      "    }\n",
+      "    fields {\n",
+      "      key: \"name\"\n",
+      "      value {\n",
+      "        string_value: \"Ed\"\n",
+      "      }\n",
+      "    }\n",
+      "    fields {\n",
+      "      key: \"email\"\n",
+      "      value {\n",
+      "        string_value: \"ed@edwarddung.com\"\n",
+      "      }\n",
+      "    }\n",
+      "  }\n",
+      "}\n",
+      "]\n",
+      "Function calls list not empty\n",
+      "Tool called: record_user_details with arguments: {'notes': 'User wants to get in touch.', 'email': 'ed@edwarddung.com', 'name': 'Ed'}\n",
+      "Pushing to Pushover  Email: ed@edwarddung.com\n",
+      "Name: Ed\n",
+      "Notes: User wants to get in touch.\n",
+      "Response parts: [text: \"Thanks, Ed. I\\'ve made a note that you\\'re interested in getting in touch. I look forward to hearing more about what you\\'d like to discuss! Feel free to send me an email directly at ed.donner@gmail.com.\\n\"\n",
+      "]\n"
+     ]
+    }
+   ],
+   "source": [
+    "demo.launch()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "id": "e8305956",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Closing server running on port: 7863\n"
+     ]
+    }
+   ],
+   "source": [
+    "demo.close()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
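
One note on the notebook above: handle_tool_calls resolves the tool function with globals().get(tool_name), which will call any module-level name the model happens to emit. A common hardening is an explicit dispatch table; a minimal sketch of that variant (not the notebook's own code, and it assumes the notebook's record_user_details and record_unknown_question are in scope):

    from typing import Any, Callable, Dict, List

    # Closed allow-list of callables the model may invoke, instead of a globals() lookup.
    TOOL_REGISTRY: Dict[str, Callable[..., Dict[str, str]]] = {
        "record_user_details": record_user_details,
        "record_unknown_question": record_unknown_question,
    }

    def handle_tool_calls(tool_calls: List[Any]) -> List[Dict[str, Any]]:
        results = []
        for tool_call in tool_calls:
            tool = TOOL_REGISTRY.get(tool_call.name)  # unknown names never reach arbitrary globals
            result = tool(**dict(tool_call.args)) if tool else {"error": f"unknown tool: {tool_call.name}"}
            results.append({"function_response": {"name": tool_call.name, "response": result}})
        return results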
Lab_practice/Lab1_w1.ipynb ADDED
@@ -0,0 +1,205 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2a64513e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "0cbd72f2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from dotenv import load_dotenv\n",
+    "import os\n",
+    "from pypdf import PdfReader\n",
+    "import google.generativeai as genai\n",
+    "import gradio as gr\n",
+    "from pydantic import BaseModel\n",
+    "import json\n",
+    "load_dotenv(override=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "76d7f54a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "genai.configure(api_key=os.getenv(\"GEMINI_API\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "471c58a2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the PDF and summary \n",
+    "reader = PdfReader(\"../Week_1/Data_w1/linkedin.pdf\")\n",
+    "linkedin = \"\"\n",
+    "for page in reader.pages:\n",
+    "    linkedin += page.extract_text()\n",
+    "\n",
+    "with open(\"../Week_1/Data_w1/summary.txt\", \"r\") as f:\n",
+    "    summary = f.read()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "97b2238e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a system prompt\n",
+    "initial_system_prompt = f\"You are acting as Ed Donner. You are answering questions on Ed Donner's website, \\\n",
+    "particularly questions related to Ed Donner's career, background, skills and experience. \\\n",
+    "Your responsibility is to represent Ed Donner for interactions on the website as faithfully as possible. \\\n",
+    "You are given a summary of Ed Donner's background and LinkedIn profile which you can use to answer questions. \\\n",
+    "Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
+    "If you don't know the answer, say so.\"\n",
+    "\n",
+    "initial_system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
+    "initial_system_prompt += f\"With this context, please chat with the user, always staying in character as Ed Donner.\"\n",
+    "\n",
+    "chat_session = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "67da7af6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def chat_with_gemini(message, history, system_prompt):\n",
+    "    try:\n",
+    "        # Create the model with system instruction\n",
+    "        model = genai.GenerativeModel(\n",
+    "            'gemini-2.0-flash',\n",
+    "            system_instruction=system_prompt\n",
+    "        )\n",
+    "        \n",
+    "        # Convert Gradio messages format to Gemini format\n",
+    "        gemini_history = []\n",
+    "        for msg in history:\n",
+    "            if msg[\"role\"] == \"user\":\n",
+    "                gemini_history.append({\n",
+    "                    \"role\": \"user\",\n",
+    "                    \"parts\": [msg[\"content\"]]\n",
+    "                })\n",
+    "            elif msg[\"role\"] == \"assistant\":\n",
+    "                gemini_history.append({\n",
+    "                    \"role\": \"model\", # Gemini uses \"model\" instead of \"assistant\"\n",
+    "                    \"parts\": [msg[\"content\"]]\n",
+    "                })\n",
+    "        \n",
+    "        # Start chat with history\n",
+    "        chat_session = model.start_chat(history=gemini_history)\n",
+    "        \n",
+    "        # Send the current message\n",
+    "        response = chat_session.send_message(message)\n",
+    "        return response.text\n",
+    "    except Exception as e:\n",
+    "        return f\"Error: {e}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "68e7ec50",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create interface with additional inputs\n",
+    "with gr.Blocks() as demo:\n",
+    "    gr.Markdown(\"# Chat with Google Gemini\")\n",
+    "    \n",
+    "    system_prompt = gr.Textbox(\n",
+    "        value=initial_system_prompt,\n",
+    "        label=\"System Prompt\",\n",
+    "        placeholder=\"Enter system instructions for the AI...\",\n",
+    "        lines=2\n",
+    "    )\n",
+    "    \n",
+    "    chat_interface = gr.ChatInterface(\n",
+    "        fn=chat_with_gemini,\n",
+    "        additional_inputs=[system_prompt],\n",
+    "        title=\"\",\n",
+    "        cache_examples=False,\n",
+    "        type='messages'\n",
+    "        \n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd1321b5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Launch the interface\n",
+    "demo.launch()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "1ba10770",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Closing server running on port: 7862\n"
+     ]
+    }
+   ],
+   "source": [
+    "demo.close()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
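
The history-conversion loop in Lab1's chat_with_gemini (Gradio "messages"-format entries to Gemini history, with the assistant role renamed to "model") recurs in every notebook in this commit, so it is a natural candidate for a shared helper. A minimal standalone sketch of that refactor (the function name is illustrative, not the notebooks' own):

    from typing import Dict, List

    def to_gemini_history(history: List[Dict[str, str]]) -> List[Dict[str, object]]:
        # Gemini chat history uses {"role", "parts"} and calls the assistant role "model".
        role_map = {"user": "user", "assistant": "model"}
        return [
            {"role": role_map[msg["role"]], "parts": [msg["content"]]}
            for msg in history
            if msg["role"] in role_map  # skip roles Gemini history has no slot for
        ]

    print(to_gemini_history([{"role": "user", "content": "Hi"},
                             {"role": "assistant", "content": "Hello!"}]))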
Lab_practice/Lab2_w1.ipynb ADDED
@@ -0,0 +1,341 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a42824e4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "905d1b79",
+   "metadata": {},
+   "source": [
+    "Built an evaluation model to assess the output of the current model\n",
+    "1. Be able to ask an LLM to evaluate answer\n",
+    "2. Be able to rerun if the answer fail the evaluation\n",
+    "3. Be able to incorporate into a workflow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "1b931a48",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dotenv import load_dotenv\n",
+    "import os\n",
+    "from pypdf import PdfReader\n",
+    "import google.generativeai as genai\n",
+    "import gradio as gr\n",
+    "from pydantic import BaseModel\n",
+    "import json\n",
+    "load_dotenv(override=True)\n",
+    "genai.configure(api_key=os.getenv(\"GEMINI_API\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "220dbf02",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the PDF and summary \n",
+    "reader = PdfReader(\"../Week_1/Data_w1/linkedin.pdf\")\n",
+    "linkedin = \"\"\n",
+    "for page in reader.pages:\n",
+    "    linkedin += page.extract_text()\n",
+    "\n",
+    "with open(\"../Week_1/Data_w1/summary.txt\", \"r\") as f:\n",
+    "    summary = f.read()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "6a8c0ccb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a system prompt\n",
+    "initial_system_prompt = f\"You are acting as Ed Donner. You are answering questions on Ed Donner's website, \\\n",
+    "particularly questions related to Ed Donner's career, background, skills and experience. \\\n",
+    "Your responsibility is to represent Ed Donner for interactions on the website as faithfully as possible. \\\n",
+    "You are given a summary of Ed Donner's background and LinkedIn profile which you can use to answer questions. \\\n",
+    "Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
+    "If you don't know the answer, say so.\"\n",
+    "\n",
+    "initial_system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
+    "initial_system_prompt += f\"With this context, please chat with the user, always staying in character as Ed Donner.\"\n",
+    "\n",
+    "chat_session = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "fb1d2679",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "evaluator_system_prompt = f\"You are an evaluator that decides whether a response to a question is acceptable. \\\n",
+    "You are provided with a conversation between a User and an Agent. Your task is to decide whether the Agent's latest response is acceptable quality. \\\n",
+    "The Agent is playing the role of Ed Donner and is representing Ed Donner on their website. \\\n",
+    "The Agent has been instructed to be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
+    "The Agent has been provided with context on Ed Donner in the form of their summary and LinkedIn details. Here's the information:\"\n",
+    "\n",
+    "evaluator_system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
+    "evaluator_system_prompt += f\"With this context, please evaluate the latest response, replying with whether the response is acceptable and your feedback.\"\n",
+    "\n",
+    "def evaluator_user_prompt(reply, message, history):\n",
+    "    user_prompt = f\"Here's the conversation between the User and the Agent: \\n\\n{history}\\n\\n\"\n",
+    "    user_prompt += f\"Here's the latest message from the User: \\n\\n{message}\\n\\n\"\n",
+    "    user_prompt += f\"Here's the latest response from the Agent: \\n\\n{reply}\\n\\n\"\n",
+    "    user_prompt += f\"Please evaluate the response, replying with whether it is acceptable and your feedback.\"\n",
+    "    return user_prompt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "25afd8a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Evaluation(BaseModel):\n",
+    "    is_acceptable: bool\n",
+    "    response: str\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "5d7aceac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a model for evaluation\n",
+    "\n",
+    "model_evaluator = genai.GenerativeModel(\n",
+    "    'gemini-2.0-flash-exp',\n",
+    "    system_instruction=evaluator_system_prompt\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "1b33200d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def evaluate_response(reply, message, history) -> Evaluation:\n",
+    "    try:\n",
+    "        # Create evaluation prompt\n",
+    "        eval_prompt = evaluator_user_prompt(reply, message, history)\n",
+    "        response = model_evaluator.generate_content(eval_prompt)\n",
+    "\n",
+    "        # Parse the JSON response\n",
+    "        try:\n",
+    "            eval_data = json.loads(response.text)\n",
+    "            return Evaluation(\n",
+    "                is_acceptable=eval_data.get(\"is_acceptable\", True),\n",
+    "                response=eval_data.get(\"response\", \"No response provided.\")\n",
+    "\n",
+    "            )\n",
+    "        except json.JSONDecodeError:\n",
+    "            # If JSON parsing fails, try to extract boolean and text\n",
+    "            text = response.text.lower()\n",
+    "            is_acceptable = \"true\" in text or \"acceptable\" in text\n",
+    "            return Evaluation(\n",
+    "                is_acceptable=is_acceptable,\n",
+    "                response=response.text\n",
+    "            )\n",
+    "    except Exception as e:\n",
+    "        # Return default evaluation on error\n",
+    "        return Evaluation(\n",
+    "            is_acceptable=True,\n",
+    "            response=f\"Evaluation failed: {str(e)}\"\n",
+    "        )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "a2ee32f8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the main chat\n",
+    "def chat(message, history, system_prompt=initial_system_prompt):\n",
+    "    model = genai.GenerativeModel(\n",
+    "        'gemini-2.0-flash',\n",
+    "        system_instruction=system_prompt\n",
+    "    )\n",
+    "    # Convert Gradio messages format to Gemini format\n",
+    "    gemini_history = []\n",
+    "    for msg in history:\n",
+    "        if msg[\"role\"] == \"user\":\n",
+    "            gemini_history.append({\n",
+    "                \"role\": \"user\",\n",
+    "                \"parts\": [msg[\"content\"]]\n",
+    "            })\n",
+    "        elif msg[\"role\"] == \"assistant\":\n",
+    "            gemini_history.append({\n",
+    "                \"role\": \"model\", # Gemini uses \"model\" instead of \"assistant\"\n",
+    "                \"parts\": [msg[\"content\"]]\n",
+    "            })\n",
+    "    \n",
+    "    # Start chat with history\n",
+    "    chat_session = model.start_chat(history=gemini_history)\n",
+    "    \n",
+    "    # Create an acceptable retries if the message is not acceptable\n",
+    "    for try_count in range(3):\n",
+    "        try:\n",
+    "            # Send the current message\n",
+    "            response = chat_session.send_message(message).text\n",
+    "\n",
+    "            # Evaluate the response\n",
+    "            evaluation = evaluate_response(response, message, history)\n",
+    "            if evaluation.is_acceptable:\n",
+    "                print(\"Passed evaluation - returning reply\")\n",
+    "                return response\n",
+    "            else:\n",
+    "                print(\"Failed evaluation - retrying\")\n",
+    "                if try_count < 2:\n",
+    "                    retry_message = f\"{message}\\n\\nPlease provide a better response. Previous attempt had issues: {evaluation.response}\"\n",
+    "                    # Create a new chat to avoid the bad response\n",
+    "                    chat_session = model.start_chat(history=gemini_history)\n",
+    "                    message = retry_message\n",
+    "                else:\n",
+    "                    return f\"{response}\\n\\n*[Note: Response may need improvement - {evaluation.response}]*\"\n",
+    "        except Exception as e:\n",
+    "            if try_count < 2:\n",
+    "                continue\n",
+    "            else:\n",
+    "                return f\"Error: {str(e)} after 3 tries\"\n",
+    "    return \"Failed to generate acceptable response after maximum retries.\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "ba3b599c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create interface with additional inputs\n",
+    "with gr.Blocks() as demo:\n",
+    "    gr.Markdown(\"# Chat with Google Gemini\")\n",
+    "    \n",
+    "    system_prompt = gr.Textbox(\n",
+    "        value=initial_system_prompt,\n",
+    "        label=\"System Prompt\",\n",
+    "        placeholder=\"Enter system instructions for the AI...\",\n",
+    "        lines=2\n",
+    "    )\n",
+    "    \n",
+    "    chat_interface = gr.ChatInterface(\n",
+    "        fn=chat,\n",
+    "        additional_inputs=[system_prompt],\n",
+    "        title=\"\",\n",
+    "        cache_examples=False,\n",
+    "        type='messages'\n",
+    "        \n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ce1addde",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL: http://127.0.0.1:7862\n",
+      "* To create a public link, set `share=True` in `launch()`.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Passed evaluation - returning reply\n",
+      "Passed evaluation - returning reply\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Launch the interface\n",
+    "demo.launch()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "9039693e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Closing server running on port: 7862\n"
+     ]
+    }
+   ],
+   "source": [
+    "demo.close()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
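
Lab2's markdown cell lays out the evaluate-and-retry workflow: ask a judge LLM to assess each answer, rerun when the answer fails, and fold this into the chat loop. Stripped of the Gemini specifics, the control flow reduces to a small generate/judge/retry loop; a minimal self-contained sketch with stubbed model calls (both stubs are placeholders, not the notebook's models):

    from dataclasses import dataclass

    @dataclass
    class Evaluation:
        is_acceptable: bool
        feedback: str

    def generate(message: str) -> str:
        return f"Reply to: {message}"          # stub for the chat model call

    def judge(reply: str, message: str) -> Evaluation:
        return Evaluation(bool(reply), "ok")   # stub for the evaluator model call

    def chat_with_retries(message: str, max_tries: int = 3) -> str:
        # Regenerate until the judge accepts, folding its feedback into the retry prompt.
        for _ in range(max_tries):
            reply = generate(message)
            verdict = judge(reply, message)
            if verdict.is_acceptable:
                return reply
            message += f"\n\nPlease provide a better response. Previous attempt had issues: {verdict.feedback}"
        return f"{reply}\n\n*[Note: response may need improvement - {verdict.feedback}]*"

    print(chat_with_retries("Tell me about Ed Donner"))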
Lab_practice/Lab3_w1.ipynb
ADDED
|
@@ -0,0 +1,469 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 22,
|
| 6 |
+
"id": "4d961b4b",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"from dotenv import load_dotenv\n",
|
| 11 |
+
"import os\n",
|
| 12 |
+
"import requests\n",
|
| 13 |
+
"import gradio as gr\n",
|
| 14 |
+
"from pypdf import PdfReader\n",
|
| 15 |
+
"import google.generativeai as genai\n",
|
| 16 |
+
"from typing import Dict, List\n",
|
| 17 |
+
"import json\n",
|
| 18 |
+
"load_dotenv(override=True)\n",
|
| 19 |
+
"genai.configure(api_key=os.getenv(\"GEMINI_API\"))"
|
| 20 |
+
]
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"cell_type": "code",
|
| 24 |
+
"execution_count": 2,
|
| 25 |
+
"id": "070475b8",
|
| 26 |
+
"metadata": {},
|
| 27 |
+
"outputs": [],
|
| 28 |
+
"source": [
|
| 29 |
+
"pushover_user = os.getenv(\"PUSHOVER_USER\")\n",
|
| 30 |
+
"pushover_token = os.getenv(\"PUSHOVER_API\")\n",
|
| 31 |
+
"pushover_url = f\"https://api.pushover.net/1/messages.json\""
|
| 32 |
+
]
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"cell_type": "code",
|
| 36 |
+
"execution_count": 42,
|
| 37 |
+
"id": "94cd12d8",
|
| 38 |
+
"metadata": {},
|
| 39 |
+
"outputs": [],
|
| 40 |
+
"source": [
|
| 41 |
+
"def push(message: str):\n",
|
| 42 |
+
" print(\"Pushing to Pushover \", message)\n",
|
| 43 |
+
" payload = {\"user\": pushover_user, \"token\": pushover_token, \"message\": message}\n",
|
| 44 |
+
" requests.post(pushover_url, data=payload)"
|
| 45 |
+
]
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"cell_type": "code",
|
| 49 |
+
"execution_count": 43,
|
| 50 |
+
"id": "99d70c8a",
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"outputs": [],
|
| 53 |
+
"source": [
|
| 54 |
+
"def record_user_details(email: str, \n",
|
| 55 |
+
" name: str,\n",
|
| 56 |
+
" notes: str) -> Dict[str, str]:\n",
|
| 57 |
+
" push(f\"Email: {email}\\nName: {name}\\nNotes: {notes}\")\n",
|
| 58 |
+
" return {\"recorded\": \"ok\"}\n",
|
| 59 |
+
"\n",
|
| 60 |
+
"\n",
|
| 61 |
+
"def record_unknown_question(question: str) -> Dict[str, str]:\n",
|
| 62 |
+
" push(f\"Question: {question}\")\n",
|
| 63 |
+
" return {\"recorded\": \"ok\"}\n",
|
| 64 |
+
"\n"
|
| 65 |
+
]
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"cell_type": "code",
|
| 69 |
+
"execution_count": 35,
|
| 70 |
+
"id": "408924fe",
|
| 71 |
+
"metadata": {},
|
| 72 |
+
"outputs": [],
|
| 73 |
+
"source": [
|
| 74 |
+
"record_user_details_json = {\n",
|
| 75 |
+
" \"name\": \"record_user_details\",\n",
|
| 76 |
+
" \"description\": \"Use this tool to record that a user is interested in being in touch and provided an email address\",\n",
|
| 77 |
+
" \"parameters\": {\n",
|
| 78 |
+
" \"type\": \"OBJECT\",\n",
|
| 79 |
+
" \"properties\": {\n",
|
| 80 |
+
" \"email\": {\n",
|
| 81 |
+
" \"type\": \"STRING\",\n",
|
| 82 |
+
" \"description\": \"The email address of this user\"\n",
|
| 83 |
+
" },\n",
|
| 84 |
+
" \"name\": {\n",
|
| 85 |
+
" \"type\": \"STRING\",\n",
|
| 86 |
+
" \"description\": \"The user's name, if they provided it\"\n",
|
| 87 |
+
" }\n",
|
| 88 |
+
" ,\n",
|
| 89 |
+
" \"notes\": {\n",
|
| 90 |
+
" \"type\": \"STRING\",\n",
|
| 91 |
+
" \"description\": \"Any additional information about the conversation that's worth recording to give context\"\n",
|
| 92 |
+
" }\n",
|
| 93 |
+
" },\n",
|
| 94 |
+
" \"required\": [\"name\", \"email\"]\n",
|
| 95 |
+
" }\n",
|
| 96 |
+
"}"
|
| 97 |
+
]
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"cell_type": "code",
|
| 101 |
+
"execution_count": 36,
|
| 102 |
+
"id": "c64dc641",
|
| 103 |
+
"metadata": {},
|
| 104 |
+
"outputs": [],
|
| 105 |
+
"source": [
|
    "record_unknown_question_json = {\n",
    "    \"name\": \"record_unknown_question\",\n",
    "    \"description\": \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\",\n",
    "    \"parameters\": {\n",
    "        \"type\": \"OBJECT\",\n",
    "        \"properties\": {\n",
    "            \"question\": {\n",
    "                \"type\": \"STRING\",\n",
    "                \"description\": \"The question that couldn't be answered\"\n",
    "            }\n",
    "        },\n",
    "        \"required\": [\"question\"]\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "23b9f4a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "tools = [record_user_details_json, record_unknown_question_json]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "92c7a46f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def handle_tool_calls(tool_calls: List) -> List[Dict[str, str]]:\n",
    "    results = []\n",
    "    for tool_call in tool_calls:\n",
    "        tool_name = tool_call.name\n",
    "        arguments = dict(tool_call.args)\n",
    "        print(f\"Tool called: {tool_name} with arguments: {arguments}\")\n",
    "        tool = globals().get(tool_name)\n",
    "        result = tool(**arguments) if tool else {}\n",
    "        # Format for Gemini function response\n",
    "        results.append({\n",
    "            \"function_response\": {\n",
    "                \"name\": tool_name,\n",
    "                \"response\": result\n",
    "            }\n",
    "        })\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "98e9cd1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the PDF and summary\n",
    "reader = PdfReader(\"../Week_1/Data_w1/linkedin.pdf\")\n",
    "linkedin = \"\"\n",
    "for page in reader.pages:\n",
    "    linkedin += page.extract_text()\n",
    "\n",
    "with open(\"../Week_1/Data_w1/summary.txt\", \"r\") as f:\n",
    "    summary = f.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "e473a35c",
   "metadata": {},
   "outputs": [],
   "source": [
    "initial_system_prompt = f\"You are acting as Ed Donner. You are answering questions on Ed Donner's website, \\\n",
    "particularly questions related to Ed Donner's career, background, skills and experience. \\\n",
    "Your responsibility is to represent Ed Donner for interactions on the website as faithfully as possible. \\\n",
    "You are given a summary of Ed Donner's background and LinkedIn profile which you can use to answer questions. \\\n",
    "Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
    "If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \\\n",
    "If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. \"\n",
    "\n",
    "initial_system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
    "initial_system_prompt += f\"With this context, please chat with the user, always staying in character as Ed Donner.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7ba7ef6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "response:\n",
       "GenerateContentResponse(\n",
       "    done=True,\n",
       "    iterator=None,\n",
       "    result=protos.GenerateContentResponse({\n",
       "      \"candidates\": [\n",
       "        {\n",
       "          \"content\": {\n",
       "            \"parts\": [\n",
       "              {\n",
       "                \"text\": \"Hi! Welcome to my website. I'm Ed Donner. What can I tell you about? I'm happy to chat about my career, Nebula.io, LLMs, or anything else that might be on your mind.\\n\"\n",
       "              }\n",
       "            ],\n",
       "            \"role\": \"model\"\n",
       "          },\n",
       "          \"finish_reason\": \"STOP\",\n",
       "          \"avg_logprobs\": -0.1461243430773417\n",
       "        }\n",
       "      ],\n",
       "      \"usage_metadata\": {\n",
       "        \"prompt_token_count\": 2516,\n",
       "        \"candidates_token_count\": 48,\n",
       "        \"total_token_count\": 2564\n",
       "      },\n",
       "      \"model_version\": \"gemini-2.0-flash\"\n",
       "    }),\n",
       ")"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = genai.GenerativeModel(\n",
    "    'gemini-2.0-flash',\n",
    "    system_instruction=initial_system_prompt,\n",
    "    tools=tools\n",
    ")\n",
    "gemini_history = []\n",
    "chat_session = model.start_chat(history=gemini_history)\n",
    "# Send the current message\n",
    "response = chat_session.send_message(\"Hi there\")\n",
    "\n",
    "response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "5b21dfd3",
   "metadata": {},
   "outputs": [],
   "source": [
    "def chat_with_gemini(message, history, system_prompt):\n",
    "    try:\n",
    "        # Create the model with system instruction\n",
    "        model = genai.GenerativeModel(\n",
    "            'gemini-2.0-flash',\n",
    "            system_instruction=system_prompt,\n",
    "            tools=tools\n",
    "        )\n",
    "\n",
    "        # Convert Gradio messages format to Gemini format\n",
    "        gemini_history = []\n",
    "        max_iteration = 3\n",
    "        iteration = 0\n",
    "        for msg in history:\n",
    "            if msg[\"role\"] == \"user\":\n",
    "                gemini_history.append({\n",
    "                    \"role\": \"user\",\n",
    "                    \"parts\": [msg[\"content\"]]\n",
    "                })\n",
    "            elif msg[\"role\"] == \"assistant\":\n",
    "                gemini_history.append({\n",
    "                    \"role\": \"model\",\n",
    "                    \"parts\": [msg[\"content\"]]\n",
    "                })\n",
    "\n",
    "        # Start chat with history\n",
    "        chat_session = model.start_chat(history=gemini_history)\n",
    "        current_message = message\n",
    "        try:\n",
    "            while iteration < max_iteration:\n",
    "                # Send the current message\n",
    "                response = chat_session.send_message(current_message)\n",
    "                # Check the finish reason of the first candidate\n",
    "                finish_reason = response.candidates[0].finish_reason\n",
    "\n",
    "                print(f\"Response parts: {[part for part in response.candidates[0].content.parts]}\")\n",
    "\n",
    "                function_calls = []\n",
    "                text_parts = []\n",
    "\n",
    "                # If the LLM wants to call the tools\n",
    "                for part in response.candidates[0].content.parts:\n",
    "                    if hasattr(part, \"function_call\") and part.function_call:\n",
    "                        function_calls.append(part.function_call)\n",
    "                        print(\"Function calls list not empty\")\n",
    "                    elif hasattr(part, \"text\"):\n",
    "                        text_parts.append(part.text)\n",
    "\n",
    "                # Execute if function_calls is not empty\n",
    "                if function_calls:\n",
    "                    results = handle_tool_calls(function_calls)\n",
    "                    # Feed the tool results back to the model on the next turn\n",
    "                    current_message = results\n",
    "                    iteration += 1\n",
    "                else:\n",
    "                    if text_parts:\n",
    "                        return \"\".join(text_parts)\n",
    "                    else:\n",
    "                        return response.text\n",
    "            return \"\"\n",
    "        except Exception as e:\n",
    "            return f\"Error: {e}\"\n",
    "    except Exception as e:\n",
    "        return f\"Error: {e}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "35fd0a44",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create interface with additional inputs\n",
    "with gr.Blocks() as demo:\n",
    "    gr.Markdown(\"# Chat with Google Gemini\")\n",
    "\n",
    "    system_prompt = gr.Textbox(\n",
    "        value=initial_system_prompt,\n",
    "        label=\"System Prompt\",\n",
    "        placeholder=\"Enter system instructions for the AI...\",\n",
    "        lines=2\n",
    "    )\n",
    "\n",
    "    chat_interface = gr.ChatInterface(\n",
    "        fn=chat_with_gemini,\n",
    "        additional_inputs=[system_prompt],\n",
    "        title=\"\",\n",
    "        cache_examples=False,\n",
    "        type='messages'\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53665d72",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "* Running on local URL: http://127.0.0.1:7863\n",
      "* To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7863/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Response parts: [text: \"Great! It\\'s a pleasure to hear from you, Ed. I\\'d be happy to connect. Could you tell me a bit about what you\\'d like to discuss? In the meantime, I\\'ll make a note of your email address.\\n\"\n",
      ", function_call {\n",
      "  name: \"record_user_details\"\n",
      "  args {\n",
      "    fields {\n",
      "      key: \"notes\"\n",
      "      value {\n",
      "        string_value: \"User wants to get in touch.\"\n",
      "      }\n",
      "    }\n",
      "    fields {\n",
      "      key: \"name\"\n",
      "      value {\n",
      "        string_value: \"Ed\"\n",
      "      }\n",
      "    }\n",
      "    fields {\n",
      "      key: \"email\"\n",
      "      value {\n",
      "        string_value: \"ed@edwarddung.com\"\n",
      "      }\n",
      "    }\n",
      "  }\n",
      "}\n",
      "]\n",
      "Function calls list not empty\n",
      "Tool called: record_user_details with arguments: {'notes': 'User wants to get in touch.', 'email': 'ed@edwarddung.com', 'name': 'Ed'}\n",
      "Pushing to Pushover Email: ed@edwarddung.com\n",
      "Name: Ed\n",
      "Notes: User wants to get in touch.\n",
      "Response parts: [text: \"Thanks, Ed. I\\'ve made a note that you\\'re interested in getting in touch. I look forward to hearing more about what you\\'d like to discuss! Feel free to send me an email directly at ed.donner@gmail.com.\\n\"\n",
      "]\n"
     ]
    }
   ],
   "source": [
    "demo.launch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "e8305956",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Closing server running on port: 7863\n"
     ]
    }
   ],
   "source": [
    "demo.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
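Editor's note: handle_tool_calls dispatches each function_call by looking the tool name up in globals(), so it can be smoke-tested offline. Below is a minimal sketch, assuming the notebook's earlier cells defining record_unknown_question and push have run; the SimpleNamespace is a hypothetical stand-in for a real Gemini FunctionCall object (real ones arrive in response.candidates[0].content.parts), and the question text is illustrative only. Note it still fires a live Pushover POST via push() if credentials are configured.

from types import SimpleNamespace

# Hypothetical stand-in for a Gemini FunctionCall: anything with .name and .args
fake_call = SimpleNamespace(name="record_unknown_question",
                            args={"question": "example unanswerable question"})
print(handle_tool_calls([fake_call]))
# Expected shape:
# [{'function_response': {'name': 'record_unknown_question',
#                         'response': {'recorded': 'ok'}}}]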
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
-title:
-emoji: 🏢
-colorFrom: gray
-colorTo: yellow
-sdk: gradio
-sdk_version: 5.33.1
+title: Week_1
 app_file: app.py
+sdk: gradio
+sdk_version: 5.33.0
-
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/text_chunk.cpython-312.pyc
ADDED
Binary file (1.93 kB). View file
app.py
ADDED
@@ -0,0 +1,224 @@
from dotenv import load_dotenv
import os
import requests
import gradio as gr
from pypdf import PdfReader
import google.generativeai as genai
from chromadb import Documents, EmbeddingFunction, Embeddings
from typing import Dict, List
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import re
import pickle
import json
from embed import *

load_dotenv(override=True)
genai.configure(api_key=os.getenv("GEMINI_API"))
pushover_user = os.getenv("PUSHOVER_USER")
pushover_token = os.getenv("PUSHOVER_API")
pushover_url = "https://api.pushover.net/1/messages.json"


def push(message: str):
    print("Pushing to Pushover ", message)
    payload = {"user": pushover_user, "token": pushover_token, "message": message}
    requests.post(pushover_url, data=payload)


def record_user_details(email: str,
                        name: str,
                        notes: str) -> Dict[str, str]:
    push(f"Email: {email}\nName: {name}\nNotes: {notes}")
    return {"recorded": "ok"}


def record_unknown_question(question: str) -> Dict[str, str]:
    push(f"Question: {question}")
    return {"recorded": "ok"}


def handle_tool_calls(tool_calls: List) -> List[Dict[str, str]]:
    results = []
    for tool_call in tool_calls:
        tool_name = tool_call.name
        arguments = dict(tool_call.args)
        print(f"Tool called: {tool_name} with arguments: {arguments}")
        # Look up the matching Python function by name and call it
        tool = globals().get(tool_name)
        result = tool(**arguments) if tool else {}
        # Format for Gemini function response
        results.append({
            "function_response": {
                "name": tool_name,
                "response": result
            }
        })
    return results


record_user_details_json = {
    "name": "record_user_details",
    "description": "Use this tool to record that a user is interested in being in touch and provided an email address",
    "parameters": {
        "type": "OBJECT",
        "properties": {
            "email": {
                "type": "STRING",
                "description": "The email address of this user"
            },
            "name": {
                "type": "STRING",
                "description": "The user's name, if they provided it"
            },
            "notes": {
                "type": "STRING",
                "description": "Any additional information about the conversation that's worth recording to give context"
            }
        },
        "required": ["name", "email"]
    }
}

record_unknown_question_json = {
    "name": "record_unknown_question",
    "description": "Always use this tool to record any question that couldn't be answered as you didn't know the answer",
    "parameters": {
        "type": "OBJECT",
        "properties": {
            "question": {
                "type": "STRING",
                "description": "The question that couldn't be answered"
            }
        },
        "required": ["question"]
    }
}

tools = [
    record_user_details_json,
    record_unknown_question_json
]


class App:

    def __init__(self):
        self.db = load_chroma_db(path="Week_1/Data_w1", name='RAG_DB')

    def rag_prompt(self, query: str, relevant_passages: str) -> str:
        escaped = relevant_passages.replace("'", "").replace('"', "").replace("\n", " ")
        prompt = f'''
        Please answer questions using text from the reference passage included below. \
        Be sure to respond in a complete sentence, being comprehensive, including all relevant background information. \
        However, you are talking to a non-technical audience, so be sure to break down complicated concepts and \
        strike a friendly and conversational tone. \
        If the passage is irrelevant to the question, you should respond with "I do not have an answer for that." and use the record_unknown_question tool to record the question. \
        QUESTION: {query} \
        PASSAGE: {escaped}
        '''
        return prompt

    def system_prompt(self) -> str:
        return '''
        You are acting as Ed Donner. You are answering questions on Ed Donner's website, \
        particularly questions related to Ed Donner's career, background, skills and experience. \
        Your responsibility is to represent Ed Donner for interactions on the website as faithfully as possible. \
        Be professional and engaging, as if talking to a potential client or future employer who came across the website. \
        If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \
        If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool.
        '''

    def chat_with_gemini(self, message, history, system_prompt):
        try:
            # Create the model with system instruction
            model = genai.GenerativeModel(
                'gemini-2.0-flash',
                system_instruction=system_prompt,
                tools=tools
            )
            # Convert Gradio messages format to Gemini format
            gemini_history = []
            max_iteration = 3
            iteration = 0
            for msg in history:
                if msg["role"] == "user":
                    gemini_history.append({
                        "role": "user",
                        "parts": [msg["content"]]
                    })
                elif msg["role"] == "assistant":
                    gemini_history.append({
                        "role": "model",
                        "parts": [msg["content"]]
                    })

            # Start chat with history
            chat_session = model.start_chat(history=gemini_history)
            # Retrieve supporting chunks from the Chroma DB loaded in __init__
            relevant_passage = get_relevant_passage(query=message,
                                                    db=self.db,
                                                    n_results=3)

            # Wrap the user's question and the retrieved passages in the RAG prompt
            prompt = self.rag_prompt(query=message,
                                     relevant_passages=" ".join(relevant_passage))

            current_message = prompt

            try:
                while iteration < max_iteration:
                    # Send the current message
                    response = chat_session.send_message(current_message)
                    # Check the finish reason of the first candidate
                    finish_reason = response.candidates[0].finish_reason

                    print(f"Response parts: {[part for part in response.candidates[0].content.parts]}")

                    function_calls = []
                    text_parts = []

                    # If the LLM wants to call the tools
                    for part in response.candidates[0].content.parts:
                        if hasattr(part, "function_call") and part.function_call:
                            function_calls.append(part.function_call)
                            print("Function calls list not empty")
                        elif hasattr(part, "text"):
                            text_parts.append(part.text)

                    # Execute if function_calls is not empty
                    if function_calls:
                        results = handle_tool_calls(function_calls)
                        # Feed the tool results back to the model on the next turn
                        current_message = results
                        iteration += 1
                    else:
                        if text_parts:
                            return "".join(text_parts)
                        else:
                            return response.text
                return ""
            except Exception as e:
                return f"Error: {e}"
        except Exception as e:
            return f"Error: {e}"


if __name__ == "__main__":
    chat_grad = App()
    with gr.Blocks() as demo:
        gr.Markdown("# Chat with Google Gemini")

        system_prompt = gr.Textbox(
            value=chat_grad.system_prompt(),
            label="System Prompt",
            placeholder="Enter system instructions for the AI...",
            lines=2
        )

        chat_interface = gr.ChatInterface(
            fn=chat_grad.chat_with_gemini,
            additional_inputs=[system_prompt],
            title="",
            cache_examples=False,
            type='messages'
        )
    demo.launch()
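Editor's note: a quick way to sanity-check App's retrieval plumbing without launching Gradio is to build the RAG prompt by hand. A minimal sketch, assuming the persisted 'RAG_DB' collection under Week_1/Data_w1 exists (see embed.py below) and GEMINI_API is set in .env; the query string is an illustrative example only.

# Inspect the exact prompt chat_with_gemini would send on the first turn
app = App()
query = "What is your Python experience?"
passages = get_relevant_passage(query=query, db=app.db, n_results=3)
print(app.rag_prompt(query=query, relevant_passages=" ".join(passages)))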
embed.py
ADDED
@@ -0,0 +1,104 @@
import google.generativeai as genai
from chromadb import Documents, EmbeddingFunction, Embeddings, PersistentClient, Collection
from typing import Dict, List
import os
from dotenv import load_dotenv
load_dotenv(override=True)
from text_chunk import *

class GeminiEmbeddingFuction(EmbeddingFunction):
    """
    Custom embedding function using the Gemini AI API for document retrieval.

    This class extends the EmbeddingFunction class and implements the __call__ method
    to generate embeddings for a given set of documents using the Gemini AI API.

    Parameters:
    - input (Documents): A collection of documents to be embedded.

    Returns:
    - Embeddings: Embeddings generated for the input documents.
    """

    def __call__(self, input: Documents) -> Embeddings:
        genai.configure(api_key=os.getenv("GEMINI_API"))
        return genai.embed_content(model="models/embedding-001",
                                   content=input,
                                   task_type="retrieval_document",
                                   title="Query")['embedding']


def create_chroma_db(documents: List[str], path: str, name: str):
    """
    Creates a Chroma database using the provided documents, path, and collection name.

    Parameters:
    - documents: An iterable of documents to be added to the Chroma database.
    - path (str): The path where the Chroma database will be stored.
    - name (str): The name of the collection within the Chroma database.

    Returns:
    - Tuple[chromadb.Collection, str]: A tuple containing the created Chroma Collection and its name.
    """

    chroma_client = PersistentClient(path=path)
    db = chroma_client.create_collection(name=name,
                                         embedding_function=GeminiEmbeddingFuction())
    for i, d in enumerate(documents):
        db.add(documents=[d], ids=str(i))
    return db, name

def load_chroma_db(path: str, name: str):
    """
    Loads an existing Chroma collection from the specified path with the given name.

    Parameters:
    - path (str): The path where the Chroma database is stored.
    - name (str): The name of the collection within the Chroma database.

    Returns:
    - chromadb.Collection: The loaded Chroma Collection.
    """

    chroma_client = PersistentClient(path=path)
    db = chroma_client.get_collection(name=name, embedding_function=GeminiEmbeddingFuction())
    return db

def get_relevant_passage(query: str, db: Collection, n_results: int):
    """
    Semantic search to retrieve the most similar chunks of text from the database.

    Parameters:
    - query (str): The query to search for.
    - db (chromadb.Collection): The Chroma collection to search.
    - n_results (int): The number of results to return.

    Returns:
    - List[str]: A list of the most similar chunks of text.
    """
    passage = db.query(query_texts=[query],
                       n_results=n_results)['documents'][0]
    return passage

if __name__ == "__main__":
    # Create the database from the LinkedIn PDF and summary
    # text = load_documents(data_path="Week_1/Data_w1")
    # print("Length of text: ", len(text))
    # chunked_text = sliding_window_chunk(text=text)
    # db, name = create_chroma_db(
    #     documents=chunked_text,
    #     path="Week_1/Data_w1",
    #     name='RAG_DB'
    # )

    # Retrieval example
    # db = load_chroma_db(path="Week_1/Data_w1", name='RAG_DB')
    # relevant_text = get_relevant_passage(query="Your python experience", db=db, n_results=3)

    # print(relevant_text)
    print("Done")
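Editor's note: spelled out, the commented-out __main__ block implies a two-phase flow. A sketch under the same assumptions (GEMINI_API in .env, the Week_1/Data_w1 folder from this commit); note that create_chroma_db calls create_collection, which raises if 'RAG_DB' already exists, so the build step is one-time.

from embed import create_chroma_db, load_chroma_db, get_relevant_passage
from text_chunk import load_documents, sliding_window_chunk

# One-time build: chunk the documents and embed them into a persistent collection
text = load_documents(data_path="Week_1/Data_w1")
chunks = sliding_window_chunk(text=text)
db, name = create_chroma_db(documents=chunks, path="Week_1/Data_w1", name="RAG_DB")

# Subsequent runs: load the persisted collection and query it
db = load_chroma_db(path="Week_1/Data_w1", name="RAG_DB")
print(get_relevant_passage(query="Your python experience", db=db, n_results=3))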
text_chunk.py
ADDED
@@ -0,0 +1,54 @@
from pypdf import PdfReader
from typing import Dict, List
import re

def load_documents(data_path: str) -> str:
    '''
    Read the LinkedIn PDF and the summary in the data folder.

    Parameters:
    - data_path (str): The path to the data folder

    Returns:
    - output (str): The PDF text and the summary joined into one string
    '''
    reader = PdfReader(f"{data_path}/linkedin.pdf")
    text_document = ""
    for page in reader.pages:
        text_document += page.extract_text()

    with open(f"{data_path}/summary.txt", "r") as f:
        summary = f.read()
    output = f"{text_document}\n{summary}"
    return output

def sliding_window_chunk(text: str, overlap: int = 20, chunk_size: int = 200) -> List[str]:
    '''
    Split the text into overlapping chunks of words.

    Parameters:
    - text (str): The text to split
    - overlap (int): Number of words shared between consecutive chunks
    - chunk_size (int): Number of words per chunk

    Returns:
    - chunks (List[str]): A list of chunks of text
    '''

    # Remove unwanted characters
    text = re.sub(r'[\xa0\n]', " ", text)

    # Slide a chunk_size-word window forward by (chunk_size - overlap) words,
    # so consecutive chunks share `overlap` words
    words = text.split()
    step = chunk_size - overlap
    chunks = [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), step)]
    return chunks


# if __name__ == "__main__":
#     reader = PdfReader("Week_1/Data_w1/linkedin.pdf")
#     linkedin = ""
#     for page in reader.pages:
#         linkedin += page.extract_text()

#     text_chunks = sliding_window_chunk(linkedin)
#     print(len(text_chunks))
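Editor's note: a quick check of the window arithmetic, with toy numbers chosen only for illustration. With chunk_size=5 and overlap=2 the stride is 3, so consecutive chunks share two words, and the tail yields progressively shorter chunks (harmless for retrieval).

from text_chunk import sliding_window_chunk

# Tiny demo of sliding_window_chunk's overlap behaviour
words = " ".join(str(i) for i in range(10))  # "0 1 2 ... 9"
print(sliding_window_chunk(words, overlap=2, chunk_size=5))
# ['0 1 2 3 4', '3 4 5 6 7', '6 7 8 9', '9']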