{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "02186871", "metadata": {}, "outputs": [], "source": [ "import requests\n", "from dotenv import load_dotenv\n", "import os" ] }, { "cell_type": "code", "execution_count": 3, "id": "b2275123", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "DEFAULT_API_URL = \"https://agents-course-unit4-scoring.hf.space\"\n", "api_url = DEFAULT_API_URL\n", "questions_url = f\"{api_url}/questions\"\n", "\n", "\n", "load_dotenv()\n", "#print(os.environ[\"OPENAI_API_KEY\"])\n", "#print(os.environ[\"TAVILY_API_KEY\"])" ] }, { "cell_type": "code", "execution_count": 4, "id": "8faab0d9", "metadata": {}, "outputs": [], "source": [ "response = requests.get(questions_url, timeout=15)\n", "response.raise_for_status()\n", "questions_data = response.json()" ] }, { "cell_type": "code", "execution_count": 5, "id": "eb345cd7", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "USER_AGENT environment variable not set, consider setting it to identify your requests.\n" ] } ], "source": [ "from langchain.tools import tool\n", "#from langchain_community.tools import DuckDuckGoSearchRun\n", "from langchain_community.tools import TavilySearchResults\n", "from langchain_community.document_loaders import WebBaseLoader\n", "from langchain_community.document_loaders import YoutubeLoader\n", "from langchain_community.document_loaders import WikipediaLoader\n", "from langchain_community.document_loaders import ArxivLoader\n", "from typing import TypedDict, Annotated\n", "from langgraph.graph.message import add_messages\n", "from langchain_core.messages import AnyMessage, HumanMessage, AIMessage\n", "from langgraph.prebuilt import ToolNode, tools_condition\n", "from langgraph.graph import START, StateGraph\n", "#from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace\n", "from langchain_openai import ChatOpenAI\n", "\n", "@tool\n", "def add(a: float, b: float) -> float:\n", " \"\"\"Add two integers and return the result.\"\"\"\n", " return a + b\n", "\n", "@tool\n", "def subtract(a: float, b: float) -> float:\n", " \"\"\"Subtract two integers and return the result.\"\"\"\n", " return a - b\n", "\n", "@tool\n", "def multiply(a: float, b: float) -> float:\n", " \"\"\"Multiply two integers and return the result.\"\"\"\n", " return a * b\n", "\n", "@tool\n", "def divide(a: float, b: float) -> float:\n", " \"\"\"Divide two integers and return the result.\"\"\"\n", " if b == 0:\n", " raise ValueError(\"Cannot divide by zero.\")\n", " return a / b\n", "\n", "@tool\n", "def exponentiate(base: float, exponent: float) -> float:\n", " \"\"\"Raise a number to the power of another number and return the result.\"\"\"\n", " return base ** exponent\n", "\n", "@tool\n", "def modulus(a: float, b: float) -> float:\n", " \"\"\"Return the modulus of two integers.\"\"\"\n", " return a % b\n", "\n", "@tool\n", "def wiki_search(query: str) -> str:\n", " \"\"\"Search Wikipedia and returns only 2 results. \n", " \n", " Args:\n", " query: The search query.\"\"\"\n", " docs = WikipediaLoader(query=query, load_max_docs=2).load()\n", " res = \"\\n#######\\n\".join(\n", " [\n", " f\"Document {i+1}:\\nSource: {doc.metadata.get('source', '')}\\nPage: {doc.metadata.get('page', '')}\\nContent:\\n{doc.page_content}\\n\"\n", " for i, doc in enumerate(docs)\n", " ])\n", " print(f\"load wiki page : {res}\")\n", " return {\"results\": res}\n", "\n", "@tool\n", "def load_web_page(url: str) -> str:\n", " \"\"\"Load a web page and return its content.\n", " \n", " Args:\n", " url: The URL of the web page to load.\n", " \"\"\"\n", " loader = WebBaseLoader(url)\n", " docs = loader.load()\n", " res = \"\\n#######\\n\".join(\n", " [\n", " f\"Document {i+1}:\\nSource: {doc.metadata.get('source', '')}\\nPage: {doc.metadata.get('page', '')}\\nContent:\\n{doc.page_content}\\n\"\n", " for i, doc in enumerate(docs)\n", " ])\n", " print(f\"load web page : {res}\")\n", " return {\"results\": res}\n", " \n", "@tool\n", "def paper_search(query: str) -> str:\n", " \"\"\"Search Arxiv for a query and return maximum 3 result.\n", " \n", " Args:\n", " query: The search query.\"\"\"\n", " docs = ArxivLoader(query=query, load_max_docs=3).load()\n", " res = \"\\n#######\\n\".join(\n", " [\n", " f\"Document {i+1}:\\nSource: {doc.metadata.get('source', '')}\\nPage: {doc.metadata.get('page', '')}\\nContent:\\n{doc.page_content}\\n\"\n", " for i, doc in enumerate(docs)\n", " ])\n", " print(f\"load paper page : {res}\")\n", " return {\"results\": res}\n", "\n", "@tool\n", "def understand_image(text: str, image_url: str):\n", " \"\"\"\n", " Sends a text prompt and an image URL to OpenAI's API using the ChatOpenAI model.\n", " Returns the model's response.\n", "\n", " Args:\n", " text (str): The text prompt to send.\n", " image_url (str): URL to the image to send.\n", "\n", " Returns:\n", " str: The response from the model.\n", " \"\"\"\n", "\n", " # Fetch image from URL and encode as base64\n", " #response = requests.get(image_url)\n", " #image_bytes = response.content\n", " #image_b64 = base64.b64encode(image_bytes).decode(\"utf-8\")\n", "\n", " # Prepare message with text and image\n", " message = HumanMessage(\n", " content=[\n", " {\"type\": \"text\", \"text\": text},\n", " #{\"type\": \"image_url\", \"image_url\": {\"url\": f\"data:image/png;base64,{image_b64}\", \"detail\": \"auto\"}}\n", " {\"type\": \"image_url\", \"image_url\": {\"url\": image_url}}\n", " ]\n", " )\n", " model = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n", " response = model.invoke([message])\n", " return response.content\n", "\n", "@tool\n", "def load_youtube_video(url: str) -> str:\n", " \"\"\"Load a YouTube video and return its content.\"\"\"\n", " loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)\n", " documents = loader.load()\n", " return documents[0].page_content if documents else \"No content found\"\n", "\n", "@tool\n", "def web_search(query: str) -> str:\n", " \"\"\"Search Tavily for a query and return maximum 5 results.\n", " \n", " Args:\n", " query: The search query.\"\"\"\n", " documents = TavilySearchResults(max_results=5).invoke(input=query)\n", " res = \"\\n#######\\n\".join(\n", " [\n", " f\"Document {i+1}:\\nContent: {doc['content']}\\n\"\n", " for i, doc in enumerate(documents)\n", " ])\n", " print(f\"load tavily search : {res}\")\n", " return {\"results\": res}\n", "\n", "@tool\n", "def transcribe_audio(audio_url: str) -> str:\n", " \"\"\"Transcribe audio from a URL and return the text.\n", " \n", " Args:\n", " audio_url: The URL of the audio file to transcribe.\n", " \"\"\"\n", " \n", " response = requests.get(audio_url)\n", " audio_file = \"audio.mp3\"\n", " with open(audio_file, \"wb\") as f:\n", " f.write(response.content)\n", "\n", " # Step 2: Send it to OpenAI's transcription API\n", " headers = {\n", " \"Authorization\": f\"Bearer {api_key}\"\n", " }\n", " files = {\n", " 'file': (audio_file, open(audio_file, 'rb')),\n", " 'model': (None, 'whisper-1')\n", " }\n", "\n", " transcribe_response = requests.post(\n", " \"https://api.openai.com/v1/audio/transcriptions\",\n", " headers=headers,\n", " files=files\n", " )\n", " print(f\"Transcription response: {transcribe_response.json()}\")\n", " return {\"results\": transcribe_response.json().get(\"text\", \"Transcription failed.\")}\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "1ac4d78a", "metadata": {}, "outputs": [], "source": [ "\n", "model = ChatOpenAI(model=\"gpt-4o-mini\", temperature=0)\n", "tools = [add, subtract, multiply, divide, exponentiate, web_search, paper_search, load_web_page, understand_image, transcribe_audio]\n", "model_with_tools = model.bind_tools(tools)" ] }, { "cell_type": "code", "execution_count": 15, "id": "b8f136a6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_nUuMRddDxj42ZLqmcvcldXGl', 'function': {'arguments': '{\"query\":\"Featured Article dinosaur November 2016 site:en.wikipedia.org\"}', 'name': 'web_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 363, 'total_tokens': 389, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_96c46af214', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--adcbcb47-d3e2-4ded-9b3a-842e89b5df75-0', tool_calls=[{'name': 'web_search', 'args': {'query': 'Featured Article dinosaur November 2016 site:en.wikipedia.org'}, 'id': 'call_nUuMRddDxj42ZLqmcvcldXGl', 'type': 'tool_call'}], usage_metadata={'input_tokens': 363, 'output_tokens': 26, 'total_tokens': 389, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_with_tools.invoke([HumanMessage(\"Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?\")])" ] }, { "cell_type": "code", "execution_count": 7, "id": "050fafc8", "metadata": {}, "outputs": [], "source": [ "class AgentState(TypedDict):\n", " messages: Annotated[list[AnyMessage], add_messages]" ] }, { "cell_type": "code", "execution_count": 8, "id": "d3c345b0", "metadata": {}, "outputs": [], "source": [ "def assistant(state: AgentState):\n", " return {\n", " \"messages\": [model_with_tools.invoke(state[\"messages\"])],\n", " }" ] }, { "cell_type": "code", "execution_count": 9, "id": "8e861b9b", "metadata": {}, "outputs": [], "source": [ "builder = StateGraph(AgentState)\n", "\n", "builder.add_node(\"assistant\", assistant) \n", "builder.add_node(\"tools\", ToolNode(tools))\n", "\n", "builder.add_edge(START, \"assistant\")\n", "builder.add_conditional_edges(\"assistant\", tools_condition)\n", "builder.add_edge(\"tools\", \"assistant\")\n", "\n", "agent = builder.compile()\n", "\n" ] }, { "cell_type": "code", "execution_count": 10, "id": "69d23363", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'task_id': '305ac316-eef6-4446-960a-92d80d542f82',\n", " 'question': 'Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.',\n", " 'Level': '1',\n", " 'file_name': ''}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "questions_data[10]\n", "#TavilySearchResults(max_results=10).invoke(input=\"What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?\")\n", "#transcribe_audio(api_url + \"/files/\" + questions_data[9][\"task_id\"])" ] }, { "cell_type": "code", "execution_count": 11, "id": "ebd9bab7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "load tavily search : Document 1:\n", "Content: Magda M.\n", "\n", "Magda M. (Polish pronunciation: [ˈmaɡda ˈɛm]) was a Polish soap opera which aired on TVN from 2005 to 2007.\n", "\n", "Magda M. [...] Actor | Role | Status\n", "Joanna Brodzik | Magda Miłowicz | 2005–2007\n", "Paweł Małaszyński | Piotr Korzecki | 2005–2007\n", "Ewa Kasprzyk | Teresa Miłowicz | 2005–2007\n", "Bartłomiej Świderski | Sebastian Lewicki | 2005–2007\n", "Daria Widawska | Agata Bielecka | 2005–2007\n", "Krzysztof Stelmaszyk | Wiktor Waligóra | 2005–2007\n", "Katarzyna Herman | Karolina Waligóra | 2005–2007\n", "Bartek Kasprzykowski | Wojciech Płaska | 2005–2007\n", "Katarzyna Bujakiewicz | Mariola Adamska-Płaska | 2005–2007 [...] Genre | Soap opera\n", "Created by | Michał Kwieciński,Dorota Chamczyk\n", "Written by | Radosław Figura(Head writer)\n", "Starring | Joanna BrodzikPaweł MałaszyńskiEwa KasprzykBartłomiej ŚwiderskiDaria WidawskaKrzysztof StelmaszykKatarzyna HermanBartek KasprzykowskiKatarzyna BujakiewiczSzymon BobrowskiJacek BraciakPatrycja Durska\n", "No.of episodes | 55\n", "Production\n", "Executive producer | Dariusz Gąsiorowski\n", "Running time | 42–47 minutes\n", "Original release\n", "Network | TVN\n", "\n", "#######\n", "Document 2:\n", "Content: | \n", "GAIA\n", "| \n", "broccoli, celery, fresh basil, lettuce, sweet potatoes\n", "| \n", "None\n", "|\n", "| \n", "Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.\n", "| \n", "GAIA\n", "| \n", "Wojciech\n", "| \n", "None\n", "|\n", "| \n", "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?\n", "| \n", "GAIA\n", "| \n", "6\n", "| \n", "None\n", "|\n", "|\n", "\n", "#######\n", "Document 3:\n", "Content: Wszyscy kochają Romana (Everybody Loves Roman) is a Polish television sitcom that premiered on TVN on 2 September 2011.[1][2] The series is a Polish-language adaptation of the American Emmy Awards winner, Everybody Loves Raymond and stars Bartłomiej Kasprzykowski as the titular Roman, a newspaper sportswriter.\n", "\n", "#######\n", "Document 4:\n", "Content: Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name. | Wojciech | GAIA\n", "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need? | 6 | GAIA\n", "\n", "#######\n", "Document 5:\n", "Content: Show RomancesFavorite Sub-Plot Love StoriesFavorite TV CouplesFavorite \"will they, or won't they\" CouplesGuilty Pleasure CouplesHot SeatHugo & Nika - Operación Triunfo (Spanish)If You Could Only Choose One Couple...Juan Miguel & Marichuy - Cuidado con el Angel (Spanish)Keepers ListLove Triangles: Love it? Hate it?Most Romantic MoviesMost Romantic On-Screen KissMusician CouplesMy Best Friend's GirlOff Topic ThreadPeriod Drama CouplesPiotr & Magda - Magda M. (Polish)Polish (Foreign)Pop Culture's\n", "\n", "Question: Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.\n", "🎩 Agent's Response:\n", "The actor who played Ray in the Polish-language version of Everybody Loves Raymond played \"Wojciech\" in Magda M.\n" ] } ], "source": [ "import os\n", "file_name = questions_data[10]['file_name']\n", "query = questions_data[10]['question']\n", "if file_name:\n", " image_path = api_url + \"/files/\" + file_name.split(\".\")[0]\n", " msg = f\"{query} -- filename={image_path}\"\n", "else:\n", " msg = query\n", "messages = [HumanMessage(content=msg),]\n", "response = agent.invoke({\"messages\": messages})\n", "\n", "print(f\"Question: {msg}\")\n", "print(\"🎩 Agent's Response:\")\n", "print(response['messages'][-1].content)" ] }, { "cell_type": "code", "execution_count": null, "id": "5d828780", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "py311", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.10" } }, "nbformat": 4, "nbformat_minor": 5 }