{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "02186871",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from dotenv import load_dotenv\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b2275123",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Base URL of the scoring service and the endpoint the question list is fetched from.\n",
    "DEFAULT_API_URL = \"https://agents-course-unit4-scoring.hf.space\"\n",
    "api_url = DEFAULT_API_URL\n",
    "questions_url = f\"{api_url}/questions\"\n",
    "\n",
    "\n",
    "# Presumably loads OPENAI_API_KEY / TAVILY_API_KEY from a local .env file into os.environ -- confirm against the python-dotenv docs.\n",
    "load_dotenv()\n",
    "# Debug helpers, deliberately left commented out: running them would write the secrets into the saved cell output.\n",
    "#print(os.environ[\"OPENAI_API_KEY\"])\n",
    "#print(os.environ[\"TAVILY_API_KEY\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8faab0d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the question list; the 15 s timeout keeps a stalled server from hanging the kernel.\n",
    "response = requests.get(questions_url, timeout=15)\n",
    "# Raise on a 4xx/5xx status instead of trying to parse an error body as JSON.\n",
    "response.raise_for_status()\n",
    "# Later cells index this by position and read the 'question' and 'file_name' keys of an entry.\n",
    "questions_data = response.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "eb345cd7",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "USER_AGENT environment variable not set, consider setting it to identify your requests.\n"
     ]
    }
   ],
   "source": [
    "from langchain.tools import tool\n",
    "#from langchain_community.tools import DuckDuckGoSearchRun\n",
    "from langchain_community.tools import TavilySearchResults\n",
    "from langchain_community.document_loaders import WebBaseLoader\n",
    "from langchain_community.document_loaders import YoutubeLoader\n",
    "from langchain_community.document_loaders import WikipediaLoader\n",
    "from langchain_community.document_loaders import ArxivLoader\n",
    "from typing import TypedDict, Annotated\n",
    "from langgraph.graph.message import add_messages\n",
    "from langchain_core.messages import AnyMessage, HumanMessage, AIMessage\n",
    "from langgraph.prebuilt import ToolNode, tools_condition\n",
    "from langgraph.graph import START, StateGraph\n",
    "#from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "@tool\n",
    "def add(a: float, b: float) -> float:\n",
    "    \"\"\"Add two numbers (integers or decimals) and return the sum.\"\"\"\n",
    "    # The docstring is the tool description shown to the model, so it has to match the float signature.\n",
    "    return a + b\n",
    "\n",
    "@tool\n",
    "def subtract(a: float, b: float) -> float:\n",
    "    \"\"\"Subtract b from a (integers or decimals) and return the difference.\"\"\"\n",
    "    # The docstring is the tool description shown to the model; it states the operand order explicitly.\n",
    "    return a - b\n",
    "\n",
    "@tool\n",
    "def multiply(a: float, b: float) -> float:\n",
    "    \"\"\"Multiply two numbers (integers or decimals) and return the product.\"\"\"\n",
    "    # The docstring is the tool description shown to the model, so it has to match the float signature.\n",
    "    return a * b\n",
    "\n",
    "@tool\n",
    "def divide(a: float, b: float) -> float:\n",
    "    \"\"\"Divide a by b (integers or decimals) and return the quotient. Fails if b is zero.\"\"\"\n",
    "    # Raise a readable ValueError rather than letting ZeroDivisionError surface from the division.\n",
    "    if b == 0:\n",
    "        raise ValueError(\"Cannot divide by zero.\")\n",
    "    return a / b\n",
    "\n",
    "@tool\n",
    "def exponentiate(base: float, exponent: float) -> float:\n",
    "    \"\"\"Raise a number to the power of another number and return the result.\"\"\"\n",
    "    # Two-argument pow() is the function form of the ** operator.\n",
    "    return pow(base, exponent)\n",
    "\n",
    "@tool\n",
    "def modulus(a: float, b: float) -> float:\n",
    "    \"\"\"Return the remainder of a divided by b (a % b). Fails if b is zero.\"\"\"\n",
    "    # Same guard as `divide`: report a zero divisor with a readable ValueError.\n",
    "    if b == 0:\n",
    "        raise ValueError(\"Cannot take the modulus by zero.\")\n",
    "    return a % b\n",
    "\n",
    "def _format_documents(docs) -> str:\n",
    "    \"\"\"Render loaded documents as numbered 'Document N' sections joined by a '#######' separator line.\"\"\"\n",
    "    return \"\\n#######\\n\".join(\n",
    "        f\"Document {i+1}:\\nSource: {doc.metadata.get('source', '')}\\nPage: {doc.metadata.get('page', '')}\\nContent:\\n{doc.page_content}\\n\"\n",
    "        for i, doc in enumerate(docs)\n",
    "    )\n",
    "\n",
    "@tool\n",
    "def wiki_search(query: str) -> dict[str, str]:\n",
    "    \"\"\"Search Wikipedia and returns only 2 results. \n",
    "    \n",
    "    Args:\n",
    "        query: The search query.\"\"\"\n",
    "    docs = WikipediaLoader(query=query, load_max_docs=2).load()\n",
    "    res = _format_documents(docs)\n",
    "    print(f\"load wiki page : {res}\")\n",
    "    # The payload is a dict with a single 'results' key, which is what the annotation now states.\n",
    "    return {\"results\": res}\n",
    "\n",
    "@tool\n",
    "def load_web_page(url: str) -> dict[str, str]:\n",
    "    \"\"\"Load a web page and return its content.\n",
    "    \n",
    "    Args:\n",
    "        url: The URL of the web page to load.\n",
    "    \"\"\"\n",
    "    docs = WebBaseLoader(url).load()\n",
    "    res = _format_documents(docs)\n",
    "    print(f\"load web page : {res}\")\n",
    "    # The payload is a dict with a single 'results' key, which is what the annotation now states.\n",
    "    return {\"results\": res}\n",
    "\n",
    "@tool\n",
    "def paper_search(query: str) -> dict[str, str]:\n",
    "    \"\"\"Search Arxiv for a query and return maximum 3 result.\n",
    "    \n",
    "    Args:\n",
    "        query: The search query.\"\"\"\n",
    "    docs = ArxivLoader(query=query, load_max_docs=3).load()\n",
    "    res = _format_documents(docs)\n",
    "    print(f\"load paper page : {res}\")\n",
    "    # The payload is a dict with a single 'results' key, which is what the annotation now states.\n",
    "    return {\"results\": res}\n",
    "\n",
    "@tool\n",
    "def understand_image(text: str, image_url: str):\n",
    "    \"\"\"\n",
    "    Sends a text prompt and an image URL to OpenAI's API using the ChatOpenAI model.\n",
    "    Returns the model's response.\n",
    "\n",
    "    Args:\n",
    "        text (str): The text prompt to send.\n",
    "        image_url (str): URL to the image to send.\n",
    "\n",
    "    Returns:\n",
    "        str: The response from the model.\n",
    "    \"\"\"\n",
    "    # The image is referenced by URL; it is not downloaded or base64-encoded here.\n",
    "    prompt_part = {\"type\": \"text\", \"text\": text}\n",
    "    image_part = {\"type\": \"image_url\", \"image_url\": {\"url\": image_url}}\n",
    "\n",
    "    # One multimodal human message carrying both parts, sent to a dedicated gpt-4o instance.\n",
    "    vision_model = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n",
    "    reply = vision_model.invoke([HumanMessage(content=[prompt_part, image_part])])\n",
    "    return reply.content\n",
    "\n",
    "@tool\n",
    "def load_youtube_video(url: str) -> str:\n",
    "    \"\"\"Load a YouTube video and return its content.\"\"\"\n",
    "    video_docs = YoutubeLoader.from_youtube_url(url, add_video_info=True).load()\n",
    "    # Guard clause: the loader produced nothing to return.\n",
    "    if not video_docs:\n",
    "        return \"No content found\"\n",
    "    # Only the first document's text is handed back.\n",
    "    return video_docs[0].page_content\n",
    "\n",
    "@tool\n",
    "def web_search(query: str) -> dict[str, str]:\n",
    "    \"\"\"Search Tavily for a query and return maximum 5 results.\n",
    "    \n",
    "    Args:\n",
    "        query: The search query.\"\"\"\n",
    "    documents = TavilySearchResults(max_results=5).invoke(input=query)\n",
    "    # When the Tavily call fails the tool hands back an error string instead of a list of hits;\n",
    "    # iterating that string and indexing ['content'] would raise TypeError, so report it instead.\n",
    "    if not isinstance(documents, list):\n",
    "        print(f\"load tavily search failed : {documents}\")\n",
    "        return {\"results\": f\"Web search failed: {documents}\"}\n",
    "    res = \"\\n#######\\n\".join(\n",
    "        f\"Document {i+1}:\\nContent: {doc.get('content', '')}\\n\"\n",
    "        for i, doc in enumerate(documents)\n",
    "    )\n",
    "    print(f\"load tavily search : {res}\")\n",
    "    # The payload is a dict with a single 'results' key, which is what the annotation now states.\n",
    "    return {\"results\": res}\n",
    "\n",
    "@tool\n",
    "def transcribe_audio(audio_url: str) -> dict[str, str]:\n",
    "    \"\"\"Transcribe audio from a URL and return the text.\n",
    "    \n",
    "    Args:\n",
    "        audio_url: The URL of the audio file to transcribe.\n",
    "    \"\"\"\n",
    "    # Step 1: download the audio. Fail on an HTTP error so an error page is never\n",
    "    # saved and uploaded as if it were audio; the timeout avoids hanging the kernel.\n",
    "    response = requests.get(audio_url, timeout=30)\n",
    "    response.raise_for_status()\n",
    "    audio_file = \"audio.mp3\"\n",
    "    with open(audio_file, \"wb\") as f:\n",
    "        f.write(response.content)\n",
    "\n",
    "    # Step 2: Send it to OpenAI's transcription API.\n",
    "    # No `api_key` variable exists in this notebook, so the key is read from the\n",
    "    # OPENAI_API_KEY environment variable (a missing key raises KeyError).\n",
    "    headers = {\n",
    "        \"Authorization\": f\"Bearer {os.environ['OPENAI_API_KEY']}\"\n",
    "    }\n",
    "\n",
    "    # The context manager closes the upload handle even when the request fails.\n",
    "    with open(audio_file, \"rb\") as audio_stream:\n",
    "        files = {\n",
    "            'file': (audio_file, audio_stream),\n",
    "            'model': (None, 'whisper-1')\n",
    "        }\n",
    "        transcribe_response = requests.post(\n",
    "            \"https://api.openai.com/v1/audio/transcriptions\",\n",
    "            headers=headers,\n",
    "            files=files,\n",
    "            timeout=120\n",
    "        )\n",
    "\n",
    "    # Parse the body once; a reply without a 'text' field yields the fallback message.\n",
    "    payload = transcribe_response.json()\n",
    "    print(f\"Transcription response: {payload}\")\n",
    "    return {\"results\": payload.get(\"text\", \"Transcription failed.\")}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "1ac4d78a",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Chat model that drives the agent loop.\n",
    "model = ChatOpenAI(temperature=0, model=\"gpt-4o-mini\")\n",
    "\n",
    "# NOTE(review): `modulus`, `wiki_search` and `load_youtube_video` are defined above but are not\n",
    "# registered here -- confirm that leaving them out is intentional.\n",
    "tools = [\n",
    "    add,\n",
    "    subtract,\n",
    "    multiply,\n",
    "    divide,\n",
    "    exponentiate,\n",
    "    web_search,\n",
    "    paper_search,\n",
    "    load_web_page,\n",
    "    understand_image,\n",
    "    transcribe_audio,\n",
    "]\n",
    "\n",
    "# Binding makes the tool list available to the model on every call.\n",
    "model_with_tools = model.bind_tools(tools)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "b8f136a6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_nUuMRddDxj42ZLqmcvcldXGl', 'function': {'arguments': '{\"query\":\"Featured Article dinosaur November 2016 site:en.wikipedia.org\"}', 'name': 'web_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 363, 'total_tokens': 389, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_96c46af214', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--adcbcb47-d3e2-4ded-9b3a-842e89b5df75-0', tool_calls=[{'name': 'web_search', 'args': {'query': 'Featured Article dinosaur November 2016 site:en.wikipedia.org'}, 'id': 'call_nUuMRddDxj42ZLqmcvcldXGl', 'type': 'tool_call'}], usage_metadata={'input_tokens': 363, 'output_tokens': 26, 'total_tokens': 389, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Smoke test: one direct model call (the graph is not involved) to inspect the reply to a sample question.\n",
    "model_with_tools.invoke([HumanMessage(\"Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?\")])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "050fafc8",
   "metadata": {},
   "outputs": [],
   "source": [
    "class AgentState(TypedDict):\n",
    "    \"\"\"State passed between graph nodes; it holds only the conversation messages.\"\"\"\n",
    "\n",
    "    # `add_messages` is attached as annotation metadata; presumably LangGraph uses it to merge\n",
    "    # each node's returned messages into this list -- confirm in the LangGraph docs.\n",
    "    messages: Annotated[list[AnyMessage], add_messages]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "d3c345b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def assistant(state: AgentState):\n",
    "    \"\"\"Graph node: run the tool-enabled model on the conversation and return its reply as a state update.\"\"\"\n",
    "    conversation = state[\"messages\"]\n",
    "    reply = model_with_tools.invoke(conversation)\n",
    "    # Only the new message is returned, wrapped in a list under the 'messages' key.\n",
    "    return {\"messages\": [reply]}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "8e861b9b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assemble the agent graph over AgentState.\n",
    "builder = StateGraph(AgentState)\n",
    "\n",
    "# Two nodes: the model call, and a ToolNode built from the `tools` list.\n",
    "builder.add_node(\"assistant\", assistant)    \n",
    "builder.add_node(\"tools\", ToolNode(tools))\n",
    "\n",
    "# Execution starts at the assistant node.\n",
    "builder.add_edge(START, \"assistant\")\n",
    "# `tools_condition` picks the route out of the assistant node; presumably it goes to \"tools\" when the\n",
    "# reply contains tool calls and ends the run otherwise -- confirm in the LangGraph docs.\n",
    "builder.add_conditional_edges(\"assistant\", tools_condition)\n",
    "# After the tools node, control goes back to the assistant node.\n",
    "builder.add_edge(\"tools\", \"assistant\")\n",
    "\n",
    "agent = builder.compile()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "69d23363",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'task_id': '305ac316-eef6-4446-960a-92d80d542f82',\n",
       " 'question': 'Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.',\n",
       " 'Level': '1',\n",
       " 'file_name': ''}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the question entry (index 10) that the next cell sends through the agent.\n",
    "questions_data[10]\n",
    "#TavilySearchResults(max_results=10).invoke(input=\"What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?\")\n",
    "#transcribe_audio(api_url + \"/files/\" + questions_data[9][\"task_id\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "ebd9bab7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load tavily search : Document 1:\n",
      "Content: Magda M.\n",
      "\n",
      "Magda M. (Polish pronunciation: [ˈmaɡda ˈɛm]) was a Polish soap opera which aired on TVN from 2005 to 2007.\n",
      "\n",
      "Magda M. [...] Actor | Role | Status\n",
      "Joanna Brodzik | Magda Miłowicz | 2005–2007\n",
      "Paweł Małaszyński | Piotr Korzecki | 2005–2007\n",
      "Ewa Kasprzyk | Teresa Miłowicz | 2005–2007\n",
      "Bartłomiej Świderski | Sebastian Lewicki | 2005–2007\n",
      "Daria Widawska | Agata Bielecka | 2005–2007\n",
      "Krzysztof Stelmaszyk | Wiktor Waligóra | 2005–2007\n",
      "Katarzyna Herman | Karolina Waligóra | 2005–2007\n",
      "Bartek Kasprzykowski | Wojciech Płaska | 2005–2007\n",
      "Katarzyna Bujakiewicz | Mariola Adamska-Płaska | 2005–2007 [...] Genre | Soap opera\n",
      "Created by | Michał Kwieciński,Dorota Chamczyk\n",
      "Written by | Radosław Figura(Head writer)\n",
      "Starring | Joanna BrodzikPaweł MałaszyńskiEwa KasprzykBartłomiej ŚwiderskiDaria WidawskaKrzysztof StelmaszykKatarzyna HermanBartek KasprzykowskiKatarzyna BujakiewiczSzymon BobrowskiJacek BraciakPatrycja Durska\n",
      "No.of episodes | 55\n",
      "Production\n",
      "Executive producer | Dariusz Gąsiorowski\n",
      "Running time | 42–47 minutes\n",
      "Original release\n",
      "Network | TVN\n",
      "\n",
      "#######\n",
      "Document 2:\n",
      "Content: | \n",
      "GAIA\n",
      "| \n",
      "broccoli, celery, fresh basil, lettuce, sweet potatoes\n",
      "| \n",
      "None\n",
      "|\n",
      "| \n",
      "Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.\n",
      "| \n",
      "GAIA\n",
      "| \n",
      "Wojciech\n",
      "| \n",
      "None\n",
      "|\n",
      "| \n",
      "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?\n",
      "| \n",
      "GAIA\n",
      "| \n",
      "6\n",
      "| \n",
      "None\n",
      "|\n",
      "|\n",
      "\n",
      "#######\n",
      "Document 3:\n",
      "Content: Wszyscy kochają Romana (Everybody Loves Roman) is a Polish television sitcom that premiered on TVN on 2 September 2011.[1][2] The series is a Polish-language adaptation of the American Emmy Awards winner, Everybody Loves Raymond and stars Bartłomiej Kasprzykowski as the titular Roman, a newspaper sportswriter.\n",
      "\n",
      "#######\n",
      "Document 4:\n",
      "Content: Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name. | Wojciech | GAIA\n",
      "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need? | 6 | GAIA\n",
      "\n",
      "#######\n",
      "Document 5:\n",
      "Content: Show RomancesFavorite Sub-Plot Love StoriesFavorite TV CouplesFavorite \"will they, or won't they\" CouplesGuilty Pleasure CouplesHot SeatHugo & Nika - Operación Triunfo (Spanish)If You Could Only Choose One Couple...Juan Miguel & Marichuy - Cuidado con el Angel (Spanish)Keepers ListLove Triangles: Love it? Hate it?Most Romantic MoviesMost Romantic On-Screen KissMusician CouplesMy Best Friend's GirlOff Topic ThreadPeriod Drama CouplesPiotr & Magda - Magda M. (Polish)Polish (Foreign)Pop Culture's\n",
      "\n",
      "Question: Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.\n",
      "🎩 Agent's Response:\n",
      "The actor who played Ray in the Polish-language version of Everybody Loves Raymond played \"Wojciech\" in Magda M.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Look the entry up once instead of repeating the hard-coded index.\n",
    "QUESTION_INDEX = 10\n",
    "question = questions_data[QUESTION_INDEX]\n",
    "file_name = question['file_name']\n",
    "query = question['question']\n",
    "if file_name:\n",
    "    # Build the file URL from the entry's own task_id rather than from the text before the first\n",
    "    # '.' of the file name, which only works while the file happens to be named '<task_id>.<ext>'.\n",
    "    image_path = api_url + \"/files/\" + question['task_id']\n",
    "    msg = f\"{query} -- filename={image_path}\"\n",
    "else:\n",
    "    msg = query\n",
    "messages = [HumanMessage(content=msg)]\n",
    "response = agent.invoke({\"messages\": messages})\n",
    "\n",
    "print(f\"Question: {msg}\")\n",
    "print(\"🎩 Agent's Response:\")\n",
    "# The last message in the returned state is printed as the agent's answer.\n",
    "print(response['messages'][-1].content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d828780",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "py311",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}