Final_Assignment_Template

Sleeping

App Files Files Community

sqfoo committed on May 15, 2025

Commit

ca811b8

verified ·

1 Parent(s): d075b73

Update agent.py

Browse files

Files changed (1) hide show

agent.py +61 -10

agent.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 from typing import TypedDict, List, Dict, Any, Optional
-from langchain.agents import create_tool_calling_agent, AgentExecutor
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_core.tools import tool
 from langchain_core.messages import HumanMessage
@@ -12,6 +12,8 @@ from langchain_community.document_loaders import ImageCaptionLoader
 import requests
 import pandas as pd
 from pypdf import PdfReader
 @tool
 def web_search(query: str) -> str:
@@ -35,6 +37,28 @@ def visit_webpage(url: str) -> str:
     except Exception as e:
         return f"[ERROR fetching {url}]: {str(e)}"
 # 4. File Reading
 @tool
 def read_file(dir: str) -> str:
@@ -88,23 +112,50 @@ class BasicAgent:
                 If you are asked for a string, don't use articles, neither abbreviations (eg. for cities), and write the digits in plain text unless specified otherwise.
                 If you are asked for a comma separated list, apply the above rules depending of whether the element to put in the list is a number or a string.
-                There are few tools provided: web_search, visit_webpage, read_file and image_caption.
-                Here are few examples demonstrating how to call and use the tools.
         """
-        self.tools = [web_search, visit_webpage, read_file, image_caption]
         self.prompt = ChatPromptTemplate.from_messages([
             ("system", self.sys_prompt),
-            ("human", "{input}"),
-            ("placeholder", "{agent_scratchpad}")
         ])
-        self.agent = create_tool_calling_agent(self.model, self.tools, self.prompt)
-        self.agent_exe = AgentExecutor(agent=self.agent, tools=self.tools, verbose=True)
         print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
         """Answer one question through the tool-calling executor (pre-commit version).

         Args:
             question: The question text; it is sent as "Question: <question>".

         Returns:
             The `.content` of the last item under the result's 'message' key.
         """
         # Log only the first 50 characters of the question.
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        response = self.agent_exe.invoke({"input": f"Question: {question}"})
         # NOTE(review): assumes the executor result exposes a 'message' list of
         # message objects - confirm against the AgentExecutor output schema.
-        fixed_answer = response['message'][-1].content
         # fixed_answer = "This is a default answer."
         print(f"Agent returning fixed answer: {fixed_answer}")
         return fixed_answer

 import os
 from typing import TypedDict, List, Dict, Any, Optional
+from langchain.agents import create_tool_calling_agent, AgentExecutor, initialize_agent
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_core.tools import tool
 from langchain_core.messages import HumanMessage
 import requests
 import pandas as pd
 from pypdf import PdfReader
+from langchain.tools import WikipediaTool
+from youtube_transcript_api import YouTubeTranscriptApi
 @tool
 def web_search(query: str) -> str:
     except Exception as e:
         return f"[ERROR fetching {url}]: {str(e)}"
+@tool
+def wiki_search(query: str) -> str:
+    """Wiki search tools.
+    Args:
+        query: what you want to wiki
+    """
+    # The docstring above is left untouched on purpose: @tool presumably exposes
+    # it to the model as the tool description - confirm before rewording it.
+    #
+    # Report failures as text, the same way visit_webpage and youtube_transcript
+    # do, so the model receives the failure as an observation instead of the
+    # exception propagating out of the tool.
+    # NOTE(review): confirm that WikipediaTool and its .query() method exist in
+    # the installed langchain release; if not, the module-level import fails
+    # before this function can ever run.
+    try:
+        return WikipediaTool().query(query)
+    except Exception as e:
+        return f"Error searching Wikipedia: {str(e)}"
+@tool
+def youtube_transcript(video_url: str) -> str:
+    """Fetched youtube transcript
+    Args:
+        video_url: YouTube video url
+    """
+    # The docstring above is left untouched on purpose: @tool presumably exposes
+    # it to the model as the tool description - confirm before rewording it.
+    #
+    # Imported locally so this block does not depend on a file-level import.
+    from urllib.parse import parse_qs, urlparse
+
+    try:
+        parsed = urlparse(video_url.strip())
+        host = (parsed.hostname or "").lower()
+        segments = [part for part in parsed.path.split("/") if part]
+        query_ids = parse_qs(parsed.query).get("v")
+        if query_ids:
+            # Watch URL: https://www.youtube.com/watch?v=<id>&t=...
+            # Parsing the query also drops any trailing "#fragment".
+            video_id = query_ids[0]
+        elif host.endswith("youtu.be") and segments:
+            # Short link: https://youtu.be/<id>?t=...
+            video_id = segments[0]
+        elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
+            # Path-style links: /shorts/<id>, /embed/<id>, /live/<id>, /v/<id>
+            video_id = segments[1]
+        else:
+            # Keep the original behaviour for anything else (e.g. a bare id
+            # or a scheme-less "v=<id>&..." fragment).
+            video_id = video_url.split("v=")[-1].split("&")[0]
+        transcript = YouTubeTranscriptApi.get_transcript(video_id)
+        return " ".join([item["text"] for item in transcript])
+    except Exception as e:
+        # Best-effort tool: report the failure as text rather than raising.
+        return f"Error fetching transcript: {str(e)}"
 # 4. File Reading
 @tool
 def read_file(dir: str) -> str:
                 If you are asked for a string, don't use articles, neither abbreviations (eg. for cities), and write the digits in plain text unless specified otherwise.
                 If you are asked for a comma separated list, apply the above rules depending of whether the element to put in the list is a number or a string.
+                You have access to the following tools:
+                - web_search: web search the content of the query by passing the query as input
+                - visit_webpage: visit the given webpage url by passing the url as input
+                - wiki_search: wiki search the content of the query by passing the query as input if the question asks for wiki search it
+                - youtube_transcript: fetch the transcript of the Youtube video by passing the video url as input if the question asks for watching a Youtube video
+                - read_file: read the content of the attached file by passing the file directory as input
+                - image_caption: understand the visual content of the attached image by passing the image directory as input
+                HERE are some examples illustrating how and what tools to call.
+                ---------------
+                TASK: Count how many birds in the provided Youtube video.
+                ACTION: Call youtube_transcript tool to extract video transcript. Use LLM to understand the retrived transcript.
+                TASK: How many Grammy Awards that Taylor Swift has won.
+                ACTION: Call the web_search tools with the query: 'how many Grammy Awards that Taylor Swift has won.' to extract the answer.
+                TASK: Count how many people in this image.
+                ACTION: Call the image_caption tool by passing the image directory as input. Then, use LLM to understand the image caption and answer the question.
+                TASK: How much the total expense in this spreadsheet?
+                ACTION: Call the read_file tool to extract the content of the provided spreadfile. Then, use LLM to extract the amount of every expense and sum them up.
+                TASK: How many All England Title that Lee Chong Wei won?
+                ACTION: Call wiki_search with the query: "Lee Chong Wei". Extract the relevant row of All England Title and count how many rows is there.
         """
+        self.tools = [web_search, visit_webpage, wiki_search, youtube_transcript, read_file, image_caption]
         self.prompt = ChatPromptTemplate.from_messages([
             ("system", self.sys_prompt),
+            ("human", "{input}")
         ])
+        self.agent = initialize_agent(
+            tools=self.tools,
+            llm=self.model,
+            agent="zero-shot-react-description",  # ReAct agent type
+            verbose=True,
+            system_prompt=self.prompt
+        )
         print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
         """Answer one question by running it through the agent.

         Args:
             question: The question text; it is embedded in an
                 "Answer this question: <question>" prompt.

         Returns:
             Whatever `self.agent.run(...)` returns for that prompt.
         """
         # Log only the first 50 characters of the question.
         print(f"Agent received question (first 50 chars): {question[:50]}...")
+        # response = self.agent_exe.invoke({"input": f"Question: {question}"})
+        # fixed_answer = response['message'][-1].content
+        fixed_answer = self.agent.run(f"Answer this question: {question}")
         # fixed_answer = "This is a default answer."
         print(f"Agent returning fixed answer: {fixed_answer}")
         return fixed_answer