sqfoo committed on
Commit
ca811b8
·
verified ·
1 Parent(s): d075b73

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +61 -10
agent.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  from typing import TypedDict, List, Dict, Any, Optional
3
- from langchain.agents import create_tool_calling_agent, AgentExecutor
4
  from langchain_google_genai import ChatGoogleGenerativeAI
5
  from langchain_core.tools import tool
6
  from langchain_core.messages import HumanMessage
@@ -12,6 +12,8 @@ from langchain_community.document_loaders import ImageCaptionLoader
12
  import requests
13
  import pandas as pd
14
  from pypdf import PdfReader
 
 
15
 
16
  @tool
17
  def web_search(query: str) -> str:
@@ -35,6 +37,28 @@ def visit_webpage(url: str) -> str:
35
  except Exception as e:
36
  return f"[ERROR fetching {url}]: {str(e)}"
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # 4. File Reading
39
  @tool
40
  def read_file(dir: str) -> str:
@@ -88,23 +112,50 @@ class BasicAgent:
88
  If you are asked for a string, don't use articles, neither abbreviations (eg. for cities), and write the digits in plain text unless specified otherwise.
89
  If you are asked for a comma separated list, apply the above rules depending of whether the element to put in the list is a number or a string.
90
 
91
- There are few tools provided: web_search, visit_webpage, read_file and image_caption.
92
- Here are few examples demonstrating how to call and use the tools.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  """
94
- self.tools = [web_search, visit_webpage, read_file, image_caption]
95
  self.prompt = ChatPromptTemplate.from_messages([
96
  ("system", self.sys_prompt),
97
- ("human", "{input}"),
98
- ("placeholder", "{agent_scratchpad}")
99
  ])
100
- self.agent = create_tool_calling_agent(self.model, self.tools, self.prompt)
101
- self.agent_exe = AgentExecutor(agent=self.agent, tools=self.tools, verbose=True)
 
 
 
 
 
102
  print("BasicAgent initialized.")
103
 
104
  def __call__(self, question: str) -> str:
105
  print(f"Agent received question (first 50 chars): {question[:50]}...")
106
- response = self.agent_exe.invoke({"input": f"Question: {question}"})
107
- fixed_answer = response['message'][-1].content
 
108
  # fixed_answer = "This is a default answer."
109
  print(f"Agent returning fixed answer: {fixed_answer}")
110
  return fixed_answer
 
1
  import os
2
  from typing import TypedDict, List, Dict, Any, Optional
3
+ from langchain.agents import create_tool_calling_agent, AgentExecutor, initialize_agent
4
  from langchain_google_genai import ChatGoogleGenerativeAI
5
  from langchain_core.tools import tool
6
  from langchain_core.messages import HumanMessage
 
12
  import requests
13
  import pandas as pd
14
  from pypdf import PdfReader
15
+ from langchain.tools import WikipediaTool
16
+ from youtube_transcript_api import YouTubeTranscriptApi
17
 
18
  @tool
19
  def web_search(query: str) -> str:
 
37
  except Exception as e:
38
  return f"[ERROR fetching {url}]: {str(e)}"
39
 
40
+ @tool
41
+ def wiki_search(query: str) -> str:
42
+ """Wiki search tools.
43
+ Args:
44
+ query: what you want to wiki
45
+ """
46
+ return WikipediaTool().query(query)
47
+
48
+
49
+ @tool
50
+ def youtube_transcript(video_url: str) -> str:
51
+ """Fetched youtube transcript
52
+ Args:
53
+ video_url: YouTube video url
54
+ """
55
+ try:
56
+ video_id = video_url.split("v=")[-1].split("&")[0]
57
+ transcript = YouTubeTranscriptApi.get_transcript(video_id)
58
+ return " ".join([item["text"] for item in transcript])
59
+ except Exception as e:
60
+ return f"Error fetching transcript: {str(e)}"
61
+
62
  # 4. File Reading
63
  @tool
64
  def read_file(dir: str) -> str:
 
112
  If you are asked for a string, don't use articles, neither abbreviations (eg. for cities), and write the digits in plain text unless specified otherwise.
113
  If you are asked for a comma separated list, apply the above rules depending of whether the element to put in the list is a number or a string.
114
 
115
+ You have access to the following tools:
116
+ - web_search: web search the content of the query by passing the query as input
117
+ - visit_webpage: visit the given webpage url by passing the url as input
118
+ - wiki_search: wiki search the content of the query by passing the query as input if the question asks for wiki search it
119
+ - youtube_transcript: fetch the transcript of the Youtube video by passing the video url as input if the question asks for watching a Youtube video
120
+ - read_file: read the content of the attached file by passing the file directory as input
121
+ - image_caption: understand the visual content of the attached image by passing the image directory as input
122
+
123
+ HERE are some examples illustrating how and what tools to call.
124
+ ---------------
125
+ TASK: Count how many birds in the provided Youtube video.
126
+ ACTION: Call youtube_transcript tool to extract video transcript. Use LLM to understand the retrived transcript.
127
+
128
+ TASK: How many Grammy Awards that Taylor Swift has won.
129
+ ACTION: Call the web_search tools with the query: 'how many Grammy Awards that Taylor Swift has won.' to extract the answer.
130
+
131
+ TASK: Count how many people in this image.
132
+ ACTION: Call the image_caption tool by passing the image directory as input. Then, use LLM to understand the image caption and answer the question.
133
+
134
+ TASK: How much the total expense in this spreadsheet?
135
+ ACTION: Call the read_file tool to extract the content of the provided spreadfile. Then, use LLM to extract the amount of every expense and sum them up.
136
+
137
+ TASK: How many All England Title that Lee Chong Wei won?
138
+ ACTION: Call wiki_search with the query: "Lee Chong Wei". Extract the relevant row of All England Title and count how many rows is there.
139
  """
140
+ self.tools = [web_search, visit_webpage, wiki_search, youtube_transcript, read_file, image_caption]
141
  self.prompt = ChatPromptTemplate.from_messages([
142
  ("system", self.sys_prompt),
143
+ ("human", "{input}")
 
144
  ])
145
+ self.agent = initialize_agent(
146
+ tools=self.tools,
147
+ llm=self.model,
148
+ agent="zero-shot-react-description", # ReAct agent type
149
+ verbose=True,
150
+ system_prompt=self.prompt
151
+ )
152
  print("BasicAgent initialized.")
153
 
154
  def __call__(self, question: str) -> str:
155
  print(f"Agent received question (first 50 chars): {question[:50]}...")
156
+ # response = self.agent_exe.invoke({"input": f"Question: {question}"})
157
+ # fixed_answer = response['message'][-1].content
158
+ fixed_answer = self.agent.run(f"Answer this question: {question}")
159
  # fixed_answer = "This is a default answer."
160
  print(f"Agent returning fixed answer: {fixed_answer}")
161
  return fixed_answer