gokul-pv committed on
Commit
7fbe1a5
·
1 Parent(s): 656838a

prompt update and cleanup

Browse files
Files changed (3) hide show
  1. agent.py +42 -72
  2. requirements.in +1 -1
  3. requirements.txt +5 -5
agent.py CHANGED
@@ -14,7 +14,7 @@ from langchain_ollama import ChatOllama
14
  from langgraph.graph.message import add_messages
15
  from langgraph.graph import START, StateGraph
16
  from langgraph.prebuilt import ToolNode, tools_condition
17
-
18
  from langchain_google_genai import ChatGoogleGenerativeAI
19
  from langchain_groq import ChatGroq
20
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
@@ -25,9 +25,6 @@ from langchain_community.tools import (
25
  DuckDuckGoSearchRun,
26
  ArxivQueryRun,
27
  ShellTool,
28
- ReadFileTool,
29
- WriteFileTool,
30
- ListDirectoryTool,
31
  )
32
  from langchain_community.utilities import (
33
  WikipediaAPIWrapper,
@@ -110,7 +107,7 @@ def extract_text_multimodal(img_path: str) -> str:
110
  with open(img_path, "rb") as image_file:
111
  image_bytes = image_file.read()
112
 
113
- # image_base64 = base64.b64encode(image_bytes).decode("utf-8")
114
 
115
  message = [
116
  HumanMessage(
@@ -120,9 +117,8 @@ def extract_text_multimodal(img_path: str) -> str:
120
  "text": "Extract all the text from this image. Return only the extracted text, no explanations."
121
  },
122
  {
123
- "type": "image",
124
- # "image_url": {"url": f"data:image/png;base64,{image_base64}"}
125
- "image": image_bytes
126
  }
127
  ]
128
  )
@@ -149,44 +145,6 @@ def read_excel_file(file_path: str, sheet_name: Optional[str] = None) -> pd.Data
149
  return pd.DataFrame({"error": [f"Excel reading error: {str(e)}"]})
150
 
151
 
152
-
153
- @tool
154
- def analyze_excel_data(file_path: str, operation: str = "describe") -> str:
155
- """
156
- Analyze an Excel (.xlsx or .xls) file and perform basic statistical or structural operations.
157
-
158
- Parameters:
159
- file_path (str): The path to the Excel file to be analyzed.
160
- operation (str): The type of analysis to perform. Options include:
161
- - "describe": Returns summary statistics for numeric columns.
162
- - "info": Returns the DataFrame's structure and non-null counts.
163
- - "columns": Returns a list of column names in the Excel file.
164
- - "shape": Returns the shape of the DataFrame as a tuple (rows, columns).
165
-
166
- Returns:
167
- str: A string representation of the requested analysis output. If an error occurs
168
- or an unknown operation is provided, a descriptive message is returned.
169
- """
170
- try:
171
- df = pd.read_excel(file_path)
172
-
173
- if operation == "describe":
174
- return df.describe().to_string()
175
- elif operation == "info":
176
- import io
177
- buffer = io.StringIO()
178
- df.info(buf=buffer)
179
- return buffer.getvalue()
180
- elif operation == "columns":
181
- return str(list(df.columns))
182
- elif operation == "shape":
183
- return str(df.shape)
184
- else:
185
- return "Unknown operation. Available: describe, info, columns, shape"
186
- except Exception as e:
187
- return f"Excel analysis error: {str(e)}"
188
-
189
-
190
  @tool
191
  def visit_webpage(url: str) -> str:
192
  """
@@ -224,11 +182,6 @@ def build_tool():
224
  arxiv_tool = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
225
  shell_tool = ShellTool()
226
  python_repl = PythonREPLTool()
227
-
228
- # File management tools
229
- read_file = ReadFileTool()
230
- write_file = WriteFileTool()
231
- list_directory = ListDirectoryTool()
232
 
233
  # Combine built-in tools with custom tools
234
  all_tools = [
@@ -238,9 +191,6 @@ def build_tool():
238
  arxiv_tool,
239
  shell_tool,
240
  python_repl,
241
- read_file,
242
- write_file,
243
- list_directory,
244
 
245
  # Custom tools for specialized tasks
246
  reverse_text,
@@ -249,7 +199,6 @@ def build_tool():
249
  advanced_math,
250
  extract_text_multimodal,
251
  read_excel_file,
252
- analyze_excel_data,
253
  visit_webpage,
254
  ]
255
  return all_tools
@@ -289,11 +238,6 @@ def build_langgraph(provider: str, model: Optional[str] = None, temperature: flo
289
  tools_description = """
290
  Available tools for the tasks:
291
 
292
- FILE OPERATIONS:
293
- - read_file: Read content from files
294
- - write_file: Write content to files
295
- - list_directory: List files and directories
296
-
297
  WEB & SEARCH:
298
  - duckduckgo_search: Search the web for information
299
  - wikipedia_tool: Search Wikipedia for knowledge
@@ -314,7 +258,6 @@ def build_langgraph(provider: str, model: Optional[str] = None, temperature: flo
314
 
315
  DATA ANALYSIS:
316
  - read_excel_file: Read and preview Excel files
317
- - analyze_excel_data: Perform statistical analysis on Excel data
318
 
319
  SYSTEM:
320
  - shell_tool: Execute shell commands (use carefully)
@@ -323,16 +266,41 @@ def build_langgraph(provider: str, model: Optional[str] = None, temperature: flo
323
  file = state["input_file"]
324
  sys_msg = SystemMessage(
325
  content=(
326
- "You are a comprehensive AI agent designed to solve complex tasks using the tools provided. \n\n"
 
327
  f"{tools_description}\n\n"
328
- f"You have access to some optional files if any. Currently the loaded file is: {file}\n\n"
329
- "Approach problems systematically:\n"
330
- "1. Break down complex tasks into smaller steps\n"
331
- "2. Use appropriate tools for each step\n"
332
- "3. Chain multiple tools together when needed\n"
333
- "4. Verify results and provide clear explanations\n\n"
334
- "Be thorough and accurate in your responses. "
335
- "I will ask you a question. Report your thoughts, and finish your answer with only the final answer: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  )
337
  )
338
 
@@ -371,8 +339,10 @@ if __name__ == "__main__":
371
 
372
  # Example: Knowledge retrieval
373
  print("📚 Testing Wikipedia search...")
374
- messages = [HumanMessage(content="How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.")]
375
- result = react_graph.invoke({"messages": messages, "input_file": None})
 
 
376
 
377
  for m in result['messages']:
378
  m.pretty_print()
 
14
  from langgraph.graph.message import add_messages
15
  from langgraph.graph import START, StateGraph
16
  from langgraph.prebuilt import ToolNode, tools_condition
17
+ from langchain_core.runnables.config import RunnableConfig
18
  from langchain_google_genai import ChatGoogleGenerativeAI
19
  from langchain_groq import ChatGroq
20
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
 
25
  DuckDuckGoSearchRun,
26
  ArxivQueryRun,
27
  ShellTool,
 
 
 
28
  )
29
  from langchain_community.utilities import (
30
  WikipediaAPIWrapper,
 
107
  with open(img_path, "rb") as image_file:
108
  image_bytes = image_file.read()
109
 
110
+ image_base64 = base64.b64encode(image_bytes).decode("utf-8")
111
 
112
  message = [
113
  HumanMessage(
 
117
  "text": "Extract all the text from this image. Return only the extracted text, no explanations."
118
  },
119
  {
120
+ "type": "image_url",
121
+ "image_url": {"url": f"data:image/png;base64,{image_base64}"}
 
122
  }
123
  ]
124
  )
 
145
  return pd.DataFrame({"error": [f"Excel reading error: {str(e)}"]})
146
 
147
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  @tool
149
  def visit_webpage(url: str) -> str:
150
  """
 
182
  arxiv_tool = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
183
  shell_tool = ShellTool()
184
  python_repl = PythonREPLTool()
 
 
 
 
 
185
 
186
  # Combine built-in tools with custom tools
187
  all_tools = [
 
191
  arxiv_tool,
192
  shell_tool,
193
  python_repl,
 
 
 
194
 
195
  # Custom tools for specialized tasks
196
  reverse_text,
 
199
  advanced_math,
200
  extract_text_multimodal,
201
  read_excel_file,
 
202
  visit_webpage,
203
  ]
204
  return all_tools
 
238
  tools_description = """
239
  Available tools for the tasks:
240
 
 
 
 
 
 
241
  WEB & SEARCH:
242
  - duckduckgo_search: Search the web for information
243
  - wikipedia_tool: Search Wikipedia for knowledge
 
258
 
259
  DATA ANALYSIS:
260
  - read_excel_file: Read and preview Excel files
 
261
 
262
  SYSTEM:
263
  - shell_tool: Execute shell commands (use carefully)
 
266
  file = state["input_file"]
267
  sys_msg = SystemMessage(
268
  content=(
269
+ "You are an intelligent AI agent designed to solve complex problems using the tools provided.\n\n"
270
+ "=== Available Tools ===\n"
271
  f"{tools_description}\n\n"
272
+ "=== Optional Files ===\n"
273
+ f"Currently loaded file: {file}\n\n"
274
+ "=== Problem-Solving Process ===\n"
275
+ "Follow these steps carefully when answering a question:\n"
276
+ "1. Break the problem into smaller, manageable parts.\n"
277
+ "2. Choose the most suitable tool for each part.\n"
278
+ "3. Use multiple tools in sequence if needed.\n"
279
+ "4. Verify your results and explain your reasoning clearly.\n\n"
280
+ "Be precise and clear at every step. After your reasoning, provide ONLY the final answer.\n\n"
281
+ "=== Final Answer Format Rules ===\n"
282
+ "- For numbers: Use only digits (no commas or units) unless units are explicitly requested.\n"
283
+ "- For strings: Do not use articles (a, an, the) or abbreviations. Spell out all digits.\n"
284
+ "- For lists: Use commas to separate items. Apply the above number/string rules to each item.\n"
285
+ "- If the answer is unknown: Respond exactly with \"do not know\"\n\n"
286
+ "Example Question 1:\n\n"
287
+ "If Eliud Kipchoge could maintain his marathon pace indefinitely, how many thousand hours would it take him to run from Earth to the Moon at its closest approach? Use the minimum perigee distance from Wikipedia and round to the nearest 1000 hours. Do not use commas.\n\n"
288
+ "**Example Answer 1:**\n"
289
+ "17\n\n"
290
+ "**Example Reasoning Steps 1:**\n"
291
+ "1. Found Eliud Kipchoge's marathon pace: 4 minutes 37 seconds per mile.\n"
292
+ "2. Converted pace into hours per mile.\n"
293
+ "3. Found Moon's closest distance: 225623 miles.\n"
294
+ "4. Multiplied pace by distance to get total hours and rounded to nearest 1000.\n\n"
295
+ "Example Question 2:\n\n"
296
+ "Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.\n\n"
297
+ "**Example Answer 2:**\n"
298
+ "Yoshida, Uehara\n\n"
299
+ "**Example Reasoning Steps 2:**\n"
300
+ "1. Looked up Taishō Tamai on Wikipedia.\n"
301
+ "2. Found the pitcher with number 18 is Kōsei Yoshida.\n"
302
+ "3. Found the pitcher with number 20 is Kenta Uehara.\n\n"
303
+ "Now answer the following questions:\n"
304
  )
305
  )
306
 
 
339
 
340
  # Example: Knowledge retrieval
341
  print("📚 Testing Wikipedia search...")
342
+ messages = [HumanMessage(content="Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?")]
343
+
344
+ config = RunnableConfig(recursion_limit=10)
345
+ result = react_graph.invoke({"messages": messages, "input_file": None}, config)
346
 
347
  for m in result['messages']:
348
  m.pretty_print()
requirements.in CHANGED
@@ -17,5 +17,5 @@ langgraph-sdk
17
  pandas
18
  wikipedia
19
  markdownify
20
- duckduckgo-search
21
  arxiv
 
17
  pandas
18
  wikipedia
19
  markdownify
20
+ ddgs
21
  arxiv
requirements.txt CHANGED
@@ -46,17 +46,17 @@ charset-normalizer==3.4.2
46
  # via requests
47
  click==8.2.1
48
  # via
49
- # duckduckgo-search
50
  # typer
51
  # uvicorn
52
  dataclasses-json==0.6.7
53
  # via langchain-community
 
 
54
  distro==1.9.0
55
  # via
56
  # groq
57
  # openai
58
- duckduckgo-search==8.1.1
59
- # via -r requirements.in
60
  exceptiongroup==1.3.0
61
  # via anyio
62
  fastapi==0.116.1
@@ -205,7 +205,7 @@ langsmith==0.4.8
205
  # langchain-community
206
  # langchain-core
207
  lxml==6.0.0
208
- # via duckduckgo-search
209
  markdown-it-py==3.0.0
210
  # via rich
211
  markdownify==1.1.0
@@ -255,7 +255,7 @@ pandas==2.3.1
255
  pillow==11.3.0
256
  # via gradio
257
  primp==0.15.0
258
- # via duckduckgo-search
259
  propcache==0.3.2
260
  # via
261
  # aiohttp
 
46
  # via requests
47
  click==8.2.1
48
  # via
49
+ # ddgs
50
  # typer
51
  # uvicorn
52
  dataclasses-json==0.6.7
53
  # via langchain-community
54
+ ddgs==9.4.0
55
+ # via -r requirements.in
56
  distro==1.9.0
57
  # via
58
  # groq
59
  # openai
 
 
60
  exceptiongroup==1.3.0
61
  # via anyio
62
  fastapi==0.116.1
 
205
  # langchain-community
206
  # langchain-core
207
  lxml==6.0.0
208
+ # via ddgs
209
  markdown-it-py==3.0.0
210
  # via rich
211
  markdownify==1.1.0
 
255
  pillow==11.3.0
256
  # via gradio
257
  primp==0.15.0
258
+ # via ddgs
259
  propcache==0.3.2
260
  # via
261
  # aiohttp