sebastianfrench committed on
Commit
b4cd776
·
1 Parent(s): b97774a

add python file download

Browse files
Files changed (5) hide show
  1. agents/search_agent.py +10 -5
  2. app.py +7 -0
  3. graphs/evaluation.py +43 -19
  4. tools/__init__.py +2 -2
  5. tools/sandbox.py +38 -27
agents/search_agent.py CHANGED
@@ -21,7 +21,7 @@ class SearchAgent:
21
  "is_valid_answer": False,
22
  "has_enough_information": False,
23
  "answer": "",
24
- "step_counter" : {"validator": 0},
25
  }, config={"callbacks": [langfuse_handler]})
26
 
27
  return state["answer"]
@@ -29,10 +29,15 @@ class SearchAgent:
29
  if __name__ == "__main__":
30
  #question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
31
  #question = """How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."""
32
- question = """Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.
33
-
34
- What does Teal'c say in response to the question "Isn't that hot?"""
 
 
 
 
 
35
  agent = SearchAgent()
36
- submit_answer = agent(question)
37
 
38
  print(submit_answer)
 
21
  "is_valid_answer": False,
22
  "has_enough_information": False,
23
  "answer": "",
24
+ "step_counter" : {"iteration": 0,"validator": 0},
25
  }, config={"callbacks": [langfuse_handler]})
26
 
27
  return state["answer"]
 
29
if __name__ == "__main__":
    # Earlier test questions, kept for reference:
    # question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
    # question = """How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."""
    # question = """Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.
    # What does Teal'c say in response to the question "Isn't that hot?"""

    # Sample scoring task that ships with an attached Python file the agent
    # must download and execute.
    task_id = "f918266a-b3e0-4914-865d-4faa564f1aef"
    question_text = "What is the final numeric output from the attached Python code?"
    file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    # Fixed grammar ("there is a file available") to match the phrasing used
    # in app.py so the agent sees a consistent prompt format.
    question_with_file_info = (
        f"For this task there is a file available, with name {task_id}, "
        f"download it from {file_url}\n\n{question_text}"
    )

    agent = SearchAgent()
    submit_answer = agent(question_with_file_info)
    print(submit_answer)
app.py CHANGED
@@ -70,6 +70,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
70
  if not task_id or question_text is None:
71
  print(f"Skipping item with missing task_id or question: {item}")
72
  continue
 
 
 
 
 
 
 
73
  try:
74
  submitted_answer = agent(question_text)
75
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
 
70
  if not task_id or question_text is None:
71
  print(f"Skipping item with missing task_id or question: {item}")
72
  continue
73
+ #Check if there is a question file
74
+ file_name = item.get("file_name")
75
+ if file_name and file_name != "":
76
+ file_url = f"{api_url}/files/{task_id}"
77
+ question_with_file_info = f"For this task there is a file available, download it from {file_url}\n{question_text}"
78
+ question_text = question_with_file_info
79
+
80
  try:
81
  submitted_answer = agent(question_text)
82
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
graphs/evaluation.py CHANGED
@@ -1,5 +1,5 @@
1
  from models.models import groq_model, anthropic_model
2
- from tools import taivily_search, serper_search, execute_code, get_youtube_transcript
3
  from langgraph.graph import StateGraph, START, END
4
  from langchain_core.messages import SystemMessage, AIMessage, ToolMessage
5
  from typing import List, TypedDict
@@ -9,7 +9,8 @@ tools = [
9
  taivily_search,
10
  serper_search,
11
  get_youtube_transcript,
12
- execute_code
 
13
  ]
14
 
15
  class EvaluationState(TypedDict):
@@ -27,6 +28,7 @@ def call_node(state: EvaluationState):
27
  """
28
  This node call the model with the question and the tools
29
  """
 
30
  response = bounded_model_groq.invoke(state["messages"])
31
 
32
  state["messages"].append(response)
@@ -80,52 +82,74 @@ def validator(state: EvaluationState):
80
  """
81
  Validate if the answer fills the requirements
82
  """
83
- state["step_counter"]["validator"] = state["step_counter"].get("validator", 0) + 1
84
-
85
- if state["step_counter"]["validator"] >= 3:
86
- state["is_valid_answer"] = True
87
- return state
88
-
89
  answer = state["answer"]
90
  result = anthropic_model.invoke(f"Validate if the answer fits the next requirements: \n\n{answer}\n\nThe answer should be a number, string or a list of numbers and/or strings. If the answer fits the requirements, return just 'yes', otherwise return 'no'.")
91
 
92
- is_valid_answer = result.content.startswith("yes")
93
- state["is_valid_answer"] = is_valid_answer
94
  state["messages"].append(SystemMessage(content=f"Validator: {result.content}"))
95
 
96
  return state
97
 
98
  def route_validator(state):
99
- if state["is_valid_answer"]:
 
 
100
  return END
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  else:
102
  return "agent"
103
 
104
  def build_workflow():
105
  """
106
- Build search workflow with conditional edge for evaluation
107
  """
108
  workflow = StateGraph(EvaluationState)
109
  workflow.add_node("agent", call_node)
110
  workflow.add_node("action", tool_node)
 
111
  workflow.add_node("answer_question", answer_question)
112
  workflow.add_node("map_answer", map_answer)
113
  workflow.add_node("validator", validator)
114
-
115
  workflow.add_edge(START, "agent")
116
  workflow.add_edge("agent", "action")
117
- workflow.add_edge("action", "answer_question")
118
 
119
- workflow.add_edge("answer_question", "map_answer")
 
 
120
  workflow.add_edge("map_answer", "validator")
121
 
122
- workflow.add_conditional_edges("validator", route_validator, {"agent":"agent", END:END})
123
-
124
  return workflow.compile()
125
 
126
- if __name__ == "__main__":
127
  graph = build_workflow()
128
 
129
  mermaid_text = graph.get_graph().draw_mermaid()
130
 
131
- print(mermaid_text)
 
1
  from models.models import groq_model, anthropic_model
2
+ from tools import taivily_search, serper_search, execute_code, get_youtube_transcript, execute_python_file_url
3
  from langgraph.graph import StateGraph, START, END
4
  from langchain_core.messages import SystemMessage, AIMessage, ToolMessage
5
  from typing import List, TypedDict
 
9
  taivily_search,
10
  serper_search,
11
  get_youtube_transcript,
12
+ execute_code,
13
+ execute_python_file_url
14
  ]
15
 
16
  class EvaluationState(TypedDict):
 
28
  """
29
  This node call the model with the question and the tools
30
  """
31
+ # Convert any ToolMessage objects to a format Groq can handle
32
  response = bounded_model_groq.invoke(state["messages"])
33
 
34
  state["messages"].append(response)
 
82
  """
83
  Validate if the answer fills the requirements
84
  """
 
 
 
 
 
 
85
  answer = state["answer"]
86
  result = anthropic_model.invoke(f"Validate if the answer fits the next requirements: \n\n{answer}\n\nThe answer should be a number, string or a list of numbers and/or strings. If the answer fits the requirements, return just 'yes', otherwise return 'no'.")
87
 
88
+ state["is_valid_answer"] = result.content.startswith("yes")
 
89
  state["messages"].append(SystemMessage(content=f"Validator: {result.content}"))
90
 
91
  return state
92
 
93
def route_validator(state):
    """Route after validation: finish on a valid answer, retry otherwise.

    Ends the graph when the answer passed validation or after the third
    validation attempt; otherwise loops back to the "map_answer" node.
    NOTE(review): incrementing the counter inside a conditional-edge router
    may not be persisted by the graph state — confirm this survives the hop.
    """
    counter = state["step_counter"]
    counter["validator"] = counter.get("validator", 0) + 1

    give_up = counter["validator"] > 2
    if state["is_valid_answer"] or give_up:
        return END
    return "map_answer"
100
+
101
def evaluator(state):
    """
    Evaluate if the context information is enough to answer the question.

    Asks the Anthropic model for a yes/no verdict, stores the boolean in
    state["has_enough_information"], and appends the raw verdict to the
    message history for traceability.
    """
    prompt = f"""## Instruction
Answer just "yes" (without the quotes), if the context information is enough to answer the question.
## Question
{state["question"]}
## Relevant information
{state["external_information"]}
"""
    result = anthropic_model.invoke(prompt)
    # Normalize before matching: models frequently reply "Yes" or add
    # leading whitespace, which a bare startswith("yes") would misread as no.
    state["has_enough_information"] = result.content.strip().lower().startswith("yes")
    state["messages"].append(SystemMessage(content=f"Evaluator: {result.content}"))

    return state
117
+
118
def route_iteration(state):
    """Decide whether to answer now or gather more information.

    Bumps the iteration counter, then proceeds to "answer_question" once the
    evaluator judged the context sufficient or after the third iteration;
    otherwise sends the flow back to the "agent" node.
    """
    counter = state["step_counter"]
    counter["iteration"] = counter.get("iteration", 0) + 1

    done = state["has_enough_information"] or counter["iteration"] > 2
    return "answer_question" if done else "agent"
124
 
125
def build_workflow():
    """
    Build search workflow with conditional edge for evaluation and iteration.

    Flow: agent -> action -> evaluator; the evaluator loops back to the agent
    until enough information is gathered, then answer_question -> map_answer
    -> validator, which either ends the graph or retries map_answer.
    """
    graph = StateGraph(EvaluationState)

    # Register all nodes first, then wire the edges.
    for node_name, node_fn in [
        ("agent", call_node),
        ("action", tool_node),
        ("evaluator", evaluator),
        ("answer_question", answer_question),
        ("map_answer", map_answer),
        ("validator", validator),
    ]:
        graph.add_node(node_name, node_fn)

    graph.add_edge(START, "agent")
    graph.add_edge("agent", "action")
    graph.add_edge("action", "evaluator")

    # Iterate until the evaluator is satisfied, then produce an answer.
    graph.add_conditional_edges(
        "evaluator",
        route_iteration,
        {"answer_question": "answer_question", "agent": "agent"},
    )

    graph.add_edge("answer_question", "map_answer")
    graph.add_edge("map_answer", "validator")

    # Validation either finishes the run or retries the answer mapping.
    graph.add_conditional_edges(
        "validator",
        route_validator,
        {"map_answer": "map_answer", END: END},
    )

    return graph.compile()
 
150
+ """ if __name__ == "__main__":
151
  graph = build_workflow()
152
 
153
  mermaid_text = graph.get_graph().draw_mermaid()
154
 
155
+ print(mermaid_text) """
tools/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
  from tools.search import taivily_search, serper_search
2
- from tools.sandbox import execute_code, get_youtube_transcript
3
 
4
- __all__ = ["taivily_search", "serper_search", "execute_code", "get_youtube_transcript"]
 
1
  from tools.search import taivily_search, serper_search
2
+ from tools.sandbox import execute_code, get_youtube_transcript, execute_python_file_url
3
 
4
+ __all__ = ["taivily_search", "serper_search", "execute_code", "get_youtube_transcript", "execute_python_file_url"]
tools/sandbox.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from langchain_core.tools import tool
2
  from typing import Annotated
3
  from typing_extensions import Annotated
@@ -5,10 +6,7 @@ from langchain_core.tools.base import InjectedToolCallId
5
  from langchain_core.runnables import RunnableConfig
6
  from langgraph.types import Command
7
  from langchain_core.messages import ToolMessage
8
- import os
9
  from dotenv import load_dotenv
10
- import json
11
- import asyncio
12
  import tempfile
13
  from pathlib import Path
14
  import yt_dlp
@@ -19,20 +17,13 @@ load_dotenv()
19
  @tool
20
  def execute_code(code: str, tool_call_id: Annotated[str, InjectedToolCallId], config: RunnableConfig) -> Command:
21
  """
22
- Execute code in a secure E2B sandbox environment.
23
-
24
  Args:
25
  code: The code to execute. Should be Python code without the triple backticks.
26
  """
27
- try:
28
- loop = asyncio.get_event_loop()
29
- except RuntimeError:
30
- loop = asyncio.new_event_loop()
31
- asyncio.set_event_loop(loop)
32
-
33
- result = loop.run_until_complete(_execute_code_in_sandbox(code, os.getenv("E2B_API_KEY")))
34
 
35
- formatted_result = f"""# Code Execution Results
36
  ## Code
37
  ```python
38
  {code}
@@ -55,7 +46,7 @@ def execute_code(code: str, tool_call_id: Annotated[str, InjectedToolCallId], co
55
  }
56
  )
57
 
58
- async def _execute_code_in_sandbox(code: str, api_key: str):
59
  """Execute code in E2B sandbox and return the results."""
60
  sbx = Sandbox()
61
  execution = sbx.run_code(code)
@@ -63,11 +54,36 @@ async def _execute_code_in_sandbox(code: str, api_key: str):
63
  files = sbx.files.list("/")
64
 
65
  return {
66
- "stdout": execution.stdout,
67
- "stderr": execution.stderr,
68
  "files": files
69
  }
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  @tool
72
  def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCallId] = None, config: RunnableConfig = None) -> Command | str:
73
  """
@@ -136,20 +152,15 @@ def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCa
136
 
137
  """ if __name__ == "__main__":
138
  # Simple test: print "Hello World"
139
- test_code = "print(\"Hello World\")"
140
-
141
  # Build a minimal RunnableConfig with no external information
142
  config = RunnableConfig(**{"external_information": ""})
143
-
144
  # Execute the test code
145
  # Call the underlying function to bypass the BaseTool wrapper
146
- cmd: Command = execute_code.func(
147
- test_code,
148
  "test-call",
149
  config,
150
- )
151
-
152
- # Print the output from the sandbox execution
153
- updates = getattr(cmd, 'update', {}) or {}
154
- for msg in updates.get('messages', []):
155
- print(msg.content) """
 
1
+ import os
2
  from langchain_core.tools import tool
3
  from typing import Annotated
4
  from typing_extensions import Annotated
 
6
  from langchain_core.runnables import RunnableConfig
7
  from langgraph.types import Command
8
  from langchain_core.messages import ToolMessage
 
9
  from dotenv import load_dotenv
 
 
10
  import tempfile
11
  from pathlib import Path
12
  import yt_dlp
 
17
  @tool
18
  def execute_code(code: str, tool_call_id: Annotated[str, InjectedToolCallId], config: RunnableConfig) -> Command:
19
  """
20
+ Execute code in a e2b_code_interpreter sandbox and return the results.
 
21
  Args:
22
  code: The code to execute. Should be Python code without the triple backticks.
23
  """
24
+ result = _execute_code_in_sandbox(code, os.getenv("E2B_API_KEY"))
 
 
 
 
 
 
25
 
26
+ formatted_result = f"""
27
  ## Code
28
  ```python
29
  {code}
 
46
  }
47
  )
48
 
49
def _execute_code_in_sandbox(code: str, api_key: str):
    """Execute code in E2B sandbox and return the results.

    Args:
        code: Python source to run inside the sandbox.
        api_key: E2B API key; previously accepted but silently ignored, now
            forwarded to the Sandbox (E2B falls back to the E2B_API_KEY env
            var when it is None).

    Returns:
        dict with "stdout" / "stderr" (lists of log lines) and "files"
        (listing of the sandbox root directory).
    """
    # NOTE(review): the sandbox is never explicitly killed; it is left to
    # expire via E2B's own timeout — confirm that is intended.
    sbx = Sandbox(api_key=api_key)
    execution = sbx.run_code(code)

    files = sbx.files.list("/")

    return {
        "stdout": execution.logs.stdout,
        "stderr": execution.logs.stderr,
        "files": files
    }
61
 
62
@tool
def execute_python_file_url(file_url: str, tool_call_id: Annotated[str, InjectedToolCallId], config: RunnableConfig) -> Command:
    """
    Download a python file from a given URL, run it in the sandbox and return
    its output.

    Args:
        file_url: The URL of the file to download.

    Returns:
        A Command updating "external_information" and carrying a ToolMessage
        with the stdout produced by executing the downloaded code.
    """
    import shlex  # local import: only needed for quoting here

    sbx = Sandbox()
    file_name = "code.py"
    # Quote the URL: file_url originates from the task/model, and unquoted
    # interpolation into a shell command allows command injection.
    result = sbx.commands.run(f"wget -O {file_name} {shlex.quote(file_url)} && cat {file_name}")

    # wget writes its progress to stderr, so stdout is exactly the file body.
    result_code = _execute_code_in_sandbox(result.stdout, os.getenv("E2B_API_KEY"))
    # stdout is a list of log lines; join instead of concatenating in a loop.
    final_result = "".join(result_code["stdout"])

    return Command(
        update={
            "external_information": f"{config.get('external_information', '')}\n---\n# result {final_result}",
            "messages": [ToolMessage(content=final_result, tool_call_id=tool_call_id)]
        }
    )
86
+
87
  @tool
88
  def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCallId] = None, config: RunnableConfig = None) -> Command | str:
89
  """
 
152
 
153
  """ if __name__ == "__main__":
154
  # Simple test: print "Hello World"
155
+ url = "https://agents-course-unit4-scoring.hf.space/files/f918266a-b3e0-4914-865d-4faa564f1aef"
156
+
157
  # Build a minimal RunnableConfig with no external information
158
  config = RunnableConfig(**{"external_information": ""})
159
+ input = f"{url}"
160
  # Execute the test code
161
  # Call the underlying function to bypass the BaseTool wrapper
162
+ cmd: Command = execute_python_file_url.func(
163
+ input,
164
  "test-call",
165
  config,
166
+ ) """