Commit
·
b4cd776
1
Parent(s):
b97774a
add python file download
Browse files- agents/search_agent.py +10 -5
- app.py +7 -0
- graphs/evaluation.py +43 -19
- tools/__init__.py +2 -2
- tools/sandbox.py +38 -27
agents/search_agent.py
CHANGED
|
@@ -21,7 +21,7 @@ class SearchAgent:
|
|
| 21 |
"is_valid_answer": False,
|
| 22 |
"has_enough_information": False,
|
| 23 |
"answer": "",
|
| 24 |
-
"step_counter" : {"validator": 0},
|
| 25 |
}, config={"callbacks": [langfuse_handler]})
|
| 26 |
|
| 27 |
return state["answer"]
|
|
@@ -29,10 +29,15 @@ class SearchAgent:
|
|
| 29 |
if __name__ == "__main__":
|
| 30 |
#question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
|
| 31 |
#question = """How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."""
|
| 32 |
-
question = """Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.
|
| 33 |
-
|
| 34 |
-
What
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
agent = SearchAgent()
|
| 36 |
-
submit_answer = agent(
|
| 37 |
|
| 38 |
print(submit_answer)
|
|
|
|
| 21 |
"is_valid_answer": False,
|
| 22 |
"has_enough_information": False,
|
| 23 |
"answer": "",
|
| 24 |
+
"step_counter" : {"iteration": 0,"validator": 0},
|
| 25 |
}, config={"callbacks": [langfuse_handler]})
|
| 26 |
|
| 27 |
return state["answer"]
|
|
|
|
| 29 |
if __name__ == "__main__":
|
| 30 |
#question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
|
| 31 |
#question = """How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."""
|
| 32 |
+
#question = """Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.
|
| 33 |
+
task_id = "f918266a-b3e0-4914-865d-4faa564f1aef"
|
| 34 |
+
question_text = "What is the final numeric output from the attached Python code?"
|
| 35 |
+
file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
|
| 36 |
+
question_with_file_info = f"For this task there is file available, with name {task_id}, download it from {file_url}\n\n{question_text}"
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
#What does Teal'c say in response to the question "Isn't that hot?"""
|
| 40 |
agent = SearchAgent()
|
| 41 |
+
submit_answer = agent(question_with_file_info)
|
| 42 |
|
| 43 |
print(submit_answer)
|
app.py
CHANGED
|
@@ -70,6 +70,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 70 |
if not task_id or question_text is None:
|
| 71 |
print(f"Skipping item with missing task_id or question: {item}")
|
| 72 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
try:
|
| 74 |
submitted_answer = agent(question_text)
|
| 75 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
|
|
|
| 70 |
if not task_id or question_text is None:
|
| 71 |
print(f"Skipping item with missing task_id or question: {item}")
|
| 72 |
continue
|
| 73 |
+
#Check if there is a question file
|
| 74 |
+
file_name = item.get("file_name")
|
| 75 |
+
if file_name and file_name != "":
|
| 76 |
+
file_url = f"{api_url}/files/{task_id}"
|
| 77 |
+
question_with_file_info = f"For this task there is a file available, download it from {file_url}\n{question_text}"
|
| 78 |
+
question_text = question_with_file_info
|
| 79 |
+
|
| 80 |
try:
|
| 81 |
submitted_answer = agent(question_text)
|
| 82 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
graphs/evaluation.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
from models.models import groq_model, anthropic_model
|
| 2 |
-
from tools import taivily_search, serper_search, execute_code, get_youtube_transcript
|
| 3 |
from langgraph.graph import StateGraph, START, END
|
| 4 |
from langchain_core.messages import SystemMessage, AIMessage, ToolMessage
|
| 5 |
from typing import List, TypedDict
|
|
@@ -9,7 +9,8 @@ tools = [
|
|
| 9 |
taivily_search,
|
| 10 |
serper_search,
|
| 11 |
get_youtube_transcript,
|
| 12 |
-
execute_code
|
|
|
|
| 13 |
]
|
| 14 |
|
| 15 |
class EvaluationState(TypedDict):
|
|
@@ -27,6 +28,7 @@ def call_node(state: EvaluationState):
|
|
| 27 |
"""
|
| 28 |
This node call the model with the question and the tools
|
| 29 |
"""
|
|
|
|
| 30 |
response = bounded_model_groq.invoke(state["messages"])
|
| 31 |
|
| 32 |
state["messages"].append(response)
|
|
@@ -80,52 +82,74 @@ def validator(state: EvaluationState):
|
|
| 80 |
"""
|
| 81 |
Validate if the answer fills the requirements
|
| 82 |
"""
|
| 83 |
-
state["step_counter"]["validator"] = state["step_counter"].get("validator", 0) + 1
|
| 84 |
-
|
| 85 |
-
if state["step_counter"]["validator"] >= 3:
|
| 86 |
-
state["is_valid_answer"] = True
|
| 87 |
-
return state
|
| 88 |
-
|
| 89 |
answer = state["answer"]
|
| 90 |
result = anthropic_model.invoke(f"Validate if the answer fits the next requirements: \n\n{answer}\n\nThe answer should be a number, string or a list of numbers and/or strings. If the answer fits the requirements, return just 'yes', otherwise return 'no'.")
|
| 91 |
|
| 92 |
-
is_valid_answer = result.content.startswith("yes")
|
| 93 |
-
state["is_valid_answer"] = is_valid_answer
|
| 94 |
state["messages"].append(SystemMessage(content=f"Validator: {result.content}"))
|
| 95 |
|
| 96 |
return state
|
| 97 |
|
| 98 |
def route_validator(state):
|
| 99 |
-
|
|
|
|
|
|
|
| 100 |
return END
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
else:
|
| 102 |
return "agent"
|
| 103 |
|
| 104 |
def build_workflow():
|
| 105 |
"""
|
| 106 |
-
Build search workflow with conditional edge for evaluation
|
| 107 |
"""
|
| 108 |
workflow = StateGraph(EvaluationState)
|
| 109 |
workflow.add_node("agent", call_node)
|
| 110 |
workflow.add_node("action", tool_node)
|
|
|
|
| 111 |
workflow.add_node("answer_question", answer_question)
|
| 112 |
workflow.add_node("map_answer", map_answer)
|
| 113 |
workflow.add_node("validator", validator)
|
| 114 |
-
|
| 115 |
workflow.add_edge(START, "agent")
|
| 116 |
workflow.add_edge("agent", "action")
|
| 117 |
-
workflow.add_edge("action", "
|
| 118 |
|
| 119 |
-
workflow.
|
|
|
|
|
|
|
| 120 |
workflow.add_edge("map_answer", "validator")
|
| 121 |
|
| 122 |
-
workflow.add_conditional_edges("validator", route_validator, {"
|
| 123 |
-
|
| 124 |
return workflow.compile()
|
| 125 |
|
| 126 |
-
if __name__ == "__main__":
|
| 127 |
graph = build_workflow()
|
| 128 |
|
| 129 |
mermaid_text = graph.get_graph().draw_mermaid()
|
| 130 |
|
| 131 |
-
print(mermaid_text)
|
|
|
|
| 1 |
from models.models import groq_model, anthropic_model
|
| 2 |
+
from tools import taivily_search, serper_search, execute_code, get_youtube_transcript, execute_python_file_url
|
| 3 |
from langgraph.graph import StateGraph, START, END
|
| 4 |
from langchain_core.messages import SystemMessage, AIMessage, ToolMessage
|
| 5 |
from typing import List, TypedDict
|
|
|
|
| 9 |
taivily_search,
|
| 10 |
serper_search,
|
| 11 |
get_youtube_transcript,
|
| 12 |
+
execute_code,
|
| 13 |
+
execute_python_file_url
|
| 14 |
]
|
| 15 |
|
| 16 |
class EvaluationState(TypedDict):
|
|
|
|
| 28 |
"""
|
| 29 |
This node call the model with the question and the tools
|
| 30 |
"""
|
| 31 |
+
# Convert any ToolMessage objects to a format Groq can handle
|
| 32 |
response = bounded_model_groq.invoke(state["messages"])
|
| 33 |
|
| 34 |
state["messages"].append(response)
|
|
|
|
| 82 |
"""
|
| 83 |
Validate if the answer fills the requirements
|
| 84 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
answer = state["answer"]
|
| 86 |
result = anthropic_model.invoke(f"Validate if the answer fits the next requirements: \n\n{answer}\n\nThe answer should be a number, string or a list of numbers and/or strings. If the answer fits the requirements, return just 'yes', otherwise return 'no'.")
|
| 87 |
|
| 88 |
+
state["is_valid_answer"] = result.content.startswith("yes")
|
|
|
|
| 89 |
state["messages"].append(SystemMessage(content=f"Validator: {result.content}"))
|
| 90 |
|
| 91 |
return state
|
| 92 |
|
| 93 |
def route_validator(state):
    """
    Decide where the graph goes after the validator node.

    Bumps the ``"validator"`` entry of ``state["step_counter"]`` by one
    (starting from 0 when the entry is absent) and then returns ``END``
    when ``state["is_valid_answer"]`` is truthy or the counter is greater
    than 2; otherwise returns ``"map_answer"``.
    """
    # NOTE(review): the counter is updated in place inside a routing
    # function; confirm the graph runtime keeps this mutation between steps.
    counters = state["step_counter"]
    counters["validator"] = counters.get("validator", 0) + 1

    finished = state["is_valid_answer"] or counters["validator"] > 2
    return END if finished else "map_answer"
|
| 100 |
+
|
| 101 |
+
def evaluator(state):
    """
    Evaluate if the context information is enough to answer the question.

    Builds a prompt from ``state["question"]`` and
    ``state["external_information"]``, sends it to ``anthropic_model`` and
    stores the verdict in ``state["has_enough_information"]``. The raw model
    reply is appended to ``state["messages"]`` as a ``SystemMessage``
    prefixed with ``"Evaluator: "``.

    Args:
        state: Graph state; must provide the ``"question"``,
            ``"external_information"`` and ``"messages"`` keys.

    Returns:
        The same ``state`` object, updated in place.
    """
    prompt = f"""## Instruction
Answer just "yes" (without the quotes), if the context information is enough to answer the question.
## Question
{state["question"]}
## Relevant information
{state["external_information"]}
"""
    result = anthropic_model.invoke(prompt)

    # Normalise before the prefix test: replies such as "Yes" or " yes" are
    # affirmative too, and a strict case-sensitive match would misread them.
    verdict = result.content.strip().lower()
    state["has_enough_information"] = verdict.startswith("yes")

    # Keep the untouched reply in the transcript for traceability.
    state["messages"].append(SystemMessage(content=f"Evaluator: {result.content}"))

    return state
|
| 117 |
+
|
| 118 |
+
def route_iteration(state):
    """
    Decide where the graph goes after the evaluator node.

    Bumps the ``"iteration"`` entry of ``state["step_counter"]`` by one
    (starting from 0 when the entry is absent). Returns
    ``"answer_question"`` when ``state["has_enough_information"]`` is truthy
    or the counter is greater than 2; otherwise returns ``"agent"``.
    """
    # NOTE(review): the counter is updated in place inside a routing
    # function; confirm the graph runtime keeps this mutation between steps.
    counters = state["step_counter"]
    counters["iteration"] = counters.get("iteration", 0) + 1

    ready_to_answer = state["has_enough_information"] or counters["iteration"] > 2
    return "answer_question" if ready_to_answer else "agent"
|
| 124 |
|
| 125 |
def build_workflow():
    """
    Assemble and compile the search graph.

    Flow, as wired below: START -> agent -> action -> evaluator. From the
    evaluator, ``route_iteration`` goes either back to ``agent`` or on to
    ``answer_question`` -> map_answer -> validator. From the validator,
    ``route_validator`` goes either back to ``map_answer`` or to ``END``.

    Returns:
        The result of ``workflow.compile()``.
    """
    workflow = StateGraph(EvaluationState)

    # Nodes are registered in the order listed here.
    node_table = (
        ("agent", call_node),
        ("action", tool_node),
        ("evaluator", evaluator),
        ("answer_question", answer_question),
        ("map_answer", map_answer),
        ("validator", validator),
    )
    for node_name, node_fn in node_table:
        workflow.add_node(node_name, node_fn)

    # Gather information: the agent picks a tool, the tool runs, the
    # evaluator judges the collected context.
    workflow.add_edge(START, "agent")
    workflow.add_edge("agent", "action")
    workflow.add_edge("action", "evaluator")

    iteration_targets = {"answer_question": "answer_question", "agent": "agent"}
    workflow.add_conditional_edges("evaluator", route_iteration, iteration_targets)

    # Produce, map and validate the answer.
    workflow.add_edge("answer_question", "map_answer")
    workflow.add_edge("map_answer", "validator")

    validation_targets = {"map_answer": "map_answer", END: END}
    workflow.add_conditional_edges("validator", route_validator, validation_targets)

    return workflow.compile()
|
| 149 |
|
| 150 |
+
""" if __name__ == "__main__":
|
| 151 |
graph = build_workflow()
|
| 152 |
|
| 153 |
mermaid_text = graph.get_graph().draw_mermaid()
|
| 154 |
|
| 155 |
+
print(mermaid_text) """
|
tools/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
from tools.search import taivily_search, serper_search
|
| 2 |
-
from tools.sandbox import execute_code, get_youtube_transcript
|
| 3 |
|
| 4 |
-
__all__ = ["taivily_search", "serper_search", "execute_code", "get_youtube_transcript"]
|
|
|
|
| 1 |
from tools.search import taivily_search, serper_search
|
| 2 |
+
from tools.sandbox import execute_code, get_youtube_transcript, execute_python_file_url
|
| 3 |
|
| 4 |
+
__all__ = ["taivily_search", "serper_search", "execute_code", "get_youtube_transcript", "execute_python_file_url"]
|
tools/sandbox.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
from langchain_core.tools import tool
|
| 2 |
from typing import Annotated
|
| 3 |
from typing_extensions import Annotated
|
|
@@ -5,10 +6,7 @@ from langchain_core.tools.base import InjectedToolCallId
|
|
| 5 |
from langchain_core.runnables import RunnableConfig
|
| 6 |
from langgraph.types import Command
|
| 7 |
from langchain_core.messages import ToolMessage
|
| 8 |
-
import os
|
| 9 |
from dotenv import load_dotenv
|
| 10 |
-
import json
|
| 11 |
-
import asyncio
|
| 12 |
import tempfile
|
| 13 |
from pathlib import Path
|
| 14 |
import yt_dlp
|
|
@@ -19,20 +17,13 @@ load_dotenv()
|
|
| 19 |
@tool
|
| 20 |
def execute_code(code: str, tool_call_id: Annotated[str, InjectedToolCallId], config: RunnableConfig) -> Command:
|
| 21 |
"""
|
| 22 |
-
Execute code in a
|
| 23 |
-
|
| 24 |
Args:
|
| 25 |
code: The code to execute. Should be Python code without the triple backticks.
|
| 26 |
"""
|
| 27 |
-
|
| 28 |
-
loop = asyncio.get_event_loop()
|
| 29 |
-
except RuntimeError:
|
| 30 |
-
loop = asyncio.new_event_loop()
|
| 31 |
-
asyncio.set_event_loop(loop)
|
| 32 |
-
|
| 33 |
-
result = loop.run_until_complete(_execute_code_in_sandbox(code, os.getenv("E2B_API_KEY")))
|
| 34 |
|
| 35 |
-
formatted_result = f"""
|
| 36 |
## Code
|
| 37 |
```python
|
| 38 |
{code}
|
|
@@ -55,7 +46,7 @@ def execute_code(code: str, tool_call_id: Annotated[str, InjectedToolCallId], co
|
|
| 55 |
}
|
| 56 |
)
|
| 57 |
|
| 58 |
-
|
| 59 |
"""Execute code in E2B sandbox and return the results."""
|
| 60 |
sbx = Sandbox()
|
| 61 |
execution = sbx.run_code(code)
|
|
@@ -63,11 +54,36 @@ async def _execute_code_in_sandbox(code: str, api_key: str):
|
|
| 63 |
files = sbx.files.list("/")
|
| 64 |
|
| 65 |
return {
|
| 66 |
-
"stdout": execution.stdout,
|
| 67 |
-
"stderr": execution.stderr,
|
| 68 |
"files": files
|
| 69 |
}
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
@tool
|
| 72 |
def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCallId] = None, config: RunnableConfig = None) -> Command | str:
|
| 73 |
"""
|
|
@@ -136,20 +152,15 @@ def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCa
|
|
| 136 |
|
| 137 |
""" if __name__ == "__main__":
|
| 138 |
# Simple test: print "Hello World"
|
| 139 |
-
|
| 140 |
-
|
| 141 |
# Build a minimal RunnableConfig with no external information
|
| 142 |
config = RunnableConfig(**{"external_information": ""})
|
| 143 |
-
|
| 144 |
# Execute the test code
|
| 145 |
# Call the underlying function to bypass the BaseTool wrapper
|
| 146 |
-
cmd: Command =
|
| 147 |
-
|
| 148 |
"test-call",
|
| 149 |
config,
|
| 150 |
-
)
|
| 151 |
-
|
| 152 |
-
# Print the output from the sandbox execution
|
| 153 |
-
updates = getattr(cmd, 'update', {}) or {}
|
| 154 |
-
for msg in updates.get('messages', []):
|
| 155 |
-
print(msg.content) """
|
|
|
|
| 1 |
+
import os
|
| 2 |
from langchain_core.tools import tool
|
| 3 |
from typing import Annotated
|
| 4 |
from typing_extensions import Annotated
|
|
|
|
| 6 |
from langchain_core.runnables import RunnableConfig
|
| 7 |
from langgraph.types import Command
|
| 8 |
from langchain_core.messages import ToolMessage
|
|
|
|
| 9 |
from dotenv import load_dotenv
|
|
|
|
|
|
|
| 10 |
import tempfile
|
| 11 |
from pathlib import Path
|
| 12 |
import yt_dlp
|
|
|
|
| 17 |
@tool
|
| 18 |
def execute_code(code: str, tool_call_id: Annotated[str, InjectedToolCallId], config: RunnableConfig) -> Command:
|
| 19 |
"""
|
| 20 |
+
Execute code in a e2b_code_interpreter sandbox and return the results.
|
|
|
|
| 21 |
Args:
|
| 22 |
code: The code to execute. Should be Python code without the triple backticks.
|
| 23 |
"""
|
| 24 |
+
result = _execute_code_in_sandbox(code, os.getenv("E2B_API_KEY"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
formatted_result = f"""
|
| 27 |
## Code
|
| 28 |
```python
|
| 29 |
{code}
|
|
|
|
| 46 |
}
|
| 47 |
)
|
| 48 |
|
| 49 |
+
def _execute_code_in_sandbox(code: str, api_key: str):
|
| 50 |
"""Execute code in E2B sandbox and return the results."""
|
| 51 |
sbx = Sandbox()
|
| 52 |
execution = sbx.run_code(code)
|
|
|
|
| 54 |
files = sbx.files.list("/")
|
| 55 |
|
| 56 |
return {
|
| 57 |
+
"stdout": execution.logs.stdout,
|
| 58 |
+
"stderr": execution.logs.stderr,
|
| 59 |
"files": files
|
| 60 |
}
|
| 61 |
|
| 62 |
+
@tool
def execute_python_file_url(file_url: str, tool_call_id: Annotated[str, InjectedToolCallId], config: RunnableConfig) -> Command:
    """
    Download a python file from a given URL, execute it and get the result
    Args:
        file_url: The URL of the file to download.
    Returns:
        The standard output produced by running the downloaded file
    """
    # Imported locally so the block does not depend on a file-level import.
    import shlex

    sbx = Sandbox()
    file_name = "code.py"

    # The URL is chosen by the model, so quote it before it reaches the shell;
    # otherwise characters such as ';', '&' or '$(...)' would run as commands.
    download_cmd = f"wget -O {file_name} {shlex.quote(file_url)} && cat {file_name}"
    result = sbx.commands.run(download_cmd)

    # `cat` echoed the file, so stdout holds the source code to execute.
    result_code = _execute_code_in_sandbox(result.stdout, os.getenv("E2B_API_KEY"))

    # The execution stdout is iterated as string chunks; join them in one pass.
    final_result = "".join(result_code["stdout"])

    return Command(
        update={
            # Append this run's output to whatever context was gathered so far.
            "external_information": f"{config.get('external_information', '')}\n---\n# result {final_result}",
            "messages": [ToolMessage(content=final_result, tool_call_id=tool_call_id)]
        }
    )
|
| 86 |
+
|
| 87 |
@tool
|
| 88 |
def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCallId] = None, config: RunnableConfig = None) -> Command | str:
|
| 89 |
"""
|
|
|
|
| 152 |
|
| 153 |
""" if __name__ == "__main__":
|
| 154 |
# Simple test: print "Hello World"
|
| 155 |
+
url = "https://agents-course-unit4-scoring.hf.space/files/f918266a-b3e0-4914-865d-4faa564f1aef"
|
| 156 |
+
|
| 157 |
# Build a minimal RunnableConfig with no external information
|
| 158 |
config = RunnableConfig(**{"external_information": ""})
|
| 159 |
+
input = f"{url}"
|
| 160 |
# Execute the test code
|
| 161 |
# Call the underlying function to bypass the BaseTool wrapper
|
| 162 |
+
cmd: Command = execute_python_file_url.func(
|
| 163 |
+
input,
|
| 164 |
"test-call",
|
| 165 |
config,
|
| 166 |
+
) """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|