Final_Assignment

Paused

App Files Files Community

sebastianfrench committed on May 11, 2025

Commit

b4cd776

1 Parent(s): b97774a

add python file download

Browse files

Files changed (5) hide show

agents/search_agent.py +10 -5
app.py +7 -0
graphs/evaluation.py +43 -19
tools/__init__.py +2 -2
tools/sandbox.py +38 -27

agents/search_agent.py CHANGED Viewed

@@ -21,7 +21,7 @@ class SearchAgent:
             "is_valid_answer": False,
             "has_enough_information": False,
             "answer": "",
-            "step_counter" : {"validator": 0},
         }, config={"callbacks": [langfuse_handler]})
         return state["answer"]
@@ -29,10 +29,15 @@ class SearchAgent:
 if __name__ == "__main__":
     #question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
     #question = """How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."""
-    question = """Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.
-What does Teal'c say in response to the question "Isn't that hot?"""
     agent = SearchAgent()
-    submit_answer = agent(question)
     print(submit_answer)

             "is_valid_answer": False,
             "has_enough_information": False,
             "answer": "",
+            "step_counter" : {"iteration": 0,"validator": 0},
         }, config={"callbacks": [langfuse_handler]})
         return state["answer"]
 if __name__ == "__main__":
     #question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
     #question = """How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."""
+    #question = """Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.
+    task_id = "f918266a-b3e0-4914-865d-4faa564f1aef"
+    question_text = "What is the final numeric output from the attached Python code?"
+    file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+    question_with_file_info = f"For this task there is file available, with name {task_id}, download it from {file_url}\n\n{question_text}"
+#What does Teal'c say in response to the question "Isn't that hot?"""
     agent = SearchAgent()
+    submit_answer = agent(question_with_file_info)
     print(submit_answer)

app.py CHANGED Viewed

@@ -70,6 +70,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})

         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
+        #Check if there is a question file
+        file_name = item.get("file_name")
+        if file_name and file_name != "":
+            file_url = f"{api_url}/files/{task_id}"
+            question_with_file_info = f"For this task there is a file available, download it from {file_url}\n{question_text}"
+            question_text = question_with_file_info
         try:
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})

graphs/evaluation.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from models.models import  groq_model, anthropic_model
-from tools import  taivily_search, serper_search, execute_code, get_youtube_transcript
 from langgraph.graph import StateGraph, START, END
 from langchain_core.messages import SystemMessage, AIMessage, ToolMessage
 from typing import List, TypedDict
@@ -9,7 +9,8 @@ tools = [
     taivily_search,
     serper_search,
     get_youtube_transcript,
-    execute_code
 ]
 class EvaluationState(TypedDict):
@@ -27,6 +28,7 @@ def call_node(state: EvaluationState):
     """
     This node call the model with the question and the tools
     """
     response = bounded_model_groq.invoke(state["messages"])
     state["messages"].append(response)
@@ -80,52 +82,74 @@ def validator(state: EvaluationState):
     """
     Validate if the answer fills the requirements
     """
-    state["step_counter"]["validator"] = state["step_counter"].get("validator", 0) + 1
-    if state["step_counter"]["validator"] >= 3:
-        state["is_valid_answer"] = True
-        return state
     answer = state["answer"]
     result = anthropic_model.invoke(f"Validate if the answer fits the next requirements: \n\n{answer}\n\nThe answer should be a number, string or a list of numbers and/or strings. If the answer fits the requirements, return just 'yes', otherwise return 'no'.")
-    is_valid_answer = result.content.startswith("yes")
-    state["is_valid_answer"] = is_valid_answer
     state["messages"].append(SystemMessage(content=f"Validator: {result.content}"))
     return state
 def route_validator(state):
-    if state["is_valid_answer"]:
         return END
     else:
         return "agent"
 def build_workflow():
     """
-    Build search workflow with conditional edge for evaluation
     """
     workflow = StateGraph(EvaluationState)
     workflow.add_node("agent", call_node)
     workflow.add_node("action", tool_node)
     workflow.add_node("answer_question", answer_question)
     workflow.add_node("map_answer", map_answer)
     workflow.add_node("validator", validator)
     workflow.add_edge(START, "agent")
     workflow.add_edge("agent", "action")
-    workflow.add_edge("action", "answer_question")
-    workflow.add_edge("answer_question", "map_answer")
     workflow.add_edge("map_answer", "validator")
-    workflow.add_conditional_edges("validator", route_validator, {"agent":"agent", END:END})
     return workflow.compile()
-if __name__ == "__main__":
     graph = build_workflow()
     mermaid_text = graph.get_graph().draw_mermaid()
-    print(mermaid_text)

 from models.models import  groq_model, anthropic_model
+from tools import  taivily_search, serper_search, execute_code, get_youtube_transcript, execute_python_file_url
 from langgraph.graph import StateGraph, START, END
 from langchain_core.messages import SystemMessage, AIMessage, ToolMessage
 from typing import List, TypedDict
     taivily_search,
     serper_search,
     get_youtube_transcript,
+    execute_code,
+    execute_python_file_url
 ]
 class EvaluationState(TypedDict):
     """
     This node call the model with the question and the tools
     """
+    # Convert any ToolMessage objects to a format Groq can handle
     response = bounded_model_groq.invoke(state["messages"])
     state["messages"].append(response)
     """
     Validate if the answer fills the requirements
     """
     answer = state["answer"]
     result = anthropic_model.invoke(f"Validate if the answer fits the next requirements: \n\n{answer}\n\nThe answer should be a number, string or a list of numbers and/or strings. If the answer fits the requirements, return just 'yes', otherwise return 'no'.")
+    state["is_valid_answer"] = result.content.startswith("yes")
     state["messages"].append(SystemMessage(content=f"Validator: {result.content}"))
     return state
 def route_validator(state):
+    """
+    Decide where the graph goes after the "validator" node.
+    Returns END when state["is_valid_answer"] is truthy or the validator counter
+    exceeds 2; otherwise returns "map_answer" so the answer is mapped again.
+    """
+    # Count this pass in state["step_counter"]["validator"] (missing key counts as 0).
+    # NOTE(review): this mutates state inside a routing function — confirm the
+    # framework keeps the change between passes.
+    state["step_counter"]["validator"] = state["step_counter"].get("validator", 0) + 1
+    # The counter bound stops the validator <-> map_answer loop from running forever.
+    if state["is_valid_answer"] or state["step_counter"]["validator"] > 2:
         return END
+    else:
+        return "map_answer"
+def evaluator(state):
+    """
+    Ask the Anthropic model whether the gathered context is enough to answer the question.
+
+    Reads state["question"] and state["external_information"], stores the verdict in
+    state["has_enough_information"], and appends the raw reply to state["messages"].
+    Args:
+        state: The graph state dict.
+    Returns:
+        The same state object, updated in place.
+    """
+    prompt = f"""## Instruction
+    Answer just "yes" (without the quotes), if the context information is enough to answer the question.
+    ## Question
+    {state["question"]}
+    ## Relevant information
+    {state["external_information"]}
+    """
+    result = anthropic_model.invoke(prompt)
+    # Accept "Yes" or a reply padded with whitespace, not only an exact lowercase "yes" prefix.
+    # NOTE(review): assumes result.content is a str — confirm for this model wrapper.
+    verdict = result.content.strip().lower()
+    state["has_enough_information"] = verdict.startswith("yes")
+    state["messages"].append(SystemMessage(content=f"Evaluator: {result.content}"))
+    return state
+def route_iteration(state):
+    """
+    Decide where the graph goes after the "evaluator" node.
+    Returns "answer_question" when state["has_enough_information"] is truthy or the
+    iteration counter exceeds 2; otherwise returns "agent" for another search round.
+    """
+    # Count this pass in state["step_counter"]["iteration"] (missing key counts as 0).
+    # NOTE(review): this mutates state inside a routing function — confirm the
+    # framework keeps the change between passes.
+    state["step_counter"]["iteration"] = state["step_counter"].get("iteration", 0) + 1
+    # The counter bound forces an answer attempt instead of searching indefinitely.
+    if state["has_enough_information"] or state["step_counter"]["iteration"] > 2:
+        return "answer_question"
     else:
         return "agent"
 def build_workflow():
     """
+    Build search workflow with conditional edge for evaluation and iteration.
+    Flow: START -> agent -> action -> evaluator; route_iteration then sends the run
+    to answer_question or back to agent. answer_question -> map_answer -> validator;
+    route_validator then sends the run back to map_answer or to END.
+    Returns:
+        The compiled StateGraph built over EvaluationState.
     """
     workflow = StateGraph(EvaluationState)
+    # Register every node before wiring edges between them.
     workflow.add_node("agent", call_node)
     workflow.add_node("action", tool_node)
+    workflow.add_node("evaluator", evaluator)
     workflow.add_node("answer_question", answer_question)
     workflow.add_node("map_answer", map_answer)
     workflow.add_node("validator", validator)
     workflow.add_edge(START, "agent")
     workflow.add_edge("agent", "action")
+    # Information-gathering loop: the evaluator decides whether to search again.
+    workflow.add_edge("action", "evaluator")
+    workflow.add_conditional_edges("evaluator", route_iteration, {"answer_question":"answer_question","agent":"agent"})
+    workflow.add_edge("answer_question","map_answer")
     workflow.add_edge("map_answer", "validator")
+    # Answer-format loop: an invalid answer goes back to map_answer, not to the agent.
+    workflow.add_conditional_edges("validator", route_validator, {"map_answer": "map_answer", END: END})
     return workflow.compile()
+""" if __name__ == "__main__":
     graph = build_workflow()
     mermaid_text = graph.get_graph().draw_mermaid()
+    print(mermaid_text) """

tools/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 from tools.search import taivily_search, serper_search
-from tools.sandbox import execute_code, get_youtube_transcript
-__all__ = ["taivily_search", "serper_search", "execute_code", "get_youtube_transcript"]

 from tools.search import taivily_search, serper_search
+from tools.sandbox import execute_code, get_youtube_transcript, execute_python_file_url
+__all__ = ["taivily_search", "serper_search", "execute_code", "get_youtube_transcript", "execute_python_file_url"]

tools/sandbox.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from langchain_core.tools import tool
 from typing import Annotated
 from typing_extensions import Annotated
@@ -5,10 +6,7 @@ from langchain_core.tools.base import InjectedToolCallId
 from langchain_core.runnables import RunnableConfig
 from langgraph.types import Command
 from langchain_core.messages import ToolMessage
-import os
 from dotenv import load_dotenv
-import json
-import asyncio
 import tempfile
 from pathlib import Path
 import yt_dlp
@@ -19,20 +17,13 @@ load_dotenv()
 @tool
 def execute_code(code: str, tool_call_id: Annotated[str, InjectedToolCallId], config: RunnableConfig) -> Command:
     """
-    Execute code in a secure E2B sandbox environment.
     Args:
         code: The code to execute. Should be Python code without the triple backticks.
     """
-    try:
-        loop = asyncio.get_event_loop()
-    except RuntimeError:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-    result = loop.run_until_complete(_execute_code_in_sandbox(code, os.getenv("E2B_API_KEY")))
-    formatted_result = f"""# Code Execution Results
 ## Code
 ```python
 {code}
@@ -55,7 +46,7 @@ def execute_code(code: str, tool_call_id: Annotated[str, InjectedToolCallId], co
         }
     )
-async def _execute_code_in_sandbox(code: str, api_key: str):
     """Execute code in E2B sandbox and return the results."""
     sbx = Sandbox()
     execution = sbx.run_code(code)
@@ -63,11 +54,36 @@ async def _execute_code_in_sandbox(code: str, api_key: str):
     files = sbx.files.list("/")
     return {
-        "stdout": execution.stdout,
-        "stderr": execution.stderr,
         "files": files
     }
 @tool
 def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCallId] = None, config: RunnableConfig = None) -> Command | str:
     """
@@ -136,20 +152,15 @@ def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCa
 """ if __name__ == "__main__":
     # Simple test: print "Hello World"
-    test_code = "print(\"Hello World\")"
     # Build a minimal RunnableConfig with no external information
     config = RunnableConfig(**{"external_information": ""})
     # Execute the test code
     # Call the underlying function to bypass the BaseTool wrapper
-    cmd: Command = execute_code.func(
-        test_code,
         "test-call",
         config,
-    )
-    # Print the output from the sandbox execution
-    updates = getattr(cmd, 'update', {}) or {}
-    for msg in updates.get('messages', []):
-        print(msg.content) """

+import os
 from langchain_core.tools import tool
 from typing import Annotated
 from typing_extensions import Annotated
 from langchain_core.runnables import RunnableConfig
 from langgraph.types import Command
 from langchain_core.messages import ToolMessage
 from dotenv import load_dotenv
 import tempfile
 from pathlib import Path
 import yt_dlp
 @tool
 def execute_code(code: str, tool_call_id: Annotated[str, InjectedToolCallId], config: RunnableConfig) -> Command:
     """
+    Execute code in a e2b_code_interpreter sandbox and return the results.
     Args:
         code: The code to execute. Should be Python code without the triple backticks.
     """
+    result = _execute_code_in_sandbox(code, os.getenv("E2B_API_KEY"))
+    formatted_result = f"""
 ## Code
 ```python
 {code}
         }
     )
+def _execute_code_in_sandbox(code: str, api_key: str):
     """Execute code in E2B sandbox and return the results."""
     sbx = Sandbox()
     execution = sbx.run_code(code)
     files = sbx.files.list("/")
     return {
+        "stdout": execution.logs.stdout,
+        "stderr": execution.logs.stderr,
         "files": files
     }
+@tool
+def execute_python_file_url(file_url: str, tool_call_id: Annotated[str, InjectedToolCallId], config: RunnableConfig) -> Command:
+    """
+    Download a Python file from a URL into a sandbox, execute it, and return what it prints.
+    Args:
+        file_url: The URL of the Python file to download and execute.
+    Returns:
+        A Command whose update appends the script's stdout to "external_information"
+        and adds a ToolMessage carrying the same text.
+    """
+    import shlex
+
+    file_name = "code.py"
+    sbx = Sandbox()
+    try:
+        # file_url is supplied by the model, so quote it before it reaches the sandbox shell.
+        result = sbx.commands.run(f"wget -O {file_name} {shlex.quote(file_url)} && cat {file_name}")
+    finally:
+        # This sandbox is only used for the download; release it instead of leaking it.
+        sbx.kill()
+    # The downloaded source is run in a separate sandbox created by the helper.
+    result_code = _execute_code_in_sandbox(result.stdout, os.getenv("E2B_API_KEY"))
+    # stdout is consumed as a sequence of text chunks; join them into one string.
+    final_result = "".join(result_code["stdout"])
+    return Command(
+        update={
+            "external_information": f"{config.get('external_information', '')}\n---\n# result {final_result}",
+            "messages": [ToolMessage(content=final_result, tool_call_id=tool_call_id)]
+        }
+    )
 @tool
 def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCallId] = None, config: RunnableConfig = None) -> Command | str:
     """
 """ if __name__ == "__main__":
     # Simple test: print "Hello World"
+    url = "https://agents-course-unit4-scoring.hf.space/files/f918266a-b3e0-4914-865d-4faa564f1aef"
     # Build a minimal RunnableConfig with no external information
     config = RunnableConfig(**{"external_information": ""})
+    input = f"{url}"
     # Execute the test code
     # Call the underlying function to bypass the BaseTool wrapper
+    cmd: Command = execute_python_file_url.func(
+        input,
         "test-call",
         config,
+    ) """