Final_Assignment

Paused

App Files Files Community

sebastianfrench committed on May 10, 2025

Commit

b97774a

1 Parent(s): e38f8e4

fix youtube transcript

Browse files

Files changed (4) hide show

agents/search_agent.py +9 -1
graphs/evaluation.py +31 -81
models/models.py +1 -1
tools/sandbox.py +6 -2

agents/search_agent.py CHANGED Viewed

@@ -17,13 +17,21 @@ class SearchAgent:
         state = self.workflow.invoke({
             "messages":messages,
             "question": question,
         }, config={"callbacks": [langfuse_handler]})
         return state["answer"]
 if __name__ == "__main__":
     #question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
-    question = """How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."""
     agent = SearchAgent()
     submit_answer = agent(question)

         state = self.workflow.invoke({
             "messages":messages,
             "question": question,
+            "external_information": "",
+            "is_valid_answer": False,
+            "has_enough_information": False,
+            "answer": "",
+            "step_counter" : {"validator": 0},
         }, config={"callbacks": [langfuse_handler]})
         return state["answer"]
 if __name__ == "__main__":
     # Manual run: send one hard-coded question through SearchAgent.
     # Alternative questions are kept commented out below.
     #question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
+    #question = """How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."""
     # NOTE(review): the triple-quoted literal below closes right after `hot?`, so the
     # prompt text lacks the closing double quote of the quoted question — confirm
     # whether that is intended.
+    question = """Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.
+What does Teal'c say in response to the question "Isn't that hot?"""
     agent = SearchAgent()
     submit_answer = agent(question)

graphs/evaluation.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from models.models import  groq_model, anthropic_model
 from tools import  taivily_search, serper_search, execute_code, get_youtube_transcript
 from langgraph.graph import StateGraph, START, END
-from langchain_core.messages import SystemMessage
 from typing import List, TypedDict
 from langgraph.prebuilt import ToolNode
@@ -14,23 +14,20 @@ tools = [
 class EvaluationState(TypedDict):
     messages: List
-    tasks: str
-    current_task: str
     question: str
     answer: str
     external_information: str
     has_enough_information: bool
     is_valid_answer: bool
     step_counter: dict[str, int]
-bound_model_llama = groq_model.bind_tools(tools)
-bound_model_antrhropic = anthropic_model.bind_tools(tools)
 def call_node(state: EvaluationState):
     """
     This node call the model with the question and the tools
     """
-    response = bound_model_llama.invoke(state["messages"])
     state["messages"].append(response)
     return state
@@ -55,15 +52,25 @@ If you are asked for a comma separated list, apply the above rules depending of
     response  = anthropic_model.invoke(prompt)
-    state["messages"].append(response)
     state["answer"] = response.content
     return state
 def map_answer(state: EvaluationState):
     """
     Map the answer to the final answer
     """
-    answer = anthropic_model.invoke("Map the answer, I want only the number, string or list. Remove quotes. ANSWER:"+ state["answer"])
     return {
         "answer": answer.content
@@ -73,51 +80,27 @@ def validator(state: EvaluationState):
     """
     Validate if the answer fills the requirements
     """
-    # Initialize or update validator step counter
-    if "step_counter" not in state:
-        state["step_counter"] = { "validator": 0}
-    # Increment the validator step counter
     state["step_counter"]["validator"] = state["step_counter"].get("validator", 0) + 1
-    # Check if we've hit the validator recursion limit
-    if state["step_counter"]["validator"] >= 3:  # Smaller limit for validator recursion
         state["is_valid_answer"] = True
         return state
     answer = state["answer"]
-    result = anthropic_model.invoke(f"Validate if the answer fits the next requirements: \n\n{answer}\n\nThe answer should be a number, string or a list of numbers and/or strings. If the answer fits the requirements, return 'yes', otherwise return 'no'.")
     is_valid_answer = result.content.startswith("yes")
     state["is_valid_answer"] = is_valid_answer
-    return state
-def evaluator(state: EvaluationState):
-    """
-    Evaluate if it is needed more infomation to resolve the question.
-    """
-    if "step_counter" not in state:
-        state["step_counter"] = { "evaluator": 0}
-    state["step_counter"]["evaluator"] = state["step_counter"].get("evaluator", 0) + 1
-    total_iterations =  state["step_counter"].get("evaluator", 0)
-    if total_iterations >= 5:  # Using higher threshold for combined count
-        state["has_enough_information"] = True
-        return state
-    prompt = f"""Does the context information are enough to resolve the answer? \n # Context information \n {state["external_information"]} \n # Question \n {state["question"]} \n If the context information is enough to resolve the question, return 'yes', otherwise return what is missing."""
-    result = anthropic_model.invoke(prompt)
-    has_enough_information = result.content.startswith("yes")
-    state["has_enough_information"] = has_enough_information
-    if not has_enough_information:
-        # Only update messages and external information if we need more info
-        state["messages"] = [SystemMessage(content=result.content)]
-        state["external_information"] = f"{state['external_information']}\n\n---\n\n{result.content}"
     return state
 def build_workflow():
     """
     Build search workflow with conditional edge for evaluation
@@ -127,55 +110,22 @@ def build_workflow():
     workflow.add_node("action", tool_node)
     workflow.add_node("answer_question", answer_question)
     workflow.add_node("map_answer", map_answer)
-    workflow.add_node("evaluator", evaluator)
     workflow.add_node("validator", validator)
-    # Define edges
     workflow.add_edge(START, "agent")
     workflow.add_edge("agent", "action")
-    workflow.add_edge("action", "evaluator")
-    # Explicit conditional edges from evaluator
-    def route_evaluator(state):
-        if state["has_enough_information"]:
-            return "answer_question"
-        else:
-            return "agent"
-    workflow.add_conditional_edges("evaluator", route_evaluator,{"answer_question":"answer_question","agent":"agent"})
-    # Connect answer_question to map_answer
     workflow.add_edge("answer_question", "map_answer")
     workflow.add_edge("map_answer", "validator")
-    # Explicit conditional edges from validator
-    def route_validator(state):
-        if state["is_valid_answer"]:
-            return END
-        else:
-            return "map_answer"
-    workflow.add_conditional_edges("validator", route_validator, {"map_answer":"map_answer", END:END})
-    # Check if we need to manually add the edges for visualization
-    try:
-        # These are just for visualization and may not affect actual execution
-        workflow._graph.add_edge("evaluator", "agent", condition="needs more info")
-        workflow._graph.add_edge("evaluator", "answer_question", condition="has enough info")
-        workflow._graph.add_edge("validator", "map_answer", condition="invalid answer")
-        workflow._graph.add_edge("validator", END, condition="valid answer")
-    except:
-        # Skip if this approach doesn't work with current LangGraph version
-        pass
     return workflow.compile()
-""" if __name__ == "__main__":
-    # Build the graph
     graph = build_workflow()
-    # Get the Mermaid diagram as text
     mermaid_text = graph.get_graph().draw_mermaid()
-    print(mermaid_text) """

 from models.models import  groq_model, anthropic_model
 from tools import  taivily_search, serper_search, execute_code, get_youtube_transcript
 from langgraph.graph import StateGraph, START, END
+from langchain_core.messages import SystemMessage, AIMessage, ToolMessage
 from typing import List, TypedDict
 from langgraph.prebuilt import ToolNode
 class EvaluationState(TypedDict):
     # State dictionary read and updated by the workflow node functions in this module.
     # Message list; call_node passes it to the model and appends the response to it.
     messages: List
     # Question text supplied when the workflow is invoked.
     question: str
     # Current answer text; map_answer returns a replacement value for this key.
     answer: str
     # NOTE(review): no node visible in this view writes this field — confirm it is still used.
     external_information: str
     # NOTE(review): no node visible in this view reads or writes this field — confirm.
     has_enough_information: bool
     # Set by validator and read by route_validator to decide whether the run ends.
     is_valid_answer: bool
     # Invocation counters keyed by name; validator increments the "validator" entry.
     step_counter: dict[str, int]
+bounded_model_groq = groq_model.bind_tools(tools)  # groq_model with the module's `tools` bound; invoked by call_node
 def call_node(state: EvaluationState):
     """
     Invoke the tool-bound Groq model on the messages accumulated so far.

     The model's response is appended to state["messages"] in place and the
     same state object is returned.
     """
+    response = bounded_model_groq.invoke(state["messages"])
     state["messages"].append(response)
     return state
     response  = anthropic_model.invoke(prompt)
+    state["messages"].append(AIMessage(content=response.content))
     state["answer"] = response.content
     return state
 def map_answer(state: EvaluationState):
     """
     Map the answer to the final answer
     """
+    answer = state["answer"]
+    prompt = f"""## Instruction
+    map the answer to the final answer. The final answer should be a number, string or a list of numbers and/or strings. Remove quotes.
+    ## Answer
+    {answer}
+    ## Final answer"""
+    answer = anthropic_model.invoke(prompt)
     return {
         "answer": answer.content
     """
     Validate if the answer fills the requirements
     """
     state["step_counter"]["validator"] = state["step_counter"].get("validator", 0) + 1
+    if state["step_counter"]["validator"] >= 3:
         state["is_valid_answer"] = True
         return state
     answer = state["answer"]
+    result = anthropic_model.invoke(f"Validate if the answer fits the next requirements: \n\n{answer}\n\nThe answer should be a number, string or a list of numbers and/or strings. If the answer fits the requirements, return just 'yes', otherwise return 'no'.")
     is_valid_answer = result.content.startswith("yes")
     state["is_valid_answer"] = is_valid_answer
+    state["messages"].append(SystemMessage(content=f"Validator: {result.content}"))
     return state
+def route_validator(state):
     # Routing function for the conditional edge leaving the "validator" node:
     # returns END once state["is_valid_answer"] is truthy, otherwise "agent"
     # so the workflow goes back to the agent node.
+    if state["is_valid_answer"]:
+        return END
+    else:
+        return "agent"
 def build_workflow():
     """
     Build search workflow with conditional edge for evaluation
     workflow.add_node("action", tool_node)
     workflow.add_node("answer_question", answer_question)
     workflow.add_node("map_answer", map_answer)
     workflow.add_node("validator", validator)
     workflow.add_edge(START, "agent")
     workflow.add_edge("agent", "action")
+    workflow.add_edge("action", "answer_question")
     workflow.add_edge("answer_question", "map_answer")
     workflow.add_edge("map_answer", "validator")
+    workflow.add_conditional_edges("validator", route_validator, {"agent":"agent", END:END})
     return workflow.compile()
+if __name__ == "__main__":
     # When run as a script, compile the workflow and print its Mermaid diagram text.
     graph = build_workflow()
     mermaid_text = graph.get_graph().draw_mermaid()
+    print(mermaid_text)

models/models.py CHANGED Viewed

@@ -6,7 +6,7 @@ load_dotenv()
 anthropic_model = ChatAnthropic(
     model="claude-3-7-sonnet-20250219",
-    temperature=0.4
 )
 groq_model = ChatGroq(

 anthropic_model = ChatAnthropic(
     model="claude-3-7-sonnet-20250219",
+    temperature=0.7
 )
 groq_model = ChatGroq(

tools/sandbox.py CHANGED Viewed

@@ -79,6 +79,7 @@ def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCa
     """
     temp_dir = tempfile.mkdtemp()
     current_dir = os.getcwd()
     try:
         os.chdir(temp_dir)
@@ -88,13 +89,16 @@ def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCa
             'writeautomaticsub': True,
             'subtitleslangs': ['en'],
             'skip_download': True,
-            'outtmpl': 'subtitle',
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.extract_info(url, download=True)
-        subtitle_content = ""
         subtitle_files = list(Path(temp_dir).glob("*.vtt")) + list(Path(temp_dir).glob("*.srt"))
         if subtitle_files:

     """
     temp_dir = tempfile.mkdtemp()
     current_dir = os.getcwd()
+    subtitle_content = ""
     try:
         os.chdir(temp_dir)
             'writeautomaticsub': True,
             'subtitleslangs': ['en'],
             'skip_download': True,
+            'outtmpl': 'subtitle',
+            'quiet': True,
+            'no_warnings': False,
+            'ignoreerrors': True,
+            'geo_bypass': True,
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.extract_info(url, download=True)
         subtitle_files = list(Path(temp_dir).glob("*.vtt")) + list(Path(temp_dir).glob("*.srt"))
         if subtitle_files: