sebastianfrench committed on
Commit
b97774a
·
1 Parent(s): e38f8e4

fix youtube transcript

Browse files
agents/search_agent.py CHANGED
@@ -17,13 +17,21 @@ class SearchAgent:
17
  state = self.workflow.invoke({
18
  "messages":messages,
19
  "question": question,
 
 
 
 
 
20
  }, config={"callbacks": [langfuse_handler]})
21
 
22
  return state["answer"]
23
 
24
  if __name__ == "__main__":
25
  #question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
26
- question = """How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."""
 
 
 
27
  agent = SearchAgent()
28
  submit_answer = agent(question)
29
 
 
17
  state = self.workflow.invoke({
18
  "messages":messages,
19
  "question": question,
20
+ "external_information": "",
21
+ "is_valid_answer": False,
22
+ "has_enough_information": False,
23
+ "answer": "",
24
+ "step_counter" : {"validator": 0},
25
  }, config={"callbacks": [langfuse_handler]})
26
 
27
  return state["answer"]
28
 
29
if __name__ == "__main__":
    # Sample questions kept for manual testing; uncomment one to try it.
    #question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
    #question = """How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."""
    # The quoted question was missing its closing double quote: `hot?"""`
    # terminates the triple-quoted string right after `?`, silently dropping
    # the closing quotation mark. Escape it so the text ends with
    # ... "Isn't that hot?" verbatim.
    question = """Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.

What does Teal'c say in response to the question "Isn't that hot?\""""
    agent = SearchAgent()
    submit_answer = agent(question)
37
 
graphs/evaluation.py CHANGED
@@ -1,7 +1,7 @@
1
  from models.models import groq_model, anthropic_model
2
  from tools import taivily_search, serper_search, execute_code, get_youtube_transcript
3
  from langgraph.graph import StateGraph, START, END
4
- from langchain_core.messages import SystemMessage
5
  from typing import List, TypedDict
6
  from langgraph.prebuilt import ToolNode
7
 
@@ -14,23 +14,20 @@ tools = [
14
 
15
  class EvaluationState(TypedDict):
16
  messages: List
17
- tasks: str
18
- current_task: str
19
  question: str
20
  answer: str
21
  external_information: str
22
  has_enough_information: bool
23
  is_valid_answer: bool
24
  step_counter: dict[str, int]
25
-
26
- bound_model_llama = groq_model.bind_tools(tools)
27
- bound_model_antrhropic = anthropic_model.bind_tools(tools)
28
 
29
  def call_node(state: EvaluationState):
30
  """
31
  This node call the model with the question and the tools
32
  """
33
- response = bound_model_llama.invoke(state["messages"])
34
 
35
  state["messages"].append(response)
36
  return state
@@ -55,15 +52,25 @@ If you are asked for a comma separated list, apply the above rules depending of
55
 
56
 
57
  response = anthropic_model.invoke(prompt)
58
- state["messages"].append(response)
59
  state["answer"] = response.content
 
60
  return state
61
 
62
  def map_answer(state: EvaluationState):
63
  """
64
  Map the answer to the final answer
65
  """
66
- answer = anthropic_model.invoke("Map the answer, I want only the number, string or list. Remove quotes. ANSWER:"+ state["answer"])
 
 
 
 
 
 
 
 
 
67
 
68
  return {
69
  "answer": answer.content
@@ -73,51 +80,27 @@ def validator(state: EvaluationState):
73
  """
74
  Validate if the answer fills the requirements
75
  """
76
- # Initialize or update validator step counter
77
- if "step_counter" not in state:
78
- state["step_counter"] = { "validator": 0}
79
-
80
- # Increment the validator step counter
81
  state["step_counter"]["validator"] = state["step_counter"].get("validator", 0) + 1
82
 
83
- # Check if we've hit the validator recursion limit
84
- if state["step_counter"]["validator"] >= 3: # Smaller limit for validator recursion
85
  state["is_valid_answer"] = True
86
  return state
87
 
88
  answer = state["answer"]
89
- result = anthropic_model.invoke(f"Validate if the answer fits the next requirements: \n\n{answer}\n\nThe answer should be a number, string or a list of numbers and/or strings. If the answer fits the requirements, return 'yes', otherwise return 'no'.")
 
90
  is_valid_answer = result.content.startswith("yes")
91
  state["is_valid_answer"] = is_valid_answer
92
-
93
- return state
94
 
95
- def evaluator(state: EvaluationState):
96
- """
97
- Evaluate if it is needed more infomation to resolve the question.
98
- """
99
- if "step_counter" not in state:
100
- state["step_counter"] = { "evaluator": 0}
101
-
102
- state["step_counter"]["evaluator"] = state["step_counter"].get("evaluator", 0) + 1
103
-
104
- total_iterations = state["step_counter"].get("evaluator", 0)
105
- if total_iterations >= 5: # Using higher threshold for combined count
106
- state["has_enough_information"] = True
107
- return state
108
-
109
- prompt = f"""Does the context information are enough to resolve the answer? \n # Context information \n {state["external_information"]} \n # Question \n {state["question"]} \n If the context information is enough to resolve the question, return 'yes', otherwise return what is missing."""
110
- result = anthropic_model.invoke(prompt)
111
- has_enough_information = result.content.startswith("yes")
112
- state["has_enough_information"] = has_enough_information
113
-
114
- if not has_enough_information:
115
- # Only update messages and external information if we need more info
116
- state["messages"] = [SystemMessage(content=result.content)]
117
- state["external_information"] = f"{state['external_information']}\n\n---\n\n{result.content}"
118
-
119
  return state
120
-
 
 
 
 
 
 
121
  def build_workflow():
122
  """
123
  Build search workflow with conditional edge for evaluation
@@ -127,55 +110,22 @@ def build_workflow():
127
  workflow.add_node("action", tool_node)
128
  workflow.add_node("answer_question", answer_question)
129
  workflow.add_node("map_answer", map_answer)
130
- workflow.add_node("evaluator", evaluator)
131
  workflow.add_node("validator", validator)
132
 
133
- # Define edges
134
  workflow.add_edge(START, "agent")
135
  workflow.add_edge("agent", "action")
136
- workflow.add_edge("action", "evaluator")
137
 
138
- # Explicit conditional edges from evaluator
139
- def route_evaluator(state):
140
- if state["has_enough_information"]:
141
- return "answer_question"
142
- else:
143
- return "agent"
144
-
145
- workflow.add_conditional_edges("evaluator", route_evaluator,{"answer_question":"answer_question","agent":"agent"})
146
-
147
- # Connect answer_question to map_answer
148
  workflow.add_edge("answer_question", "map_answer")
149
  workflow.add_edge("map_answer", "validator")
150
 
151
- # Explicit conditional edges from validator
152
- def route_validator(state):
153
- if state["is_valid_answer"]:
154
- return END
155
- else:
156
- return "map_answer"
157
-
158
- workflow.add_conditional_edges("validator", route_validator, {"map_answer":"map_answer", END:END})
159
-
160
- # Check if we need to manually add the edges for visualization
161
- try:
162
- # These are just for visualization and may not affect actual execution
163
- workflow._graph.add_edge("evaluator", "agent", condition="needs more info")
164
- workflow._graph.add_edge("evaluator", "answer_question", condition="has enough info")
165
- workflow._graph.add_edge("validator", "map_answer", condition="invalid answer")
166
- workflow._graph.add_edge("validator", END, condition="valid answer")
167
- except:
168
- # Skip if this approach doesn't work with current LangGraph version
169
- pass
170
 
171
  return workflow.compile()
172
 
173
-
174
- """ if __name__ == "__main__":
175
- # Build the graph
176
  graph = build_workflow()
177
 
178
- # Get the Mermaid diagram as text
179
  mermaid_text = graph.get_graph().draw_mermaid()
180
 
181
- print(mermaid_text) """
 
1
  from models.models import groq_model, anthropic_model
2
  from tools import taivily_search, serper_search, execute_code, get_youtube_transcript
3
  from langgraph.graph import StateGraph, START, END
4
+ from langchain_core.messages import SystemMessage, AIMessage, ToolMessage
5
  from typing import List, TypedDict
6
  from langgraph.prebuilt import ToolNode
7
 
 
14
 
15
class EvaluationState(TypedDict):
    """Shared workflow state passed between LangGraph nodes.

    Populated incrementally: `call_node` appends model replies to `messages`,
    the answer/validation nodes fill `answer` and the boolean flags.
    """

    messages: List  # conversation history (model + tool messages)
    question: str  # the user question being answered
    answer: str  # current candidate answer text
    external_information: str  # context gathered by the search/tool nodes
    has_enough_information: bool  # set when gathered context suffices to answer
    is_valid_answer: bool  # set by the validator node
    step_counter: dict[str, int]  # per-node iteration counts (e.g. {"validator": 0})
23
+
24
+ bounded_model_groq = groq_model.bind_tools(tools)
 
25
 
26
  def call_node(state: EvaluationState):
27
  """
28
  This node call the model with the question and the tools
29
  """
30
+ response = bounded_model_groq.invoke(state["messages"])
31
 
32
  state["messages"].append(response)
33
  return state
 
52
 
53
 
54
  response = anthropic_model.invoke(prompt)
55
+ state["messages"].append(AIMessage(content=response.content))
56
  state["answer"] = response.content
57
+
58
  return state
59
 
60
  def map_answer(state: EvaluationState):
61
  """
62
  Map the answer to the final answer
63
  """
64
+ answer = state["answer"]
65
+
66
+ prompt = f"""## Instruction
67
+ map the answer to the final answer. The final answer should be a number, string or a list of numbers and/or strings. Remove quotes.
68
+
69
+ ## Answer
70
+ {answer}
71
+
72
+ ## Final answer"""
73
+ answer = anthropic_model.invoke(prompt)
74
 
75
  return {
76
  "answer": answer.content
 
80
  """
81
  Validate if the answer fills the requirements
82
  """
 
 
 
 
 
83
  state["step_counter"]["validator"] = state["step_counter"].get("validator", 0) + 1
84
 
85
+ if state["step_counter"]["validator"] >= 3:
 
86
  state["is_valid_answer"] = True
87
  return state
88
 
89
  answer = state["answer"]
90
+ result = anthropic_model.invoke(f"Validate if the answer fits the next requirements: \n\n{answer}\n\nThe answer should be a number, string or a list of numbers and/or strings. If the answer fits the requirements, return just 'yes', otherwise return 'no'.")
91
+
92
  is_valid_answer = result.content.startswith("yes")
93
  state["is_valid_answer"] = is_valid_answer
94
+ state["messages"].append(SystemMessage(content=f"Validator: {result.content}"))
 
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  return state
97
+
98
+ def route_validator(state):
99
+ if state["is_valid_answer"]:
100
+ return END
101
+ else:
102
+ return "agent"
103
+
104
  def build_workflow():
105
  """
106
  Build search workflow with conditional edge for evaluation
 
110
  workflow.add_node("action", tool_node)
111
  workflow.add_node("answer_question", answer_question)
112
  workflow.add_node("map_answer", map_answer)
 
113
  workflow.add_node("validator", validator)
114
 
 
115
  workflow.add_edge(START, "agent")
116
  workflow.add_edge("agent", "action")
117
+ workflow.add_edge("action", "answer_question")
118
 
 
 
 
 
 
 
 
 
 
 
119
  workflow.add_edge("answer_question", "map_answer")
120
  workflow.add_edge("map_answer", "validator")
121
 
122
+ workflow.add_conditional_edges("validator", route_validator, {"agent":"agent", END:END})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
  return workflow.compile()
125
 
126
+ if __name__ == "__main__":
 
 
127
  graph = build_workflow()
128
 
 
129
  mermaid_text = graph.get_graph().draw_mermaid()
130
 
131
+ print(mermaid_text)
models/models.py CHANGED
@@ -6,7 +6,7 @@ load_dotenv()
6
 
7
  anthropic_model = ChatAnthropic(
8
  model="claude-3-7-sonnet-20250219",
9
- temperature=0.4
10
  )
11
 
12
  groq_model = ChatGroq(
 
6
 
7
# Anthropic chat model shared by the answer/validation prompts.
anthropic_model = ChatAnthropic(
    model="claude-3-7-sonnet-20250219",
    temperature=0.7  # raised from 0.4 in this commit — more varied phrasing; TODO confirm intended
)
11
 
12
  groq_model = ChatGroq(
tools/sandbox.py CHANGED
@@ -79,6 +79,7 @@ def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCa
79
  """
80
  temp_dir = tempfile.mkdtemp()
81
  current_dir = os.getcwd()
 
82
 
83
  try:
84
  os.chdir(temp_dir)
@@ -88,13 +89,16 @@ def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCa
88
  'writeautomaticsub': True,
89
  'subtitleslangs': ['en'],
90
  'skip_download': True,
91
- 'outtmpl': 'subtitle',
 
 
 
 
92
  }
93
 
94
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
95
  ydl.extract_info(url, download=True)
96
 
97
- subtitle_content = ""
98
  subtitle_files = list(Path(temp_dir).glob("*.vtt")) + list(Path(temp_dir).glob("*.srt"))
99
 
100
  if subtitle_files:
 
79
  """
80
  temp_dir = tempfile.mkdtemp()
81
  current_dir = os.getcwd()
82
+ subtitle_content = ""
83
 
84
  try:
85
  os.chdir(temp_dir)
 
89
  'writeautomaticsub': True,
90
  'subtitleslangs': ['en'],
91
  'skip_download': True,
92
+ 'outtmpl': 'subtitle',
93
+ 'quiet': True,
94
+ 'no_warnings': False,
95
+ 'ignoreerrors': True,
96
+ 'geo_bypass': True,
97
  }
98
 
99
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
100
  ydl.extract_info(url, download=True)
101
 
 
102
  subtitle_files = list(Path(temp_dir).glob("*.vtt")) + list(Path(temp_dir).glob("*.srt"))
103
 
104
  if subtitle_files: