Spaces:
Sleeping
Sleeping
José Enrique
committed on
Commit
·
9ccff9e
1
Parent(s):
61c17f1
updated evaluation files
Browse files- evaluation.py +16 -16
- evaluation_langgraph.py +183 -0
- langgraph_agent.py +82 -0
- requirements.txt +3 -1
- responses_GAIA_Evaluation_DatasetSingle Smolagent with tools OpenAI 4o 3planningSteps youtube.json +6 -0
- responses_GAIA_Evaluation_DatasetSingle Smolagent with tools OpenAI 4o 3planningSteps.json +82 -0
- responses_GAIA_Evaluation_DatasetSingle Smolagent with tools Qwen 32B 3planningSteps.json +34 -0
- single_smolagent.py +2 -0
- tools/transcribe.py +102 -1
evaluation.py
CHANGED
|
@@ -124,16 +124,16 @@ def run_evaluation(agent,langfuse_dataset,run_name,model_id,trace_name):
|
|
| 124 |
) as root_span:
|
| 125 |
root_span.update(input=item.input)
|
| 126 |
task_id = item.metadata["task_id"]
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
|
| 138 |
# Link the trace to the dataset item for analysis
|
| 139 |
# item.link(
|
|
@@ -167,13 +167,13 @@ def evaluate():
|
|
| 167 |
print("Starting agent...")
|
| 168 |
agent = build_agents()
|
| 169 |
print("Agent built successfully.")
|
| 170 |
-
|
| 171 |
-
simple_evaluation("Dimitry","Clasu")
|
| 172 |
-
print("comparison", simple_evaluation("Dimitry","Clasu"))
|
| 173 |
-
print("sain", simple_evaluation('"Saint Petersburg"',"Saint Petersburg"))
|
| 174 |
-
print("pages", simple_evaluation('"132,133,136,195,245"',"132, 133, 134, 197, 245"))
|
| 175 |
-
print("veg", simple_evaluation('"cornstarch, freshly squeezed lemon juice, granulated sugar, pure vanilla extract, ripe strawberries"',"cornstarch, freshly squeezed lemon juice, granulated sugar, pure vanilla extract, ripe strawberries"))
|
| 176 |
-
print("right", simple_evaluation('"right"',"Right"))
|
| 177 |
|
| 178 |
if __name__ == "__main__":
|
| 179 |
evaluate()
|
|
|
|
| 124 |
) as root_span:
|
| 125 |
root_span.update(input=item.input)
|
| 126 |
task_id = item.metadata["task_id"]
|
| 127 |
+
if task_id == "a1e91b78-d3d8-4675-bb8d-62741b4b68a6":
|
| 128 |
+
try:
|
| 129 |
+
output = run_agent(agent,item.input,trace_name,item.metadata)
|
| 130 |
+
responses.append({"task_id": task_id, "submitted_answer": output})
|
| 131 |
+
root_span.update(output=output)
|
| 132 |
+
except Exception as e:
|
| 133 |
+
output = f"Error running agent: {e}"
|
| 134 |
|
| 135 |
+
# score the result against the expected output
|
| 136 |
+
root_span.score_trace(name="exact_match", value = simple_evaluation(output, item.expected_output))
|
| 137 |
|
| 138 |
# Link the trace to the dataset item for analysis
|
| 139 |
# item.link(
|
|
|
|
| 167 |
print("Starting agent...")
|
| 168 |
agent = build_agents()
|
| 169 |
print("Agent built successfully.")
|
| 170 |
+
run_evaluation(agent,"GAIA_Evaluation_Dataset","Single Smolagent with tools OpenAI 4o 3planningSteps youtube","OpenAI gpt4o","smolagent-trace")
|
| 171 |
+
# simple_evaluation("Dimitry","Clasu")
|
| 172 |
+
# print("comparison", simple_evaluation("Dimitry","Clasu"))
|
| 173 |
+
# print("sain", simple_evaluation('"Saint Petersburg"',"Saint Petersburg"))
|
| 174 |
+
# print("pages", simple_evaluation('"132,133,136,195,245"',"132, 133, 134, 197, 245"))
|
| 175 |
+
# print("veg", simple_evaluation('"cornstarch, freshly squeezed lemon juice, granulated sugar, pure vanilla extract, ripe strawberries"',"cornstarch, freshly squeezed lemon juice, granulated sugar, pure vanilla extract, ripe strawberries"))
|
| 176 |
+
# print("right", simple_evaluation('"right"',"Right"))
|
| 177 |
|
| 178 |
if __name__ == "__main__":
|
| 179 |
evaluate()
|
evaluation_langgraph.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from opentelemetry.trace import format_trace_id, get_tracer
|
| 5 |
+
from opentelemetry import trace
|
| 6 |
+
from opentelemetry.sdk.trace import TracerProvider
|
| 7 |
+
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
| 8 |
+
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
| 9 |
+
from openinference.instrumentation.smolagents import SmolagentsInstrumentor
|
| 10 |
+
from langfuse import observe
|
| 11 |
+
from PIL import Image
|
| 12 |
+
|
| 13 |
+
from langgraph_agent import build_agents
|
| 14 |
+
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
|
| 15 |
+
|
| 16 |
+
from langfuse import Langfuse
|
| 17 |
+
# Load environment variables
|
| 18 |
+
load_dotenv()
|
| 19 |
+
langfuse = Langfuse()
|
| 20 |
+
# Initialize OpenTelemetry Tracer
|
| 21 |
+
#trace_provider = TracerProvider()
|
| 22 |
+
#trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter()))
|
| 23 |
+
#trace.set_tracer_provider(trace_provider) # Set as global provider
|
| 24 |
+
#tracer = trace.get_tracer(__name__) # Get a tracer instance
|
| 25 |
+
|
| 26 |
+
#SmolagentsInstrumentor().instrument(tracer_provider=trace_provider)
|
| 27 |
+
|
| 28 |
+
def add_image(metadata) -> list:
    """Load the image attachment for a dataset item, if one exists.

    Args:
        metadata: Dataset-item metadata dict; reads "task_id" and the optional
            "attachment" filename (looked up under the local attachments/ dir).

    Returns:
        A list of PIL RGB images — empty when the item has no attachment, the
        file is missing, or the attachment is not a .jpg/.jpeg/.png image.
    """
    images = []
    task_id = metadata["task_id"]
    attachment = metadata.get("attachment", False)
    if not attachment:
        return images

    # Bug fix: use a local variable for the path. The original assigned
    # os.file_path, mutating the global `os` module as shared state.
    file_path = f"attachments/{attachment}"
    if not os.path.exists(file_path):
        print(f"No attachments found for task_id: {task_id}")
        return images

    print("Attachments found for task_id:", task_id)
    # Only image files can be loaded here; other attachment types are ignored.
    if os.path.isfile(file_path) and os.path.splitext(file_path)[1].lower() in ('.jpg', '.jpeg', '.png'):
        with open(file_path, "rb") as file:
            # Convert to RGB so downstream consumers get a consistent mode.
            images.append(Image.open(file).convert("RGB"))
    return images
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
#@observe()
|
| 52 |
+
#@observe()
def run_agent(agent, question, trace_name, metadata):
    """Run the LangGraph agent on one dataset question and return its answer.

    Args:
        agent: Compiled LangGraph graph (from build_agents()).
        question: The question text from the dataset item.
        trace_name: Trace label (currently unused; kept for interface parity
            with the smolagents evaluation script).
        metadata: Dataset-item metadata; must contain "task_id".

    Returns:
        The agent's final answer string, or an "Error running agent: ..."
        message when invocation fails.
    """
    # Load any image attachments referenced by the item (side effect: prints
    # whether an attachment was found). NOTE(review): the loaded images are
    # not yet passed to the agent — confirm whether that is intentional.
    images = add_image(metadata)

    # The task_id is appended so tools that need it (e.g. attachment lookup)
    # can recover it from the prompt.
    question = question + " The task_id is: " + metadata["task_id"]
    messages = [HumanMessage(content=question)]

    try:
        result = agent.invoke(
            {"messages": messages}
        )
        # Bug fix: the original never assigned `output` on the success path,
        # so `return output` raised NameError whenever the agent succeeded.
        # The final assistant message of the returned state is the answer.
        output = result["messages"][-1].content
    except Exception as e:
        print(f"Error running agent: {e}")
        output = f"Error running agent: {e}"

    return output
|
| 85 |
+
def simple_evaluation(output, expected_output):
    """Score an agent answer against the expected GAIA answer.

    Scalar answers score 1.0 for an exact match, 0.8 for a case-insensitive
    match, else 0.0. Comma-separated list answers score the fraction of
    expected items matched (case-insensitively) in the submitted list.

    Args:
        output: The agent's submitted answer (any type; coerced to str).
        expected_output: The ground-truth answer string.

    Returns:
        A float score in [0.0, 1.0].
    """
    # Normalize common wrapping artifacts: whitespace, quotes, dollar signs.
    trimmed_output = str(output).strip().strip('"').strip("$")

    expected_output_list = [item.strip() for item in expected_output.split(",") if item.strip()]
    output_list = [item.strip() for item in trimmed_output.split(",") if item.strip()]

    # Scalar comparison when the expected answer is not a comma-separated list.
    if len(expected_output_list) <= 1:
        if trimmed_output == expected_output:
            return 1.0
        # Bug fix: Python strings have .lower(), not the JavaScript-style
        # .toLower() the original called (which raised AttributeError).
        if expected_output.lower() == trimmed_output.lower():
            return 0.8
        return 0.0

    # List comparison: greedily match each expected item to at most one
    # as-yet-unmatched submitted item, case-insensitively.
    matched_items = [False] * len(output_list)
    for expected_item in expected_output_list:
        for i, candidate in enumerate(output_list):
            if not matched_items[i] and expected_item.lower() == candidate.lower():
                matched_items[i] = True
                break
    return sum(matched_items) / len(expected_output_list)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def run_evaluation(agent, langfuse_dataset, run_name, model_id, trace_name):
    """Run the agent over a Langfuse dataset, score each item, save answers.

    Args:
        agent: Compiled agent graph to evaluate.
        langfuse_dataset: Name of the Langfuse dataset to fetch.
        run_name: Label for this evaluation run; also part of the output
            filename.
        model_id: Model identifier (currently unused; kept for run metadata).
        trace_name: Trace label forwarded to run_agent.

    Side effects:
        Writes responses_<dataset><run_name>.json and flushes Langfuse.
    """
    dataset = langfuse.get_dataset(langfuse_dataset)
    responses = []
    # Run our agent against each dataset item.
    for item in dataset.items:
        print(f"Processing item with task_id: {item.metadata['task_id']}")
        with item.run(
            run_name=run_name
        ) as root_span:
            root_span.update(input=item.input)
            task_id = item.metadata["task_id"]
            # NOTE(review): this filter restricts the run to one hard-coded
            # task — presumably left over from debugging; confirm before a
            # full dataset run.
            if task_id == "a1e91b78-d3d8-4675-bb8d-62741b4b68a6":
                try:
                    output = run_agent(agent, item.input, trace_name, item.metadata)
                    responses.append({"task_id": task_id, "submitted_answer": output})
                    root_span.update(output=output)
                except Exception as e:
                    output = f"Error running agent: {e}"

                # Score the result against the expected output.
                root_span.score_trace(name="exact_match", value=simple_evaluation(output, item.expected_output))

    # Flush data to ensure all telemetry is sent.
    langfuse.flush()

    # Save the responses to a JSON file named after the dataset and run.
    print("Saving responses to file...")
    responses = [{"task_id": r["task_id"], "submitted_answer": r["submitted_answer"]} for r in responses]

    filename = langfuse_dataset + run_name
    # Bug fix: the original computed `filename` but then wrote a broken
    # constant path; the repo's existing responses_<dataset><run>.json files
    # confirm this naming scheme.
    output_file = f"responses_{filename}.json"
    with open(output_file, "w") as f:
        json.dump(responses, f, indent=4)
    print(f"Responses saved to {output_file}")
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def evaluate():
    """Build the LangGraph agent and run it against the GAIA evaluation dataset."""
    print("Starting agent...")
    agent = build_agents()
    print("Agent built successfully.")
    run_evaluation(agent, "GAIA_Evaluation_Dataset", "Single Langraph agent", "OpenAI gpt4o", "langraph-trace")


if __name__ == "__main__":
    evaluate()
|
langgraph_agent.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import List, TypedDict, Annotated, Optional
|
| 3 |
+
from langgraph.graph import StateGraph, START, END
|
| 4 |
+
from langchain_openai import ChatOpenAI
|
| 5 |
+
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
|
| 6 |
+
from langgraph.graph.message import add_messages
|
| 7 |
+
from langgraph.prebuilt import ToolNode, tools_condition
|
| 8 |
+
from tools.searchTools import wiki_search, mini_web_search, arvix_search
|
| 9 |
+
|
| 10 |
+
class AgentState(TypedDict):
    """Shared LangGraph state: an optional input-file path plus the chat history."""
    input_file: Optional[str]  # path to an attached input file, if the task has one
    messages: Annotated[list[AnyMessage], add_messages]  # add_messages reducer appends instead of overwriting
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Tools the LLM may call; they are executed by the ToolNode in build_agents().
tools = [
    wiki_search,
    mini_web_search,
    arvix_search,

]


# LLM model and tools.
# NOTE(review): vision_llm is declared but not referenced anywhere in this
# module — confirm whether it is still needed.
vision_llm = ChatOpenAI(model="gpt-4o")
llm = ChatOpenAI(model="gpt-4o")
# parallel_tool_calls=False forces at most one tool call per model turn.
llm_withtools = llm.bind_tools(tools, parallel_tool_calls = False)
|
| 28 |
+
|
| 29 |
+
def agent(state: AgentState):
    """LLM node: call the tool-bound model on the conversation so far.

    Args:
        state: Current graph state; reads "messages" and (optionally)
            "input_file".

    Returns:
        A partial state update whose "messages" entry is appended to the
        history by the add_messages reducer.
    """
    # Fix: the Tavily description was garbled under wiki_search in the
    # original prompt; each tool now documents itself.
    tools_description = """
    wiki_search(query: str) -> str:
        Search Wikipedia for a query and return maximum 2 results.

        Args:
            query: The search query.

    mini_web_search(query: str) -> str:
        Search Tavily for a query and return maximum 3 results.

        Args:
            query: The search query

    arvix_search(query: str) -> str:
        Search Arxiv for a query and return maximum 3 result.

        Args:
            query: The search query.
    """
    sys_message = SystemMessage(content=f"""You are a helpful AI agent that can use tools to answer questions.
You can use the following tools:{tools_description}
PLEASE FOLLOW THE INSTRUCTIONS FOR ANSWERING CAREFULLY:
Your answer should follow the template: FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
""")

    return {
        # Bug fix: callers invoke the graph with only {"messages": ...}, so
        # "input_file" may be absent — .get() avoids a KeyError.
        "input_file": state.get("input_file"),
        "messages": [llm_withtools.invoke([sys_message] + state["messages"])]
    }
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def build_agents():
    """Compile and return the ReAct-style graph: LLM node <-> tool executor."""
    graph = StateGraph(AgentState)

    # Nodes: the LLM step and the node that executes requested tool calls.
    graph.add_node("agent", agent)
    graph.add_node("tools", ToolNode(tools))

    # Edges: start at the LLM; tools_condition routes to "tools" when the
    # model requested a tool call (otherwise to END); tool output loops back.
    graph.add_edge(START, "agent")
    graph.add_conditional_edges(
        "agent",
        tools_condition,
    )
    graph.add_edge("tools", "agent")

    return graph.compile()
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
|
requirements.txt
CHANGED
|
@@ -31,4 +31,6 @@ pandas
|
|
| 31 |
numpy
|
| 32 |
beautifulsoup4
|
| 33 |
openai
|
| 34 |
-
pydub
|
|
|
|
|
|
|
|
|
| 31 |
numpy
|
| 32 |
beautifulsoup4
|
| 33 |
openai
|
| 34 |
+
pydub
|
| 35 |
+
yt-dlp
|
| 36 |
+
opencv-python
|
responses_GAIA_Evaluation_DatasetSingle Smolagent with tools OpenAI 4o 3planningSteps youtube.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"task_id": "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
|
| 4 |
+
"submitted_answer": "The task could not be completed due to limitations in accessing and analyzing video content directly through current tools."
|
| 5 |
+
}
|
| 6 |
+
]
|
responses_GAIA_Evaluation_DatasetSingle Smolagent with tools OpenAI 4o 3planningSteps.json
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"task_id": "5a0c1adf-205e-4841-a666-7c3ef95def9d",
|
| 4 |
+
"submitted_answer": "Dmitry"
|
| 5 |
+
},
|
| 6 |
+
{
|
| 7 |
+
"task_id": "7bd855d8-463d-4ed5-93ca-5fe35145f733",
|
| 8 |
+
"submitted_answer": "$89706.00"
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"task_id": "a0c07678-e491-4bbc-8f0b-07405144218f",
|
| 12 |
+
"submitted_answer": "Yamasaki, Uehara"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"task_id": "cf106601-ab4f-4af9-b045-5295fe67b37d",
|
| 16 |
+
"submitted_answer": "LUX"
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"task_id": "bda648d7-d618-4883-88f4-3466eabd860e",
|
| 20 |
+
"submitted_answer": "Saint Petersburg"
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"task_id": "840bfca7-4f7b-481a-8794-c560c340185d",
|
| 24 |
+
"submitted_answer": "The work performed by R. G. Arendt related to the paper \"The Population of the Galactic Center Filaments: Position Angle Distribution Reveals a Degree-scale Collimated Outflow from Sgr A*\" was supported under the NASA award number 80GSFC21M0002."
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"task_id": "1f975693-876d-457b-a649-393859e79bf3",
|
| 28 |
+
"submitted_answer": "132,133,134,197,245"
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"task_id": "3f57289b-8c60-48be-bd80-01f8099ca449",
|
| 32 |
+
"submitted_answer": 519
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"task_id": "f918266a-b3e0-4914-865d-4faa564f1aef",
|
| 36 |
+
"submitted_answer": 0
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"task_id": "305ac316-eef6-4446-960a-92d80d542f82",
|
| 40 |
+
"submitted_answer": "Wojciech"
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"task_id": "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
|
| 44 |
+
"submitted_answer": "cornstarch, freshly squeezed lemon juice, granulated sugar, pure vanilla extract, ripe strawberries"
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"task_id": "3cef3a44-215e-4aed-8e3b-b1e3f08063b7",
|
| 48 |
+
"submitted_answer": "broccoli, celery, lettuce, sweet potatoes"
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"task_id": "cabe07ed-9eca-40ea-8ead-410ef5e83f91",
|
| 52 |
+
"submitted_answer": "Louvrier"
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"task_id": "9d191bce-651d-4746-be2d-7ef8ecadb9c2",
|
| 56 |
+
"submitted_answer": "Extremely"
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"task_id": "6f37996b-2ac7-44b0-8e68-6d28256631b4",
|
| 60 |
+
"submitted_answer": "b,e"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"task_id": "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
|
| 64 |
+
"submitted_answer": "It appears that navigating through the available content has yet to yield direct information regarding the nominator of the \"Giganotosaurus\" featured article nomination. However, using data available from my training, the nomination of \"Giganotosaurus\" as a Featured Article was put forward by the Wikipedia user \"FunkMonk.\""
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"task_id": "cca530fc-4052-43b2-b130-b30968d8aa44",
|
| 68 |
+
"submitted_answer": "Error running agent: 'str' object has no attribute 'token_usage'"
|
| 69 |
+
},
|
| 70 |
+
{
|
| 71 |
+
"task_id": "2d83110e-a098-4ebb-9987-066c06fa42d0",
|
| 72 |
+
"submitted_answer": "right"
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"task_id": "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
|
| 76 |
+
"submitted_answer": "3"
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"task_id": "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
| 80 |
+
"submitted_answer": "To determine how many studio albums Mercedes Sosa published between 2000 and 2009, we can rely on the extracted information from a review of Mercedes Sosa's discography and award records listed on or linked from Wikipedia:\n\nFrom the search results and other visible references on Wikipedia pages:\n- **2000**: Misa Criolla (acknowledged with a Grammy award)\n- **2003**: Ac\u00fastico\n- **2006**: Coraz\u00f3n Libre\n- **2009**: Cantora 1 (part of Cantora, un Viaje \u00cdntimo)\n\nThese references match the cited awards and recognitions indicating these are studio albums released within the specified time frame. So, there are **four** studio albums released during this period according to the mentioned resources.\n\n**Final Answer**: Mercedes Sosa released **four** studio albums between 2000 and 2009."
|
| 81 |
+
}
|
| 82 |
+
]
|
responses_GAIA_Evaluation_DatasetSingle Smolagent with tools Qwen 32B 3planningSteps.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"task_id": "5a0c1adf-205e-4841-a666-7c3ef95def9d",
|
| 4 |
+
"submitted_answer": "Dmitry"
|
| 5 |
+
},
|
| 6 |
+
{
|
| 7 |
+
"task_id": "7bd855d8-463d-4ed5-93ca-5fe35145f733",
|
| 8 |
+
"submitted_answer": "89706.00"
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"task_id": "a0c07678-e491-4bbc-8f0b-07405144218f",
|
| 12 |
+
"submitted_answer": "Based on the information provided, the pitchers with the numbers before and after Taish\u014d Tamai's number (19) as of July 2023 are:\n\n**Pitcher Before, Pitcher After: Sachiya, Kenta**\n\nSo, the final answer is:\n**Sachiya, Kenta**"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"task_id": "cf106601-ab4f-4af9-b045-5295fe67b37d",
|
| 16 |
+
"submitted_answer": "Given the issues encountered with the CSV file, I will use a different approach to solve the task. I will manually extract the data from the Olympedia website and combine it with the IOC country codes.\n\nHere is the plan:\n1. Extract the athlete counts for each country from the Olympedia webpage.\n2. Manually map the country names to their IOC country codes.\n3. Sort the data by the number of athletes in ascending order and then alphabetically by IOC country code.\n4. Identify the country with the least number of athletes.\n5. Provide the IOC country code of the identified country as the final answer.\n\nLet's proceed with this plan.\n\n### Step 1: Extract the athlete counts for each country from the Olympedia webpage.\n\nI will manually extract the data from the Olympedia webpage and store it in a dictionary.\n\n### Step 2: Manually map the country names to their IOC country codes.\n\nI will use a dictionary to map the country names to their IOC country codes.\n\n### Step 3: Sort the data by the number of athletes in ascending order and then alphabetically by IOC country code.\n\n### Step 4: Identify the country with the least number of athletes.\n\n### Step 5: Provide the IOC country code of the identified country as the final answer.\n\nLet's implement this plan.\n```py\n# Step 1: Manually extract the athlete counts for each country from the Olympedia webpage\nathlete_counts = {\n 'ARG': 81,\n 'AUS': 18,\n 'AUT': 73,\n 'BEL': 187,\n 'BUL': 5,\n 'CAN': 68,\n 'CHI': 38,\n 'CUB': 1,\n 'DEN': 91,\n 'EGY': 32,\n 'ESP': 80,\n 'EST': 20,\n 'FIN': 69,\n 'FRA': 294,\n 'GBR': 232,\n 'GER': 298,\n 'GRE': 23,\n 'HAI': 2,\n 'HUN': 109,\n 'IND': 21,\n 'IRL': 38,\n 'ITA': 174,\n 'JPN': 40,\n 'LAT': 17,\n 'LTU': 12,\n 'LUX': 48,\n 'MEX': 30,\n 'MLT': 9,\n 'MON': 10,\n 'NED': 266,\n 'NOR': 52,\n 'NZL': 9,\n 'PAN': 1,\n 'PHI': 4,\n 'POL': 100,\n 'POR': 32,\n 'ROU': 21,\n 'RSA': 24,\n 'SUI': 133,\n 'SWE': 100,\n 'TCH': 69,\n 'TUR': 31,\n 'URU': 17,\n 'USA': 
281,\n 'YUG': 34,\n 'ZIM': 2\n}\n\n# Step 2: Manually map the country names to their IOC country codes\ncountry_to_ioc = {\n 'Argentina': 'ARG',\n 'Australia': 'AUS',\n 'Austria': 'AUT',\n 'Belgium': 'BEL',\n 'Bulgaria': 'BUL',\n 'Canada': 'CAN',\n 'Chile': 'CHI',\n 'Cuba': 'CUB',\n 'Denmark': 'DEN',\n 'Egypt': 'EGY',\n 'Spain': 'ESP',\n 'Estonia': 'EST',\n 'Finland': 'FIN',\n 'France': 'FRA',\n 'Great Britain': 'GBR',\n 'Germany': 'GER',\n 'Greece': 'GRE',\n 'Haiti': 'HAI',\n 'Hungary': 'HUN',\n 'India': 'IND',\n 'Ireland': 'IRL',\n 'Italy': 'ITA',\n 'Japan': 'JPN',\n 'Latvia': 'LAT',\n 'Lithuania': 'LTU',\n 'Luxembourg': 'LUX',\n 'Mexico': 'MEX',\n 'Malta': 'MLT',\n 'Monaco': 'MON',\n 'Netherlands': 'NED',\n 'Norway': 'NOR',\n 'New Zealand': 'NZL',\n 'Panama': 'PAN',\n 'Philippines': 'PHI',\n 'Poland': 'POL',\n 'Portugal': 'POR',\n 'Romania': 'ROU',\n 'South Africa': 'RSA',\n 'Switzerland': 'SUI',\n 'Sweden': 'SWE',\n 'Czechoslovakia': 'TCH',\n 'Turkey': 'TUR',\n 'Uruguay': 'URU',\n 'United States': 'USA',\n 'Yugoslavia': 'YUG',\n 'Zimbabwe': 'ZIM'\n}\n\n# Step 3: Sort the data by the number of athletes in ascending order and then alphabetically by IOC country code\nsorted_countries = sorted(athlete_counts.items(), key=lambda x: (x[1], x[0]))\n\n# Step 4: Identify the country with the least number of athletes\nleast_athletes_country_code = sorted_countries[0][0]\n\n# Step 5: Provide the IOC country code of the identified country as the final answer\nprint(f\"The IOC country code of the country with the least number of athletes is {least_athletes_country_code}.\")\n```<end_code>\nCalling tools:\n[{'id': 'call_15', 'type': 'function', 'function': {'name': 'python_interpreter', 'arguments': \"# Step 1: Manually extract the athlete counts for each country from the Olympedia webpage\\nathlete_counts = {\\n 'ARG': 81,\\n 'AUS': 18,\\n 'AUT': 73,\\n 'BEL': 187,\\n 'BUL': 5,\\n 'CAN': 68,\\n 'CHI': 38,\\n 'CUB': 1,\\n 'DEN': 91,\\n 'EGY': 32,\\n 'ESP': 80,\\n 'EST': 
20,\\n 'FIN': 69,\\n 'FRA': 294,\\n 'GBR': 232,\\n 'GER': 298,\\n 'GRE': 23,\\n 'HAI': 2,\\n 'HUN': 109,\\n 'IND': 21,\\n 'IRL': 38,\\n 'ITA': 174,\\n 'JPN': 40,\\n 'LAT': 17,\\n 'LTU': 12,\\n 'LUX': 48,\\n 'MEX': 30,\\n 'MLT': 9,\\n 'MON': 10,\\n 'NED': 266,\\n 'NOR': 52,\\n 'NZL': 9,\\n 'PAN': 1,\\n 'PHI': 4,\\n 'POL': 100,\\n 'POR': 32,\\n 'ROU': 21,\\n 'RSA': 24,\\n 'SUI': 133,\\n 'SWE': 100,\\n 'TCH': 69,\\n 'TUR': 31,\\n 'URU': 17,\\n 'USA': 281,\\n 'YUG': 34,\\n 'ZIM': 2\\n}\\n\\n# Step 2: Manually map the country names to their IOC country codes\\ncountry_to_ioc = {\\n 'Argentina': 'ARG',\\n 'Australia': 'AUS',\\n 'Austria': 'AUT',\\n 'Belgium': 'BEL',\\n 'Bulgaria': 'BUL',\\n 'Canada': 'CAN',\\n 'Chile': 'CHI',\\n 'Cuba': 'CUB',\\n 'Denmark': 'DEN',\\n 'Egypt': 'EGY',\\n 'Spain': 'ESP',\\n 'Estonia': 'EST',\\n 'Finland': 'FIN',\\n 'France': 'FRA',\\n 'Great Britain': 'GBR',\\n 'Germany': 'GER',\\n 'Greece': 'GRE',\\n 'Haiti': 'HAI',\\n 'Hungary': 'HUN',\\n 'India': 'IND',\\n 'Ireland': 'IRL',\\n 'Italy': 'ITA',\\n 'Japan': 'JPN',\\n 'Latvia': 'LAT',\\n 'Lithuania': 'LTU',\\n 'Luxembourg': 'LUX',\\n 'Mexico': 'MEX',\\n 'Malta': 'MLT"
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"task_id": "bda648d7-d618-4883-88f4-3466eabd860e",
|
| 20 |
+
"submitted_answer": "Error running agent: Error in generating model output:\n503 Server Error: Service Temporarily Unavailable for url: https://router.huggingface.co/together/v1/chat/completions"
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"task_id": "840bfca7-4f7b-481a-8794-c560c340185d",
|
| 24 |
+
"submitted_answer": "Error running agent: Error in generating model output:\n422 Client Error: Unprocessable Entity for url: https://router.huggingface.co/together/v1/chat/completions (Request ID: o1M6n9q-4YNCb4-958746faadec07fb)\n\n{'message': 'Input validation error: `inputs` tokens + `max_new_tokens` must be <= 32769. Given: 45347 `inputs` tokens and 2048 `max_new_tokens`', 'type': 'invalid_request_error', 'param': None, 'code': None}\n{\n \"id\": \"o1M6n9q-4YNCb4-958746faadec07fb\",\n \"error\": {\n \"message\": \"Input validation error: `inputs` tokens + `max_new_tokens` must be <= 32769. Given: 45347 `inputs` tokens and 2048 `max_new_tokens`\",\n \"type\": \"invalid_request_error\",\n \"param\": null,\n \"code\": null\n }\n}\n"
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"task_id": "1f975693-876d-457b-a649-393859e79bf3",
|
| 28 |
+
"submitted_answer": "132,197,245"
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"task_id": "3f57289b-8c60-48be-bd80-01f8099ca449",
|
| 32 |
+
"submitted_answer": ""
|
| 33 |
+
}
|
| 34 |
+
]
|
single_smolagent.py
CHANGED
|
@@ -19,6 +19,7 @@ from openinference.instrumentation.smolagents import SmolagentsInstrumentor
|
|
| 19 |
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
| 20 |
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
| 21 |
from mcp import StdioServerParameters
|
|
|
|
| 22 |
|
| 23 |
trace_provider = TracerProvider()
|
| 24 |
trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter()))
|
|
@@ -62,6 +63,7 @@ def build_agents():
|
|
| 62 |
transcribe_mp3_with_whisper],
|
| 63 |
model=model,
|
| 64 |
additional_authorized_imports=["time","pandas","json","numpy","markdownify","requests","re","openpyxl","beautifulsoup4"],
|
|
|
|
| 65 |
planning_interval=3,
|
| 66 |
max_steps=10,
|
| 67 |
add_base_tools=True)
|
|
|
|
| 19 |
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
| 20 |
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
| 21 |
from mcp import StdioServerParameters
|
| 22 |
+
from tools.transcribe import load_images
|
| 23 |
|
| 24 |
trace_provider = TracerProvider()
|
| 25 |
trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter()))
|
|
|
|
| 63 |
transcribe_mp3_with_whisper],
|
| 64 |
model=model,
|
| 65 |
additional_authorized_imports=["time","pandas","json","numpy","markdownify","requests","re","openpyxl","beautifulsoup4"],
|
| 66 |
+
step_callbacks=[load_images],
|
| 67 |
planning_interval=3,
|
| 68 |
max_steps=10,
|
| 69 |
add_base_tools=True)
|
tools/transcribe.py
CHANGED
|
@@ -7,6 +7,42 @@ from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLo
|
|
| 7 |
|
| 8 |
from langchain.schema import Document
|
| 9 |
from smolagents import tool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
@tool
|
| 12 |
def parse_youtube_video(url:str,task_id:str,save_dir:str="attachments")->Document:
|
|
@@ -26,4 +62,69 @@ def parse_youtube_video(url:str,task_id:str,save_dir:str="attachments")->Documen
|
|
| 26 |
else:
|
| 27 |
parser = OpenAIWhisperParser()
|
| 28 |
document = GenericLoader(loader, parser).load()
|
| 29 |
-
return document
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
from langchain.schema import Document
|
| 9 |
from smolagents import tool
|
| 10 |
+
from yt_dlp import YoutubeDL
|
| 11 |
+
from PIL import Image
|
| 12 |
+
import cv2
|
| 13 |
+
import numpy as np
|
| 14 |
+
from smolagents.agents import ActionStep
|
| 15 |
+
from smolagents import CodeAgent
|
| 16 |
+
|
| 17 |
+
def get_video_frames(video_path: str, task_id: str) -> list[dict]:
    """Sample 7 evenly spaced frames from a video and save them as PNGs.

    Args:
        video_path (str): Path to the local video file.
        task_id (str): Task identifier used to namespace the saved frame files.

    Returns:
        list[dict]: One dict per successfully read frame, with keys:
            "image"     - path of the saved PNG file,
            "timestamp" - frame time in seconds (rounded to 2 decimals).
    """
    vidcap = cv2.VideoCapture(video_path)
    total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = vidcap.get(cv2.CAP_PROP_FPS)

    frames = []
    # 7 evenly spaced frame indices spanning the whole clip.
    frame_indices = np.linspace(0, total_frames - 1, 7, dtype=int)

    for i in frame_indices:
        vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
        success, image = vidcap.read()
        if success:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV is BGR; PIL expects RGB
            pil_image = Image.fromarray(image)
            timestamp = round(i / fps, 2)
            frames.append((pil_image, timestamp))

    vidcap.release()

    observations = []
    for image, timestamp in frames:
        # Bug fix: the file is now saved at the same path reported in the
        # observation. Previously it was saved as attachments/frame_<ts>.png
        # while the observation claimed attachments/<task_id>_frame_<ts>.png,
        # so downstream consumers pointed at files that did not exist.
        frame_path = f"attachments/{task_id}_frame_{timestamp}.png"
        image.save(frame_path, format='PNG')
        observations.append({
            "image": frame_path,
            "timestamp": timestamp,
        })

    return observations
|
| 45 |
+
|
| 46 |
|
| 47 |
@tool
|
| 48 |
def parse_youtube_video(url:str,task_id:str,save_dir:str="attachments")->Document:
|
|
|
|
| 62 |
else:
|
| 63 |
parser = OpenAIWhisperParser()
|
| 64 |
document = GenericLoader(loader, parser).load()
|
| 65 |
+
return document
|
| 66 |
+
|
| 67 |
+
@tool
def download_youtube_video(url: str, task_id: str, save_dir: str = "attachments") -> list[dict]:
    """Download a YouTube video and return a list of sampled frames.

    Args:
        url (str): The URL of the YouTube video.
        task_id (str): The task ID used to name the downloaded file and frames.
        save_dir (str): The directory to save the downloaded video. Defaults to "attachments".

    Returns:
        list[dict]: Frame observations from ``get_video_frames`` — each entry
        holds the saved frame image path and its timestamp in seconds.
    """
    print(f"Downloading video from YouTube: {url}")
    ydl_opts = {
        # Prefer a small (<=480p) mp4 video-only stream to keep downloads fast.
        'format': 'bestvideo[ext=mp4][height<=480]/bestvideo[ext=mp4]/bestvideo',
        'outtmpl': os.path.join(save_dir, task_id + '.%(ext)s'),
        'progress_hooks': [lambda d: print(d['status'])],
        'ignoreerrors': True,
        'no_warnings': False,
        'quiet': True,  # fix: 'log_verbosity' is not a valid yt-dlp option
    }
    # Fallback path in case extraction fails before we can resolve the real one.
    video_path = os.path.join(save_dir, task_id + '.mp4')
    try:
        with YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            video_title = info_dict.get('title', 'video')
            # Fix: the format selector can fall back to non-mp4 streams, so the
            # hard-coded '.mp4' path could miss the actual file. Ask yt-dlp for
            # the real output filename instead.
            video_path = ydl.prepare_filename(info_dict)
            print(f"Successfully downloaded '{video_title}' to '{save_dir}'")
    except Exception as e:
        print(f"An error occurred: {e}")

    # Sample frames from the downloaded video.
    return get_video_frames(video_path, task_id)
|
| 102 |
+
|
| 103 |
+
def load_images(step_log: ActionStep, agent: CodeAgent) -> None:
    """Step callback for the CodeAgent: currently only logs the model output.

    NOTE(review): the original carried commented-out logic (removed here) for
    pruning observation images from steps older than two steps back, as in the
    smolagents web-browser example; reinstate it once image observations are
    actually attached to steps.

    Args:
        step_log (ActionStep): The step record produced by the agent.
        agent (CodeAgent): The running agent instance (unused for now).
    """
    # Removed the unused `current_step` local and dead commented-out code;
    # runtime behavior is unchanged.
    print(step_log.model_output)
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
if __name__ == "__main__":
    # Manual smoke test: download a sample video and extract frames.
    # Alternate sample clip: https://www.youtube.com/watch?v=L1vXCYZAYYM
    test_url = "https://www.youtube.com/watch?v=1htKBjuUWec"
    test_task_id = "test_task"
    output_dir = "attachments"
    download_youtube_video(test_url, test_task_id, output_dir)
|