add agent #1
opened by thivy
- .DS_Store +0 -0
- .gitattributes +0 -1
- .gitignore +0 -2
- __pycache__/agents.cpython-312.pyc +0 -0
- __pycache__/tools.cpython-312.pyc +0 -0
- agents.py +0 -138
- app.py +9 -18
- files/1f975693-876d-457b-a649-393859e79bf3.mp3 +0 -3
- files/7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx +0 -0
- files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3 +0 -3
- files/cca530fc-4052-43b2-b130-b30968d8aa44.png +0 -0
- files/f918266a-b3e0-4914-865d-4faa564f1aef.py +0 -35
- qa_graph.py +0 -225
- requirements.txt +1 -14
- system_prompt.txt +0 -7
- tools.py +0 -391
- yolo11n.pt +0 -3
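
Taken together, this PR strips the Space back to the course template: the LangGraph agent stack (agents.py, qa_graph.py, tools.py), its sample task files, and the yolo11n.pt weights are deleted, requirements.txt shrinks to gradio and requests, and app.py reverts BasicAgent to the default fixed-answer stub.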
.DS_Store
DELETED
Binary file (6.15 kB)
.gitattributes
CHANGED
@@ -33,4 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-*.mp3 filter=lfs diff=lfs merge=lfs -text
.gitignore
DELETED
@@ -1,2 +0,0 @@
-.env
-.venv
__pycache__/agents.cpython-312.pyc
DELETED
Binary file (4.12 kB)

__pycache__/tools.cpython-312.pyc
DELETED
Binary file (15.5 kB)
agents.py
DELETED
@@ -1,138 +0,0 @@
-from tools import (
-    general_tools,
-    file_agent_tools,
-    data_agent_tools,
-    math_agent_tools,
-    analyze_video_tools,
-    youtube_transcript_tools,
-    google_search,
-    wiki_search,
-    arxiv_search
-)
-from langgraph.prebuilt import create_react_agent
-from langgraph.checkpoint.memory import MemorySaver
-from langchain_openai import ChatOpenAI
-from langgraph_supervisor import create_supervisor
-
-
-llm = ChatOpenAI(model="o4-mini")
-
-memory = MemorySaver()
-
-with open("system_prompt.txt", "r") as f:
-    prompt = f.read()
-
-general_agent = create_react_agent(
-    model=llm,
-    tools=general_tools(),
-    checkpointer=memory,
-    prompt=prompt
-)
-
-# Create agents
-file_agent = create_react_agent(
-    model=llm,
-    tools=file_agent_tools(),
-    name="file_reader",
-    prompt="You read files. Use tools to read files."
-)
-
-math_agent = create_react_agent(
-    model=llm,
-    tools=math_agent_tools(),
-    name="calculator",
-    prompt="You do math. Use tools for all calculations."
-)
-
-data_agent = create_react_agent(
-    model=llm,
-    tools=data_agent_tools(),
-    name="data_processor",
-    prompt="You process data. Use tools to filter and extract data."
-)
-
-# Create video analysis agents
-video_agent = create_react_agent(
-    model=llm,
-    tools=analyze_video_tools(),
-    name="video_analyzer",
-    prompt="""You analyze visual content in videos. Use tools to detect and track objects.
-The object_detection tool is a general object detection model. Use this for general cases.
-The analyze_video_content uses both the object detection model and a vision llm to analyze frames with content given a question.
-Use this for more difficult questions."""
-)
-
-transcript_agent = create_react_agent(
-    model=llm,
-    tools=youtube_transcript_tools(),
-    name="transcript_analyzer",
-    prompt="You analyze audio/speech content in videos. Use tools to get transcripts."
-)
-
-wiki_agent = create_react_agent(
-    model=llm,
-    tools=[wiki_search],
-    name="wiki_analyst",
-    prompt="You search information from wikipedia."
-)
-
-google_agent = create_react_agent(
-    model=llm,
-    tools=[google_search],
-    name="google_search_analyst",
-    prompt="You search information from google search."
-)
-
-arxiv_agent = create_react_agent(
-    model=llm,
-    tools=[arxiv_search],
-    name="arxiv_analyst",
-    prompt="You search information from arxiv."
-)
-
-excel_prompt = """You are a supervisor. You coordinate file_reader, calculator, and data_processor to solve problems step by step.
-Do not do calculations or file reading yourself, use the tools.
-Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
-YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
-If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
-If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
-If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
-"""
-
-video_analyzer_prompt = """You coordinate video_analyzer and transcript_analyzer to answer questions about YouTube videos.
-Use video_analyzer for visual questions (objects, people, actions). Use transcript_analyzer for audio questions (what people say).
-Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
-YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
-If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
-If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
-If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
-"""
-
-search_analyzer_prompt = """You coordinate different search agents to answer questions.
-Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
-YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
-If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
-If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
-If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
-"""
-
-# Supervisor
-excel_supervisor = create_supervisor(
-    [file_agent, math_agent, data_agent],
-    model=llm,
-    prompt=excel_prompt
-).compile()
-
-# Video supervisor
-video_supervisor = create_supervisor(
-    [video_agent, transcript_agent],
-    model=llm,
-    prompt=video_analyzer_prompt
-).compile()
-
-# search supervisor
-search_supervisor = create_supervisor(
-    [wiki_agent, google_agent, arxiv_agent],
-    model=llm,
-    prompt=search_analyzer_prompt
-).compile()
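
For orientation: the compiled supervisors above were driven through LangGraph's messages-state interface (see the deleted qa_graph.py below, which uses exactly this call shape). A minimal sketch, assuming agents.py is importable and OPENAI_API_KEY is set:

from agents import search_supervisor

result = search_supervisor.invoke({
    "messages": [{"role": "user", "content": "Who maintains arXiv?"}]
})
# A compiled supervisor returns the full message state; the reply is the last message.
print(result["messages"][-1].content)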
app.py
CHANGED
@@ -3,7 +3,6 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-from qa_graph import build_graph, Question, extract_final_answer
 
 # (Keep Constants as is)
 # --- Constants ---
@@ -12,20 +11,13 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
-    """A langgraph agent."""
     def __init__(self):
         print("BasicAgent initialized.")
-
-
-
-        print(f"Agent
-
-        messages = self.graph.invoke({"question": question, "decision": "",
-                                      "answer": ""})
-        answer = messages['answer']
-        answer = extract_final_answer(answer)[1]
-        print(answer)
-        return answer
+    def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        fixed_answer = "This is a default answer."
+        print(f"Agent returning fixed answer: {fixed_answer}")
+        return fixed_answer
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
@@ -81,15 +73,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-
-
-
-        question_text = item.question
+    for item in questions_data:
+        task_id = item.get("task_id")
+        question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(
+            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
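
After this change BasicAgent is a fixed-answer stub; the only contract run_and_submit_all relies on is a callable that takes the question string and returns the answer string. A minimal sketch of a drop-in replacement (EchoAgent is a hypothetical name, not part of the PR):

class EchoAgent:
    """Hypothetical stand-in; any callable matching __call__(question: str) -> str works."""
    def __call__(self, question: str) -> str:
        # A real agent would route `question` to a model here.
        return question.strip().upper()

agent = EchoAgent()
print(agent("what is 6 * 7?"))  # -> "WHAT IS 6 * 7?"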
files/1f975693-876d-457b-a649-393859e79bf3.mp3
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:200f767e732b49efef5c05d128903ee4d2c34e66fdce7f5593ac123b2e637673
-size 280868
files/7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx
DELETED
Binary file (5.29 kB)
files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b218c951c1f888f0bbe6f46c080f57afc7c9348fffc7ba4da35749ff1e2ac40f
-size 179304
files/cca530fc-4052-43b2-b130-b30968d8aa44.png
DELETED
Binary file (63.1 kB)
files/f918266a-b3e0-4914-865d-4faa564f1aef.py
DELETED
@@ -1,35 +0,0 @@
-from random import randint
-import time
-
-class UhOh(Exception):
-    pass
-
-class Hmm:
-    def __init__(self):
-        self.value = randint(-100, 100)
-
-    def Yeah(self):
-        if self.value == 0:
-            return True
-        else:
-            raise UhOh()
-
-def Okay():
-    while True:
-        yield Hmm()
-
-def keep_trying(go, first_try=True):
-    maybe = next(go)
-    try:
-        if maybe.Yeah():
-            return maybe.value
-    except UhOh:
-        if first_try:
-            print("Working...")
-            print("Please wait patiently...")
-        time.sleep(0.1)
-        return keep_trying(go, first_try=False)
-
-if __name__ == "__main__":
-    go = Okay()
-    print(f"{keep_trying(go)}")
qa_graph.py
DELETED
@@ -1,225 +0,0 @@
-from dataclasses import dataclass
-from langgraph.graph import START, StateGraph, END
-from typing import TypedDict
-from agents import general_agent, excel_supervisor, video_supervisor
-import os
-from typing import List
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-os.environ["OPENAI_API_KEY"] = str(OPENAI_API_KEY)
-
-@dataclass
-class Question:
-    task_id: str
-    question: str
-    Level: str
-    file_name: str
-    local_file_path: str|None = None
-
-def get_file_type(file_path: str) -> str:
-    """Determine file type from extension."""
-    if not file_path:
-        return "none"
-
-    file_path = file_path.lower()
-
-    if file_path.endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
-        return "image"
-    elif file_path.endswith(('.xlsx', '.xls', '.csv')):
-        return "excel"
-    elif file_path.endswith('.py'):
-        return "python"
-    elif file_path.endswith(('.mp3', '.wav', '.m4a', '.ogg')):
-        return "audio"
-    else:
-        return "unknown"
-
-def answer_qery(question: str, thread_id: str = "default") -> str:
-    """Ask the agent a question."""
-    config = {"configurable": {"thread_id": thread_id}, "recursion_limit": 50}
-
-    try:
-        result = video_supervisor.invoke({
-            "messages": [
-                {"role": "user", "content": question}
-            ]
-        })
-        return result["messages"][-1].content
-    except Exception as e:
-        return f"Error: {str(e)}"
-
-def ask_question(question: str, thread_id: str = "default") -> str:
-    """Ask the agent a question."""
-    config = {"configurable": {"thread_id": thread_id}, "recursion_limit": 100}
-
-    try:
-        response = general_agent.invoke(
-            {"messages": [{"role": "user", "content": question}]},
-            config=config
-        )
-        return response["messages"][-1].content
-    except Exception as e:
-        return f"Error: {str(e)}"
-
-def ask_question_with_file(question: Question, thread_id: str = "default") -> str:
-    """Ask the agent a question, with optional file analysis."""
-    q = question.question
-    root_file = "./files"
-    file_path = root_file + "/" + question.file_name
-    if not question.file_name:
-        return ask_question(q, thread_id)
-
-    file_type = get_file_type(file_path)
-
-    # Create enhanced question with file guidance
-    if file_type == "image":
-        enhanced_question = f"{q}\n\nThere is an image file at '{file_path}'. Use the analyze_image tool to examine it."
-    elif file_type == "excel":
-        enhanced_question = f"{q}\n\nFile path: {file_path}"
-        result = excel_supervisor.invoke({
-            "messages": [
-                {"role": "user", "content": enhanced_question}
-            ]
-        })
-        return result["messages"][-1].content
-    elif file_type == "python":
-        enhanced_question = f"{q}\n\nThere is a Python file at '{file_path}'. Use the read_python_file tool to examine it."
-    elif file_type == "audio":
-        enhanced_question = f"{q}\n\nThere is an audio file at '{file_path}'. Use the transcribe_audio tool to process it."
-    else:
-        enhanced_question = f"{q}\n\nThere is a file at '{file_path}' but I'm not sure what type it is."
-
-    return ask_question(enhanced_question, thread_id)
-
-def ask_question_youtube(question: Question) -> str:
-    """Ask the agent a question, with optional file analysis."""
-    q = question.question
-    result = video_supervisor.invoke({
-        "messages": [
-            {"role": "user", "content": q}
-        ]
-    })
-    return result["messages"][-1].content
-
-# State
-class State(TypedDict):
-    question: Question
-    decision: str
-    answer: str
-
-# NODE FUNCTIONS - These are the ones that work with LangGraph
-def ask_question_node(state: State) -> dict:
-    """Node function for questions without files."""
-    question_obj = state["question"]
-    thread_id = f"test_{question_obj.task_id}"
-
-    # Call your existing function
-    answer = answer_qery(question_obj.question, thread_id)
-
-    # Return dict to update state
-    return {"answer": answer}
-
-def ask_question_with_file_node(state: State) -> dict:
-    """Node function for questions with files."""
-    question_obj = state["question"]
-    thread_id = f"test_{question_obj.task_id}"
-
-    # Call your existing function
-    answer = ask_question_with_file(question_obj, thread_id)
-
-    # Return dict to update state
-    return {"answer": answer}
-
-def ask_question_youtube_node(state: State) -> dict:
-    """Node function for questions with files."""
-    question_obj = state["question"]
-
-    # Call your existing function
-    answer = ask_question_youtube(question_obj)
-
-    # Return dict to update state
-    return {"answer": answer}
-
-def router_node(state: State):
-    """Router node - returns dict to update state"""
-    if state["question"].file_name:
-        decision = "query_with_file"
-    elif "youtube.com" in state["question"].question or "youtu.be" in state["question"].question:
-        decision = "youtube"
-    else:
-        decision = "query"
-
-    return {"decision": decision}
-
-def router_function(state: State):
-    """Routing function - returns string to choose path"""
-    return state["decision"]
-
-def build_graph():
-    # Graph
-    builder = StateGraph(State)
-
-    # Use the NODE functions (not the original functions)
-    builder.add_node("query_with_file", ask_question_with_file_node)
-    builder.add_node("query", ask_question_node)
-    builder.add_node("youtube", ask_question_youtube_node)
-    builder.add_node("router", router_node)
-
-    # Define edges
-    builder.add_edge(START, "router")
-    builder.add_conditional_edges(
-        "router",
-        router_function,
-        {
-            "query_with_file": "query_with_file",
-            "query": "query",
-            "youtube": "youtube",
-        },
-    )
-    builder.add_edge("query_with_file", END)
-    builder.add_edge("query", END)
-    builder.add_edge("youtube", END)
-
-    react_graph = builder.compile()
-    return react_graph
-def extract_final_answer(text: str) -> str|List[str]:
-    """Extract the final answer from a string containing 'FINAL ANSWER: answer'"""
-
-    # Method 1: Simple string split (most common case)
-    if "FINAL ANSWER:" in text:
-        # Split on "FINAL ANSWER:" and take the part after it
-        parts = text.split("FINAL ANSWER:", 1)  # Split only on first occurrence
-        return parts
-    else:
-        return "FINAL ANSWER: unknown"
-
-if __name__ == "__main__":
-    test = [
-        {
-            "task_id": "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
-            "question": "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.",
-            "Level": "1",
-            "file_name": ""
-        },
-        {
-            "task_id": "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
-            "question": "Hi, I'm making a pie but I could use some help with my shopping list. I have everything I need for the crust, but I'm not sure about the filling. I got the recipe from my friend Aditi, but she left it as a voice memo and the speaker on my phone is buzzing so I can't quite make out what she's saying. Could you please listen to the recipe and list all of the ingredients that my friend described? I only want the ingredients for the filling, as I have everything I need to make my favorite pie crust. I've attached the recipe as Strawberry pie.mp3.\n\nIn your response, please only list the ingredients, not any measurements. So if the recipe calls for \"a pinch of salt\" or \"two cups of ripe strawberries\" the ingredients on the list would be \"salt\" and \"ripe strawberries\".\n\nPlease format your response as a comma separated list of ingredients. Also, please alphabetize the ingredients.",
-            "Level": "1",
-            "file_name": "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3"
-        }
-    ]
-
-    questions = [Question(**item) for item in test]
-    for i, question in enumerate(questions):
-        print(f"\n{i}. {question.question}")
-
-        react_graph = build_graph()
-        # Invoke the graph and capture the result
-        result = react_graph.invoke({
-            "question": question,
-            "decision": "",
-            "answer": ""
-        })
-        answer = result['answer']
-        print(answer)
-        answer = extract_final_answer(answer)[1]
-        print(answer)
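
One quirk in this deleted helper: extract_final_answer returns the split list on a hit but a plain string ("FINAL ANSWER: unknown") on a miss, so callers that index [1] receive the single character "I" whenever the marker is absent. A type-consistent sketch (hypothetical, not part of the PR):

def extract_final_answer_fixed(text: str) -> str:
    """Return only the text after the first 'FINAL ANSWER:' marker."""
    marker = "FINAL ANSWER:"
    if marker in text:
        return text.split(marker, 1)[1].strip()
    return "unknown"  # same return type on both paths, so callers never index a str

print(extract_final_answer_fixed("Thoughts... FINAL ANSWER: 42"))  # -> 42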
requirements.txt
CHANGED
@@ -1,15 +1,2 @@
 gradio
-requests
-langgraph
-langgraph-supervisor
-langchain
-langchain_community
-langchain_openai
-duckduckgo-search
-wikipedia
-arxiv
-openpyxl
-ultralytics
-youtube-transcript-api
-google-api-python-client
-langchain-google-community
+requests
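
The dependency trim mirrors the code revert: with agents.py, qa_graph.py, and tools.py gone, none of the LangChain/LangGraph, search, or vision packages are needed any more, and the reverted Space runs on gradio and requests alone.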
system_prompt.txt
DELETED
@@ -1,7 +0,0 @@
-You are a general AI assistant.
-I will ask you a question.
-Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
-YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
-If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
-If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
-If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
tools.py
DELETED
@@ -1,391 +0,0 @@
-from langchain_core.messages import HumanMessage
-from langchain_core.tools import tool
-from langchain_community.tools import (
-    DuckDuckGoSearchRun,
-    WikipediaQueryRun,
-    ArxivQueryRun
-)
-from langchain_google_community.search import (
-    GoogleSearchAPIWrapper,
-    GoogleSearchRun
-)
-from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
-from langchain_openai import ChatOpenAI
-
-import base64
-import pandas as pd
-import os
-
-import os
-from huggingface_hub import InferenceClient
-import json
-import requests
-from youtube_transcript_api import YouTubeTranscriptApi
-from ultralytics import YOLO
-import cv2
-
-import re
-
-from dotenv import load_dotenv
-load_dotenv()
-HF_TOKEN = os.getenv("HF_TOKEN")
-GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-GOOGLE_CSE_ID = os.getenv("GOOGLE_CSE_ID")
-client = InferenceClient(
-    provider="hf-inference",
-    api_key=HF_TOKEN,
-)
-
-llm = ChatOpenAI(model="o4-mini")
-vision_llm = ChatOpenAI(model="gpt-4o")
-
-@tool
-def analyze_image(img_path: str, question: str) -> str:
-    """Analyze an image and answer a question about it."""
-    try:
-        with open(img_path, "rb") as image_file:
-            image_bytes = image_file.read()
-
-        image_base64 = base64.b64encode(image_bytes).decode("utf-8")
-
-        message = [
-            HumanMessage(
-                content=[
-                    {"type": "text", "text": question},
-                    {
-                        "type": "image_url",
-                        "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}
-                    }
-                ]
-            )
-        ]
-
-        response = vision_llm.invoke(message)
-        return response.content
-
-    except Exception as e:
-        return f"Error analyzing image: {str(e)}"
-
-@tool
-def read_excel_file(file_path: str, question: str) -> str:
-    """Read and analyze an Excel file to answer a question."""
-    try:
-        # Read Excel file
-        df = pd.read_excel(file_path)
-
-        df_dict = df.to_dict(orient='records')
-        info = json.dumps(df_dict)
-        return info
-
-    except Exception as e:
-        return f"Error reading Excel file: {str(e)}"
-
-@tool
-def read_python_file(file_path: str, question: str) -> str:
-    """Read and analyze a Python file to answer a question."""
-    try:
-        with open(file_path, 'r', encoding='utf-8') as f:
-            code_content = f.read()
-
-        prompt = f"""Here is Python code from a file:
-
-```python
-{code_content}
-```
-
-Question: {question}
-
-Please analyze the code and answer the question."""
-
-        response = llm.invoke([HumanMessage(content=prompt)])
-        return response.content
-
-    except Exception as e:
-        return f"Error reading Python file: {str(e)}"
-
-@tool
-def transcribe_audio(file_path: str, question: str) -> str:
-    """Transcribe audio file."""
-    try:
-        headers = {
-            "Authorization": f"Bearer {HF_TOKEN}",
-            "Content-Type": "audio/mpeg"  # Add this line for MP3 files
-        }
-        API_URL = "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3"
-
-        def query(filename):
-            with open(filename, "rb") as f:
-                data = f.read()
-            response = requests.request("POST", API_URL, headers=headers, data=data)
-            return json.loads(response.content.decode("utf-8"))
-
-        data = query(file_path)
-        return data
-
-    except Exception as e:
-        return f"Error transcribing audio: {str(e)}"
-
-# Simple math tools
-@tool
-def add(a: float, b: float) -> float:
-    """Add two numbers."""
-    return a + b
-
-@tool
-def sum_list(numbers: list) -> float:
-    """Sum a list of numbers."""
-    return sum(numbers)
-
-# Simple data tools
-@tool
-def extract_values(data: str, column: str) -> list:
-    """Extract all values from a column in JSON data."""
-    parsed = json.loads(data)
-    values = []
-    for row in parsed:
-        for key, value in row.items():
-            if column.lower() in key.lower():
-                try:
-                    values.append(float(value))
-                except:
-                    pass
-    return values
-
-@tool
-def filter_rows(data: str, exclude_words: list) -> str:
-    """Remove rows containing any of the exclude words."""
-    parsed = json.loads(data)
-    filtered = []
-    for row in parsed:
-        row_text = " ".join(str(v).lower() for v in row.values())
-        if not any(word.lower() in row_text for word in exclude_words):
-            filtered.append(row)
-    return json.dumps(filtered)
-
-@tool
-def read_excel(file_path: str) -> str:
-    """Read any Excel file and return as JSON."""
-    df = pd.read_excel(file_path)
-    return json.dumps(df.to_dict(orient='records'))
-
-@tool
-def object_detection(video_url: str) -> str:
-    """Analyze objects and visual content in a YouTube video."""
-    try:
-        model = YOLO("yolo11n.pt")  # Load an official Detect model
-        results = model.track(video_url)
-
-        # Track objects across frames
-        frame_objects = []
-        for i, result in enumerate(results):
-            if result.boxes is not None:
-                objects_in_frame = []
-                for j in range(len(result.boxes)):
-                    class_name = result.names[int(result.boxes.cls[j].item())]
-                    confidence = float(result.boxes.conf[j].item())
-                    if confidence > 0.5:  # Only high confidence detections
-                        objects_in_frame.append(class_name)
-
-                frame_objects.append({
-                    "frame": i,
-                    "objects": objects_in_frame,
-                    "unique_objects": list(set(objects_in_frame))
-                })
-
-        return json.dumps(frame_objects, indent=2)
-
-    except Exception as e:
-        return f"Error analyzing video: {str(e)}"
-
-@tool
-def get_youtube_transcript(video_url: str) -> str:
-    """Get transcript from a YouTube video."""
-    try:
-        # Extract video ID
-        video_id_match = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', video_url)
-        if not video_id_match:
-            return "Error: Could not extract video ID"
-
-        video_id = video_id_match.group(1)
-        transcript = YouTubeTranscriptApi.get_transcript(video_id)
-
-        # Format with timestamps
-        formatted_transcript = []
-        for entry in transcript:
-            formatted_transcript.append({
-                "start": entry['start'],
-                "duration": entry['duration'],
-                "text": entry['text']
-            })
-
-        return json.dumps(formatted_transcript, indent=2)
-
-    except Exception as e:
-        return f"Error getting transcript: {str(e)}"
-
-# @tool
-def analyze_video_content(video_url: str, question: str = "", max_vision_frames: int = 1) -> str:
-    """Analyze video content using YOLO for object detection and vision LLM for detailed analysis."""
-    try:
-        model = YOLO("yolo11n.pt")
-        results = model.track(video_url)
-
-        # Step 1: YOLO analysis for all frames
-        frame_objects = []
-        frames_with_content = []
-
-        for i, result in enumerate(results):
-            frame_data = {
-                "frame": i,
-                "objects": [],
-                "unique_objects": [],
-                "object_counts": {}
-            }
-
-            if result.boxes is not None:
-                objects_in_frame = []
-                for j in range(len(result.boxes)):
-                    class_name = result.names[int(result.boxes.cls[j].item())]
-                    confidence = float(result.boxes.conf[j].item())
-                    if confidence > 0.5:
-                        objects_in_frame.append(class_name)
-
-                # Count objects
-                for obj in objects_in_frame:
-                    frame_data["object_counts"][obj] = frame_data["object_counts"].get(obj, 0) + 1
-
-                frame_data["objects"] = objects_in_frame
-                frame_data["unique_objects"] = list(set(objects_in_frame))
-
-                # Store frame for potential vision analysis
-                if objects_in_frame:  # Only store frames with detected objects
-                    frames_with_content.append({
-                        "frame_index": i,
-                        "objects": objects_in_frame,
-                        "object_counts": frame_data["object_counts"],
-                        "total_objects": len(objects_in_frame),
-                        "image": result.orig_img
-                    })
-
-            frame_objects.append(frame_data)
-
-        # Step 2: If there's a specific question, use vision LLM on selected frames
-        detailed_analyses = []
-        if question.strip():
-            # Sort frames by total objects and select top frames
-            frames_with_content.sort(key=lambda x: x["total_objects"], reverse=True)
-            selected_frames = frames_with_content[:max_vision_frames]
-
-            for frame_data in selected_frames:
-                try:
-                    # Encode frame directly to base64
-                    _, buffer = cv2.imencode('.jpg', frame_data["image"])
-                    image_bytes = buffer.tobytes()
-                    image_base64 = base64.b64encode(image_bytes).decode("utf-8")
-
-                    message = [
-                        HumanMessage(
-                            content=[
-                                {"type": "text", "text": question},
-                                {
-                                    "type": "image_url",
-                                    "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}
-                                }
-                            ]
-                        )
-                    ]
-
-                    vision_response = vision_llm.invoke(message)
-
-                    detailed_analyses.append({
-                        "frame_index": frame_data["frame_index"],
-                        "yolo_objects": frame_data["objects"],
-                        "yolo_counts": frame_data["object_counts"],
-                        "vision_analysis": vision_response.content
-                    })
-
-                except Exception as vision_error:
-                    detailed_analyses.append({
-                        "frame_index": frame_data["frame_index"],
-                        "yolo_objects": frame_data["objects"],
-                        "yolo_counts": frame_data["object_counts"],
-                        "vision_analysis": f"Vision analysis failed: {str(vision_error)}"
-                    })
-
-        # Combine results
-        result_data = {
-            "video_url": video_url,
-            "question": question,
-            "total_frames": len(frame_objects),
-            "yolo_analysis": frame_objects,
-            "frames_with_objects": len(frames_with_content)
-        }
-
-        if detailed_analyses:
-            result_data["detailed_vision_analysis"] = detailed_analyses
-            result_data["vision_frames_analyzed"] = len(detailed_analyses)
-
-        return json.dumps(result_data, indent=2)
-
-    except Exception as e:
-        return f"Error analyzing video content: {str(e)}"
-@tool
-def google_search():
-    """Google search tool"""
-    api_wrapper = GoogleSearchAPIWrapper(
-        google_api_key=GOOGLE_API_KEY,
-        google_cse_id=GOOGLE_CSE_ID,
-        k=10,  # Number of results
-        siterestrict=False  # Site restrictions
-    )
-    google_search = GoogleSearchRun(api_wrapper=api_wrapper)
-    return google_search
-
-@tool
-def wiki_search():
-    """Google search tool"""
-    api_wrapper = WikipediaAPIWrapper()
-    search = WikipediaQueryRun(api_wrapper=api_wrapper)
-    return search
-
-@tool
-def arxiv_search():
-    """Google search tool"""
-    api_wrapper = ArxivAPIWrapper()
-    search = ArxivQueryRun(api_wrapper=api_wrapper)
-    return search
-def general_tools():
-    tools = [
-        analyze_image,
-        read_python_file,
-        transcribe_audio,
-    ]
-    return tools
-
-def analyze_video_tools():
-    tools = [object_detection, analyze_video_content]
-    return tools
-
-def youtube_transcript_tools():
-    tools = [get_youtube_transcript]
-    return tools
-
-def file_agent_tools():
-    tools = [read_excel]
-    return tools
-
-def math_agent_tools():
-    tools = [add, sum_list]
-    return tools
-
-def data_agent_tools():
-    tools = [extract_values, filter_rows]
-    return tools
-
-def search_agen_tools():
-    tools = [
-        google_search,
-        ArxivQueryRun(api_wrapper=ArxivAPIWrapper()),
-        WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
-    ]
-    return tools
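
An observation on the deleted search helpers: google_search, wiki_search, and arxiv_search are decorated with @tool yet return tool objects, and agents.py passes the decorated factories themselves as tools=[wiki_search] (their docstrings also all read "Google search tool"). The conventional pattern is an undecorated factory whose result is handed to the agent; a minimal sketch of that form, using the same classes the file already imports:

from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

def make_wiki_search():
    """Build a ready-to-use Wikipedia query tool (plain factory, no @tool)."""
    return WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())

# Usage: create_react_agent(model=llm, tools=[make_wiki_search()], ...)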
yolo11n.pt
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0ebbc80d4a7680d14987a577cd21342b65ecfd94632bd9a8da63ae6417644ee1
-size 5613764