Spaces:
Sleeping
Sleeping
video update
Browse files- __pycache__/agent.cpython-312.pyc +0 -0
- agent.py +33 -22
- app copy.py +6 -2
- requirements.txt +2 -0
__pycache__/agent.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/agent.cpython-312.pyc and b/__pycache__/agent.cpython-312.pyc differ
|
|
|
agent.py
CHANGED
|
@@ -281,7 +281,23 @@ def analyze_video(video_path: str, question: str) -> str:
|
|
| 281 |
"""
|
| 282 |
if cv2 is None:
|
| 283 |
return "Error: cv2 is not installed. Please install opencv-python."
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
# 1. Extract frames evenly spaced throughout the video
|
| 286 |
cap = cv2.VideoCapture(video_path)
|
| 287 |
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
|
@@ -341,6 +357,12 @@ def analyze_video(video_path: str, question: str) -> str:
|
|
| 341 |
return f"Video Summary based on extracted frames and audio:\n{video_context}"
|
| 342 |
except Exception as e:
|
| 343 |
return f"Error analyzing video: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
|
| 345 |
@tool
|
| 346 |
def read_url(url: str) -> str:
|
|
@@ -469,28 +491,16 @@ def answer_message(state: AgentState) -> AgentState:
|
|
| 469 |
current_date = datetime.datetime.now().strftime("%Y-%m-%d")
|
| 470 |
|
| 471 |
prompt = [SystemMessage(f"""
|
| 472 |
-
You are a GAIA
|
| 473 |
-
|
| 474 |
-
Think carefully before answering the question.
|
| 475 |
|
| 476 |
TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
|
| 477 |
|
| 478 |
-
CRITICAL RULES
|
| 479 |
-
1. If
|
| 480 |
-
2.
|
| 481 |
-
3. If
|
| 482 |
-
4.
|
| 483 |
-
5. Be thorough. If one tool doesn't give enough info, use another (e.g., search the web for context).
|
| 484 |
-
|
| 485 |
-
Example of Tool Trigger:
|
| 486 |
-
User: "What is in this image? [Attached File Local Path: /path/to/image.png]"
|
| 487 |
-
Your Thought: "I need to see the image to answer. I will use the analyze_image tool."
|
| 488 |
-
Your Action: Call `analyze_image(image_path='/path/to/image.png', question='What is in this image?')`
|
| 489 |
-
|
| 490 |
-
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
|
| 491 |
-
If you are asked for a number, don't use comma to write your number, and don't use units such as $ or percent sign unless specified otherwise.
|
| 492 |
-
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
|
| 493 |
-
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
|
| 494 |
""")]
|
| 495 |
messages = prompt + messages
|
| 496 |
|
|
@@ -549,9 +559,10 @@ def answer_message(state: AgentState) -> AgentState:
|
|
| 549 |
"You are a strict output formatter for the GAIA benchmark. "
|
| 550 |
"Given a verbose draft answer, extract ONLY the final exact answer required. "
|
| 551 |
"Return nothing else. DO NOT include prefixes like 'The answer is'. "
|
| 552 |
-
"Strip
|
| 553 |
-
"If the answer is a number, just return the number
|
| 554 |
-
"If
|
|
|
|
| 555 |
)
|
| 556 |
)
|
| 557 |
final_response, _ = smart_invoke([formatting_sys, HumanMessage(content=extract_text_from_content(draft_response.content))], use_tools=False, start_tier=current_tier)
|
|
|
|
| 281 |
"""
|
| 282 |
if cv2 is None:
|
| 283 |
return "Error: cv2 is not installed. Please install opencv-python."
|
| 284 |
+
|
| 285 |
+
temp_dir = tempfile.gettempdir()
|
| 286 |
+
downloaded_video = None
|
| 287 |
+
|
| 288 |
try:
|
| 289 |
+
# Check if video_path is a URL
|
| 290 |
+
if video_path.startswith("http"):
|
| 291 |
+
print(f"Downloading video from URL: {video_path}")
|
| 292 |
+
downloaded_video = os.path.join(temp_dir, f"video_{int(time.time())}.mp4")
|
| 293 |
+
try:
|
| 294 |
+
# Use yt-dlp to download the video
|
| 295 |
+
# Note: --ffmpeg-location could be used if we knew where it was, but we assume it's in path or missing
|
| 296 |
+
subprocess.run(["yt-dlp", "-f", "best[ext=mp4]/mp4", "-o", downloaded_video, video_path], check=True, timeout=120)
|
| 297 |
+
video_path = downloaded_video
|
| 298 |
+
except Exception as e:
|
| 299 |
+
return f"Error downloading video from URL: {str(e)}. Tip: Check if yt-dlp is installed and the URL is valid."
|
| 300 |
+
|
| 301 |
# 1. Extract frames evenly spaced throughout the video
|
| 302 |
cap = cv2.VideoCapture(video_path)
|
| 303 |
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
|
|
|
| 357 |
return f"Video Summary based on extracted frames and audio:\n{video_context}"
|
| 358 |
except Exception as e:
|
| 359 |
return f"Error analyzing video: {str(e)}"
|
| 360 |
+
finally:
|
| 361 |
+
if downloaded_video and os.path.exists(downloaded_video):
|
| 362 |
+
try:
|
| 363 |
+
os.remove(downloaded_video)
|
| 364 |
+
except:
|
| 365 |
+
pass
|
| 366 |
|
| 367 |
@tool
|
| 368 |
def read_url(url: str) -> str:
|
|
|
|
| 491 |
current_date = datetime.datetime.now().strftime("%Y-%m-%d")
|
| 492 |
|
| 493 |
prompt = [SystemMessage(f"""
|
| 494 |
+
You are a master of the GAIA benchmark, a general AI assistant designed to solve complex multi-step tasks.
|
| 495 |
+
Think carefully and logically. Use your tools effectively. Use your internal monologue to plan your steps.
|
|
|
|
| 496 |
|
| 497 |
TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
|
| 498 |
|
| 499 |
+
CRITICAL RULES:
|
| 500 |
+
1. If you see a path like `[Attached File Local Path: ...]` followed by an image, video, or audio file, YOU MUST USE THE CORRESPONDING TOOL (analyze_image, analyze_video, analyze_audio) IMMEDIATELY in your next step.
|
| 501 |
+
2. Plan your steps ahead. 8 steps is your LIMIT for the reasoning loop, so make every step count.
|
| 502 |
+
3. If a tool fails (e.g., 429 or 402), the system will automatically try another model for you, so just keep going!
|
| 503 |
+
4. Be concise and accurate. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
""")]
|
| 505 |
messages = prompt + messages
|
| 506 |
|
|
|
|
| 559 |
"You are a strict output formatter for the GAIA benchmark. "
|
| 560 |
"Given a verbose draft answer, extract ONLY the final exact answer required. "
|
| 561 |
"Return nothing else. DO NOT include prefixes like 'The answer is'. "
|
| 562 |
+
"Strip trailing punctuation like periods and quotes. "
|
| 563 |
+
"If the answer is a number, just return the number. "
|
| 564 |
+
"If the answer is a list or set of elements, return them as a COMMA-SEPARATED list (e.g., 'a, b, c'). "
|
| 565 |
+
"DO NOT strip commas that separate list items."
|
| 566 |
)
|
| 567 |
)
|
| 568 |
final_response, _ = smart_invoke([formatting_sys, HumanMessage(content=extract_text_from_content(draft_response.content))], use_tools=False, start_tier=current_tier)
|
app copy.py
CHANGED
|
@@ -57,7 +57,9 @@ questions_url = f"{DEFAULT_API_URL}/questions"
|
|
| 57 |
response = requests.get(questions_url, timeout=15)
|
| 58 |
response.raise_for_status()
|
| 59 |
questions_data = response.json()
|
| 60 |
-
|
|
|
|
|
|
|
| 61 |
question_text = item.get("question")
|
| 62 |
if question_text is None:
|
| 63 |
continue
|
|
@@ -73,11 +75,13 @@ for item in questions_data[3:4]:
|
|
| 73 |
else:
|
| 74 |
question_text += f"\n\n[Attached File: {file_name} (Download Failed)]"
|
| 75 |
|
| 76 |
-
print(
|
| 77 |
output = agent(question_text)
|
| 78 |
print("Q:", question_text)
|
| 79 |
print("A:", output)
|
| 80 |
print("-" * 40)
|
|
|
|
|
|
|
| 81 |
|
| 82 |
|
| 83 |
|
|
|
|
| 57 |
response = requests.get(questions_url, timeout=15)
|
| 58 |
response.raise_for_status()
|
| 59 |
questions_data = response.json()
|
| 60 |
+
import time
|
| 61 |
+
print(f"Running agent on {len(questions_data)} questions sequentially to avoid 429 errors...")
|
| 62 |
+
for item in questions_data[6:7]:
|
| 63 |
question_text = item.get("question")
|
| 64 |
if question_text is None:
|
| 65 |
continue
|
|
|
|
| 75 |
else:
|
| 76 |
question_text += f"\n\n[Attached File: {file_name} (Download Failed)]"
|
| 77 |
|
| 78 |
+
print(f"Processing Task ID: {task_id}")
|
| 79 |
output = agent(question_text)
|
| 80 |
print("Q:", question_text)
|
| 81 |
print("A:", output)
|
| 82 |
print("-" * 40)
|
| 83 |
+
# Stagger requests to refill Token bucket and provide space for other concurrent tasks if any
|
| 84 |
+
time.sleep(5)
|
| 85 |
|
| 86 |
|
| 87 |
|
requirements.txt
CHANGED
|
@@ -27,3 +27,5 @@ PyPDF2
|
|
| 27 |
openai-whisper
|
| 28 |
langchain-openai
|
| 29 |
langchain-google-genai
|
|
|
|
|
|
|
|
|
| 27 |
openai-whisper
|
| 28 |
langchain-openai
|
| 29 |
langchain-google-genai
|
| 30 |
+
yt-dlp
|
| 31 |
+
ffmpeg
|