Paperbag committed on
Commit
d3b92d3
·
1 Parent(s): f1a7daa

video update

Browse files
Files changed (4) hide show
  1. __pycache__/agent.cpython-312.pyc +0 -0
  2. agent.py +33 -22
  3. app copy.py +6 -2
  4. requirements.txt +2 -0
__pycache__/agent.cpython-312.pyc CHANGED
Binary files a/__pycache__/agent.cpython-312.pyc and b/__pycache__/agent.cpython-312.pyc differ
 
agent.py CHANGED
@@ -281,7 +281,23 @@ def analyze_video(video_path: str, question: str) -> str:
281
  """
282
  if cv2 is None:
283
  return "Error: cv2 is not installed. Please install opencv-python."
 
 
 
 
284
  try:
 
 
 
 
 
 
 
 
 
 
 
 
285
  # 1. Extract frames evenly spaced throughout the video
286
  cap = cv2.VideoCapture(video_path)
287
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -341,6 +357,12 @@ def analyze_video(video_path: str, question: str) -> str:
341
  return f"Video Summary based on extracted frames and audio:\n{video_context}"
342
  except Exception as e:
343
  return f"Error analyzing video: {str(e)}"
 
 
 
 
 
 
344
 
345
  @tool
346
  def read_url(url: str) -> str:
@@ -469,28 +491,16 @@ def answer_message(state: AgentState) -> AgentState:
469
  current_date = datetime.datetime.now().strftime("%Y-%m-%d")
470
 
471
  prompt = [SystemMessage(f"""
472
- You are a GAIA question answering expert.
473
- Your task is to provide an answer to a question.
474
- Think carefully before answering the question.
475
 
476
  TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
477
 
478
- CRITICAL RULES FOR FILES & TOOLS:
479
- 1. If a message contains a path like `[Attached File Local Path: ...]` followed by an image (.png, .jpg, .jpeg), video, or audio file, YOU MUST USE THE CORRESPONDING TOOL (analyze_image, analyze_video, analyze_audio).
480
- 2. YOU ARE NOT BLIND. NEVER say "I cannot see images" or "I don't have access to files". Use your tools to see and hear for you!
481
- 3. If you see a file path, invoke the tool IMMEDIATELY in your first ReAct step.
482
- 4. For text/data files, use `read_document` or `run_python_script` (especially for .xlsx or .pdf).
483
- 5. Be thorough. If one tool doesn't give enough info, use another (e.g., search the web for context).
484
-
485
- Example of Tool Trigger:
486
- User: "What is in this image? [Attached File Local Path: /path/to/image.png]"
487
- Your Thought: "I need to see the image to answer. I will use the analyze_image tool."
488
- Your Action: Call `analyze_image(image_path='/path/to/image.png', question='What is in this image?')`
489
-
490
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
491
- If you are asked for a number, don't use comma to write your number, and don't use units such as $ or percent sign unless specified otherwise.
492
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
493
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
494
  """)]
495
  messages = prompt + messages
496
 
@@ -549,9 +559,10 @@ def answer_message(state: AgentState) -> AgentState:
549
  "You are a strict output formatter for the GAIA benchmark. "
550
  "Given a verbose draft answer, extract ONLY the final exact answer required. "
551
  "Return nothing else. DO NOT include prefixes like 'The answer is'. "
552
- "Strip all punctuation points at the end and quotes. "
553
- "If the answer is a number, just return the number without commas or units unless specified. "
554
- "If it is a name or word, just return the exact string. If a list, return only the comma-separated list."
 
555
  )
556
  )
557
  final_response, _ = smart_invoke([formatting_sys, HumanMessage(content=extract_text_from_content(draft_response.content))], use_tools=False, start_tier=current_tier)
 
281
  """
282
  if cv2 is None:
283
  return "Error: cv2 is not installed. Please install opencv-python."
284
+
285
+ temp_dir = tempfile.gettempdir()
286
+ downloaded_video = None
287
+
288
  try:
289
+ # Check if video_path is a URL
290
+ if video_path.startswith("http"):
291
+ print(f"Downloading video from URL: {video_path}")
292
+ downloaded_video = os.path.join(temp_dir, f"video_{int(time.time())}.mp4")
293
+ try:
294
+ # Use yt-dlp to download the video
295
+ # Note: --ffmpeg-location could be passed if the ffmpeg path were known; here we assume ffmpeg is either on PATH or not installed
296
+ subprocess.run(["yt-dlp", "-f", "best[ext=mp4]/mp4", "-o", downloaded_video, video_path], check=True, timeout=120)
297
+ video_path = downloaded_video
298
+ except Exception as e:
299
+ return f"Error downloading video from URL: {str(e)}. Tip: Check if yt-dlp is installed and the URL is valid."
300
+
301
  # 1. Extract frames evenly spaced throughout the video
302
  cap = cv2.VideoCapture(video_path)
303
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
357
  return f"Video Summary based on extracted frames and audio:\n{video_context}"
358
  except Exception as e:
359
  return f"Error analyzing video: {str(e)}"
360
+ finally:
361
+ if downloaded_video and os.path.exists(downloaded_video):
362
+ try:
363
+ os.remove(downloaded_video)
364
+ except:
365
+ pass
366
 
367
  @tool
368
  def read_url(url: str) -> str:
 
491
  current_date = datetime.datetime.now().strftime("%Y-%m-%d")
492
 
493
  prompt = [SystemMessage(f"""
494
+ You are a master of the GAIA benchmark, a general AI assistant designed to solve complex multi-step tasks.
495
+ Think carefully and logically. Use your tools effectively. Use your internal monologue to plan your steps.
 
496
 
497
  TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
498
 
499
+ CRITICAL RULES:
500
+ 1. If you see a path like `[Attached File Local Path: ...]` followed by an image, video, or audio file, YOU MUST USE THE CORRESPONDING TOOL (analyze_image, analyze_video, analyze_audio) IMMEDIATELY in your next step.
501
+ 2. Plan your steps ahead. 8 steps is your LIMIT for the reasoning loop, so make every step count.
502
+ 3. If a tool fails (e.g., 429 or 402), the system will automatically try another model for you, so just keep going!
503
+ 4. Be concise and accurate. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.
 
 
 
 
 
 
 
 
 
 
 
504
  """)]
505
  messages = prompt + messages
506
 
 
559
  "You are a strict output formatter for the GAIA benchmark. "
560
  "Given a verbose draft answer, extract ONLY the final exact answer required. "
561
  "Return nothing else. DO NOT include prefixes like 'The answer is'. "
562
+ "Strip trailing punctuation like periods and quotes. "
563
+ "If the answer is a number, just return the number. "
564
+ "If the answer is a list or set of elements, return them as a COMMA-SEPARATED list (e.g., 'a, b, c'). "
565
+ "DO NOT strip commas that separate list items."
566
  )
567
  )
568
  final_response, _ = smart_invoke([formatting_sys, HumanMessage(content=extract_text_from_content(draft_response.content))], use_tools=False, start_tier=current_tier)
app copy.py CHANGED
@@ -57,7 +57,9 @@ questions_url = f"{DEFAULT_API_URL}/questions"
57
  response = requests.get(questions_url, timeout=15)
58
  response.raise_for_status()
59
  questions_data = response.json()
60
- for item in questions_data[3:4]:
 
 
61
  question_text = item.get("question")
62
  if question_text is None:
63
  continue
@@ -73,11 +75,13 @@ for item in questions_data[3:4]:
73
  else:
74
  question_text += f"\n\n[Attached File: {file_name} (Download Failed)]"
75
 
76
- print(files_text, task_id)
77
  output = agent(question_text)
78
  print("Q:", question_text)
79
  print("A:", output)
80
  print("-" * 40)
 
 
81
 
82
 
83
 
 
57
  response = requests.get(questions_url, timeout=15)
58
  response.raise_for_status()
59
  questions_data = response.json()
60
+ import time
61
+ print(f"Running agent on {len(questions_data)} questions sequentially to avoid 429 errors...")
62
+ for item in questions_data[6:7]:
63
  question_text = item.get("question")
64
  if question_text is None:
65
  continue
 
75
  else:
76
  question_text += f"\n\n[Attached File: {file_name} (Download Failed)]"
77
 
78
+ print(f"Processing Task ID: {task_id}")
79
  output = agent(question_text)
80
  print("Q:", question_text)
81
  print("A:", output)
82
  print("-" * 40)
83
+ # Stagger requests so the rate-limit token bucket can refill and to leave headroom for any other concurrent tasks
84
+ time.sleep(5)
85
 
86
 
87
 
requirements.txt CHANGED
@@ -27,3 +27,5 @@ PyPDF2
27
  openai-whisper
28
  langchain-openai
29
  langchain-google-genai
 
 
 
27
  openai-whisper
28
  langchain-openai
29
  langchain-google-genai
30
+ yt-dlp
31
+ ffmpeg