Spaces:
Sleeping
Sleeping
Samuel Thomas
committed on
Commit
·
7ab4cd0
1
Parent(s):
d5fffa5
YouTube download correction
Browse files
tools.py
CHANGED
|
@@ -1351,14 +1351,11 @@ class WikipediaSearchToolWithFAISS(BaseTool):
|
|
| 1351 |
return f"An unexpected error occurred: {str(e)}"
|
| 1352 |
|
| 1353 |
|
| 1354 |
-
|
| 1355 |
class EnhancedYoutubeScreenshotQA(BaseTool):
|
| 1356 |
name: str = "bird_species_screenshot_qa"
|
| 1357 |
description: str = (
|
| 1358 |
"Use this tool to calculate the number of bird species on camera at any one time,"
|
| 1359 |
"Input should be a dict with keys: 'youtube_url', 'question', and optional parameters. "
|
| 1360 |
-
#"Optional parameters: 'frame_interval_seconds' (default: 10), 'max_frames' (default: 50), "
|
| 1361 |
-
#"'use_scene_detection' (default: True), 'parallel_processing' (default: True). "
|
| 1362 |
"Example: {'youtube_url': 'https://youtube.com/watch?v=xyz', 'question': 'What animals are visible?'}"
|
| 1363 |
)
|
| 1364 |
|
|
@@ -1408,7 +1405,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
| 1408 |
def _initialize_model(self):
|
| 1409 |
"""Initialize BLIP model for VQA with error handling"""
|
| 1410 |
try:
|
| 1411 |
-
#self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 1412 |
self.device = torch.device("cpu")
|
| 1413 |
print(f"Using device: {self.device}")
|
| 1414 |
|
|
@@ -1417,11 +1413,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
| 1417 |
"Salesforce/blip-vqa-base"
|
| 1418 |
).to(self.device)
|
| 1419 |
|
| 1420 |
-
#self.processor_vqa = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
|
| 1421 |
-
#self.model_vqa = BlipForQuestionAnswering.from_pretrained(
|
| 1422 |
-
# "Salesforce/blip-vqa-capfilt-large"
|
| 1423 |
-
#).to(self.device)
|
| 1424 |
-
|
| 1425 |
print("BLIP VQA model loaded successfully")
|
| 1426 |
except Exception as e:
|
| 1427 |
print(f"Error initializing VQA model: {str(e)}")
|
|
@@ -1458,7 +1449,7 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
| 1458 |
print(f"Error saving cache: {str(e)}")
|
| 1459 |
|
| 1460 |
def download_youtube_video(self, url: str, video_hash: str, cache_enabled: bool = True) -> Optional[str]:
|
| 1461 |
-
"""Enhanced YouTube video download with
|
| 1462 |
video_dir = '/tmp/video/'
|
| 1463 |
output_filename = f'{video_hash}.mp4'
|
| 1464 |
output_path = os.path.join(video_dir, output_filename)
|
|
@@ -1469,30 +1460,137 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
| 1469 |
return output_path
|
| 1470 |
|
| 1471 |
# Clean directory
|
| 1472 |
-
video_dir = '/tmp/video/'
|
| 1473 |
self._clean_directory(video_dir)
|
| 1474 |
|
| 1475 |
try:
|
|
|
|
| 1476 |
ydl_opts = {
|
| 1477 |
-
|
|
|
|
| 1478 |
'outtmpl': output_path,
|
| 1479 |
-
'quiet':
|
|
|
|
| 1480 |
'merge_output_format': 'mp4',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1481 |
'postprocessors': [{
|
| 1482 |
'key': 'FFmpegVideoConvertor',
|
| 1483 |
'preferedformat': 'mp4',
|
| 1484 |
}]
|
| 1485 |
}
|
| 1486 |
|
| 1487 |
-
|
| 1488 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1489 |
|
| 1490 |
-
|
| 1491 |
-
|
| 1492 |
-
|
| 1493 |
-
|
| 1494 |
-
|
| 1495 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1496 |
|
| 1497 |
except Exception as e:
|
| 1498 |
print(f"Error downloading YouTube video: {str(e)}")
|
|
@@ -1657,7 +1755,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
| 1657 |
def _answer_question_on_frame(self, frame_path: str, question: str) -> Tuple[str, float]:
|
| 1658 |
"""Answer question on single frame with confidence scoring"""
|
| 1659 |
try:
|
| 1660 |
-
#ipdb.set_trace()
|
| 1661 |
image = Image.open(frame_path).convert('RGB')
|
| 1662 |
inputs = self.processor_vqa(image, question, return_tensors="pt").to(self.device)
|
| 1663 |
|
|
@@ -1929,7 +2026,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
| 1929 |
"note": "No numeric results available for statistical summary"
|
| 1930 |
}
|
| 1931 |
|
| 1932 |
-
|
| 1933 |
if not answers:
|
| 1934 |
return {
|
| 1935 |
"final_answer": "All frame processing failed.",
|
|
@@ -1944,7 +2040,6 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
| 1944 |
# Find most common cluster
|
| 1945 |
largest_cluster = max(answer_clusters.items(), key=lambda x: len(x[1]))
|
| 1946 |
most_common_answer = largest_cluster[0]
|
| 1947 |
-
cluster_size = len(largest_cluster[1])
|
| 1948 |
|
| 1949 |
# Calculate weighted confidence
|
| 1950 |
answer_counts = Counter(answers)
|
|
@@ -1970,15 +2065,10 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
| 1970 |
"statistical_summary": stats
|
| 1971 |
}
|
| 1972 |
|
| 1973 |
-
#def _run(self, query: Dict[str, Any]) -> str:
|
| 1974 |
def _run(self, youtube_url, question, **kwargs) -> str:
|
| 1975 |
"""Enhanced main execution method"""
|
| 1976 |
-
#ipdb.set_trace()
|
| 1977 |
question = "How many unique bird species are on camera?"
|
| 1978 |
|
| 1979 |
-
#input_data = query
|
| 1980 |
-
#youtube_url = input_data.get("youtube_url")
|
| 1981 |
-
#question = input_data.get("question")
|
| 1982 |
input_data = {
|
| 1983 |
'youtube_url': youtube_url,
|
| 1984 |
'question': question
|
|
@@ -1996,7 +2086,7 @@ class EnhancedYoutubeScreenshotQA(BaseTool):
|
|
| 1996 |
cache_enabled = self._get_config('cache_enabled', True, input_data)
|
| 1997 |
video_path = self.download_youtube_video(youtube_url, video_hash, cache_enabled)
|
| 1998 |
if not video_path or not os.path.exists(video_path):
|
| 1999 |
-
return "Error: Failed to download the YouTube video."
|
| 2000 |
|
| 2001 |
# Step 2: Smart frame extraction
|
| 2002 |
print(f"Extracting frames with smart selection...")
|
|
|
|
| 1351 |
return f"An unexpected error occurred: {str(e)}"
|
| 1352 |
|
| 1353 |
|
|
|
|
| 1354 |
class EnhancedYoutubeScreenshotQA(BaseTool):
|
| 1355 |
name: str = "bird_species_screenshot_qa"
|
| 1356 |
description: str = (
|
| 1357 |
"Use this tool to calculate the number of bird species on camera at any one time,"
|
| 1358 |
"Input should be a dict with keys: 'youtube_url', 'question', and optional parameters. "
|
|
|
|
|
|
|
| 1359 |
"Example: {'youtube_url': 'https://youtube.com/watch?v=xyz', 'question': 'What animals are visible?'}"
|
| 1360 |
)
|
| 1361 |
|
|
|
|
| 1405 |
def _initialize_model(self):
|
| 1406 |
"""Initialize BLIP model for VQA with error handling"""
|
| 1407 |
try:
|
|
|
|
| 1408 |
self.device = torch.device("cpu")
|
| 1409 |
print(f"Using device: {self.device}")
|
| 1410 |
|
|
|
|
| 1413 |
"Salesforce/blip-vqa-base"
|
| 1414 |
).to(self.device)
|
| 1415 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1416 |
print("BLIP VQA model loaded successfully")
|
| 1417 |
except Exception as e:
|
| 1418 |
print(f"Error initializing VQA model: {str(e)}")
|
|
|
|
| 1449 |
print(f"Error saving cache: {str(e)}")
|
| 1450 |
|
| 1451 |
def download_youtube_video(self, url: str, video_hash: str, cache_enabled: bool = True) -> Optional[str]:
|
| 1452 |
+
"""Enhanced YouTube video download with anti-bot measures"""
|
| 1453 |
video_dir = '/tmp/video/'
|
| 1454 |
output_filename = f'{video_hash}.mp4'
|
| 1455 |
output_path = os.path.join(video_dir, output_filename)
|
|
|
|
| 1460 |
return output_path
|
| 1461 |
|
| 1462 |
# Clean directory
|
|
|
|
| 1463 |
self._clean_directory(video_dir)
|
| 1464 |
|
| 1465 |
try:
|
| 1466 |
+
# Enhanced yt-dlp options with anti-bot measures
|
| 1467 |
ydl_opts = {
|
| 1468 |
+
# Format selection - prefer lower quality to avoid restrictions
|
| 1469 |
+
'format': 'best[height<=480][ext=mp4]/best[height<=720][ext=mp4]/best[ext=mp4]/best',
|
| 1470 |
'outtmpl': output_path,
|
| 1471 |
+
'quiet': False, # Changed to False for debugging
|
| 1472 |
+
'no_warnings': False,
|
| 1473 |
'merge_output_format': 'mp4',
|
| 1474 |
+
|
| 1475 |
+
# Anti-bot headers and user agent
|
| 1476 |
+
'http_headers': {
|
| 1477 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
| 1478 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
| 1479 |
+
'Accept-Language': 'en-us,en;q=0.5',
|
| 1480 |
+
'Accept-Encoding': 'gzip,deflate',
|
| 1481 |
+
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
| 1482 |
+
'Connection': 'keep-alive',
|
| 1483 |
+
'Upgrade-Insecure-Requests': '1',
|
| 1484 |
+
},
|
| 1485 |
+
|
| 1486 |
+
# Additional anti-detection measures
|
| 1487 |
+
'extractor_args': {
|
| 1488 |
+
'youtube': {
|
| 1489 |
+
'skip': ['hls', 'dash'], # Skip some formats that might trigger detection
|
| 1490 |
+
'player_skip': ['js'], # Skip JavaScript player
|
| 1491 |
+
}
|
| 1492 |
+
},
|
| 1493 |
+
|
| 1494 |
+
# Rate limiting
|
| 1495 |
+
'sleep_interval': 1,
|
| 1496 |
+
'max_sleep_interval': 5,
|
| 1497 |
+
'sleep_interval_subtitles': 1,
|
| 1498 |
+
|
| 1499 |
+
# Retry settings
|
| 1500 |
+
'retries': 3,
|
| 1501 |
+
'fragment_retries': 3,
|
| 1502 |
+
'skip_unavailable_fragments': True,
|
| 1503 |
+
|
| 1504 |
+
# Cookie handling (you can add browser cookies if needed)
|
| 1505 |
+
# 'cookiefile': '/path/to/cookies.txt', # Uncomment and set path if you have cookies
|
| 1506 |
+
|
| 1507 |
+
# Additional options
|
| 1508 |
+
'extract_flat': False,
|
| 1509 |
+
'writesubtitles': False,
|
| 1510 |
+
'writeautomaticsub': False,
|
| 1511 |
+
'ignoreerrors': True,
|
| 1512 |
+
|
| 1513 |
+
# Postprocessors
|
| 1514 |
'postprocessors': [{
|
| 1515 |
'key': 'FFmpegVideoConvertor',
|
| 1516 |
'preferedformat': 'mp4',
|
| 1517 |
}]
|
| 1518 |
}
|
| 1519 |
|
| 1520 |
+
print(f"Attempting to download: {url}")
|
| 1521 |
+
|
| 1522 |
+
# Try multiple download strategies
|
| 1523 |
+
strategies = [
|
| 1524 |
+
# Strategy 1: Standard download
|
| 1525 |
+
ydl_opts,
|
| 1526 |
+
|
| 1527 |
+
# Strategy 2: More conservative approach
|
| 1528 |
+
{
|
| 1529 |
+
**ydl_opts,
|
| 1530 |
+
'format': 'worst[ext=mp4]/worst', # Try worst quality first
|
| 1531 |
+
'sleep_interval': 2,
|
| 1532 |
+
'max_sleep_interval': 10,
|
| 1533 |
+
},
|
| 1534 |
+
|
| 1535 |
+
# Strategy 3: Different user agent
|
| 1536 |
+
{
|
| 1537 |
+
**ydl_opts,
|
| 1538 |
+
'http_headers': {
|
| 1539 |
+
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15'
|
| 1540 |
+
},
|
| 1541 |
+
'format': 'best[height<=360][ext=mp4]/best[ext=mp4]/best',
|
| 1542 |
+
}
|
| 1543 |
+
]
|
| 1544 |
|
| 1545 |
+
last_error = None
|
| 1546 |
+
for i, strategy in enumerate(strategies, 1):
|
| 1547 |
+
try:
|
| 1548 |
+
print(f"Trying download strategy {i}/3...")
|
| 1549 |
+
|
| 1550 |
+
with yt_dlp.YoutubeDL(strategy) as ydl:
|
| 1551 |
+
# Add some delay before download
|
| 1552 |
+
import time
|
| 1553 |
+
time.sleep(2)
|
| 1554 |
+
|
| 1555 |
+
ydl.download([url])
|
| 1556 |
+
|
| 1557 |
+
if os.path.exists(output_path):
|
| 1558 |
+
print(f"Video downloaded successfully with strategy {i}: {output_path}")
|
| 1559 |
+
return output_path
|
| 1560 |
+
else:
|
| 1561 |
+
print(f"Strategy {i} completed but file not found")
|
| 1562 |
+
|
| 1563 |
+
except Exception as e:
|
| 1564 |
+
last_error = e
|
| 1565 |
+
print(f"Strategy {i} failed: {str(e)}")
|
| 1566 |
+
if i < len(strategies):
|
| 1567 |
+
print(f"Trying next strategy...")
|
| 1568 |
+
# Add delay between strategies
|
| 1569 |
+
import time
|
| 1570 |
+
time.sleep(5)
|
| 1571 |
+
continue
|
| 1572 |
+
|
| 1573 |
+
# If all strategies failed, try one more approach with cookies from browser
|
| 1574 |
+
print("All standard strategies failed. Trying with browser cookies...")
|
| 1575 |
+
try:
|
| 1576 |
+
cookie_strategy = {
|
| 1577 |
+
**ydl_opts,
|
| 1578 |
+
'cookiesfrombrowser': ('chrome',), # Try to get cookies from Chrome
|
| 1579 |
+
'format': 'worst[ext=mp4]/worst',
|
| 1580 |
+
}
|
| 1581 |
+
|
| 1582 |
+
with yt_dlp.YoutubeDL(cookie_strategy) as ydl:
|
| 1583 |
+
ydl.download([url])
|
| 1584 |
+
|
| 1585 |
+
if os.path.exists(output_path):
|
| 1586 |
+
print(f"Video downloaded successfully with browser cookies: {output_path}")
|
| 1587 |
+
return output_path
|
| 1588 |
+
|
| 1589 |
+
except Exception as e:
|
| 1590 |
+
print(f"Browser cookie strategy also failed: {str(e)}")
|
| 1591 |
+
|
| 1592 |
+
print(f"All download strategies failed. Last error: {last_error}")
|
| 1593 |
+
return None
|
| 1594 |
|
| 1595 |
except Exception as e:
|
| 1596 |
print(f"Error downloading YouTube video: {str(e)}")
|
|
|
|
| 1755 |
def _answer_question_on_frame(self, frame_path: str, question: str) -> Tuple[str, float]:
|
| 1756 |
"""Answer question on single frame with confidence scoring"""
|
| 1757 |
try:
|
|
|
|
| 1758 |
image = Image.open(frame_path).convert('RGB')
|
| 1759 |
inputs = self.processor_vqa(image, question, return_tensors="pt").to(self.device)
|
| 1760 |
|
|
|
|
| 2026 |
"note": "No numeric results available for statistical summary"
|
| 2027 |
}
|
| 2028 |
|
|
|
|
| 2029 |
if not answers:
|
| 2030 |
return {
|
| 2031 |
"final_answer": "All frame processing failed.",
|
|
|
|
| 2040 |
# Find most common cluster
|
| 2041 |
largest_cluster = max(answer_clusters.items(), key=lambda x: len(x[1]))
|
| 2042 |
most_common_answer = largest_cluster[0]
|
|
|
|
| 2043 |
|
| 2044 |
# Calculate weighted confidence
|
| 2045 |
answer_counts = Counter(answers)
|
|
|
|
| 2065 |
"statistical_summary": stats
|
| 2066 |
}
|
| 2067 |
|
|
|
|
| 2068 |
def _run(self, youtube_url, question, **kwargs) -> str:
|
| 2069 |
"""Enhanced main execution method"""
|
|
|
|
| 2070 |
question = "How many unique bird species are on camera?"
|
| 2071 |
|
|
|
|
|
|
|
|
|
|
| 2072 |
input_data = {
|
| 2073 |
'youtube_url': youtube_url,
|
| 2074 |
'question': question
|
|
|
|
| 2086 |
cache_enabled = self._get_config('cache_enabled', True, input_data)
|
| 2087 |
video_path = self.download_youtube_video(youtube_url, video_hash, cache_enabled)
|
| 2088 |
if not video_path or not os.path.exists(video_path):
|
| 2089 |
+
return "Error: Failed to download the YouTube video. This may be due to YouTube's anti-bot protection. Try using a different video or implement cookie authentication."
|
| 2090 |
|
| 2091 |
# Step 2: Smart frame extraction
|
| 2092 |
print(f"Extracting frames with smart selection...")
|