Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -78,7 +78,7 @@ config = Config()
|
|
| 78 |
# =============================================================================
|
| 79 |
def retry_with_backoff(max_retries=None, base_delay=None):
|
| 80 |
"""Decorator for automatic retry with exponential backoff"""
|
| 81 |
-
max_retries = max_retries or
|
| 82 |
base_delay = base_delay or config.BASE_RETRY_DELAY
|
| 83 |
|
| 84 |
def decorator(func):
|
|
@@ -1397,11 +1397,11 @@ def get_youtube_transcript(video_url: str) -> str:
|
|
| 1397 |
transcriber = aai.Transcriber()
|
| 1398 |
print(f" Submitting to AssemblyAI...")
|
| 1399 |
|
| 1400 |
-
|
| 1401 |
speech_model=aai.SpeechModel.best,
|
| 1402 |
)
|
| 1403 |
|
| 1404 |
-
transcript = transcriber.transcribe(video_url, config=
|
| 1405 |
|
| 1406 |
# Wait for completion
|
| 1407 |
print(f" Initial status: {transcript.status}")
|
|
@@ -1424,12 +1424,21 @@ def get_youtube_transcript(video_url: str) -> str:
|
|
| 1424 |
print(f" Status after {elapsed}s: {transcript.status}")
|
| 1425 |
except Exception as refresh_err:
|
| 1426 |
print(f" Warning: Could not refresh status: {refresh_err}")
|
| 1427 |
-
# Continue anyway, maybe it finished
|
| 1428 |
break
|
| 1429 |
|
| 1430 |
# Check final status
|
| 1431 |
if transcript.status == aai.TranscriptStatus.error:
|
| 1432 |
error_msg = getattr(transcript, 'error', 'Unknown error')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1433 |
raise RuntimeError(f"AssemblyAI transcription failed: {error_msg}")
|
| 1434 |
|
| 1435 |
if transcript.status != aai.TranscriptStatus.completed:
|
|
@@ -1454,29 +1463,23 @@ def get_youtube_transcript(video_url: str) -> str:
|
|
| 1454 |
telemetry.record_call("get_youtube_transcript", time.time() - start_time, True)
|
| 1455 |
return f"YouTube Transcript:\n{truncate_if_needed(result_text)}"
|
| 1456 |
|
| 1457 |
-
except
|
| 1458 |
telemetry.record_call("get_youtube_transcript", time.time() - start_time, False)
|
| 1459 |
error_msg = str(e)
|
| 1460 |
|
| 1461 |
suggestions = []
|
| 1462 |
-
if "
|
|
|
|
|
|
|
| 1463 |
suggestions.append("Video may be private or deleted")
|
| 1464 |
-
|
| 1465 |
suggestions.append("AssemblyAI quota exceeded")
|
| 1466 |
-
|
| 1467 |
suggestions.append("Video may be too long (try shorter video)")
|
| 1468 |
|
| 1469 |
-
suggestion_text = " | ".join(suggestions) if suggestions else "Check video URL
|
| 1470 |
|
| 1471 |
raise ToolError("get_youtube_transcript", e, suggestion_text)
|
| 1472 |
-
|
| 1473 |
-
except TimeoutError as e:
|
| 1474 |
-
telemetry.record_call("get_youtube_transcript", time.time() - start_time, False)
|
| 1475 |
-
raise ToolError("get_youtube_transcript", e, "Video too long or AssemblyAI overloaded. Try shorter video.")
|
| 1476 |
-
|
| 1477 |
-
except Exception as e:
|
| 1478 |
-
telemetry.record_call("get_youtube_transcript", time.time() - start_time, False)
|
| 1479 |
-
raise ToolError("get_youtube_transcript", e, "Check video URL is valid and public")
|
| 1480 |
|
| 1481 |
|
| 1482 |
class BrowseInput(BaseModel):
|
|
@@ -1711,7 +1714,76 @@ def scrape_and_retrieve(url: str, query: str) -> str:
|
|
| 1711 |
telemetry.record_call("scrape_and_retrieve", time.time() - start_time, False)
|
| 1712 |
raise ToolError("scrape_and_retrieve", e)
|
| 1713 |
|
|
|
|
|
|
|
|
|
|
| 1714 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1715 |
class FinalAnswerInput(BaseModel):
|
| 1716 |
answer: str = Field(description="Final answer - exact, no fluff")
|
| 1717 |
|
|
@@ -1741,8 +1813,9 @@ defined_tools = [
|
|
| 1741 |
|
| 1742 |
# Core tools
|
| 1743 |
search_tool,
|
| 1744 |
-
wikipedia_search,
|
| 1745 |
calculator,
|
|
|
|
| 1746 |
code_interpreter,
|
| 1747 |
|
| 1748 |
# File operations
|
|
@@ -1998,6 +2071,19 @@ Turn 5: final_answer_tool("3")
|
|
| 1998 |
REMEMBER: wikipedia_search() wants just the SUBJECT NAME!
|
| 1999 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2000 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2001 |
🚨 ANTI-LOOP RULES:
|
| 2002 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2003 |
|
|
@@ -2159,7 +2245,7 @@ REMEMBER: One tool per turn. No reasoning without tools. Exact answer format.
|
|
| 2159 |
|
| 2160 |
# Check if we should force reflection
|
| 2161 |
consecutive_errors = state.get('consecutive_errors', 0)
|
| 2162 |
-
should_reflect = (current_turn > 5 and current_turn % REFLECT_EVERY_N_TURNS == 0) or consecutive_errors >= 3
|
| 2163 |
|
| 2164 |
# Force tool usage
|
| 2165 |
if len(messages_to_send) >= 2:
|
|
|
|
| 78 |
# =============================================================================
|
| 79 |
def retry_with_backoff(max_retries=None, base_delay=None):
|
| 80 |
"""Decorator for automatic retry with exponential backoff"""
|
| 81 |
+
max_retries = max_retries or Config.MAX_RETRIES
|
| 82 |
base_delay = base_delay or config.BASE_RETRY_DELAY
|
| 83 |
|
| 84 |
def decorator(func):
|
|
|
|
| 1397 |
transcriber = aai.Transcriber()
|
| 1398 |
print(f" Submitting to AssemblyAI...")
|
| 1399 |
|
| 1400 |
+
config_obj = aai.TranscriptionConfig(
|
| 1401 |
speech_model=aai.SpeechModel.best,
|
| 1402 |
)
|
| 1403 |
|
| 1404 |
+
transcript = transcriber.transcribe(video_url, config=config_obj)
|
| 1405 |
|
| 1406 |
# Wait for completion
|
| 1407 |
print(f" Initial status: {transcript.status}")
|
|
|
|
| 1424 |
print(f" Status after {elapsed}s: {transcript.status}")
|
| 1425 |
except Exception as refresh_err:
|
| 1426 |
print(f" Warning: Could not refresh status: {refresh_err}")
|
|
|
|
| 1427 |
break
|
| 1428 |
|
| 1429 |
# Check final status
|
| 1430 |
if transcript.status == aai.TranscriptStatus.error:
|
| 1431 |
error_msg = getattr(transcript, 'error', 'Unknown error')
|
| 1432 |
+
|
| 1433 |
+
# ===== NEW: Check for network block =====
|
| 1434 |
+
if "text/html" in error_msg or "HTML document" in error_msg:
|
| 1435 |
+
raise RuntimeError(
|
| 1436 |
+
"YouTube access blocked. "
|
| 1437 |
+
"If a local video file was provided, use analyze_image or audio_transcription_tool instead. "
|
| 1438 |
+
"Or try downloading the video first."
|
| 1439 |
+
)
|
| 1440 |
+
# ===== END NEW =====
|
| 1441 |
+
|
| 1442 |
raise RuntimeError(f"AssemblyAI transcription failed: {error_msg}")
|
| 1443 |
|
| 1444 |
if transcript.status != aai.TranscriptStatus.completed:
|
|
|
|
| 1463 |
telemetry.record_call("get_youtube_transcript", time.time() - start_time, True)
|
| 1464 |
return f"YouTube Transcript:\n{truncate_if_needed(result_text)}"
|
| 1465 |
|
| 1466 |
+
except Exception as e:
|
| 1467 |
telemetry.record_call("get_youtube_transcript", time.time() - start_time, False)
|
| 1468 |
error_msg = str(e)
|
| 1469 |
|
| 1470 |
suggestions = []
|
| 1471 |
+
if "text/html" in error_msg.lower() or "html document" in error_msg.lower():
|
| 1472 |
+
suggestions.append("YouTube blocked on HuggingFace. Use the local .mp4 file instead with audio_transcription_tool or analyze_image")
|
| 1473 |
+
elif "not found" in error_msg.lower():
|
| 1474 |
suggestions.append("Video may be private or deleted")
|
| 1475 |
+
elif "quota" in error_msg.lower() or "limit" in error_msg.lower():
|
| 1476 |
suggestions.append("AssemblyAI quota exceeded")
|
| 1477 |
+
elif "timeout" in error_msg.lower():
|
| 1478 |
suggestions.append("Video may be too long (try shorter video)")
|
| 1479 |
|
| 1480 |
+
suggestion_text = " | ".join(suggestions) if suggestions else "Check video URL is valid and public"
|
| 1481 |
|
| 1482 |
raise ToolError("get_youtube_transcript", e, suggestion_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1483 |
|
| 1484 |
|
| 1485 |
class BrowseInput(BaseModel):
|
|
|
|
| 1714 |
telemetry.record_call("scrape_and_retrieve", time.time() - start_time, False)
|
| 1715 |
raise ToolError("scrape_and_retrieve", e)
|
| 1716 |
|
| 1717 |
+
class VideoAnalysisInput(BaseModel):
    # Argument schema passed to @tool(args_schema=...) for analyze_video.
    # Documented with comments rather than a class docstring so the generated
    # schema stays exactly as it was.

    # Location of the video to analyze; no default is given.
    file_path: str = Field(
        description="Path to video file (.mp4, .mov, etc.)",
    )

    # Question to answer about the video; no default is given.
    query: str = Field(
        description="What to find in the video",
    )
|
| 1720 |
|
| 1721 |
+
@tool(args_schema=VideoAnalysisInput)
def analyze_video(file_path: str, query: str) -> str:
    """
    Analyze video using Gemini Vision (supports video).

    Use for:
    - Counting objects/people/animals in video
    - Describing what happens
    - Finding specific moments
    - Visual Q&A about video content
    """
    # NOTE(review): the docstring above is kept verbatim on purpose; presumably
    # the @tool decorator surfaces it as the tool description -- confirm before
    # rewording it.
    #
    # Flow: resolve the local file -> upload it to Gemini -> poll until the
    # upload leaves the PROCESSING state -> ask the model `query` about it ->
    # delete the upload.
    #
    # Returns: "Video Analysis:\n" followed by truncate_if_needed(model text).
    # Raises:  ToolError wrapping whatever went wrong (missing file, missing
    #          GEMINI_API_KEY, processing failure/timeout, API errors).
    # Telemetry is recorded for both the success and the failure path.

    # Upper bound on how long we wait for Gemini to finish processing the
    # upload; without it a stuck upload would block the tool forever.
    processing_timeout_s = 300
    poll_interval_s = 2

    start_time = time.time()

    try:
        print(f"🎥 Analyzing video: {file_path}")
        print(f" Query: {query[:100]}...")

        # Prefer the project's file lookup; fall back to the literal path.
        video_path = find_file(file_path)
        if not video_path and os.path.exists(file_path):
            video_path = Path(file_path)

        if not video_path or not video_path.exists():
            raise FileNotFoundError(f"Video not found: {file_path}")

        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY not set")

        # Upload video to Gemini
        print(" Uploading video to Gemini...")
        # Imported lazily so the dependency is only needed when the tool runs.
        import google.generativeai as genai
        genai.configure(api_key=api_key)

        video_file = genai.upload_file(path=str(video_path))

        # From here on an upload exists remotely, so every exit path (success,
        # processing failure, timeout, API error) must go through the cleanup.
        try:
            print(" Waiting for processing...")
            deadline = time.monotonic() + processing_timeout_s
            while video_file.state.name == "PROCESSING":
                if time.monotonic() >= deadline:
                    raise TimeoutError(
                        f"Video processing did not finish within {processing_timeout_s}s"
                    )
                time.sleep(poll_interval_s)
                video_file = genai.get_file(video_file.name)

            if video_file.state.name == "FAILED":
                raise RuntimeError("Video processing failed")

            # Analyze with Gemini
            print(" Analyzing with Gemini...")
            model = genai.GenerativeModel("gemini-2.0-flash-exp")
            response = model.generate_content([video_file, query])
            result = response.text
        finally:
            # Best-effort cleanup: a failed delete must neither mask the real
            # error nor throw away a successful analysis.
            try:
                genai.delete_file(video_file.name)
            except Exception as cleanup_err:
                print(f" Warning: Could not delete uploaded video: {cleanup_err}")

        print(f"✓ Analysis complete: {len(result)} chars")

        telemetry.record_call("analyze_video", time.time() - start_time, True)
        return f"Video Analysis:\n{truncate_if_needed(result)}"

    except Exception as e:
        telemetry.record_call("analyze_video", time.time() - start_time, False)
        raise ToolError("analyze_video", e, "Check video file path and Gemini API") from e
|
| 1786 |
+
|
| 1787 |
class FinalAnswerInput(BaseModel):
|
| 1788 |
answer: str = Field(description="Final answer - exact, no fluff")
|
| 1789 |
|
|
|
|
| 1813 |
|
| 1814 |
# Core tools
|
| 1815 |
search_tool,
|
| 1816 |
+
wikipedia_search,
|
| 1817 |
calculator,
|
| 1818 |
+
analyze_video,
|
| 1819 |
code_interpreter,
|
| 1820 |
|
| 1821 |
# File operations
|
|
|
|
| 2071 |
REMEMBER: wikipedia_search() wants just the SUBJECT NAME!
|
| 2072 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2073 |
|
| 2074 |
+
**YOUTUBE VIDEO QUESTIONS** (Network restrictions):
|
| 2075 |
+
⚠️ YouTube URLs may be blocked on HuggingFace Spaces!
|
| 2076 |
+
If question mentions YouTube AND a local .mp4 file exists:
|
| 2077 |
+
→ Use analyze_image tool on the local video file instead
|
| 2078 |
+
→ Or use audio_transcription_tool for audio content
|
| 2079 |
+
|
| 2080 |
+
Example:
|
| 2081 |
+
Q: "In video https://youtube.com/..., what happens?"
|
| 2082 |
+
[FILE: task_123.mp4]
|
| 2083 |
+
✅ CORRECT: analyze_image("files/task_123.mp4", "what happens in video")
|
| 2084 |
+
❌ WRONG: get_youtube_transcript("https://youtube.com/...")
|
| 2085 |
+
|
| 2086 |
+
|
| 2087 |
🚨 ANTI-LOOP RULES:
|
| 2088 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2089 |
|
|
|
|
| 2245 |
|
| 2246 |
# Check if we should force reflection
|
| 2247 |
consecutive_errors = state.get('consecutive_errors', 0)
|
| 2248 |
+
should_reflect = (current_turn > 5 and current_turn % Config.REFLECT_EVERY_N_TURNS == 0) or consecutive_errors >= 3
|
| 2249 |
|
| 2250 |
# Force tool usage
|
| 2251 |
if len(messages_to_send) >= 2:
|