Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1374,31 +1374,109 @@ def analyze_image(file_path: str, query: str) -> str:
|
|
| 1374 |
raise ToolError("analyze_image", e)
|
| 1375 |
|
| 1376 |
|
| 1377 |
-
class YoutubeInput(BaseModel
|
| 1378 |
video_url: str = Field(description="YouTube URL")
|
| 1379 |
|
| 1380 |
@tool(args_schema=YoutubeInput)
|
| 1381 |
-
@retry_with_backoff(max_retries=2)
|
| 1382 |
def get_youtube_transcript(video_url: str) -> str:
|
| 1383 |
-
"""Get YouTube transcript using AssemblyAI"""
|
| 1384 |
start_time = time.time()
|
| 1385 |
|
| 1386 |
try:
|
| 1387 |
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
|
| 1388 |
-
|
|
|
|
|
|
|
|
|
|
| 1389 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1390 |
transcriber = aai.Transcriber()
|
| 1391 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1392 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1393 |
if transcript.status == aai.TranscriptStatus.error:
|
| 1394 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1395 |
|
| 1396 |
telemetry.record_call("get_youtube_transcript", time.time() - start_time, True)
|
| 1397 |
-
return f"Transcript:\n{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1398 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1399 |
except Exception as e:
|
| 1400 |
telemetry.record_call("get_youtube_transcript", time.time() - start_time, False)
|
| 1401 |
-
raise ToolError("get_youtube_transcript", e)
|
| 1402 |
|
| 1403 |
|
| 1404 |
class BrowseInput(BaseModel):
|
|
|
|
| 1374 |
raise ToolError("analyze_image", e)
|
| 1375 |
|
| 1376 |
|
| 1377 |
+
# Argument schema for the get_youtube_transcript tool (passed to it via
# `@tool(args_schema=YoutubeInput)`).
class YoutubeInput(BaseModel):
    # URL of the video to transcribe; the tool itself rejects values that do
    # not contain "youtube.com" or "youtu.be".
    video_url: str = Field(description="YouTube URL")
|
| 1379 |
|
| 1380 |
@tool(args_schema=YoutubeInput)
def get_youtube_transcript(video_url: str) -> str:
    """Get YouTube transcript using AssemblyAI with proper status handling"""
    # NOTE(review): the docstring above is left untouched because `@tool` may
    # expose it as the tool description -- confirm before rewording it.
    #
    # Contract:
    #   video_url -- must contain "youtube.com" or "youtu.be".
    #   returns   -- "YouTube Transcript:\n" + truncate_if_needed(<text>).
    #   raises    -- ToolError for every failure path (missing API key, bad
    #                URL, AssemblyAI error, timeout, empty/short transcript).
    # Every call is reported to `telemetry` with its duration and a success
    # flag.
    start_time = time.time()

    try:
        aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
        if not aai.settings.api_key:
            raise ValueError("ASSEMBLYAI_API_KEY not set in Space secrets")

        print(f"📺 Transcribing YouTube: {video_url}")

        # Validate URL
        if not ("youtube.com" in video_url or "youtu.be" in video_url):
            raise ValueError(f"Invalid YouTube URL: {video_url}")

        # Submit transcription request
        transcriber = aai.Transcriber()
        print(" Submitting to AssemblyAI...")

        config = aai.TranscriptionConfig(
            speech_model=aai.SpeechModel.best,
        )

        transcript = transcriber.transcribe(video_url, config=config)

        # Wait for completion
        print(f" Initial status: {transcript.status}")

        # Poll for completion (max 5 minutes).
        # NOTE(review): `transcribe()` presumably already waits for a final
        # status, in which case this loop is only a safety net -- confirm
        # against the SDK version in use.
        max_wait = 300
        poll_interval = 5
        elapsed = 0
        pending_statuses = (
            aai.TranscriptStatus.queued,
            aai.TranscriptStatus.processing,
        )

        while transcript.status in pending_statuses:
            if elapsed >= max_wait:
                raise TimeoutError(f"Transcription timed out after {max_wait}s. Video may be too long.")

            time.sleep(poll_interval)
            elapsed += poll_interval

            # Refresh transcript object through the SDK's documented lookup
            # by id.
            try:
                transcript = aai.Transcript.get_by_id(transcript.id)
                print(f" Status after {elapsed}s: {transcript.status}")
            except Exception as refresh_err:
                print(f" Warning: Could not refresh status: {refresh_err}")
                # Stop polling; the status checks below decide the outcome.
                break

        # Check final status
        if transcript.status == aai.TranscriptStatus.error:
            error_msg = getattr(transcript, 'error', 'Unknown error')
            raise RuntimeError(f"AssemblyAI transcription failed: {error_msg}")

        if transcript.status != aai.TranscriptStatus.completed:
            raise RuntimeError(f"Unexpected status: {transcript.status}")

        # Extract text
        if not hasattr(transcript, 'text'):
            raise AttributeError("Transcript object has no 'text' attribute")

        result_text = transcript.text

        if not result_text or not isinstance(result_text, str):
            raise ValueError(f"Transcript text is invalid: {type(result_text)}")

        result_text = result_text.strip()

        # A near-empty transcript is treated as a failure rather than a result.
        if len(result_text) < 10:
            raise ValueError(f"Transcript too short ({len(result_text)} chars). Video may have no audio.")

        print(f"✓ Transcribed {len(result_text)} chars")

        telemetry.record_call("get_youtube_transcript", time.time() - start_time, True)
        return f"YouTube Transcript:\n{truncate_if_needed(result_text)}"

    except aai.types.TranscriptError as e:
        telemetry.record_call("get_youtube_transcript", time.time() - start_time, False)
        lowered = str(e).lower()

        # Map recognisable fragments of the error message to a hint.
        suggestions = []
        if "not found" in lowered:
            suggestions.append("Video may be private or deleted")
        if "quota" in lowered or "limit" in lowered:
            suggestions.append("AssemblyAI quota exceeded")
        if "timeout" in lowered:
            suggestions.append("Video may be too long (try shorter video)")

        suggestion_text = " | ".join(suggestions) if suggestions else "Check video URL and API quota"

        raise ToolError("get_youtube_transcript", e, suggestion_text) from e

    except TimeoutError as e:
        telemetry.record_call("get_youtube_transcript", time.time() - start_time, False)
        raise ToolError("get_youtube_transcript", e, "Video too long or AssemblyAI overloaded. Try shorter video.") from e

    except Exception as e:
        telemetry.record_call("get_youtube_transcript", time.time() - start_time, False)
        raise ToolError("get_youtube_transcript", e, "Check video URL is valid and public") from e
|
| 1480 |
|
| 1481 |
|
| 1482 |
class BrowseInput(BaseModel):
|