gabejavitt committed on
Commit
176e768
·
verified ·
1 Parent(s): 48c0860

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -8
app.py CHANGED
@@ -1374,31 +1374,109 @@ def analyze_image(file_path: str, query: str) -> str:
1374
  raise ToolError("analyze_image", e)
1375
 
1376
 
1377
- class YoutubeInput(BaseModel):
1378
  video_url: str = Field(description="YouTube URL")
1379
 
1380
  @tool(args_schema=YoutubeInput)
1381
- @retry_with_backoff(max_retries=2)
1382
  def get_youtube_transcript(video_url: str) -> str:
1383
- """Get YouTube transcript using AssemblyAI"""
1384
  start_time = time.time()
1385
 
1386
  try:
1387
  aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
1388
- print(f"📺 Transcribing: {video_url}")
 
 
 
1389
 
 
 
 
 
 
1390
  transcriber = aai.Transcriber()
1391
- transcript = transcriber.transcribe(video_url)
 
 
 
 
 
 
 
 
 
1392
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1393
  if transcript.status == aai.TranscriptStatus.error:
1394
- raise RuntimeError(transcript.error)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1395
 
1396
  telemetry.record_call("get_youtube_transcript", time.time() - start_time, True)
1397
- return f"Transcript:\n{transcript.text}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1398
 
 
 
 
 
 
 
1399
  except Exception as e:
1400
  telemetry.record_call("get_youtube_transcript", time.time() - start_time, False)
1401
- raise ToolError("get_youtube_transcript", e)
1402
 
1403
 
1404
  class BrowseInput(BaseModel):
 
1374
  raise ToolError("analyze_image", e)
1375
 
1376
 
# Argument schema for the get_youtube_transcript tool (passed to it via
# ``args_schema``). Kept as a ``#`` comment rather than a class docstring
# because a model docstring may be surfaced in the generated schema
# description -- TODO confirm against the pydantic/tool versions in use.
class YoutubeInput(BaseModel):
    # URL of the YouTube video whose audio should be transcribed.
    video_url: str = Field(description="YouTube URL")
1379
 
def _suggest_transcript_fix(error_message: str) -> str:
    """Map an AssemblyAI error message to a short troubleshooting hint."""
    lowered = error_message.lower()
    hints = []
    if "not found" in lowered:
        hints.append("Video may be private or deleted")
    if "quota" in lowered or "limit" in lowered:
        hints.append("AssemblyAI quota exceeded")
    if "timeout" in lowered:
        hints.append("Video may be too long (try shorter video)")
    return " | ".join(hints) if hints else "Check video URL and API quota"


# get_youtube_transcript(video_url) -> str
#
# Transcribes the audio behind ``video_url`` with AssemblyAI and returns the
# text prefixed with "YouTube Transcript:\n" (passed through
# ``truncate_if_needed``).
#
# Raises ToolError for every failure (missing API key, URL that does not
# mention youtube.com / youtu.be, transcription error, wait longer than
# 300 seconds, empty or too-short transcript). The original exception is
# attached as ``__cause__``. Each call records one telemetry entry with the
# elapsed time and a success flag.
#
# NOTE(review): the function docstring is deliberately kept to the original
# one-liner; ``@tool`` presumably uses it as the tool description shown to
# the model -- confirm before expanding it.
@tool(args_schema=YoutubeInput)
def get_youtube_transcript(video_url: str) -> str:
    """Get YouTube transcript using AssemblyAI with proper status handling"""
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import concurrent.futures

    tool_name = "get_youtube_transcript"
    max_wait = 300  # seconds to wait for AssemblyAI before giving up
    start_time = time.time()

    try:
        aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
        if not aai.settings.api_key:
            raise ValueError("ASSEMBLYAI_API_KEY not set in Space secrets")

        print(f"📺 Transcribing YouTube: {video_url}")

        # Cheap sanity check before spending an API call.
        if not any(host in video_url for host in ("youtube.com", "youtu.be")):
            raise ValueError(f"Invalid YouTube URL: {video_url}")

        transcriber = aai.Transcriber()
        print(" Submitting to AssemblyAI...")
        config = aai.TranscriptionConfig(speech_model=aai.SpeechModel.best)

        # Transcriber.transcribe() blocks until the transcript reaches a
        # terminal status, so a poll loop placed after it can never enforce
        # a deadline. Run the blocking call through the SDK's future-based
        # variant and bound the wait instead.
        pending = transcriber.transcribe_async(video_url, config=config)
        try:
            transcript = pending.result(timeout=max_wait)
        except concurrent.futures.TimeoutError as wait_err:
            # Before Python 3.11 this is not the builtin TimeoutError, so
            # normalise it. The background job cannot be cancelled and may
            # still finish on AssemblyAI's side.
            raise TimeoutError(
                f"Transcription timed out after {max_wait}s. Video may be too long."
            ) from wait_err

        print(f" Final status: {transcript.status}")

        if transcript.status == aai.TranscriptStatus.error:
            error_msg = getattr(transcript, "error", "Unknown error")
            raise RuntimeError(f"AssemblyAI transcription failed: {error_msg}")

        if transcript.status != aai.TranscriptStatus.completed:
            raise RuntimeError(f"Unexpected status: {transcript.status}")

        if not hasattr(transcript, "text"):
            raise AttributeError("Transcript object has no 'text' attribute")

        result_text = transcript.text
        if not result_text or not isinstance(result_text, str):
            raise ValueError(f"Transcript text is invalid: {type(result_text)}")

        result_text = result_text.strip()
        if len(result_text) < 10:
            raise ValueError(
                f"Transcript too short ({len(result_text)} chars). Video may have no audio."
            )

        print(f"✓ Transcribed {len(result_text)} chars")

        telemetry.record_call(tool_name, time.time() - start_time, True)
        return f"YouTube Transcript:\n{truncate_if_needed(result_text)}"

    except Exception as e:
        # Single failure path: record telemetry once, then pick the hint
        # that matches the kind of failure.
        telemetry.record_call(tool_name, time.time() - start_time, False)

        if isinstance(e, aai.types.TranscriptError):
            suggestion_text = _suggest_transcript_fix(str(e))
        elif isinstance(e, TimeoutError):
            suggestion_text = "Video too long or AssemblyAI overloaded. Try shorter video."
        else:
            suggestion_text = "Check video URL is valid and public"

        raise ToolError(tool_name, e, suggestion_text) from e
1480
 
1481
 
1482
  class BrowseInput(BaseModel):