Spaces:
Sleeping
Sleeping
Fix: Correct IndentationError and concurrency pattern in modal_whisper_app.py
Browse files- modal_whisper_app.py +51 -45
modal_whisper_app.py
CHANGED
|
@@ -758,38 +758,53 @@ def extract_video_urls_from_search(search_results: List[Dict[str, str]], max_url
|
|
| 758 |
gpu="any",
|
| 759 |
timeout=3600
|
| 760 |
)
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 766 |
|
| 767 |
-
|
| 768 |
-
|
| 769 |
|
| 770 |
-
|
| 771 |
-
|
| 772 |
-
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
else:
|
| 778 |
-
return {"url": video_url, "analysis": analysis_result}
|
| 779 |
|
| 780 |
except httpx.HTTPStatusError as e:
|
| 781 |
-
print(f"[
|
| 782 |
-
return {"url": video_url, "error_type": "download_error", "
|
| 783 |
except httpx.RequestError as e:
|
| 784 |
-
print(f"[
|
| 785 |
-
return {"url": video_url, "error_type": "download_error", "
|
| 786 |
except Exception as e:
|
| 787 |
-
print(f"[
|
| 788 |
import traceback
|
| 789 |
-
|
| 790 |
-
# traceback.print_exc() # This might be too verbose for regular Modal logs
|
| 791 |
-
return {"url": video_url, "error_type": "processing_error", "error_details": str(e), "traceback": traceback.format_exc()[:1000]}
|
| 792 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 793 |
async def analyze_videos_by_topic(video_urls: List[str], topic: str) -> Dict[str, Any]:
|
| 794 |
"""Analyzes a list of videos (by URL) concurrently and aggregates results for a topic."""
|
| 795 |
print(f"[TopicAnalysis] Starting concurrent analysis for topic: '{topic}' with {len(video_urls)} video(s).")
|
|
@@ -804,29 +819,20 @@ async def analyze_videos_by_topic(video_urls: List[str], topic: str) -> Dict[str
|
|
| 804 |
results_aggregator["errors"].append({"topic_error": "No video URLs provided or found for the topic."})
|
| 805 |
return results_aggregator
|
| 806 |
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
# This handles exceptions not caught within _download_and_analyze_one_video itself (should be rare)
|
| 816 |
-
# Or if return_exceptions=True was used and _download_and_analyze_one_video raised an unhandled one.
|
| 817 |
-
print(f"[TopicAnalysis] An unexpected exception occurred during asyncio.gather: {res_or_exc}")
|
| 818 |
-
results_aggregator["errors"].append({"url": "unknown_url_due_to_gather_exception", "processing_error": str(res_or_exc)})
|
| 819 |
-
elif isinstance(res_or_exc, dict):
|
| 820 |
-
if "error_type" in res_or_exc:
|
| 821 |
-
results_aggregator["errors"].append(res_or_exc) # Append the error dict directly
|
| 822 |
-
elif "analysis" in res_or_exc:
|
| 823 |
-
results_aggregator["analyzed_videos"].append(res_or_exc)
|
| 824 |
else:
|
| 825 |
-
|
| 826 |
-
results_aggregator["errors"].append({"url": res_or_exc.get("url", "unknown"), "processing_error": "Unknown result structure"})
|
| 827 |
else:
|
| 828 |
-
|
| 829 |
-
|
|
|
|
| 830 |
|
| 831 |
print(f"[TopicAnalysis] Finished concurrent analysis for topic '{topic}'.")
|
| 832 |
return results_aggregator
|
|
|
|
| 758 |
gpu="any",
|
| 759 |
timeout=3600
|
| 760 |
)
|
| 761 |
+
@app.function(
    image=video_analysis_image_v2,
    secrets=[HF_TOKEN_SECRET],
    timeout=1800,
)
async def _analyze_video_worker(video_url: str) -> dict:
    """Download one video from a URL and run the comprehensive analysis on it.

    Intended to be called concurrently, once per URL. Every ``Exception``
    raised while downloading or analysing is caught and reported in the
    returned dict instead of propagating to the caller.

    Args:
        video_url: Location of the video to download.

    Returns:
        A dict that always carries ``"url"`` and ``"status"``:

        * success: ``{"url", "status": "success", "analysis"}``
        * failure: ``{"url", "status": "error", "error_type", "details"}``,
          where ``error_type`` is ``"download_error"``, ``"analysis_error"``
          or ``"processing_error"``; the last one also includes a
          ``"traceback"`` string truncated to 1000 characters.
    """
    print(f"[Worker] Starting analysis for {video_url}")
    try:
        # The client is only needed for the download, so it is closed before
        # the (potentially long) analysis step starts.
        async with httpx.AsyncClient() as client:
            print(f"[Worker] Downloading video from {video_url}")
            response = await client.get(video_url, follow_redirects=True, timeout=60.0)
            response.raise_for_status()
            video_bytes = await response.aread()
            print(f"[Worker] Downloaded {len(video_bytes)} bytes from {video_url}")

        if not video_bytes:
            # Raised inside the try so it is reported as a "processing_error".
            raise ValueError("Downloaded video content is empty.")

        analysis_result = await analyze_video_comprehensive.coro(video_bytes)

        # NOTE(review): this treats the analysis as failed when the string form
        # of ANY top-level value contains "error" (case-insensitive). That also
        # matches legitimate content that merely mentions the word -- confirm
        # against the actual result schema of analyze_video_comprehensive.
        if isinstance(analysis_result, dict) and any("error" in str(v).lower() for v in analysis_result.values()):
            print(f"[Worker] Comprehensive analysis for {video_url} reported errors: {analysis_result}")
            return {"url": video_url, "status": "error", "error_type": "analysis_error", "details": analysis_result}
        else:
            return {"url": video_url, "status": "success", "analysis": analysis_result}

    except httpx.HTTPStatusError as e:
        # Non-2xx response surfaced by raise_for_status().
        print(f"[Worker] HTTP error downloading {video_url}: {e}")
        return {"url": video_url, "status": "error", "error_type": "download_error", "details": f"HTTP {e.response.status_code}"}
    except httpx.RequestError as e:
        # Transport-level failure raised by httpx while issuing the request.
        print(f"[Worker] Request error downloading {video_url}: {e}")
        return {"url": video_url, "status": "error", "error_type": "download_error", "details": f"Failed to download: {str(e)}"}
    except Exception as e:
        # Catch-all boundary: a worker failure is reported as data so one bad
        # URL does not abort the caller's aggregation over the other URLs.
        print(f"[Worker] Error processing video {video_url}: {e}")
        import traceback
        return {"url": video_url, "status": "error", "error_type": "processing_error", "details": str(e), "traceback": traceback.format_exc()[:1000]}
|
|
|
|
|
|
|
| 801 |
|
| 802 |
+
@app.function(
|
| 803 |
+
image=video_analysis_image_v2,
|
| 804 |
+
secrets=[HF_TOKEN_SECRET],
|
| 805 |
+
timeout=3600,
|
| 806 |
+
gpu="any",
|
| 807 |
+
)
|
| 808 |
async def analyze_videos_by_topic(video_urls: List[str], topic: str) -> Dict[str, Any]:
|
| 809 |
"""Analyzes a list of videos (by URL) concurrently and aggregates results for a topic."""
|
| 810 |
print(f"[TopicAnalysis] Starting concurrent analysis for topic: '{topic}' with {len(video_urls)} video(s).")
|
|
|
|
| 819 |
results_aggregator["errors"].append({"topic_error": "No video URLs provided or found for the topic."})
|
| 820 |
return results_aggregator
|
| 821 |
|
| 822 |
+
# Use .map to run the worker function concurrently on all video URLs
|
| 823 |
+
# The list() call forces the generator to execute and retrieve all results.
|
| 824 |
+
individual_results = list(_analyze_video_worker.map(video_urls))
|
| 825 |
+
|
| 826 |
+
for result in individual_results:
|
| 827 |
+
if isinstance(result, dict):
|
| 828 |
+
if result.get("status") == "error":
|
| 829 |
+
results_aggregator["errors"].append(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 830 |
else:
|
| 831 |
+
results_aggregator["analyzed_videos"].append(result)
|
|
|
|
| 832 |
else:
|
| 833 |
+
# This case handles unexpected return types from the worker, like exceptions
|
| 834 |
+
print(f"[TopicAnalysis] Received an unexpected result type from worker: {type(result)}")
|
| 835 |
+
results_aggregator["errors"].append({"url": "unknown", "error_type": "unexpected_result", "details": str(result)})
|
| 836 |
|
| 837 |
print(f"[TopicAnalysis] Finished concurrent analysis for topic '{topic}'.")
|
| 838 |
return results_aggregator
|