Spaces:
Running
Running
Ahmed Mostafa
feat: implement robust YouTube transcript downloader with multi-provider fallback and proxy support
571ece6 | """ | |
| Main entry point for YouTube Study Notes AI. | |
| Provides CLI interface and server startup. | |
| """ | |
| import os | |
| import sys | |
| import subprocess | |
| import argparse | |
| from pathlib import Path | |
| from src.utils.logger import setup_logger | |
| from src.utils.config import settings | |
| logger = setup_logger(__name__) | |
def configure_proxy():
    """Mirror ``PROXY_URL`` into the conventional proxy environment variables.

    Reads ``PROXY_URL`` from the process environment and strips surrounding
    whitespace. When the result is non-empty it is assigned to the upper- and
    lower-case ``HTTP_PROXY`` / ``HTTPS_PROXY`` / ``ALL_PROXY`` variables and a
    confirmation is logged. When ``PROXY_URL`` is unset or blank, nothing is
    changed and nothing is logged.
    """
    url = os.environ.get("PROXY_URL", "").strip()
    if not url:
        return

    upper_names = ("HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY")
    lower_names = tuple(name.lower() for name in upper_names)
    # Both spellings are set, upper-case first, matching the original order.
    for variable in upper_names + lower_names:
        os.environ[variable] = url

    logger.info("β Global proxy configured from PROXY_URL")
def check_youtube_connectivity():
    """Probe ``https://www.youtube.com`` once and log whether it responded.

    This is a best-effort startup diagnostic. A successful request logs an
    info message; any ordinary exception raised by the request (for example
    a URL error or a timeout after 5 seconds) is swallowed and reported as a
    warning, so a failed probe does not stop the caller.
    """
    import urllib.request

    try:
        # The context manager closes the HTTP response (and its socket) as
        # soon as the probe succeeds instead of leaving it to the GC.
        with urllib.request.urlopen("https://www.youtube.com", timeout=5):
            pass
    except Exception as exc:  # deliberate catch-all: the probe must not crash startup
        logger.warning(
            "WARNING: YouTube is not reachable from this host. "
            "Set PROXY_URL or RAPIDAPI_KEY env vars to enable YouTube features."
        )
        # Keep the underlying reason available for troubleshooting.
        logger.debug("YouTube connectivity probe failed: %r", exc)
    else:
        logger.info("β YouTube is reachable")
def check_environment():
    """Run the startup diagnostics and log their results.

    Steps, in order:

    1. ``configure_proxy()`` - may set proxy environment variables.
    2. ``check_youtube_connectivity()`` - logs whether YouTube is reachable.
    3. Run ``ffmpeg -version`` and log the first line of its output, or a
       warning when the command cannot be run or exits with an error.
    4. Log whether the ``SUPADATA_API_KEY`` environment variable is set to a
       non-blank value.

    The ffmpeg and API-key checks only log; they do not raise for a missing
    binary or key.
    """
    configure_proxy()
    check_youtube_connectivity()

    # Check ffmpeg (still used by audio processing utilities)
    try:
        raw_output = subprocess.check_output(
            ["ffmpeg", "-version"], stderr=subprocess.STDOUT
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError as well as e.g. PermissionError
        # when a non-executable ``ffmpeg`` is found on PATH.
        logger.warning("β ffmpeg NOT found β audio extraction will fail")
    else:
        # Tolerate undecodable bytes and empty output rather than crashing
        # a purely informational check.
        output_lines = raw_output.decode(errors="replace").splitlines()
        ffmpeg_out = output_lines[0] if output_lines else ""
        logger.info(f"β ffmpeg available: {ffmpeg_out}")

    # Verify Supadata API key is configured
    supadata_key = os.environ.get("SUPADATA_API_KEY", "").strip()
    if supadata_key:
        logger.info("β SUPADATA_API_KEY is set")
    else:
        logger.warning("β SUPADATA_API_KEY is NOT set β transcript extraction will fail")
def run_server():
    """Run the environment checks, enable CORS, and serve the API with uvicorn.

    Imports the FastAPI ``app`` from ``src.api.main``, calls
    ``check_environment()``, registers ``CORSMiddleware`` with wildcard
    origins, methods and headers (credentials allowed), logs the server and
    docs URLs built from ``settings.api_host`` / ``settings.api_port``, and
    then hands control to ``uvicorn.run``.
    """
    import uvicorn
    from fastapi.middleware.cors import CORSMiddleware
    from src.api.main import app

    check_environment()

    logger.info("Configuring CORS for Flutter Web...")
    # NOTE(review): wildcard origins together with allow_credentials=True is a
    # very permissive combination - confirm it is intended for production.
    cors_options = {
        "allow_origins": ["*"],
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    app.add_middleware(CORSMiddleware, **cors_options)

    host = settings.api_host
    port = settings.api_port
    base_url = f"http://{host}:{port}"

    logger.info("Starting YouTube Study Notes AI server...")
    logger.info(f"Server will be available at {base_url}")
    logger.info(f"API Documentation: {base_url}/docs")

    uvicorn.run(app, host=host, port=port, log_level="info")
def run_cli(youtube_url: str, output_file: str = None):
    """Fetch the transcript for a video, print it, and optionally save it.

    Runs ``check_environment()``, obtains the transcript through
    ``YouTubeDownloader.get_transcript`` and prints it to stdout.

    Args:
        youtube_url: Video URL passed to ``get_transcript``.
        output_file: Optional destination path. When provided (non-empty),
            the same text that is printed is also written to this file as
            UTF-8, creating missing parent directories. Previously this
            argument was accepted but ignored.
    """
    from src.transcription.downloader import YouTubeDownloader

    check_environment()

    downloader = YouTubeDownloader()
    transcript = downloader.get_transcript(youtube_url)
    print(transcript)

    if output_file:
        destination = Path(output_file)
        destination.parent.mkdir(parents=True, exist_ok=True)
        # str() mirrors what print() emits, whatever type get_transcript returns.
        destination.write_text(str(transcript), encoding="utf-8")
        logger.info(f"Transcript written to {destination}")
def main():
    """Parse command-line arguments and dispatch to the selected run mode.

    Positional ``mode`` is either ``server`` (calls ``run_server()``) or
    ``cli`` (calls ``run_cli(url, output)``). In ``cli`` mode ``--url`` is
    mandatory: when it is missing, an error is written to stderr and the
    process exits with status 1.

    Raises:
        SystemExit: On argument-parsing errors (raised by argparse) or when
            ``cli`` mode is requested without ``--url``.
    """
    parser = argparse.ArgumentParser(
        description="YouTube Study Notes AI - Generate structured notes from educational videos"
    )
    parser.add_argument(
        "mode",
        choices=["server", "cli"],
        help="Run mode: server (API + web UI) or cli (direct processing)",
    )
    parser.add_argument("--url", type=str, help="YouTube video URL (required for cli mode)")
    parser.add_argument("--output", type=str, help="Output file path (optional for cli mode)")
    args = parser.parse_args()

    if args.mode == "server":
        run_server()
    elif args.mode == "cli":
        if not args.url:
            # Errors go to stderr so they never mix with the transcript that
            # cli mode prints on stdout.
            print("Error: --url is required for cli mode", file=sys.stderr)
            sys.exit(1)
        run_cli(args.url, args.output)


if __name__ == "__main__":
    main()