bivav committed on
Commit
a7c62bf
·
0 Parent(s):

Add: youtube to mp3 feature

Browse files
.dockerignore ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git
2
+ .git
3
+ .gitignore
4
+ .gitattributes
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+ *.so
11
+ .Python
12
+ env/
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+
28
+ # Virtual Environment
29
+ venv/
30
+ ENV/
31
+
32
+ # IDE
33
+ .idea/
34
+ .vscode/
35
+ *.swp
36
+ *.swo
37
+
38
+ # Project specific
39
+ app/static/audio/*
40
+ !app/static/audio/.gitkeep
41
+ *.mp3
42
+ *.log
43
+ .env
44
+ .env.*
.env.example ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Server Configuration
2
+ SERVER_HOST=http://localhost
3
+ SERVER_PORT=8000
4
+
5
+ # API Configuration
6
+ API_V1_STR=/api
7
+
8
+ # Project Configuration
9
+ PROJECT_NAME="YouTube to MP3 Converter"
10
+
11
+ # File Configuration
12
+ MAX_AUDIO_FILES=1000
13
+ CLEANUP_THRESHOLD=800
.gitignore ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual Environment
24
+ venv/
25
+ env/
26
+ ENV/
27
+ .env
28
+
29
+ # IDE
30
+ .idea/
31
+ .vscode/
32
+ *.swp
33
+ *.swo
34
+
35
+ # FastAPI specific
36
+ .pytest_cache/
37
+ coverage.xml
38
+ htmlcov/
39
+
40
+ # Logs
41
+ *.log
42
+
43
+ # Local development
44
+ .env.local
45
+ .env.development.local
46
+ .env.test.local
47
+ .env.production.local
48
+
49
+ # System Files
50
+ .DS_Store
51
+ Thumbs.db
52
+
53
+ *.mp3
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use Python 3.11 slim image as base
2
+ FROM python:3.11-slim
3
+
4
+ # Set environment variables
5
+ ENV PYTHONDONTWRITEBYTECODE=1 \
6
+ PYTHONUNBUFFERED=1 \
7
+ DEBIAN_FRONTEND=noninteractive
8
+
9
+ # Set working directory
10
+ WORKDIR /code
11
+
12
+ # Install system dependencies
13
+ RUN apt-get update && apt-get install -y \
14
+ ffmpeg \
15
+ curl \
16
+ && rm -rf /var/lib/apt/lists/*
17
+
18
+ # Install Python dependencies
19
+ COPY requirements.txt .
20
+ RUN pip install --no-cache-dir -r requirements.txt
21
+
22
+ # Copy application code
23
+ COPY . .
24
+
25
+ # Create directory for static files in Hugging Face's persistent storage
26
+ RUN mkdir -p /data/audio && \
27
+ chmod -R 755 /data/audio
28
+
29
+ # Expose port (Hugging Face uses port 7860 by default)
30
+ EXPOSE 7860
31
+
32
+ # Run the application
33
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YouTube to MP3 Converter API
2
+
3
+ A FastAPI service that converts YouTube videos to MP3 format, optimized for Hugging Face Spaces deployment.
4
+
5
+ ## Features
6
+
7
+ - Convert YouTube videos to MP3
8
+ - Persistent storage for audio files
9
+ - RESTful API with automatic documentation
10
+ - Docker-ready deployment
11
+
12
+ ## API Endpoints
13
+
14
+ - `POST /api/convert`: Convert a YouTube video to MP3
15
+ - Request body: `{"url": "youtube_url"}`
16
+ - Response: `{"download_url": "mp3_file_url"}`
17
+
18
+ ## Development
19
+
20
+ 1. Create `.env` file from template:
21
+ ```bash
22
+ cp .env.example .env
23
+ ```
24
+
25
+ 2. Run locally with Docker:
26
+ ```bash
27
+ docker-compose up --build
28
+ ```
29
+
30
+ 3. Access API documentation:
31
+ - Local: http://localhost:8000/docs
32
+ - Production: https://huggingface.co/spaces/bivav/video-mp3/docs
33
+
34
+ ## Hugging Face Deployment
35
+
36
+ This API is deployed on Hugging Face Spaces at: https://huggingface.co/spaces/bivav/video-mp3
37
+
38
+ The service uses Hugging Face's persistent storage at `/data/audio` for storing MP3 files.
39
+
40
+ ## Environment Variables
41
+
42
+ - `SERVER_HOST`: API server host
43
+ - `SERVER_PORT`: API server port
44
+ - `HF_SPACE_NAME`: Hugging Face space name
45
+ - `HF_STATIC_DIR`: Directory for static files in Hugging Face
46
+
47
+ ## License
48
+
49
+ MIT License
app/__init__.py ADDED
File without changes
app/api/__init__.py ADDED
File without changes
app/api/endpoints.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from fastapi.responses import FileResponse
3
+ from pydantic import BaseModel, HttpUrl
4
+ from ..services.youtube_service import youtube_service
5
+ from ..core.config import settings
6
+
7
+ router = APIRouter()
8
+
9
+
10
class ConversionRequest(BaseModel):
    """Request body for ``POST /convert``."""

    # URL of the YouTube video to convert; validated as an HTTP(S) URL.
    url: HttpUrl


class ConversionResponse(BaseModel):
    """Response body for ``POST /convert``."""

    # Absolute URL from which the converted MP3 can be downloaded.
    download_url: str


@router.post("/convert", response_model=ConversionResponse)
async def convert_video(request: ConversionRequest):
    """Convert the requested YouTube video to MP3 and return its download URL.

    Raises:
        HTTPException: 400 with the underlying error message when the URL is
            rejected or the download/conversion fails.
    """
    # Only the conversion itself is guarded: failures while building the
    # response are server bugs and must not be reported as a client error.
    try:
        filename = await youtube_service.convert_to_mp3(str(request.url))
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    # Tolerate a trailing slash in the configured base URL.
    base_url = settings.BASE_URL.rstrip("/")
    download_url = f"{base_url}/static/audio/{filename}"
    return ConversionResponse(download_url=download_url)
26
+
27
+
28
@router.get("/static/audio/{filename}")
async def get_audio(filename: str):
    """Serve a previously converted MP3 as a file download.

    Args:
        filename: Name of a file directly inside ``settings.AUDIO_DIR``.

    Raises:
        HTTPException: 404 when the name does not refer to a regular file
            located directly inside the audio directory.
    """
    audio_root = settings.AUDIO_DIR.resolve()
    file_path = (audio_root / filename).resolve()

    # Reject names such as ".." that resolve outside the audio directory, and
    # directories, which exists() would accept but FileResponse cannot serve.
    if file_path.parent != audio_root or not file_path.is_file():
        raise HTTPException(status_code=404, detail="Audio file not found")

    # Passing `filename` makes FileResponse emit a correctly quoted
    # "attachment" Content-Disposition header, so none is built by hand.
    return FileResponse(
        path=file_path,
        media_type="audio/mpeg",
        filename=filename,
    )
app/core/__init__.py ADDED
File without changes
app/core/config.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from pydantic_settings import BaseSettings
3
+
4
+
5
class Settings(BaseSettings):
    """Application settings loaded from the environment and an optional ``.env`` file.

    ``SERVER_HOST`` and ``SERVER_PORT`` have no defaults and must be provided.
    ``STATIC_DIR`` and ``AUDIO_DIR`` are derived from the effective
    ``HF_STATIC_DIR`` unless they are set explicitly.
    """

    # API Settings
    API_V1_STR: str = "/api"
    PROJECT_NAME: str = "YouTube to MP3 Converter"

    # Server configuration
    SERVER_HOST: str
    SERVER_PORT: int

    # Hugging Face configuration
    HF_SPACE_NAME: str = "bivav/video-mp3"
    HF_STATIC_DIR: str = "/data/audio"

    @property
    def BASE_URL(self) -> str:
        """Return the public base URL used to build download links."""
        # For HTTPS, we don't include the port
        # NOTE(review): SERVER_PORT is never appended, so a plain-HTTP host such
        # as "http://localhost" yields links without the port - confirm intent.
        return self.SERVER_HOST

    # Static file configurations.
    # These class-level defaults are computed once from the *default*
    # HF_STATIC_DIR; model_post_init() re-derives them from the loaded value.
    STATIC_DIR: Path = (
        Path(HF_STATIC_DIR)
        if HF_STATIC_DIR.startswith("/data")
        else Path(__file__).parent.parent / "static"
    )
    AUDIO_DIR: Path = STATIC_DIR / "audio"

    # File configurations
    MAX_AUDIO_FILES: int = 1000
    CLEANUP_THRESHOLD: int = 800

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
        case_sensitive = True

    def model_post_init(self, __context) -> None:
        """Derive the directory settings from the values actually loaded.

        Without this step an ``HF_STATIC_DIR`` (or ``STATIC_DIR``) supplied via
        the environment would be ignored, because the class-level defaults
        above are evaluated at class-definition time.
        """
        # Snapshot first: assigning a field below adds it to the live set.
        explicitly_set = set(self.model_fields_set)
        if "STATIC_DIR" not in explicitly_set:
            self.STATIC_DIR = (
                Path(self.HF_STATIC_DIR)
                if self.HF_STATIC_DIR.startswith("/data")
                else Path(__file__).parent.parent / "static"
            )
        if "AUDIO_DIR" not in explicitly_set:
            self.AUDIO_DIR = self.STATIC_DIR / "audio"
39
+
40
+
41
+ settings = Settings()
42
+
43
+ # Ensure directories exist
44
+ settings.STATIC_DIR.mkdir(exist_ok=True, parents=True)
45
+ settings.AUDIO_DIR.mkdir(exist_ok=True, parents=True)
app/main.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.staticfiles import StaticFiles
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+
5
+ from .core.config import settings
6
+ from .api.endpoints import router
7
+
8
# Application instance; the OpenAPI schema is served under the API prefix.
app = FastAPI(
    title=settings.PROJECT_NAME,
    openapi_url=f"{settings.API_V1_STR}/openapi.json",
)

# Configure CORS.
# A wildcard origin cannot be combined with credentialed requests, and this API
# uses neither cookies nor authentication, so credentials stay disabled.
# In production, replace the wildcard with specific origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static files directory (converted audio lives in its "audio" subfolder)
app.mount("/static", StaticFiles(directory=str(settings.STATIC_DIR)), name="static")

# Include API router
app.include_router(router, prefix=settings.API_V1_STR)
app/services/__init__.py ADDED
File without changes
app/services/youtube_service.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from urllib.parse import urlparse, parse_qs
3
+ from pathlib import Path
4
+ import yt_dlp
5
+ from ..core.config import settings
6
+
7
+
8
class YouTubeService:
    """Download a YouTube video's audio track and convert it to MP3 via yt-dlp.

    Converted files are written to ``settings.AUDIO_DIR`` as ``<video_id>.mp3``
    and are reused when the same video is requested again.
    """

    # The video ID becomes part of a file name, so only these characters are
    # accepted; this keeps path separators and dots out of the output path.
    _VIDEO_ID_CHARS = frozenset(
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
    )
    # Hosts whose "/watch?v=<id>" and "/<kind>/<id>" URLs are recognised.
    _YOUTUBE_HOSTS = frozenset(
        {"www.youtube.com", "youtube.com", "m.youtube.com", "music.youtube.com"}
    )
    # Path prefixes that are immediately followed by the video ID.
    _ID_PATH_PREFIXES = ("/embed/", "/v/", "/shorts/", "/live/")

    def __init__(self):
        """Store the output directory and the base yt-dlp options."""
        self.output_dir = settings.AUDIO_DIR
        # Configure yt-dlp with Docker-compatible defaults
        self.ydl_opts = {
            "format": "bestaudio/best",
            "postprocessors": [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }
            ],
            "quiet": True,
            # Add user agent and other headers to avoid restrictions
            "http_headers": {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-us,en;q=0.5",
                "Referer": "https://www.youtube.com/",
            },
            # Add retries and throttling with better Docker-optimized settings
            "retries": 3,
            "fragment_retries": 3,
            "skip_unavailable_fragments": True,
            "ignoreerrors": False,  # Changed to False to better handle errors in Docker
            "no_warnings": False,  # Show warnings for better Docker logs
            "sleep_interval": 2,  # Increased sleep to avoid rate limiting
            "max_sleep_interval": 6,
            # Add geo bypass
            "geo_bypass": True,
            "geo_bypass_country": "US",
            # Add network settings
            "socket_timeout": 30,  # Timeout for network operations
            "extractor_retries": 3,  # Retries for extractor errors
            # Add progress hooks for better logging
            "progress_hooks": [self._progress_hook],
            # Force IPv4 to avoid IPv6 issues in some Docker networks
            "source_address": "0.0.0.0",
        }

    def _progress_hook(self, d):
        """Log download progress and notable events reported by yt-dlp.

        Args:
            d: Progress dictionary passed by yt-dlp; ``status`` selects the
                branch and the byte counters are optional.
        """
        status = d.get("status")
        if status == "downloading":
            # The exact size is often unknown (key missing or None); fall back
            # to the estimate and stay silent when neither is available.
            total = d.get("total_bytes") or d.get("total_bytes_estimate")
            downloaded = d.get("downloaded_bytes") or 0
            if total:
                progress = float(downloaded) / float(total) * 100
                print(f"Downloading: {progress:.1f}%")
        elif status == "error":
            print(f"Error during download: {d.get('error')}")
        elif status == "finished":
            print("Download finished, converting to MP3...")

    def _extract_video_id(self, url: str) -> str:
        """Extract the video ID from a YouTube URL.

        Supports ``youtu.be/<id>``, ``/watch?v=<id>`` and the ``/embed/``,
        ``/v/``, ``/shorts/`` and ``/live/`` path forms.

        Raises:
            ValueError: If the URL is not a recognised YouTube URL or the ID
                contains characters outside ``[A-Za-z0-9_-]``.
        """
        parsed = urlparse(url)
        host = parsed.hostname
        video_id = None

        if host == "youtu.be":
            video_id = parsed.path.strip("/")
        elif host in self._YOUTUBE_HOSTS:
            if parsed.path == "/watch":
                # A missing or blank "v" parameter yields None, not a KeyError.
                video_id = parse_qs(parsed.query).get("v", [None])[0]
            elif parsed.path.startswith(self._ID_PATH_PREFIXES):
                video_id = parsed.path.split("/")[2]

        if not video_id or not self._VIDEO_ID_CHARS.issuperset(video_id):
            raise ValueError("Invalid YouTube URL")
        return video_id

    async def convert_to_mp3(self, url: str) -> str:
        """Convert a YouTube video to MP3 and return the resulting file name.

        An existing ``<video_id>.mp3`` in the output directory is reused
        without downloading again.

        Raises:
            ValueError: If ``url`` is not a valid YouTube URL.
            Exception: If the download or conversion fails; the original error
                is attached as the cause.
        """
        video_id = self._extract_video_id(url)
        filename = f"{video_id}.mp3"
        output_path = self.output_dir / filename

        # Skip if file already exists
        if output_path.exists():
            return filename

        # The post-processor appends ".mp3", so the template omits the suffix.
        opts = {**self.ydl_opts, "outtmpl": str(output_path.with_suffix(""))}

        try:
            # yt-dlp is blocking; run it in the default executor so the event
            # loop stays responsive.
            await asyncio.get_running_loop().run_in_executor(
                None, self._download_and_convert, url, opts
            )

            if not output_path.exists():
                raise Exception("Failed to download and convert the video")

            return filename
        except Exception as e:
            print(
                f"Error in convert_to_mp3: {str(e)}"
            )  # Better error logging for Docker
            raise Exception(f"Error converting video: {str(e)}") from e

    def _download_and_convert(self, url: str, opts: dict):
        """Download and convert the video using yt-dlp.

        If the first attempt fails, it is retried once with the lowest-quality
        format; the error from that second attempt is the one propagated.
        """
        try:
            with yt_dlp.YoutubeDL(opts) as ydl:
                error = ydl.download([url])
                if error:  # yt-dlp returns non-zero on error
                    raise Exception(f"yt-dlp returned error code: {error}")
        except Exception as e:
            print(f"First download attempt failed: {str(e)}")
            # Retry with the lowest quality on a copy, leaving the caller's
            # options untouched.
            fallback_opts = {**opts, "format": "worstaudio/worst"}
            try:
                with yt_dlp.YoutubeDL(fallback_opts) as ydl_fallback:
                    error = ydl_fallback.download([url])
                    if error:
                        raise Exception(
                            f"Fallback download failed with error code: {error}"
                        )
            except Exception as fallback_error:
                print(f"Fallback download attempt failed: {str(fallback_error)}")
                raise  # Re-raise the last error
140
+
141
+
142
+ youtube_service = YouTubeService()
docker-compose.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
services:
  web:
    build: .
    ports:
      # The image's CMD starts uvicorn on port 7860; publish it on host port 8000.
      - "8000:7860"
    volumes:
      # With the default settings the app writes MP3s to /data/audio/audio.
      - ./app/static/audio:/data/audio/audio
    env_file:
      - .env
    restart: unless-stopped
    healthcheck:
      # Runs inside the container, so it must target the container port.
      test: ["CMD", "curl", "-f", "http://localhost:7860/docs"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi>=0.104.1
2
+ uvicorn[standard]>=0.24.0
3
+ python-multipart>=0.0.6
4
+ pydantic>=2.5.2
5
+ pydantic-settings>=2.1.0
6
+ yt-dlp>=2023.11.16
7
+ python-dotenv>=1.0.0