diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..dec9763e736b4103d209194c5a88476f1a59a345
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,52 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+*.egg-info/
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+.nox/
+.hypothesis/
+
+# Virtual Environment
+venv/
+env/
+ENV/
+.venv/
+.ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Project specific
+temp/
+logs/
+*.log
+.env
+!.env.example
+
+# Git
+.git/
+.gitignore
+
+# Documentation
+*.md
+!README.md
+docs/
+
+# Tests
+tests/
+test_*.py
+*_test.py
\ No newline at end of file
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000000000000000000000000000000000000..41ef4aadba9d15aa24b72e6a35379b17a27d0722
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,18 @@
+# Application Settings
+APP_NAME="Video to Audio Extractor"
+APP_VERSION="1.0.0"
+DEBUG=false
+
+# File Processing
+TEMP_DIR="/tmp/audio_extractor"
+MAX_DIRECT_FILE_SIZE_MB=10.0
+CLEANUP_INTERVAL_SECONDS=3600
+FILE_RETENTION_HOURS=2
+
+# FFmpeg Settings
+FFMPEG_PATH="/usr/bin/ffmpeg"
+FFMPEG_TIMEOUT_SECONDS=1800
+
+# Supported Formats (comma-separated; video entries are dotted extensions, audio entries are bare format names)
+SUPPORTED_VIDEO_FORMATS=".mp4,.avi,.mov,.mkv,.webm,.flv,.wmv,.m4v"
+SUPPORTED_AUDIO_FORMATS="mp3,aac,wav,flac,m4a,ogg"
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 625544f89440829abfabbd42b97095ab9f9b2659..199d2a54c2fb84b61ec43ed1024453b5e46f569a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.9
+FROM python:3.9-slim
# Install system dependencies including ffmpeg
RUN apt-get update && \
@@ -6,28 +6,44 @@ RUN apt-get update && \
ffmpeg \
libsm6 \
libxext6 \
+ curl \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
-# Create user
+# Create a non-root user with UID 1000 (required by Hugging Face Spaces)
RUN useradd -m -u 1000 user
+
+# Set working directory
+WORKDIR /app
+
+# Copy requirements first for better caching
+COPY --chown=user:user requirements.txt .
+
+# Switch to user and install Python dependencies
USER user
ENV PATH="/home/user/.local/bin:$PATH"
+RUN pip install --no-cache-dir --upgrade pip && \
+ pip install --no-cache-dir -r requirements.txt
-WORKDIR /app
+# Copy application code maintaining structure
+COPY --chown=user:user . .
-# Copy and install Python dependencies
-COPY --chown=user ./requirements.txt requirements.txt
-RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Create necessary directories
+RUN mkdir -p /tmp/audio_extractor && \
+ mkdir -p interfaces/web/static && \
+ mkdir -p interfaces/web/templates
-# Copy application code
-COPY --chown=user . /app
+# Set environment variables
+ENV PYTHONPATH=/app
+ENV TEMP_DIR=/tmp/audio_extractor
+ENV FFMPEG_PATH=/usr/bin/ffmpeg
-# Create temp directory
-RUN mkdir -p /tmp/audio_extractor
+# Health check
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+ CMD curl -f http://localhost:7860/api/v1/health || exit 1
# Expose port
EXPOSE 7860
# Run the application
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
\ No newline at end of file
+CMD ["python", "main.py"]
\ No newline at end of file
diff --git a/app.py b/app.py
index d1c2e393806872a5f5f445f21400c6c10e00404a..28d2eb80f484399ad63cdc92488ef792e03a49ef 100644
--- a/app.py
+++ b/app.py
@@ -1,510 +1,96 @@
-from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks, Form
-from fastapi.responses import FileResponse, JSONResponse, HTMLResponse
-from fastapi.middleware.cors import CORSMiddleware
-import ffmpeg
-import tempfile
-import os
-import uuid
-from datetime import datetime, timedelta
-import asyncio
-import aiofiles
-from typing import Optional, Dict, Any
+"""FastAPI application initialization with DDD architecture."""
+from fastapi import FastAPI, Request
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from contextlib import asynccontextmanager
import logging
-import shutil
-from pathlib import Path
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-app = FastAPI(
- title="Video to Audio Extractor",
- description="Extract audio from video files using FFmpeg",
- version="1.0.0"
-)
-
-# Enable CORS
-app.add_middleware(
- CORSMiddleware,
- allow_origins=["*"],
- allow_credentials=True,
- allow_methods=["*"],
- allow_headers=["*"],
-)
-
-# Storage for background jobs
-processing_jobs: Dict[str, Any] = {}
+# Infrastructure imports
+from infrastructure.config.settings import settings
+from infrastructure.services.container import ServiceContainer
-# Temporary directory for file storage
-TEMP_DIR = Path("/tmp/audio_extractor")
-TEMP_DIR.mkdir(exist_ok=True)
+# Application imports
+from application.use_cases.container import UseCaseContainer
-# Supported formats
-SUPPORTED_VIDEO_FORMATS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv', '.wmv', '.m4v']
-SUPPORTED_AUDIO_FORMATS = ['mp3', 'aac', 'wav', 'flac', 'm4a', 'ogg']
+# Interface imports
+from interfaces.api.routes import register_routes
+from interfaces.api.middleware.error_handler import register_exception_handlers
+from interfaces.api.middleware.cors_middleware import configure_cors
-# Quality settings for different formats
-QUALITY_SETTINGS = {
- 'mp3': {
- 'high': {'audio_bitrate': '320k', 'acodec': 'libmp3lame'},
- 'medium': {'audio_bitrate': '192k', 'acodec': 'libmp3lame'},
- 'low': {'audio_bitrate': '128k', 'acodec': 'libmp3lame'}
- },
- 'aac': {
- 'high': {'audio_bitrate': '256k', 'acodec': 'aac'},
- 'medium': {'audio_bitrate': '192k', 'acodec': 'aac'},
- 'low': {'audio_bitrate': '128k', 'acodec': 'aac'}
- },
- 'wav': {
- 'high': {'acodec': 'pcm_s24le'},
- 'medium': {'acodec': 'pcm_s16le'},
- 'low': {'acodec': 'pcm_s16le'}
- },
- 'flac': {
- 'high': {'acodec': 'flac', 'compression_level': 12},
- 'medium': {'acodec': 'flac', 'compression_level': 8},
- 'low': {'acodec': 'flac', 'compression_level': 0}
- },
- 'm4a': {
- 'high': {'audio_bitrate': '256k', 'acodec': 'aac'},
- 'medium': {'audio_bitrate': '192k', 'acodec': 'aac'},
- 'low': {'audio_bitrate': '128k', 'acodec': 'aac'}
- },
- 'ogg': {
- 'high': {'audio_bitrate': '256k', 'acodec': 'libvorbis'},
- 'medium': {'audio_bitrate': '192k', 'acodec': 'libvorbis'},
- 'low': {'audio_bitrate': '128k', 'acodec': 'libvorbis'}
- }
-}
-
-def get_media_type(format: str) -> str:
- """Get the correct media type for audio format."""
- media_types = {
- 'mp3': 'audio/mpeg',
- 'aac': 'audio/aac',
- 'wav': 'audio/wav',
- 'flac': 'audio/flac',
- 'm4a': 'audio/mp4',
- 'ogg': 'audio/ogg'
- }
- return media_types.get(format, 'audio/mpeg')
+# Configure logging
+logging.basicConfig(
+ level=logging.INFO if not settings.debug else logging.DEBUG,
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
-@app.get("/", response_class=HTMLResponse)
-async def home():
- """Simple HTML interface for testing."""
- return """
-
-
-
- Video to Audio Extractor
-
-
-
-
-
🎵 Video to Audio Extractor
-
Upload a video file to extract its audio track.
-
-
-
-
-
-
-
-
-
- """
+# Global containers
+service_container: ServiceContainer = None
+use_case_container: UseCaseContainer = None
-@app.post("/extract-audio")
-async def extract_audio(
- background_tasks: BackgroundTasks,
- video: UploadFile = File(...),
- output_format: str = Form("mp3"),
- quality: str = Form("medium")
-):
- """Extract audio from uploaded video file."""
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+ """Application lifespan manager."""
+ global service_container, use_case_container
- # Validate input format
- if output_format not in SUPPORTED_AUDIO_FORMATS:
- raise HTTPException(400, f"Unsupported output format. Supported: {', '.join(SUPPORTED_AUDIO_FORMATS)}")
+ # Startup
+ logger.info(f"Starting {settings.app_name} v{settings.app_version}")
- # Validate video file extension
- file_ext = Path(video.filename).suffix.lower()
- if file_ext not in SUPPORTED_VIDEO_FORMATS:
- raise HTTPException(400, f"Unsupported video format. Supported: {', '.join(SUPPORTED_VIDEO_FORMATS)}")
+ # Initialize containers
+ service_container = ServiceContainer.get_instance()
+ use_case_container = UseCaseContainer(service_container, settings)
- # Check file size (in MB)
- video.file.seek(0, 2) # Seek to end
- file_size = video.file.tell()
- video.file.seek(0) # Reset to beginning
- file_size_mb = file_size / (1024 * 1024)
+ # Start background services
+ await service_container.startup()
- logger.info(f"Processing video: {video.filename} ({file_size_mb:.1f} MB) -> {output_format} ({quality})")
+ logger.info("Application started successfully")
- # Decide processing method based on file size
- if file_size_mb < 10: # Small files: process immediately
- try:
- return await process_and_return_direct(video, output_format, quality)
- except Exception as e:
- logger.error(f"Error processing video: {str(e)}")
- raise HTTPException(500, f"Processing failed: {str(e)}")
- else: # Large files: process in background
- job_id = str(uuid.uuid4())
- background_tasks.add_task(
- process_in_background,
- job_id, video, output_format, quality, file_size_mb
- )
- return JSONResponse({
- "job_id": job_id,
- "status": "processing",
- "message": f"Processing large file ({file_size_mb:.1f} MB). Check status at /status/{job_id}",
- "check_url": f"/status/{job_id}"
- })
-
-async def process_and_return_direct(video: UploadFile, output_format: str, quality: str) -> FileResponse:
- """Process video and return audio file directly."""
-
- # Create temporary files
- with tempfile.NamedTemporaryFile(delete=False, suffix=Path(video.filename).suffix) as tmp_video:
- # Save uploaded video
- content = await video.read()
- tmp_video.write(content)
- tmp_video_path = tmp_video.name
-
- try:
- # Extract audio
- output_path = await extract_audio_ffmpeg(tmp_video_path, output_format, quality)
-
- # Create response that will clean up files after sending
- def cleanup():
- try:
- os.unlink(tmp_video_path)
- os.unlink(output_path)
- except:
- pass
-
- background_tasks = BackgroundTasks()
- background_tasks.add_task(cleanup)
-
- return FileResponse(
- output_path,
- media_type=get_media_type(output_format),
- filename=f"{Path(video.filename).stem}.{output_format}",
- background=background_tasks
- )
- except Exception as e:
- # Cleanup on error
- if os.path.exists(tmp_video_path):
- os.unlink(tmp_video_path)
- raise
-
-async def extract_audio_ffmpeg(input_path: str, output_format: str, quality: str) -> str:
- """Extract audio using FFmpeg."""
+ yield
- # Generate output path
- output_path = str(TEMP_DIR / f"{uuid.uuid4()}.{output_format}")
+ # Shutdown
+ logger.info("Shutting down application")
- # Get quality settings
- settings = QUALITY_SETTINGS.get(output_format, {}).get(quality, {})
+ # Stop background services
+ await service_container.shutdown()
- try:
- # Build FFmpeg command
- stream = ffmpeg.input(input_path)
- stream = stream.audio # Extract only audio stream
-
- # Apply format-specific settings
- stream = ffmpeg.output(stream, output_path, **settings)
-
- # Run FFmpeg
- await asyncio.get_event_loop().run_in_executor(
- None,
- lambda: ffmpeg.run(stream, overwrite_output=True, capture_stdout=True, capture_stderr=True)
- )
-
- logger.info(f"Audio extracted successfully: {output_path}")
- return output_path
-
- except ffmpeg.Error as e:
- logger.error(f"FFmpeg error: {e.stderr.decode()}")
- raise Exception(f"FFmpeg processing failed: {e.stderr.decode()}")
+ logger.info("Application shut down successfully")
-async def process_in_background(job_id: str, video: UploadFile, output_format: str, quality: str, file_size_mb: float):
- """Process large video files in background."""
-
- # Update job status
- processing_jobs[job_id] = {
- 'status': 'processing',
- 'started_at': datetime.now(),
- 'filename': video.filename,
- 'file_size_mb': file_size_mb,
- 'format': output_format,
- 'quality': quality
- }
-
- tmp_video_path = None
- output_path = None
-
- try:
- # Save video to temporary file
- tmp_video_path = str(TEMP_DIR / f"{job_id}_input{Path(video.filename).suffix}")
- async with aiofiles.open(tmp_video_path, 'wb') as f:
- while chunk := await video.read(1024 * 1024): # Read in 1MB chunks
- await f.write(chunk)
-
- # Extract audio
- output_path = await extract_audio_ffmpeg(tmp_video_path, output_format, quality)
-
- # Update job status
- processing_jobs[job_id].update({
- 'status': 'completed',
- 'output_path': output_path,
- 'completed_at': datetime.now(),
- 'download_url': f'/download/{job_id}'
- })
-
- logger.info(f"Background job {job_id} completed successfully")
-
- except Exception as e:
- logger.error(f"Background job {job_id} failed: {str(e)}")
- processing_jobs[job_id].update({
- 'status': 'failed',
- 'error': str(e),
- 'failed_at': datetime.now()
- })
- finally:
- # Clean up input file
- if tmp_video_path and os.path.exists(tmp_video_path):
- try:
- os.unlink(tmp_video_path)
- except:
- pass
+# Create FastAPI app
+app = FastAPI(
+ title=settings.app_name,
+ version=settings.app_version,
+ description="Extract audio from video files using FFmpeg",
+ lifespan=lifespan
+)
-@app.get("/status/{job_id}")
-async def check_status(job_id: str):
- """Check the status of a background processing job."""
-
- job = processing_jobs.get(job_id)
- if not job:
- raise HTTPException(404, "Job not found")
-
- # Calculate processing time
- if job['status'] == 'processing':
- duration = (datetime.now() - job['started_at']).total_seconds()
- job['processing_time_seconds'] = duration
- elif job['status'] == 'completed':
- duration = (job['completed_at'] - job['started_at']).total_seconds()
- job['processing_time_seconds'] = duration
-
- # Don't expose internal paths
- safe_job = {k: v for k, v in job.items() if k != 'output_path'}
- return safe_job
+# Configure middleware
+configure_cors(app)
+register_exception_handlers(app)
-@app.get("/download/{job_id}")
-async def download_result(job_id: str):
- """Download the processed audio file."""
-
- job = processing_jobs.get(job_id)
- if not job:
- raise HTTPException(404, "Job not found")
-
- if job['status'] != 'completed':
- raise HTTPException(400, f"Job status: {job['status']}")
-
- if not os.path.exists(job['output_path']):
- raise HTTPException(404, "Output file not found")
-
- filename = f"{Path(job['filename']).stem}.{job['format']}"
-
- return FileResponse(
- job['output_path'],
- media_type=get_media_type(job['format']),
- filename=filename
- )
+# Register API routes
+register_routes(app)
+
+# Mount static files
+app.mount("/static", StaticFiles(directory="interfaces/web/static"), name="static")
+
+# Setup templates
+templates = Jinja2Templates(directory="interfaces/web/templates")
-@app.get("/api/info")
-async def api_info():
- """Get API information and supported formats."""
- return {
- "supported_video_formats": SUPPORTED_VIDEO_FORMATS,
- "supported_audio_formats": SUPPORTED_AUDIO_FORMATS,
- "quality_levels": ["high", "medium", "low"],
- "max_direct_response_size_mb": 10,
- "endpoints": {
- "/": "Web interface",
- "/extract-audio": "POST - Extract audio from video",
- "/status/{job_id}": "GET - Check job status",
- "/download/{job_id}": "GET - Download processed audio",
- "/api/info": "GET - API information"
- }
- }
+# Root route for web interface
+@app.get("/", include_in_schema=False)
+async def home(request: Request):
+ """Serve the web interface."""
+ return templates.TemplateResponse("index.html", {"request": request})
-# Cleanup task
-async def cleanup_old_files():
- """Periodically clean up old temporary files."""
- while True:
- try:
- await asyncio.sleep(3600) # Run every hour
-
- now = datetime.now()
- cleaned_count = 0
-
- # Clean up completed/failed jobs older than 2 hours
- for job_id, job in list(processing_jobs.items()):
- job_age = now - job.get('started_at', now)
- if job_age > timedelta(hours=2):
- # Delete output file if exists
- if 'output_path' in job and os.path.exists(job['output_path']):
- try:
- os.unlink(job['output_path'])
- cleaned_count += 1
- except:
- pass
- del processing_jobs[job_id]
-
- # Clean up orphaned files in temp directory
- for file_path in TEMP_DIR.glob('*'):
- if file_path.is_file():
- file_age = now - datetime.fromtimestamp(file_path.stat().st_mtime)
- if file_age > timedelta(hours=2):
- try:
- file_path.unlink()
- cleaned_count += 1
- except:
- pass
-
- if cleaned_count > 0:
- logger.info(f"Cleanup: removed {cleaned_count} old files")
-
- except Exception as e:
- logger.error(f"Cleanup error: {str(e)}")
+# Dependency injection functions for routes
+def get_service_container() -> ServiceContainer:
+ """Get service container for dependency injection."""
+ return service_container
-@app.on_event("startup")
-async def startup_event():
- """Start background tasks on app startup."""
- asyncio.create_task(cleanup_old_files())
- logger.info("Audio extractor service started")
+def get_use_case_container() -> UseCaseContainer:
+ """Get use case container for dependency injection."""
+ return use_case_container
-@app.on_event("shutdown")
-async def shutdown_event():
- """Clean up on shutdown."""
- # Clean all temporary files
- try:
- shutil.rmtree(TEMP_DIR)
- except:
- pass
- logger.info("Audio extractor service stopped")
\ No newline at end of file
+# Make containers available for dependency injection
+app.state.get_services = get_service_container
+app.state.get_use_cases = get_use_case_container
\ No newline at end of file
diff --git a/application/__init__.py b/application/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b54809747af26252d1741f1f87e6fb0d23b071b2
--- /dev/null
+++ b/application/__init__.py
@@ -0,0 +1 @@
+"""Application layer: use cases and DTOs for the audio extractor."""
\ No newline at end of file
diff --git a/application/dto/__init__.py b/application/dto/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ace66b177ab2537976d33ff1033a20dc101d5a9b
--- /dev/null
+++ b/application/dto/__init__.py
@@ -0,0 +1 @@
+"""Data Transfer Objects for application layer."""
\ No newline at end of file
diff --git a/application/dto/extraction_request.py b/application/dto/extraction_request.py
new file mode 100644
index 0000000000000000000000000000000000000000..0584d206d971c40720248b2fec44f35b8fc7f9de
--- /dev/null
+++ b/application/dto/extraction_request.py
@@ -0,0 +1,22 @@
+"""Request DTOs for extraction use cases."""
+from dataclasses import dataclass
+from typing import Optional
+
+@dataclass
+class ExtractionRequestDTO:
+ """DTO for extraction request."""
+ video_filename: str
+ video_file_path: str
+ video_file_size: int
+ output_format: str
+ quality: str
+ content_type: Optional[str] = None
+
+@dataclass
+class JobCreationDTO:
+ """DTO for job creation."""
+ job_id: str
+ status: str
+ message: str
+ check_url: str
+ file_size_mb: float
\ No newline at end of file
diff --git a/application/dto/extraction_response.py b/application/dto/extraction_response.py
new file mode 100644
index 0000000000000000000000000000000000000000..5bd0505ac06df1516f21a93030f6cbff3476a68d
--- /dev/null
+++ b/application/dto/extraction_response.py
@@ -0,0 +1,45 @@
+"""Response DTOs for extraction use cases."""
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional, Dict, Any
+
+@dataclass
+class DirectExtractionResultDTO:
+ """DTO for direct extraction result."""
+ file_path: str
+ media_type: str
+ filename: str
+ processing_time: float
+ file_size: int
+
+@dataclass
+class JobStatusDTO:
+ """DTO for job status."""
+ job_id: str
+ status: str
+ created_at: datetime
+ updated_at: datetime
+ filename: Optional[str] = None
+ file_size_mb: Optional[float] = None
+ output_format: Optional[str] = None
+ quality: Optional[str] = None
+ processing_time: Optional[float] = None
+ error: Optional[str] = None
+ download_url: Optional[str] = None
+
+@dataclass
+class DownloadResultDTO:
+ """DTO for download result."""
+ file_path: str
+ media_type: str
+ filename: str
+ processing_time: float
+
+@dataclass
+class JobCreationDTO:
+ """DTO for job creation."""
+ job_id: str
+ status: str
+ message: str
+ check_url: str
+ file_size_mb: Optional[float] = None
\ No newline at end of file
diff --git a/application/use_cases/__init__.py b/application/use_cases/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..aeb408442ed6e1eb70ca9776e3e2e8c1fc2a0f5b
--- /dev/null
+++ b/application/use_cases/__init__.py
@@ -0,0 +1 @@
+"""Application use cases."""
\ No newline at end of file
diff --git a/application/use_cases/check_job_status.py b/application/use_cases/check_job_status.py
new file mode 100644
index 0000000000000000000000000000000000000000..34fdb641b4e45eb1db840ae534507373a43ba3db
--- /dev/null
+++ b/application/use_cases/check_job_status.py
@@ -0,0 +1,49 @@
+"""Use case for checking job status."""
+from typing import Optional, Any
+import logging
+
+from domain.exceptions.domain_exceptions import JobNotFoundError
+from ..dto.extraction_response import JobStatusDTO
+
+logger = logging.getLogger(__name__)
+
+class JobRepository:
+ """Protocol for job repository."""
+ async def get(self, job_id: str) -> Optional[Any]:
+ ...
+
+class CheckJobStatusUseCase:
+ """Use case for checking job status."""
+
+ def __init__(self, job_repository: JobRepository):
+ self.job_repository = job_repository
+
+ async def execute(self, job_id: str) -> JobStatusDTO:
+ """Check the status of a job."""
+ # Get job from repository
+ job_record = await self.job_repository.get(job_id)
+
+ if not job_record:
+ raise JobNotFoundError(job_id)
+
+ # Calculate processing time
+ processing_time = None
+ if job_record.processing_time:
+ processing_time = job_record.processing_time
+ elif job_record.status == "processing":
+ processing_time = (job_record.updated_at - job_record.created_at).total_seconds()
+
+ # Create DTO
+ return JobStatusDTO(
+ job_id=job_record.id,
+ status=job_record.status,
+ created_at=job_record.created_at,
+ updated_at=job_record.updated_at,
+ filename=job_record.filename,
+ file_size_mb=job_record.file_size_mb,
+ output_format=job_record.output_format,
+ quality=job_record.quality,
+ processing_time=processing_time,
+ error=job_record.error,
+ download_url=f"/api/v1/jobs/{job_id}/download" if job_record.status == "completed" else None
+ )
\ No newline at end of file
diff --git a/application/use_cases/container.py b/application/use_cases/container.py
new file mode 100644
index 0000000000000000000000000000000000000000..2700906f8b8fdef26f9ff7601d4de71a860480e2
--- /dev/null
+++ b/application/use_cases/container.py
@@ -0,0 +1,65 @@
+"""Use case dependency container."""
+from typing import Dict, Any
+
+from .extract_audio_direct import ExtractAudioDirectUseCase
+from .extract_audio_async import ExtractAudioAsyncUseCase
+from .process_job import ProcessJobUseCase
+from .check_job_status import CheckJobStatusUseCase
+from .download_audio_result import DownloadAudioResultUseCase
+
+from domain.services.validation_service import ValidationService
+from infrastructure.services.container import ServiceContainer
+
+class UseCaseContainer:
+ """Container for all application use cases."""
+
+ def __init__(self, services: ServiceContainer, settings: Any):
+ self.services = services
+ self.settings = settings
+
+ # Create validation service
+ self.validation_service = ValidationService(
+            max_file_size_mb=settings.max_direct_file_size_mb * 100,  # async path accepts files up to 100x the direct-response limit
+ supported_video_formats=settings.supported_video_formats,
+ supported_audio_formats=settings.supported_audio_formats
+ )
+
+ # Initialize use cases
+ self._init_use_cases()
+
+ def _init_use_cases(self):
+ """Initialize all use cases with dependencies."""
+
+ # Process job use case (needed by async extractor)
+ self.process_job = ProcessJobUseCase(
+ job_repository=self.services.job_repository,
+ ffmpeg_service=self.services.ffmpeg_service,
+ file_repository=self.services.file_repository
+ )
+
+ # Direct extraction use case
+ self.extract_audio_direct = ExtractAudioDirectUseCase(
+ ffmpeg_service=self.services.ffmpeg_service,
+ file_repository=self.services.file_repository,
+ validation_service=self.validation_service,
+ quality_presets=self.settings.quality_presets
+ )
+
+ # Async extraction use case
+ self.extract_audio_async = ExtractAudioAsyncUseCase(
+ job_repository=self.services.job_repository,
+ validation_service=self.validation_service,
+ process_job_use_case=self.process_job
+ )
+
+ # Status checking use case
+ self.check_job_status = CheckJobStatusUseCase(
+ job_repository=self.services.job_repository
+ )
+
+ # Download use case
+ self.download_audio_result = DownloadAudioResultUseCase(
+ job_repository=self.services.job_repository,
+ file_repository=self.services.file_repository,
+ audio_mime_types=self.settings.audio_mime_types
+ )
\ No newline at end of file
diff --git a/application/use_cases/download_audio_result.py b/application/use_cases/download_audio_result.py
new file mode 100644
index 0000000000000000000000000000000000000000..37397794e48098b93d57a3b3495dcd060fe30991
--- /dev/null
+++ b/application/use_cases/download_audio_result.py
@@ -0,0 +1,66 @@
+"""Use case for downloading audio results."""
+from typing import Any, Optional
+import logging
+
+from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError
+from ..dto.extraction_response import DownloadResultDTO
+
+logger = logging.getLogger(__name__)
+
+class JobRepository:
+ """Protocol for job repository."""
+ async def get(self, job_id: str) -> Optional[Any]:
+ ...
+
+class FileRepository:
+ """Protocol for file repository."""
+ async def file_exists(self, file_path: str) -> bool:
+ ...
+
+class DownloadAudioResultUseCase:
+ """Use case for downloading completed audio."""
+
+ def __init__(self,
+ job_repository: JobRepository,
+ file_repository: FileRepository,
+ audio_mime_types: dict):
+ self.job_repository = job_repository
+ self.file_repository = file_repository
+ self.audio_mime_types = audio_mime_types
+
+ async def execute(self, job_id: str) -> DownloadResultDTO:
+ """Get download information for completed job."""
+ # Get job from repository
+ job_record = await self.job_repository.get(job_id)
+
+ if not job_record:
+ raise JobNotFoundError(job_id)
+
+ # Check if job is completed
+ if job_record.status != "completed":
+ raise JobNotCompletedError(job_id, job_record.status)
+
+ # Check if output file exists
+ if not job_record.output_path:
+ raise RuntimeError(f"Job {job_id} has no output path")
+
+ if not await self.file_repository.file_exists(job_record.output_path):
+ raise RuntimeError(f"Output file not found for job {job_id}")
+
+ # Get MIME type
+ mime_type = self.audio_mime_types.get(
+ job_record.output_format,
+ 'application/octet-stream'
+ )
+
+ # Create filename
+ import os
+ original_name = os.path.splitext(job_record.filename)[0]
+ filename = f"{original_name}.{job_record.output_format}"
+
+ return DownloadResultDTO(
+ file_path=job_record.output_path,
+ media_type=mime_type,
+ filename=filename,
+ processing_time=job_record.processing_time or 0
+ )
\ No newline at end of file
diff --git a/application/use_cases/extract_audio_async.py b/application/use_cases/extract_audio_async.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd517c3916a64a07d3edfa18f73246135181f516
--- /dev/null
+++ b/application/use_cases/extract_audio_async.py
@@ -0,0 +1,103 @@
+"""Use case for asynchronous audio extraction."""
+import asyncio
+from typing import Protocol, Any
+import logging
+
+from domain.entities.video import Video
+from domain.entities.job import Job
+from domain.value_objects.file_size import FileSize
+from domain.services.validation_service import ValidationService
+
+from ..dto.extraction_request import ExtractionRequestDTO
+from ..dto.extraction_response import JobCreationDTO
+
+logger = logging.getLogger(__name__)
+
+class JobRepository(Protocol):
+ """Protocol for job repository."""
+ async def create(self, job_id: str, filename: str, file_size_mb: float,
+ output_format: str, quality: str) -> Any:
+ ...
+ async def update_status(self, job_id: str, status: str,
+ error: str = None, output_path: str = None,
+ processing_time: float = None) -> Any:
+ ...
+
+class BackgroundTaskRunner(Protocol):
+ """Protocol for background task execution."""
+ def add_task(self, func, *args, **kwargs):
+ ...
+
+class ExtractAudioAsyncUseCase:
+ """Use case for asynchronous audio extraction (large files)."""
+
+ def __init__(self,
+ job_repository: JobRepository,
+ validation_service: ValidationService,
+ process_job_use_case: 'ProcessJobUseCase'):
+ self.job_repository = job_repository
+ self.validation_service = validation_service
+ self.process_job_use_case = process_job_use_case
+
+ async def execute(self, request: ExtractionRequestDTO,
+ background_tasks: BackgroundTaskRunner) -> JobCreationDTO:
+ """Create and queue an async extraction job."""
+
+ # Create domain objects for validation
+ video = Video(
+ filename=request.video_filename,
+ file_path=request.video_file_path,
+ size=FileSize(request.video_file_size),
+ content_type=request.content_type
+ )
+
+ # Validate request
+ self.validation_service.validate_extraction_request(
+ video, request.output_format, request.quality
+ )
+
+ # Create job
+ job = Job.create_new(
+ video_filename=request.video_filename,
+ file_size_bytes=request.video_file_size,
+ output_format=request.output_format,
+ quality=request.quality
+ )
+
+ # Save job to repository
+ await self.job_repository.create(
+ job_id=job.id,
+ filename=job.video_filename,
+ file_size_mb=job.file_size.megabytes,
+ output_format=job.output_format.value,
+ quality=job.quality.value
+ )
+
+ # Queue background processing
+ background_tasks.add_task(
+ self._process_job_background,
+ job.id,
+ request
+ )
+
+ logger.info(f"Created async job {job.id} for {video.filename}")
+
+ return JobCreationDTO(
+ job_id=job.id,
+ status=job.status.value,
+ message=f"Processing large file ({job.file_size.megabytes:.1f} MB)",
+ check_url=f"/api/v1/jobs/{job.id}",
+ file_size_mb=job.file_size.megabytes
+ )
+
+ async def _process_job_background(self, job_id: str, request: ExtractionRequestDTO):
+ """Process job in background."""
+ try:
+ await self.process_job_use_case.execute(job_id, request)
+ except Exception as e:
+ logger.error(f"Background job {job_id} failed: {str(e)}")
+ await self.job_repository.update_status(
+ job_id=job_id,
+ status="failed",
+ error=str(e)
+ )
\ No newline at end of file
diff --git a/application/use_cases/extract_audio_direct.py b/application/use_cases/extract_audio_direct.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b3811f5b0d019ed08d273760409df18ac6dbb15
--- /dev/null
+++ b/application/use_cases/extract_audio_direct.py
@@ -0,0 +1,119 @@
+"""Use case for direct audio extraction."""
+import time
+from typing import Protocol, Any
+import logging
+
+from domain.entities.video import Video
+from domain.entities.audio import Audio
+from domain.value_objects.audio_format import AudioFormat
+from domain.value_objects.audio_quality import AudioQuality
+from domain.value_objects.file_size import FileSize
+from domain.services.validation_service import ValidationService
+from domain.exceptions.domain_exceptions import ProcessingError
+
+from ..dto.extraction_request import ExtractionRequestDTO
+from ..dto.extraction_response import DirectExtractionResultDTO
+
+logger = logging.getLogger(__name__)
+
class FFmpegService(Protocol):
    """Structural interface for an FFmpeg-backed audio extraction backend."""

    async def extract_audio(self, input_path: str, output_path: str,
                            format: str, quality: str) -> Any:
        """Extract audio from input_path into output_path; returns a result object."""
        ...
+
class FileRepository(Protocol):
    """Structural interface for temp-file management used by the use case."""

    async def create_output_path(self, job_id: str, format: str) -> str:
        """Return a fresh output path for the given job and audio format."""
        ...

    async def get_file_size(self, file_path: str) -> int:
        """Return the size of file_path in bytes."""
        ...

    async def delete_file(self, file_path: str) -> bool:
        """Delete file_path; returns whether a file was removed."""
        ...
+
class ExtractAudioDirectUseCase:
    """Use case for direct audio extraction (small files, synchronous response)."""

    def __init__(self,
                 ffmpeg_service: FFmpegService,
                 file_repository: FileRepository,
                 validation_service: ValidationService,
                 quality_presets: dict):
        self.ffmpeg_service = ffmpeg_service
        self.file_repository = file_repository
        self.validation_service = validation_service
        self.quality_presets = quality_presets

    async def execute(self, request: ExtractionRequestDTO) -> DirectExtractionResultDTO:
        """Validate the request, run FFmpeg, and return the extraction result.

        Raises ProcessingError when FFmpeg reports failure; validation errors
        propagate unchanged. Any partial output file is cleaned up on failure.
        """
        import uuid  # local import: only used to mint a one-off job id

        start_time = time.time()
        output_path = None

        try:
            # Create domain objects (Video validates its own format).
            video = Video(
                filename=request.video_filename,
                file_path=request.video_file_path,
                size=FileSize(request.video_file_size),
                content_type=request.content_type
            )

            audio_format = AudioFormat(request.output_format)
            audio_quality = AudioQuality(request.quality)

            # Business-rule validation (size limits, configured formats).
            self.validation_service.validate_extraction_request(
                video, request.output_format, request.quality
            )

            job_id = str(uuid.uuid4())
            output_path = await self.file_repository.create_output_path(
                job_id, audio_format.value
            )

            logger.info(f"Starting direct extraction: {video.filename} -> {audio_format.value}")

            result = await self.ffmpeg_service.extract_audio(
                video.file_path,
                output_path,
                audio_format.value,
                audio_quality.value
            )

            if not result.success:
                raise ProcessingError(f"FFmpeg extraction failed: {result.error}")

            output_size = await self.file_repository.get_file_size(output_path)

            audio = Audio.create_from_extraction(
                source_video=video,
                file_path=output_path,
                format=audio_format,
                quality=audio_quality,
                size=output_size
            )

            processing_time = time.time() - start_time

            logger.info(f"Direct extraction completed in {processing_time:.2f}s")

            return DirectExtractionResultDTO(
                file_path=audio.file_path,
                media_type=audio.get_mime_type(),
                filename=audio.get_full_filename(),
                processing_time=processing_time,
                file_size=output_size
            )

        except Exception as e:
            # Best-effort cleanup of a partial output file. Guarded so a
            # failing delete can never mask the original exception.
            if output_path:
                try:
                    await self.file_repository.delete_file(output_path)
                except Exception as cleanup_error:
                    logger.warning(f"Cleanup of {output_path} failed: {cleanup_error}")

            logger.error(f"Direct extraction failed: {str(e)}")
            raise
\ No newline at end of file
diff --git a/application/use_cases/process_job.py b/application/use_cases/process_job.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c5f5c78df117111280e9be20bbbaa54aca7b4e0
--- /dev/null
+++ b/application/use_cases/process_job.py
@@ -0,0 +1,113 @@
+"""Use case for processing extraction jobs."""
+import time
+import logging
+from typing import Protocol, Any
+
+from domain.value_objects.audio_format import AudioFormat
+from domain.value_objects.audio_quality import AudioQuality
+from domain.value_objects.file_size import FileSize
+from domain.entities.video import Video
+from domain.entities.audio import Audio
+
+from ..dto.extraction_request import ExtractionRequestDTO
+
+logger = logging.getLogger(__name__)
+
class JobRepository(Protocol):
    """Structural interface for persisting job status transitions."""

    async def update_status(self, job_id: str, status: str,
                            error: str = None, output_path: str = None,
                            processing_time: float = None) -> Any:
        """Record a status change (with optional error/output/timing data)."""
        ...
+
class FFmpegService(Protocol):
    """Structural interface for the audio extraction backend."""

    async def extract_audio(self, input_path: str, output_path: str,
                            format: str, quality: str) -> Any:
        """Run the extraction; returns a backend-specific result object."""
        ...
+
class FileRepository(Protocol):
    """Structural interface for output-file management during job processing."""

    async def create_output_path(self, job_id: str, format: str) -> str:
        """Return a fresh output path for this job and audio format."""
        ...

    async def get_file_size(self, file_path: str) -> int:
        """Return the file's size in bytes."""
        ...

    async def delete_file(self, file_path: str) -> bool:
        """Remove the file; returns whether anything was deleted."""
        ...
+
class ProcessJobUseCase:
    """Use case for processing a queued extraction job."""

    def __init__(self,
                 job_repository: JobRepository,
                 ffmpeg_service: FFmpegService,
                 file_repository: FileRepository):
        self.job_repository = job_repository
        self.ffmpeg_service = ffmpeg_service
        self.file_repository = file_repository

    async def execute(self, job_id: str, request: ExtractionRequestDTO):
        """Run the extraction for job_id and persist the outcome.

        Marks the job "processing", runs FFmpeg, then records "completed"
        (with output path and duration) or "failed" (with the error), in
        which case the exception is re-raised after partial-output cleanup.
        """
        start_time = time.time()
        output_path = None

        try:
            await self.job_repository.update_status(job_id, "processing")

            # Create domain objects (Video validates its own format).
            video = Video(
                filename=request.video_filename,
                file_path=request.video_file_path,
                size=FileSize(request.video_file_size),
                content_type=request.content_type
            )

            audio_format = AudioFormat(request.output_format)
            audio_quality = AudioQuality(request.quality)

            output_path = await self.file_repository.create_output_path(
                job_id, audio_format.value
            )

            logger.info(f"Processing job {job_id}: {video.filename} -> {audio_format.value}")

            result = await self.ffmpeg_service.extract_audio(
                video.file_path,
                output_path,
                audio_format.value,
                audio_quality.value
            )

            if not result.success:
                raise Exception(f"FFmpeg extraction failed: {result.error}")

            processing_time = time.time() - start_time

            await self.job_repository.update_status(
                job_id=job_id,
                status="completed",
                output_path=output_path,
                processing_time=processing_time
            )

            logger.info(f"Job {job_id} completed in {processing_time:.2f}s")

        except Exception as e:
            # Best-effort cleanup of a partial output file. Guarded so a
            # failing delete cannot mask the original error or skip the
            # status update below.
            if output_path:
                try:
                    await self.file_repository.delete_file(output_path)
                except Exception as cleanup_error:
                    logger.warning(f"Cleanup of {output_path} failed: {cleanup_error}")

            await self.job_repository.update_status(
                job_id=job_id,
                status="failed",
                error=str(e)
            )

            logger.error(f"Job {job_id} failed: {str(e)}")
            raise
\ No newline at end of file
diff --git a/domain/__init__.py b/domain/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff061922e33a6295c208d3b63114adbe71a400c0
--- /dev/null
+++ b/domain/__init__.py
@@ -0,0 +1 @@
+"""Domain layer for the audio extractor application."""
\ No newline at end of file
diff --git a/domain/entities/__init__.py b/domain/entities/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe2b9a04637a6235d62c6f5c084dfa99c60c859e
--- /dev/null
+++ b/domain/entities/__init__.py
@@ -0,0 +1 @@
+"""Domain entities."""
\ No newline at end of file
diff --git a/domain/entities/audio.py b/domain/entities/audio.py
new file mode 100644
index 0000000000000000000000000000000000000000..98e37aa55ccd0608c9f56ac7d6eec8cf07a3e553
--- /dev/null
+++ b/domain/entities/audio.py
@@ -0,0 +1,44 @@
+"""Audio entity."""
+from dataclasses import dataclass
+from typing import Optional
+
+from ..value_objects.audio_format import AudioFormat
+from ..value_objects.audio_quality import AudioQuality
+from ..value_objects.file_size import FileSize
+
@dataclass
class Audio:
    """Entity representing an extracted audio file."""
    filename: str
    file_path: str
    format: AudioFormat
    quality: AudioQuality
    size: Optional[FileSize] = None    # None until measured
    duration: Optional[float] = None   # seconds, when known
    bitrate: Optional[str] = None      # e.g. "192k", when known

    def get_mime_type(self) -> str:
        """Get MIME type for this audio file."""
        return self.format.mime_type

    def get_full_filename(self) -> str:
        """Get filename, guaranteed to end with the format's extension."""
        base_name = self.filename
        if not base_name.endswith(self.format.extension):
            base_name = f"{base_name}{self.format.extension}"
        return base_name

    @classmethod
    def create_from_extraction(cls, source_video: 'Video', file_path: str,
                               format: AudioFormat, quality: AudioQuality,
                               size: Optional[int] = None) -> 'Audio':
        """Create Audio from an extraction, deriving the filename from the
        source video's stem plus the audio format's extension."""
        filename = f"{source_video.get_filename_without_extension()}{format.extension}"

        return cls(
            filename=filename,
            file_path=file_path,
            format=format,
            quality=quality,
            # `is not None` so a legitimate 0-byte size is preserved instead
            # of being dropped by truthiness (`if size` treated 0 as missing).
            size=FileSize(size) if size is not None else None
        )
\ No newline at end of file
diff --git a/domain/entities/job.py b/domain/entities/job.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c2171c77ee6e9a9816558b4a87873362fd3d7e7
--- /dev/null
+++ b/domain/entities/job.py
@@ -0,0 +1,110 @@
+"""Job entity."""
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Optional, Dict, Any
+import uuid
+
+from ..value_objects.job_status import JobStatus
+from ..value_objects.audio_format import AudioFormat
+from ..value_objects.audio_quality import AudioQuality
+from ..value_objects.file_size import FileSize
+from ..exceptions.domain_exceptions import ValidationError
+
@dataclass
class Job:
    """Entity representing an audio extraction job and its lifecycle.

    Status changes go through start_processing/complete/fail/cancel, which
    enforce the JobStatus transition rules.
    """
    id: str
    video_filename: str
    file_size: FileSize
    output_format: AudioFormat
    quality: AudioQuality
    status: JobStatus
    created_at: datetime
    updated_at: datetime
    completed_at: Optional[datetime] = None
    output_path: Optional[str] = None
    error_message: Optional[str] = None
    processing_duration: Optional[float] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Coerce plain strings (e.g. loaded from persistence) into the enum.
        if isinstance(self.status, str):
            self.status = JobStatus(self.status)

    @classmethod
    def create_new(cls, video_filename: str, file_size_bytes: int,
                   output_format: str, quality: str) -> 'Job':
        """Create a new pending job with a fresh UUID and UTC timestamps."""
        now = datetime.utcnow()
        return cls(
            id=str(uuid.uuid4()),
            video_filename=video_filename,
            file_size=FileSize(file_size_bytes),
            output_format=AudioFormat(output_format),
            quality=AudioQuality(quality),
            status=JobStatus.PENDING,
            created_at=now,
            updated_at=now
        )

    def start_processing(self) -> None:
        """Mark job as processing. Raises ValidationError on an illegal transition."""
        if not self.status.can_transition_to(JobStatus.PROCESSING):
            raise ValidationError(f"Cannot start job in {self.status} status")

        self.status = JobStatus.PROCESSING
        self.updated_at = datetime.utcnow()

    def complete(self, output_path: str, processing_duration: float) -> None:
        """Mark job as completed, recording output path and measured duration."""
        if not self.status.can_transition_to(JobStatus.COMPLETED):
            raise ValidationError(f"Cannot complete job in {self.status} status")

        self.status = JobStatus.COMPLETED
        self.output_path = output_path
        self.processing_duration = processing_duration
        self.completed_at = datetime.utcnow()
        self.updated_at = self.completed_at

    def fail(self, error_message: str) -> None:
        """Mark job as failed, recording the error message."""
        if not self.status.can_transition_to(JobStatus.FAILED):
            raise ValidationError(f"Cannot fail job in {self.status} status")

        self.status = JobStatus.FAILED
        self.error_message = error_message
        self.updated_at = datetime.utcnow()

    def cancel(self) -> None:
        """Cancel the job if its current status still allows cancellation."""
        if not self.status.can_transition_to(JobStatus.CANCELLED):
            raise ValidationError(f"Cannot cancel job in {self.status} status")

        self.status = JobStatus.CANCELLED
        self.updated_at = datetime.utcnow()

    @property
    def is_complete(self) -> bool:
        """True when the job finished successfully."""
        return self.status == JobStatus.COMPLETED

    @property
    def is_active(self) -> bool:
        """True while the job is actively being processed."""
        return self.status.is_active()

    @property
    def can_download(self) -> bool:
        """True when results exist and can be downloaded."""
        return self.is_complete and self.output_path is not None

    def get_processing_time_seconds(self) -> Optional[float]:
        """Processing time in seconds, or None when it cannot be determined.

        Prefers the recorded duration; falls back to elapsed-since-created
        for in-flight jobs, then to completed_at - created_at.
        """
        # `is not None` so a recorded duration of exactly 0.0 is returned
        # instead of falling through to the elapsed-time estimates
        # (the previous truthiness check treated 0.0 as missing).
        if self.processing_duration is not None:
            return self.processing_duration
        elif self.status == JobStatus.PROCESSING:
            return (datetime.utcnow() - self.created_at).total_seconds()
        elif self.completed_at:
            return (self.completed_at - self.created_at).total_seconds()
        return None
\ No newline at end of file
diff --git a/domain/entities/video.py b/domain/entities/video.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0adbbcdf609912494c9cfa870798d7e01f636f0
--- /dev/null
+++ b/domain/entities/video.py
@@ -0,0 +1,54 @@
+"""Video entity."""
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+
+from ..value_objects.file_size import FileSize
+from ..exceptions.domain_exceptions import InvalidVideoFormatError
+
@dataclass
class Video:
    """Entity representing a video file awaiting audio extraction."""
    filename: str
    file_path: str
    size: FileSize
    content_type: Optional[str] = None
    duration: Optional[float] = None

    # Supported video formats (lowercase extensions, leading dot included)
    SUPPORTED_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv', '.wmv', '.m4v']

    def __post_init__(self):
        # Validate eagerly: an invalid Video can never be constructed.
        self.validate()

    def validate(self):
        """Raise InvalidVideoFormatError for a missing or unsupported filename."""
        if not self.filename:
            raise InvalidVideoFormatError("", self.SUPPORTED_EXTENSIONS)

        ext = self.get_extension()
        if ext not in self.SUPPORTED_EXTENSIONS:
            raise InvalidVideoFormatError(ext, self.SUPPORTED_EXTENSIONS)

    def get_extension(self) -> str:
        """Lower-cased file extension, including the leading dot."""
        return Path(self.filename).suffix.lower()

    def get_filename_without_extension(self) -> str:
        """Filename stem, i.e. without its extension."""
        return Path(self.filename).stem

    def is_large_file(self, threshold_mb: float) -> bool:
        """True when the file is strictly larger than threshold_mb megabytes."""
        return self.size.megabytes > threshold_mb

    @classmethod
    def from_upload(cls, filename: str, file_path: str, file_size: int,
                    content_type: Optional[str] = None) -> 'Video':
        """Alternate constructor from raw upload data (size in bytes)."""
        return cls(filename=filename,
                   file_path=file_path,
                   size=FileSize(file_size),
                   content_type=content_type)
\ No newline at end of file
diff --git a/domain/exceptions/__init__.py b/domain/exceptions/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..348ef86111bfb38efbf400b08621931fc700dddb
--- /dev/null
+++ b/domain/exceptions/__init__.py
@@ -0,0 +1 @@
+"""Domain-specific exceptions."""
\ No newline at end of file
diff --git a/domain/exceptions/domain_exceptions.py b/domain/exceptions/domain_exceptions.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe40abe00732f926396666419addc88ea3f6656b
--- /dev/null
+++ b/domain/exceptions/domain_exceptions.py
@@ -0,0 +1,52 @@
+"""Custom domain exceptions."""
+
class DomainException(Exception):
    """Base exception for all domain errors; catch this to handle any of them."""
+
class ValidationError(DomainException):
    """Raised when domain validation fails (sizes, formats, transitions)."""
+
class InvalidVideoFormatError(ValidationError):
    """Raised when a video format is not supported."""

    def __init__(self, format: str, supported_formats: list):
        # Keep the offending format and the accepted list for callers.
        self.format = format
        self.supported_formats = supported_formats
        options = ', '.join(supported_formats)
        super().__init__(f"Unsupported video format: {format}. Supported: {options}")
+
class InvalidAudioFormatError(ValidationError):
    """Raised when an audio format is not supported."""

    def __init__(self, format: str, supported_formats: list):
        # Keep the offending format and the accepted list for callers.
        self.format = format
        self.supported_formats = supported_formats
        options = ', '.join(supported_formats)
        super().__init__(f"Unsupported audio format: {format}. Supported: {options}")
+
class InvalidQualityLevelError(ValidationError):
    """Raised when a quality level is not one of the accepted names."""

    def __init__(self, quality: str):
        message = f"Invalid quality level: {quality}. Must be 'high', 'medium', or 'low'"
        super().__init__(message)
+
class FileSizeExceededError(ValidationError):
    """Raised when a file's size exceeds the configured limit."""

    def __init__(self, size_mb: float, max_size_mb: float):
        # Keep both figures so handlers can report or compare them.
        self.size_mb = size_mb
        self.max_size_mb = max_size_mb
        super().__init__(
            f"File size {size_mb:.1f}MB exceeds maximum {max_size_mb:.1f}MB"
        )
+
class ProcessingError(DomainException):
    """Raised when the audio extraction itself fails (e.g. FFmpeg error)."""
+
class JobNotFoundError(DomainException):
    """Raised when a job id does not resolve to a known job."""

    def __init__(self, job_id: str):
        # Expose the missing id for handlers building API responses.
        self.job_id = job_id
        super().__init__(f"Job not found: {job_id}")
+
class JobNotCompletedError(DomainException):
    """Raised when results of a not-yet-completed job are requested."""

    def __init__(self, job_id: str, status: str):
        # Expose id and current status for handlers building API responses.
        self.job_id = job_id
        self.status = status
        super().__init__(f"Job {job_id} is not completed (status: {status})")
diff --git a/domain/services/__init__.py b/domain/services/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..39a2bd4d49835f030ac0e42becd1a2e46a4784fa
--- /dev/null
+++ b/domain/services/__init__.py
@@ -0,0 +1 @@
+"""Domain services."""
\ No newline at end of file
diff --git a/domain/services/audio_extraction_service.py b/domain/services/audio_extraction_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d4d3ad863eaa5bec49e81a4152fdbf7def48f58
--- /dev/null
+++ b/domain/services/audio_extraction_service.py
@@ -0,0 +1,52 @@
+"""Domain service for audio extraction."""
+from abc import ABC, abstractmethod
+from typing import Protocol, Optional
+from dataclasses import dataclass
+
+from ..entities.video import Video
+from ..entities.audio import Audio
+from ..entities.job import Job
+from ..value_objects.audio_format import AudioFormat
+from ..value_objects.audio_quality import AudioQuality
+
@dataclass
class ExtractionResult:
    """Result of audio extraction.

    Bundles the produced Audio entity with how long extraction took and any
    backend-specific metadata.
    """
    audio: Audio                      # the extracted audio entity
    processing_time: float            # wall-clock seconds spent extracting
    metadata: Optional[dict] = None   # optional backend-specific details
+
class AudioExtractionService(ABC):
    """Abstract domain service for audio extraction backends.

    Concrete implementations wrap a specific tool (e.g. FFmpeg) and must
    provide all three operations below.
    """

    @abstractmethod
    async def extract_audio(self, video: Video, format: AudioFormat,
                            quality: AudioQuality) -> ExtractionResult:
        """Extract audio from video with the specified format and quality."""

    @abstractmethod
    async def supports_format(self, format: AudioFormat) -> bool:
        """Whether this backend can produce the given audio format."""

    @abstractmethod
    async def estimate_processing_time(self, video: Video, format: AudioFormat) -> float:
        """Estimated processing time in seconds for the given video/format."""
+
class JobService(Protocol):
    """Structural interface for job-related operations."""

    async def create_job(self, video: Video, format: AudioFormat,
                         quality: AudioQuality) -> Job:
        """Create and return a new extraction job."""
        ...

    async def get_job(self, job_id: str) -> Optional[Job]:
        """Look up a job by id; None when it does not exist."""
        ...

    async def update_job_status(self, job: Job) -> None:
        """Persist the job's current status."""
        ...
\ No newline at end of file
diff --git a/domain/services/validation_service.py b/domain/services/validation_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..3432494a9c6f7d66bc12cc7f5e83b37b51007130
--- /dev/null
+++ b/domain/services/validation_service.py
@@ -0,0 +1,52 @@
+"""Domain validation service."""
+from typing import List
+
+from ..entities.video import Video
+from ..value_objects.audio_format import AudioFormat
+from ..value_objects.audio_quality import AudioQuality
+from ..value_objects.file_size import FileSize
+from ..exceptions.domain_exceptions import (
+ ValidationError,
+ FileSizeExceededError,
+ InvalidVideoFormatError,
+ InvalidAudioFormatError
+)
+
class ValidationService:
    """Service for validating domain rules against configured limits."""

    def __init__(self, max_file_size_mb: float,
                 supported_video_formats: List[str],
                 supported_audio_formats: List[str]):
        self.max_file_size_mb = max_file_size_mb
        self.supported_video_formats = supported_video_formats
        self.supported_audio_formats = supported_audio_formats

    def validate_video(self, video: Video) -> None:
        """Validate a video entity against size and format rules.

        Raises FileSizeExceededError or InvalidVideoFormatError.
        """
        if not video.size.is_within_limit(self.max_file_size_mb):
            raise FileSizeExceededError(video.size.megabytes, self.max_file_size_mb)

        extension = video.get_extension()
        if extension not in self.supported_video_formats:
            raise InvalidVideoFormatError(extension, self.supported_video_formats)

    def validate_extraction_request(self, video: Video, format: str, quality: str) -> None:
        """Validate the full request: video, audio format, and quality level."""
        self.validate_video(video)

        # Honour the *configured* audio formats, mirroring how validate_video
        # honours supported_video_formats. Previously only AudioFormat's
        # built-in list was consulted, so the configuration had no effect.
        if format.lower() not in self.supported_audio_formats:
            raise InvalidAudioFormatError(format, self.supported_audio_formats)

        # Still construct the value object so formats unknown to the domain
        # are rejected even if mistakenly present in the configuration.
        try:
            AudioFormat(format)
        except InvalidAudioFormatError:
            # Re-raise with the configured list; suppress the internal chain.
            raise InvalidAudioFormatError(format, self.supported_audio_formats) from None

        AudioQuality(quality)  # Raises InvalidQualityLevelError if invalid

    def can_process_directly(self, video: Video, threshold_mb: float) -> bool:
        """Check if video can be processed directly (not async)."""
        return not video.is_large_file(threshold_mb)
\ No newline at end of file
diff --git a/domain/value_objects/__init__.py b/domain/value_objects/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6249cd61e45460f06982f41bc50d62e59d48e35d
--- /dev/null
+++ b/domain/value_objects/__init__.py
@@ -0,0 +1 @@
+"""Domain value objects."""
\ No newline at end of file
diff --git a/domain/value_objects/audio_format.py b/domain/value_objects/audio_format.py
new file mode 100644
index 0000000000000000000000000000000000000000..5276ca827fce43badf0a8db4a75779063a40eb00
--- /dev/null
+++ b/domain/value_objects/audio_format.py
@@ -0,0 +1,50 @@
+"""Audio format value object."""
+from typing import List
+from dataclasses import dataclass
+
+from ..exceptions.domain_exceptions import InvalidAudioFormatError
+
@dataclass(frozen=True)
class AudioFormat:
    """Immutable value object for a supported audio output format."""
    value: str

    # Supported formats and their per-format characteristics.
    FORMATS = {
        'mp3': {'extension': '.mp3', 'mime_type': 'audio/mpeg', 'lossy': True},
        'aac': {'extension': '.aac', 'mime_type': 'audio/aac', 'lossy': True},
        'wav': {'extension': '.wav', 'mime_type': 'audio/wav', 'lossy': False},
        'flac': {'extension': '.flac', 'mime_type': 'audio/flac', 'lossy': False},
        'm4a': {'extension': '.m4a', 'mime_type': 'audio/mp4', 'lossy': True},
        'ogg': {'extension': '.ogg', 'mime_type': 'audio/ogg', 'lossy': True}
    }

    def __post_init__(self):
        normalized = self.value.lower()
        if normalized not in self.FORMATS:
            # Report the caller's original (un-normalized) spelling.
            raise InvalidAudioFormatError(self.value, list(self.FORMATS.keys()))
        # Frozen dataclass: bypass immutability to store the normalized value.
        object.__setattr__(self, 'value', normalized)

    @property
    def extension(self) -> str:
        """File extension for this format, leading dot included."""
        return self.FORMATS[self.value]['extension']

    @property
    def mime_type(self) -> str:
        """MIME type for this format."""
        return self.FORMATS[self.value]['mime_type']

    @property
    def is_lossy(self) -> bool:
        """Whether the format uses lossy compression."""
        return self.FORMATS[self.value]['lossy']

    @classmethod
    def supported_formats(cls) -> List[str]:
        """All supported format names."""
        return list(cls.FORMATS.keys())

    def __str__(self) -> str:
        return self.value
\ No newline at end of file
diff --git a/domain/value_objects/audio_quality.py b/domain/value_objects/audio_quality.py
new file mode 100644
index 0000000000000000000000000000000000000000..6fe32154ade6d14fa45fb7d481eb492c73f6e93c
--- /dev/null
+++ b/domain/value_objects/audio_quality.py
@@ -0,0 +1,45 @@
+"""Audio quality value object."""
+from dataclasses import dataclass
+from typing import Dict, Any
+
+from ..exceptions.domain_exceptions import InvalidQualityLevelError
+
@dataclass(frozen=True)
class AudioQuality:
    """Immutable value object for an audio quality level."""
    value: str

    LEVELS = {
        'high': {'priority': 3, 'description': 'Best quality, larger file size'},
        'medium': {'priority': 2, 'description': 'Balanced quality and file size'},
        'low': {'priority': 1, 'description': 'Smaller file size, acceptable quality'}
    }

    def __post_init__(self):
        normalized = self.value.lower()
        if normalized not in self.LEVELS:
            raise InvalidQualityLevelError(self.value)
        # Frozen dataclass: bypass immutability to store the normalized value.
        object.__setattr__(self, 'value', normalized)

    @property
    def priority(self) -> int:
        """Numeric priority; higher means better quality."""
        return self.LEVELS[self.value]['priority']

    @property
    def description(self) -> str:
        """Human-readable description of this level."""
        return self.LEVELS[self.value]['description']

    def get_settings_for_format(self, audio_format: 'AudioFormat',
                                quality_presets: Dict[str, Dict[str, Dict[str, Any]]]) -> Dict[str, Any]:
        """Encoder settings for this level and format; {} when not configured."""
        per_format = quality_presets.get(audio_format.value, {})
        return per_format.get(self.value, {})

    def __str__(self) -> str:
        return self.value

    def __lt__(self, other: 'AudioQuality') -> bool:
        """Order quality levels by priority (low < medium < high)."""
        return self.priority < other.priority
\ No newline at end of file
diff --git a/domain/value_objects/file_size.py b/domain/value_objects/file_size.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a244a44c79aaf8c70d44cf5114ff06de59de533
--- /dev/null
+++ b/domain/value_objects/file_size.py
@@ -0,0 +1,39 @@
+"""File size value object."""
+from dataclasses import dataclass
+
+from ..exceptions.domain_exceptions import ValidationError
+
@dataclass(frozen=True)
class FileSize:
    """Immutable value object representing a file size in bytes."""
    bytes: int

    def __post_init__(self):
        if self.bytes < 0:
            raise ValidationError("File size cannot be negative")

    @property
    def megabytes(self) -> float:
        """Size in mebibytes (1 MB = 1024*1024 bytes here)."""
        return self.bytes / (1 << 20)

    @property
    def gigabytes(self) -> float:
        """Size in gibibytes (1 GB = 1024**3 bytes here)."""
        return self.bytes / (1 << 30)

    def is_within_limit(self, max_mb: float) -> bool:
        """Whether the size does not exceed max_mb megabytes (inclusive)."""
        return self.megabytes <= max_mb

    def __str__(self) -> str:
        """Human-readable size with the largest unit that is >= 1."""
        gb = self.gigabytes
        if gb >= 1:
            return f"{gb:.2f} GB"
        mb = self.megabytes
        if mb >= 1:
            return f"{mb:.1f} MB"
        return f"{self.bytes} bytes"

    def __lt__(self, other: 'FileSize') -> bool:
        return self.bytes < other.bytes
\ No newline at end of file
diff --git a/domain/value_objects/job_status.py b/domain/value_objects/job_status.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9278d7991d05a15d7243041417127e586d3f45f
--- /dev/null
+++ b/domain/value_objects/job_status.py
@@ -0,0 +1,30 @@
+"""Job status value object."""
+from enum import Enum
+
class JobStatus(str, Enum):
    """Lifecycle states of an extraction job (str-valued for easy serialization)."""
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"

    def is_terminal(self) -> bool:
        """True once no further status changes are expected."""
        return self in (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED)

    def is_active(self) -> bool:
        """True while the job is actively being processed."""
        return self is JobStatus.PROCESSING

    def can_transition_to(self, new_status: 'JobStatus') -> bool:
        """Whether moving from this status to new_status is a legal transition."""
        if self.is_terminal():
            return False

        # pending may start or be cancelled; processing may finish either way
        # or be cancelled.
        transitions = {
            JobStatus.PENDING: (JobStatus.PROCESSING, JobStatus.CANCELLED),
            JobStatus.PROCESSING: (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED),
        }
        return new_status in transitions.get(self, ())
\ No newline at end of file
diff --git a/infrastructure/__init__.py b/infrastructure/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ada33eaf9c62ab909ad486efedd5df3e59eae9b
--- /dev/null
+++ b/infrastructure/__init__.py
@@ -0,0 +1 @@
+"""Infrastructure layer for the audio extractor application."""
\ No newline at end of file
diff --git a/infrastructure/config/__init__.py b/infrastructure/config/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..42edf4df36478a183761f9dd1c44262e18f44eaa
--- /dev/null
+++ b/infrastructure/config/__init__.py
@@ -0,0 +1 @@
+"""Configuration management."""
\ No newline at end of file
diff --git a/infrastructure/config/settings.py b/infrastructure/config/settings.py
new file mode 100644
index 0000000000000000000000000000000000000000..60e7a6dc175e1fd14f203ccc3913edc86c2ca81f
--- /dev/null
+++ b/infrastructure/config/settings.py
@@ -0,0 +1,91 @@
+"""Application configuration settings."""
+from pydantic_settings import BaseSettings
+from pydantic import Field
+from pathlib import Path
+from typing import List, Dict, Any
+import os
+
class Settings(BaseSettings):
    """Application settings with environment variable support.

    Values come from explicit kwargs, environment variables, or the .env file
    configured in Config below; the declared defaults apply otherwise.

    NOTE(review): `env=...` inside Field() is pydantic v1 style; with
    pydantic-settings v2 the env name is normally derived from the field name
    (or set via validation_alias) — confirm the installed pydantic version
    actually honours these `env` kwargs.
    """

    # Application settings
    app_name: str = "Video to Audio Extractor"
    app_version: str = "1.0.0"
    debug: bool = Field(default=False, env="DEBUG")

    # File processing settings
    temp_dir: Path = Field(default=Path("/tmp/audio_extractor"), env="TEMP_DIR")
    # Files at or below this size are processed in-request; larger ones are queued.
    max_direct_file_size_mb: float = Field(default=10.0, env="MAX_DIRECT_FILE_SIZE_MB")
    cleanup_interval_seconds: int = Field(default=3600, env="CLEANUP_INTERVAL_SECONDS")
    file_retention_hours: int = Field(default=2, env="FILE_RETENTION_HOURS")

    # FFmpeg settings
    ffmpeg_path: str = Field(default="/usr/bin/ffmpeg", env="FFMPEG_PATH")
    ffmpeg_timeout_seconds: int = Field(default=1800, env="FFMPEG_TIMEOUT_SECONDS")  # 30 minutes

    # Supported formats
    # Video formats carry a leading dot (matched against file extensions);
    # audio formats are bare names (matched against AudioFormat values).
    supported_video_formats: List[str] = Field(
        default=['.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv', '.wmv', '.m4v'],
        env="SUPPORTED_VIDEO_FORMATS"
    )
    supported_audio_formats: List[str] = Field(
        default=['mp3', 'aac', 'wav', 'flac', 'm4a', 'ogg'],
        env="SUPPORTED_AUDIO_FORMATS"
    )

    # Quality presets
    # Per-format FFmpeg encoder settings keyed as format -> quality -> options.
    quality_presets: Dict[str, Dict[str, Dict[str, Any]]] = {
        'mp3': {
            'high': {'bitrate': '320k', 'codec': 'libmp3lame'},
            'medium': {'bitrate': '192k', 'codec': 'libmp3lame'},
            'low': {'bitrate': '128k', 'codec': 'libmp3lame'}
        },
        'aac': {
            'high': {'bitrate': '256k', 'codec': 'aac'},
            'medium': {'bitrate': '192k', 'codec': 'aac'},
            'low': {'bitrate': '128k', 'codec': 'aac'}
        },
        'wav': {
            # Lossless: quality controls bit depth rather than bitrate.
            'high': {'codec': 'pcm_s24le'},
            'medium': {'codec': 'pcm_s16le'},
            'low': {'codec': 'pcm_s16le'}
        },
        'flac': {
            # Lossless: quality controls compression effort, not fidelity.
            'high': {'codec': 'flac', 'compression_level': 12},
            'medium': {'codec': 'flac', 'compression_level': 8},
            'low': {'codec': 'flac', 'compression_level': 0}
        },
        'm4a': {
            'high': {'bitrate': '256k', 'codec': 'aac'},
            'medium': {'bitrate': '192k', 'codec': 'aac'},
            'low': {'bitrate': '128k', 'codec': 'aac'}
        },
        'ogg': {
            'high': {'bitrate': '256k', 'codec': 'libvorbis'},
            'medium': {'bitrate': '192k', 'codec': 'libvorbis'},
            'low': {'bitrate': '128k', 'codec': 'libvorbis'}
        }
    }

    # MIME types
    # Response Content-Type per audio format.
    audio_mime_types: Dict[str, str] = {
        'mp3': 'audio/mpeg',
        'aac': 'audio/aac',
        'wav': 'audio/wav',
        'flac': 'audio/flac',
        'm4a': 'audio/mp4',
        'ogg': 'audio/ogg'
    }

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
        case_sensitive = False

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Ensure temp directory exists
        # (side effect at construction time: creates temp_dir on disk).
        self.temp_dir.mkdir(parents=True, exist_ok=True)

# Singleton instance
# Instantiated at import time; reads the environment and creates temp_dir.
settings = Settings()
\ No newline at end of file
diff --git a/infrastructure/repositories/__init__.py b/infrastructure/repositories/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..73221a1942fb671a60de36ba5a4d2c66881bbeeb
--- /dev/null
+++ b/infrastructure/repositories/__init__.py
@@ -0,0 +1 @@
+"""Repository implementations."""
\ No newline at end of file
diff --git a/infrastructure/repositories/file_repository.py b/infrastructure/repositories/file_repository.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ac9a1af3cac1e114e3c189810c2ed1994ec5ebc
--- /dev/null
+++ b/infrastructure/repositories/file_repository.py
@@ -0,0 +1,117 @@
+"""File system repository implementation."""
+from pathlib import Path
+from typing import Optional, List, Tuple
+import aiofiles
+import os
+import uuid
+from datetime import datetime, timedelta
+import logging
+import inspect
+
logger = logging.getLogger(__name__)

class FileSystemRepository:
    """Repository for managing temporary files under a single base directory.

    All helpers are async; bulk disk writes go through aiofiles so large
    uploads do not block the event loop.
    """

    def __init__(self, base_path: Path):
        self.base_path = Path(base_path)
        # Create the storage directory eagerly so later writes cannot fail
        # with a missing-directory error.
        self.base_path.mkdir(parents=True, exist_ok=True)

    async def save_uploaded_file(self, content: bytes, original_filename: str) -> str:
        """Save an in-memory upload and return the stored file path.

        A UUID prefix keeps concurrent uploads with the same original name
        from colliding; only the extension of the original name is kept.
        """
        file_id = str(uuid.uuid4())
        extension = Path(original_filename).suffix
        filename = f"{file_id}_input{extension}"
        file_path = self.base_path / filename

        async with aiofiles.open(file_path, 'wb') as f:
            await f.write(content)

        logger.info(f"Saved uploaded file: {file_path}")
        return str(file_path)

    async def save_stream(self, stream, original_filename: str, chunk_size: int = 1024 * 1024) -> str:
        """Save file from stream, handling both async and sync streams.

        Args:
            stream: Any object exposing ``read(size)``; the method may be
                either a coroutine function or a plain callable.
            original_filename: Used only for its extension.
            chunk_size: Bytes read per iteration (default 1 MiB).

        Raises:
            ValueError: If ``stream`` has no ``read`` method.
        """
        # Fix: the module never imported asyncio, so the sync-stream branch
        # below raised NameError on asyncio.to_thread.
        import asyncio

        file_id = str(uuid.uuid4())
        extension = Path(original_filename).suffix
        filename = f"{file_id}_input{extension}"
        file_path = self.base_path / filename

        async with aiofiles.open(file_path, 'wb') as f:
            read_method = getattr(stream, 'read', None)

            if read_method is None:
                raise ValueError("Provided stream does not have a .read() method")

            is_async = inspect.iscoroutinefunction(read_method)

            while True:
                if is_async:
                    chunk = await read_method(chunk_size)
                else:
                    # Run sync read in thread pool to not block the event loop
                    chunk = await asyncio.to_thread(read_method, chunk_size)

                if not chunk:
                    break

                await f.write(chunk)

        logger.info(f"Saved streamed file: {file_path}")
        return str(file_path)

    async def create_output_path(self, job_id: str, format: str) -> str:
        """Return the output path for a job (the file itself is not created)."""
        filename = f"{job_id}_output.{format}"
        return str(self.base_path / filename)

    async def read_file(self, file_path: str) -> bytes:
        """Read and return the entire file content."""
        async with aiofiles.open(file_path, 'rb') as f:
            return await f.read()

    async def file_exists(self, file_path: str) -> bool:
        """Check if file exists."""
        return Path(file_path).exists()

    async def get_file_size(self, file_path: str) -> int:
        """Get file size in bytes."""
        return Path(file_path).stat().st_size

    async def delete_file(self, file_path: str) -> bool:
        """Delete a file; logs and returns False instead of raising."""
        try:
            Path(file_path).unlink()
            logger.info(f"Deleted file: {file_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to delete file {file_path}: {e}")
            return False

    async def list_old_files(self, older_than_hours: int) -> List[Tuple[str, datetime]]:
        """List files older than the given number of hours.

        Fix: the cutoff is computed from utcnow(), so mtimes must also be
        interpreted in UTC. The previous code used fromtimestamp() (local
        time), skewing the comparison by the host's UTC offset.
        """
        cutoff_time = datetime.utcnow() - timedelta(hours=older_than_hours)
        old_files = []

        for file_path in self.base_path.glob('*'):
            if file_path.is_file():
                # Keep both sides of the comparison in the UTC frame.
                modified_time = datetime.utcfromtimestamp(file_path.stat().st_mtime)
                if modified_time < cutoff_time:
                    old_files.append((str(file_path), modified_time))

        return old_files

    async def cleanup_old_files(self, older_than_hours: int) -> int:
        """Delete files older than the given age; returns how many were removed."""
        old_files = await self.list_old_files(older_than_hours)
        deleted_count = 0

        for file_path, _ in old_files:
            if await self.delete_file(file_path):
                deleted_count += 1

        if deleted_count > 0:
            logger.info(f"Cleaned up {deleted_count} old files")

        return deleted_count
\ No newline at end of file
diff --git a/infrastructure/repositories/job_repository.py b/infrastructure/repositories/job_repository.py
new file mode 100644
index 0000000000000000000000000000000000000000..14962dc7cea79b756a96d9bdcb7800af1923785f
--- /dev/null
+++ b/infrastructure/repositories/job_repository.py
@@ -0,0 +1,101 @@
+"""In-memory job repository implementation."""
+from typing import Dict, Optional, List
+from datetime import datetime, timedelta
+import asyncio
+from dataclasses import dataclass, field
+import logging
+
logger = logging.getLogger(__name__)

@dataclass
class JobRecord:
    """Internal job storage record."""
    id: str
    status: str                              # e.g. "processing", then updated via update_status
    created_at: datetime                     # naive UTC (utcnow)
    updated_at: datetime                     # naive UTC, bumped on every status update
    filename: Optional[str] = None           # original upload filename
    file_size_mb: Optional[float] = None
    output_format: Optional[str] = None      # requested audio format
    quality: Optional[str] = None            # requested quality level
    output_path: Optional[str] = None        # set once processing produced a file
    error: Optional[str] = None              # set when processing failed
    processing_time: Optional[float] = None
    metadata: Dict = field(default_factory=dict)

class InMemoryJobRepository:
    """In-memory implementation of the job repository.

    A single asyncio.Lock serializes every access to the backing dict, so
    concurrent coroutines in one process are safe. State is lost on restart.
    """

    def __init__(self):
        self._jobs: Dict[str, JobRecord] = {}
        self._lock = asyncio.Lock()

    async def create(self, job_id: str, filename: str, file_size_mb: float,
                     output_format: str, quality: str) -> JobRecord:
        """Create a new job record in the "processing" state."""
        async with self._lock:
            job = JobRecord(
                id=job_id,
                status="processing",
                created_at=datetime.utcnow(),
                updated_at=datetime.utcnow(),
                filename=filename,
                file_size_mb=file_size_mb,
                output_format=output_format,
                quality=quality
            )
            self._jobs[job_id] = job
            # Fix: the message previously logged the literal text "(unknown)"
            # instead of the actual filename.
            logger.info(f"Created job {job_id} for {filename}")
            return job

    async def get(self, job_id: str) -> Optional[JobRecord]:
        """Get a job by ID, or None if unknown."""
        async with self._lock:
            return self._jobs.get(job_id)

    async def update_status(self, job_id: str, status: str,
                            error: Optional[str] = None,
                            output_path: Optional[str] = None,
                            processing_time: Optional[float] = None) -> Optional[JobRecord]:
        """Update job status; returns the record, or None if unknown.

        NOTE: error/output_path/processing_time are always overwritten —
        omitting an argument resets that field to None. Callers must pass
        every value they want to keep on each update.
        """
        async with self._lock:
            job = self._jobs.get(job_id)
            if job:
                job.status = status
                job.updated_at = datetime.utcnow()
                job.error = error
                job.output_path = output_path
                job.processing_time = processing_time
                logger.info(f"Updated job {job_id} status to {status}")
            return job

    async def list_old_jobs(self, older_than_hours: int) -> List[JobRecord]:
        """List jobs created more than the given number of hours ago."""
        cutoff_time = datetime.utcnow() - timedelta(hours=older_than_hours)
        async with self._lock:
            return [
                job for job in self._jobs.values()
                if job.created_at < cutoff_time
            ]

    async def delete(self, job_id: str) -> bool:
        """Delete a job record; returns False if it did not exist."""
        async with self._lock:
            if job_id in self._jobs:
                del self._jobs[job_id]
                logger.info(f"Deleted job {job_id}")
                return True
            return False

    async def get_stats(self) -> Dict:
        """Return {"total_jobs": n, "by_status": {status: count}}."""
        async with self._lock:
            total = len(self._jobs)
            by_status = {}
            for job in self._jobs.values():
                by_status[job.status] = by_status.get(job.status, 0) + 1

            return {
                "total_jobs": total,
                "by_status": by_status
            }
\ No newline at end of file
diff --git a/infrastructure/services/__init__.py b/infrastructure/services/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..400a6b80a6c76d67c620131273c9ccf7f7fc9947
--- /dev/null
+++ b/infrastructure/services/__init__.py
@@ -0,0 +1 @@
+"""Service implementations."""
\ No newline at end of file
diff --git a/infrastructure/services/container.py b/infrastructure/services/container.py
new file mode 100644
index 0000000000000000000000000000000000000000..de7f354b9f48e022cf212551869f7630a81c1141
--- /dev/null
+++ b/infrastructure/services/container.py
@@ -0,0 +1,53 @@
+"""Dependency injection container for services."""
+from typing import Optional
+from pathlib import Path
+
+from ..config.settings import settings
+from ..repositories.job_repository import InMemoryJobRepository
+from ..repositories.file_repository import FileSystemRepository
+from .ffmpeg_service import FFmpegService
+from .file_cleanup_service import FileCleanupService
+
class ServiceContainer:
    """Wires together every infrastructure service for the application.

    Constructed lazily as a process-wide singleton via :meth:`get_instance`.
    """

    _instance: Optional['ServiceContainer'] = None

    def __init__(self):
        # Repositories first — the services below depend on them.
        self.job_repository = InMemoryJobRepository()
        self.file_repository = FileSystemRepository(settings.temp_dir)

        # FFmpeg wrapper, configured entirely from application settings.
        self.ffmpeg_service = FFmpegService(
            ffmpeg_path=settings.ffmpeg_path,
            quality_presets=settings.quality_presets,
            timeout_seconds=settings.ffmpeg_timeout_seconds,
        )

        # Periodic removal of expired jobs and their files.
        self.cleanup_service = FileCleanupService(
            job_repo=self.job_repository,
            file_repo=self.file_repository,
            cleanup_interval_seconds=settings.cleanup_interval_seconds,
            retention_hours=settings.file_retention_hours,
        )

    @classmethod
    def get_instance(cls) -> 'ServiceContainer':
        """Return the singleton container, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    async def startup(self):
        """Start long-running services (call from the app startup hook)."""
        await self.cleanup_service.start()

    async def shutdown(self):
        """Stop long-running services (call from the app shutdown hook)."""
        await self.cleanup_service.stop()

def get_services() -> ServiceContainer:
    """Convenience accessor for the singleton service container."""
    return ServiceContainer.get_instance()
\ No newline at end of file
diff --git a/infrastructure/services/ffmpeg_service.py b/infrastructure/services/ffmpeg_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..7599e2f49d7ae194ef52a97f007d9a429a7cd57b
--- /dev/null
+++ b/infrastructure/services/ffmpeg_service.py
@@ -0,0 +1,160 @@
+"""FFmpeg service implementation."""
+import asyncio
+import subprocess
+from pathlib import Path
+from typing import Dict, Optional, Any
+import logging
+import json
+from dataclasses import dataclass
+
logger = logging.getLogger(__name__)

@dataclass
class FFmpegResult:
    """Result from FFmpeg operation."""
    success: bool
    # Where the extracted audio was written (set on success only).
    output_path: Optional[str] = None
    # Input duration in seconds, parsed from FFmpeg's stderr banner.
    duration: Optional[float] = None
    # Human-readable failure reason (set on failure only).
    error: Optional[str] = None
    # Extraction settings actually used; None when the run failed early.
    # (Annotation tightened to Optional — the default has always been None.)
    metadata: Optional[Dict[str, Any]] = None

class FFmpegService:
    """Service for audio extraction using FFmpeg.

    Subprocesses are driven through asyncio so conversions never block the
    event loop; a wall-clock timeout kills runaway FFmpeg processes.
    """

    def __init__(self, ffmpeg_path: str, quality_presets: Dict[str, Dict[str, Dict]],
                 timeout_seconds: int = 1800):
        self.ffmpeg_path = ffmpeg_path          # path to the ffmpeg binary
        self.quality_presets = quality_presets  # {format: {quality: options}}
        self.timeout_seconds = timeout_seconds  # per-conversion time budget

    async def extract_audio(self, input_path: str, output_path: str,
                            format: str, quality: str) -> FFmpegResult:
        """Extract audio from video file.

        Never raises: timeouts, non-zero exits and unexpected exceptions are
        all reported through FFmpegResult.error.

        NOTE: 'format' shadows the builtin; kept for caller compatibility.
        """
        try:
            # Get quality settings; an unknown format/quality degrades to an
            # empty preset, letting FFmpeg fall back to its own defaults.
            preset = self.quality_presets.get(format, {}).get(quality, {})

            # Build FFmpeg command
            cmd = self._build_command(input_path, output_path, format, preset)

            logger.info(f"Running FFmpeg command: {' '.join(cmd)}")

            # Run FFmpeg
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )

            try:
                stdout, stderr = await asyncio.wait_for(
                    process.communicate(),
                    timeout=self.timeout_seconds
                )
            except asyncio.TimeoutError:
                # Kill the stuck process and reap it before reporting failure.
                process.kill()
                await process.wait()
                return FFmpegResult(
                    success=False,
                    error=f"FFmpeg timeout after {self.timeout_seconds} seconds"
                )

            if process.returncode == 0:
                # Extract duration from stderr — FFmpeg writes its
                # informational banner (including Duration:) to stderr.
                duration = self._extract_duration(stderr.decode())

                return FFmpegResult(
                    success=True,
                    output_path=output_path,
                    duration=duration,
                    metadata={
                        "format": format,
                        "quality": quality,
                        "preset": preset
                    }
                )
            else:
                return FFmpegResult(
                    success=False,
                    error=f"FFmpeg failed: {stderr.decode()}"
                )

        except Exception as e:
            logger.error(f"FFmpeg error: {str(e)}")
            return FFmpegResult(
                success=False,
                error=str(e)
            )

    async def get_media_info(self, file_path: str) -> Dict[str, Any]:
        """Get media file information using ffprobe.

        Returns ffprobe's parsed JSON output, or {} on any failure.
        NOTE(review): assumes 'ffprobe' is resolvable on PATH — it is not
        derived from self.ffmpeg_path; confirm for non-standard installs.
        """
        try:
            cmd = [
                'ffprobe',
                '-v', 'quiet',
                '-print_format', 'json',
                '-show_format',
                '-show_streams',
                file_path
            ]

            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )

            stdout, _ = await process.communicate()

            if process.returncode == 0:
                return json.loads(stdout.decode())
            else:
                return {}

        except Exception as e:
            logger.error(f"ffprobe error: {str(e)}")
            return {}

    def _build_command(self, input_path: str, output_path: str,
                       format: str, preset: Dict) -> list:
        """Build the FFmpeg argv for the given format and preset options."""
        cmd = [
            self.ffmpeg_path,
            '-i', input_path,
            '-vn',  # No video
            '-y'    # Overwrite output
        ]

        # Add codec
        if 'codec' in preset:
            cmd.extend(['-acodec', preset['codec']])

        # Add bitrate
        if 'bitrate' in preset:
            cmd.extend(['-b:a', preset['bitrate']])

        # Add compression level (for FLAC)
        if 'compression_level' in preset:
            cmd.extend(['-compression_level', str(preset['compression_level'])])

        # Add output file
        cmd.append(output_path)

        return cmd

    def _extract_duration(self, stderr_output: str) -> Optional[float]:
        """Parse 'Duration: HH:MM:SS.cc' from FFmpeg stderr; None if absent."""
        import re

        # Look for Duration: HH:MM:SS.ms (two fractional digits = centiseconds)
        duration_match = re.search(r'Duration: (\d{2}):(\d{2}):(\d{2})\.(\d{2})', stderr_output)
        if duration_match:
            hours = int(duration_match.group(1))
            minutes = int(duration_match.group(2))
            seconds = int(duration_match.group(3))
            centiseconds = int(duration_match.group(4))

            total_seconds = hours * 3600 + minutes * 60 + seconds + centiseconds / 100
            return total_seconds

        return None
\ No newline at end of file
diff --git a/infrastructure/services/file_cleanup_service.py b/infrastructure/services/file_cleanup_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae050efac738c1791938f67be9fb04bcaddce189
--- /dev/null
+++ b/infrastructure/services/file_cleanup_service.py
@@ -0,0 +1,95 @@
+"""Background file cleanup service."""
+import asyncio
+from datetime import datetime
+import logging
+from typing import Protocol
+
logger = logging.getLogger(__name__)

class JobRepository(Protocol):
    """Structural interface required from a job repository."""
    async def list_old_jobs(self, older_than_hours: int) -> list:
        ...
    async def delete(self, job_id: str) -> bool:
        ...

class FileRepository(Protocol):
    """Structural interface required from a file repository."""
    async def cleanup_old_files(self, older_than_hours: int) -> int:
        ...
    async def delete_file(self, file_path: str) -> bool:
        ...

class FileCleanupService:
    """Periodically removes expired jobs and their associated files.

    A single background task wakes up every ``cleanup_interval_seconds``
    and deletes everything older than ``retention_hours``.
    """

    def __init__(self, job_repo: JobRepository, file_repo: FileRepository,
                 cleanup_interval_seconds: int, retention_hours: int):
        self.job_repo = job_repo
        self.file_repo = file_repo
        self.cleanup_interval_seconds = cleanup_interval_seconds
        self.retention_hours = retention_hours
        self._running = False
        self._task = None

    async def start(self):
        """Launch the background cleanup task (idempotent)."""
        if self._running:
            return

        self._running = True
        self._task = asyncio.create_task(self._cleanup_loop())
        logger.info(f"Started cleanup service (interval: {self.cleanup_interval_seconds}s, "
                    f"retention: {self.retention_hours}h)")

    async def stop(self):
        """Cancel the background task and wait until it has finished."""
        self._running = False
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
        logger.info("Stopped cleanup service")

    async def _cleanup_loop(self):
        """Sleep, then clean; repeat until stopped or cancelled."""
        while self._running:
            try:
                await asyncio.sleep(self.cleanup_interval_seconds)
                await self._perform_cleanup()
            except asyncio.CancelledError:
                break
            except Exception as e:
                # Keep the loop alive on transient failures; log and retry.
                logger.error(f"Cleanup error: {str(e)}")

    async def _perform_cleanup(self):
        """Delete expired jobs (plus their output files) and orphaned files."""
        began = datetime.utcnow()

        expired = await self.job_repo.list_old_jobs(self.retention_hours)
        removed_jobs = 0
        for record in expired:
            # Remove the job's output file first, if one was produced.
            output = getattr(record, 'output_path', None)
            if output:
                await self.file_repo.delete_file(output)
            if await self.job_repo.delete(record.id):
                removed_jobs += 1

        # Sweep files no job record points at anymore.
        removed_files = await self.file_repo.cleanup_old_files(self.retention_hours)

        elapsed = (datetime.utcnow() - began).total_seconds()
        if removed_jobs or removed_files:
            logger.info(f"Cleanup completed in {elapsed:.2f}s: "
                        f"deleted {removed_jobs} jobs and {removed_files} files")

    async def force_cleanup(self):
        """Run one cleanup pass immediately, outside the timer loop."""
        await self._perform_cleanup()
\ No newline at end of file
diff --git a/interfaces/api/__init__.py b/interfaces/api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5e0a6d96d6a37b4a42395f7547f309355d583c6
--- /dev/null
+++ b/interfaces/api/__init__.py
@@ -0,0 +1 @@
+"""API interface components."""
\ No newline at end of file
diff --git a/interfaces/api/dependencies.py b/interfaces/api/dependencies.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf9ec663a026cf55dea777f2c3ccbdcd77514a5a
--- /dev/null
+++ b/interfaces/api/dependencies.py
@@ -0,0 +1,70 @@
+# interfaces/api/dependencies.py
+"""FastAPI dependency injection configuration."""
+from typing import Annotated
+from fastapi import Depends, UploadFile, Form, HTTPException, Request
+from pydantic import BaseModel, Field, validator
+import re
+
+from application.use_cases.container import UseCaseContainer
+from infrastructure.services.container import ServiceContainer
+
class ExtractionRequest(BaseModel):
    """Request model for audio extraction options (output format + quality)."""
    output_format: str = Field(default="mp3", description="Output audio format")
    quality: str = Field(default="medium", description="Audio quality level")

    # NOTE(review): @validator is pydantic v1 style; under pydantic v2 this
    # needs the deprecation shim (or migration to field_validator) — confirm
    # the pinned pydantic version.
    @validator('output_format')
    def validate_format(cls, v):
        """Normalize to lowercase and reject unsupported audio formats."""
        allowed = ['mp3', 'aac', 'wav', 'flac', 'm4a', 'ogg']
        if v.lower() not in allowed:
            raise ValueError(f"Format must be one of: {', '.join(allowed)}")
        return v.lower()

    @validator('quality')
    def validate_quality(cls, v):
        """Normalize to lowercase and reject unknown quality levels."""
        allowed = ['high', 'medium', 'low']
        if v.lower() not in allowed:
            raise ValueError(f"Quality must be one of: {', '.join(allowed)}")
        return v.lower()
+
def extraction_params(
    output_format: str = Form("mp3"),
    quality: str = Form("medium")
) -> ExtractionRequest:
    """Collect the form fields into a validated ExtractionRequest model."""
    request = ExtractionRequest(output_format=output_format, quality=quality)
    return request
+
async def validate_video_file(video: UploadFile) -> UploadFile:
    """Reject uploads whose name, extension or content-type is not a video."""
    filename = video.filename
    if not filename:
        raise HTTPException(400, "No filename provided")

    # Extension whitelist check (case-insensitive).
    allowed_extensions = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv', '.wmv', '.m4v']
    if '.' in filename:
        file_ext = '.' + filename.lower().rsplit('.', 1)[-1]
    else:
        file_ext = ''

    if file_ext not in allowed_extensions:
        raise HTTPException(
            400,
            f"Unsupported video format. Allowed: {', '.join(allowed_extensions)}"
        )

    # Basic content-type validation; octet-stream is tolerated because many
    # clients send it for arbitrary binary uploads.
    content_type = video.content_type
    if content_type and not content_type.startswith(('video/', 'application/octet-stream')):
        raise HTTPException(400, "File must be a video")

    return video
+
def get_services(request: Request) -> ServiceContainer:
    """Get service container from app state (assumed to be attached at app
    startup — confirm in the application factory)."""
    return request.app.state.get_services()

def get_use_cases(request: Request) -> UseCaseContainer:
    """Get use case container from app state (assumed to be attached at app
    startup — confirm in the application factory)."""
    return request.app.state.get_use_cases()

# Type aliases for dependency injection:
#   ValidatedVideo    - upload that passed extension/content-type checks
#   ExtractionParams  - validated form fields (output_format, quality)
ValidatedVideo = Annotated[UploadFile, Depends(validate_video_file)]
ExtractionParams = Annotated[ExtractionRequest, Depends(extraction_params)]
Services = Annotated[ServiceContainer, Depends(get_services)]
UseCases = Annotated[UseCaseContainer, Depends(get_use_cases)]
diff --git a/interfaces/api/middleware/__init__.py b/interfaces/api/middleware/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..884d27cedebfaabb56d364456b1633c5aab7a1ce
--- /dev/null
+++ b/interfaces/api/middleware/__init__.py
@@ -0,0 +1 @@
+"""API middleware components."""
\ No newline at end of file
diff --git a/interfaces/api/middleware/cors_middleware.py b/interfaces/api/middleware/cors_middleware.py
new file mode 100644
index 0000000000000000000000000000000000000000..014bda441cbc1871d7b77fb017b9843326205767
--- /dev/null
+++ b/interfaces/api/middleware/cors_middleware.py
@@ -0,0 +1,13 @@
+"""CORS configuration middleware."""
+from fastapi.middleware.cors import CORSMiddleware
+
def configure_cors(app):
    """Configure CORS for the application.

    NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
    not honored by browsers for credentialed requests — pin concrete origins
    before production use.
    """
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],  # Configure appropriately for production
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
        # Custom headers set by the extraction/job routes must be exposed
        # explicitly for browser JavaScript to read them.
        expose_headers=["X-Processing-Time", "X-File-Size", "X-Job-Id"]
    )
diff --git a/interfaces/api/middleware/error_handler.py b/interfaces/api/middleware/error_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae49f214e41020eb3c48d4238ec033626d03a13c
--- /dev/null
+++ b/interfaces/api/middleware/error_handler.py
@@ -0,0 +1,54 @@
+"""Global error handling middleware."""
+from fastapi import Request, HTTPException
+from fastapi.responses import JSONResponse
+from fastapi.exceptions import RequestValidationError
+from starlette.exceptions import HTTPException as StarletteHTTPException
+import logging
+import traceback
+
+logger = logging.getLogger(__name__)
+
async def http_exception_handler(request: Request, exc: HTTPException):
    """Render an HTTPException as the standard JSON error envelope."""
    payload = {
        "error": exc.detail,
        "code": f"HTTP_{exc.status_code}"
    }
    return JSONResponse(status_code=exc.status_code, content=payload)
+
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """Render request-validation failures as a 422 JSON error envelope."""
    # One "location.path: message" entry per failed field.
    messages = [
        "{}: {}".format(".".join(str(part) for part in err["loc"]), err["msg"])
        for err in exc.errors()
    ]

    return JSONResponse(
        status_code=422,
        content={
            "error": "Validation failed",
            "details": "; ".join(messages),
            "code": "VALIDATION_ERROR"
        }
    )
+
async def general_exception_handler(request: Request, exc: Exception):
    """Last-resort handler: log the full traceback, hide details from clients."""
    logger.error(f"Unexpected error: {str(exc)}\n{traceback.format_exc()}")

    body = {
        "error": "Internal server error",
        "details": "An unexpected error occurred",
        "code": "INTERNAL_ERROR"
    }
    return JSONResponse(status_code=500, content=body)
+
def register_exception_handlers(app):
    """Register all exception handlers with the app.

    Covers explicit HTTPExceptions, request validation failures, and a bare
    Exception fallback that hides internals behind a generic 500.
    """
    app.add_exception_handler(HTTPException, http_exception_handler)
    app.add_exception_handler(RequestValidationError, validation_exception_handler)
    app.add_exception_handler(Exception, general_exception_handler)
\ No newline at end of file
diff --git a/interfaces/api/responses.py b/interfaces/api/responses.py
new file mode 100644
index 0000000000000000000000000000000000000000..936a8b1325b962c6f80cf52cd5940c89cab00453
--- /dev/null
+++ b/interfaces/api/responses.py
@@ -0,0 +1,49 @@
+"""API response models."""
+from pydantic import BaseModel, Field
+from typing import Optional, Dict, List
+from datetime import datetime
+
class ExtractionResponse(BaseModel):
    """Response for direct (synchronous) extraction of small files."""
    file_path: str         # server-side path handed to FileResponse
    media_type: str        # MIME type for the Content-Type header
    filename: str          # suggested download filename
    processing_time: float # reported via the X-Processing-Time header
    file_size: int         # reported via the X-File-Size header
+
class JobCreatedResponse(BaseModel):
    """Response when a background job is created (returned with HTTP 202)."""
    job_id: str
    status: str
    message: str
    check_url: str       # presumably the /jobs/{id} status URL — set by the use case
    file_size_mb: float
+
class JobStatusResponse(BaseModel):
    """Response for job status check; fields unknown for a job are null."""
    job_id: str
    status: str
    created_at: datetime
    updated_at: datetime
    filename: Optional[str] = None
    file_size_mb: Optional[float] = None
    output_format: Optional[str] = None
    quality: Optional[str] = None
    processing_time: Optional[float] = None
    error: Optional[str] = None          # populated when processing failed
    download_url: Optional[str] = None   # populated when a result is ready
+
class ErrorResponse(BaseModel):
    """Standard error response (matches the error-handler JSON envelopes)."""
    error: str
    details: Optional[str] = None
    code: Optional[str] = None  # e.g. "HTTP_404", "VALIDATION_ERROR", "INTERNAL_ERROR"
+
class ApiInfoResponse(BaseModel):
    """API information response: capabilities, limits, and endpoint map."""
    version: str
    supported_video_formats: List[str]
    supported_audio_formats: List[str]
    quality_levels: List[str]
    max_direct_response_size_mb: float  # above this, extraction goes async
    endpoints: Dict[str, str]           # path -> "METHOD - description"
\ No newline at end of file
diff --git a/interfaces/api/routes/__init__.py b/interfaces/api/routes/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a49ee6736fe33b1151cdf396cb2886e5e6d8ada
--- /dev/null
+++ b/interfaces/api/routes/__init__.py
@@ -0,0 +1,11 @@
+"""API route modules."""
+from fastapi import APIRouter
+from .extraction_routes import router as extraction_router
+from .job_routes import router as job_router
+from .info_routes import router as info_router
+
def register_routes(app):
    """Register all API routes with the FastAPI app under the /api/v1 prefix."""
    app.include_router(extraction_router, prefix="/api/v1", tags=["extraction"])
    app.include_router(job_router, prefix="/api/v1", tags=["jobs"])
    app.include_router(info_router, prefix="/api/v1", tags=["info"])
\ No newline at end of file
diff --git a/interfaces/api/routes/extraction_routes.py b/interfaces/api/routes/extraction_routes.py
new file mode 100644
index 0000000000000000000000000000000000000000..71fcb06830ab4c8dbd4b0252df6cbc552f877f0d
--- /dev/null
+++ b/interfaces/api/routes/extraction_routes.py
@@ -0,0 +1,90 @@
+"""Audio extraction API routes."""
+from fastapi import APIRouter, BackgroundTasks, Response, Request, UploadFile
+from fastapi.responses import FileResponse, JSONResponse
+from typing import Dict, Any
+from dataclasses import asdict
+
+from ..dependencies import ValidatedVideo, ExtractionParams, UseCases, Services
+from ..responses import ExtractionResponse, JobCreatedResponse
+from application.dto.extraction_request import ExtractionRequestDTO
+
+router = APIRouter()
+
@router.post("/extract", response_model=None)
async def extract_audio(
    background_tasks: BackgroundTasks,
    video: ValidatedVideo,
    params: ExtractionParams,
    use_cases: UseCases,
    services: Services
):
    """
    Extract audio from uploaded video file.

    For small files (<10MB), returns the audio file directly.
    For large files, returns a job ID for async processing.
    """
    # Size is measured by seeking the upload's underlying file object,
    # before anything reads from it.
    file_size = _get_file_size(video)
    file_size_mb = file_size / (1024 * 1024)

    # Create DTO
    extraction_dto = ExtractionRequestDTO(
        video_filename=video.filename,
        video_file_path="",  # placeholder; assigned below once the upload is persisted
        video_file_size=file_size,
        output_format=params.output_format,
        quality=params.quality,
        content_type=video.content_type
    )

    # Save uploaded file temporarily (UploadFile.read is async, so
    # save_stream takes its async branch).
    file_path = await services.file_repository.save_stream(
        video,
        video.filename
    )
    extraction_dto.video_file_path = file_path

    # Decide processing method.
    # NOTE(review): the 10 MB threshold is hardcoded here while the config
    # exposes MAX_DIRECT_FILE_SIZE_MB — confirm these stay in sync.
    if file_size_mb < 10:
        # Direct processing
        try:
            result = await use_cases.extract_audio_direct.execute(extraction_dto)

            # Clean up input file after processing — scheduled as a background
            # task so it runs after the response has been sent.
            background_tasks.add_task(
                services.file_repository.delete_file,
                file_path
            )

            return FileResponse(
                path=result.file_path,
                media_type=result.media_type,
                filename=result.filename,
                headers={
                    "X-Processing-Time": str(result.processing_time),
                    "X-File-Size": str(result.file_size)
                }
            )
        except Exception as e:
            # Clean up on error, then let the global handlers shape the response.
            await services.file_repository.delete_file(file_path)
            raise
    else:
        # Async processing — input-file cleanup is assumed to be handled by
        # the background job; confirm in the use case.
        result = await use_cases.extract_audio_async.execute(
            extraction_dto,
            background_tasks
        )

        return JSONResponse(
            content=asdict(result),
            status_code=202  # Accepted
        )
+ )
+
def _get_file_size(video: UploadFile) -> int:
    """Return the upload's size in bytes by seeking its underlying file."""
    handle = video.file
    handle.seek(0, 2)  # whence=2 (SEEK_END): jump to end of stream
    total = handle.tell()
    handle.seek(0)  # rewind so later readers see the whole stream
    return total
\ No newline at end of file
diff --git a/interfaces/api/routes/info_routes.py b/interfaces/api/routes/info_routes.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c9619171dd359347ef9f0bcb5e4aaba35423ccd
--- /dev/null
+++ b/interfaces/api/routes/info_routes.py
@@ -0,0 +1,29 @@
+"""API information routes."""
+from fastapi import APIRouter
+from typing import Dict, List
+
+from ..responses import ApiInfoResponse
+
+router = APIRouter()
+
@router.get("/info", response_model=ApiInfoResponse)
async def get_api_info():
    """Get API information and supported formats.

    NOTE(review): formats and the size limit are duplicated literals rather
    than read from the settings module — keep both in sync.
    """
    return ApiInfoResponse(
        version="1.0.0",
        supported_video_formats=['.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv', '.wmv', '.m4v'],
        supported_audio_formats=['mp3', 'aac', 'wav', 'flac', 'm4a', 'ogg'],
        quality_levels=['high', 'medium', 'low'],
        max_direct_response_size_mb=10.0,
        endpoints={
            "/api/v1/extract": "POST - Extract audio from video",
            "/api/v1/jobs/{job_id}": "GET - Check job status",
            "/api/v1/jobs/{job_id}/download": "GET - Download processed audio",
            "/api/v1/info": "GET - API information"
        }
    )
+
@router.get("/health")
async def health_check():
    """Liveness probe: returns a static payload without touching any service."""
    payload = {"status": "healthy", "service": "audio-extractor"}
    return payload
\ No newline at end of file
diff --git a/interfaces/api/routes/job_routes.py b/interfaces/api/routes/job_routes.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdbbf1a3b6c0cf30e7157db2a02b2dca4860adc0
--- /dev/null
+++ b/interfaces/api/routes/job_routes.py
@@ -0,0 +1,62 @@
+"""Job management API routes."""
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import FileResponse
+from typing import Any
+
+from ..dependencies import UseCases
+from ..responses import JobStatusResponse, ErrorResponse
+from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError
+
+router = APIRouter()
+
@router.get("/jobs/{job_id}", response_model=JobStatusResponse)
async def get_job_status(
    job_id: str,
    use_cases: UseCases
):
    """Get the status of a background processing job.

    Raises:
        HTTPException 404: unknown job id.
        HTTPException 500: any other failure while reading job state.
    """
    try:
        job_dto = await use_cases.check_job_status.execute(job_id)

        # Map the use-case DTO field-for-field onto the API response model.
        return JobStatusResponse(
            job_id=job_dto.job_id,
            status=job_dto.status,
            created_at=job_dto.created_at,
            updated_at=job_dto.updated_at,
            filename=job_dto.filename,
            file_size_mb=job_dto.file_size_mb,
            output_format=job_dto.output_format,
            quality=job_dto.quality,
            processing_time=job_dto.processing_time,
            error=job_dto.error,
            download_url=job_dto.download_url
        )
    except JobNotFoundError as e:
        raise HTTPException(404, str(e))
    except Exception as e:
        raise HTTPException(500, f"Error checking job status: {str(e)}")
+
@router.get("/jobs/{job_id}/download")
async def download_job_result(
    job_id: str,
    use_cases: UseCases
):
    """Download the result of a completed job.

    Raises:
        HTTPException 404: unknown job id.
        HTTPException 400: job exists but has not completed yet.
        HTTPException 500: any other failure while serving the file.
    """
    try:
        result = await use_cases.download_audio_result.execute(job_id)

        return FileResponse(
            path=result.file_path,
            media_type=result.media_type,
            filename=result.filename,
            headers={
                "X-Job-Id": job_id,
                "X-Processing-Time": str(result.processing_time)
            }
        )
    except JobNotFoundError as e:
        raise HTTPException(404, str(e))
    except JobNotCompletedError as e:
        raise HTTPException(400, str(e))
    except Exception as e:
        raise HTTPException(500, f"Error downloading result: {str(e)}")
\ No newline at end of file
diff --git a/interfaces/web/__init__.py b/interfaces/web/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d797e9760c229d4a9a9cfc61c665c38759b750b
--- /dev/null
+++ b/interfaces/web/__init__.py
@@ -0,0 +1 @@
+"""Web interface components."""
\ No newline at end of file
diff --git a/interfaces/web/static/script.js b/interfaces/web/static/script.js
new file mode 100644
index 0000000000000000000000000000000000000000..bcade9bbdd54e998b5551970684500635c294a72
--- /dev/null
+++ b/interfaces/web/static/script.js
@@ -0,0 +1,186 @@
/**
 * Front-end controller for the video-to-audio extraction page.
 *
 * Handles file selection (click and drag-and-drop), submits the video to
 * POST /api/v1/extract, and renders the result: small files come back as a
 * direct audio response, large files return HTTP 202 with a job id that is
 * then polled at /api/v1/jobs/{id} until it completes or fails.
 */
class AudioExtractor {
    constructor() {
        // Cache DOM references once; assumes the template defines these
        // ids/classes (extractionForm, videoFile, fileInfo, submitBtn,
        // resultSection, .file-input-label) -- confirm against index.html.
        this.form = document.getElementById('extractionForm');
        this.fileInput = document.getElementById('videoFile');
        this.fileLabel = document.querySelector('.file-input-label');
        this.fileInfo = document.getElementById('fileInfo');
        this.submitBtn = document.getElementById('submitBtn');
        this.resultSection = document.getElementById('resultSection');

        this.initializeEventListeners();
    }

    // Register all UI event handlers once, at construction time.
    initializeEventListeners() {
        this.form.addEventListener('submit', (e) => this.handleSubmit(e));
        this.fileInput.addEventListener('change', (e) => this.handleFileSelect(e));

        // Drag and drop
        this.fileLabel.addEventListener('dragover', (e) => this.handleDragOver(e));
        this.fileLabel.addEventListener('dragleave', (e) => this.handleDragLeave(e));
        this.fileLabel.addEventListener('drop', (e) => this.handleDrop(e));
    }

    // Show name/size info when a file is chosen via the file picker.
    handleFileSelect(e) {
        const file = e.target.files[0];
        if (file) {
            this.displayFileInfo(file);
        }
    }

    // preventDefault() is required so the browser allows dropping here.
    handleDragOver(e) {
        e.preventDefault();
        this.fileLabel.classList.add('drag-over');
    }

    handleDragLeave(e) {
        e.preventDefault();
        this.fileLabel.classList.remove('drag-over');
    }

    // Accept a dropped file and mirror it into the hidden file input so the
    // normal form-submission path picks it up.
    handleDrop(e) {
        e.preventDefault();
        this.fileLabel.classList.remove('drag-over');

        const files = e.dataTransfer.files;
        if (files.length > 0) {
            this.fileInput.files = files;
            this.displayFileInfo(files[0]);
        }
    }

    // Render "name (size MB)" under the drop zone and restyle the label.
    displayFileInfo(file) {
        const sizeMB = (file.size / (1024 * 1024)).toFixed(2);
        this.fileInfo.textContent = `${file.name} (${sizeMB} MB)`;
        this.fileLabel.classList.add('has-file');
        this.fileLabel.querySelector('span').textContent = 'Change file';
    }

    // Submit the form via fetch and branch on the response: an audio
    // content-type means the file was processed inline; a 202 JSON body
    // means a background job was queued and must be polled.
    async handleSubmit(e) {
        e.preventDefault();

        const file = this.fileInput.files[0];
        if (!file) return;

        const formData = new FormData();
        formData.append('video', file);
        formData.append('output_format', document.getElementById('outputFormat').value);
        formData.append('quality', document.getElementById('quality').value);

        this.setLoading(true);
        this.showStatus('loading', 'Processing your video...');

        try {
            const response = await fetch('/api/v1/extract', {
                method: 'POST',
                body: formData
            });

            if (response.headers.get('content-type')?.includes('audio')) {
                // Direct file response
                await this.handleDirectResponse(response);
            } else {
                // Job response
                const data = await response.json();
                if (response.status === 202) {
                    await this.handleJobResponse(data);
                } else {
                    throw new Error(data.error || 'Unknown error');
                }
            }
        } catch (error) {
            this.showStatus('error', `Error: ${error.message}`);
        } finally {
            // NOTE(review): this re-enables the button even while a 202 job
            // is still being polled, because handleJobResponse returns as
            // soon as the interval is scheduled -- confirm this is intended.
            this.setLoading(false);
        }
    }

    // Turn the streamed audio body into an object URL for playback/download.
    // NOTE(review): the object URL is never revoked (URL.revokeObjectURL),
    // so repeated extractions leak blobs for the lifetime of the page.
    async handleDirectResponse(response) {
        const blob = await response.blob();
        const url = URL.createObjectURL(blob);
        const filename = this.extractFilename(response);

        this.showResult({
            type: 'direct',
            url: url,
            filename: filename
        });
    }

    // Poll the job-status endpoint every 2 s until completed/failed.
    // NOTE(review): no response.ok check and no overall timeout -- a job
    // stuck in "processing" keeps this interval alive forever.
    async handleJobResponse(data) {
        this.showStatus('loading', `Processing large file (${data.file_size_mb.toFixed(1)} MB)...`);

        const checkInterval = setInterval(async () => {
            try {
                const response = await fetch(`/api/v1/jobs/${data.job_id}`);
                const status = await response.json();

                if (status.status === 'completed') {
                    clearInterval(checkInterval);
                    this.showResult({
                        type: 'job',
                        downloadUrl: status.download_url,
                        filename: status.filename
                    });
                } else if (status.status === 'failed') {
                    clearInterval(checkInterval);
                    this.showStatus('error', `Processing failed: ${status.error}`);
                }
            } catch (error) {
                clearInterval(checkInterval);
                this.showStatus('error', `Error checking status: ${error.message}`);
            }
        }, 2000);
    }

    // Replace the result area with a status banner (loading/success/error).
    // NOTE(review): the HTML inside this template literal appears to have
    // been stripped in this view -- confirm the original markup before editing.
    showStatus(type, message) {
        this.resultSection.style.display = 'block';
        this.resultSection.innerHTML = `


                    ${type === 'loading' ? ' ' : ''}
                    ${message}


        `;
    }

    // Render the final result card (audio player / download link).
    // NOTE(review): `format` is unused in the code visible here; it was
    // presumably referenced by markup stripped from this template literal.
    showResult(result) {
        const format = document.getElementById('outputFormat').value;

        this.resultSection.style.display = 'block';
        this.resultSection.innerHTML = `

        `;
    }

    // Toggle the submit button between idle and busy states.
    setLoading(loading) {
        this.submitBtn.disabled = loading;
        this.submitBtn.textContent = loading ? 'Processing...' : 'Extract Audio';
    }

    // Best-effort filename from the Content-Disposition header; falls back
    // to a generic name when the header is absent or unquoted.
    extractFilename(response) {
        const disposition = response.headers.get('content-disposition');
        if (disposition) {
            const match = disposition.match(/filename="(.+)"/);
            if (match) return match[1];
        }
        return 'extracted_audio';
    }
}

// Initialize the app once the DOM is ready.
document.addEventListener('DOMContentLoaded', () => {
    new AudioExtractor();
});
\ No newline at end of file
diff --git a/interfaces/web/static/style.css b/interfaces/web/static/style.css
new file mode 100644
index 0000000000000000000000000000000000000000..42ecc42b24d88848b54109db839b8ea11c9cfa8d
--- /dev/null
+++ b/interfaces/web/static/style.css
@@ -0,0 +1,250 @@
/* Design tokens: single source of truth for colors and surfaces. */
:root {
    --primary-color: #3b82f6;
    --primary-hover: #2563eb;
    --success-color: #10b981;
    --error-color: #ef4444;
    --warning-color: #f59e0b;
    --bg-color: #f3f4f6;
    --card-bg: #ffffff;
    --text-primary: #111827;
    --text-secondary: #6b7280;
    --border-color: #e5e7eb;
}

/* Global reset: predictable box model and no default spacing. */
* {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}

/* Base typography and page background. */
body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    background-color: var(--bg-color);
    color: var(--text-primary);
    line-height: 1.6;
}

/* Centered single-column layout. */
.container {
    max-width: 800px;
    margin: 0 auto;
    padding: 2rem;
}

/* ---- Page header ---- */
header {
    text-align: center;
    margin-bottom: 3rem;
}

header h1 {
    font-size: 2.5rem;
    margin-bottom: 0.5rem;
}

header p {
    color: var(--text-secondary);
    font-size: 1.125rem;
}

/* ---- Upload card ---- */
.upload-section {
    background: var(--card-bg);
    border-radius: 1rem;
    padding: 2rem;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
}

.file-input-wrapper {
    margin-bottom: 2rem;
}

/* The native file input stays hidden; the styled label below is the
   click/drop target (see script.js drag-and-drop handlers). */
#videoFile {
    display: none;
}

.file-input-label {
    display: flex;
    flex-direction: column;
    align-items: center;
    padding: 3rem;
    border: 2px dashed var(--border-color);
    border-radius: 0.5rem;
    cursor: pointer;
    transition: all 0.3s;
}

.file-input-label:hover {
    border-color: var(--primary-color);
    background-color: #f9fafb;
}

/* Applied by script.js once a file has been selected. */
.file-input-label.has-file {
    border-color: var(--success-color);
    background-color: #f0fdf4;
}

.upload-icon {
    width: 48px;
    height: 48px;
    fill: var(--text-secondary);
    margin-bottom: 1rem;
}

.file-info {
    margin-top: 1rem;
    text-align: center;
    color: var(--text-secondary);
}

/* ---- Format/quality option selects ---- */
.options-grid {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 1.5rem;
    margin-bottom: 2rem;
}

.form-group {
    display: flex;
    flex-direction: column;
}

.form-group label {
    font-weight: 500;
    margin-bottom: 0.5rem;
    color: var(--text-primary);
}

select {
    padding: 0.75rem;
    border: 1px solid var(--border-color);
    border-radius: 0.375rem;
    background-color: white;
    font-size: 1rem;
    transition: border-color 0.3s;
}

select:focus {
    outline: none;
    border-color: var(--primary-color);
    box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1);
}

/* ---- Submit button (disabled while a request is in flight) ---- */
.submit-btn {
    width: 100%;
    padding: 1rem;
    background-color: var(--primary-color);
    color: white;
    border: none;
    border-radius: 0.5rem;
    font-size: 1.125rem;
    font-weight: 600;
    cursor: pointer;
    transition: background-color 0.3s;
}

.submit-btn:hover:not(:disabled) {
    background-color: var(--primary-hover);
}

.submit-btn:disabled {
    background-color: var(--text-secondary);
    cursor: not-allowed;
}

/* ---- Result area (shown by script.js after submission) ---- */
.result-section {
    margin-top: 2rem;
    display: none;
}

.result-card {
    background: var(--card-bg);
    border-radius: 1rem;
    padding: 2rem;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
}

/* Status banner variants keyed by the state class set in script.js. */
.status-message {
    display: flex;
    align-items: center;
    margin-bottom: 1.5rem;
    padding: 1rem;
    border-radius: 0.5rem;
}

.status-message.success {
    background-color: #f0fdf4;
    color: var(--success-color);
}

.status-message.error {
    background-color: #fef2f2;
    color: var(--error-color);
}

.status-message.loading {
    background-color: #fefce8;
    color: var(--warning-color);
}

/* Spinner inherits its ring color from the surrounding status text. */
.spinner {
    display: inline-block;
    width: 20px;
    height: 20px;
    border: 3px solid rgba(0, 0, 0, 0.1);
    border-radius: 50%;
    border-top-color: currentColor;
    animation: spin 1s ease-in-out infinite;
    margin-right: 0.75rem;
}

@keyframes spin {
    to { transform: rotate(360deg); }
}

audio {
    width: 100%;
    margin: 1rem 0;
}

.download-btn {
    display: inline-flex;
    align-items: center;
    padding: 0.75rem 1.5rem;
    background-color: var(--success-color);
    color: white;
    text-decoration: none;
    border-radius: 0.375rem;
    font-weight: 500;
    transition: background-color 0.3s;
}

.download-btn:hover {
    background-color: #059669;
}

/* ---- Footer ---- */
footer {
    text-align: center;
    margin-top: 4rem;
    color: var(--text-secondary);
}

footer a {
    color: var(--primary-color);
    text-decoration: none;
}

footer a:hover {
    text-decoration: underline;
}

/* Responsive: collapse to a single column on narrow screens. */
@media (max-width: 640px) {
    .container {
        padding: 1rem;
    }

    header h1 {
        font-size: 2rem;
    }

    .options-grid {
        grid-template-columns: 1fr;
    }
}
\ No newline at end of file
diff --git a/interfaces/web/templates/index.html b/interfaces/web/templates/index.html
new file mode 100644
index 0000000000000000000000000000000000000000..7513d0bbb2c58bada72260121055ee80f12a0c60
--- /dev/null
+++ b/interfaces/web/templates/index.html
@@ -0,0 +1,69 @@
+
+
+
+
+
+ Video to Audio Extractor
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4fb29e26f49193bb3ea208e56de28c338ad4a97
--- /dev/null
+++ b/main.py
@@ -0,0 +1,12 @@
+"""Entry point for the application."""
+import uvicorn
+from app import app
+
+if __name__ == "__main__":
+ uvicorn.run(
+ "app:app",
+ host="0.0.0.0",
+ port=7860,
+ reload=True,
+ log_level="info"
+ )
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 00dd2627eb1d3c1147c6b9443e1cdb944d0fe0a2..401b222d5d658f0d0b967e17b188abafc283c236 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,17 @@
fastapi==0.104.1
uvicorn[standard]==0.24.0
+
+# API Dependencies
python-multipart==0.0.6
+pydantic==2.5.0
+pydantic-settings==2.1.0
+
+# File Processing
+aiofiles==23.2.1
ffmpeg-python==0.2.0
-aiofiles==23.2.1
\ No newline at end of file
+
+# Web Interface
+jinja2==3.1.2
+
+# Utilities
+python-dotenv==1.0.0
\ No newline at end of file