Spaces:

unveilAiDotOrg
/

audio-processor

Sleeping

App Files Files Community

Tadeas Kosek committed on Jun 14, 2025

Commit

8b33160

1 Parent(s): 2ddc19d

add audio splitting

Browse files

Files changed (6) hide show

application/use_cases/container.py +3 -1
application/use_cases/download_audio_result.py +139 -7
domain/services/validation_service.py +93 -2
infrastructure/repositories/file_repository.py +61 -0
infrastructure/services/ffmpeg_service.py +99 -0
interfaces/api/routes/job_routes.py +79 -8

application/use_cases/container.py CHANGED Viewed

@@ -57,9 +57,11 @@ class UseCaseContainer:
             job_repository=self.services.job_repository
         )
-        # Download use case
         self.download_audio_result = DownloadAudioResultUseCase(
             job_repository=self.services.job_repository,
             file_repository=self.services.file_repository,
             audio_mime_types=self.settings.audio_mime_types
         )

             job_repository=self.services.job_repository
         )
+        # Download use case (updated with new dependencies)
         self.download_audio_result = DownloadAudioResultUseCase(
             job_repository=self.services.job_repository,
             file_repository=self.services.file_repository,
+            validation_service=self.validation_service,
+            ffmpeg_service=self.services.ffmpeg_service,
             audio_mime_types=self.settings.audio_mime_types
         )

application/use_cases/download_audio_result.py CHANGED Viewed

@@ -1,8 +1,11 @@
 """Use case for downloading audio results."""
 from typing import Any, Optional
 import logging
-from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError
 from ..dto.extraction_response import DownloadResultDTO
 logger = logging.getLogger(__name__)
@@ -16,6 +19,25 @@ class FileRepository:
     """Protocol for file repository."""
     async def file_exists(self, file_path: str) -> bool:
         ...
 class DownloadAudioResultUseCase:
     """Use case for downloading completed audio."""
@@ -23,12 +45,18 @@ class DownloadAudioResultUseCase:
     def __init__(self,
                  job_repository: JobRepository,
                  file_repository: FileRepository,
                  audio_mime_types: dict):
         self.job_repository = job_repository
         self.file_repository = file_repository
         self.audio_mime_types = audio_mime_types
-    async def execute(self, job_id: str) -> DownloadResultDTO:
         """Get download information for completed job."""
         # Get job from repository
         job_record = await self.job_repository.get(job_id)
@@ -47,6 +75,30 @@ class DownloadAudioResultUseCase:
         if not await self.file_repository.file_exists(job_record.output_path):
             raise RuntimeError(f"Output file not found for job {job_id}")
         # Get MIME type
         mime_type = self.audio_mime_types.get(
             job_record.output_format,
@@ -54,13 +106,93 @@ class DownloadAudioResultUseCase:
         )
         # Create filename
-        import os
-        original_name = os.path.splitext(job_record.filename)[0]
-        filename = f"{original_name}.{job_record.output_format}"
         return DownloadResultDTO(
-            file_path=job_record.output_path,
             media_type=mime_type,
             filename=filename,
             processing_time=job_record.processing_time or 0
-        )

 """Use case for downloading audio results."""
 from typing import Any, Optional
 import logging
+import os
+from pathlib import Path
+from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError, ValidationError
+from domain.services.validation_service import ValidationService
 from ..dto.extraction_response import DownloadResultDTO
 logger = logging.getLogger(__name__)
     """Protocol for file repository."""
     async def file_exists(self, file_path: str) -> bool:
         ...
+    async def create_temp_path(self, prefix: str, extension: str) -> str:
+        """Return a path for a new temporary file.
+        Interface stub: the body is ``...``; a concrete repository supplies the behaviour.
+        Args:
+            prefix: Leading part of the generated file name.
+            extension: File extension for the generated name.
+        Returns:
+            str: The temporary file path.
+        """
+        ...
+    async def create_deterministic_temp_path(self, job_id: str,
+                                           start_seconds: Optional[float],
+                                           end_seconds: Optional[float],
+                                           extension: str) -> str:
+        """Return a temporary file path determined by the job id and time range.
+        Interface stub: the body is ``...``; a concrete repository supplies the behaviour.
+        Args:
+            job_id: The job the file belongs to.
+            start_seconds: Range start in seconds, or None.
+            end_seconds: Range end in seconds, or None.
+            extension: File extension for the generated name.
+        Returns:
+            str: The file path for these parameters.
+        """
+        ...
+class FFmpegService:
+    """Interface the download use case expects from the FFmpeg service.
+    Both methods are stubs (``...``); they only describe the calls the use case makes.
+    """
+    async def get_media_info(self, file_path: str) -> dict:
+        """Return media information for ``file_path`` as a dict.
+        The use case reads ``format.duration`` and the ``streams`` list from the result.
+        """
+        ...
+    async def trim_audio(self, input_path: str, output_path: str,
+                        start_seconds: Optional[float], end_seconds: Optional[float]) -> Any:
+        """Trim ``input_path`` to the given time range and write it to ``output_path``.
+        Either bound may be None. The use case reads ``success`` and ``error`` from the
+        returned object.
+        """
+        ...
 class DownloadAudioResultUseCase:
     """Use case for downloading completed audio."""
     def __init__(self,
                  job_repository: JobRepository,
                  file_repository: FileRepository,
+                 validation_service: ValidationService,
+                 ffmpeg_service: FFmpegService,
                  audio_mime_types: dict):
+        """Store the collaborators used to serve a download.
+        Args:
+            job_repository: Used to look up the job record by id.
+            file_repository: Used to check that files exist and to build trim paths.
+            validation_service: Used to validate the requested time range.
+            ffmpeg_service: Used to read media info and to trim audio.
+            audio_mime_types: Mapping looked up by the job's output format.
+        """
         self.job_repository = job_repository
         self.file_repository = file_repository
+        self.validation_service = validation_service
+        self.ffmpeg_service = ffmpeg_service
         self.audio_mime_types = audio_mime_types
+    async def execute(self, job_id: str,
+                     start_seconds: Optional[float] = None,
+                     end_seconds: Optional[float] = None) -> DownloadResultDTO:
         """Get download information for completed job."""
         # Get job from repository
         job_record = await self.job_repository.get(job_id)
         if not await self.file_repository.file_exists(job_record.output_path):
             raise RuntimeError(f"Output file not found for job {job_id}")
+        # Determine if trimming is needed
+        needs_trimming = start_seconds is not None or end_seconds is not None
+        if needs_trimming:
+            # Get audio duration for validation
+            media_info = await self.ffmpeg_service.get_media_info(job_record.output_path)
+            audio_duration = self._extract_duration_from_media_info(media_info)
+            if audio_duration is None:
+                raise RuntimeError(f"Could not determine audio duration for job {job_id}")
+            # Validate time range
+            self.validation_service.validate_time_range(
+                start_seconds, end_seconds, audio_duration
+            )
+            # Create trimmed audio
+            file_path = await self._create_trimmed_audio(
+                job_record, start_seconds, end_seconds
+            )
+        else:
+            # Use original file
+            file_path = job_record.output_path
         # Get MIME type
         mime_type = self.audio_mime_types.get(
             job_record.output_format,
         )
         # Create filename
+        filename = self._create_filename(job_record, start_seconds, end_seconds)
         return DownloadResultDTO(
+            file_path=file_path,
             media_type=mime_type,
             filename=filename,
             processing_time=job_record.processing_time or 0
+        )
+    async def _create_trimmed_audio(self, job_record: Any,
+                                   start_seconds: Optional[float],
+                                   end_seconds: Optional[float]) -> str:
+        """Return the path of the trimmed copy of the job's audio, creating it if absent.
+        The path comes from the file repository and depends only on the job id, the
+        requested range and the output format, so a repeated request finds the
+        earlier file and skips FFmpeg.
+        Raises:
+            RuntimeError: If the FFmpeg service reports that trimming failed.
+        """
+        cached_path = await self.file_repository.create_deterministic_temp_path(
+            job_id=job_record.id,
+            start_seconds=start_seconds,
+            end_seconds=end_seconds,
+            extension=job_record.output_format
+        )
+        already_trimmed = await self.file_repository.file_exists(cached_path)
+        if already_trimmed:
+            # Cache hit: an earlier request produced this exact range.
+            logger.info(f"Reusing existing trimmed audio for job {job_record.id}: "
+                       f"start={start_seconds}, end={end_seconds}")
+            return cached_path
+        logger.info(f"Creating new trimmed audio for job {job_record.id}: "
+                   f"start={start_seconds}, end={end_seconds}")
+        outcome = await self.ffmpeg_service.trim_audio(
+            input_path=job_record.output_path,
+            output_path=cached_path,
+            start_seconds=start_seconds,
+            end_seconds=end_seconds
+        )
+        if outcome.success:
+            return cached_path
+        raise RuntimeError(f"Audio trimming failed: {outcome.error}")
+    def _extract_duration_from_media_info(self, media_info: dict) -> Optional[float]:
+        """Extract the duration in seconds from ffprobe-style media info.
+        The container-level ``format.duration`` is preferred; the ``duration`` of each
+        audio stream is the fallback. A value that cannot be converted to a float
+        (for example a placeholder such as ``"N/A"``) is skipped, so one bad field
+        no longer prevents the remaining candidates from being tried.
+        Args:
+            media_info: Dict with optional ``format`` and ``streams`` entries.
+        Returns:
+            The first parseable duration, or None if there is none.
+        """
+        if not isinstance(media_info, dict):
+            logger.error(f"Error extracting duration from media info: expected dict, got {type(media_info).__name__}")
+            return None
+        # Collect candidates in priority order: container first, then audio streams.
+        candidates = []
+        format_section = media_info.get('format')
+        if isinstance(format_section, dict) and 'duration' in format_section:
+            candidates.append(format_section['duration'])
+        for stream in media_info.get('streams') or []:
+            if isinstance(stream, dict) and stream.get('codec_type') == 'audio' and 'duration' in stream:
+                candidates.append(stream['duration'])
+        for raw_duration in candidates:
+            try:
+                return float(raw_duration)
+            except (TypeError, ValueError):
+                # Unparseable value: fall through to the next candidate.
+                continue
+        if candidates:
+            logger.error(f"Error extracting duration from media info: no parseable value in {candidates!r}")
+        return None
+    def _create_filename(self, job_record: Any,
+                        start_seconds: Optional[float],
+                        end_seconds: Optional[float]) -> str:
+        """Build the download file name from the job's original name and output format.
+        A time-range suffix is inserted before the extension when either bound is given.
+        """
+        stem, _ = os.path.splitext(job_record.filename)
+        if start_seconds is None and end_seconds is None:
+            # Untrimmed download: original stem plus the output extension.
+            return f"{stem}.{job_record.output_format}"
+        range_suffix = self._format_time_range_suffix(start_seconds, end_seconds)
+        return f"{stem}_{range_suffix}.{job_record.output_format}"
+    def _format_time_range_suffix(self, start_seconds: Optional[float],
+                                 end_seconds: Optional[float]) -> str:
+        """Render the range as ``HH-MM-SS_to_HH-MM-SS`` for use in a file name.
+        A missing bound is written as ``start`` or ``end``; fractions of a second are dropped.
+        """
+        def _label(value: Optional[float], placeholder: str) -> str:
+            if value is None:
+                return placeholder
+            fields = (value // 3600, (value % 3600) // 60, value % 60)
+            return "-".join(f"{int(field):02d}" for field in fields)
+        return "_to_".join((_label(start_seconds, "start"), _label(end_seconds, "end")))

domain/services/validation_service.py CHANGED Viewed

@@ -1,5 +1,6 @@
 """Domain validation service."""
-from typing import List
 from ..entities.video import Video
 from ..value_objects.audio_format import AudioFormat
@@ -49,4 +50,94 @@ class ValidationService:
     def can_process_directly(self, video: Video, threshold_mb: float) -> bool:
         """Check if video can be processed directly (not async)."""
-        return not video.is_large_file(threshold_mb)

 """Domain validation service."""
+import re
+from typing import List, Optional
 from ..entities.video import Video
 from ..value_objects.audio_format import AudioFormat
     def can_process_directly(self, video: Video, threshold_mb: float) -> bool:
         """Check if video can be processed directly (not async)."""
+        return not video.is_large_file(threshold_mb)
+    def validate_time_format(self, time_str: str) -> float:
+        """Validate and convert HH:MM:SS format to seconds.
+        Surrounding whitespace is ignored. Hours may be one or two digits; minutes
+        and seconds must be two digits in the range 00-59.
+        Args:
+            time_str: Time string in HH:MM:SS format
+        Returns:
+            float: Time in seconds
+        Raises:
+            ValidationError: If format is invalid
+        """
+        if not time_str:
+            raise ValidationError("Time string cannot be empty")
+        # re.ASCII limits \d to 0-9; without it digits from other scripts would
+        # match and be converted by int(), which the documented format does not allow.
+        match = re.fullmatch(r'(\d{1,2}):(\d{2}):(\d{2})', time_str.strip(), re.ASCII)
+        if match is None:
+            raise ValidationError(
+                f"Invalid time format '{time_str}'. Expected format: HH:MM:SS (e.g., 01:23:45)"
+            )
+        hours, minutes, seconds = map(int, match.groups())
+        if minutes >= 60:
+            raise ValidationError(f"Invalid minutes '{minutes}'. Must be 0-59")
+        if seconds >= 60:
+            raise ValidationError(f"Invalid seconds '{seconds}'. Must be 0-59")
+        # The pattern admits digits only, so the total cannot be negative; no sign check is needed.
+        return float(hours * 3600 + minutes * 60 + seconds)
+    def validate_time_range(self, start_seconds: Optional[float],
+                           end_seconds: Optional[float],
+                           audio_duration: float) -> None:
+        """Check that the requested range fits inside the audio.
+        Checks run in a fixed order and the first failure is raised: duration,
+        start bound, end bound, then the relationship between the two bounds.
+        Args:
+            start_seconds: Range start in seconds, or None for the beginning
+            end_seconds: Range end in seconds, or None for the end of the audio
+            audio_duration: Total audio duration in seconds
+        Raises:
+            ValidationError: If time range is invalid
+        """
+        if audio_duration <= 0:
+            raise ValidationError("Audio duration must be positive")
+        has_start = start_seconds is not None
+        has_end = end_seconds is not None
+        if has_start and start_seconds < 0:
+            raise ValidationError("Start time cannot be negative")
+        if has_start and start_seconds >= audio_duration:
+            raise ValidationError(
+                f"Start time {start_seconds:.1f}s exceeds audio duration {audio_duration:.1f}s"
+            )
+        if has_end and end_seconds < 0:
+            raise ValidationError("End time cannot be negative")
+        if has_end and end_seconds > audio_duration:
+            raise ValidationError(
+                f"End time {end_seconds:.1f}s exceeds audio duration {audio_duration:.1f}s"
+            )
+        if not (has_start and has_end):
+            # With an open bound there is no pair to compare.
+            return
+        if start_seconds >= end_seconds:
+            raise ValidationError(
+                f"Start time {start_seconds:.1f}s must be less than end time {end_seconds:.1f}s"
+            )
+        # Segments shorter than one second are rejected.
+        if end_seconds - start_seconds < 1.0:
+            raise ValidationError(
+                "Audio segment must be at least 1 second long"
+            )

infrastructure/repositories/file_repository.py CHANGED Viewed

@@ -7,6 +7,7 @@ import uuid
 from datetime import datetime, timedelta
 import logging
 import inspect
 logger = logging.getLogger(__name__)
@@ -66,6 +67,66 @@ class FileSystemRepository:
         filename = f"{job_id}_output.{format}"
         return str(self.base_path / filename)
     async def read_file(self, file_path: str) -> bytes:
         """Read file content."""
         async with aiofiles.open(file_path, 'rb') as f:

 from datetime import datetime, timedelta
 import logging
 import inspect
+import asyncio
 logger = logging.getLogger(__name__)
         filename = f"{job_id}_output.{format}"
         return str(self.base_path / filename)
+    async def create_temp_path(self, prefix: str, extension: str) -> str:
+        """Return a fresh temporary path under ``base_path``; no file is created.
+        The name is ``<sanitised prefix>_<8 characters of a random UUID><extension>``.
+        Only alphanumerics, ``-`` and ``_`` are kept from the prefix, and a leading
+        dot is added to the extension when it has none.
+        """
+        unique_part = str(uuid.uuid4())[:8]
+        allowed_punctuation = ('-', '_')
+        kept_chars = [ch for ch in prefix if ch.isalnum() or ch in allowed_punctuation]
+        safe_prefix = "".join(kept_chars)
+        suffix = extension if extension.startswith('.') else f".{extension}"
+        file_path = self.base_path / f"{safe_prefix}_{unique_part}{suffix}"
+        logger.debug(f"Created temp path: {file_path}")
+        return str(file_path)
+    async def create_deterministic_temp_path(self, job_id: str,
+                                           start_seconds: Optional[float],
+                                           end_seconds: Optional[float],
+                                           extension: str) -> str:
+        """Return the path under ``base_path`` that belongs to one job and time range.
+        Identical arguments always give the same path, so a previously trimmed file
+        can be found again. The name is ``trim_<job_id>_<hash>.<extension>`` where the
+        hash is the first 8 hex characters of the MD5 of the job id and both bounds.
+        Args:
+            job_id: The job ID
+            start_seconds: Start time in seconds (None for beginning)
+            end_seconds: End time in seconds (None for end)
+            extension: File extension, with or without a leading dot
+        Returns:
+            str: Deterministic file path
+        """
+        import hashlib
+        # Open-ended bounds get fixed tokens so they hash the same way every time.
+        start_token = "start" if start_seconds is None else str(start_seconds)
+        end_token = "end" if end_seconds is None else str(end_seconds)
+        hash_input = f"{job_id}_{start_token}_{end_token}"
+        # MD5 is used for naming only, not for security.
+        digest = hashlib.md5(hash_input.encode()).hexdigest()
+        bare_extension = extension.lstrip('.')
+        file_path = self.base_path / f"trim_{job_id}_{digest[:8]}.{bare_extension}"
+        logger.debug(f"Created deterministic temp path: {file_path} for params: {hash_input}")
+        return str(file_path)
     async def read_file(self, file_path: str) -> bytes:
         """Read file content."""
         async with aiofiles.open(file_path, 'rb') as f:

infrastructure/services/ffmpeg_service.py CHANGED Viewed

@@ -86,6 +86,73 @@ class FFmpegService:
                 error=str(e)
             )
     async def get_media_info(self, file_path: str) -> Dict[str, Any]:
         """Get media file information using ffprobe."""
         try:
@@ -142,6 +209,38 @@ class FFmpegService:
         return cmd
     def _extract_duration(self, stderr_output: str) -> Optional[float]:
         """Extract duration from FFmpeg stderr output."""
         import re

                 error=str(e)
             )
+    async def trim_audio(self, input_path: str, output_path: str,
+                        start_seconds: Optional[float] = None,
+                        end_seconds: Optional[float] = None) -> FFmpegResult:
+        """Trim an audio file to a time range and publish the result atomically.
+        FFmpeg writes to a sibling staging file that is renamed onto ``output_path``
+        only after a zero exit status. A failed or timed-out run therefore never
+        leaves a truncated file at ``output_path`` that a caller checking for an
+        existing file could mistake for a finished trim.
+        Args:
+            input_path: Path to input audio file
+            output_path: Path for output trimmed file
+            start_seconds: Start time in seconds (None = start from beginning)
+            end_seconds: End time in seconds (None = continue to end)
+        Returns:
+            FFmpegResult with success status and details. A non-zero exit, a timeout
+            or any other exception is reported through the result, not raised.
+        """
+        import os
+        import uuid
+        # Keep the real extension last so FFmpeg still infers the output format from it.
+        root, ext = os.path.splitext(output_path)
+        staging_path = f"{root}.{uuid.uuid4().hex[:8]}.partial{ext}"
+        try:
+            cmd = self._build_trim_command(input_path, staging_path, start_seconds, end_seconds)
+            logger.info(f"Running FFmpeg trim command: {' '.join(cmd)}")
+            process = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+            try:
+                _, stderr = await asyncio.wait_for(
+                    process.communicate(),
+                    timeout=self.timeout_seconds
+                )
+            except asyncio.TimeoutError:
+                process.kill()
+                await process.wait()
+                return FFmpegResult(
+                    success=False,
+                    error=f"FFmpeg trim timeout after {self.timeout_seconds} seconds"
+                )
+            # FFmpeg may echo metadata that is not valid UTF-8; a strict decode would
+            # raise here and turn a successful run into a reported failure.
+            stderr_text = stderr.decode(errors="replace")
+            if process.returncode != 0:
+                return FFmpegResult(
+                    success=False,
+                    error=f"FFmpeg trim failed: {stderr_text}"
+                )
+            # Same directory, so the rename replaces output_path in a single step.
+            os.replace(staging_path, output_path)
+            return FFmpegResult(
+                success=True,
+                output_path=output_path,
+                duration=self._extract_duration(stderr_text),
+                metadata={
+                    "operation": "trim",
+                    "start_seconds": start_seconds,
+                    "end_seconds": end_seconds
+                }
+            )
+        except Exception as e:
+            logger.error(f"FFmpeg trim error: {str(e)}")
+            return FFmpegResult(
+                success=False,
+                error=str(e)
+            )
+        finally:
+            # After a successful rename the staging file is gone; otherwise discard the partial output.
+            try:
+                os.remove(staging_path)
+            except FileNotFoundError:
+                pass
+            except OSError as cleanup_error:
+                logger.warning(f"Could not remove partial trim output {staging_path}: {cleanup_error}")
     async def get_media_info(self, file_path: str) -> Dict[str, Any]:
         """Get media file information using ffprobe."""
         try:
         return cmd
+    def _build_trim_command(self, input_path: str, output_path: str,
+                           start_seconds: Optional[float],
+                           end_seconds: Optional[float]) -> list:
+        """Assemble the FFmpeg argument list for a stream-copy trim.
+        ``-ss`` is added when a start is given. An end is expressed as a duration
+        (``-t``) when a start is also given, otherwise as an absolute ``-to``.
+        """
+        range_args = []
+        if start_seconds is not None:
+            range_args += ['-ss', str(start_seconds)]
+        if end_seconds is not None and start_seconds is not None:
+            # Both bounds known: pass the length of the segment.
+            range_args += ['-t', str(end_seconds - start_seconds)]
+        elif end_seconds is not None:
+            range_args += ['-to', str(end_seconds)]
+        # '-c copy' avoids re-encoding; '-y' overwrites an existing output file.
+        return [
+            self.ffmpeg_path, '-i', input_path,
+            *range_args,
+            '-c', 'copy', '-y', output_path
+        ]
     def _extract_duration(self, stderr_output: str) -> Optional[float]:
         """Extract duration from FFmpeg stderr output."""
         import re

interfaces/api/routes/job_routes.py CHANGED Viewed

@@ -1,11 +1,11 @@
 """Job management API routes."""
-from fastapi import APIRouter, Depends, HTTPException, Request, Path
 from fastapi.responses import FileResponse
-from typing import Any
 from ..dependencies import UseCases
 from ..responses import JobStatusResponse, ErrorResponse
-from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError
 router = APIRouter()
@@ -45,22 +45,54 @@ async def get_job_status(
 @router.get("/jobs/{job_id}/download",
            summary="Download Extracted Audio",
-           description="Download the audio file from a completed extraction job",
            responses={
                200: {
                    "description": "Audio file",
                    "content": {"audio/mpeg": {}, "audio/aac": {}, "audio/wav": {}}
                },
-               400: {"description": "Job not completed"},
                404: {"description": "Job not found"}
            })
 async def download_job_result(
     use_cases: UseCases,
-    job_id: str = Path(..., description="The job ID of the completed extraction")
 ):
     """Download the result of a completed job."""
     try:
-        result = await use_cases.download_audio_result.execute(job_id)
         return FileResponse(
             path=result.file_path,
@@ -75,5 +107,44 @@ async def download_job_result(
         raise HTTPException(404, str(e))
     except JobNotCompletedError as e:
         raise HTTPException(400, str(e))
     except Exception as e:
-        raise HTTPException(500, f"Error downloading result: {str(e)}")

 """Job management API routes."""
+from fastapi import APIRouter, Depends, HTTPException, Request, Path, Query
 from fastapi.responses import FileResponse
+from typing import Any, Optional
 from ..dependencies import UseCases
 from ..responses import JobStatusResponse, ErrorResponse
+from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError, ValidationError
 router = APIRouter()
 @router.get("/jobs/{job_id}/download",
            summary="Download Extracted Audio",
+           description="""
+           Download the audio file from a completed extraction job.
+           Optionally specify start and end times to download only a portion of the audio:
+           - start: Start time in HH:MM:SS format (e.g., 00:01:30 for 1 minute 30 seconds)
+           - end: End time in HH:MM:SS format (e.g., 00:03:45 for 3 minutes 45 seconds)
+           If neither parameter is provided, the complete audio file is returned.
+           If only start is provided, audio from start time to the end is returned.
+           If only end is provided, audio from beginning to end time is returned.
+           """,
            responses={
                200: {
                    "description": "Audio file",
                    "content": {"audio/mpeg": {}, "audio/aac": {}, "audio/wav": {}}
                },
+               400: {"description": "Job not completed or invalid time parameters"},
                404: {"description": "Job not found"}
            })
 async def download_job_result(
     use_cases: UseCases,
+    job_id: str = Path(..., description="The job ID of the completed extraction"),
+    start: Optional[str] = Query(
+        None,
+        description="Start time in HH:MM:SS format (e.g., 00:01:30)",
+        example="00:01:30"
+    ),
+    end: Optional[str] = Query(
+        None,
+        description="End time in HH:MM:SS format (e.g., 00:03:45)",
+        example="00:03:45"
+    )
 ):
     """Download the result of a completed job."""
     try:
+        # Parse time parameters if provided (validation will be done in use case)
+        start_seconds = None
+        end_seconds = None
+        if start is not None:
+            start_seconds = _parse_time_to_seconds(start)
+        if end is not None:
+            end_seconds = _parse_time_to_seconds(end)
+        result = await use_cases.download_audio_result.execute(
+            job_id, start_seconds, end_seconds
+        )
         return FileResponse(
             path=result.file_path,
         raise HTTPException(404, str(e))
     except JobNotCompletedError as e:
         raise HTTPException(400, str(e))
+    except ValidationError as e:
+        raise HTTPException(400, str(e))
+    except ValueError as e:
+        # Handle time parsing errors
+        raise HTTPException(400, f"Invalid time format: {str(e)}")
     except Exception as e:
+        raise HTTPException(500, f"Error downloading result: {str(e)}")
+def _parse_time_to_seconds(time_str: str) -> float:
+    """Parse an HH:MM:SS query value into seconds.
+    Surrounding whitespace is ignored. Hours may be one or two digits; minutes and
+    seconds must be two digits in the range 00-59. The rules match
+    ``ValidationService.validate_time_format``; this variant raises ``ValueError``,
+    which the download route reports as a 400 response.
+    Args:
+        time_str: Time string in HH:MM:SS format
+    Returns:
+        float: Time in seconds
+    Raises:
+        ValueError: If the string is empty or not a valid HH:MM:SS time
+    """
+    import re
+    if not time_str:
+        raise ValueError("Time string cannot be empty")
+    # re.ASCII limits \d to 0-9; without it digits from other scripts in the
+    # query string would match and be converted by int().
+    match = re.fullmatch(r'(\d{1,2}):(\d{2}):(\d{2})', time_str.strip(), re.ASCII)
+    if match is None:
+        raise ValueError(
+            f"Invalid time format '{time_str}'. Expected format: HH:MM:SS (e.g., 01:23:45)"
+        )
+    hours, minutes, seconds = map(int, match.groups())
+    if minutes >= 60:
+        raise ValueError(f"Invalid minutes '{minutes}'. Must be 0-59")
+    if seconds >= 60:
+        raise ValueError(f"Invalid seconds '{seconds}'. Must be 0-59")
+    # The pattern admits digits only, so the total cannot be negative; no sign check is needed.
+    return float(hours * 3600 + minutes * 60 + seconds)