Spaces:

unveilAiDotOrg
/

audio-processor

Sleeping

App Files Community

Tadeas Kosek committed on Jun 20, 2025

Commit

b8ef32f

1 Parent(s): b7187f4

fix download job

Browse files

Files changed (3) hide show

application/dto/extraction_response.py +1 -0
application/use_cases/download_audio_result.py +47 -39
interfaces/api/routes/job_routes.py +20 -4

application/dto/extraction_response.py CHANGED Viewed

@@ -34,6 +34,7 @@ class DownloadResultDTO:
     media_type: str
     filename: str
     processing_time: float
 @dataclass
 class JobCreationDTO:

     media_type: str
     filename: str
     processing_time: float
+    storage_key: str = None
 @dataclass
 class JobCreationDTO:

application/use_cases/download_audio_result.py CHANGED Viewed

@@ -6,6 +6,7 @@ from pathlib import Path
 from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError, ValidationError
 from domain.services.validation_service import ValidationService
 from ..dto.extraction_response import DownloadResultDTO
 logger = logging.getLogger(__name__)
@@ -53,6 +54,7 @@ class DownloadAudioResultUseCase:
         self.validation_service = validation_service
         self.ffmpeg_service = ffmpeg_service
         self.audio_mime_types = audio_mime_types
     async def execute(self, job_id: str,
                      start_seconds: Optional[float] = None,
@@ -77,50 +79,58 @@ class DownloadAudioResultUseCase:
         # Determine if trimming is needed
         needs_trimming = start_seconds is not None or end_seconds is not None
         if needs_trimming:
-            # Get audio duration for validation
-            media_info = await self.ffmpeg_service.get_media_info(job_record.output_path)
-            audio_duration = self._extract_duration_from_media_info(media_info)
-            if audio_duration is None:
-                raise RuntimeError(f"Could not determine audio duration for job {job_id}")
-            # Validate time range
-            self.validation_service.validate_time_range(
-                start_seconds, end_seconds, audio_duration
-            )
-            # Create trimmed audio
-            file_path = await self._create_trimmed_audio(
-                job_record, start_seconds, end_seconds
-            )
         else:
-            # Use original file
-            file_path = job_record.output_path
-        # Get MIME type
-        mime_type = self.audio_mime_types.get(
-            job_record.output_format,
-            'application/octet-stream'
-        )
-        # Create filename
         filename = self._create_filename(job_record, start_seconds, end_seconds)
         return DownloadResultDTO(
             file_path=file_path,
             media_type=mime_type,
             filename=filename,
-            processing_time=job_record.processing_time or 0
         )
     async def _create_trimmed_audio(self, job_record: Any,
                                    start_seconds: Optional[float],
                                    end_seconds: Optional[float]) -> str:
-        """Create trimmed audio file and return its path."""
         # Create deterministic output path
-        temp_path = await self.file_repository.create_deterministic_temp_path(
             job_id=job_record.id,
             start_seconds=start_seconds,
             end_seconds=end_seconds,
@@ -128,26 +138,24 @@ class DownloadAudioResultUseCase:
         )
         # Check if trimmed file already exists
-        if await self.file_repository.file_exists(temp_path):
             logger.info(f"Reusing existing trimmed audio for job {job_record.id}: "
                        f"start={start_seconds}, end={end_seconds}")
-            return temp_path
         logger.info(f"Creating new trimmed audio for job {job_record.id}: "
-                   f"start={start_seconds}, end={end_seconds}")
-        # Trim audio using FFmpeg
-        result = await self.ffmpeg_service.trim_audio(
-            input_path=job_record.output_path,
-            output_path=temp_path,
             start_seconds=start_seconds,
             end_seconds=end_seconds
         )
-        if not result.success:
-            raise RuntimeError(f"Audio trimming failed: {result.error}")
-        return temp_path
     def _extract_duration_from_media_info(self, media_info: dict) -> Optional[float]:
         """Extract duration from ffprobe media info."""

 from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError, ValidationError
 from domain.services.validation_service import ValidationService
+from infrastructure.services.local_file_processor import LocalFileProcessor
 from ..dto.extraction_response import DownloadResultDTO
 logger = logging.getLogger(__name__)
         self.validation_service = validation_service
         self.ffmpeg_service = ffmpeg_service
         self.audio_mime_types = audio_mime_types
     async def execute(self, job_id: str,
                      start_seconds: Optional[float] = None,
         # Determine if trimming is needed
         needs_trimming = start_seconds is not None or end_seconds is not None
         if needs_trimming:
+            # Get local path for media info analysis
+            local_original_path = await self.file_repository.get_local_path(job_record.output_path)
+            try:
+                # Get audio duration for validation
+                media_info = await self.ffmpeg_service.get_media_info(local_original_path)
+                audio_duration = self._extract_duration_from_media_info(media_info)
+                if audio_duration is None:
+                    raise RuntimeError(f"Could not determine audio duration for job {job_id}")
+                # Validate time range
+                self.validation_service.validate_time_range(
+                    start_seconds, end_seconds, audio_duration
+                )
+                # Create trimmed audio
+                storage_key = await self._create_trimmed_audio(
+                    job_record, start_seconds, end_seconds
+                )
+                # Get local path for download
+                file_path = await self.file_repository.get_local_path(storage_key)
+            finally:
+                # Clean up the original local file
+                await self.file_repository.cleanup_local_path(local_original_path, job_record.output_path)
         else:
+            # Use original file - get local path for download
+            file_path = await self.file_repository.get_local_path(job_record.output_path)
+            storage_key = job_record.output_path
+        # Get MIME type and filename
+        mime_type = self.audio_mime_types.get(job_record.output_format, 'application/octet-stream')
         filename = self._create_filename(job_record, start_seconds, end_seconds)
         return DownloadResultDTO(
             file_path=file_path,
             media_type=mime_type,
             filename=filename,
+            processing_time=job_record.processing_time or 0,
+            storage_key=storage_key  # Add this for cleanup later
         )
     async def _create_trimmed_audio(self, job_record: Any,
                                    start_seconds: Optional[float],
                                    end_seconds: Optional[float]) -> str:
+        """Create trimmed audio file and return its storage key."""
         # Create deterministic output path
+        output_storage_key = await self.file_repository.create_deterministic_temp_path(
             job_id=job_record.id,
             start_seconds=start_seconds,
             end_seconds=end_seconds,
         )
         # Check if trimmed file already exists
+        if await self.file_repository.file_exists(output_storage_key):
             logger.info(f"Reusing existing trimmed audio for job {job_record.id}: "
                        f"start={start_seconds}, end={end_seconds}")
+            return output_storage_key
         logger.info(f"Creating new trimmed audio for job {job_record.id}: "
+                    f"start={start_seconds}, end={end_seconds}")
+        # Use file processor for trimming with local files
+        await self.file_processor.process_with_ffmpeg(
+            input_storage_key=job_record.output_path,
+            output_storage_key=output_storage_key,
+            ffmpeg_func=self.ffmpeg_service.trim_audio,
             start_seconds=start_seconds,
             end_seconds=end_seconds
         )
+        return output_storage_key
     def _extract_duration_from_media_info(self, media_info: dict) -> Optional[float]:
         """Extract duration from ffprobe media info."""

interfaces/api/routes/job_routes.py CHANGED Viewed

@@ -1,10 +1,10 @@
 """Job management API routes."""
-from fastapi import APIRouter, Depends, HTTPException, Request, Path, Query
 from fastapi.responses import FileResponse
 from typing import Any, Optional
-from ..dependencies import UseCases
-from ..responses import JobStatusResponse, ErrorResponse
 from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError, ValidationError
 router = APIRouter()
@@ -65,7 +65,9 @@ async def get_job_status(
                404: {"description": "Job not found"}
            })
 async def download_job_result(
     use_cases: UseCases,
     job_id: str = Path(..., description="The job ID of the completed extraction"),
     start: Optional[str] = Query(
         None,
@@ -94,7 +96,8 @@ async def download_job_result(
             job_id, start_seconds, end_seconds
         )
-        return FileResponse(
             path=result.file_path,
             media_type=result.media_type,
             filename=result.filename,
@@ -103,6 +106,19 @@ async def download_job_result(
                 "X-Processing-Time": str(result.processing_time)
             }
         )
     except JobNotFoundError as e:
         raise HTTPException(404, str(e))
     except JobNotCompletedError as e:

 """Job management API routes."""
+from fastapi import APIRouter, HTTPException, Path, Query, BackgroundTasks
 from fastapi.responses import FileResponse
 from typing import Any, Optional
+from ..dependencies import Services, UseCases
+from ..responses import JobStatusResponse
 from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError, ValidationError
 router = APIRouter()
                404: {"description": "Job not found"}
            })
 async def download_job_result(
+    background_tasks: BackgroundTasks,
     use_cases: UseCases,
+    services: Services,
     job_id: str = Path(..., description="The job ID of the completed extraction"),
     start: Optional[str] = Query(
         None,
             job_id, start_seconds, end_seconds
         )
+        # Create file response
+        response = FileResponse(
             path=result.file_path,
             media_type=result.media_type,
             filename=result.filename,
                 "X-Processing-Time": str(result.processing_time)
             }
         )
+        # Schedule cleanup for remote storage (R2)
+        # Check if this is remote storage by seeing if the repository has R2-specific methods
+        if hasattr(services.file_repository, '_get_client') and result.storage_key:
+            # This is R2 storage, schedule cleanup of the local temp file
+            background_tasks.add_task(
+                services.file_repository.cleanup_local_path,
+                result.file_path,
+                result.storage_key
+            )
+        return response
     except JobNotFoundError as e:
         raise HTTPException(404, str(e))
     except JobNotCompletedError as e: