Tadeas Kosek committed on
Commit
8b33160
·
1 Parent(s): 2ddc19d

add audio splitting

Browse files
application/use_cases/container.py CHANGED
@@ -57,9 +57,11 @@ class UseCaseContainer:
57
  job_repository=self.services.job_repository
58
  )
59
 
60
- # Download use case
61
  self.download_audio_result = DownloadAudioResultUseCase(
62
  job_repository=self.services.job_repository,
63
  file_repository=self.services.file_repository,
 
 
64
  audio_mime_types=self.settings.audio_mime_types
65
  )
 
57
  job_repository=self.services.job_repository
58
  )
59
 
60
+ # Download use case (updated with new dependencies)
61
  self.download_audio_result = DownloadAudioResultUseCase(
62
  job_repository=self.services.job_repository,
63
  file_repository=self.services.file_repository,
64
+ validation_service=self.validation_service,
65
+ ffmpeg_service=self.services.ffmpeg_service,
66
  audio_mime_types=self.settings.audio_mime_types
67
  )
application/use_cases/download_audio_result.py CHANGED
@@ -1,8 +1,11 @@
1
  """Use case for downloading audio results."""
2
  from typing import Any, Optional
3
  import logging
 
 
4
 
5
- from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError
 
6
  from ..dto.extraction_response import DownloadResultDTO
7
 
8
  logger = logging.getLogger(__name__)
@@ -16,6 +19,25 @@ class FileRepository:
16
  """Protocol for file repository."""
17
  async def file_exists(self, file_path: str) -> bool:
18
  ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  class DownloadAudioResultUseCase:
21
  """Use case for downloading completed audio."""
@@ -23,12 +45,18 @@ class DownloadAudioResultUseCase:
23
  def __init__(self,
24
  job_repository: JobRepository,
25
  file_repository: FileRepository,
 
 
26
  audio_mime_types: dict):
27
  self.job_repository = job_repository
28
  self.file_repository = file_repository
 
 
29
  self.audio_mime_types = audio_mime_types
30
 
31
- async def execute(self, job_id: str) -> DownloadResultDTO:
 
 
32
  """Get download information for completed job."""
33
  # Get job from repository
34
  job_record = await self.job_repository.get(job_id)
@@ -47,6 +75,30 @@ class DownloadAudioResultUseCase:
47
  if not await self.file_repository.file_exists(job_record.output_path):
48
  raise RuntimeError(f"Output file not found for job {job_id}")
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  # Get MIME type
51
  mime_type = self.audio_mime_types.get(
52
  job_record.output_format,
@@ -54,13 +106,93 @@ class DownloadAudioResultUseCase:
54
  )
55
 
56
  # Create filename
57
- import os
58
- original_name = os.path.splitext(job_record.filename)[0]
59
- filename = f"{original_name}.{job_record.output_format}"
60
 
61
  return DownloadResultDTO(
62
- file_path=job_record.output_path,
63
  media_type=mime_type,
64
  filename=filename,
65
  processing_time=job_record.processing_time or 0
66
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """Use case for downloading audio results."""
2
  from typing import Any, Optional
3
  import logging
4
+ import os
5
+ from pathlib import Path
6
 
7
+ from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError, ValidationError
8
+ from domain.services.validation_service import ValidationService
9
  from ..dto.extraction_response import DownloadResultDTO
10
 
11
  logger = logging.getLogger(__name__)
 
19
  """Protocol for file repository."""
20
  async def file_exists(self, file_path: str) -> bool:
21
  ...
22
async def create_temp_path(self, prefix: str, extension: str) -> str:
    """Return a path for a temporary file.

    Interface stub only: the concrete repository supplies the behavior.
    """
    ...
25
async def create_deterministic_temp_path(
    self,
    job_id: str,
    start_seconds: Optional[float],
    end_seconds: Optional[float],
    extension: str,
) -> str:
    """Return a temporary file path derived deterministically from the parameters.

    Interface stub only: the concrete repository supplies the behavior.
    """
    ...
31
+
32
class FFmpegService:
    """Interface of the FFmpeg service that the download use case depends on."""

    async def get_media_info(self, file_path: str) -> dict:
        """Return information about the media file at ``file_path``."""
        ...

    async def trim_audio(
        self,
        input_path: str,
        output_path: str,
        start_seconds: Optional[float],
        end_seconds: Optional[float],
    ) -> Any:
        """Trim ``input_path`` to the given time range, writing ``output_path``."""
        ...
41
 
42
  class DownloadAudioResultUseCase:
43
  """Use case for downloading completed audio."""
 
45
  def __init__(self,
46
  job_repository: JobRepository,
47
  file_repository: FileRepository,
48
+ validation_service: ValidationService,
49
+ ffmpeg_service: FFmpegService,
50
  audio_mime_types: dict):
51
  self.job_repository = job_repository
52
  self.file_repository = file_repository
53
+ self.validation_service = validation_service
54
+ self.ffmpeg_service = ffmpeg_service
55
  self.audio_mime_types = audio_mime_types
56
 
57
+ async def execute(self, job_id: str,
58
+ start_seconds: Optional[float] = None,
59
+ end_seconds: Optional[float] = None) -> DownloadResultDTO:
60
  """Get download information for completed job."""
61
  # Get job from repository
62
  job_record = await self.job_repository.get(job_id)
 
75
  if not await self.file_repository.file_exists(job_record.output_path):
76
  raise RuntimeError(f"Output file not found for job {job_id}")
77
 
78
+ # Determine if trimming is needed
79
+ needs_trimming = start_seconds is not None or end_seconds is not None
80
+
81
+ if needs_trimming:
82
+ # Get audio duration for validation
83
+ media_info = await self.ffmpeg_service.get_media_info(job_record.output_path)
84
+ audio_duration = self._extract_duration_from_media_info(media_info)
85
+
86
+ if audio_duration is None:
87
+ raise RuntimeError(f"Could not determine audio duration for job {job_id}")
88
+
89
+ # Validate time range
90
+ self.validation_service.validate_time_range(
91
+ start_seconds, end_seconds, audio_duration
92
+ )
93
+
94
+ # Create trimmed audio
95
+ file_path = await self._create_trimmed_audio(
96
+ job_record, start_seconds, end_seconds
97
+ )
98
+ else:
99
+ # Use original file
100
+ file_path = job_record.output_path
101
+
102
  # Get MIME type
103
  mime_type = self.audio_mime_types.get(
104
  job_record.output_format,
 
106
  )
107
 
108
  # Create filename
109
+ filename = self._create_filename(job_record, start_seconds, end_seconds)
 
 
110
 
111
  return DownloadResultDTO(
112
+ file_path=file_path,
113
  media_type=mime_type,
114
  filename=filename,
115
  processing_time=job_record.processing_time or 0
116
+ )
117
+
118
async def _create_trimmed_audio(self, job_record: Any,
                                start_seconds: Optional[float],
                                end_seconds: Optional[float]) -> str:
    """Return the path of the trimmed audio for a job, creating it if needed.

    The result is cached under a deterministic path derived from the job id
    and the requested time range, so repeated requests reuse the same file.

    Args:
        job_record: Job record providing ``id``, ``output_path`` and
            ``output_format``.
        start_seconds: Start of the segment in seconds, or None.
        end_seconds: End of the segment in seconds, or None.

    Returns:
        Path of the trimmed audio file.

    Raises:
        RuntimeError: If FFmpeg reports that trimming failed.
        OSError: If the finished file cannot be moved into its cache location.
    """
    # Deterministic cache location for this (job, start, end) combination
    cached_path = await self.file_repository.create_deterministic_temp_path(
        job_id=job_record.id,
        start_seconds=start_seconds,
        end_seconds=end_seconds,
        extension=job_record.output_format
    )

    # Reuse a previously trimmed file when one exists
    if await self.file_repository.file_exists(cached_path):
        logger.info(f"Reusing existing trimmed audio for job {job_record.id}: "
                    f"start={start_seconds}, end={end_seconds}")
        return cached_path

    logger.info(f"Creating new trimmed audio for job {job_record.id}: "
                f"start={start_seconds}, end={end_seconds}")

    # FFmpeg writes to a unique scratch file first. Writing directly to the
    # cache path would let a failed, timed-out or still-running trim leave a
    # truncated file there, which the existence check above would then serve
    # as if it were a complete result.
    scratch_path = await self.file_repository.create_temp_path(
        prefix=f"trim_{job_record.id}",
        extension=job_record.output_format
    )

    published = False
    try:
        result = await self.ffmpeg_service.trim_audio(
            input_path=job_record.output_path,
            output_path=scratch_path,
            start_seconds=start_seconds,
            end_seconds=end_seconds
        )

        if not result.success:
            raise RuntimeError(f"Audio trimming failed: {result.error}")

        # Publish the finished file in one step so readers never observe a
        # partially written cache entry.
        os.replace(scratch_path, cached_path)
        published = True
    finally:
        if not published:
            # Best-effort cleanup; the scratch file may never have been created.
            try:
                os.remove(scratch_path)
            except OSError:
                pass

    return cached_path
151
+
152
+ def _extract_duration_from_media_info(self, media_info: dict) -> Optional[float]:
153
+ """Extract duration from ffprobe media info."""
154
+ try:
155
+ # Try to get duration from format section
156
+ if 'format' in media_info and 'duration' in media_info['format']:
157
+ return float(media_info['format']['duration'])
158
+
159
+ # Try to get from first audio stream
160
+ if 'streams' in media_info:
161
+ for stream in media_info['streams']:
162
+ if stream.get('codec_type') == 'audio' and 'duration' in stream:
163
+ return float(stream['duration'])
164
+
165
+ return None
166
+ except (ValueError, KeyError, TypeError) as e:
167
+ logger.error(f"Error extracting duration from media info: {e}")
168
+ return None
169
+
170
+ def _create_filename(self, job_record: Any,
171
+ start_seconds: Optional[float],
172
+ end_seconds: Optional[float]) -> str:
173
+ """Create appropriate filename for the download."""
174
+ # Get base filename without extension
175
+ original_name = os.path.splitext(job_record.filename)[0]
176
+
177
+ # Add time range suffix if trimming
178
+ if start_seconds is not None or end_seconds is not None:
179
+ time_suffix = self._format_time_range_suffix(start_seconds, end_seconds)
180
+ filename = f"{original_name}_{time_suffix}.{job_record.output_format}"
181
+ else:
182
+ filename = f"{original_name}.{job_record.output_format}"
183
+
184
+ return filename
185
+
186
+ def _format_time_range_suffix(self, start_seconds: Optional[float],
187
+ end_seconds: Optional[float]) -> str:
188
+ """Format time range as a filename suffix."""
189
+ def seconds_to_time_str(seconds: float) -> str:
190
+ hours = int(seconds // 3600)
191
+ minutes = int((seconds % 3600) // 60)
192
+ secs = int(seconds % 60)
193
+ return f"{hours:02d}-{minutes:02d}-{secs:02d}"
194
+
195
+ start_str = seconds_to_time_str(start_seconds) if start_seconds is not None else "start"
196
+ end_str = seconds_to_time_str(end_seconds) if end_seconds is not None else "end"
197
+
198
+ return f"{start_str}_to_{end_str}"
domain/services/validation_service.py CHANGED
@@ -1,5 +1,6 @@
1
  """Domain validation service."""
2
- from typing import List
 
3
 
4
  from ..entities.video import Video
5
  from ..value_objects.audio_format import AudioFormat
@@ -49,4 +50,94 @@ class ValidationService:
49
 
50
  def can_process_directly(self, video: Video, threshold_mb: float) -> bool:
51
  """Check if video can be processed directly (not async)."""
52
- return not video.is_large_file(threshold_mb)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """Domain validation service."""
2
+ import re
3
+ from typing import List, Optional
4
 
5
  from ..entities.video import Video
6
  from ..value_objects.audio_format import AudioFormat
 
50
 
51
  def can_process_directly(self, video: Video, threshold_mb: float) -> bool:
52
  """Check if video can be processed directly (not async)."""
53
+ return not video.is_large_file(threshold_mb)
54
+
55
def validate_time_format(self, time_str: str) -> float:
    """Validate and convert HH:MM:SS format to seconds.

    Surrounding whitespace is ignored. A string that is empty after
    trimming is reported as empty rather than as a format error.

    Args:
        time_str: Time string in HH:MM:SS format

    Returns:
        float: Time in seconds

    Raises:
        ValidationError: If the string is empty or the format is invalid
    """
    trimmed = time_str.strip() if time_str else ""
    if not trimmed:
        raise ValidationError("Time string cannot be empty")

    # Pattern for HH:MM:SS format (one or two hour digits)
    match = re.fullmatch(r'(\d{1,2}):(\d{2}):(\d{2})', trimmed)

    if not match:
        raise ValidationError(
            f"Invalid time format '{time_str}'. Expected format: HH:MM:SS (e.g., 01:23:45)"
        )

    hours, minutes, seconds = map(int, match.groups())

    # Validate ranges
    if minutes >= 60:
        raise ValidationError(f"Invalid minutes '{minutes}'. Must be 0-59")

    if seconds >= 60:
        raise ValidationError(f"Invalid seconds '{seconds}'. Must be 0-59")

    # The pattern admits digits only, so the total can never be negative and
    # no separate sign check is needed.
    return float(hours * 3600 + minutes * 60 + seconds)
95
+
96
def validate_time_range(self, start_seconds: Optional[float],
                        end_seconds: Optional[float],
                        audio_duration: float) -> None:
    """Validate that time range is valid for the audio duration.

    A missing bound is treated as its documented meaning (start of audio or
    end of audio) when the minimum segment length is checked, so an
    open-ended request cannot describe an empty or sub-second segment.

    Args:
        start_seconds: Start time in seconds (None means start from beginning)
        end_seconds: End time in seconds (None means end at audio end)
        audio_duration: Total audio duration in seconds

    Raises:
        ValidationError: If time range is invalid
    """
    if audio_duration <= 0:
        raise ValidationError("Audio duration must be positive")

    # No bounds requested: nothing further to validate
    if start_seconds is None and end_seconds is None:
        return

    # Validate start time
    if start_seconds is not None:
        if start_seconds < 0:
            raise ValidationError("Start time cannot be negative")

        if start_seconds >= audio_duration:
            raise ValidationError(
                f"Start time {start_seconds:.1f}s exceeds audio duration {audio_duration:.1f}s"
            )

    # Validate end time
    if end_seconds is not None:
        if end_seconds < 0:
            raise ValidationError("End time cannot be negative")

        if end_seconds > audio_duration:
            raise ValidationError(
                f"End time {end_seconds:.1f}s exceeds audio duration {audio_duration:.1f}s"
            )

    # Validate range relationship
    if start_seconds is not None and end_seconds is not None:
        if start_seconds >= end_seconds:
            raise ValidationError(
                f"Start time {start_seconds:.1f}s must be less than end time {end_seconds:.1f}s"
            )

    # Check for minimum segment duration (at least 1 second), resolving an
    # omitted bound to the start or end of the audio.
    effective_start = 0.0 if start_seconds is None else start_seconds
    effective_end = audio_duration if end_seconds is None else end_seconds
    if effective_end - effective_start < 1.0:
        raise ValidationError(
            "Audio segment must be at least 1 second long"
        )
infrastructure/repositories/file_repository.py CHANGED
@@ -7,6 +7,7 @@ import uuid
7
  from datetime import datetime, timedelta
8
  import logging
9
  import inspect
 
10
 
11
  logger = logging.getLogger(__name__)
12
 
@@ -66,6 +67,66 @@ class FileSystemRepository:
66
  filename = f"{job_id}_output.{format}"
67
  return str(self.base_path / filename)
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  async def read_file(self, file_path: str) -> bytes:
70
  """Read file content."""
71
  async with aiofiles.open(file_path, 'rb') as f:
 
7
  from datetime import datetime, timedelta
8
  import logging
9
  import inspect
10
+ import asyncio
11
 
12
  logger = logging.getLogger(__name__)
13
 
 
67
  filename = f"{job_id}_output.{format}"
68
  return str(self.base_path / filename)
69
 
70
async def create_temp_path(self, prefix: str, extension: str) -> str:
    """Build a unique temporary file path under the repository base path.

    The prefix is reduced to alphanumerics, ``-`` and ``_``; the extension
    gets a leading dot if it lacks one; the first eight characters of a
    random UUID are inserted to make the name unique.
    """
    permitted_symbols = ('-', '_')
    safe_prefix = "".join(
        char for char in prefix if char.isalnum() or char in permitted_symbols
    )

    suffix = extension if extension.startswith('.') else f".{extension}"

    unique_tag = str(uuid.uuid4())[:8]
    temp_file = self.base_path / f"{safe_prefix}_{unique_tag}{suffix}"

    logger.debug(f"Created temp path: {temp_file}")
    return str(temp_file)
88
+
89
async def create_deterministic_temp_path(self, job_id: str,
                                         start_seconds: Optional[float],
                                         end_seconds: Optional[float],
                                         extension: str) -> str:
    """Build a file path that depends only on the job and the trim parameters.

    Identical parameters always yield the same path, which lets callers
    detect and reuse a file produced by an earlier request.

    Args:
        job_id: The job ID
        start_seconds: Start time in seconds (None for beginning)
        end_seconds: End time in seconds (None for end)
        extension: File extension (e.g., 'mp3'); leading dots are ignored

    Returns:
        str: Deterministic file path under the repository base path
    """
    import hashlib

    def label(value: Optional[float], placeholder: str) -> str:
        # A missing bound gets a fixed word so the key stays stable
        return placeholder if value is None else str(value)

    cache_key = f"{job_id}_{label(start_seconds, 'start')}_{label(end_seconds, 'end')}"

    # First eight hex digits of the MD5 digest, used purely for naming
    digest = hashlib.md5(cache_key.encode()).hexdigest()[:8]

    bare_extension = extension.lstrip('.')
    target = self.base_path / f"trim_{job_id}_{digest}.{bare_extension}"

    logger.debug(f"Created deterministic temp path: {target} for params: {cache_key}")
    return str(target)
129
+
130
  async def read_file(self, file_path: str) -> bytes:
131
  """Read file content."""
132
  async with aiofiles.open(file_path, 'rb') as f:
infrastructure/services/ffmpeg_service.py CHANGED
@@ -86,6 +86,73 @@ class FFmpegService:
86
  error=str(e)
87
  )
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  async def get_media_info(self, file_path: str) -> Dict[str, Any]:
90
  """Get media file information using ffprobe."""
91
  try:
@@ -142,6 +209,38 @@ class FFmpegService:
142
 
143
  return cmd
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  def _extract_duration(self, stderr_output: str) -> Optional[float]:
146
  """Extract duration from FFmpeg stderr output."""
147
  import re
 
86
  error=str(e)
87
  )
88
 
89
async def trim_audio(self, input_path: str, output_path: str,
                     start_seconds: Optional[float] = None,
                     end_seconds: Optional[float] = None) -> FFmpegResult:
    """Trim audio file to specified time range.

    Failures are reported through the returned result rather than raised.

    Args:
        input_path: Path to input audio file
        output_path: Path for output trimmed file
        start_seconds: Start time in seconds (None = start from beginning)
        end_seconds: End time in seconds (None = continue to end)

    Returns:
        FFmpegResult with success status and details
    """
    try:
        # Build trim command
        cmd = self._build_trim_command(input_path, output_path, start_seconds, end_seconds)

        logger.info(f"Running FFmpeg trim command: {' '.join(cmd)}")

        # Run FFmpeg
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )

        try:
            _stdout, stderr = await asyncio.wait_for(
                process.communicate(),
                timeout=self.timeout_seconds
            )
        except asyncio.TimeoutError:
            # Stop the process and reap it before reporting the timeout
            process.kill()
            await process.wait()
            return FFmpegResult(
                success=False,
                error=f"FFmpeg trim timeout after {self.timeout_seconds} seconds"
            )

        # Decode leniently: FFmpeg's stderr can contain bytes that are not
        # valid UTF-8, and a strict decode would raise and turn a successful
        # trim into a reported failure.
        stderr_text = stderr.decode(errors="replace")

        if process.returncode == 0:
            # Extract duration from stderr
            duration = self._extract_duration(stderr_text)

            return FFmpegResult(
                success=True,
                output_path=output_path,
                duration=duration,
                metadata={
                    "operation": "trim",
                    "start_seconds": start_seconds,
                    "end_seconds": end_seconds
                }
            )

        return FFmpegResult(
            success=False,
            error=f"FFmpeg trim failed: {stderr_text}"
        )

    except Exception as e:
        logger.error(f"FFmpeg trim error: {str(e)}")
        return FFmpegResult(
            success=False,
            error=str(e)
        )
155
+
156
  async def get_media_info(self, file_path: str) -> Dict[str, Any]:
157
  """Get media file information using ffprobe."""
158
  try:
 
209
 
210
  return cmd
211
 
212
+ def _build_trim_command(self, input_path: str, output_path: str,
213
+ start_seconds: Optional[float],
214
+ end_seconds: Optional[float]) -> list:
215
+ """Build FFmpeg command for audio trimming."""
216
+ cmd = [
217
+ self.ffmpeg_path,
218
+ '-i', input_path
219
+ ]
220
+
221
+ # Add start time if specified
222
+ if start_seconds is not None:
223
+ cmd.extend(['-ss', str(start_seconds)])
224
+
225
+ # Add end time or duration if specified
226
+ if end_seconds is not None:
227
+ if start_seconds is not None:
228
+ # Calculate duration
229
+ duration = end_seconds - start_seconds
230
+ cmd.extend(['-t', str(duration)])
231
+ else:
232
+ # Use -to for end time from beginning
233
+ cmd.extend(['-to', str(end_seconds)])
234
+
235
+ # Copy streams without re-encoding for faster processing
236
+ cmd.extend([
237
+ '-c', 'copy', # Copy codec (no re-encoding)
238
+ '-y', # Overwrite output
239
+ output_path
240
+ ])
241
+
242
+ return cmd
243
+
244
  def _extract_duration(self, stderr_output: str) -> Optional[float]:
245
  """Extract duration from FFmpeg stderr output."""
246
  import re
interfaces/api/routes/job_routes.py CHANGED
@@ -1,11 +1,11 @@
1
  """Job management API routes."""
2
- from fastapi import APIRouter, Depends, HTTPException, Request, Path
3
  from fastapi.responses import FileResponse
4
- from typing import Any
5
 
6
  from ..dependencies import UseCases
7
  from ..responses import JobStatusResponse, ErrorResponse
8
- from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError
9
 
10
  router = APIRouter()
11
 
@@ -45,22 +45,54 @@ async def get_job_status(
45
 
46
  @router.get("/jobs/{job_id}/download",
47
  summary="Download Extracted Audio",
48
- description="Download the audio file from a completed extraction job",
 
 
 
 
 
 
 
 
 
 
49
  responses={
50
  200: {
51
  "description": "Audio file",
52
  "content": {"audio/mpeg": {}, "audio/aac": {}, "audio/wav": {}}
53
  },
54
- 400: {"description": "Job not completed"},
55
  404: {"description": "Job not found"}
56
  })
57
  async def download_job_result(
58
  use_cases: UseCases,
59
- job_id: str = Path(..., description="The job ID of the completed extraction")
 
 
 
 
 
 
 
 
 
 
60
  ):
61
  """Download the result of a completed job."""
62
  try:
63
- result = await use_cases.download_audio_result.execute(job_id)
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  return FileResponse(
66
  path=result.file_path,
@@ -75,5 +107,44 @@ async def download_job_result(
75
  raise HTTPException(404, str(e))
76
  except JobNotCompletedError as e:
77
  raise HTTPException(400, str(e))
 
 
 
 
 
78
  except Exception as e:
79
- raise HTTPException(500, f"Error downloading result: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """Job management API routes."""
2
+ from fastapi import APIRouter, Depends, HTTPException, Request, Path, Query
3
  from fastapi.responses import FileResponse
4
+ from typing import Any, Optional
5
 
6
  from ..dependencies import UseCases
7
  from ..responses import JobStatusResponse, ErrorResponse
8
+ from domain.exceptions.domain_exceptions import JobNotFoundError, JobNotCompletedError, ValidationError
9
 
10
  router = APIRouter()
11
 
 
45
 
46
  @router.get("/jobs/{job_id}/download",
47
  summary="Download Extracted Audio",
48
+ description="""
49
+ Download the audio file from a completed extraction job.
50
+
51
+ Optionally specify start and end times to download only a portion of the audio:
52
+ - start: Start time in HH:MM:SS format (e.g., 00:01:30 for 1 minute 30 seconds)
53
+ - end: End time in HH:MM:SS format (e.g., 00:03:45 for 3 minutes 45 seconds)
54
+
55
+ If neither parameter is provided, the complete audio file is returned.
56
+ If only start is provided, audio from start time to the end is returned.
57
+ If only end is provided, audio from beginning to end time is returned.
58
+ """,
59
  responses={
60
  200: {
61
  "description": "Audio file",
62
  "content": {"audio/mpeg": {}, "audio/aac": {}, "audio/wav": {}}
63
  },
64
+ 400: {"description": "Job not completed or invalid time parameters"},
65
  404: {"description": "Job not found"}
66
  })
67
  async def download_job_result(
68
  use_cases: UseCases,
69
+ job_id: str = Path(..., description="The job ID of the completed extraction"),
70
+ start: Optional[str] = Query(
71
+ None,
72
+ description="Start time in HH:MM:SS format (e.g., 00:01:30)",
73
+ example="00:01:30"
74
+ ),
75
+ end: Optional[str] = Query(
76
+ None,
77
+ description="End time in HH:MM:SS format (e.g., 00:03:45)",
78
+ example="00:03:45"
79
+ )
80
  ):
81
  """Download the result of a completed job."""
82
  try:
83
+ # Parse time parameters if provided (validation will be done in use case)
84
+ start_seconds = None
85
+ end_seconds = None
86
+
87
+ if start is not None:
88
+ start_seconds = _parse_time_to_seconds(start)
89
+
90
+ if end is not None:
91
+ end_seconds = _parse_time_to_seconds(end)
92
+
93
+ result = await use_cases.download_audio_result.execute(
94
+ job_id, start_seconds, end_seconds
95
+ )
96
 
97
  return FileResponse(
98
  path=result.file_path,
 
107
  raise HTTPException(404, str(e))
108
  except JobNotCompletedError as e:
109
  raise HTTPException(400, str(e))
110
+ except ValidationError as e:
111
+ raise HTTPException(400, str(e))
112
+ except ValueError as e:
113
+ # Handle time parsing errors
114
+ raise HTTPException(400, f"Invalid time format: {str(e)}")
115
  except Exception as e:
116
+ raise HTTPException(500, f"Error downloading result: {str(e)}")
117
+
118
+
119
+ def _parse_time_to_seconds(time_str: str) -> float:
120
+ """Parse HH:MM:SS format to seconds."""
121
+ import re
122
+
123
+ if not time_str:
124
+ raise ValueError("Time string cannot be empty")
125
+
126
+ # Pattern for HH:MM:SS format
127
+ pattern = r'^(\d{1,2}):(\d{2}):(\d{2})$'
128
+ match = re.match(pattern, time_str.strip())
129
+
130
+ if not match:
131
+ raise ValueError(
132
+ f"Invalid time format '{time_str}'. Expected format: HH:MM:SS (e.g., 01:23:45)"
133
+ )
134
+
135
+ hours, minutes, seconds = map(int, match.groups())
136
+
137
+ # Validate ranges
138
+ if minutes >= 60:
139
+ raise ValueError(f"Invalid minutes '{minutes}'. Must be 0-59")
140
+
141
+ if seconds >= 60:
142
+ raise ValueError(f"Invalid seconds '{seconds}'. Must be 0-59")
143
+
144
+ # Convert to total seconds
145
+ total_seconds = hours * 3600 + minutes * 60 + seconds
146
+
147
+ if total_seconds < 0:
148
+ raise ValueError("Time cannot be negative")
149
+
150
+ return float(total_seconds)