abhisheksan committed on
Commit
d09ee28
·
1 Parent(s): 21f2df3

Add Whisper transcription support and config

Browse files
.env.example CHANGED
@@ -14,14 +14,15 @@ RATE_LIMIT_WINDOW=60
14
  # Logging configuration
15
  LOG_LEVEL=INFO
16
 
17
- # yt-dlp configuration
18
- YT_DLP_TIMEOUT_LIST=30
19
- YT_DLP_TIMEOUT_DOWNLOAD=60
 
 
 
20
 
21
  # Embedding model configuration
22
  EMBEDDING_MODEL=mixedbread-ai/mxbai-embed-large-v1
23
- # Models cache directory (set by Docker, optional for local dev)
24
- # SENTENCE_TRANSFORMERS_HOME=models
25
 
26
  # Server configuration
27
  HOST=0.0.0.0
@@ -29,4 +30,6 @@ PORT=8000
29
  RELOAD=true
30
 
31
  # Container-specific (set automatically in Dockerfile)
32
- # DISABLE_FILE_LOGGING=true
 
 
 
14
  # Logging configuration
15
  LOG_LEVEL=INFO
16
 
17
+ # yt-dlp configuration (audio download timeout in seconds)
18
+ YT_DLP_TIMEOUT_DOWNLOAD=120
19
+
20
+ # Whisper configuration
21
+ # Models: tiny, base, small, medium, large-v2, large-v3
22
+ WHISPER_MODEL=base
23
 
24
  # Embedding model configuration
25
  EMBEDDING_MODEL=mixedbread-ai/mxbai-embed-large-v1
 
 
26
 
27
  # Server configuration
28
  HOST=0.0.0.0
 
30
  RELOAD=true
31
 
32
  # Container-specific (set automatically in Dockerfile)
33
+ # DISABLE_FILE_LOGGING=true
34
+ # SENTENCE_TRANSFORMERS_HOME=/app/models
35
+ # WHISPER_MODELS_DIR=/app/models
Dockerfile CHANGED
@@ -45,6 +45,7 @@ ENV PYTHONPATH=/app
45
  ENV HF_HOME=/app/models
46
  ENV TRANSFORMERS_CACHE=/app/models
47
  ENV SENTENCE_TRANSFORMERS_HOME=/app/models
 
48
 
49
  # Disable file logging in container
50
  ENV DISABLE_FILE_LOGGING=true
 
45
  ENV HF_HOME=/app/models
46
  ENV TRANSFORMERS_CACHE=/app/models
47
  ENV SENTENCE_TRANSFORMERS_HOME=/app/models
48
+ ENV WHISPER_MODELS_DIR=/app/models
49
 
50
  # Disable file logging in container
51
  ENV DISABLE_FILE_LOGGING=true
app/apis/subtitles/service.py CHANGED
@@ -1,10 +1,12 @@
1
- """Subtitle extraction service using yt-dlp with caching."""
2
 
3
  import asyncio
 
4
  import sys
5
  import tempfile
 
6
  from pathlib import Path
7
- from typing import List, Optional, Tuple
8
 
9
  from cachetools import TTLCache
10
 
@@ -15,109 +17,89 @@ from app.core.exceptions import (
15
  SubtitleExtractionError,
16
  InvalidVideoURLError
17
  )
18
- from app.apis.subtitles.utils import extract_video_id, convert_vtt_to_text
 
19
 
 
20
 
21
  SUBTITLE_CACHE: TTLCache = TTLCache(maxsize=100, ttl=3600)
22
-
23
- ALTERNATIVE_LANGUAGES = {
24
- "en": ["en-US", "en-GB", "en-orig"],
25
- "es": ["es-ES", "es-MX", "es-419"],
26
- "fr": ["fr-FR", "fr-CA"],
27
- "de": ["de-DE"],
28
- "it": ["it-IT"],
29
- "pt": ["pt-BR", "pt-PT"],
30
- "ja": ["ja-JP"],
31
- "ko": ["ko-KR"],
32
- "zh": ["zh-CN", "zh-TW", "zh-Hans", "zh-Hant"]
33
- }
34
 
35
 
36
  class SubtitleService:
37
- """Service for extracting subtitles from YouTube videos."""
38
 
39
  def __init__(self) -> None:
40
- self.timeout_list = settings.yt_dlp_timeout_list
 
 
41
  self.timeout_download = settings.yt_dlp_timeout_download
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  async def extract_subtitles(self, url: str, lang: str = "en") -> Tuple[str, List[str]]:
44
  """
45
- Extract subtitles from a YouTube video.
46
 
47
  Args:
48
  url: YouTube video URL
49
- lang: Language code for subtitles
50
 
51
  Returns:
52
  Tuple of (video_id, subtitle_lines)
53
-
54
- Raises:
55
- SubtitlesNotFoundError: If no subtitles are found
56
- DownloadTimeoutError: If the operation times out
57
- SubtitleExtractionError: If extraction fails
58
  """
59
  video_id = extract_video_id(url)
60
  cache_key = f"{video_id}:{lang}"
61
 
62
  if cache_key in SUBTITLE_CACHE:
 
63
  return SUBTITLE_CACHE[cache_key]
64
 
65
  with tempfile.TemporaryDirectory() as temp_dir:
66
- subtitle_content = await self._download_subtitles(url, lang, temp_dir, video_id)
67
 
68
- if not subtitle_content:
69
- subtitle_content = await self._try_alternative_languages(url, lang, temp_dir, video_id)
70
 
71
- if not subtitle_content:
72
- raise SubtitlesNotFoundError(f"No subtitles available in language '{lang}' or alternatives")
73
 
74
- clean_lines = convert_vtt_to_text(subtitle_content)
 
75
 
76
- if not clean_lines:
77
- raise SubtitlesNotFoundError("Subtitles found but appear to be empty after cleaning")
78
-
79
- result = (video_id, clean_lines)
80
  SUBTITLE_CACHE[cache_key] = result
81
  return result
82
 
83
- async def _try_alternative_languages(
84
- self, url: str, lang: str, temp_dir: str, video_id: str
85
- ) -> Optional[str]:
86
- """Try downloading subtitles in alternative language codes concurrently."""
87
- alt_langs = ALTERNATIVE_LANGUAGES.get(lang, [f"{lang}-{lang.upper()}"])
88
-
89
- tasks = [
90
- self._download_subtitles(url, alt_lang, temp_dir, video_id)
91
- for alt_lang in alt_langs
92
- ]
93
-
94
- results = await asyncio.gather(*tasks, return_exceptions=True)
95
-
96
- for result in results:
97
- if isinstance(result, str) and result:
98
- return result
99
-
100
- return None
101
-
102
- async def _download_subtitles(
103
- self, url: str, lang: str, temp_dir: str, video_id: str
104
- ) -> Optional[str]:
105
- """Download subtitles for a specific language."""
106
- output_template = str(Path(temp_dir) / f"{video_id}.%(ext)s")
107
 
108
  cmd = [
109
  sys.executable, "-m", "yt_dlp",
110
- "--write-subs",
111
- "--write-auto-subs",
112
- "--sub-lang", lang,
113
- "--skip-download",
114
  "--no-warnings",
115
- "--output", output_template,
116
- "--user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
117
  url
118
  ]
119
 
120
  try:
 
121
  process = await asyncio.create_subprocess_exec(
122
  *cmd,
123
  stdout=asyncio.subprocess.PIPE,
@@ -131,24 +113,56 @@ class SubtitleService:
131
 
132
  if process.returncode != 0:
133
  error_msg = stderr.decode('utf-8', errors='ignore')
 
134
  if "Video unavailable" in error_msg or "Private video" in error_msg:
135
  raise InvalidVideoURLError("Video is unavailable, private, or does not exist")
136
- return None
137
-
138
- temp_path = Path(temp_dir)
139
- subtitle_files = list(temp_path.glob(f"{video_id}*.vtt"))
140
 
141
- if not subtitle_files:
142
- return None
 
 
 
143
 
144
- return subtitle_files[0].read_text(encoding='utf-8', errors='ignore')
145
 
146
  except asyncio.TimeoutError:
147
- raise DownloadTimeoutError(f"Timeout while downloading subtitles for language '{lang}'")
148
- except (InvalidVideoURLError, DownloadTimeoutError):
149
- raise
150
- except Exception as e:
151
- raise SubtitleExtractionError(f"Error downloading subtitles: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
 
154
  subtitle_service = SubtitleService()
 
1
+ """Subtitle extraction service using yt-dlp audio download and Whisper transcription."""
2
 
3
  import asyncio
4
+ import os
5
  import sys
6
  import tempfile
7
+ import threading
8
  from pathlib import Path
9
+ from typing import List, Tuple
10
 
11
  from cachetools import TTLCache
12
 
 
17
  SubtitleExtractionError,
18
  InvalidVideoURLError
19
  )
20
+ from app.apis.subtitles.utils import extract_video_id
21
+ from app.core.logging import get_logger
22
 
23
+ logger = get_logger(__name__)
24
 
25
  SUBTITLE_CACHE: TTLCache = TTLCache(maxsize=100, ttl=3600)
26
+ MODELS_DIR = os.environ.get("WHISPER_MODELS_DIR", os.environ.get("SENTENCE_TRANSFORMERS_HOME", "models"))
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  class SubtitleService:
30
+ """Service for extracting subtitles from videos via Whisper transcription."""
31
 
32
  def __init__(self) -> None:
33
+ self._whisper_model = None
34
+ self._model_name = settings.whisper_model
35
+ self._lock = threading.Lock()
36
  self.timeout_download = settings.yt_dlp_timeout_download
37
 
38
+ def _load_whisper_model(self):
39
+ """Lazy load the Whisper model on first use."""
40
+ if self._whisper_model is None:
41
+ with self._lock:
42
+ if self._whisper_model is None:
43
+ logger.info(f"Loading Whisper model: {self._model_name}")
44
+ logger.info(f"Models directory: {MODELS_DIR}")
45
+ from faster_whisper import WhisperModel
46
+ self._whisper_model = WhisperModel(
47
+ self._model_name,
48
+ device="cpu",
49
+ compute_type="int8",
50
+ download_root=MODELS_DIR
51
+ )
52
+ logger.info("Whisper model loaded successfully")
53
+
54
  async def extract_subtitles(self, url: str, lang: str = "en") -> Tuple[str, List[str]]:
55
  """
56
+ Extract subtitles from a video by downloading audio and transcribing with Whisper.
57
 
58
  Args:
59
  url: YouTube video URL
60
+ lang: Language code for transcription
61
 
62
  Returns:
63
  Tuple of (video_id, subtitle_lines)
 
 
 
 
 
64
  """
65
  video_id = extract_video_id(url)
66
  cache_key = f"{video_id}:{lang}"
67
 
68
  if cache_key in SUBTITLE_CACHE:
69
+ logger.info(f"Cache hit for {cache_key}")
70
  return SUBTITLE_CACHE[cache_key]
71
 
72
  with tempfile.TemporaryDirectory() as temp_dir:
73
+ audio_path = await self._download_audio(url, temp_dir, video_id)
74
 
75
+ if not audio_path or not audio_path.exists():
76
+ raise SubtitleExtractionError("Failed to download audio from video")
77
 
78
+ subtitle_lines = await self._transcribe_audio(audio_path, lang)
 
79
 
80
+ if not subtitle_lines:
81
+ raise SubtitlesNotFoundError("Transcription produced no text")
82
 
83
+ result = (video_id, subtitle_lines)
 
 
 
84
  SUBTITLE_CACHE[cache_key] = result
85
  return result
86
 
87
+ async def _download_audio(self, url: str, temp_dir: str, video_id: str) -> Path:
88
+ """Download audio from video URL using yt-dlp."""
89
+ output_path = Path(temp_dir) / f"{video_id}.mp3"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  cmd = [
92
  sys.executable, "-m", "yt_dlp",
93
+ "--extract-audio",
94
+ "--audio-format", "mp3",
95
+ "--audio-quality", "5",
 
96
  "--no-warnings",
97
+ "--output", str(Path(temp_dir) / f"{video_id}.%(ext)s"),
 
98
  url
99
  ]
100
 
101
  try:
102
+ logger.info(f"Downloading audio for video: {video_id}")
103
  process = await asyncio.create_subprocess_exec(
104
  *cmd,
105
  stdout=asyncio.subprocess.PIPE,
 
113
 
114
  if process.returncode != 0:
115
  error_msg = stderr.decode('utf-8', errors='ignore')
116
+ logger.error(f"yt-dlp error: {error_msg}")
117
  if "Video unavailable" in error_msg or "Private video" in error_msg:
118
  raise InvalidVideoURLError("Video is unavailable, private, or does not exist")
119
+ raise SubtitleExtractionError(f"Failed to download audio: {error_msg[:200]}")
 
 
 
120
 
121
+ # Find the downloaded audio file
122
+ audio_files = list(Path(temp_dir).glob(f"{video_id}.*"))
123
+ if audio_files:
124
+ logger.info(f"Audio downloaded: {audio_files[0]}")
125
+ return audio_files[0]
126
 
127
+ raise SubtitleExtractionError("Audio file not found after download")
128
 
129
  except asyncio.TimeoutError:
130
+ raise DownloadTimeoutError("Timeout while downloading audio")
131
+
132
+ async def _transcribe_audio(self, audio_path: Path, lang: str) -> List[str]:
133
+ """Transcribe audio file using Whisper."""
134
+ self._load_whisper_model()
135
+
136
+ logger.info(f"Transcribing audio: {audio_path}")
137
+
138
+ # Run transcription in thread pool to not block event loop
139
+ loop = asyncio.get_event_loop()
140
+ segments = await loop.run_in_executor(
141
+ None,
142
+ self._run_transcription,
143
+ audio_path,
144
+ lang
145
+ )
146
+
147
+ return segments
148
+
149
+ def _run_transcription(self, audio_path: Path, lang: str) -> List[str]:
150
+ """Run the actual transcription (blocking)."""
151
+ segments, info = self._whisper_model.transcribe(
152
+ str(audio_path),
153
+ language=lang if lang != "auto" else None,
154
+ beam_size=5,
155
+ vad_filter=True
156
+ )
157
+
158
+ lines = []
159
+ for segment in segments:
160
+ text = segment.text.strip()
161
+ if text:
162
+ lines.append(text)
163
+
164
+ logger.info(f"Transcription complete: {len(lines)} segments")
165
+ return lines
166
 
167
 
168
  subtitle_service = SubtitleService()
app/apis/subtitles/utils.py CHANGED
@@ -4,96 +4,30 @@ import re
4
  from typing import List
5
 
6
 
7
- def clean_subtitle_text(lines: List[str]) -> List[str]:
8
- """
9
- Clean subtitle text by removing timestamps, duplicates, and empty lines.
10
-
11
- Args:
12
- lines: Raw subtitle lines
13
-
14
- Returns:
15
- Cleaned subtitle lines
16
- """
17
- clean_lines = []
18
- seen_recently = set() # Track recent lines to avoid duplicates
19
-
20
- for line in lines:
21
- line = line.strip()
22
-
23
- # Skip VTT header lines
24
- if line.startswith(("WEBVTT", "NOTE")):
25
- continue
26
-
27
- # Skip timestamp lines (format: 00:00:00.000 --> 00:00:00.000)
28
- if "-->" in line and re.match(r'\d+:\d+:\d+\.\d+', line):
29
- continue
30
-
31
- # Skip empty lines and sequence numbers
32
- if not line or line.isdigit():
33
- continue
34
-
35
- # Remove HTML tags that might be in subtitles
36
- line = re.sub(r'<[^>]+>', '', line)
37
-
38
- # Remove common subtitle formatting
39
- line = re.sub(r'^\d+$', '', line) # Remove standalone numbers
40
- line = re.sub(r'^-\s*', '', line) # Remove leading dashes
41
-
42
- # Skip if line is too short or just punctuation
43
- if len(line.strip()) < 3:
44
- continue
45
-
46
- # Avoid recent duplicates (check last 5 lines)
47
- if line not in seen_recently:
48
- clean_lines.append(line)
49
- seen_recently.add(line)
50
-
51
- # Keep only last 5 lines in memory for duplicate checking
52
- if len(seen_recently) > 5:
53
- seen_recently.clear()
54
-
55
- return clean_lines
56
-
57
-
58
  def extract_video_id(url: str) -> str:
59
  """
60
  Extract video ID from YouTube URL.
61
-
62
  Args:
63
  url: YouTube URL
64
-
65
  Returns:
66
- Video ID
67
  """
68
- # Handle different YouTube URL formats
69
  patterns = [
70
  r'(?:youtube\.com/watch\?v=|youtu\.be/)([a-zA-Z0-9_-]+)',
71
  r'youtube\.com/embed/([a-zA-Z0-9_-]+)',
72
  r'youtube\.com/v/([a-zA-Z0-9_-]+)'
73
  ]
74
-
75
  for pattern in patterns:
76
  match = re.search(pattern, url)
77
  if match:
78
  return match.group(1)
79
-
80
  # Fallback: try to extract anything that looks like a video ID
81
  match = re.search(r'[a-zA-Z0-9_-]{11}', url)
82
  if match:
83
  return match.group(0)
84
-
85
- return "unknown"
86
 
87
-
88
- def convert_vtt_to_text(vtt_content: str) -> List[str]:
89
- """
90
- Convert VTT subtitle content to clean text lines.
91
-
92
- Args:
93
- vtt_content: Raw VTT file content
94
-
95
- Returns:
96
- Cleaned text lines
97
- """
98
- lines = vtt_content.split('\n')
99
- return clean_subtitle_text(lines)
 
4
  from typing import List
5
 
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  def extract_video_id(url: str) -> str:
8
  """
9
  Extract video ID from YouTube URL.
10
+
11
  Args:
12
  url: YouTube URL
13
+
14
  Returns:
15
+ Video ID string
16
  """
 
17
  patterns = [
18
  r'(?:youtube\.com/watch\?v=|youtu\.be/)([a-zA-Z0-9_-]+)',
19
  r'youtube\.com/embed/([a-zA-Z0-9_-]+)',
20
  r'youtube\.com/v/([a-zA-Z0-9_-]+)'
21
  ]
22
+
23
  for pattern in patterns:
24
  match = re.search(pattern, url)
25
  if match:
26
  return match.group(1)
27
+
28
  # Fallback: try to extract anything that looks like a video ID
29
  match = re.search(r'[a-zA-Z0-9_-]{11}', url)
30
  if match:
31
  return match.group(0)
 
 
32
 
33
+ return "unknown"
 
 
 
 
 
 
 
 
 
 
 
 
app/core/config.py CHANGED
@@ -22,8 +22,10 @@ class Settings(BaseSettings):
22
  log_level: str = "INFO"
23
 
24
  # yt-dlp configuration
25
- yt_dlp_timeout_list: int = 30
26
- yt_dlp_timeout_download: int = 60
 
 
27
 
28
  # Embedding configuration
29
  embedding_model: str = "mixedbread-ai/mxbai-embed-large-v1"
 
22
  log_level: str = "INFO"
23
 
24
  # yt-dlp configuration
25
+ yt_dlp_timeout_download: int = 120
26
+
27
+ # Whisper configuration
28
+ whisper_model: str = "base"
29
 
30
  # Embedding configuration
31
  embedding_model: str = "mixedbread-ai/mxbai-embed-large-v1"
poetry.lock CHANGED
@@ -33,6 +33,66 @@ doc = ["Sphinx", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-
33
  test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4) ; python_version < \"3.8\"", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17) ; python_version < \"3.12\" and platform_python_implementation == \"CPython\" and platform_system != \"Windows\""]
34
  trio = ["trio (<0.22)"]
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  [[package]]
37
  name = "black"
38
  version = "23.12.1"
@@ -253,6 +313,24 @@ files = [
253
  {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
254
  ]
255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  [[package]]
257
  name = "coverage"
258
  version = "7.10.6"
@@ -354,6 +432,56 @@ files = [
354
  [package.extras]
355
  toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  [[package]]
358
  name = "deprecated"
359
  version = "1.3.1"
@@ -393,6 +521,29 @@ typing-extensions = ">=4.8.0"
393
  [package.extras]
394
  all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"]
395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
  [[package]]
397
  name = "filelock"
398
  version = "3.20.3"
@@ -422,6 +573,17 @@ mccabe = ">=0.7.0,<0.8.0"
422
  pycodestyle = ">=2.11.0,<2.12.0"
423
  pyflakes = ">=3.1.0,<3.2.0"
424
 
 
 
 
 
 
 
 
 
 
 
 
425
  [[package]]
426
  name = "fsspec"
427
  version = "2026.1.0"
@@ -652,6 +814,21 @@ testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.
652
  torch = ["safetensors[torch]", "torch"]
653
  typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"]
654
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  [[package]]
656
  name = "idna"
657
  version = "3.10"
@@ -1299,6 +1476,46 @@ files = [
1299
  {file = "nvidia_nvtx_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:2fb11a4af04a5e6c84073e6404d26588a34afd35379f0855a99797897efa75c0"},
1300
  ]
1301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1302
  [[package]]
1303
  name = "packaging"
1304
  version = "25.0"
@@ -1465,6 +1682,26 @@ files = [
1465
  dev = ["pre-commit", "tox"]
1466
  testing = ["coverage", "pytest", "pytest-benchmark"]
1467
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1468
  [[package]]
1469
  name = "pycodestyle"
1470
  version = "2.11.1"
@@ -1647,6 +1884,22 @@ files = [
1647
  {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"},
1648
  ]
1649
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1650
  [[package]]
1651
  name = "pytest"
1652
  version = "7.4.4"
@@ -2152,7 +2405,6 @@ description = "Easily download, build, install, upgrade, and uninstall Python pa
2152
  optional = false
2153
  python-versions = ">=3.9"
2154
  groups = ["main"]
2155
- markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" or python_version >= \"3.12\""
2156
  files = [
2157
  {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"},
2158
  {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"},
@@ -2954,4 +3206,4 @@ test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
2954
  [metadata]
2955
  lock-version = "2.1"
2956
  python-versions = "^3.11"
2957
- content-hash = "c179a0fd91b7ec088e821057b465cd5ca717460f85d0d55e3f481456810881c2"
 
33
  test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4) ; python_version < \"3.8\"", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17) ; python_version < \"3.12\" and platform_python_implementation == \"CPython\" and platform_system != \"Windows\""]
34
  trio = ["trio (<0.22)"]
35
 
36
+ [[package]]
37
+ name = "av"
38
+ version = "16.1.0"
39
+ description = "Pythonic bindings for FFmpeg's libraries."
40
+ optional = false
41
+ python-versions = ">=3.10"
42
+ groups = ["main"]
43
+ files = [
44
+ {file = "av-16.1.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:2395748b0c34fe3a150a1721e4f3d4487b939520991b13e7b36f8926b3b12295"},
45
+ {file = "av-16.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:72d7ac832710a158eeb7a93242370aa024a7646516291c562ee7f14a7ea881fd"},
46
+ {file = "av-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6cbac833092e66b6b0ac4d81ab077970b8ca874951e9c3974d41d922aaa653ed"},
47
+ {file = "av-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:eb990672d97c18f99c02f31c8d5750236f770ffe354b5a52c5f4d16c5e65f619"},
48
+ {file = "av-16.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:05ad70933ac3b8ef896a820ea64b33b6cca91a5fac5259cb9ba7fa010435be15"},
49
+ {file = "av-16.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d831a1062a3c47520bf99de6ec682bd1d64a40dfa958e5457bb613c5270e7ce3"},
50
+ {file = "av-16.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:358ab910fef3c5a806c55176f2b27e5663b33c4d0a692dafeb049c6ed71f8aff"},
51
+ {file = "av-16.1.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e88ad64ee9d2b9c4c5d891f16c22ae78e725188b8926eb88187538d9dd0b232f"},
52
+ {file = "av-16.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cb296073fa6935724de72593800ba86ae49ed48af03960a4aee34f8a611f442b"},
53
+ {file = "av-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:720edd4d25aa73723c1532bb0597806d7b9af5ee34fc02358782c358cfe2f879"},
54
+ {file = "av-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c7f2bc703d0df260a1fdf4de4253c7f5500ca9fc57772ea241b0cb241bcf972e"},
55
+ {file = "av-16.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d69c393809babada7d54964d56099e4b30a3e1f8b5736ca5e27bd7be0e0f3c83"},
56
+ {file = "av-16.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:441892be28582356d53f282873c5a951592daaf71642c7f20165e3ddcb0b4c63"},
57
+ {file = "av-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:273a3e32de64819e4a1cd96341824299fe06f70c46f2288b5dc4173944f0fd62"},
58
+ {file = "av-16.1.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:640f57b93f927fba8689f6966c956737ee95388a91bd0b8c8b5e0481f73513d6"},
59
+ {file = "av-16.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:ae3fb658eec00852ebd7412fdc141f17f3ddce8afee2d2e1cf366263ad2a3b35"},
60
+ {file = "av-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ee558d9c02a142eebcbe55578a6d817fedfde42ff5676275504e16d07a7f86"},
61
+ {file = "av-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7ae547f6d5fa31763f73900d43901e8c5fa6367bb9a9840978d57b5a7ae14ed2"},
62
+ {file = "av-16.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8cf065f9d438e1921dc31fc7aa045790b58aee71736897866420d80b5450f62a"},
63
+ {file = "av-16.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a345877a9d3cc0f08e2bc4ec163ee83176864b92587afb9d08dff50f37a9a829"},
64
+ {file = "av-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:f49243b1d27c91cd8c66fdba90a674e344eb8eb917264f36117bf2b6879118fd"},
65
+ {file = "av-16.1.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:ce2a1b3d8bf619f6c47a9f28cfa7518ff75ddd516c234a4ee351037b05e6a587"},
66
+ {file = "av-16.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:408dbe6a2573ca58a855eb8cd854112b33ea598651902c36709f5f84c991ed8e"},
67
+ {file = "av-16.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:57f657f86652a160a8a01887aaab82282f9e629abf94c780bbdbb01595d6f0f7"},
68
+ {file = "av-16.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:adbad2b355c2ee4552cac59762809d791bda90586d134a33c6f13727fb86cb3a"},
69
+ {file = "av-16.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f42e1a68ec2aebd21f7eb6895be69efa6aa27eec1670536876399725bbda4b99"},
70
+ {file = "av-16.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:58fe47aeaef0f100c40ec8a5de9abbd37f118d3ca03829a1009cf288e9aef67c"},
71
+ {file = "av-16.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:565093ebc93b2f4b76782589564869dadfa83af5b852edebedd8fee746457d06"},
72
+ {file = "av-16.1.0-cp313-cp313t-macosx_11_0_x86_64.whl", hash = "sha256:574081a24edb98343fd9f473e21ae155bf61443d4ec9d7708987fa597d6b04b2"},
73
+ {file = "av-16.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:9ab00ea29c25ebf2ea1d1e928d7babb3532d562481c5d96c0829212b70756ad0"},
74
+ {file = "av-16.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a84a91188c1071f238a9523fd42dbe567fb2e2607b22b779851b2ce0eac1b560"},
75
+ {file = "av-16.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c2cd0de4dd022a7225ff224fde8e7971496d700be41c50adaaa26c07bb50bf97"},
76
+ {file = "av-16.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0816143530624a5a93bc5494f8c6eeaf77549b9366709c2ac8566c1e9bff6df5"},
77
+ {file = "av-16.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e3a28053af29644696d0c007e897d19b1197585834660a54773e12a40b16974c"},
78
+ {file = "av-16.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2e3e67144a202b95ed299d165232533989390a9ea3119d37eccec697dc6dbb0c"},
79
+ {file = "av-16.1.0-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:39a634d8e5a87e78ea80772774bfd20c0721f0d633837ff185f36c9d14ffede4"},
80
+ {file = "av-16.1.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:0ba32fb9e9300948a7fa9f8a3fc686e6f7f77599a665c71eb2118fdfd2c743f9"},
81
+ {file = "av-16.1.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:ca04d17815182d34ce3edc53cbda78a4f36e956c0fd73e3bab249872a831c4d7"},
82
+ {file = "av-16.1.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ee0e8de2e124a9ef53c955fe2add6ee7c56cc8fd83318265549e44057db77142"},
83
+ {file = "av-16.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:22bf77a2f658827043a1e184b479c3bf25c4c43ab32353677df2d119f080e28f"},
84
+ {file = "av-16.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2dd419d262e6a71cab206d80bbf28e0a10d0f227b671cdf5e854c028faa2d043"},
85
+ {file = "av-16.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:53585986fd431cd436f290fba662cfb44d9494fbc2949a183de00acc5b33fa88"},
86
+ {file = "av-16.1.0-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:76f5ed8495cf41e1209a5775d3699dc63fdc1740b94a095e2485f13586593205"},
87
+ {file = "av-16.1.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:8d55397190f12a1a3ae7538be58c356cceb2bf50df1b33523817587748ce89e5"},
88
+ {file = "av-16.1.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:9d51d9037437218261b4bbf9df78a95e216f83d7774fbfe8d289230b5b2e28e2"},
89
+ {file = "av-16.1.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0ce07a89c15644407f49d942111ca046e323bbab0a9078ff43ee57c9b4a50dad"},
90
+ {file = "av-16.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:cac0c074892ea97113b53556ff41c99562db7b9f09f098adac1f08318c2acad5"},
91
+ {file = "av-16.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7dec3dcbc35a187ce450f65a2e0dda820d5a9e6553eea8344a1459af11c98649"},
92
+ {file = "av-16.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6f90dc082ff2068ddbe77618400b44d698d25d9c4edac57459e250c16b33d700"},
93
+ {file = "av-16.1.0.tar.gz", hash = "sha256:a094b4fd87a3721dacf02794d3d2c82b8d712c85b9534437e82a8a978c175ffd"},
94
+ ]
95
+
96
  [[package]]
97
  name = "black"
98
  version = "23.12.1"
 
313
  {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
314
  ]
315
 
316
+ [[package]]
317
+ name = "coloredlogs"
318
+ version = "15.0.1"
319
+ description = "Colored terminal output for Python's logging module"
320
+ optional = false
321
+ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
322
+ groups = ["main"]
323
+ files = [
324
+ {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"},
325
+ {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"},
326
+ ]
327
+
328
+ [package.dependencies]
329
+ humanfriendly = ">=9.1"
330
+
331
+ [package.extras]
332
+ cron = ["capturer (>=2.4)"]
333
+
334
  [[package]]
335
  name = "coverage"
336
  version = "7.10.6"
 
432
  [package.extras]
433
  toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
434
 
435
+ [[package]]
436
+ name = "ctranslate2"
437
+ version = "4.6.3"
438
+ description = "Fast inference engine for Transformer models"
439
+ optional = false
440
+ python-versions = ">=3.9"
441
+ groups = ["main"]
442
+ files = [
443
+ {file = "ctranslate2-4.6.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d75d79e55a3a26964320445c03a56af60d7215d95561b744d93d04bad24c268a"},
444
+ {file = "ctranslate2-4.6.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:13ccb5011e67b831354c9a01bf4d824b4dc5535c54abcf492e0ae4e41894518e"},
445
+ {file = "ctranslate2-4.6.3-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:259ab216d4de93723f3db1805f2bac48b1a5732ce3de0e5a163b570821fcb063"},
446
+ {file = "ctranslate2-4.6.3-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7a5e59a5a67c3f48133ffe6fe2a557922283c16eb4233e6dbb82e0b9a20782f2"},
447
+ {file = "ctranslate2-4.6.3-cp310-cp310-win_amd64.whl", hash = "sha256:6be735c7904ea98c22d7d02b338299c0a7f4cd4b1d0e9dd528e319e52bd78d66"},
448
+ {file = "ctranslate2-4.6.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1ac0d2bec0961f0f9ee00cd5c55b4d5904ee309d9269778d9f9edd23c46c87ff"},
449
+ {file = "ctranslate2-4.6.3-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:db5f82661fa960a6a1bc0e738acf135a22da94a32cda198d8fb782d37ef4caa8"},
450
+ {file = "ctranslate2-4.6.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f1ec2cd9546f02ff9f1b2d21b115eadcce45c8ae5ac5811e7d382f9d9736aa4"},
451
+ {file = "ctranslate2-4.6.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:67f4b5802349a8cfa2e6105b161bf015e97aadab0f58a7034c97e78283cb29b8"},
452
+ {file = "ctranslate2-4.6.3-cp311-cp311-win_amd64.whl", hash = "sha256:fa2f3dcda893a3f4dedeb32b5059e4085738934d93ea8dccdce4bbef2be5d3dc"},
453
+ {file = "ctranslate2-4.6.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:32022dcf0ee2eace0b00345899b0e2be2f5a8b57d8467b1f5ecee40bb3e18746"},
454
+ {file = "ctranslate2-4.6.3-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:df88e7ac821b2def12ae6c71ba4180c13abc13713c1d1ae819e92f2db8556564"},
455
+ {file = "ctranslate2-4.6.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:487f57da179057e1a8498d3b61f2fcd826ddfe989ce43ff3b500ec805ca55d56"},
456
+ {file = "ctranslate2-4.6.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a857a42b091f9e0b8b1f63cf1fb356822bb4905d555039f542ff95cf90fd592b"},
457
+ {file = "ctranslate2-4.6.3-cp312-cp312-win_amd64.whl", hash = "sha256:05ec48b44bb2f1e623e30acc57d34d22000d969e8998cae7762137231fae0d25"},
458
+ {file = "ctranslate2-4.6.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:95ff7fdd70bd64d40834cb6ba82bcec15228a9f34dff587babd03a1c3064c302"},
459
+ {file = "ctranslate2-4.6.3-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:a562ef2fd48287423dd6158a0c7921b6c238a052f690bce510b998bba82fd3e2"},
460
+ {file = "ctranslate2-4.6.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6cc539ed7c3531354971c78938da50f29ac08b8dc9140bc7ac377e8344bc63e2"},
461
+ {file = "ctranslate2-4.6.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f08efa826707d095ade28410dca27f8d377520f3068843e00b349d5ca15cf174"},
462
+ {file = "ctranslate2-4.6.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a6b6e80d79242761d0583bc0ad7e7ba4d09745d2b23e814bc35f6c842b0ca45"},
463
+ {file = "ctranslate2-4.6.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:75f3e9d3ca7b3d91c87f67972f20998fc318a22d49c25b6d7144b947b5e3240e"},
464
+ {file = "ctranslate2-4.6.3-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:a0657885219e05a6575bb9d8ac4c055da25110d6c897dfed7a322f8c01267fb1"},
465
+ {file = "ctranslate2-4.6.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53e975acf49bab2cd00290a2ece56925d087f8300d5bd7463b96c60002146034"},
466
+ {file = "ctranslate2-4.6.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e411c7212f42899f12522b4d9a4b5a59542aa27d5b8e87e7e7bd2f52194fa984"},
467
+ {file = "ctranslate2-4.6.3-cp314-cp314-win_amd64.whl", hash = "sha256:40749b5ad208eb5224ea7ec9516ff290e77373974be0f41697eccf3cef2a44eb"},
468
+ {file = "ctranslate2-4.6.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:dd117643e9bae19d53e3fea4415862841c4e69fcff86dbc4dd397f6864390d84"},
469
+ {file = "ctranslate2-4.6.3-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:e058b51372faee95780c0d0af513e7c5df268fffcd435a856476d998e65ebf67"},
470
+ {file = "ctranslate2-4.6.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4eca886e30e658bece2bd0fc331a37f4a5ad1e29a590d43d5082c7896eba59d7"},
471
+ {file = "ctranslate2-4.6.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5345d0d259383ddc106343744be5ada9646f0e2632a6676482fd9de6114c9ee2"},
472
+ {file = "ctranslate2-4.6.3-cp314-cp314t-win_amd64.whl", hash = "sha256:53ab04edc3f7280465cd54e6a359f26960eb63961eeae27cb9726f449b4b217e"},
473
+ {file = "ctranslate2-4.6.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3f229dfcd14be23c4f76542873562ab9d8006e6e045fa585be83f82d224c30be"},
474
+ {file = "ctranslate2-4.6.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:11f88fe0d2d081f1fc4f7442477a7089a3fac9ad28c98fa2df1d9739a114524e"},
475
+ {file = "ctranslate2-4.6.3-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8d0b028e161a8374467a7b77a4675a0aa88cd2dd24e0700c8277418cc31be4d9"},
476
+ {file = "ctranslate2-4.6.3-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a7d0c858a4fecf288211b488ed281c00b93b77155e39e6d496646cc1ddbecda1"},
477
+ {file = "ctranslate2-4.6.3-cp39-cp39-win_amd64.whl", hash = "sha256:875fd4dcd7185589f07197b667ce547ec532ef42d68feed76615543e87f289ef"},
478
+ ]
479
+
480
+ [package.dependencies]
481
+ numpy = "*"
482
+ pyyaml = ">=5.3,<7"
483
+ setuptools = "*"
484
+
485
  [[package]]
486
  name = "deprecated"
487
  version = "1.3.1"
 
521
  [package.extras]
522
  all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"]
523
 
524
+ [[package]]
525
+ name = "faster-whisper"
526
+ version = "1.2.1"
527
+ description = "Faster Whisper transcription with CTranslate2"
528
+ optional = false
529
+ python-versions = ">=3.9"
530
+ groups = ["main"]
531
+ files = [
532
+ {file = "faster_whisper-1.2.1-py3-none-any.whl", hash = "sha256:79a66ad50688c0b794dd501dc340a736992a6342f7f95e5811be60b5224a26a7"},
533
+ ]
534
+
535
+ [package.dependencies]
536
+ av = ">=11"
537
+ ctranslate2 = ">=4.0,<5"
538
+ huggingface-hub = ">=0.21"
539
+ onnxruntime = ">=1.14,<2"
540
+ tokenizers = ">=0.13,<1"
541
+ tqdm = "*"
542
+
543
+ [package.extras]
544
+ conversion = ["transformers[torch] (>=4.23)"]
545
+ dev = ["black (==23.*)", "flake8 (==6.*)", "isort (==5.*)", "pytest (==7.*)"]
546
+
547
  [[package]]
548
  name = "filelock"
549
  version = "3.20.3"
 
573
  pycodestyle = ">=2.11.0,<2.12.0"
574
  pyflakes = ">=3.1.0,<3.2.0"
575
 
576
+ [[package]]
577
+ name = "flatbuffers"
578
+ version = "25.12.19"
579
+ description = "The FlatBuffers serialization format for Python"
580
+ optional = false
581
+ python-versions = "*"
582
+ groups = ["main"]
583
+ files = [
584
+ {file = "flatbuffers-25.12.19-py2.py3-none-any.whl", hash = "sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4"},
585
+ ]
586
+
587
  [[package]]
588
  name = "fsspec"
589
  version = "2026.1.0"
 
814
  torch = ["safetensors[torch]", "torch"]
815
  typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"]
816
 
817
+ [[package]]
818
+ name = "humanfriendly"
819
+ version = "10.0"
820
+ description = "Human friendly output for text interfaces using Python"
821
+ optional = false
822
+ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
823
+ groups = ["main"]
824
+ files = [
825
+ {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"},
826
+ {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"},
827
+ ]
828
+
829
+ [package.dependencies]
830
+ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""}
831
+
832
  [[package]]
833
  name = "idna"
834
  version = "3.10"
 
1476
  {file = "nvidia_nvtx_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:2fb11a4af04a5e6c84073e6404d26588a34afd35379f0855a99797897efa75c0"},
1477
  ]
1478
 
1479
+ [[package]]
1480
+ name = "onnxruntime"
1481
+ version = "1.23.2"
1482
+ description = "ONNX Runtime is a runtime accelerator for Machine Learning models"
1483
+ optional = false
1484
+ python-versions = ">=3.10"
1485
+ groups = ["main"]
1486
+ files = [
1487
+ {file = "onnxruntime-1.23.2-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:a7730122afe186a784660f6ec5807138bf9d792fa1df76556b27307ea9ebcbe3"},
1488
+ {file = "onnxruntime-1.23.2-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:b28740f4ecef1738ea8f807461dd541b8287d5650b5be33bca7b474e3cbd1f36"},
1489
+ {file = "onnxruntime-1.23.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8f7d1fe034090a1e371b7f3ca9d3ccae2fabae8c1d8844fb7371d1ea38e8e8d2"},
1490
+ {file = "onnxruntime-1.23.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4ca88747e708e5c67337b0f65eed4b7d0dd70d22ac332038c9fc4635760018f7"},
1491
+ {file = "onnxruntime-1.23.2-cp310-cp310-win_amd64.whl", hash = "sha256:0be6a37a45e6719db5120e9986fcd30ea205ac8103fd1fb74b6c33348327a0cc"},
1492
+ {file = "onnxruntime-1.23.2-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:6f91d2c9b0965e86827a5ba01531d5b669770b01775b23199565d6c1f136616c"},
1493
+ {file = "onnxruntime-1.23.2-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:87d8b6eaf0fbeb6835a60a4265fde7a3b60157cf1b2764773ac47237b4d48612"},
1494
+ {file = "onnxruntime-1.23.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bbfd2fca76c855317568c1b36a885ddea2272c13cb0e395002c402f2360429a6"},
1495
+ {file = "onnxruntime-1.23.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da44b99206e77734c5819aa2142c69e64f3b46edc3bd314f6a45a932defc0b3e"},
1496
+ {file = "onnxruntime-1.23.2-cp311-cp311-win_amd64.whl", hash = "sha256:902c756d8b633ce0dedd889b7c08459433fbcf35e9c38d1c03ddc020f0648c6e"},
1497
+ {file = "onnxruntime-1.23.2-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:b8f029a6b98d3cf5be564d52802bb50a8489ab73409fa9db0bf583eabb7c2321"},
1498
+ {file = "onnxruntime-1.23.2-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:218295a8acae83905f6f1aed8cacb8e3eb3bd7513a13fe4ba3b2664a19fc4a6b"},
1499
+ {file = "onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76ff670550dc23e58ea9bc53b5149b99a44e63b34b524f7b8547469aaa0dcb8c"},
1500
+ {file = "onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f9b4ae77f8e3c9bee50c27bc1beede83f786fe1d52e99ac85aa8d65a01e9b77"},
1501
+ {file = "onnxruntime-1.23.2-cp312-cp312-win_amd64.whl", hash = "sha256:25de5214923ce941a3523739d34a520aac30f21e631de53bba9174dc9c004435"},
1502
+ {file = "onnxruntime-1.23.2-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:2ff531ad8496281b4297f32b83b01cdd719617e2351ffe0dba5684fb283afa1f"},
1503
+ {file = "onnxruntime-1.23.2-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:162f4ca894ec3de1a6fd53589e511e06ecdc3ff646849b62a9da7489dee9ce95"},
1504
+ {file = "onnxruntime-1.23.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45d127d6e1e9b99d1ebeae9bcd8f98617a812f53f46699eafeb976275744826b"},
1505
+ {file = "onnxruntime-1.23.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8bace4e0d46480fbeeb7bbe1ffe1f080e6663a42d1086ff95c1551f2d39e7872"},
1506
+ {file = "onnxruntime-1.23.2-cp313-cp313-win_amd64.whl", hash = "sha256:1f9cc0a55349c584f083c1c076e611a7c35d5b867d5d6e6d6c823bf821978088"},
1507
+ {file = "onnxruntime-1.23.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9d2385e774f46ac38f02b3a91a91e30263d41b2f1f4f26ae34805b2a9ddef466"},
1508
+ {file = "onnxruntime-1.23.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e2b9233c4947907fd1818d0e581c049c41ccc39b2856cc942ff6d26317cee145"},
1509
+ ]
1510
+
1511
+ [package.dependencies]
1512
+ coloredlogs = "*"
1513
+ flatbuffers = "*"
1514
+ numpy = ">=1.21.6"
1515
+ packaging = "*"
1516
+ protobuf = "*"
1517
+ sympy = "*"
1518
+
1519
  [[package]]
1520
  name = "packaging"
1521
  version = "25.0"
 
1682
  dev = ["pre-commit", "tox"]
1683
  testing = ["coverage", "pytest", "pytest-benchmark"]
1684
 
1685
+ [[package]]
1686
+ name = "protobuf"
1687
+ version = "6.33.4"
1688
+ description = ""
1689
+ optional = false
1690
+ python-versions = ">=3.9"
1691
+ groups = ["main"]
1692
+ files = [
1693
+ {file = "protobuf-6.33.4-cp310-abi3-win32.whl", hash = "sha256:918966612c8232fc6c24c78e1cd89784307f5814ad7506c308ee3cf86662850d"},
1694
+ {file = "protobuf-6.33.4-cp310-abi3-win_amd64.whl", hash = "sha256:8f11ffae31ec67fc2554c2ef891dcb561dae9a2a3ed941f9e134c2db06657dbc"},
1695
+ {file = "protobuf-6.33.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2fe67f6c014c84f655ee06f6f66213f9254b3a8b6bda6cda0ccd4232c73c06f0"},
1696
+ {file = "protobuf-6.33.4-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:757c978f82e74d75cba88eddec479df9b99a42b31193313b75e492c06a51764e"},
1697
+ {file = "protobuf-6.33.4-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c7c64f259c618f0bef7bee042075e390debbf9682334be2b67408ec7c1c09ee6"},
1698
+ {file = "protobuf-6.33.4-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:3df850c2f8db9934de4cf8f9152f8dc2558f49f298f37f90c517e8e5c84c30e9"},
1699
+ {file = "protobuf-6.33.4-cp39-cp39-win32.whl", hash = "sha256:955478a89559fa4568f5a81dce77260eabc5c686f9e8366219ebd30debf06aa6"},
1700
+ {file = "protobuf-6.33.4-cp39-cp39-win_amd64.whl", hash = "sha256:0f12ddbf96912690c3582f9dffb55530ef32015ad8e678cd494312bd78314c4f"},
1701
+ {file = "protobuf-6.33.4-py3-none-any.whl", hash = "sha256:1fe3730068fcf2e595816a6c34fe66eeedd37d51d0400b72fabc848811fdc1bc"},
1702
+ {file = "protobuf-6.33.4.tar.gz", hash = "sha256:dc2e61bca3b10470c1912d166fe0af67bfc20eb55971dcef8dfa48ce14f0ed91"},
1703
+ ]
1704
+
1705
  [[package]]
1706
  name = "pycodestyle"
1707
  version = "2.11.1"
 
1884
  {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"},
1885
  ]
1886
 
1887
+ [[package]]
1888
+ name = "pyreadline3"
1889
+ version = "3.5.4"
1890
+ description = "A python implementation of GNU readline."
1891
+ optional = false
1892
+ python-versions = ">=3.8"
1893
+ groups = ["main"]
1894
+ markers = "sys_platform == \"win32\""
1895
+ files = [
1896
+ {file = "pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6"},
1897
+ {file = "pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7"},
1898
+ ]
1899
+
1900
+ [package.extras]
1901
+ dev = ["build", "flake8", "mypy", "pytest", "twine"]
1902
+
1903
  [[package]]
1904
  name = "pytest"
1905
  version = "7.4.4"
 
2405
  optional = false
2406
  python-versions = ">=3.9"
2407
  groups = ["main"]
 
2408
  files = [
2409
  {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"},
2410
  {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"},
 
3206
  [metadata]
3207
  lock-version = "2.1"
3208
  python-versions = "^3.11"
3209
+ content-hash = "ebb16163670f970e4d506a8230fbb256edc8d5a749255bbbc9dc9cd17fc870eb"
pyproject.toml CHANGED
@@ -19,6 +19,7 @@ slowapi = "^0.1.9"
19
  cachetools = "^5.3.0"
20
  sentence-transformers = "^2.2.2"
21
  torch = "^2.0.0"
 
22
 
23
  [tool.poetry.group.dev.dependencies]
24
  pytest = "^7.4.3"
 
19
  cachetools = "^5.3.0"
20
  sentence-transformers = "^2.2.2"
21
  torch = "^2.0.0"
22
+ faster-whisper = "^1.0.0"
23
 
24
  [tool.poetry.group.dev.dependencies]
25
  pytest = "^7.4.3"
tests/conftest.py CHANGED
@@ -46,30 +46,6 @@ def temp_dir():
46
  yield tmp_dir
47
 
48
 
49
- @pytest.fixture
50
- def sample_vtt_content():
51
- """Sample VTT subtitle content for testing."""
52
- return """WEBVTT
53
- Kind: captions
54
- Language: en
55
-
56
- 00:00:00.000 --> 00:00:03.000
57
- Never gonna give you up
58
-
59
- 00:00:03.000 --> 00:00:06.000
60
- Never gonna let you down
61
-
62
- 00:00:06.000 --> 00:00:09.000
63
- Never gonna run around and desert you
64
-
65
- 00:00:09.000 --> 00:00:12.000
66
- Never gonna make you cry
67
-
68
- 00:00:12.000 --> 00:00:15.000
69
- Never gonna say goodbye
70
- """
71
-
72
-
73
  @pytest.fixture
74
  def sample_youtube_url():
75
  """Sample YouTube URL for testing."""
 
46
  yield tmp_dir
47
 
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  @pytest.fixture
50
  def sample_youtube_url():
51
  """Sample YouTube URL for testing."""
tests/test_subtitles.py CHANGED
@@ -1,19 +1,11 @@
1
  """Tests for subtitle extraction functionality."""
2
 
3
  import pytest
4
- from unittest.mock import AsyncMock, patch
5
- import asyncio
6
 
7
  from app.apis.subtitles.service import SubtitleService, SUBTITLE_CACHE
8
- from app.apis.subtitles.utils import (
9
- clean_subtitle_text,
10
- extract_video_id,
11
- convert_vtt_to_text
12
- )
13
- from app.core.exceptions import (
14
- SubtitlesNotFoundError,
15
- DownloadTimeoutError
16
- )
17
 
18
 
19
  class TestSubtitleUtils:
@@ -34,38 +26,6 @@ class TestSubtitleUtils:
34
  url = "https://www.youtube.com/embed/dQw4w9WgXcQ"
35
  assert extract_video_id(url) == "dQw4w9WgXcQ"
36
 
37
- def test_clean_subtitle_text(self):
38
- """Test cleaning subtitle text."""
39
- raw_lines = [
40
- "WEBVTT",
41
- "",
42
- "1",
43
- "00:00:00.000 --> 00:00:03.000",
44
- "Never gonna give you up",
45
- "",
46
- "2",
47
- "00:00:03.000 --> 00:00:06.000",
48
- "Never gonna let you down",
49
- "Never gonna give you up",
50
- ""
51
- ]
52
-
53
- cleaned = clean_subtitle_text(raw_lines)
54
- assert "Never gonna give you up" in cleaned
55
- assert "Never gonna let you down" in cleaned
56
- assert "WEBVTT" not in cleaned
57
- assert "00:00:00.000 --> 00:00:03.000" not in cleaned
58
- assert len([line for line in cleaned if line == "Never gonna give you up"]) == 1
59
-
60
- def test_convert_vtt_to_text(self, sample_vtt_content):
61
- """Test converting VTT content to clean text."""
62
- result = convert_vtt_to_text(sample_vtt_content)
63
-
64
- assert "Never gonna give you up" in result
65
- assert "Never gonna let you down" in result
66
- assert "WEBVTT" not in result
67
- assert "00:00:00.000 --> 00:00:03.000" not in result
68
-
69
 
70
  class TestSubtitleService:
71
  """Test subtitle extraction service."""
@@ -81,33 +41,28 @@ class TestSubtitleService:
81
  return SubtitleService()
82
 
83
  @pytest.mark.asyncio
84
- async def test_extract_subtitles_success(self, service, sample_youtube_url, sample_vtt_content):
85
  """Test successful subtitle extraction."""
86
- with patch.object(service, '_download_subtitles') as mock_download:
87
- mock_download.return_value = sample_vtt_content
 
 
 
88
 
89
  video_id, subtitles = await service.extract_subtitles(sample_youtube_url, "en")
90
 
91
  assert video_id == "dQw4w9WgXcQ"
92
- assert len(subtitles) > 0
93
- assert "Never gonna give you up" in subtitles
94
-
95
- @pytest.mark.asyncio
96
- async def test_extract_subtitles_not_found(self, service, sample_youtube_url):
97
- """Test subtitle extraction when no subtitles are found."""
98
- with patch.object(service, '_download_subtitles') as mock_download, \
99
- patch.object(service, '_try_alternative_languages') as mock_alt:
100
- mock_download.return_value = None
101
- mock_alt.return_value = None
102
-
103
- with pytest.raises(SubtitlesNotFoundError):
104
- await service.extract_subtitles(sample_youtube_url, "en")
105
 
106
  @pytest.mark.asyncio
107
- async def test_extract_subtitles_uses_cache(self, service, sample_youtube_url, sample_vtt_content):
108
  """Test that cached results are returned."""
109
- with patch.object(service, '_download_subtitles') as mock_download:
110
- mock_download.return_value = sample_vtt_content
 
 
 
111
 
112
  result1 = await service.extract_subtitles(sample_youtube_url, "en")
113
  result2 = await service.extract_subtitles(sample_youtube_url, "en")
@@ -115,14 +70,26 @@ class TestSubtitleService:
115
  assert result1 == result2
116
  assert mock_download.call_count == 1
117
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
  class TestSubtitleAPI:
120
  """Test subtitle API endpoints."""
121
 
122
- def test_extract_subtitles_endpoint_success(self, client, api_key, sample_vtt_content):
123
  """Test successful subtitle extraction via API."""
124
  with patch('app.apis.subtitles.service.subtitle_service.extract_subtitles') as mock_extract:
125
- mock_extract.return_value = ("dQw4w9WgXcQ", ["Never gonna give you up", "Never gonna let you down"])
126
 
127
  response = client.post(
128
  "/api/v1/subtitles/extract",
@@ -134,7 +101,6 @@ class TestSubtitleAPI:
134
  data = response.json()
135
  assert data["status"] == "success"
136
  assert data["video_id"] == "dQw4w9WgXcQ"
137
- assert "Never gonna give you up" in data["subtitles"]
138
 
139
  def test_extract_subtitles_endpoint_invalid_api_key(self, client, invalid_api_key):
140
  """Test API endpoint with invalid API key."""
@@ -143,7 +109,6 @@ class TestSubtitleAPI:
143
  json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"},
144
  headers={"x-api-key": invalid_api_key}
145
  )
146
-
147
  assert response.status_code == 401
148
 
149
  def test_extract_subtitles_endpoint_missing_api_key(self, client):
@@ -152,7 +117,6 @@ class TestSubtitleAPI:
152
  "/api/v1/subtitles/extract",
153
  json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"}
154
  )
155
-
156
  assert response.status_code == 401
157
 
158
  def test_extract_subtitles_endpoint_invalid_url(self, client, api_key):
@@ -162,13 +126,11 @@ class TestSubtitleAPI:
162
  json={"url": "https://example.com/not-youtube", "lang": "en"},
163
  headers={"x-api-key": api_key}
164
  )
165
-
166
  assert response.status_code == 422
167
 
168
  def test_subtitles_health_endpoint(self, client):
169
  """Test subtitles health check endpoint."""
170
  response = client.get("/api/v1/subtitles/health")
171
-
172
  assert response.status_code == 200
173
  data = response.json()
174
  assert data["status"] == "healthy"
 
1
  """Tests for subtitle extraction functionality."""
2
 
3
  import pytest
4
+ from unittest.mock import patch, MagicMock
 
5
 
6
  from app.apis.subtitles.service import SubtitleService, SUBTITLE_CACHE
7
+ from app.apis.subtitles.utils import extract_video_id
8
+ from app.core.exceptions import SubtitlesNotFoundError
 
 
 
 
 
 
 
9
 
10
 
11
  class TestSubtitleUtils:
 
26
  url = "https://www.youtube.com/embed/dQw4w9WgXcQ"
27
  assert extract_video_id(url) == "dQw4w9WgXcQ"
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  class TestSubtitleService:
31
  """Test subtitle extraction service."""
 
41
  return SubtitleService()
42
 
43
  @pytest.mark.asyncio
44
+ async def test_extract_subtitles_success(self, service, sample_youtube_url):
45
  """Test successful subtitle extraction."""
46
+ with patch.object(service, '_download_audio') as mock_download, \
47
+ patch.object(service, '_transcribe_audio') as mock_transcribe:
48
+ mock_download.return_value = MagicMock()
49
+ mock_download.return_value.exists.return_value = True
50
+ mock_transcribe.return_value = ["Test subtitle line 1", "Test subtitle line 2"]
51
 
52
  video_id, subtitles = await service.extract_subtitles(sample_youtube_url, "en")
53
 
54
  assert video_id == "dQw4w9WgXcQ"
55
+ assert len(subtitles) == 2
56
+ assert "Test subtitle line 1" in subtitles
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  @pytest.mark.asyncio
59
+ async def test_extract_subtitles_uses_cache(self, service, sample_youtube_url):
60
  """Test that cached results are returned."""
61
+ with patch.object(service, '_download_audio') as mock_download, \
62
+ patch.object(service, '_transcribe_audio') as mock_transcribe:
63
+ mock_download.return_value = MagicMock()
64
+ mock_download.return_value.exists.return_value = True
65
+ mock_transcribe.return_value = ["Cached subtitle"]
66
 
67
  result1 = await service.extract_subtitles(sample_youtube_url, "en")
68
  result2 = await service.extract_subtitles(sample_youtube_url, "en")
 
70
  assert result1 == result2
71
  assert mock_download.call_count == 1
72
 
73
+ @pytest.mark.asyncio
74
+ async def test_extract_subtitles_empty_transcription(self, service, sample_youtube_url):
75
+ """Test error when transcription produces no text."""
76
+ with patch.object(service, '_download_audio') as mock_download, \
77
+ patch.object(service, '_transcribe_audio') as mock_transcribe:
78
+ mock_download.return_value = MagicMock()
79
+ mock_download.return_value.exists.return_value = True
80
+ mock_transcribe.return_value = []
81
+
82
+ with pytest.raises(SubtitlesNotFoundError):
83
+ await service.extract_subtitles(sample_youtube_url, "en")
84
+
85
 
86
  class TestSubtitleAPI:
87
  """Test subtitle API endpoints."""
88
 
89
+ def test_extract_subtitles_endpoint_success(self, client, api_key):
90
  """Test successful subtitle extraction via API."""
91
  with patch('app.apis.subtitles.service.subtitle_service.extract_subtitles') as mock_extract:
92
+ mock_extract.return_value = ("dQw4w9WgXcQ", ["Never gonna give you up"])
93
 
94
  response = client.post(
95
  "/api/v1/subtitles/extract",
 
101
  data = response.json()
102
  assert data["status"] == "success"
103
  assert data["video_id"] == "dQw4w9WgXcQ"
 
104
 
105
  def test_extract_subtitles_endpoint_invalid_api_key(self, client, invalid_api_key):
106
  """Test API endpoint with invalid API key."""
 
109
  json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"},
110
  headers={"x-api-key": invalid_api_key}
111
  )
 
112
  assert response.status_code == 401
113
 
114
  def test_extract_subtitles_endpoint_missing_api_key(self, client):
 
117
  "/api/v1/subtitles/extract",
118
  json={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "lang": "en"}
119
  )
 
120
  assert response.status_code == 401
121
 
122
  def test_extract_subtitles_endpoint_invalid_url(self, client, api_key):
 
126
  json={"url": "https://example.com/not-youtube", "lang": "en"},
127
  headers={"x-api-key": api_key}
128
  )
 
129
  assert response.status_code == 422
130
 
131
  def test_subtitles_health_endpoint(self, client):
132
  """Test subtitles health check endpoint."""
133
  response = client.get("/api/v1/subtitles/health")
 
134
  assert response.status_code == 200
135
  data = response.json()
136
  assert data["status"] == "healthy"