ndurner commited on
Commit
6447e9a
·
1 Parent(s): f897a90

YouTube analysis works

Browse files
Dockerfile CHANGED
@@ -10,7 +10,7 @@ ARG DENO_VERSION=2.0.0
10
  WORKDIR /app
11
 
12
  RUN apt-get update && \
13
- apt-get install -y --no-install-recommends curl unzip ca-certificates && \
14
  curl -fsSL "https://github.com/denoland/deno/releases/download/v${DENO_VERSION}/deno-x86_64-unknown-linux-gnu.zip" -o /tmp/deno.zip && \
15
  unzip -q /tmp/deno.zip -d /tmp && \
16
  mv /tmp/deno /usr/local/bin/deno && \
 
10
  WORKDIR /app
11
 
12
  RUN apt-get update && \
13
+ apt-get install -y --no-install-recommends curl unzip ca-certificates ffmpeg && \
14
  curl -fsSL "https://github.com/denoland/deno/releases/download/v${DENO_VERSION}/deno-x86_64-unknown-linux-gnu.zip" -o /tmp/deno.zip && \
15
  unzip -q /tmp/deno.zip -d /tmp && \
16
  mv /tmp/deno /usr/local/bin/deno && \
demo/health.py CHANGED
@@ -17,6 +17,7 @@ from typing import Iterable
17
 
18
  MIN_DENO_VERSION = (2, 0, 0)
19
  MIN_YTDLP_VERSION = (2025, 11, 12)
 
20
  GEMINI_ENV_VAR = "GEMINI_API_KEY"
21
 
22
 
@@ -102,6 +103,27 @@ def _check_yt_dlp_python() -> ToolStatus:
102
  return ToolStatus(label, False, "yt_dlp_ejs missing (JS sites will fail)")
103
 
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  def _check_gemini_env() -> ToolStatus:
106
  label = "Gemini API key"
107
  if os.environ.get(GEMINI_ENV_VAR):
@@ -174,6 +196,7 @@ def run_health_report() -> HealthReport:
174
  tool_statuses = [
175
  _check_deno(),
176
  _check_yt_dlp_python(),
 
177
  _check_gemini_env(),
178
  _check_mcp_health(),
179
  ]
 
17
 
18
  MIN_DENO_VERSION = (2, 0, 0)
19
  MIN_YTDLP_VERSION = (2025, 11, 12)
20
+ MIN_FFMPEG_VERSION = (4, 0)
21
  GEMINI_ENV_VAR = "GEMINI_API_KEY"
22
 
23
 
 
103
  return ToolStatus(label, False, "yt_dlp_ejs missing (JS sites will fail)")
104
 
105
 
106
+ def _check_ffmpeg() -> ToolStatus:
107
+ label = "ffmpeg"
108
+ binary = shutil.which("ffmpeg")
109
+ if not binary:
110
+ return ToolStatus(label, False, "`ffmpeg` binary not found on PATH")
111
+ try:
112
+ completed = subprocess.run(
113
+ [binary, "-version"],
114
+ capture_output=True,
115
+ text=True,
116
+ check=False,
117
+ timeout=5,
118
+ )
119
+ except Exception as exc:
120
+ return ToolStatus(label, False, f"failed to exec: {exc}")
121
+ if completed.returncode != 0:
122
+ return ToolStatus(label, False, completed.stderr.strip() or "ffmpeg returned error")
123
+ first_line = (completed.stdout or "").splitlines()[0] if completed.stdout else "ffmpeg present"
124
+ return ToolStatus(label, True, first_line)
125
+
126
+
127
  def _check_gemini_env() -> ToolStatus:
128
  label = "Gemini API key"
129
  if os.environ.get(GEMINI_ENV_VAR):
 
196
  tool_statuses = [
197
  _check_deno(),
198
  _check_yt_dlp_python(),
199
+ _check_ffmpeg(),
200
  _check_gemini_env(),
201
  _check_mcp_health(),
202
  ]
demo/requirements.txt CHANGED
@@ -2,3 +2,4 @@ gradio>=6.0.0.dev0
2
  yt-dlp[default]>=2025.11.12
3
  fastmcp>=0.1.11
4
  google-genai>=0.8.0
 
 
2
  yt-dlp[default]>=2025.11.12
3
  fastmcp>=0.1.11
4
  google-genai>=0.8.0
5
+ ffmpeg-python>=0.2.0
mcp/pyproject.toml CHANGED
@@ -11,6 +11,7 @@ dependencies = [
11
  "fastmcp>=0.1.11",
12
  "yt-dlp[default]>=2025.11.12",
13
  "google-genai>=0.8.0",
 
14
  ]
15
 
16
  [project.scripts]
 
11
  "fastmcp>=0.1.11",
12
  "yt-dlp[default]>=2025.11.12",
13
  "google-genai>=0.8.0",
14
+ "ffmpeg-python>=0.2.0"
15
  ]
16
 
17
  [project.scripts]
mcp/src/aileen3_mcp/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
  """Aileen3 MCP server package."""
2
 
3
- __all__ = ["server"]
 
1
  """Aileen3 MCP server package."""
2
 
3
+ __all__ = ["server", "media_tools"]
mcp/src/aileen3_mcp/media_tools.py ADDED
@@ -0,0 +1,940 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import base64
5
+ import hashlib
6
+ import json
7
+ import logging
8
+ import os
9
+ import re
10
+ import secrets
11
+ import tempfile
12
+ import time
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from typing import Any, Callable, Dict, Optional
16
+
17
+ import ffmpeg
18
+ from fastmcp import Context, FastMCP
19
+ from contextlib import redirect_stdout, redirect_stderr, contextmanager
20
+ import io
21
+
22
+ log = logging.getLogger(__name__)
23
+
24
+
25
+ # ---------------------------------------------------------------------------------------------------------------------
26
+ # Paths & storage
27
+ # ---------------------------------------------------------------------------------------------------------------------
28
+ BASE_CACHE = Path(os.environ.get("AILEEN3_CACHE_DIR", Path.home() / ".cache" / "aileen3"))
29
+ MEDIA_CACHE = BASE_CACHE / "media"
30
+ SLIDE_CACHE = BASE_CACHE / "slides"
31
+ ANALYSIS_CACHE = BASE_CACHE / "analysis"
32
+
33
+ for _path in (MEDIA_CACHE, SLIDE_CACHE, ANALYSIS_CACHE):
34
+ _path.mkdir(parents=True, exist_ok=True)
35
+
36
+ DEBUG = os.environ.get("AILEEN3_DEBUG", "").lower() in {"1", "true", "yes", "on"}
37
+ DEBUG_DIR = Path(tempfile.gettempdir()) / "aileen3-debug"
38
+ if DEBUG:
39
+ DEBUG_DIR.mkdir(parents=True, exist_ok=True)
40
+
41
+ LOG_DIR = BASE_CACHE / "logs"
42
+ LOG_DIR.mkdir(parents=True, exist_ok=True)
43
+ LOG_FILE = LOG_DIR / "aileen3-mcp.log"
44
+
45
+
46
+ def _ensure_file_logging():
47
+ root = logging.getLogger()
48
+ # Avoid adding duplicate handlers to root
49
+ for h in root.handlers:
50
+ if isinstance(h, logging.FileHandler) and Path(getattr(h, "baseFilename", "")) == LOG_FILE:
51
+ return
52
+ handler = logging.FileHandler(LOG_FILE, encoding="utf-8")
53
+ handler.setLevel(logging.DEBUG if DEBUG else logging.INFO)
54
+ fmt = logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
55
+ handler.setFormatter(fmt)
56
+ root.addHandler(handler)
57
+
58
+
59
+ _ensure_file_logging()
60
+
61
+
62
+ def _write_debug(reference: str, suffix: str, data: Any) -> None:
63
+ if not DEBUG:
64
+ return
65
+ path = DEBUG_DIR / f"{reference}_{suffix}"
66
+ try:
67
+ if isinstance(data, (bytes, bytearray)):
68
+ path.write_bytes(data)
69
+ else:
70
+ path.write_text(json.dumps(data, indent=2, default=str))
71
+ except Exception:
72
+ log.debug("Failed to write debug artifact %s", path)
73
+
74
+
75
+ class _YDLLogger:
76
+ """Silence yt-dlp stdout/stderr while keeping messages in Python logging."""
77
+
78
+ def debug(self, msg):
79
+ log.debug("yt-dlp: %s", msg)
80
+
81
+ def info(self, msg):
82
+ log.info("yt-dlp: %s", msg)
83
+
84
+ def warning(self, msg):
85
+ log.warning("yt-dlp: %s", msg)
86
+
87
+ def error(self, msg):
88
+ log.error("yt-dlp: %s", msg)
89
+
90
+
91
+ @contextmanager
92
+ def _silence_stdio():
93
+ buf_out = io.StringIO()
94
+ buf_err = io.StringIO()
95
+ with redirect_stdout(buf_out), redirect_stderr(buf_err):
96
+ yield
97
+
98
+
99
+ # ---------------------------------------------------------------------------------------------------------------------
100
+ # Job bookkeeping
101
+ # ---------------------------------------------------------------------------------------------------------------------
102
+
103
+
104
+ class JobStatus:
105
+ PENDING = "pending"
106
+ RUNNING = "running"
107
+ DONE = "done"
108
+ FAILED = "failed"
109
+
110
+
111
+ @dataclass
112
+ class Priors:
113
+ """User-supplied and media-derived context to steer analysis."""
114
+
115
+ context: str = ""
116
+ expectations: str = ""
117
+ prior_knowledge: str = ""
118
+ questions: str = ""
119
+ media_context: str = ""
120
+
121
+ @classmethod
122
+ def from_obj(cls, obj: dict | None, media_context: str = "") -> "Priors":
123
+ obj = obj or {}
124
+ return cls(
125
+ context=str(obj.get("context", "") or ""),
126
+ expectations=str(obj.get("expectations", "") or ""),
127
+ prior_knowledge=str(obj.get("prior_knowledge") or obj.get("prior knowledge") or ""),
128
+ questions=str(obj.get("questions", "") or ""),
129
+ media_context=media_context,
130
+ )
131
+
132
+ def as_prompt_text(self) -> str:
133
+ sections = []
134
+ for label, value in (
135
+ ("User context", self.context),
136
+ ("Expectations", self.expectations),
137
+ ("Prior knowledge", self.prior_knowledge),
138
+ ("Questions", self.questions),
139
+ ("Media context", self.media_context),
140
+ ):
141
+ if value:
142
+ sections.append(f"``` {label}\n{value}\n```\n")
143
+ return "\n".join(sections) if sections else "No specific priors provided."
144
+
145
+
146
+ @dataclass
147
+ class JobRecord:
148
+ id: str
149
+ kind: str
150
+ reference: str
151
+ status: str = JobStatus.PENDING
152
+ progress: float = 0.0
153
+ error: Optional[str] = None
154
+ result: Optional[dict] = None
155
+ created_at: float = field(default_factory=time.time)
156
+ finished_at: Optional[float] = None
157
+ task: Optional[asyncio.Task] = field(default=None, repr=False)
158
+
159
+
160
+ JOBS: Dict[str, JobRecord] = {}
161
+ REFERENCE_INDEX: Dict[tuple[str, str], str] = {}
162
+ JOB_LOCK = asyncio.Lock()
163
+
164
+
165
+ def _error(detail: str, reference: str | None = None, status: str = "error") -> dict:
166
+ payload = {"status": status, "detail": detail, "is_error": True}
167
+ if reference:
168
+ payload["reference"] = reference
169
+ return payload
170
+
171
+
172
+ def _build_reference(info: dict | None, source: str) -> str:
173
+ source = source.strip()
174
+ if info:
175
+ extractor = (info.get("extractor_key") or "media").lower()
176
+ vid = info.get("id")
177
+ if vid and re.fullmatch(r"[A-Za-z0-9_-]+", str(vid)):
178
+ safe_id = re.sub(r"[^A-Za-z0-9_-]", "_", str(vid))
179
+ return f"{extractor}_{safe_id}"[:200]
180
+
181
+ digest = hashlib.sha256(source.encode()).hexdigest()[:32]
182
+ return f"media_{digest}"
183
+
184
+
185
+ def _job_payload(job: JobRecord, include_result: bool = True) -> dict:
186
+ payload = {
187
+ "job_id": job.id,
188
+ "reference": job.reference,
189
+ "kind": job.kind,
190
+ "status": job.status,
191
+ "progress": job.progress,
192
+ "created_at": job.created_at,
193
+ "finished_at": job.finished_at,
194
+ }
195
+ if job.error:
196
+ payload["error"] = job.error
197
+ payload["is_error"] = True
198
+ if job.status == JobStatus.FAILED:
199
+ payload["is_error"] = True
200
+ if include_result and job.status == JobStatus.DONE:
201
+ payload["result"] = job.result
202
+ return payload
203
+
204
+
205
+ async def _maybe_wait(job: JobRecord, wait_seconds: int) -> dict:
206
+ """Wait briefly for completion; otherwise return running status."""
207
+ task = job.task
208
+ if not task:
209
+ return _job_payload(job, include_result=False)
210
+
211
+ try:
212
+ await asyncio.wait_for(asyncio.shield(task), timeout=max(0, wait_seconds))
213
+ except asyncio.TimeoutError:
214
+ return _job_payload(job, include_result=False)
215
+ except asyncio.CancelledError:
216
+ job.status = JobStatus.FAILED
217
+ job.error = "task cancelled"
218
+ job.finished_at = time.time()
219
+ return _job_payload(job, include_result=False)
220
+
221
+ # If we reach here, task finished
222
+ return _job_payload(job, include_result=True)
223
+
224
+
225
+ async def _get_or_create_job(kind: str, reference: str, factory: Callable[[], JobRecord]) -> JobRecord:
226
+ async with JOB_LOCK:
227
+ existing_id = REFERENCE_INDEX.get((kind, reference))
228
+ if existing_id and existing_id in JOBS:
229
+ return JOBS[existing_id]
230
+
231
+ job = factory()
232
+ JOBS[job.id] = job
233
+ REFERENCE_INDEX[(kind, reference)] = job.id
234
+ return job
235
+
236
+
237
+ # ---------------------------------------------------------------------------------------------------------------------
238
+ # Helpers: media metadata & ffmpeg probes
239
+ # ---------------------------------------------------------------------------------------------------------------------
240
+
241
+
242
+ def _media_dir(reference: str) -> Path:
243
+ return MEDIA_CACHE / reference
244
+
245
+
246
+ def _metadata_path(reference: str) -> Path:
247
+ return _media_dir(reference) / "metadata.json"
248
+
249
+
250
+ def _slides_json_path(reference: str) -> Path:
251
+ return SLIDE_CACHE / f"{reference}.json"
252
+
253
+
254
+ def _analysis_json_path(reference: str) -> Path:
255
+ return ANALYSIS_CACHE / f"{reference}.json"
256
+
257
+
258
+ def _load_json(path: Path) -> dict | None:
259
+ if path.exists():
260
+ try:
261
+ return json.loads(path.read_text())
262
+ except Exception:
263
+ log.warning("Failed to parse JSON from %s", path)
264
+ return None
265
+
266
+
267
+ def _save_json(path: Path, payload: dict) -> None:
268
+ path.parent.mkdir(parents=True, exist_ok=True)
269
+ path.write_text(json.dumps(payload, indent=2))
270
+
271
+
272
+ def _probe_duration(video_path: Path) -> Optional[float]:
273
+ try:
274
+ probe = ffmpeg.probe(str(video_path))
275
+ fmt = probe.get("format", {})
276
+ duration_str = fmt.get("duration")
277
+ return float(duration_str) if duration_str else None
278
+ except Exception:
279
+ return None
280
+
281
+
282
+ def _extract_frame(video_path: Path, timestamp: float) -> Optional[bytes]:
283
+ if timestamp < 0:
284
+ return None
285
+ try:
286
+ out, err = (
287
+ ffmpeg.input(str(video_path), ss=timestamp)
288
+ .output("pipe:", vframes=1, format="image2", vcodec="png")
289
+ .run(capture_stdout=True, capture_stderr=True, overwrite_output=True)
290
+ )
291
+ except ffmpeg.Error as exc: # pragma: no cover - runtime dependency
292
+ log.debug("ffmpeg extract error for %s at %.2fs: %s", video_path, timestamp, exc.stderr.decode(errors="ignore")[:200])
293
+ return None
294
+ return out
295
+
296
+
297
+ # ---------------------------------------------------------------------------------------------------------------------
298
+ # yt-dlp based download
299
+ # ---------------------------------------------------------------------------------------------------------------------
300
+
301
+
302
+ def _run_ytdlp_download(source: str, reference: str, prefer_audio_only: bool) -> dict:
303
+ from yt_dlp import YoutubeDL # local import to keep module import light
304
+
305
+ target_dir = _media_dir(reference)
306
+ target_dir.mkdir(parents=True, exist_ok=True)
307
+
308
+ ytdlp_opts: dict[str, Any] = {
309
+ "outtmpl": str(target_dir / "%(id)s.%(ext)s"),
310
+ "quiet": True,
311
+ "noplaylist": True,
312
+ "ignoreerrors": False,
313
+ }
314
+
315
+ # Prefer combined AV for slides; fall back to audio only if requested or video unavailable
316
+ if prefer_audio_only:
317
+ ytdlp_opts["format"] = "bestaudio/best"
318
+ else:
319
+ ytdlp_opts["format"] = "bestvideo+bestaudio/best"
320
+
321
+ shared_opts = {
322
+ "skip_download": True,
323
+ "quiet": True,
324
+ "no_warnings": True,
325
+ "noprogress": True,
326
+ "noplaylist": True,
327
+ "logger": _YDLLogger(),
328
+ "extractor_args": {"youtube": {"player_client": ["default"]}},
329
+ }
330
+
331
+ with _silence_stdio():
332
+ with YoutubeDL(params=shared_opts) as ydl:
333
+ info = ydl.extract_info(source, download=False)
334
+
335
+ if not info:
336
+ raise RuntimeError("Unable to resolve media info via yt-dlp")
337
+
338
+ with _silence_stdio():
339
+ with YoutubeDL(params=ytdlp_opts) as ydl:
340
+ result = ydl.extract_info(source, download=True)
341
+ download_path = Path(ydl.prepare_filename(result))
342
+
343
+ if not download_path.exists():
344
+ raise RuntimeError("yt-dlp finished without producing a file")
345
+
346
+ metadata = {
347
+ "reference": reference,
348
+ "source": source,
349
+ "title": result.get("title"),
350
+ "duration": result.get("duration"),
351
+ "ext": result.get("ext"),
352
+ "download_path": str(download_path),
353
+ "thumbnail": result.get("thumbnail"),
354
+ "channel": result.get("channel"),
355
+ "channel_id": result.get("channel_id"),
356
+ "uploader": result.get("uploader"),
357
+ "id": result.get("id"),
358
+ "description": result.get("description"),
359
+ "webpage_url": result.get("webpage_url"),
360
+ "extractor_key": result.get("extractor_key"),
361
+ }
362
+
363
+ _save_json(_metadata_path(reference), metadata)
364
+ return metadata
365
+
366
+
367
+ def _ensure_audio_sidecar(video_path: Path, reference: str) -> Path:
368
+ """Create an AAC sidecar for the video (preferred by Gemini)."""
369
+
370
+ audio_path = video_path.with_suffix(".m4a")
371
+ if audio_path.exists():
372
+ return audio_path
373
+
374
+ audio_path.parent.mkdir(parents=True, exist_ok=True)
375
+ try:
376
+ (
377
+ ffmpeg.input(str(video_path))
378
+ .output(str(audio_path), acodec="aac", audio_bitrate="128k", ac=2, ar=16000, vn=None)
379
+ .overwrite_output()
380
+ .run(capture_stdout=True, capture_stderr=True)
381
+ )
382
+ except ffmpeg.Error as exc: # pragma: no cover - runtime dependency
383
+ msg = exc.stderr.decode("utf-8", "ignore") if exc.stderr else str(exc)
384
+ raise RuntimeError(f"ffmpeg failed to extract audio: {msg[:400]}")
385
+ return audio_path
386
+
387
+
388
+ # ---------------------------------------------------------------------------------------------------------------------
389
+ # Gemini helpers
390
+ # ---------------------------------------------------------------------------------------------------------------------
391
+
392
+
393
+ def _build_gemini_client():
394
+ try:
395
+ from google import genai
396
+ except Exception as exc: # pragma: no cover - runtime dependency
397
+ raise RuntimeError(f"google-genai not available: {exc}")
398
+
399
+ api_key = os.environ.get("GEMINI_API_KEY")
400
+ if not api_key:
401
+ raise RuntimeError("GEMINI_API_KEY environment variable is required")
402
+ return genai.Client(api_key=api_key)
403
+
404
+
405
+ def _wait_for_upload(client, upload):
406
+ from google.genai import types
407
+
408
+ while upload.state.name == "PROCESSING":
409
+ time.sleep(1)
410
+ upload = client.files.get(name=upload.name)
411
+ if upload.state.name != "ACTIVE":
412
+ raise RuntimeError(f"Upload failed: {upload.state.name}")
413
+ return upload
414
+
415
+
416
+ def _gemini_structured_slide_times(client, video_path: Path, reference: str) -> list[dict]:
417
+ from google.genai import types
418
+
419
+ upload = client.files.upload(
420
+ file=str(video_path),
421
+ config=types.UploadFileConfig(
422
+ display_name=video_path.name,
423
+ mime_type="video/mp4",
424
+ ),
425
+ )
426
+ upload = _wait_for_upload(client, upload)
427
+
428
+ schema = types.Schema(
429
+ type=types.Type.OBJECT,
430
+ properties={
431
+ "slides": types.Schema(
432
+ type=types.Type.ARRAY,
433
+ items=types.Schema(
434
+ type=types.Type.OBJECT,
435
+ properties={
436
+ "label": types.Schema(type=types.Type.STRING),
437
+ "from_seconds": types.Schema(type=types.Type.NUMBER),
438
+ "to_seconds": types.Schema(type=types.Type.NUMBER),
439
+ },
440
+ required=["from_seconds", "to_seconds"],
441
+ ),
442
+ )
443
+ },
444
+ required=["slides"],
445
+ )
446
+
447
+ file = types.Part.from_uri(file_uri=upload.uri, mime_type=upload.mime_type or "video/mp4")
448
+
449
+ response = client.models.generate_content(
450
+ model="gemini-flash-lite-latest",
451
+ contents=[file, "What are the timestamps of individual slides presented?"],
452
+ )
453
+
454
+ raw = getattr(response, "text", None) or getattr(response, "raw", None)
455
+ if not raw and hasattr(response, "output_text"):
456
+ raw = response.output_text # type: ignore[attr-defined]
457
+ if not raw:
458
+ # try candidates
459
+ candidates = getattr(response, "candidates", None)
460
+ if candidates:
461
+ raw = candidates[0].content.parts[0].text # type: ignore[index]
462
+ if not raw:
463
+ raise RuntimeError("Slide analysis model returned empty response")
464
+
465
+ _write_debug(reference, "slides_raw.json", raw or "")
466
+
467
+ try:
468
+ payload = json.loads(raw) if raw else {"slides": []}
469
+ except Exception:
470
+ log.warning("Gemini slide response not JSON: %s", raw[:200])
471
+ payload = {"slides": []}
472
+
473
+ slides = payload.get("slides") or []
474
+ sanitized: list[dict] = []
475
+ for slide in slides:
476
+ try:
477
+ start = float(slide.get("from_seconds"))
478
+ end = float(slide.get("to_seconds"))
479
+ except Exception:
480
+ continue
481
+ label = (slide.get("label") or "").strip()
482
+ sanitized.append({"from": start, "to": end, "label": label})
483
+ return sanitized
484
+
485
+
486
+ def _gemini_analyze_audio(client, audio_path: Path, slides: list[dict], priors: Priors) -> dict:
487
+ from google.genai import types
488
+
489
+ upload = client.files.upload(
490
+ file=str(audio_path),
491
+ config=types.UploadFileConfig(
492
+ display_name=audio_path.name,
493
+ mime_type="audio/mp4", # AAC in M4A container
494
+ ),
495
+ )
496
+ upload = _wait_for_upload(client, upload)
497
+
498
+ slide_files = []
499
+ for slide in slides:
500
+ uri = slide.get("file_uri")
501
+ if not uri:
502
+ continue
503
+ slide_files.append(types.Part.from_uri(file_uri=uri, mime_type="image/png"))
504
+
505
+ priors_text = priors.as_prompt_text()
506
+
507
+ # FIXME: improve prompt: conceptualize "surprises", "priors"
508
+ contents = [
509
+ types.Content(
510
+ role="user",
511
+ parts=[
512
+ types.Part.from_text(text=priors_text),
513
+ types.Part.from_uri(file_uri=upload.uri, mime_type=upload.mime_type or "audio/wav"),
514
+ *slide_files,
515
+ types.Part.from_text(
516
+ text=
517
+ "Provide concise analysis and key insights using the supplied context, expected takeaways,"
518
+ " and questions. Base the reasoning on the audio transcript and the slide snapshots; do not"
519
+ " assume access to the full video."
520
+ ),
521
+ ],
522
+ )
523
+ ]
524
+
525
+ response = client.models.generate_content(
526
+ model="gemini-flash-latest",
527
+ contents=contents,
528
+ )
529
+
530
+ text = getattr(response, "text", None)
531
+ if not text and hasattr(response, "output_text"):
532
+ text = response.output_text # type: ignore[attr-defined]
533
+ if not text:
534
+ candidates = getattr(response, "candidates", None)
535
+ if candidates:
536
+ text = candidates[0].content.parts[0].text # type: ignore[index]
537
+ if not text:
538
+ raise RuntimeError("Gemini returned no analysis")
539
+ return {
540
+ "analysis": text,
541
+ "audio_file_uri": upload.uri,
542
+ "slide_count": len(slide_files),
543
+ }
544
+
545
+
546
+ # ---------------------------------------------------------------------------------------------------------------------
547
+ # Slide extraction pipeline
548
+ # ---------------------------------------------------------------------------------------------------------------------
549
+
550
+
551
+ def _extract_slides_flow(metadata: dict) -> dict:
552
+ reference = metadata["reference"]
553
+ video_path = Path(metadata["download_path"])
554
+ duration = metadata.get("duration")
555
+
556
+ duration_seconds = float(duration) if duration else _probe_duration(video_path)
557
+
558
+ client = _build_gemini_client()
559
+ with _silence_stdio(): # silence any ffmpeg/yt-dlp noise during upload
560
+ slides_raw = _gemini_structured_slide_times(client, video_path, reference)
561
+
562
+ seen_hashes: set[str] = set()
563
+ slide_entries: list[dict] = []
564
+
565
+ for idx, slide in enumerate(slides_raw):
566
+ start = float(slide.get("from", 0))
567
+ end = float(slide.get("to", start))
568
+ if duration_seconds and start >= duration_seconds:
569
+ continue
570
+ midpoint = start + (abs(end - start) / 2.0)
571
+ if duration_seconds and midpoint > duration_seconds:
572
+ continue
573
+
574
+ frame_bytes = _extract_frame(video_path, midpoint)
575
+ if not frame_bytes:
576
+ continue
577
+
578
+ digest = hashlib.sha1(frame_bytes).hexdigest()
579
+ if digest in seen_hashes:
580
+ continue
581
+ seen_hashes.add(digest)
582
+
583
+ data_uri = "data:image/png;base64," + base64.b64encode(frame_bytes).decode("ascii")
584
+
585
+ image_path = SLIDE_CACHE / reference / f"slide_{idx:03d}.png"
586
+ image_path.parent.mkdir(parents=True, exist_ok=True)
587
+ image_path.write_bytes(frame_bytes)
588
+
589
+ slide_entries.append(
590
+ {
591
+ "index": len(slide_entries),
592
+ "from": start,
593
+ "to": end,
594
+ "mid": midpoint,
595
+ "label": slide.get("label") or "",
596
+ "image_data_uri": data_uri,
597
+ }
598
+ )
599
+
600
+ payload = {
601
+ "reference": reference,
602
+ "count": len(slide_entries),
603
+ "slides": slide_entries,
604
+ "source": metadata.get("source"),
605
+ }
606
+
607
+ _save_json(_slides_json_path(reference), payload)
608
+ _write_debug(reference, "slides_sanitized.json", payload)
609
+ return payload
610
+
611
+
612
+ # ---------------------------------------------------------------------------------------------------------------------
613
+ # Analysis pipeline
614
+ # ---------------------------------------------------------------------------------------------------------------------
615
+
616
+
617
+ def _media_context_from_metadata(metadata: dict) -> str:
618
+ parts = []
619
+ title = metadata.get("title")
620
+ description = metadata.get("description")
621
+ channel = metadata.get("channel") or metadata.get("uploader")
622
+ url = metadata.get("webpage_url") or metadata.get("source")
623
+ if title:
624
+ parts.append(f"Title: {title}")
625
+ if channel:
626
+ parts.append(f"Channel: {channel}")
627
+ if url:
628
+ parts.append(f"URL: {url}")
629
+ if description:
630
+ parts.append(f"Description:\n{description}")
631
+ return "\n".join(parts)
632
+
633
+
634
+ def _analysis_flow(metadata: dict, priors_obj: Priors | dict) -> dict:
635
+ reference = metadata["reference"]
636
+ video_path = Path(metadata["download_path"])
637
+ audio_path = _ensure_audio_sidecar(video_path, reference)
638
+
639
+ priors = priors_obj if isinstance(priors_obj, Priors) else Priors.from_obj(priors_obj)
640
+ priors.media_context = _media_context_from_metadata(metadata)
641
+
642
+ # Ensure slides exist; reuse cache if available
643
+ slides_payload = _load_json(_slides_json_path(reference))
644
+ if not slides_payload:
645
+ slides_payload = _extract_slides_flow(metadata)
646
+
647
+ slides = slides_payload.get("slides", [])
648
+
649
+ # Upload slide stills to Gemini for context
650
+ client = _build_gemini_client()
651
+ uploaded_slides = []
652
+ for slide in slides:
653
+ data_uri = slide.get("image_data_uri")
654
+ if not data_uri:
655
+ continue
656
+ _, b64 = data_uri.split(",", 1)
657
+ image_bytes = base64.b64decode(b64)
658
+ path = SLIDE_CACHE / reference / "_tmp_upload.png"
659
+ path.write_bytes(image_bytes)
660
+ upload = client.files.upload(
661
+ file=str(path),
662
+ config=None,
663
+ )
664
+ upload = _wait_for_upload(client, upload)
665
+ slide["file_uri"] = upload.uri
666
+ uploaded_slides.append(slide)
667
+
668
+ with _silence_stdio(): # suppress any upload chatter
669
+ analysis_result = _gemini_analyze_audio(client, audio_path, uploaded_slides, priors)
670
+
671
+ payload = {
672
+ "reference": reference,
673
+ "analysis": analysis_result.get("analysis"),
674
+ "slide_count": len(uploaded_slides),
675
+ "audio_uri": analysis_result.get("audio_file_uri"),
676
+ "source": metadata.get("source"),
677
+ "title": metadata.get("title"),
678
+ }
679
+
680
+ _save_json(_analysis_json_path(reference), payload)
681
+ _write_debug(reference, "analysis.json", payload)
682
+ return payload
683
+
684
+
685
+ # ---------------------------------------------------------------------------------------------------------------------
686
+ # Public MCP registration
687
+ # ---------------------------------------------------------------------------------------------------------------------
688
+
689
+
690
+ def register_media_tools(app: FastMCP) -> None:
691
+ """Register media-related MCP tools on the given app."""
692
+
693
+ @app.tool()
694
+ async def start_media_retrieval(
695
+ ctx: Context,
696
+ source: str,
697
+ prefer_audio_only: bool = False,
698
+ wait_seconds: int = 54,
699
+ ) -> dict:
700
+ """
701
+ Retrieve long-form media (conference session, lecture, webinar, podcast episode, or direct HTTP media URL).
702
+ Designed for MCP clients / LLM tools that have short time limits: will wait up to
703
+ `wait_seconds` for completion, otherwise returns in-progress status plus a `reference`
704
+ token that can be used with `get_media_retrieval_status`, `start_media_analysis`, and slide tools.
705
+
706
+ Note:
707
+ - Claude uses an internal timeout of 240 seconds. `wait_seconds` should be of the same order of magnitude as Claude's timeout, and at least 55 seconds if in doubt.
708
+
709
+ Parameters:
710
+ source: YouTube URL/ID, podcast/HTTP media URL, or any other locator supported by yt-dlp.
711
+ prefer_audio_only: If true, download audio-first formats to save bandwidth when visuals (e.g. slides) are not needed. Default is False, as visuals often allow richer analysis. Audio-only should only be used if asked for by the user specifically.
712
+ wait_seconds: Time to await before returning; helps fast-complete short downloads without extra calls.
713
+
714
+ Returns (happy path):
715
+ { reference, status="done", metadata={title, description, duration, download_path, ...}, cached? }
716
+
717
+ Returns (in progress):
718
+ { reference, status="running" | "pending", progress?, job_id }
719
+
720
+ Returns (error):
721
+ { is_error: true, status: "error"|"failed", detail, reference }
722
+ """
723
+
724
+ info_reference = None
725
+ try:
726
+ from yt_dlp import YoutubeDL
727
+
728
+ with YoutubeDL(params={"skip_download": True, "quiet": True, "noplaylist": True}) as ydl:
729
+ info = ydl.extract_info(source, download=False)
730
+ info_reference = _build_reference(info, source)
731
+ except Exception:
732
+ info_reference = _build_reference(None, source)
733
+
734
+ reference = info_reference
735
+
736
+ # If already cached, skip job creation
737
+ metadata = _load_json(_metadata_path(reference))
738
+ if metadata and Path(metadata.get("download_path", "")).exists():
739
+ return {
740
+ "reference": reference,
741
+ "status": JobStatus.DONE,
742
+ "cached": True,
743
+ "metadata": metadata,
744
+ }
745
+
746
+ def factory() -> JobRecord:
747
+ return JobRecord(id=secrets.token_urlsafe(16), kind="media_retrieval", reference=reference)
748
+
749
+ job = await _get_or_create_job("media_retrieval", reference, factory)
750
+
751
+ if job.status in (JobStatus.DONE, JobStatus.RUNNING):
752
+ return await _maybe_wait(job, wait_seconds)
753
+
754
+ async def runner():
755
+ job.status = JobStatus.RUNNING
756
+ try:
757
+ metadata_result = await asyncio.to_thread(
758
+ _run_ytdlp_download, source, reference, prefer_audio_only
759
+ )
760
+ job.result = metadata_result
761
+ job.status = JobStatus.DONE
762
+ except Exception as exc: # pragma: no cover - defensive
763
+ log.exception("media retrieval failed for %s", reference)
764
+ job.error = str(exc)
765
+ job.status = JobStatus.FAILED
766
+ finally:
767
+ job.finished_at = time.time()
768
+
769
+ job.task = asyncio.create_task(runner())
770
+ return await _maybe_wait(job, wait_seconds)
771
+
772
+ @app.tool()
773
+ async def get_media_retrieval_status(ctx: Context, reference: str, wait_seconds: int = 0) -> dict:
774
+ """Poll download status for a `reference` returned by start_media_retrieval.
775
+
776
+ Returns cached metadata immediately when available; otherwise echoes job status or {status: "not_found"}.
777
+ Errors include `is_error: true`.
778
+ """
779
+ metadata = _load_json(_metadata_path(reference))
780
+ if metadata and Path(metadata.get("download_path", "")).exists():
781
+ return {
782
+ "reference": reference,
783
+ "status": JobStatus.DONE,
784
+ "metadata": metadata,
785
+ }
786
+
787
+ job_id = REFERENCE_INDEX.get(("media_retrieval", reference))
788
+ if job_id and job_id in JOBS:
789
+ job = JOBS[job_id]
790
+ if wait_seconds > 0:
791
+ return await _maybe_wait(job, wait_seconds)
792
+ return _job_payload(job, include_result=True)
793
+
794
+ return {"status": "not_found", "reference": reference}
795
+
796
+ @app.tool()
797
+ async def start_slide_extraction(ctx: Context, reference: str, wait_seconds: int = 55) -> dict:
798
+ """Extract representative slide stills from a downloaded video.
799
+
800
+ Note: media analysis (start_media_analysis) includes slides extraction, so no need to call this function explicitely when aiming for full media analysis
801
+ """
802
+ metadata = _load_json(_metadata_path(reference))
803
+ if not metadata or not Path(metadata.get("download_path", "")).exists():
804
+ return _error("media not downloaded", reference)
805
+
806
+ existing = _load_json(_slides_json_path(reference))
807
+ if existing:
808
+ return {
809
+ "status": JobStatus.DONE,
810
+ "reference": reference,
811
+ "slides": existing,
812
+ "cached": True,
813
+ }
814
+
815
+ def factory() -> JobRecord:
816
+ return JobRecord(id=secrets.token_urlsafe(16), kind="slide_extraction", reference=reference)
817
+
818
+ job = await _get_or_create_job("slide_extraction", reference, factory)
819
+ if job.status in (JobStatus.DONE, JobStatus.RUNNING):
820
+ return await _maybe_wait(job, wait_seconds)
821
+
822
+ async def runner():
823
+ job.status = JobStatus.RUNNING
824
+ try:
825
+ slide_payload = await asyncio.to_thread(_extract_slides_flow, metadata)
826
+ job.result = slide_payload
827
+ job.status = JobStatus.DONE
828
+ except Exception as exc:
829
+ log.exception("slide extraction failed for %s", reference)
830
+ job.status = JobStatus.FAILED
831
+ job.error = str(exc)
832
+ finally:
833
+ job.finished_at = time.time()
834
+
835
+ job.task = asyncio.create_task(runner())
836
+ return await _maybe_wait(job, wait_seconds)
837
+
838
+ @app.tool()
839
+ async def get_extracted_slides(ctx: Context, reference: str, wait_seconds: int = 0) -> dict:
840
+ """Fetch extracted slides for a reference, or current slide-extraction job status."""
841
+ existing = _load_json(_slides_json_path(reference))
842
+ if existing:
843
+ return {
844
+ "status": JobStatus.DONE,
845
+ "reference": reference,
846
+ "slides": existing,
847
+ }
848
+
849
+ job_id = REFERENCE_INDEX.get(("slide_extraction", reference))
850
+ if job_id and job_id in JOBS:
851
+ job = JOBS[job_id]
852
+ if wait_seconds > 0:
853
+ return await _maybe_wait(job, wait_seconds)
854
+ return _job_payload(job, include_result=True)
855
+
856
+ return {"status": "not_found", "reference": reference}
857
+
858
+ @app.tool()
859
+ async def start_media_analysis(
860
+ ctx: Context,
861
+ reference: str,
862
+ priors: dict,
863
+ wait_seconds: int = 55,
864
+ ) -> dict:
865
+ """
866
+ Analyze the primary audio plus extracted slides, guided by rich "priors" (analysis hints).
867
+
868
+ Priors object schema (all strings, optional):
869
+ - context: User-supplied scene-setting (participants, venue, meeting goal, etc.). Used to establish topical background and spelling of names, abbreviations, etc. Optional field, should only be filled with data explicitely supplied by the user.
870
+ - expectations: What factuals, insights or takeaways are anticipated by the user. These serve as a baseline, and surprises from this will be surfaced. Optional field, should only be filled with data explicitely supplied by the user.
871
+ - prior_knowledge: What the user already knows (acronyms, previous meetings). This again serves as a basis for efficient information foraging efforts by the user. Optional field, should only be filled with data explicitely supplied by the user.
872
+ - questions: Specific questions from the user to answer. Optional field, should only be filled with data explicitely supplied by the user.
873
+
874
+ The analysis automatically builds on priors with media-derived context (title, description, channel, URL) and supplies
875
+ user context to the analysis pipeline as well.
876
+
877
+ Parameters:
878
+ - reference: the reference token obtained from `start_media_retrieval`
879
+ - priors: prior information and user-supplied background per the "Priors object schema" definition
880
+
881
+ Note:
882
+ - Claude uses an internal timeout of 240 seconds. `wait_seconds` should be in the same order of magnitude with Claude, and a minimum of 55 seconds if in doubt.
883
+
884
+ Returns:
885
+ - in-progress status if still running; use the 'get_media_analysis_result' to monitor for further progress and to retrieve the final result
886
+ - if process has already finished, the final analysis text is returned
887
+ - errors are flagged with `is_error: true`.
888
+ """
889
+ metadata = _load_json(_metadata_path(reference))
890
+ if not metadata or not Path(metadata.get("download_path", "")).exists():
891
+ return _error("media not downloaded", reference)
892
+
893
+ if not isinstance(priors, dict):
894
+ return _error("priors must be an object with string fields: context, expectations, prior_knowledge, questions", reference)
895
+
896
+ existing = _load_json(_analysis_json_path(reference))
897
+ if existing:
898
+ return {"status": JobStatus.DONE, "reference": reference, "analysis": existing, "cached": True}
899
+
900
+ def factory() -> JobRecord:
901
+ return JobRecord(id=secrets.token_urlsafe(16), kind="media_analysis", reference=reference)
902
+
903
+ job = await _get_or_create_job("media_analysis", reference, factory)
904
+ if job.status in (JobStatus.DONE, JobStatus.RUNNING):
905
+ return await _maybe_wait(job, wait_seconds)
906
+
907
+ async def runner():
908
+ job.status = JobStatus.RUNNING
909
+ try:
910
+ result = await asyncio.to_thread(_analysis_flow, metadata, priors)
911
+ job.result = result
912
+ job.status = JobStatus.DONE
913
+ except Exception as exc:
914
+ log.exception("media analysis failed for %s", reference)
915
+ job.status = JobStatus.FAILED
916
+ job.error = str(exc)
917
+ finally:
918
+ job.finished_at = time.time()
919
+
920
+ job.task = asyncio.create_task(runner())
921
+ return await _maybe_wait(job, wait_seconds)
922
+
923
+ @app.tool()
924
+ async def get_media_analysis_result(ctx: Context, reference: str, wait_seconds: int = 0) -> dict:
925
+ """Return completed analysis for a reference, or current job status, with `is_error` on failures."""
926
+ existing = _load_json(_analysis_json_path(reference))
927
+ if existing:
928
+ return {"status": JobStatus.DONE, "reference": reference, "analysis": existing}
929
+
930
+ job_id = REFERENCE_INDEX.get(("media_analysis", reference))
931
+ if job_id and job_id in JOBS:
932
+ job = JOBS[job_id]
933
+ if wait_seconds > 0:
934
+ return await _maybe_wait(job, wait_seconds)
935
+ return _job_payload(job, include_result=True)
936
+
937
+ return {"status": "not_found", "reference": reference}
938
+
939
+
940
+ __all__ = ["register_media_tools"]
mcp/src/aileen3_mcp/server.py CHANGED
@@ -1,10 +1,16 @@
1
  from __future__ import annotations
2
 
3
  import logging
 
4
  from dataclasses import asdict, dataclass
5
 
 
 
 
6
  from fastmcp import FastMCP
7
 
 
 
8
  log = logging.getLogger(__name__)
9
 
10
 
@@ -20,6 +26,27 @@ def make_app() -> FastMCP:
20
 
21
  @app.tool()
22
  def health() -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def _gemini_key_ok() -> tuple[bool, str]:
24
  key = bool(os.environ.get("GEMINI_API_KEY"))
25
  return (key, "GEMINI_API_KEY is set" if key else "GEMINI_API_KEY missing")
@@ -62,15 +89,20 @@ def make_app() -> FastMCP:
62
  capped_results = max(1, min(max_results, 50))
63
  opts = {
64
  "quiet": True,
 
 
65
  "skip_download": True,
66
  "extract_flat": "in_playlist",
 
 
67
  }
68
 
69
  search_spec = f"ytsearch{capped_results}:{query}"
70
  log.info("search_youtube query=%r max_results=%d", query, capped_results)
71
 
72
- with YoutubeDL(opts) as ydl:
73
- info = ydl.extract_info(search_spec, download=False)
 
74
 
75
  entries = info.get("entries", []) if info else []
76
  videos = []
@@ -93,6 +125,15 @@ def make_app() -> FastMCP:
93
 
94
  return {"videos": videos}
95
 
 
 
 
 
 
 
 
 
 
96
  return app
97
 
98
 
 
1
  from __future__ import annotations
2
 
3
  import logging
4
+ import os
5
  from dataclasses import asdict, dataclass
6
 
7
+ import shutil
8
+ import subprocess
9
+
10
  from fastmcp import FastMCP
11
 
12
+ from aileen3_mcp.media_tools import register_media_tools, _silence_stdio, _YDLLogger
13
+
14
  log = logging.getLogger(__name__)
15
 
16
 
 
26
 
27
  @app.tool()
28
  def health() -> dict:
29
+ """Return a basic health payload including ffmpeg and Gemini env availability."""
30
+
31
+ def _ffmpeg_ok() -> tuple[bool, str]:
32
+ binary = shutil.which("ffmpeg")
33
+ if not binary:
34
+ return False, "ffmpeg not found on PATH"
35
+ try:
36
+ completed = subprocess.run(
37
+ [binary, "-version"],
38
+ capture_output=True,
39
+ text=True,
40
+ timeout=5,
41
+ check=False,
42
+ )
43
+ except Exception as exc: # pragma: no cover - defensive
44
+ return False, f"ffmpeg exec failed: {exc}"
45
+ if completed.returncode != 0:
46
+ return False, completed.stderr.strip() or "ffmpeg returned error"
47
+ first = (completed.stdout or "").splitlines()[0] if completed.stdout else "ffmpeg present"
48
+ return True, first
49
+
50
  def _gemini_key_ok() -> tuple[bool, str]:
51
  key = bool(os.environ.get("GEMINI_API_KEY"))
52
  return (key, "GEMINI_API_KEY is set" if key else "GEMINI_API_KEY missing")
 
89
  capped_results = max(1, min(max_results, 50))
90
  opts = {
91
  "quiet": True,
92
+ "no_warnings": True,
93
+ "noprogress": True,
94
  "skip_download": True,
95
  "extract_flat": "in_playlist",
96
+ "logger": _YDLLogger(),
97
+ "extractor_args": {"youtube": {"player_client": ["default"]}},
98
  }
99
 
100
  search_spec = f"ytsearch{capped_results}:{query}"
101
  log.info("search_youtube query=%r max_results=%d", query, capped_results)
102
 
103
+ with _silence_stdio():
104
+ with YoutubeDL(opts) as ydl:
105
+ info = ydl.extract_info(search_spec, download=False)
106
 
107
  entries = info.get("entries", []) if info else []
108
  videos = []
 
125
 
126
  return {"videos": videos}
127
 
128
+ # Register media analysis tools:
129
+ # - start_media_retrieval
130
+ # - get_media_retrieval_status
131
+ # - start_slide_extraction
132
+ # - get_extracted_slides
133
+ # - start_media_analysis
134
+ # - get_media_analysis_result
135
+ register_media_tools(app)
136
+
137
  return app
138
 
139