Spaces:
Sleeping
Sleeping
YouTube analysis works
Browse files- Dockerfile +1 -1
- demo/health.py +23 -0
- demo/requirements.txt +1 -0
- mcp/pyproject.toml +1 -0
- mcp/src/aileen3_mcp/__init__.py +1 -1
- mcp/src/aileen3_mcp/media_tools.py +940 -0
- mcp/src/aileen3_mcp/server.py +43 -2
Dockerfile
CHANGED
|
@@ -10,7 +10,7 @@ ARG DENO_VERSION=2.0.0
|
|
| 10 |
WORKDIR /app
|
| 11 |
|
| 12 |
RUN apt-get update && \
|
| 13 |
-
apt-get install -y --no-install-recommends curl unzip ca-certificates && \
|
| 14 |
curl -fsSL "https://github.com/denoland/deno/releases/download/v${DENO_VERSION}/deno-x86_64-unknown-linux-gnu.zip" -o /tmp/deno.zip && \
|
| 15 |
unzip -q /tmp/deno.zip -d /tmp && \
|
| 16 |
mv /tmp/deno /usr/local/bin/deno && \
|
|
|
|
| 10 |
WORKDIR /app
|
| 11 |
|
| 12 |
RUN apt-get update && \
|
| 13 |
+
apt-get install -y --no-install-recommends curl unzip ca-certificates ffmpeg && \
|
| 14 |
curl -fsSL "https://github.com/denoland/deno/releases/download/v${DENO_VERSION}/deno-x86_64-unknown-linux-gnu.zip" -o /tmp/deno.zip && \
|
| 15 |
unzip -q /tmp/deno.zip -d /tmp && \
|
| 16 |
mv /tmp/deno /usr/local/bin/deno && \
|
demo/health.py
CHANGED
|
@@ -17,6 +17,7 @@ from typing import Iterable
|
|
| 17 |
|
| 18 |
MIN_DENO_VERSION = (2, 0, 0)
|
| 19 |
MIN_YTDLP_VERSION = (2025, 11, 12)
|
|
|
|
| 20 |
GEMINI_ENV_VAR = "GEMINI_API_KEY"
|
| 21 |
|
| 22 |
|
|
@@ -102,6 +103,27 @@ def _check_yt_dlp_python() -> ToolStatus:
|
|
| 102 |
return ToolStatus(label, False, "yt_dlp_ejs missing (JS sites will fail)")
|
| 103 |
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
def _check_gemini_env() -> ToolStatus:
|
| 106 |
label = "Gemini API key"
|
| 107 |
if os.environ.get(GEMINI_ENV_VAR):
|
|
@@ -174,6 +196,7 @@ def run_health_report() -> HealthReport:
|
|
| 174 |
tool_statuses = [
|
| 175 |
_check_deno(),
|
| 176 |
_check_yt_dlp_python(),
|
|
|
|
| 177 |
_check_gemini_env(),
|
| 178 |
_check_mcp_health(),
|
| 179 |
]
|
|
|
|
| 17 |
|
| 18 |
MIN_DENO_VERSION = (2, 0, 0)
|
| 19 |
MIN_YTDLP_VERSION = (2025, 11, 12)
|
| 20 |
+
MIN_FFMPEG_VERSION = (4, 0)
|
| 21 |
GEMINI_ENV_VAR = "GEMINI_API_KEY"
|
| 22 |
|
| 23 |
|
|
|
|
| 103 |
return ToolStatus(label, False, "yt_dlp_ejs missing (JS sites will fail)")
|
| 104 |
|
| 105 |
|
| 106 |
+
def _check_ffmpeg() -> ToolStatus:
|
| 107 |
+
label = "ffmpeg"
|
| 108 |
+
binary = shutil.which("ffmpeg")
|
| 109 |
+
if not binary:
|
| 110 |
+
return ToolStatus(label, False, "`ffmpeg` binary not found on PATH")
|
| 111 |
+
try:
|
| 112 |
+
completed = subprocess.run(
|
| 113 |
+
[binary, "-version"],
|
| 114 |
+
capture_output=True,
|
| 115 |
+
text=True,
|
| 116 |
+
check=False,
|
| 117 |
+
timeout=5,
|
| 118 |
+
)
|
| 119 |
+
except Exception as exc:
|
| 120 |
+
return ToolStatus(label, False, f"failed to exec: {exc}")
|
| 121 |
+
if completed.returncode != 0:
|
| 122 |
+
return ToolStatus(label, False, completed.stderr.strip() or "ffmpeg returned error")
|
| 123 |
+
first_line = (completed.stdout or "").splitlines()[0] if completed.stdout else "ffmpeg present"
|
| 124 |
+
return ToolStatus(label, True, first_line)
|
| 125 |
+
|
| 126 |
+
|
| 127 |
def _check_gemini_env() -> ToolStatus:
|
| 128 |
label = "Gemini API key"
|
| 129 |
if os.environ.get(GEMINI_ENV_VAR):
|
|
|
|
| 196 |
tool_statuses = [
|
| 197 |
_check_deno(),
|
| 198 |
_check_yt_dlp_python(),
|
| 199 |
+
_check_ffmpeg(),
|
| 200 |
_check_gemini_env(),
|
| 201 |
_check_mcp_health(),
|
| 202 |
]
|
demo/requirements.txt
CHANGED
|
@@ -2,3 +2,4 @@ gradio>=6.0.0.dev0
|
|
| 2 |
yt-dlp[default]>=2025.11.12
|
| 3 |
fastmcp>=0.1.11
|
| 4 |
google-genai>=0.8.0
|
|
|
|
|
|
| 2 |
yt-dlp[default]>=2025.11.12
|
| 3 |
fastmcp>=0.1.11
|
| 4 |
google-genai>=0.8.0
|
| 5 |
+
ffmpeg-python>=0.2.0
|
mcp/pyproject.toml
CHANGED
|
@@ -11,6 +11,7 @@ dependencies = [
|
|
| 11 |
"fastmcp>=0.1.11",
|
| 12 |
"yt-dlp[default]>=2025.11.12",
|
| 13 |
"google-genai>=0.8.0",
|
|
|
|
| 14 |
]
|
| 15 |
|
| 16 |
[project.scripts]
|
|
|
|
| 11 |
"fastmcp>=0.1.11",
|
| 12 |
"yt-dlp[default]>=2025.11.12",
|
| 13 |
"google-genai>=0.8.0",
|
| 14 |
+
"ffmpeg-python>=0.2.0"
|
| 15 |
]
|
| 16 |
|
| 17 |
[project.scripts]
|
mcp/src/aileen3_mcp/__init__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
"""Aileen3 MCP server package."""
|
| 2 |
|
| 3 |
-
__all__ = ["server"]
|
|
|
|
| 1 |
"""Aileen3 MCP server package."""
|
| 2 |
|
| 3 |
+
__all__ = ["server", "media_tools"]
|
mcp/src/aileen3_mcp/media_tools.py
ADDED
|
@@ -0,0 +1,940 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
import base64
|
| 5 |
+
import hashlib
|
| 6 |
+
import json
|
| 7 |
+
import logging
|
| 8 |
+
import os
|
| 9 |
+
import re
|
| 10 |
+
import secrets
|
| 11 |
+
import tempfile
|
| 12 |
+
import time
|
| 13 |
+
from dataclasses import dataclass, field
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
from typing import Any, Callable, Dict, Optional
|
| 16 |
+
|
| 17 |
+
import ffmpeg
|
| 18 |
+
from fastmcp import Context, FastMCP
|
| 19 |
+
from contextlib import redirect_stdout, redirect_stderr, contextmanager
|
| 20 |
+
import io
|
| 21 |
+
|
| 22 |
+
log = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 26 |
+
# Paths & storage
|
| 27 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 28 |
+
BASE_CACHE = Path(os.environ.get("AILEEN3_CACHE_DIR", Path.home() / ".cache" / "aileen3"))
|
| 29 |
+
MEDIA_CACHE = BASE_CACHE / "media"
|
| 30 |
+
SLIDE_CACHE = BASE_CACHE / "slides"
|
| 31 |
+
ANALYSIS_CACHE = BASE_CACHE / "analysis"
|
| 32 |
+
|
| 33 |
+
for _path in (MEDIA_CACHE, SLIDE_CACHE, ANALYSIS_CACHE):
|
| 34 |
+
_path.mkdir(parents=True, exist_ok=True)
|
| 35 |
+
|
| 36 |
+
DEBUG = os.environ.get("AILEEN3_DEBUG", "").lower() in {"1", "true", "yes", "on"}
|
| 37 |
+
DEBUG_DIR = Path(tempfile.gettempdir()) / "aileen3-debug"
|
| 38 |
+
if DEBUG:
|
| 39 |
+
DEBUG_DIR.mkdir(parents=True, exist_ok=True)
|
| 40 |
+
|
| 41 |
+
LOG_DIR = BASE_CACHE / "logs"
|
| 42 |
+
LOG_DIR.mkdir(parents=True, exist_ok=True)
|
| 43 |
+
LOG_FILE = LOG_DIR / "aileen3-mcp.log"
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _ensure_file_logging():
|
| 47 |
+
root = logging.getLogger()
|
| 48 |
+
# Avoid adding duplicate handlers to root
|
| 49 |
+
for h in root.handlers:
|
| 50 |
+
if isinstance(h, logging.FileHandler) and Path(getattr(h, "baseFilename", "")) == LOG_FILE:
|
| 51 |
+
return
|
| 52 |
+
handler = logging.FileHandler(LOG_FILE, encoding="utf-8")
|
| 53 |
+
handler.setLevel(logging.DEBUG if DEBUG else logging.INFO)
|
| 54 |
+
fmt = logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")
|
| 55 |
+
handler.setFormatter(fmt)
|
| 56 |
+
root.addHandler(handler)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
_ensure_file_logging()
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _write_debug(reference: str, suffix: str, data: Any) -> None:
|
| 63 |
+
if not DEBUG:
|
| 64 |
+
return
|
| 65 |
+
path = DEBUG_DIR / f"{reference}_{suffix}"
|
| 66 |
+
try:
|
| 67 |
+
if isinstance(data, (bytes, bytearray)):
|
| 68 |
+
path.write_bytes(data)
|
| 69 |
+
else:
|
| 70 |
+
path.write_text(json.dumps(data, indent=2, default=str))
|
| 71 |
+
except Exception:
|
| 72 |
+
log.debug("Failed to write debug artifact %s", path)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
class _YDLLogger:
|
| 76 |
+
"""Silence yt-dlp stdout/stderr while keeping messages in Python logging."""
|
| 77 |
+
|
| 78 |
+
def debug(self, msg):
|
| 79 |
+
log.debug("yt-dlp: %s", msg)
|
| 80 |
+
|
| 81 |
+
def info(self, msg):
|
| 82 |
+
log.info("yt-dlp: %s", msg)
|
| 83 |
+
|
| 84 |
+
def warning(self, msg):
|
| 85 |
+
log.warning("yt-dlp: %s", msg)
|
| 86 |
+
|
| 87 |
+
def error(self, msg):
|
| 88 |
+
log.error("yt-dlp: %s", msg)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
@contextmanager
|
| 92 |
+
def _silence_stdio():
|
| 93 |
+
buf_out = io.StringIO()
|
| 94 |
+
buf_err = io.StringIO()
|
| 95 |
+
with redirect_stdout(buf_out), redirect_stderr(buf_err):
|
| 96 |
+
yield
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 100 |
+
# Job bookkeeping
|
| 101 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class JobStatus:
|
| 105 |
+
PENDING = "pending"
|
| 106 |
+
RUNNING = "running"
|
| 107 |
+
DONE = "done"
|
| 108 |
+
FAILED = "failed"
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
@dataclass
|
| 112 |
+
class Priors:
|
| 113 |
+
"""User-supplied and media-derived context to steer analysis."""
|
| 114 |
+
|
| 115 |
+
context: str = ""
|
| 116 |
+
expectations: str = ""
|
| 117 |
+
prior_knowledge: str = ""
|
| 118 |
+
questions: str = ""
|
| 119 |
+
media_context: str = ""
|
| 120 |
+
|
| 121 |
+
@classmethod
|
| 122 |
+
def from_obj(cls, obj: dict | None, media_context: str = "") -> "Priors":
|
| 123 |
+
obj = obj or {}
|
| 124 |
+
return cls(
|
| 125 |
+
context=str(obj.get("context", "") or ""),
|
| 126 |
+
expectations=str(obj.get("expectations", "") or ""),
|
| 127 |
+
prior_knowledge=str(obj.get("prior_knowledge") or obj.get("prior knowledge") or ""),
|
| 128 |
+
questions=str(obj.get("questions", "") or ""),
|
| 129 |
+
media_context=media_context,
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
def as_prompt_text(self) -> str:
|
| 133 |
+
sections = []
|
| 134 |
+
for label, value in (
|
| 135 |
+
("User context", self.context),
|
| 136 |
+
("Expectations", self.expectations),
|
| 137 |
+
("Prior knowledge", self.prior_knowledge),
|
| 138 |
+
("Questions", self.questions),
|
| 139 |
+
("Media context", self.media_context),
|
| 140 |
+
):
|
| 141 |
+
if value:
|
| 142 |
+
sections.append(f"``` {label}\n{value}\n```\n")
|
| 143 |
+
return "\n".join(sections) if sections else "No specific priors provided."
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
@dataclass
|
| 147 |
+
class JobRecord:
|
| 148 |
+
id: str
|
| 149 |
+
kind: str
|
| 150 |
+
reference: str
|
| 151 |
+
status: str = JobStatus.PENDING
|
| 152 |
+
progress: float = 0.0
|
| 153 |
+
error: Optional[str] = None
|
| 154 |
+
result: Optional[dict] = None
|
| 155 |
+
created_at: float = field(default_factory=time.time)
|
| 156 |
+
finished_at: Optional[float] = None
|
| 157 |
+
task: Optional[asyncio.Task] = field(default=None, repr=False)
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
JOBS: Dict[str, JobRecord] = {}
|
| 161 |
+
REFERENCE_INDEX: Dict[tuple[str, str], str] = {}
|
| 162 |
+
JOB_LOCK = asyncio.Lock()
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def _error(detail: str, reference: str | None = None, status: str = "error") -> dict:
|
| 166 |
+
payload = {"status": status, "detail": detail, "is_error": True}
|
| 167 |
+
if reference:
|
| 168 |
+
payload["reference"] = reference
|
| 169 |
+
return payload
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def _build_reference(info: dict | None, source: str) -> str:
|
| 173 |
+
source = source.strip()
|
| 174 |
+
if info:
|
| 175 |
+
extractor = (info.get("extractor_key") or "media").lower()
|
| 176 |
+
vid = info.get("id")
|
| 177 |
+
if vid and re.fullmatch(r"[A-Za-z0-9_-]+", str(vid)):
|
| 178 |
+
safe_id = re.sub(r"[^A-Za-z0-9_-]", "_", str(vid))
|
| 179 |
+
return f"{extractor}_{safe_id}"[:200]
|
| 180 |
+
|
| 181 |
+
digest = hashlib.sha256(source.encode()).hexdigest()[:32]
|
| 182 |
+
return f"media_{digest}"
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def _job_payload(job: JobRecord, include_result: bool = True) -> dict:
|
| 186 |
+
payload = {
|
| 187 |
+
"job_id": job.id,
|
| 188 |
+
"reference": job.reference,
|
| 189 |
+
"kind": job.kind,
|
| 190 |
+
"status": job.status,
|
| 191 |
+
"progress": job.progress,
|
| 192 |
+
"created_at": job.created_at,
|
| 193 |
+
"finished_at": job.finished_at,
|
| 194 |
+
}
|
| 195 |
+
if job.error:
|
| 196 |
+
payload["error"] = job.error
|
| 197 |
+
payload["is_error"] = True
|
| 198 |
+
if job.status == JobStatus.FAILED:
|
| 199 |
+
payload["is_error"] = True
|
| 200 |
+
if include_result and job.status == JobStatus.DONE:
|
| 201 |
+
payload["result"] = job.result
|
| 202 |
+
return payload
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
async def _maybe_wait(job: JobRecord, wait_seconds: int) -> dict:
|
| 206 |
+
"""Wait briefly for completion; otherwise return running status."""
|
| 207 |
+
task = job.task
|
| 208 |
+
if not task:
|
| 209 |
+
return _job_payload(job, include_result=False)
|
| 210 |
+
|
| 211 |
+
try:
|
| 212 |
+
await asyncio.wait_for(asyncio.shield(task), timeout=max(0, wait_seconds))
|
| 213 |
+
except asyncio.TimeoutError:
|
| 214 |
+
return _job_payload(job, include_result=False)
|
| 215 |
+
except asyncio.CancelledError:
|
| 216 |
+
job.status = JobStatus.FAILED
|
| 217 |
+
job.error = "task cancelled"
|
| 218 |
+
job.finished_at = time.time()
|
| 219 |
+
return _job_payload(job, include_result=False)
|
| 220 |
+
|
| 221 |
+
# If we reach here, task finished
|
| 222 |
+
return _job_payload(job, include_result=True)
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
async def _get_or_create_job(kind: str, reference: str, factory: Callable[[], JobRecord]) -> JobRecord:
|
| 226 |
+
async with JOB_LOCK:
|
| 227 |
+
existing_id = REFERENCE_INDEX.get((kind, reference))
|
| 228 |
+
if existing_id and existing_id in JOBS:
|
| 229 |
+
return JOBS[existing_id]
|
| 230 |
+
|
| 231 |
+
job = factory()
|
| 232 |
+
JOBS[job.id] = job
|
| 233 |
+
REFERENCE_INDEX[(kind, reference)] = job.id
|
| 234 |
+
return job
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 238 |
+
# Helpers: media metadata & ffmpeg probes
|
| 239 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
def _media_dir(reference: str) -> Path:
|
| 243 |
+
return MEDIA_CACHE / reference
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
def _metadata_path(reference: str) -> Path:
|
| 247 |
+
return _media_dir(reference) / "metadata.json"
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
def _slides_json_path(reference: str) -> Path:
|
| 251 |
+
return SLIDE_CACHE / f"{reference}.json"
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
def _analysis_json_path(reference: str) -> Path:
|
| 255 |
+
return ANALYSIS_CACHE / f"{reference}.json"
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
def _load_json(path: Path) -> dict | None:
|
| 259 |
+
if path.exists():
|
| 260 |
+
try:
|
| 261 |
+
return json.loads(path.read_text())
|
| 262 |
+
except Exception:
|
| 263 |
+
log.warning("Failed to parse JSON from %s", path)
|
| 264 |
+
return None
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
def _save_json(path: Path, payload: dict) -> None:
|
| 268 |
+
path.parent.mkdir(parents=True, exist_ok=True)
|
| 269 |
+
path.write_text(json.dumps(payload, indent=2))
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def _probe_duration(video_path: Path) -> Optional[float]:
|
| 273 |
+
try:
|
| 274 |
+
probe = ffmpeg.probe(str(video_path))
|
| 275 |
+
fmt = probe.get("format", {})
|
| 276 |
+
duration_str = fmt.get("duration")
|
| 277 |
+
return float(duration_str) if duration_str else None
|
| 278 |
+
except Exception:
|
| 279 |
+
return None
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
def _extract_frame(video_path: Path, timestamp: float) -> Optional[bytes]:
|
| 283 |
+
if timestamp < 0:
|
| 284 |
+
return None
|
| 285 |
+
try:
|
| 286 |
+
out, err = (
|
| 287 |
+
ffmpeg.input(str(video_path), ss=timestamp)
|
| 288 |
+
.output("pipe:", vframes=1, format="image2", vcodec="png")
|
| 289 |
+
.run(capture_stdout=True, capture_stderr=True, overwrite_output=True)
|
| 290 |
+
)
|
| 291 |
+
except ffmpeg.Error as exc: # pragma: no cover - runtime dependency
|
| 292 |
+
log.debug("ffmpeg extract error for %s at %.2fs: %s", video_path, timestamp, exc.stderr.decode(errors="ignore")[:200])
|
| 293 |
+
return None
|
| 294 |
+
return out
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 298 |
+
# yt-dlp based download
|
| 299 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
def _run_ytdlp_download(source: str, reference: str, prefer_audio_only: bool) -> dict:
|
| 303 |
+
from yt_dlp import YoutubeDL # local import to keep module import light
|
| 304 |
+
|
| 305 |
+
target_dir = _media_dir(reference)
|
| 306 |
+
target_dir.mkdir(parents=True, exist_ok=True)
|
| 307 |
+
|
| 308 |
+
ytdlp_opts: dict[str, Any] = {
|
| 309 |
+
"outtmpl": str(target_dir / "%(id)s.%(ext)s"),
|
| 310 |
+
"quiet": True,
|
| 311 |
+
"noplaylist": True,
|
| 312 |
+
"ignoreerrors": False,
|
| 313 |
+
}
|
| 314 |
+
|
| 315 |
+
# Prefer combined AV for slides; fall back to audio only if requested or video unavailable
|
| 316 |
+
if prefer_audio_only:
|
| 317 |
+
ytdlp_opts["format"] = "bestaudio/best"
|
| 318 |
+
else:
|
| 319 |
+
ytdlp_opts["format"] = "bestvideo+bestaudio/best"
|
| 320 |
+
|
| 321 |
+
shared_opts = {
|
| 322 |
+
"skip_download": True,
|
| 323 |
+
"quiet": True,
|
| 324 |
+
"no_warnings": True,
|
| 325 |
+
"noprogress": True,
|
| 326 |
+
"noplaylist": True,
|
| 327 |
+
"logger": _YDLLogger(),
|
| 328 |
+
"extractor_args": {"youtube": {"player_client": ["default"]}},
|
| 329 |
+
}
|
| 330 |
+
|
| 331 |
+
with _silence_stdio():
|
| 332 |
+
with YoutubeDL(params=shared_opts) as ydl:
|
| 333 |
+
info = ydl.extract_info(source, download=False)
|
| 334 |
+
|
| 335 |
+
if not info:
|
| 336 |
+
raise RuntimeError("Unable to resolve media info via yt-dlp")
|
| 337 |
+
|
| 338 |
+
with _silence_stdio():
|
| 339 |
+
with YoutubeDL(params=ytdlp_opts) as ydl:
|
| 340 |
+
result = ydl.extract_info(source, download=True)
|
| 341 |
+
download_path = Path(ydl.prepare_filename(result))
|
| 342 |
+
|
| 343 |
+
if not download_path.exists():
|
| 344 |
+
raise RuntimeError("yt-dlp finished without producing a file")
|
| 345 |
+
|
| 346 |
+
metadata = {
|
| 347 |
+
"reference": reference,
|
| 348 |
+
"source": source,
|
| 349 |
+
"title": result.get("title"),
|
| 350 |
+
"duration": result.get("duration"),
|
| 351 |
+
"ext": result.get("ext"),
|
| 352 |
+
"download_path": str(download_path),
|
| 353 |
+
"thumbnail": result.get("thumbnail"),
|
| 354 |
+
"channel": result.get("channel"),
|
| 355 |
+
"channel_id": result.get("channel_id"),
|
| 356 |
+
"uploader": result.get("uploader"),
|
| 357 |
+
"id": result.get("id"),
|
| 358 |
+
"description": result.get("description"),
|
| 359 |
+
"webpage_url": result.get("webpage_url"),
|
| 360 |
+
"extractor_key": result.get("extractor_key"),
|
| 361 |
+
}
|
| 362 |
+
|
| 363 |
+
_save_json(_metadata_path(reference), metadata)
|
| 364 |
+
return metadata
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
def _ensure_audio_sidecar(video_path: Path, reference: str) -> Path:
|
| 368 |
+
"""Create an AAC sidecar for the video (preferred by Gemini)."""
|
| 369 |
+
|
| 370 |
+
audio_path = video_path.with_suffix(".m4a")
|
| 371 |
+
if audio_path.exists():
|
| 372 |
+
return audio_path
|
| 373 |
+
|
| 374 |
+
audio_path.parent.mkdir(parents=True, exist_ok=True)
|
| 375 |
+
try:
|
| 376 |
+
(
|
| 377 |
+
ffmpeg.input(str(video_path))
|
| 378 |
+
.output(str(audio_path), acodec="aac", audio_bitrate="128k", ac=2, ar=16000, vn=None)
|
| 379 |
+
.overwrite_output()
|
| 380 |
+
.run(capture_stdout=True, capture_stderr=True)
|
| 381 |
+
)
|
| 382 |
+
except ffmpeg.Error as exc: # pragma: no cover - runtime dependency
|
| 383 |
+
msg = exc.stderr.decode("utf-8", "ignore") if exc.stderr else str(exc)
|
| 384 |
+
raise RuntimeError(f"ffmpeg failed to extract audio: {msg[:400]}")
|
| 385 |
+
return audio_path
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 389 |
+
# Gemini helpers
|
| 390 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
def _build_gemini_client():
|
| 394 |
+
try:
|
| 395 |
+
from google import genai
|
| 396 |
+
except Exception as exc: # pragma: no cover - runtime dependency
|
| 397 |
+
raise RuntimeError(f"google-genai not available: {exc}")
|
| 398 |
+
|
| 399 |
+
api_key = os.environ.get("GEMINI_API_KEY")
|
| 400 |
+
if not api_key:
|
| 401 |
+
raise RuntimeError("GEMINI_API_KEY environment variable is required")
|
| 402 |
+
return genai.Client(api_key=api_key)
|
| 403 |
+
|
| 404 |
+
|
| 405 |
+
def _wait_for_upload(client, upload):
|
| 406 |
+
from google.genai import types
|
| 407 |
+
|
| 408 |
+
while upload.state.name == "PROCESSING":
|
| 409 |
+
time.sleep(1)
|
| 410 |
+
upload = client.files.get(name=upload.name)
|
| 411 |
+
if upload.state.name != "ACTIVE":
|
| 412 |
+
raise RuntimeError(f"Upload failed: {upload.state.name}")
|
| 413 |
+
return upload
|
| 414 |
+
|
| 415 |
+
|
| 416 |
+
def _gemini_structured_slide_times(client, video_path: Path, reference: str) -> list[dict]:
|
| 417 |
+
from google.genai import types
|
| 418 |
+
|
| 419 |
+
upload = client.files.upload(
|
| 420 |
+
file=str(video_path),
|
| 421 |
+
config=types.UploadFileConfig(
|
| 422 |
+
display_name=video_path.name,
|
| 423 |
+
mime_type="video/mp4",
|
| 424 |
+
),
|
| 425 |
+
)
|
| 426 |
+
upload = _wait_for_upload(client, upload)
|
| 427 |
+
|
| 428 |
+
schema = types.Schema(
|
| 429 |
+
type=types.Type.OBJECT,
|
| 430 |
+
properties={
|
| 431 |
+
"slides": types.Schema(
|
| 432 |
+
type=types.Type.ARRAY,
|
| 433 |
+
items=types.Schema(
|
| 434 |
+
type=types.Type.OBJECT,
|
| 435 |
+
properties={
|
| 436 |
+
"label": types.Schema(type=types.Type.STRING),
|
| 437 |
+
"from_seconds": types.Schema(type=types.Type.NUMBER),
|
| 438 |
+
"to_seconds": types.Schema(type=types.Type.NUMBER),
|
| 439 |
+
},
|
| 440 |
+
required=["from_seconds", "to_seconds"],
|
| 441 |
+
),
|
| 442 |
+
)
|
| 443 |
+
},
|
| 444 |
+
required=["slides"],
|
| 445 |
+
)
|
| 446 |
+
|
| 447 |
+
file = types.Part.from_uri(file_uri=upload.uri, mime_type=upload.mime_type or "video/mp4")
|
| 448 |
+
|
| 449 |
+
response = client.models.generate_content(
|
| 450 |
+
model="gemini-flash-lite-latest",
|
| 451 |
+
contents=[file, "What are the timestamps of individual slides presented?"],
|
| 452 |
+
)
|
| 453 |
+
|
| 454 |
+
raw = getattr(response, "text", None) or getattr(response, "raw", None)
|
| 455 |
+
if not raw and hasattr(response, "output_text"):
|
| 456 |
+
raw = response.output_text # type: ignore[attr-defined]
|
| 457 |
+
if not raw:
|
| 458 |
+
# try candidates
|
| 459 |
+
candidates = getattr(response, "candidates", None)
|
| 460 |
+
if candidates:
|
| 461 |
+
raw = candidates[0].content.parts[0].text # type: ignore[index]
|
| 462 |
+
if not raw:
|
| 463 |
+
raise RuntimeError("Slide analysis model returned empty response")
|
| 464 |
+
|
| 465 |
+
_write_debug(reference, "slides_raw.json", raw or "")
|
| 466 |
+
|
| 467 |
+
try:
|
| 468 |
+
payload = json.loads(raw) if raw else {"slides": []}
|
| 469 |
+
except Exception:
|
| 470 |
+
log.warning("Gemini slide response not JSON: %s", raw[:200])
|
| 471 |
+
payload = {"slides": []}
|
| 472 |
+
|
| 473 |
+
slides = payload.get("slides") or []
|
| 474 |
+
sanitized: list[dict] = []
|
| 475 |
+
for slide in slides:
|
| 476 |
+
try:
|
| 477 |
+
start = float(slide.get("from_seconds"))
|
| 478 |
+
end = float(slide.get("to_seconds"))
|
| 479 |
+
except Exception:
|
| 480 |
+
continue
|
| 481 |
+
label = (slide.get("label") or "").strip()
|
| 482 |
+
sanitized.append({"from": start, "to": end, "label": label})
|
| 483 |
+
return sanitized
|
| 484 |
+
|
| 485 |
+
|
| 486 |
+
def _gemini_analyze_audio(client, audio_path: Path, slides: list[dict], priors: Priors) -> dict:
|
| 487 |
+
from google.genai import types
|
| 488 |
+
|
| 489 |
+
upload = client.files.upload(
|
| 490 |
+
file=str(audio_path),
|
| 491 |
+
config=types.UploadFileConfig(
|
| 492 |
+
display_name=audio_path.name,
|
| 493 |
+
mime_type="audio/mp4", # AAC in M4A container
|
| 494 |
+
),
|
| 495 |
+
)
|
| 496 |
+
upload = _wait_for_upload(client, upload)
|
| 497 |
+
|
| 498 |
+
slide_files = []
|
| 499 |
+
for slide in slides:
|
| 500 |
+
uri = slide.get("file_uri")
|
| 501 |
+
if not uri:
|
| 502 |
+
continue
|
| 503 |
+
slide_files.append(types.Part.from_uri(file_uri=uri, mime_type="image/png"))
|
| 504 |
+
|
| 505 |
+
priors_text = priors.as_prompt_text()
|
| 506 |
+
|
| 507 |
+
# FIXME: improve prompt: conceptualize "surprises", "priors"
|
| 508 |
+
contents = [
|
| 509 |
+
types.Content(
|
| 510 |
+
role="user",
|
| 511 |
+
parts=[
|
| 512 |
+
types.Part.from_text(text=priors_text),
|
| 513 |
+
types.Part.from_uri(file_uri=upload.uri, mime_type=upload.mime_type or "audio/wav"),
|
| 514 |
+
*slide_files,
|
| 515 |
+
types.Part.from_text(
|
| 516 |
+
text=
|
| 517 |
+
"Provide concise analysis and key insights using the supplied context, expected takeaways,"
|
| 518 |
+
" and questions. Base the reasoning on the audio transcript and the slide snapshots; do not"
|
| 519 |
+
" assume access to the full video."
|
| 520 |
+
),
|
| 521 |
+
],
|
| 522 |
+
)
|
| 523 |
+
]
|
| 524 |
+
|
| 525 |
+
response = client.models.generate_content(
|
| 526 |
+
model="gemini-flash-latest",
|
| 527 |
+
contents=contents,
|
| 528 |
+
)
|
| 529 |
+
|
| 530 |
+
text = getattr(response, "text", None)
|
| 531 |
+
if not text and hasattr(response, "output_text"):
|
| 532 |
+
text = response.output_text # type: ignore[attr-defined]
|
| 533 |
+
if not text:
|
| 534 |
+
candidates = getattr(response, "candidates", None)
|
| 535 |
+
if candidates:
|
| 536 |
+
text = candidates[0].content.parts[0].text # type: ignore[index]
|
| 537 |
+
if not text:
|
| 538 |
+
raise RuntimeError("Gemini returned no analysis")
|
| 539 |
+
return {
|
| 540 |
+
"analysis": text,
|
| 541 |
+
"audio_file_uri": upload.uri,
|
| 542 |
+
"slide_count": len(slide_files),
|
| 543 |
+
}
|
| 544 |
+
|
| 545 |
+
|
| 546 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 547 |
+
# Slide extraction pipeline
|
| 548 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 549 |
+
|
| 550 |
+
|
| 551 |
+
def _extract_slides_flow(metadata: dict) -> dict:
    """Extract deduplicated slide stills from a downloaded video and cache them.

    Asks Gemini for structured slide time ranges, grabs one frame per range at
    its midpoint, drops ranges that fall outside the media duration as well as
    byte-identical duplicate frames, then persists both the PNG stills and a
    JSON payload describing the slides.
    """
    reference = metadata["reference"]
    source_video = Path(metadata["download_path"])
    declared_duration = metadata.get("duration")

    # Trust the metadata duration when present; otherwise probe the file.
    total_seconds = float(declared_duration) if declared_duration else _probe_duration(source_video)

    client = _build_gemini_client()
    with _silence_stdio():  # silence any ffmpeg/yt-dlp noise during upload
        raw_slides = _gemini_structured_slide_times(client, source_video, reference)

    hashes_seen: set[str] = set()
    entries: list[dict] = []

    for position, candidate in enumerate(raw_slides):
        begin = float(candidate.get("from", 0))
        finish = float(candidate.get("to", begin))

        # Discard ranges (or midpoints) past the end of the actual media.
        if total_seconds and begin >= total_seconds:
            continue
        midpoint = begin + (abs(finish - begin) / 2.0)
        if total_seconds and midpoint > total_seconds:
            continue

        frame = _extract_frame(source_video, midpoint)
        if not frame:
            continue

        # Deduplicate byte-identical frames across slide ranges.
        fingerprint = hashlib.sha1(frame).hexdigest()
        if fingerprint in hashes_seen:
            continue
        hashes_seen.add(fingerprint)

        encoded = "data:image/png;base64," + base64.b64encode(frame).decode("ascii")

        # File name keeps the raw enumeration index; entry "index" is the
        # position among the *kept* slides, so the two can differ.
        still_path = SLIDE_CACHE / reference / f"slide_{position:03d}.png"
        still_path.parent.mkdir(parents=True, exist_ok=True)
        still_path.write_bytes(frame)

        entries.append(
            {
                "index": len(entries),
                "from": begin,
                "to": finish,
                "mid": midpoint,
                "label": candidate.get("label") or "",
                "image_data_uri": encoded,
            }
        )

    payload = {
        "reference": reference,
        "count": len(entries),
        "slides": entries,
        "source": metadata.get("source"),
    }

    _save_json(_slides_json_path(reference), payload)
    _write_debug(reference, "slides_sanitized.json", payload)
    return payload
|
| 610 |
+
|
| 611 |
+
|
| 612 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 613 |
+
# Analysis pipeline
|
| 614 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 615 |
+
|
| 616 |
+
|
| 617 |
+
def _media_context_from_metadata(metadata: dict) -> str:
    """Build a human-readable context blurb (title/channel/URL/description)
    from yt-dlp style metadata, skipping any missing fields."""
    lines: list[str] = []

    title = metadata.get("title")
    if title:
        lines.append(f"Title: {title}")

    # yt-dlp populates either "channel" or "uploader" depending on extractor.
    channel = metadata.get("channel") or metadata.get("uploader")
    if channel:
        lines.append(f"Channel: {channel}")

    url = metadata.get("webpage_url") or metadata.get("source")
    if url:
        lines.append(f"URL: {url}")

    description = metadata.get("description")
    if description:
        lines.append(f"Description:\n{description}")

    return "\n".join(lines)
|
| 632 |
+
|
| 633 |
+
|
| 634 |
+
def _analysis_flow(metadata: dict, priors_obj: Priors | dict) -> dict:
    """Run the full audio-plus-slides Gemini analysis for a downloaded item.

    Ensures the audio sidecar and slide stills exist (reusing caches where
    possible), uploads every slide image to Gemini, runs the analysis, and
    persists the resulting JSON payload.
    """
    reference = metadata["reference"]
    local_video = Path(metadata["download_path"])
    local_audio = _ensure_audio_sidecar(local_video, reference)

    # Normalize priors and enrich them with media-derived context.
    priors = Priors.from_obj(priors_obj) if not isinstance(priors_obj, Priors) else priors_obj
    priors.media_context = _media_context_from_metadata(metadata)

    # Reuse cached slides when present; otherwise extract them now.
    cached_slides = _load_json(_slides_json_path(reference))
    slides_payload = cached_slides or _extract_slides_flow(metadata)
    slides = slides_payload.get("slides", [])

    # Upload each slide still to Gemini so the analysis can reference them.
    client = _build_gemini_client()
    uploaded_slides = []
    for slide in slides:
        encoded = slide.get("image_data_uri")
        if not encoded:
            continue
        _, payload_b64 = encoded.split(",", 1)
        raw_png = base64.b64decode(payload_b64)
        # A single scratch file is reused: uploads happen sequentially.
        scratch = SLIDE_CACHE / reference / "_tmp_upload.png"
        scratch.write_bytes(raw_png)
        upload = client.files.upload(
            file=str(scratch),
            config=None,
        )
        upload = _wait_for_upload(client, upload)
        slide["file_uri"] = upload.uri
        uploaded_slides.append(slide)

    with _silence_stdio():  # suppress any upload chatter
        outcome = _gemini_analyze_audio(client, local_audio, uploaded_slides, priors)

    payload = {
        "reference": reference,
        "analysis": outcome.get("analysis"),
        "slide_count": len(uploaded_slides),
        "audio_uri": outcome.get("audio_file_uri"),
        "source": metadata.get("source"),
        "title": metadata.get("title"),
    }

    _save_json(_analysis_json_path(reference), payload)
    _write_debug(reference, "analysis.json", payload)
    return payload
|
| 683 |
+
|
| 684 |
+
|
| 685 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 686 |
+
# Public MCP registration
|
| 687 |
+
# ---------------------------------------------------------------------------------------------------------------------
|
| 688 |
+
|
| 689 |
+
|
| 690 |
+
def register_media_tools(app: FastMCP) -> None:
    """Register media-related MCP tools on the given app."""
    # NOTE: the nested tool docstrings below are surfaced to MCP clients as
    # tool descriptions, so they are user-facing text, not just developer docs.
    # All tools share the module-level job registry (JOBS / REFERENCE_INDEX)
    # and the _maybe_wait pattern: kick off an asyncio task, then wait up to
    # `wait_seconds` for completion before returning an in-progress payload.

    @app.tool()
    async def start_media_retrieval(
        ctx: Context,
        source: str,
        prefer_audio_only: bool = False,
        wait_seconds: int = 54,
    ) -> dict:
        """
        Retrieve long-form media (conference session, lecture, webinar, podcast episode, or direct HTTP media URL).
        Designed for MCP clients / LLM tools that have short time limits: will wait up to
        `wait_seconds` for completion, otherwise returns in-progress status plus a `reference`
        token that can be used with `get_media_retrieval_status`, `start_media_analysis`, and slide tools.

        Note:
        - Claude uses an internal timeout of 240 seconds. `wait_seconds` should be in the same order of magnitude with Claude, and a minimum of 55 seconds if in doubt.

        Parameters:
        source: YouTube URL/ID, podcast/HTTP media URL, or any supported locator supported by yt-dlp.
        prefer_audio_only: If true, download audio-first formats to save bandwidth when visuals (e.g. slides) are not needed. Default is False, as visuals often allow richer analysis. Audio-only should only be used if asked for by the user specifically.
        wait_seconds: Time to await before returning; helps fast-complete short downloads without extra calls.

        Returns (happy path):
        { reference, status="done", metadata={title, description, duration, download_path, ...}, cached? }

        Returns (in progress):
        { reference, status="running" | "pending", progress?, job_id }

        Returns (error):
        { is_error: true, status: "error"|"failed", detail, reference }
        """

        # Resolve a stable reference token via a metadata-only probe; fall back
        # to a source-derived reference if the probe fails (offline, bad URL).
        info_reference = None
        try:
            from yt_dlp import YoutubeDL

            with YoutubeDL(params={"skip_download": True, "quiet": True, "noplaylist": True}) as ydl:
                info = ydl.extract_info(source, download=False)
                info_reference = _build_reference(info, source)
        except Exception:
            info_reference = _build_reference(None, source)

        reference = info_reference

        # If already cached, skip job creation
        metadata = _load_json(_metadata_path(reference))
        if metadata and Path(metadata.get("download_path", "")).exists():
            return {
                "reference": reference,
                "status": JobStatus.DONE,
                "cached": True,
                "metadata": metadata,
            }

        def factory() -> JobRecord:
            # New job record with a random id, indexed by (kind, reference).
            return JobRecord(id=secrets.token_urlsafe(16), kind="media_retrieval", reference=reference)

        job = await _get_or_create_job("media_retrieval", reference, factory)

        # A pre-existing done/running job means another call already started
        # the download; just (optionally) wait on it.
        if job.status in (JobStatus.DONE, JobStatus.RUNNING):
            return await _maybe_wait(job, wait_seconds)

        async def runner():
            # Background task: run the blocking yt-dlp download off-loop and
            # record the outcome on the shared job record.
            job.status = JobStatus.RUNNING
            try:
                metadata_result = await asyncio.to_thread(
                    _run_ytdlp_download, source, reference, prefer_audio_only
                )
                job.result = metadata_result
                job.status = JobStatus.DONE
            except Exception as exc:  # pragma: no cover - defensive
                log.exception("media retrieval failed for %s", reference)
                job.error = str(exc)
                job.status = JobStatus.FAILED
            finally:
                job.finished_at = time.time()

        job.task = asyncio.create_task(runner())
        return await _maybe_wait(job, wait_seconds)

    @app.tool()
    async def get_media_retrieval_status(ctx: Context, reference: str, wait_seconds: int = 0) -> dict:
        """Poll download status for a `reference` returned by start_media_retrieval.

        Returns cached metadata immediately when available; otherwise echoes job status or {status: "not_found"}.
        Errors include `is_error: true`.
        """
        # Fast path: completed download persisted on disk.
        metadata = _load_json(_metadata_path(reference))
        if metadata and Path(metadata.get("download_path", "")).exists():
            return {
                "reference": reference,
                "status": JobStatus.DONE,
                "metadata": metadata,
            }

        # Otherwise report on the in-memory job, if one exists.
        job_id = REFERENCE_INDEX.get(("media_retrieval", reference))
        if job_id and job_id in JOBS:
            job = JOBS[job_id]
            if wait_seconds > 0:
                return await _maybe_wait(job, wait_seconds)
            return _job_payload(job, include_result=True)

        return {"status": "not_found", "reference": reference}

    @app.tool()
    async def start_slide_extraction(ctx: Context, reference: str, wait_seconds: int = 55) -> dict:
        """Extract representative slide stills from a downloaded video.

        Note: media analysis (start_media_analysis) includes slides extraction, so no need to call this function explicitely when aiming for full media analysis
        """
        # Slides require the media file; fail early if it was never downloaded.
        metadata = _load_json(_metadata_path(reference))
        if not metadata or not Path(metadata.get("download_path", "")).exists():
            return _error("media not downloaded", reference)

        # Return cached slide payload without creating a job.
        existing = _load_json(_slides_json_path(reference))
        if existing:
            return {
                "status": JobStatus.DONE,
                "reference": reference,
                "slides": existing,
                "cached": True,
            }

        def factory() -> JobRecord:
            return JobRecord(id=secrets.token_urlsafe(16), kind="slide_extraction", reference=reference)

        job = await _get_or_create_job("slide_extraction", reference, factory)
        if job.status in (JobStatus.DONE, JobStatus.RUNNING):
            return await _maybe_wait(job, wait_seconds)

        async def runner():
            # Background task: run the blocking slide-extraction pipeline.
            job.status = JobStatus.RUNNING
            try:
                slide_payload = await asyncio.to_thread(_extract_slides_flow, metadata)
                job.result = slide_payload
                job.status = JobStatus.DONE
            except Exception as exc:
                log.exception("slide extraction failed for %s", reference)
                job.status = JobStatus.FAILED
                job.error = str(exc)
            finally:
                job.finished_at = time.time()

        job.task = asyncio.create_task(runner())
        return await _maybe_wait(job, wait_seconds)

    @app.tool()
    async def get_extracted_slides(ctx: Context, reference: str, wait_seconds: int = 0) -> dict:
        """Fetch extracted slides for a reference, or current slide-extraction job status."""
        # Fast path: slides already persisted to disk.
        existing = _load_json(_slides_json_path(reference))
        if existing:
            return {
                "status": JobStatus.DONE,
                "reference": reference,
                "slides": existing,
            }

        job_id = REFERENCE_INDEX.get(("slide_extraction", reference))
        if job_id and job_id in JOBS:
            job = JOBS[job_id]
            if wait_seconds > 0:
                return await _maybe_wait(job, wait_seconds)
            return _job_payload(job, include_result=True)

        return {"status": "not_found", "reference": reference}

    @app.tool()
    async def start_media_analysis(
        ctx: Context,
        reference: str,
        priors: dict,
        wait_seconds: int = 55,
    ) -> dict:
        """
        Analyze the primary audio plus extracted slides, guided by rich "priors" (analysis hints).

        Priors object schema (all strings, optional):
        - context: User-supplied scene-setting (participants, venue, meeting goal, etc.). Used to establish topical background and spelling of names, abbreviations, etc. Optional field, should only be filled with data explicitely supplied by the user.
        - expectations: What factuals, insights or takeaways are anticipated by the user. These serve as a baseline, and surprises from this will be surfaced. Optional field, should only be filled with data explicitely supplied by the user.
        - prior_knowledge: What the user already knows (acronyms, previous meetings). This again serves as a basis for efficient information foraging efforts by the user. Optional field, should only be filled with data explicitely supplied by the user.
        - questions: Specific questions from the user to answer. Optional field, should only be filled with data explicitely supplied by the user.

        The analysis automatically builds on priors with media-derived context (title, description, channel, URL) and supplies
        user context to the analysis pipeline as well.

        Parameters:
        - reference: the reference token obtained from `start_media_retrieval`
        - priors: prior information and user-supplied background per the "Priors object schema" definition

        Note:
        - Claude uses an internal timeout of 240 seconds. `wait_seconds` should be in the same order of magnitude with Claude, and a minimum of 55 seconds if in doubt.

        Returns:
        - in-progress status if still running; use the 'get_media_analysis_result' to monitor for further progress and to retrieve the final result
        - if process has already finished, the final analysis text is returned
        - errors are flagged with `is_error: true`.
        """
        # Analysis needs the downloaded media on disk.
        metadata = _load_json(_metadata_path(reference))
        if not metadata or not Path(metadata.get("download_path", "")).exists():
            return _error("media not downloaded", reference)

        # Validate the priors shape before starting an expensive job.
        if not isinstance(priors, dict):
            return _error("priors must be an object with string fields: context, expectations, prior_knowledge, questions", reference)

        # Return cached analysis without re-running the pipeline.
        existing = _load_json(_analysis_json_path(reference))
        if existing:
            return {"status": JobStatus.DONE, "reference": reference, "analysis": existing, "cached": True}

        def factory() -> JobRecord:
            return JobRecord(id=secrets.token_urlsafe(16), kind="media_analysis", reference=reference)

        job = await _get_or_create_job("media_analysis", reference, factory)
        if job.status in (JobStatus.DONE, JobStatus.RUNNING):
            return await _maybe_wait(job, wait_seconds)

        async def runner():
            # Background task: run the blocking analysis pipeline off-loop.
            job.status = JobStatus.RUNNING
            try:
                result = await asyncio.to_thread(_analysis_flow, metadata, priors)
                job.result = result
                job.status = JobStatus.DONE
            except Exception as exc:
                log.exception("media analysis failed for %s", reference)
                job.status = JobStatus.FAILED
                job.error = str(exc)
            finally:
                job.finished_at = time.time()

        job.task = asyncio.create_task(runner())
        return await _maybe_wait(job, wait_seconds)

    @app.tool()
    async def get_media_analysis_result(ctx: Context, reference: str, wait_seconds: int = 0) -> dict:
        """Return completed analysis for a reference, or current job status, with `is_error` on failures."""
        # Fast path: persisted analysis payload.
        existing = _load_json(_analysis_json_path(reference))
        if existing:
            return {"status": JobStatus.DONE, "reference": reference, "analysis": existing}

        job_id = REFERENCE_INDEX.get(("media_analysis", reference))
        if job_id and job_id in JOBS:
            job = JOBS[job_id]
            if wait_seconds > 0:
                return await _maybe_wait(job, wait_seconds)
            return _job_payload(job, include_result=True)

        return {"status": "not_found", "reference": reference}
|
| 938 |
+
|
| 939 |
+
|
| 940 |
+
__all__ = ["register_media_tools"]
|
mcp/src/aileen3_mcp/server.py
CHANGED
|
@@ -1,10 +1,16 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import logging
|
|
|
|
| 4 |
from dataclasses import asdict, dataclass
|
| 5 |
|
|
|
|
|
|
|
|
|
|
| 6 |
from fastmcp import FastMCP
|
| 7 |
|
|
|
|
|
|
|
| 8 |
log = logging.getLogger(__name__)
|
| 9 |
|
| 10 |
|
|
@@ -20,6 +26,27 @@ def make_app() -> FastMCP:
|
|
| 20 |
|
| 21 |
@app.tool()
|
| 22 |
def health() -> dict:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
def _gemini_key_ok() -> tuple[bool, str]:
|
| 24 |
key = bool(os.environ.get("GEMINI_API_KEY"))
|
| 25 |
return (key, "GEMINI_API_KEY is set" if key else "GEMINI_API_KEY missing")
|
|
@@ -62,15 +89,20 @@ def make_app() -> FastMCP:
|
|
| 62 |
capped_results = max(1, min(max_results, 50))
|
| 63 |
opts = {
|
| 64 |
"quiet": True,
|
|
|
|
|
|
|
| 65 |
"skip_download": True,
|
| 66 |
"extract_flat": "in_playlist",
|
|
|
|
|
|
|
| 67 |
}
|
| 68 |
|
| 69 |
search_spec = f"ytsearch{capped_results}:{query}"
|
| 70 |
log.info("search_youtube query=%r max_results=%d", query, capped_results)
|
| 71 |
|
| 72 |
-
with
|
| 73 |
-
|
|
|
|
| 74 |
|
| 75 |
entries = info.get("entries", []) if info else []
|
| 76 |
videos = []
|
|
@@ -93,6 +125,15 @@ def make_app() -> FastMCP:
|
|
| 93 |
|
| 94 |
return {"videos": videos}
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
return app
|
| 97 |
|
| 98 |
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import logging
|
| 4 |
+
import os
|
| 5 |
from dataclasses import asdict, dataclass
|
| 6 |
|
| 7 |
+
import shutil
|
| 8 |
+
import subprocess
|
| 9 |
+
|
| 10 |
from fastmcp import FastMCP
|
| 11 |
|
| 12 |
+
from aileen3_mcp.media_tools import register_media_tools, _silence_stdio, _YDLLogger
|
| 13 |
+
|
| 14 |
log = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
|
|
|
|
| 26 |
|
| 27 |
@app.tool()
|
| 28 |
def health() -> dict:
|
| 29 |
+
"""Return a basic health payload including ffmpeg and Gemini env availability."""
|
| 30 |
+
|
| 31 |
+
def _ffmpeg_ok() -> tuple[bool, str]:
    """Return (ok, detail): whether an ffmpeg binary is on PATH and answers `-version`."""
    ffmpeg_bin = shutil.which("ffmpeg")
    if not ffmpeg_bin:
        return False, "ffmpeg not found on PATH"

    try:
        proc = subprocess.run(
            [ffmpeg_bin, "-version"],
            capture_output=True,
            text=True,
            timeout=5,
            check=False,
        )
    except Exception as exc:  # pragma: no cover - defensive
        return False, f"ffmpeg exec failed: {exc}"

    if proc.returncode != 0:
        return False, proc.stderr.strip() or "ffmpeg returned error"

    # First stdout line is the ffmpeg version banner when available.
    banner = (proc.stdout or "").splitlines()[0] if proc.stdout else "ffmpeg present"
    return True, banner
|
| 49 |
+
|
| 50 |
def _gemini_key_ok() -> tuple[bool, str]:
    """Return (ok, detail): whether GEMINI_API_KEY is set to a non-empty value."""
    present = bool(os.environ.get("GEMINI_API_KEY"))
    detail = "GEMINI_API_KEY is set" if present else "GEMINI_API_KEY missing"
    return (present, detail)
|
|
|
|
| 89 |
capped_results = max(1, min(max_results, 50))
|
| 90 |
opts = {
|
| 91 |
"quiet": True,
|
| 92 |
+
"no_warnings": True,
|
| 93 |
+
"noprogress": True,
|
| 94 |
"skip_download": True,
|
| 95 |
"extract_flat": "in_playlist",
|
| 96 |
+
"logger": _YDLLogger(),
|
| 97 |
+
"extractor_args": {"youtube": {"player_client": ["default"]}},
|
| 98 |
}
|
| 99 |
|
| 100 |
search_spec = f"ytsearch{capped_results}:{query}"
|
| 101 |
log.info("search_youtube query=%r max_results=%d", query, capped_results)
|
| 102 |
|
| 103 |
+
with _silence_stdio():
|
| 104 |
+
with YoutubeDL(opts) as ydl:
|
| 105 |
+
info = ydl.extract_info(search_spec, download=False)
|
| 106 |
|
| 107 |
entries = info.get("entries", []) if info else []
|
| 108 |
videos = []
|
|
|
|
| 125 |
|
| 126 |
return {"videos": videos}
|
| 127 |
|
| 128 |
+
# Register media analysis tools:
|
| 129 |
+
# - start_media_retrieval
|
| 130 |
+
# - get_media_retrieval_status
|
| 131 |
+
# - start_slide_extraction
|
| 132 |
+
# - get_extracted_slides
|
| 133 |
+
# - start_media_analysis
|
| 134 |
+
# - get_media_analysis_result
|
| 135 |
+
register_media_tools(app)
|
| 136 |
+
|
| 137 |
return app
|
| 138 |
|
| 139 |
|