ndurner committed on
Commit
0c163b8
·
1 Parent(s): 73ba3e9

add comments

Browse files
demo/app.py CHANGED
@@ -20,7 +20,12 @@ def render_health_panel(gemini_api_key: str | None = None) -> str:
20
 
21
 
22
  def create_app() -> gr.Blocks:
23
- """Create the Gradio application with a minimal notebook-like health cell."""
 
 
 
 
 
24
  with gr.Blocks(title="Aileen3 Demo") as demo:
25
  gr.HTML(f"<style>{CELL_CSS}</style>")
26
 
 
20
 
21
 
22
  def create_app() -> gr.Blocks:
23
+ """Create the Gradio Blocks application used in the Hugging Face Space.
24
+
25
+ The layout is intentionally notebook-like: each conceptual unit
26
+ (problem, health, demos, wrap-up) is encapsulated in its own module
27
+ and rendered as a "cell" to keep the main app glue straightforward.
28
+ """
29
  with gr.Blocks(title="Aileen3 Demo") as demo:
30
  gr.HTML(f"<style>{CELL_CSS}</style>")
31
 
demo/context_biased_transcription_cell.py CHANGED
@@ -20,6 +20,8 @@ from problem_cell import (
20
 
21
  log = logging.getLogger(__name__)
22
 
 
 
23
  MAX_POLL_ATTEMPTS = 20
24
  POLL_WAIT_SECONDS = 58
25
 
@@ -91,7 +93,13 @@ async def _poll_until_done(
91
 
92
 
93
  async def _run_transcription_flow(gemini_api_key: str) -> Tuple[str, str]:
94
- """Drive the MCP media tools to run a context-biased transcription demo."""
 
 
 
 
 
 
95
  try:
96
  from fastmcp import Client # type: ignore[import-untyped]
97
  from fastmcp.client.transports import StdioTransport # type: ignore[import-untyped]
@@ -99,6 +107,9 @@ async def _run_transcription_flow(gemini_api_key: str) -> Tuple[str, str]:
99
  status = render_status_box(f"fastmcp is not available in this environment: {exc}", "fail")
100
  return status, ""
101
 
 
 
 
102
  repo_root = Path(__file__).resolve().parents[1]
103
  mcp_src = repo_root / "mcp" / "src"
104
  existing_py_path = os.environ.get("PYTHONPATH", "")
 
20
 
21
  log = logging.getLogger(__name__)
22
 
23
+ # Context-biased transcription can take a bit longer; use more generous
24
+ # polling defaults here than in the other cells.
25
  MAX_POLL_ATTEMPTS = 20
26
  POLL_WAIT_SECONDS = 58
27
 
 
93
 
94
 
95
  async def _run_transcription_flow(gemini_api_key: str) -> Tuple[str, str]:
96
+ """Drive the MCP media tools to run a context-biased transcription demo.
97
+
98
+ This mirrors a typical client-side flow:
99
+ - retrieve media via `start_media_retrieval`,
100
+ - derive a textual prior from the YouTube description, and
101
+ - call `start_media_transcription` with that prior as context.
102
+ """
103
  try:
104
  from fastmcp import Client # type: ignore[import-untyped]
105
  from fastmcp.client.transports import StdioTransport # type: ignore[import-untyped]
 
107
  status = render_status_box(f"fastmcp is not available in this environment: {exc}", "fail")
108
  return status, ""
109
 
110
+ # As in the other cells we spawn the MCP server as a subprocess and
111
+ # point PYTHONPATH at `mcp/src` so that editable installs are not
112
+ # required to run the demo locally.
113
  repo_root = Path(__file__).resolve().parents[1]
114
  mcp_src = repo_root / "mcp" / "src"
115
  existing_py_path = os.environ.get("PYTHONPATH", "")
demo/demo_logging.py CHANGED
@@ -5,6 +5,9 @@ import os
5
  from pathlib import Path
6
  from typing import Optional
7
 
 
 
 
8
  LOGGER_NAME = "aileen3_demo"
9
  LOG_LEVEL_ENV = "AILEEN3_DEMO_LOGLEVEL"
10
 
 
5
  from pathlib import Path
6
  from typing import Optional
7
 
8
+ # Lightweight logging helper for the Gradio demo.
9
+ # The MCP server has its own logging configuration; this module keeps
10
+ # demo-specific logs in a separate file referenced from the UI.
11
  LOGGER_NAME = "aileen3_demo"
12
  LOG_LEVEL_ENV = "AILEEN3_DEMO_LOGLEVEL"
13
 
demo/health.py CHANGED
@@ -15,6 +15,7 @@ from itertools import zip_longest
15
  from typing import Iterable
16
 
17
 
 
18
  MIN_DENO_VERSION = (2, 0, 0)
19
  MIN_YTDLP_VERSION = (2025, 11, 12)
20
  MIN_FFMPEG_VERSION = (4, 0)
@@ -141,6 +142,9 @@ def _check_mcp_health(gemini_api_key: str | None = None) -> ToolStatus:
141
  except Exception as exc: # pragma: no cover - defensive
142
  return ToolStatus(label, False, f"fastmcp missing: {exc}")
143
 
 
 
 
144
  repo_root = Path(__file__).resolve().parents[1]
145
  mcp_src = repo_root / "mcp" / "src"
146
  existing_py_path = os.environ.get("PYTHONPATH", "")
 
15
  from typing import Iterable
16
 
17
 
18
+ # Version and environment constraints for a "green" health check in the demo.
19
  MIN_DENO_VERSION = (2, 0, 0)
20
  MIN_YTDLP_VERSION = (2025, 11, 12)
21
  MIN_FFMPEG_VERSION = (4, 0)
 
142
  except Exception as exc: # pragma: no cover - defensive
143
  return ToolStatus(label, False, f"fastmcp missing: {exc}")
144
 
145
+ # When running inside the Hugging Face Space, the repo root is the
146
+ # working directory; keep this logic in sync with the Dockerfile so
147
+ # that imports work both locally and in production.
148
  repo_root = Path(__file__).resolve().parents[1]
149
  mcp_src = repo_root / "mcp" / "src"
150
  existing_py_path = os.environ.get("PYTHONPATH", "")
demo/layout.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
3
  from contextlib import contextmanager
4
  import gradio as gr
5
 
 
6
  CELL_CSS = """
7
  .cell-wrapper {
8
  border: 1px solid rgba(0, 0, 0, 0.9);
@@ -68,6 +69,12 @@ CELL_CSS = """
68
 
69
  @contextmanager
70
  def cell(title: str):
 
 
 
 
 
 
71
  with gr.Column(elem_classes="cell-wrapper") as column:
72
  gr.HTML(f"<div class='cell-title'>{title}</div>")
73
  yield column
 
3
  from contextlib import contextmanager
4
  import gradio as gr
5
 
6
+ # Shared CSS used by all "cells" in the notebook-style demo.
7
  CELL_CSS = """
8
  .cell-wrapper {
9
  border: 1px solid rgba(0, 0, 0, 0.9);
 
69
 
70
  @contextmanager
71
  def cell(title: str):
72
+ """Context manager that renders a titled notebook-style cell.
73
+
74
+ Usage:
75
+ with cell("My title"):
76
+ ... # add components that appear inside the cell
77
+ """
78
  with gr.Column(elem_classes="cell-wrapper") as column:
79
  gr.HTML(f"<div class='cell-title'>{title}</div>")
80
  yield column
demo/media_analysis_cell.py CHANGED
@@ -21,9 +21,11 @@ from slide_utils import normalize_slide_entries
21
  log = get_demo_logger(__name__)
22
  DEMO_LOG_PATH = str(get_demo_log_path())
23
 
 
24
  MAX_POLL_ATTEMPTS = 3
25
  POLL_WAIT_SECONDS = 54
26
 
 
27
  ANALYSIS_VIDEO_URL = "https://youtu.be/eXP-PvKcI9A"
28
 
29
 
@@ -133,7 +135,13 @@ async def _run_media_analysis_flow(
133
  prior_knowledge: str,
134
  questions: str,
135
  ) -> Tuple[str, str, List[list]]:
136
- """Drive the MCP tools to run expectation-driven media analysis for a fixed video."""
 
 
 
 
 
 
137
  try:
138
  from fastmcp import Client # type: ignore[import-untyped]
139
  from fastmcp.client.transports import StdioTransport # type: ignore[import-untyped]
@@ -157,6 +165,9 @@ async def _run_media_analysis_flow(
157
  questions_len,
158
  )
159
 
 
 
 
160
  repo_root = Path(__file__).resolve().parents[1]
161
  mcp_src = repo_root / "mcp" / "src"
162
  existing_py_path = os.environ.get("PYTHONPATH", "")
 
21
  log = get_demo_logger(__name__)
22
  DEMO_LOG_PATH = str(get_demo_log_path())
23
 
24
+ # Polling strategy for long-running MCP jobs started from the demo.
25
  MAX_POLL_ATTEMPTS = 3
26
  POLL_WAIT_SECONDS = 54
27
 
28
+ # Fixed video used in the expectation-driven analysis cell.
29
  ANALYSIS_VIDEO_URL = "https://youtu.be/eXP-PvKcI9A"
30
 
31
 
 
135
  prior_knowledge: str,
136
  questions: str,
137
  ) -> Tuple[str, str, List[list]]:
138
+ """Drive the MCP tools to run expectation-driven media analysis for a fixed video.
139
+
140
+ The flow mirrors how an MCP-capable client would typically use the tools:
141
+ - start_media_retrieval → wait for cached or finished download
142
+ - start_media_analysis → wait for the expectation-driven briefing
143
+ - get_extracted_slides → fetch slide stills used as priors
144
+ """
145
  try:
146
  from fastmcp import Client # type: ignore[import-untyped]
147
  from fastmcp.client.transports import StdioTransport # type: ignore[import-untyped]
 
165
  questions_len,
166
  )
167
 
168
+ # Spawn the MCP server as a subprocess, pointing PYTHONPATH at the
169
+ # local `mcp/src` tree so this file keeps working both locally and
170
+ # inside the Space image.
171
  repo_root = Path(__file__).resolve().parents[1]
172
  mcp_src = repo_root / "mcp" / "src"
173
  existing_py_path = os.environ.get("PYTHONPATH", "")
demo/problem_cell.py CHANGED
@@ -54,6 +54,12 @@ def _extract_video_id(video_url: str) -> str | None:
54
 
55
 
56
  def _fetch_transcript(video_url: str) -> tuple[str | None, str | None]:
 
 
 
 
 
 
57
  TRANSCRIPTION_CACHE.mkdir(parents=True, exist_ok=True)
58
 
59
  if YoutubeDL is None: # pragma: no cover - dependency should always be present
@@ -62,8 +68,10 @@ def _fetch_transcript(video_url: str) -> tuple[str | None, str | None]:
62
  if not video_id:
63
  return None, "That does not look like a valid YouTube URL with a video id."
64
 
65
- # Align cache layout with `media_tools`: transcription cache under BASE_CACHE/transcription
66
- # using a stable reference derived from the YouTube video id when available.
 
 
67
  reference = f"youtube_{hashlib.sha256(video_id.encode('utf-8')).hexdigest()[:32]}"
68
  cache_path = _transcription_cache_path(reference)
69
  if cache_path.exists():
 
54
 
55
 
56
  def _fetch_transcript(video_url: str) -> tuple[str | None, str | None]:
57
+ """Retrieve or cache a plain-text transcript for the given YouTube URL.
58
+
59
+ For the purposes of this cell we rely on YouTube auto captions via
60
+ yt-dlp; the heavy-duty Gemini-based transcription lives in the MCP
61
+ tools and separate demo cells.
62
+ """
63
  TRANSCRIPTION_CACHE.mkdir(parents=True, exist_ok=True)
64
 
65
  if YoutubeDL is None: # pragma: no cover - dependency should always be present
 
68
  if not video_id:
69
  return None, "That does not look like a valid YouTube URL with a video id."
70
 
71
+ # Align cache layout with `media_tools`: transcription cache under
72
+ # BASE_CACHE/transcription using a stable reference derived from the
73
+ # YouTube video id when available. This keeps the demo and MCP server
74
+ # caches compatible and easier to inspect.
75
  reference = f"youtube_{hashlib.sha256(video_id.encode('utf-8')).hexdigest()[:32]}"
76
  cache_path = _transcription_cache_path(reference)
77
  if cache_path.exists():
demo/slide_utils.py CHANGED
@@ -5,7 +5,13 @@ from typing import Any, Iterable
5
 
6
 
7
  def _data_uri_from(value: Any) -> str | None:
8
- """Convert raw slide/image representations into a data URI string."""
 
 
 
 
 
 
9
 
10
  if not value:
11
  return None
 
5
 
6
 
7
  def _data_uri_from(value: Any) -> str | None:
8
+ """Convert raw slide/image representations into a data URI string.
9
+
10
+ This accepts several shapes that can appear in MCP tool responses:
11
+ - plain bytes
12
+ - dicts or objects with ``data``/``mimeType`` fields
13
+ - already-encoded ``data:...`` URIs
14
+ """
15
 
16
  if not value:
17
  return None
demo/translation_cell.py CHANGED
@@ -36,7 +36,12 @@ async def _run_translation_flow(
36
  language: str,
37
  slide_index_text: str,
38
  ) -> Tuple[str, str, Optional[Image.Image]]:
39
- """Drive the MCP tools to translate a representative slide from the fixed video."""
 
 
 
 
 
40
  try:
41
  from fastmcp import Client # type: ignore[import-untyped]
42
  from fastmcp.client.transports import StdioTransport # type: ignore[import-untyped]
@@ -76,6 +81,8 @@ async def _run_translation_flow(
76
  slide_index_text,
77
  )
78
 
 
 
79
  repo_root = Path(__file__).resolve().parents[1]
80
  mcp_src = repo_root / "mcp" / "src"
81
  existing_py_path = os.environ.get("PYTHONPATH", "")
 
36
  language: str,
37
  slide_index_text: str,
38
  ) -> Tuple[str, str, Optional[Image.Image]]:
39
+ """Drive the MCP tools to translate a particular slide from the fixed video.
40
+
41
+ The flow reuses the same retrieval and slide-extraction pipeline as the
42
+ expectation-driven analysis cell, then calls the dedicated `translate_slide`
43
+ MCP tool to produce a target-language slide image.
44
+ """
45
  try:
46
  from fastmcp import Client # type: ignore[import-untyped]
47
  from fastmcp.client.transports import StdioTransport # type: ignore[import-untyped]
 
81
  slide_index_text,
82
  )
83
 
84
+ # Spawn the MCP server as a subprocess with PYTHONPATH pointing at
85
+ # the local `mcp/src` tree so this stays self-contained in the Space.
86
  repo_root = Path(__file__).resolve().parents[1]
87
  mcp_src = repo_root / "mcp" / "src"
88
  existing_py_path = os.environ.get("PYTHONPATH", "")
mcp/src/aileen3_mcp/logging_utils.py CHANGED
@@ -4,7 +4,8 @@ import logging
4
  import os
5
  from pathlib import Path
6
 
7
- # Logging paths
 
8
  BASE_CACHE = Path(os.environ.get("AILEEN3_CACHE_DIR", Path.home() / ".cache" / "aileen3"))
9
  LOG_DIR = BASE_CACHE / "logs"
10
  LOG_FILE = LOG_DIR / "aileen3-mcp.log"
@@ -12,6 +13,7 @@ PACKAGE_LOGGER_NAME = "aileen3_mcp"
12
 
13
 
14
  def _resolve_level() -> int:
 
15
  level_name = os.environ.get("AILEEN3_LOGLEVEL", "").upper() or "INFO"
16
  level = getattr(logging, level_name, None)
17
  if isinstance(level, int):
 
4
  import os
5
  from pathlib import Path
6
 
7
+ # Logging paths used by the MCP package.
8
+ # The demo UI links directly to these files, so keep the layout stable.
9
  BASE_CACHE = Path(os.environ.get("AILEEN3_CACHE_DIR", Path.home() / ".cache" / "aileen3"))
10
  LOG_DIR = BASE_CACHE / "logs"
11
  LOG_FILE = LOG_DIR / "aileen3-mcp.log"
 
13
 
14
 
15
  def _resolve_level() -> int:
16
+ """Resolve the log level from ``AILEEN3_LOGLEVEL`` or fall back to INFO."""
17
  level_name = os.environ.get("AILEEN3_LOGLEVEL", "").upper() or "INFO"
18
  level = getattr(logging, level_name, None)
19
  if isinstance(level, int):
mcp/src/aileen3_mcp/media_tools.py CHANGED
@@ -36,6 +36,9 @@ TRANSCRIPTION_CACHE = BASE_CACHE / "transcription"
36
  for _path in (MEDIA_CACHE, SLIDE_CACHE, ANALYSIS_CACHE, TRANSCRIPTION_CACHE):
37
  _path.mkdir(parents=True, exist_ok=True)
38
 
 
 
 
39
  DEBUG = os.environ.get("AILEEN3_DEBUG", "").lower() in {"1", "true", "yes", "on"}
40
  DEBUG_DIR = Path(tempfile.gettempdir()) / "aileen3-debug"
41
  if DEBUG:
@@ -71,6 +74,12 @@ class _YDLLogger:
71
 
72
  @contextmanager
73
  def _silence_stdio():
 
 
 
 
 
 
74
  buf_out = io.StringIO()
75
  buf_err = io.StringIO()
76
  with redirect_stdout(buf_out), redirect_stderr(buf_err):
 
36
  for _path in (MEDIA_CACHE, SLIDE_CACHE, ANALYSIS_CACHE, TRANSCRIPTION_CACHE):
37
  _path.mkdir(parents=True, exist_ok=True)
38
 
39
+ # Optional debug artefacts for inspecting Gemini responses and intermediate files.
40
+ # These are deliberately kept out of the main cache to avoid interfering with
41
+ # normal operation and are only written when AILEEN3_DEBUG is enabled.
42
  DEBUG = os.environ.get("AILEEN3_DEBUG", "").lower() in {"1", "true", "yes", "on"}
43
  DEBUG_DIR = Path(tempfile.gettempdir()) / "aileen3-debug"
44
  if DEBUG:
 
74
 
75
  @contextmanager
76
  def _silence_stdio():
77
+ """Context manager that temporarily captures stdout/stderr of noisy libraries.
78
+
79
+ yt-dlp and ffmpeg are quite chatty; redirecting their output keeps the
80
+ Space logs readable while still allowing us to inspect any errors via
81
+ Python logging where needed.
82
+ """
83
  buf_out = io.StringIO()
84
  buf_err = io.StringIO()
85
  with redirect_stdout(buf_out), redirect_stderr(buf_err):
mcp/src/aileen3_mcp/server.py CHANGED
@@ -23,14 +23,27 @@ class HealthResult:
23
 
24
 
25
  def make_app() -> FastMCP:
26
- """Create the MCP application with available tools."""
 
 
 
 
 
27
  app = FastMCP("aileen3-mcp")
28
 
29
  @app.tool()
30
  def health() -> dict:
31
- """Return a basic health payload including ffmpeg and Gemini env availability."""
 
 
 
 
 
32
 
33
  def _ffmpeg_ok() -> tuple[bool, str]:
 
 
 
34
  binary = shutil.which("ffmpeg")
35
  if not binary:
36
  return False, "ffmpeg not found on PATH"
@@ -131,18 +144,20 @@ def make_app() -> FastMCP:
131
  return {"videos": videos}
132
 
133
  # Register media analysis tools:
134
- # - start_media_retrieval
135
- # - get_media_retrieval_status
136
- # - start_slide_extraction
137
- # - get_extracted_slides
138
- # - start_media_analysis
139
- # - get_media_analysis_result
 
140
  register_media_tools(app)
141
 
142
  return app
143
 
144
 
145
  def main() -> None:
 
146
  configure_logging()
147
  app = make_app()
148
  app.run() # stdio transport by default
 
23
 
24
 
25
  def make_app() -> FastMCP:
26
+ """Create the MCP application with all tools registered on a FastMCP instance.
27
+
28
+ This function is the single entry point for tool registration:
29
+ - it wires up lightweight health checks used by the demo Space, and
30
+ - it delegates to :func:`register_media_tools` for all long-running media flows.
31
+ """
32
  app = FastMCP("aileen3-mcp")
33
 
34
  @app.tool()
35
  def health() -> dict:
36
+ """Return a basic health payload including ffmpeg and Gemini env availability.
37
+
38
+ This mirrors the Gradio health cell and is intentionally cheap:
39
+ it only checks for the *presence* of ffmpeg and a Gemini API key,
40
+ leaving deeper checks to the demo-side health probe.
41
+ """
42
 
43
  def _ffmpeg_ok() -> tuple[bool, str]:
44
+ # Keep this probe very small: just check that the binary is
45
+ # callable and returns a version string, without running any
46
+ # actual media processing pipeline.
47
  binary = shutil.which("ffmpeg")
48
  if not binary:
49
  return False, "ffmpeg not found on PATH"
 
144
  return {"videos": videos}
145
 
146
  # Register media analysis tools:
147
+ # - start_media_retrieval / get_media_retrieval_status
148
+ # - start_slide_extraction / get_extracted_slides
149
+ # - start_media_analysis / get_media_analysis_result
150
+ # - start_media_transcription / get_media_transcription_result
151
+ #
152
+ # Each of these is exposed as an MCP tool that can be called from
153
+ # Claude Desktop, the Aileen 3 Agent, or the Gradio demo.
154
  register_media_tools(app)
155
 
156
  return app
157
 
158
 
159
  def main() -> None:
160
+ """Configure logging and run the MCP server over stdio."""
161
  configure_logging()
162
  app = make_app()
163
  app.run() # stdio transport by default