Spaces:

MCP-1st-Birthday
/

aileen3-core

Running

App Files Files Community

ndurner committed 18 days ago

Commit

8ea41f1

1 Parent(s): 581f330

Expectation-driven media analysis

Browse files

Files changed (3) hide show

demo/app.py +2 -0
demo/media_analysis_cell.py +491 -0
mcp/src/aileen3_mcp/media_tools.py +87 -27

demo/app.py CHANGED Viewed

@@ -10,6 +10,7 @@ from problem_cell import render_problem_cell
 from solution_cell import render_solution_cell
 from setup_cell import render_setup_cell
 from context_biased_transcription_cell import render_context_biased_transcription_cell
 from translation_cell import render_translation_cell
@@ -80,6 +81,7 @@ Think of this interface as a lightweight Jupyter notebook: instead of code cells
             )
         render_context_biased_transcription_cell(gemini_key_box)
         render_translation_cell(gemini_key_box)
     return demo

 from solution_cell import render_solution_cell
 from setup_cell import render_setup_cell
 from context_biased_transcription_cell import render_context_biased_transcription_cell
+from media_analysis_cell import render_media_analysis_cell
 from translation_cell import render_translation_cell
             )
         render_context_biased_transcription_cell(gemini_key_box)
+        render_media_analysis_cell(gemini_key_box)
         render_translation_cell(gemini_key_box)
     return demo

demo/media_analysis_cell.py ADDED Viewed

	@@ -0,0 +1,491 @@

+from __future__ import annotations
+import asyncio
+import base64
+import logging
+import os
+import sys
+from io import BytesIO
+from pathlib import Path
+from typing import List, Tuple
+import gradio as gr
+from PIL import Image
+from demo_logging import get_demo_logger, get_demo_log_path
+from health import GEMINI_ENV_VAR
+from layout import cell
+from problem_cell import render_status_box
+from slide_utils import normalize_slide_entries
+# Module-level logger obtained from the shared demo logging helper.
+log = get_demo_logger(__name__)
+# Demo log file location as a string; quoted in the failure hint built by run_media_analysis_demo.
+DEMO_LOG_PATH = str(get_demo_log_path())
+# Default number of polling calls _poll_until_done makes before giving up.
+MAX_POLL_ATTEMPTS = 3
+# Value passed as ``wait_seconds`` to the start_* / get_* MCP tool calls in this module.
+POLL_WAIT_SECONDS = 54
+# The single, fixed video this demo retrieves and analyses.
+ANALYSIS_VIDEO_URL = "https://youtu.be/eXP-PvKcI9A"
+def _image_from_data_uri(data: str) -> Image.Image | None:
+    """Turn a base64 image payload into a PIL image.
+
+    ``data`` may be a ``data:`` URI (everything after the first comma is
+    treated as base64) or a bare base64 string. ``None`` is returned for
+    non-string input, for a ``data:`` URI without a comma, for input that
+    fails to decode or decodes to nothing, and when the bytes cannot be
+    opened as an image.
+    """
+    if not isinstance(data, str):
+        return None
+    # For "data:<header>,<payload>" keep only the payload; partition yields ""
+    # when the comma is missing, which is rejected just below.
+    encoded = data.partition(",")[2] if data.startswith("data:") else data
+    if not encoded:
+        return None
+    try:
+        raw = base64.b64decode(encoded)
+    except Exception:
+        return None
+    if not raw:
+        return None
+    try:
+        with Image.open(BytesIO(raw)) as opened:
+            # The copy is taken inside the ``with`` so it is what gets returned
+            # once the context manager has exited.
+            return opened.copy()
+    except Exception:
+        return None
+def _unwrap_tool_result(result: object) -> dict:
+    """Reduce a tool-call result to a plain dict.
+
+    The first truthy value among ``result.data`` and
+    ``result.structured_content`` is used; if neither is truthy, ``result``
+    itself is used. When the chosen value is not a dict, an error-shaped dict
+    (``status="error"``, ``is_error=True``) naming the offending type is
+    returned instead.
+    """
+    payload = result
+    for attr in ("data", "structured_content"):
+        candidate = getattr(result, attr, None)
+        if candidate:
+            payload = candidate
+            break
+    if not isinstance(payload, dict):
+        return {
+            "status": "error",
+            "is_error": True,
+            "detail": f"Unexpected tool result type: {type(payload)!r}",
+        }
+    return payload
+def _status(payload: dict) -> str:
+    """Return the payload's ``status`` lower-cased, or "" when it is missing or falsy."""
+    raw = payload.get("status")
+    return str(raw).lower() if raw else ""
+def _is_done(payload: dict) -> bool:
+    """Tell whether the payload reports the ``done`` status."""
+    return "done" == _status(payload)
+def _needs_poll(payload: dict) -> bool:
+    """Tell whether the payload reports a ``pending`` or ``running`` status."""
+    current = _status(payload)
+    return current == "pending" or current == "running"
+async def _poll_until_done(
+    client,
+    *,
+    tool_name: str,
+    reference: str,
+    wait_seconds: int,
+    max_attempts: int = MAX_POLL_ATTEMPTS,
+) -> dict:
+    """Call a get_* MCP tool repeatedly until the job stops being pending/running.
+
+    Each attempt calls ``tool_name`` with ``reference`` and ``wait_seconds``.
+    The unwrapped response is returned as soon as it is flagged ``is_error``
+    or its status is anything other than ``pending``/``running``. If the call
+    itself raises, an error-shaped dict describing the exception is returned.
+    When every attempt still reports an in-progress status, the last response
+    is returned with a default ``detail`` added if it had none; with no
+    attempts made at all, an error-shaped dict is returned.
+    """
+    response: dict = {}
+    for _ in range(max_attempts):
+        try:
+            raw = await client.call_tool(
+                tool_name,
+                {"reference": reference, "wait_seconds": wait_seconds},
+            )
+            response = _unwrap_tool_result(raw)
+        except Exception as exc:  # pragma: no cover - defensive
+            return {
+                "status": "error",
+                "is_error": True,
+                "detail": f"Polling {tool_name} failed: {exc}",
+            }
+        # Keep polling only while the job is in progress and not flagged as failed.
+        still_running = _needs_poll(response) and not response.get("is_error")
+        if not still_running:
+            return response
+    if not response:
+        return {
+            "status": "error",
+            "is_error": True,
+            "detail": f"{tool_name} did not return a response.",
+        }
+    response.setdefault("detail", f"{tool_name} never reported completion; try again later.")
+    return response
+async def _run_media_analysis_flow(
+    gemini_api_key: str,
+    model_name: str,
+    context: str,
+    expectations: str,
+    prior_knowledge: str,
+    questions: str,
+) -> Tuple[str, str, List[list]]:
+    """Drive the MCP tools to run expectation-driven media analysis for a fixed video.
+
+    Spawns ``aileen3_mcp.server`` as a stdio subprocess, then runs, in order:
+    ``start_media_retrieval`` (polling ``get_media_retrieval_status`` if needed),
+    ``start_media_analysis`` (polling ``get_media_analysis_result`` if needed)
+    and ``get_extracted_slides`` for ``ANALYSIS_VIDEO_URL``.
+
+    Args:
+        gemini_api_key: Key exported to the subprocess under ``GEMINI_ENV_VAR``.
+        model_name: Analysis model; when blank, ``gemini-flash-latest`` is
+            reported in the logs/headline and ``AILEEN3_ANALYSIS_MODEL`` is not set.
+        context, expectations, prior_knowledge, questions: Free-text priors,
+            stripped and sent as the ``priors`` argument of ``start_media_analysis``.
+
+    Returns:
+        ``(status_html, analysis_text, gallery_items)``. Every failure path
+        returns a "fail" status box with ``""`` and ``[]``. On success
+        ``gallery_items`` is a list of ``[image, caption]`` pairs.
+    """
+    # Import lazily so a missing fastmcp install is reported in the UI instead of raising.
+    try:
+        from fastmcp import Client  # type: ignore[import-untyped]
+        from fastmcp.client.transports import StdioTransport  # type: ignore[import-untyped]
+    except Exception as exc:  # pragma: no cover - defensive
+        status = render_status_box(f"fastmcp is not available in this environment: {exc}", "fail")
+        return status, "", []
+    # Only the lengths of the priors are logged, not their content.
+    context_len = len((context or "").strip())
+    expectations_len = len((expectations or "").strip())
+    prior_len = len((prior_knowledge or "").strip())
+    questions_len = len((questions or "").strip())
+    normalized_model = (model_name or "").strip()
+    # Name used for logging and the success headline when no model was chosen.
+    selected_model = normalized_model or "gemini-flash-latest"
+    log.info(
+        "Media analysis demo start video=%s model=%s context_len=%d expectations_len=%d prior_len=%d questions_len=%d",
+        ANALYSIS_VIDEO_URL,
+        selected_model,
+        context_len,
+        expectations_len,
+        prior_len,
+        questions_len,
+    )
+    # Build the subprocess environment: <repo>/mcp/src is put first on PYTHONPATH
+    # so that ``-m aileen3_mcp.server`` can be resolved from the checkout.
+    repo_root = Path(__file__).resolve().parents[1]
+    mcp_src = repo_root / "mcp" / "src"
+    existing_py_path = os.environ.get("PYTHONPATH", "")
+    py_path = f"{mcp_src}{os.pathsep}{existing_py_path}" if existing_py_path else str(mcp_src)
+    env = os.environ.copy()
+    env["PYTHONPATH"] = py_path
+    env[GEMINI_ENV_VAR] = gemini_api_key
+    # The model override is only exported when the caller actually supplied one.
+    if normalized_model:
+        env["AILEEN3_ANALYSIS_MODEL"] = normalized_model
+    server_entry = ["-m", "aileen3_mcp.server"]
+    # NOTE(review): this logs the raw ``model_name`` while the other log lines use
+    # ``selected_model`` — confirm whether that difference is intended.
+    log.info(
+        "Media analysis demo spawning MCP server: cmd=%s args=%s PYTHONPATH=%s cwd=%s model=%s",
+        sys.executable,
+        server_entry,
+        py_path,
+        repo_root,
+        model_name,
+    )
+    transport = StdioTransport(
+        command=sys.executable,
+        args=server_entry,
+        env=env,
+        cwd=str(repo_root),
+    )
+    priors_payload = {
+        "context": (context or "").strip(),
+        "expectations": (expectations or "").strip(),
+        "prior_knowledge": (prior_knowledge or "").strip(),
+        "questions": (questions or "").strip(),
+    }
+    async with Client(transport) as client:
+        # --- Stage 1: media retrieval -------------------------------------
+        retrieval_start = _unwrap_tool_result(
+            await client.call_tool(
+                "start_media_retrieval",
+                {
+                    "source": ANALYSIS_VIDEO_URL,
+                    "prefer_audio_only": False,
+                    "wait_seconds": POLL_WAIT_SECONDS,
+                },
+            )
+        )
+        if retrieval_start.get("is_error"):
+            detail = retrieval_start.get("detail") or "Media retrieval failed."
+            log.warning("Media analysis retrieval failed: %s", detail)
+            status = render_status_box(detail, "fail")
+            return status, "", []
+        # The reference token identifies this media item in all later tool calls.
+        reference = retrieval_start.get("reference")
+        if not reference:
+            log.warning("Media analysis retrieval missing reference for video=%s", ANALYSIS_VIDEO_URL)
+            status = render_status_box(
+                "Media retrieval did not return a reference token.", "fail"
+            )
+            return status, "", []
+        retrieval = retrieval_start
+        # Poll only when the start call did not already report completion.
+        if not _is_done(retrieval_start):
+            retrieval = await _poll_until_done(
+                client,
+                tool_name="get_media_retrieval_status",
+                reference=reference,
+                wait_seconds=POLL_WAIT_SECONDS,
+            )
+        if retrieval.get("is_error") or not _is_done(retrieval):
+            detail = retrieval.get("detail") or retrieval.get("status") or "Retrieval incomplete."
+            log.warning("Media analysis retrieval incomplete reference=%s detail=%s", reference, detail)
+            status = render_status_box(
+                f"Media retrieval did not complete successfully: {detail}", "fail"
+            )
+            return status, "", []
+        # --- Stage 2: expectation-driven analysis -------------------------
+        analysis_start = _unwrap_tool_result(
+            await client.call_tool(
+                "start_media_analysis",
+                {
+                    "reference": reference,
+                    "priors": priors_payload,
+                    "wait_seconds": POLL_WAIT_SECONDS,
+                },
+            )
+        )
+        if analysis_start.get("is_error"):
+            detail = analysis_start.get("detail") or "Media analysis failed to start."
+            log.warning("Media analysis job failed to start reference=%s detail=%s", reference, detail)
+            status = render_status_box(
+                f"Media analysis did not complete successfully: {detail}", "fail"
+            )
+            return status, "", []
+        analysis = analysis_start
+        if not _is_done(analysis_start):
+            analysis = await _poll_until_done(
+                client,
+                tool_name="get_media_analysis_result",
+                reference=reference,
+                wait_seconds=POLL_WAIT_SECONDS,
+            )
+        if analysis.get("is_error") or not _is_done(analysis):
+            detail = analysis.get("detail") or analysis.get("status") or "Analysis incomplete."
+            log.warning("Media analysis job incomplete reference=%s detail=%s", reference, detail)
+            status = render_status_box(
+                f"Media analysis did not complete successfully: {detail}", "fail"
+            )
+            return status, "", []
+        # The result dict is looked up under "analysis" first, then "result".
+        payload = analysis.get("analysis") or analysis.get("result") or {}
+        if not isinstance(payload, dict):
+            log.warning("Media analysis payload unexpected type=%s reference=%s", type(payload), reference)
+            status = render_status_box(
+                "Media analysis returned an unexpected payload; check the Space logs for details.",
+                "fail",
+            )
+            return status, "", []
+        # The briefing text sits under the result dict's own "analysis" key.
+        analysis_text = str(payload.get("analysis") or "").strip()
+        if not analysis_text:
+            log.warning("Media analysis returned empty text reference=%s", reference)
+            status = render_status_box(
+                "Media analysis finished but returned an empty briefing.", "fail"
+            )
+            return status, "", []
+        # --- Stage 3: slides for the gallery ------------------------------
+        slides_result = _unwrap_tool_result(
+            await client.call_tool(
+                "get_extracted_slides",
+                {
+                    "reference": reference,
+                    "wait_seconds": 0,
+                },
+            )
+        )
+        slides = normalize_slide_entries(slides_result)
+        # Missing slides are only logged; the briefing is still returned.
+        if not slides:
+            log.warning(
+                "Media analysis reference=%s has no slides in payload type=%s",
+                reference,
+                type(slides_result.get("slides")),
+            )
+        gallery_items: List[list] = []
+        for slide in slides:
+            # Slides without a decodable image are skipped silently.
+            image_data = slide.get("image_data_uri")
+            if not isinstance(image_data, str):
+                continue
+            image = _image_from_data_uri(image_data)
+            if image is None:
+                continue
+            index = slide.get("index")
+            # Fall back to the slide's position among the items accepted so far.
+            if index is None:
+                index = len(gallery_items)
+            label = (slide.get("label") or "").strip()
+            start = slide.get("from")
+            end = slide.get("to")
+            time_range = ""
+            # A time range is shown only when both bounds are numeric.
+            if isinstance(start, (int, float)) and isinstance(end, (int, float)):
+                time_range = f"{int(start)}s–{int(end)}s"
+            # Caption format: "#<index> · <label> · <from>s–<to>s", omitting empty parts.
+            parts = [f"#{index}"]
+            if label:
+                parts.append(label)
+            if time_range:
+                parts.append(time_range)
+            caption = " · ".join(parts)
+            gallery_items.append([image, caption])
+        log.info(
+            "Media analysis success reference=%s model=%s slides=%d briefing_chars=%d",
+            reference,
+            selected_model,
+            len(gallery_items),
+            len(analysis_text),
+        )
+        headline = (
+            f"✅ Expectation-driven analysis finished for the short lecture clip "
+            f"using model `{selected_model}`."
+        )
+        status_html = render_status_box(headline, "success")
+        return status_html, analysis_text, gallery_items
+def run_media_analysis_demo(
+    gemini_api_key: str | None,
+    model_name: str,
+    context: str,
+    expectations: str,
+    prior_knowledge: str,
+    questions: str,
+) -> Tuple[str, str, List[list]]:
+    """Synchronous Gradio callback that runs the async media-analysis flow.
+
+    Returns ``(status_html, details_or_briefing, gallery_items)``. Without a
+    usable API key a "fail" status and setup instructions are returned and
+    nothing is run. Any exception raised while running the flow is logged and
+    turned into a "fail" status plus a troubleshooting hint.
+    """
+    key = (gemini_api_key or "").strip()
+    if key:
+        try:
+            flow = _run_media_analysis_flow(
+                key,
+                (model_name or "").strip(),
+                context,
+                expectations,
+                prior_knowledge,
+                questions,
+            )
+            return asyncio.run(flow)
+        except Exception as exc:  # pragma: no cover - defensive
+            log.exception("Media analysis demo failed: %s", exc)
+            failure_status = render_status_box(f"Media analysis failed: {exc}", "fail")
+            failure_details = (
+                "Something went wrong while talking to the Aileen MCP media tools. "
+                "Check the Space logs for more detail (demo log at "
+                f"`{DEMO_LOG_PATH}`) and ensure that ffmpeg, yt-dlp and Gemini are all available."
+            )
+            return failure_status, failure_details, []
+    # No key supplied: explain how to provide one instead of starting the flow.
+    missing_key_status = render_status_box(
+        "Please provide a Gemini API key in the setup cell above before running this demo.",
+        "fail",
+    )
+    missing_key_details = (
+        "The media analysis demo relies on Gemini via the Aileen MCP server. "
+        "Set `GEMINI_API_KEY` in the setup cell, run the health check to verify it, "
+        "then try this demo again."
+    )
+    return missing_key_status, missing_key_details, []
+def render_media_analysis_cell(gemini_key_input: gr.Textbox) -> None:
+    """Render the notebook-style cell for expectation-driven media analysis.
+
+    Builds the explanatory Markdown, a read-only box showing the fixed video
+    URL, a model dropdown, four pre-filled prior text boxes, a run button and
+    the three output components (status HTML, briefing Markdown, slide
+    gallery), then wires the button to ``run_media_analysis_demo``.
+
+    Args:
+        gemini_key_input: Textbox whose value is passed to the callback as the
+            Gemini API key.
+    """
+    with cell("🧩 Expectation-driven media analysis with priors"):
+        gr.Markdown(
+            """
+### 👩🏻‍🏫 Background
+The contextual transcription demo above nudged Gemini with a simple text prior (the YouTube description). Aileen 3 Core takes this a step
+further: it lets you describe your **baseline script** for a talk – who is speaking, what you expect to hear, what you already know, and
+which questions you actually care about – and then asks the model to surface where the session *deviates* from that script.
+These structured priors are the heart of the expectation-driven “Sinnfinder” idea: they turn a long conference video into a search for
+prediction errors. Instead of a neutral recap, Aileen 3 Core asks Gemini to focus on surprises, newly introduced actors or systems, and
+concrete commitments, while only briefly acknowledging content that matches your baseline.
+### 💁🏻‍♀️ Demo
+In this cell we run full expectation-driven analysis on a **short, lecture-style video** about the GPT-OSS open-weight release and its
+deliberative alignment / instruction hierarchy safety story. You can tweak the priors to reflect your own context and questions, and pick
+which Gemini model should power the analysis. Under the hood, the MCP server retrieves the video, extracts representative slides, and calls
+Gemini with both the audio and your priors. The resulting briefing and the detected slides are shown below.
+            """
+        )
+        # Display-only: the URL box is not an input of the click handler below.
+        gr.Textbox(
+            label="YouTube video URL",
+            value=ANALYSIS_VIDEO_URL,
+            interactive=False,
+        )
+        model_selector = gr.Dropdown(
+            label="Gemini analysis model",
+            choices=["gemini-flash-latest", "gemini-3-pro-preview"],
+            value="gemini-flash-latest",
+        )
+        # The four boxes below are passed to the callback as its four prior arguments.
+        context_box = gr.Textbox(
+            label="Context (scene setting, audience, constraints)",
+            lines=2,
+            value=(
+                "Short internal explainer on OpenAI's GPT-OSS open-weight release, "
+                "its safety training story (deliberative alignment, instruction hierarchy), "
+                "and what this means for everyday agent builders."
+            ),
+        )
+        expectations_box = gr.Textbox(
+            label="Expectations (what would *not* be surprising)",
+            lines=3,
+            value=(
+                "Clear overview of GPT-OSS model sizes and capabilities; explanation that GPT-OSS is an open-weight sibling of the o-series "
+                "with strong safety alignment; generic claims that deliberative alignment plus instruction hierarchy reduce jailbreak and "
+                "prompt-injection risk."
+            ),
+        )
+        prior_knowledge_box = gr.Textbox(
+            label="Prior knowledge (what you already know)",
+            lines=3,
+            value=(
+                "I already know that GPT-OSS ships in two open-weight reasoning-focused sizes, that it uses deliberative alignment "
+                "(chain-of-thought safety checks) plus instruction hierarchy (privilege-aware prompt handling), and that these models "
+                "perform competitively with o4-mini on strong safety benchmarks."
+            ),
+        )
+        questions_box = gr.Textbox(
+            label="Questions (what you want answered)",
+            lines=3,
+            value=(
+                "Are there any updates over what was discussed in the Kaggle writeup"
+            ),
+        )
+        run_button = gr.Button("Run expectation-driven analysis", variant="primary")
+        # Output components, in the same order as the callback's return tuple.
+        result_panel = gr.HTML(
+            value=render_status_box(
+                "👉 Click the button to retrieve the media, run expectation-driven analysis with your priors, and view the briefing plus slides.",
+                "placeholder",
+            )
+        )
+        analysis_markdown = gr.Markdown(visible=True)
+        slides_gallery = gr.Gallery(
+            label="Extracted slides",
+            value=[],
+            columns=4,
+        )
+        # NOTE(review): queue=False on a callback that may block for several poll
+        # intervals — confirm this does not run into request timeouts on the Space.
+        run_button.click(
+            fn=run_media_analysis_demo,
+            inputs=[
+                gemini_key_input,
+                model_selector,
+                context_box,
+                expectations_box,
+                prior_knowledge_box,
+                questions_box,
+            ],
+            outputs=[result_panel, analysis_markdown, slides_gallery],
+            queue=False,
+        )

mcp/src/aileen3_mcp/media_tools.py CHANGED Viewed

@@ -281,25 +281,46 @@ async def _get_or_create_job(kind: str, reference: str, factory: Callable[[], Jo
         return job
 async def _start_media_processing_job(
     *,
     kind: str,
     reference: str,
     wait_seconds: int,
     result_field: str,
-    cache_path_fn: Callable[[str], Path],
     flow_callable: Callable[..., dict],
     flow_args: tuple[Any, ...] = (),
 ) -> dict:
-    cache_path = cache_path_fn(reference)
-    existing = _load_json(cache_path)
-    if existing:
-        return {
-            "status": JobStatus.DONE,
-            "reference": reference,
-            result_field: existing,
-            "cached": True,
-        }
     def factory() -> JobRecord:
         return JobRecord(id=secrets.token_urlsafe(16), kind=kind, reference=reference)
@@ -322,7 +343,8 @@ async def _start_media_processing_job(
             job.finished_at = time.time()
     job.task = asyncio.create_task(runner())
-    return await _maybe_wait(job, wait_seconds)
 async def _get_media_processing_result(
@@ -331,23 +353,26 @@ async def _get_media_processing_result(
     reference: str,
     wait_seconds: int,
     result_field: str,
-    cache_path_fn: Callable[[str], Path],
 ) -> dict:
-    cache_path = cache_path_fn(reference)
-    existing = _load_json(cache_path)
-    if existing:
-        return {
-            "status": JobStatus.DONE,
-            "reference": reference,
-            result_field: existing,
-        }
     job_id = REFERENCE_INDEX.get((kind, reference))
     if job_id and job_id in JOBS:
         job = JOBS[job_id]
         if wait_seconds > 0:
-            return await _maybe_wait(job, wait_seconds)
-        return _job_payload(job, include_result=True)
     return {"status": "not_found", "reference": reference}
@@ -656,7 +681,9 @@ def _gemini_structured_slide_times(client, video_path: Path, reference: str) ->
     return sanitized
-def _gemini_analyze_audio(client, audio_path: Path, slides: list[dict], priors: Priors) -> dict:
     from google.genai import types
     upload = client.files.upload(
@@ -722,16 +749,33 @@ def _gemini_analyze_audio(client, audio_path: Path, slides: list[dict], priors:
         )
     ]
-    response = client.models.generate_content(model="gemini-flash-latest", contents=contents)
     text = _response_text(response)
     if not text:
         raise RuntimeError("Gemini returned no analysis")
-    return {
         "analysis": text,
         "audio_file_uri": upload.uri,
         "slide_count": len(slide_files),
     }
 def _language_slug(value: str) -> str:
@@ -961,13 +1005,24 @@ def _analysis_flow(metadata: dict, priors_obj: Priors | dict) -> dict:
     priors.media_context = _media_context_from_metadata(metadata)
     slides = _load_or_extract_slides(metadata)
     # Upload slide stills to Gemini for context
     client = _build_gemini_client()
     uploaded_slides = _upload_slides_to_gemini(client, slides, reference)
     with _silence_stdio():  # suppress any upload chatter
-        analysis_result = _gemini_analyze_audio(client, audio_path, uploaded_slides, priors)
     payload = {
         "reference": reference,
@@ -978,7 +1033,12 @@ def _analysis_flow(metadata: dict, priors_obj: Priors | dict) -> dict:
         "title": metadata.get("title"),
     }
-    _save_json(_analysis_json_path(reference), payload)
     _write_debug(reference, "analysis.json", payload)
     return payload

         return job
+def _normalize_processing_response(payload: dict, result_field: str) -> dict:
+    """Reshape a finished job response so its result sits under ``result_field``.
+
+    Only a dict whose ``status`` equals ``JobStatus.DONE`` and that has a
+    ``result`` key is rewritten: the output keeps ``status`` and ``reference``,
+    carries the result under ``result_field``, and copies ``job_id`` and
+    ``cached`` when present. Anything else is returned unchanged.
+    """
+    finished = (
+        isinstance(payload, dict)
+        and payload.get("status") == JobStatus.DONE
+        and "result" in payload
+    )
+    if not finished:
+        return payload
+    normalized = {
+        "status": JobStatus.DONE,
+        "reference": payload.get("reference"),
+        result_field: payload.get("result"),
+    }
+    # Optional bookkeeping keys are carried over only when the job supplied them.
+    for optional_key in ("job_id", "cached"):
+        if optional_key in payload:
+            normalized[optional_key] = payload[optional_key]
+    return normalized
 async def _start_media_processing_job(
     *,
     kind: str,
     reference: str,
     wait_seconds: int,
     result_field: str,
+    cache_path_fn: Callable[[str], Path] | None,
     flow_callable: Callable[..., dict],
     flow_args: tuple[Any, ...] = (),
 ) -> dict:
+    if cache_path_fn is not None:
+        cache_path = cache_path_fn(reference)
+        existing = _load_json(cache_path)
+        if existing:
+            return {
+                "status": JobStatus.DONE,
+                "reference": reference,
+                result_field: existing,
+                "cached": True,
+            }
     def factory() -> JobRecord:
         return JobRecord(id=secrets.token_urlsafe(16), kind=kind, reference=reference)
             job.finished_at = time.time()
     job.task = asyncio.create_task(runner())
+    response = await _maybe_wait(job, wait_seconds)
+    return _normalize_processing_response(response, result_field)
 async def _get_media_processing_result(
     reference: str,
     wait_seconds: int,
     result_field: str,
+    cache_path_fn: Callable[[str], Path] | None,
 ) -> dict:
+    if cache_path_fn is not None:
+        cache_path = cache_path_fn(reference)
+        existing = _load_json(cache_path)
+        if existing:
+            return {
+                "status": JobStatus.DONE,
+                "reference": reference,
+                result_field: existing,
+            }
     job_id = REFERENCE_INDEX.get((kind, reference))
     if job_id and job_id in JOBS:
         job = JOBS[job_id]
         if wait_seconds > 0:
+            response = await _maybe_wait(job, wait_seconds)
+        else:
+            response = _job_payload(job, include_result=True)
+        return _normalize_processing_response(response, result_field)
     return {"status": "not_found", "reference": reference}
     return sanitized
+def _gemini_analyze_audio(
+    client, audio_path: Path, slides: list[dict], priors: Priors, reference: str
+) -> dict:
     from google.genai import types
     upload = client.files.upload(
         )
     ]
+    model_name = os.environ.get("AILEEN3_ANALYSIS_MODEL") or "gemini-flash-latest"
+    log.info(
+        "Gemini analysis call reference=%s model=%s audio=%s slides=%d",
+        reference,
+        model_name,
+        audio_path.name,
+        len(slide_files),
+    )
+    response = client.models.generate_content(model=model_name, contents=contents)
     text = _response_text(response)
     if not text:
+        log.error("Gemini returned no analysis")
         raise RuntimeError("Gemini returned no analysis")
+    result = {
         "analysis": text,
         "audio_file_uri": upload.uri,
         "slide_count": len(slide_files),
     }
+    log.info(
+        "Gemini analysis completed reference=%s model=%s slide_count=%d text_chars=%d",
+        reference,
+        model_name,
+        len(slide_files),
+        len(text or ""),
+    )
+    return result
 def _language_slug(value: str) -> str:
     priors.media_context = _media_context_from_metadata(metadata)
     slides = _load_or_extract_slides(metadata)
+    log.info(
+        "analysis_flow start reference=%s title=%s slide_count=%d",
+        reference,
+        metadata.get("title"),
+        len(slides),
+    )
     # Upload slide stills to Gemini for context
     client = _build_gemini_client()
     uploaded_slides = _upload_slides_to_gemini(client, slides, reference)
+    log.debug(
+        "analysis_flow reference=%s uploaded_slides=%d", reference, len(uploaded_slides)
+    )
     with _silence_stdio():  # suppress any upload chatter
+        analysis_result = _gemini_analyze_audio(
+            client, audio_path, uploaded_slides, priors, reference
+        )
     payload = {
         "reference": reference,
         "title": metadata.get("title"),
     }
+    log.info(
+        "analysis_flow finished reference=%s slide_count=%d audio_uri=%s",
+        reference,
+        payload["slide_count"],
+        payload["audio_uri"],
+    )
     _write_debug(reference, "analysis.json", payload)
     return payload