# Hugging Face Space: MCP-1st-Birthday / aileen3-core (status: Running)
# File size: 5,999 Bytes

from __future__ import annotations

import os
from dataclasses import asdict, dataclass

import shutil
import subprocess

from fastmcp import FastMCP

from aileen3_mcp.media_tools import register_media_tools, _silence_stdio, _YDLLogger
from aileen3_mcp.logging_utils import configure_logging

import logging

# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)


@dataclass
class HealthResult:
    """Result of a health check: a boolean flag plus a textual detail.

    NOTE(review): nothing in the visible part of this module constructs or
    reads this class (the ``health`` tool builds a plain dict instead).
    Confirm whether other modules import it before changing or removing it.
    """
    ok: bool  # presumably True when the check passed -- confirm with callers
    detail: str  # free-form text describing the outcome


def make_app() -> FastMCP:
    """Build the ``aileen3-mcp`` FastMCP application with every tool registered.

    This function is the single entry point for tool registration. Two
    lightweight tools are defined inline:

    - ``health``: reports whether ffmpeg is callable and whether the
      ``GEMINI_API_KEY`` environment variable is set.
    - ``search_youtube``: metadata-only YouTube search through yt-dlp.

    All long-running media flows are delegated to
    :func:`register_media_tools`.

    Returns:
        The configured :class:`FastMCP` instance. It is not started here;
        the caller decides when to run it.
    """
    app = FastMCP("aileen3-mcp")

    # NOTE(review): the docstrings of the decorated functions below are
    # presumably published to MCP clients as the tool descriptions, so they
    # are written for tool users rather than maintainers. Confirm against the
    # FastMCP tool documentation before rewording them.

    @app.tool()
    def health() -> dict:
        """Return a basic health payload including ffmpeg and Gemini env availability.

        This mirrors the Gradio health cell and is intentionally cheap:
        it only checks for the *presence* of ffmpeg and a Gemini API key,
        leaving deeper checks to the demo-side health probe.
        """

        def _ffmpeg_ok() -> tuple[bool, str]:
            # Keep this probe very small: just check that the binary is
            # callable and returns a version string, without running any
            # actual media processing pipeline.
            binary = shutil.which("ffmpeg")
            if not binary:
                return False, "ffmpeg not found on PATH"
            try:
                completed = subprocess.run(
                    [binary, "-version"],
                    capture_output=True,
                    text=True,
                    timeout=5,
                    check=False,
                )
            except Exception as exc:  # pragma: no cover - defensive
                # Deliberately broad: a health probe must report a failure
                # (timeout, exec error, decode error) instead of raising.
                return False, f"ffmpeg exec failed: {exc}"
            if completed.returncode != 0:
                return False, (completed.stderr or "").strip() or "ffmpeg returned error"
            # Use the first non-blank stdout line as the detail so a leading
            # empty line can never produce an empty success message.
            non_blank = [
                line for line in (completed.stdout or "").splitlines() if line.strip()
            ]
            return True, non_blank[0] if non_blank else "ffmpeg present"

        def _gemini_key_ok() -> tuple[bool, str]:
            # Presence check only: the key's value is never read or validated.
            key = bool(os.environ.get("GEMINI_API_KEY"))
            return (key, "GEMINI_API_KEY is set" if key else "GEMINI_API_KEY missing")

        ff_ok, ff_detail = _ffmpeg_ok()
        gem_ok, gem_detail = _gemini_key_ok()

        # Healthy only when both prerequisites are available.
        overall_ok = ff_ok and gem_ok
        detail = "; ".join(d for d in (ff_detail, gem_detail) if d)
        result = {"ok": overall_ok, "detail": detail, "ffmpeg": ff_ok, "gemini_api_key": gem_ok}
        log.debug("Health probe returning %s", result)
        return result

    @app.tool()
    def search_youtube(query: str, max_results: int = 10) -> dict:
        """Search YouTube for videos.

        Args:
            query: Free-form search terms, e.g. "lofi focus mix" or
                "python packaging tutorial 2024".
            max_results: Maximum number of videos to return (1-50). Defaults to 10.

        Returns:
            A dictionary with a single key ``videos`` that maps to a list of
            video objects. Each object contains:
            - id: YouTube video ID
            - title: Video title
            - webpage_url: Canonical video URL
            - duration_seconds: Video length in seconds (may be null)
            - channel: Channel name
            - channel_id: Channel ID

        Raises:
            ValueError: If ``query`` is empty or contains only whitespace.

        Typical `Aileen Agent` usage:
            - Use this tool when the user describes what they want to analyze but does not give a concrete URL.
            
        Notes for LLM tool users:
            - Use this tool to retrieve candidate videos before choosing one to
              watch or share; it does not download media.
            - Keep ``max_results`` modest (<=10) for fastest responses.
        """
        # Fail fast with a clear message: a blank query would otherwise be
        # sent to yt-dlp as "ytsearchN:" and surface as an extractor error.
        if not isinstance(query, str) or not query.strip():
            raise ValueError("query must be a non-empty search string")

        from yt_dlp import YoutubeDL  # local import so health probe stays light

        # Clamp to the documented 1-50 range instead of rejecting the call.
        capped_results = max(1, min(max_results, 50))
        opts = {
            "quiet": True,
            "no_warnings": True,
            "noprogress": True,
            "skip_download": True,
            "extract_flat": "in_playlist",
            "logger": _YDLLogger(),
            "extractor_args": {"youtube": {"player_client": ["default"]}},
        }

        search_spec = f"ytsearch{capped_results}:{query}"
        log.info("search_youtube query=%r max_results=%d", query, capped_results)

        with _silence_stdio():
            with YoutubeDL(opts) as ydl:
                info = ydl.extract_info(search_spec, download=False)

        # Tolerate a missing result, a missing "entries" key, and an explicit
        # ``"entries": None`` alike.
        entries = (info or {}).get("entries") or []
        videos = []
        for entry in entries:
            if not entry:
                # Defensive: skip None/empty placeholders instead of crashing
                # on ``entry.get`` below.
                continue
            video_id = entry.get("id")
            # Fall back to a watch URL built from the ID when the entry
            # carries no ``webpage_url`` of its own.
            webpage_url = entry.get("webpage_url") or (
                f"https://www.youtube.com/watch?v={video_id}" if video_id else None
            )

            videos.append(
                {
                    "id": video_id,
                    "title": entry.get("title"),
                    "webpage_url": webpage_url,
                    "duration_seconds": entry.get("duration"),
                    "channel": entry.get("channel"),
                    "channel_id": entry.get("channel_id"),
                }
            )

        return {"videos": videos}

    # Register media analysis tools:
    #   - start_media_retrieval / get_media_retrieval_status
    #   - start_slide_extraction / get_extracted_slides
    #   - start_media_analysis / get_media_analysis_result
    #   - start_media_transcription / get_media_transcription_result
    #
    # Each of these is exposed as an MCP tool that can be called from
    # Claude Desktop, the Aileen 3 Agent, or the Gradio demo.
    register_media_tools(app)

    return app


def main() -> None:
    """Script entry point: set up logging, build the app, and serve it.

    ``run()`` is called without arguments, so the server uses FastMCP's
    default transport (stdio, per the note kept from the original code).
    """
    configure_logging()
    # Build and start in one step; nothing else needs the app object here.
    make_app().run()  # stdio transport by default


# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()