Spaces:

T0X1N
/

Medium-MCP

Sleeping

File size: 6,806 Bytes

e98cc10

"""
Type Definitions for Medium-MCP

This module provides TypedDict, dataclass, enum, and type-alias definitions
for type-safe development with mypy strict mode enabled.
"""

from __future__ import annotations

from collections.abc import Awaitable, Callable
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Literal, NotRequired, TypedDict


# =============================================================================
# ENUMS
# =============================================================================


class OutputFormat(Enum):
    """Output format options for scraped content.

    Each member's value is the lowercase string form of its name.
    """

    MARKDOWN = "markdown"
    HTML = "html"
    # NOTE(review): presumably requests Markdown and HTML together — confirm
    # against the code that consumes this enum.
    BOTH = "both"


class ScrapeTier(Enum):
    """Scraping tier identifiers for tracking source.

    Each member's value is a lowercase string identifier.

    NOTE(review): ``ScrapeResult.source_tier`` and
    ``ScrapeToolOutput.source_tier`` are annotated as plain ``str`` rather
    than this enum; presumably they carry these member values — confirm.
    """

    CACHE = "cache"
    GRAPHQL = "graphql"
    HTTPX = "httpx"
    BROWSER = "browser"
    WAYBACK = "wayback"
    VISION = "vision"
    CROSS_SOURCE = "cross_source"


class CircuitState(Enum):
    """Circuit breaker states.

    Each member's value is the lowercase string form of its name.
    The transitions between states are implemented elsewhere; nothing in
    this enum enforces them.
    """

    # NOTE(review): presumably the normal state in which calls pass through.
    CLOSED = "closed"
    # NOTE(review): presumably the tripped state in which calls are rejected.
    OPEN = "open"
    # NOTE(review): presumably a probing state after a recovery wait — confirm.
    HALF_OPEN = "half_open"


class LogLevel(Enum):
    """Logging levels.

    Each member's value is the same upper-case string as its name; these
    strings coincide with the level names of the standard ``logging`` module.
    """

    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"


# =============================================================================
# TYPED DICTS - Author & Publication
# =============================================================================


class AuthorInfo(TypedDict):
    """Author information structure.

    Required keys: ``name``, ``username``.
    Optional keys (``NotRequired``): ``bio``, ``avatar_url``, ``followers``,
    ``following``.
    """

    name: str
    username: str
    bio: NotRequired[str]
    avatar_url: NotRequired[str]
    # Integer counts; a missing key means the value is not provided, which is
    # distinct from a present key holding 0.
    followers: NotRequired[int]
    following: NotRequired[int]


class PublicationInfo(TypedDict):
    """Publication information structure.

    Required keys: ``name``, ``slug``.
    Optional keys (``NotRequired``): ``description``, ``followers``, ``url``.
    """

    name: str
    slug: str
    description: NotRequired[str]
    followers: NotRequired[int]
    url: NotRequired[str]


# =============================================================================
# TYPED DICTS - Article Data
# =============================================================================


class ArticleMetadata(TypedDict):
    """Metadata about an article.

    Required keys: ``url``, ``title``, ``author``, ``tags``,
    ``reading_time``, ``is_paywalled``.
    Optional keys (``NotRequired``): ``subtitle``, ``publication``,
    ``claps``, ``responses``, ``published_at``, ``updated_at``.
    """

    url: str
    title: str
    subtitle: NotRequired[str]
    author: AuthorInfo
    # A plain string here, not a ``PublicationInfo`` mapping.
    # NOTE(review): presumably the publication's name — confirm.
    publication: NotRequired[str]
    tags: list[str]
    # NOTE(review): unit is not established in this module (presumably
    # minutes) — TODO confirm.
    reading_time: int
    claps: NotRequired[int]
    responses: NotRequired[int]
    is_paywalled: bool
    # NOTE(review): timestamps are carried as strings; the format is not
    # established in this module — TODO confirm.
    published_at: NotRequired[str]
    updated_at: NotRequired[str]


class ArticleContent(TypedDict):
    """Article content in various formats.

    Required keys: ``markdown``, ``word_count``, ``images``.
    Optional keys (``NotRequired``): ``html``, ``code_blocks``.
    """

    markdown: str
    html: NotRequired[str]
    word_count: int
    # NOTE(review): presumably image URLs — confirm against the producer.
    images: list[str]
    code_blocks: NotRequired[list[str]]


class ScrapeResult(TypedDict):
    """Complete result from a scrape operation.

    Required keys: ``metadata``, ``content``, ``source_tier``, ``cached``,
    ``scraped_at``.
    Optional key (``NotRequired``): ``embedding``.
    """

    metadata: ArticleMetadata
    content: ArticleContent
    # Typed as ``str``, not ``ScrapeTier``.
    # NOTE(review): presumably holds a ``ScrapeTier`` member value — confirm.
    source_tier: str
    cached: bool
    # NOTE(review): timestamp as a string; format not established here.
    scraped_at: str
    embedding: NotRequired[list[float]]


class ArticleRecord(TypedDict):
    """Database record for cached articles.

    A flat, string-oriented shape: structured values (``tags``,
    ``embedding``) are stored as JSON-serialized strings and ``author`` is a
    plain ``str`` rather than an ``AuthorInfo`` mapping.

    Required keys: ``url``, ``title``, ``author``, ``markdown_content``,
    ``tags``, ``is_paywalled``, ``scraped_at``.
    Optional keys (``NotRequired``): ``html_content``, ``embedding``.
    """

    url: str
    title: str
    author: str
    markdown_content: str
    html_content: NotRequired[str]
    tags: str  # JSON serialized
    is_paywalled: bool
    scraped_at: str
    embedding: NotRequired[str]  # JSON serialized


# =============================================================================
# TYPED DICTS - Search & Discovery
# =============================================================================


class SearchResult(TypedDict):
    """Individual search result.

    Required keys: ``title``, ``url``, ``author``.
    Optional keys (``NotRequired``): ``publication``, ``preview``,
    ``reading_time``.
    """

    title: str
    url: str
    # A plain string here, not an ``AuthorInfo`` mapping.
    author: str
    publication: NotRequired[str]
    preview: NotRequired[str]
    reading_time: NotRequired[int]


class TagFeed(TypedDict):
    """Tag-based feed result.

    All keys are required.
    """

    tag: str
    articles: list[SearchResult]
    # NOTE(review): presumably equals ``len(articles)``; nothing in this
    # module enforces that — confirm against the producer.
    count: int


# =============================================================================
# TYPED DICTS - API Responses
# =============================================================================


class GraphQLPostResponse(TypedDict):
    """GraphQL API post response structure."""

    id: str
    title: str
    content: dict  # bodyModel
    creator: AuthorInfo
    tags: NotRequired[list[dict]]
    clapCount: NotRequired[int]


class ApolloState(TypedDict):
    """Apollo client __APOLLO_STATE__ structure.

    Only the ``ROOT_QUERY`` key is declared. The state also carries
    additional dynamically named keys, which a ``TypedDict`` cannot
    enumerate.
    """

    # Parameterized because a bare ``dict`` is rejected by mypy strict mode
    # (``disallow_any_generics``).
    ROOT_QUERY: dict[str, Any]
    # Additional dynamic keys


# =============================================================================
# TYPED DICTS - MCP Tool Outputs
# =============================================================================


class ScrapeToolOutput(TypedDict):
    """Output schema for medium_scrape tool.

    Required keys: ``title``, ``author``, ``markdown_content``, ``tags``,
    ``reading_time``, ``is_paywalled``, ``source_tier``, ``url``.
    Optional key (``NotRequired``): ``html_content``.
    """

    title: str
    author: AuthorInfo
    markdown_content: str
    html_content: NotRequired[str]
    tags: list[str]
    reading_time: int
    is_paywalled: bool
    # Typed as ``str``, not ``ScrapeTier``.
    # NOTE(review): presumably holds a ``ScrapeTier`` member value — confirm.
    source_tier: str
    url: str


class BatchToolOutput(TypedDict):
    """Output schema for medium_batch tool.

    All keys are required. ``failed`` entries and ``stats`` have no fixed
    schema in this module, so they are typed as ``dict[str, Any]``: a bare
    ``dict`` is rejected by mypy strict mode (``disallow_any_generics``).
    """

    success: list[ScrapeToolOutput]
    failed: list[dict[str, Any]]
    stats: dict[str, Any]


class ReportToolOutput(TypedDict):
    """Output schema for medium_report tool.

    All keys are required.
    """

    title: str
    executive_summary: str
    key_insights: list[str]
    articles_analyzed: int
    # NOTE(review): timestamp as a string; format not established here.
    generated_at: str


# =============================================================================
# DATACLASSES - Options & Config
# =============================================================================


@dataclass
class ScrapeOptions:
    """Options for scraping operations.

    Every field has a default, so ``ScrapeOptions()`` is valid. No
    validation is performed on the values.
    """

    force_refresh: bool = False
    recursive_depth: int = 0
    enable_enhancements: bool = False
    # Defaults to requesting both formats.
    output_format: OutputFormat = OutputFormat.BOTH
    timeout_seconds: int = 30
    max_retries: int = 3


@dataclass
class HTTPConfig:
    """HTTP client configuration.

    Every field has a default, so ``HTTPConfig()`` is valid.

    NOTE(review): the pool field names match the parameters of
    ``httpx.Limits``; presumably these values are passed to an httpx
    client — confirm.
    """

    max_connections: int = 100
    max_keepalive_connections: int = 20
    # NOTE(review): the expiry and timeout values are presumably in
    # seconds — TODO confirm.
    keepalive_expiry: float = 5.0
    connect_timeout: float = 5.0
    read_timeout: float = 30.0
    http2: bool = True


@dataclass
class ResilienceConfig:
    """Circuit breaker and retry configuration.

    Every field has a default, so ``ResilienceConfig()`` is valid.
    """

    # NOTE(review): presumably the number of failures before the circuit
    # opens — confirm against the breaker implementation.
    failure_threshold: int = 5
    # NOTE(review): unit not established here (presumably seconds).
    recovery_timeout: int = 300
    # NOTE(review): presumably "at most ``rate_limit_requests`` requests per
    # ``rate_limit_period``", with the period in seconds — TODO confirm.
    rate_limit_requests: int = 10
    rate_limit_period: int = 60


@dataclass
class ScraperConfig:
    """Complete scraper configuration.

    Aggregates the top-level worker settings with the nested ``HTTPConfig``
    and ``ResilienceConfig``. Every field has a default, so
    ``ScraperConfig()`` is valid.
    """

    max_workers: int = 5
    headless: bool = True
    # ``default_factory`` builds a fresh nested config for each instance, so
    # instances never share (and cannot mutate) one another's sub-configs.
    http: HTTPConfig = field(default_factory=HTTPConfig)
    resilience: ResilienceConfig = field(default_factory=ResilienceConfig)


# =============================================================================
# TYPE ALIASES
# =============================================================================

# Common callback signatures
ProgressCallback = type[None] | type["async def(str) -> None"]  # type: ignore
ErrorHandler = type[None] | type["def(Exception, str) -> None"]  # type: ignore

# JSON-like structures
# ``JSONValue`` is recursive: the quoted "JSONValue" entries are forward
# references to the alias itself, because the name is not bound until the
# assignment completes.
JSONValue = str | int | float | bool | None | list["JSONValue"] | dict[str, "JSONValue"]
# A JSON object: string keys mapped to any ``JSONValue``.
JSONDict = dict[str, JSONValue]