""" Type Definitions for Medium-MCP This module provides TypedDict, dataclass, and enum definitions for type-safe development with mypy strict mode enabled. """ from __future__ import annotations from dataclasses import dataclass, field from enum import Enum from typing import Literal, NotRequired, TypedDict # ============================================================================= # ENUMS # ============================================================================= class OutputFormat(Enum): """Output format options for scraped content.""" MARKDOWN = "markdown" HTML = "html" BOTH = "both" class ScrapeTier(Enum): """Scraping tier identifiers for tracking source.""" CACHE = "cache" GRAPHQL = "graphql" HTTPX = "httpx" BROWSER = "browser" WAYBACK = "wayback" VISION = "vision" CROSS_SOURCE = "cross_source" class CircuitState(Enum): """Circuit breaker states.""" CLOSED = "closed" OPEN = "open" HALF_OPEN = "half_open" class LogLevel(Enum): """Logging levels.""" DEBUG = "DEBUG" INFO = "INFO" WARNING = "WARNING" ERROR = "ERROR" CRITICAL = "CRITICAL" # ============================================================================= # TYPED DICTS - Author & Publication # ============================================================================= class AuthorInfo(TypedDict): """Author information structure.""" name: str username: str bio: NotRequired[str] avatar_url: NotRequired[str] followers: NotRequired[int] following: NotRequired[int] class PublicationInfo(TypedDict): """Publication/publication information.""" name: str slug: str description: NotRequired[str] followers: NotRequired[int] url: NotRequired[str] # ============================================================================= # TYPED DICTS - Article Data # ============================================================================= class ArticleMetadata(TypedDict): """Metadata about an article.""" url: str title: str subtitle: NotRequired[str] author: AuthorInfo publication: NotRequired[str] tags: list[str] reading_time: int claps: NotRequired[int] responses: NotRequired[int] is_paywalled: bool published_at: NotRequired[str] updated_at: NotRequired[str] class ArticleContent(TypedDict): """Article content in various formats.""" markdown: str html: NotRequired[str] word_count: int images: list[str] code_blocks: NotRequired[list[str]] class ScrapeResult(TypedDict): """Complete result from a scrape operation.""" metadata: ArticleMetadata content: ArticleContent source_tier: str cached: bool scraped_at: str embedding: NotRequired[list[float]] class ArticleRecord(TypedDict): """Database record for cached articles.""" url: str title: str author: str markdown_content: str html_content: NotRequired[str] tags: str # JSON serialized is_paywalled: bool scraped_at: str embedding: NotRequired[str] # JSON serialized # ============================================================================= # TYPED DICTS - Search & Discovery # ============================================================================= class SearchResult(TypedDict): """Individual search result.""" title: str url: str author: str publication: NotRequired[str] preview: NotRequired[str] reading_time: NotRequired[int] class TagFeed(TypedDict): """Tag-based feed result.""" tag: str articles: list[SearchResult] count: int # ============================================================================= # TYPED DICTS - API Responses # ============================================================================= class GraphQLPostResponse(TypedDict): """GraphQL API post response structure.""" id: str title: str content: dict # bodyModel creator: AuthorInfo tags: NotRequired[list[dict]] clapCount: NotRequired[int] class ApolloState(TypedDict): """Apollo client __APOLLO_STATE__ structure.""" ROOT_QUERY: dict # Additional dynamic keys # ============================================================================= # TYPED DICTS - MCP Tool Outputs # ============================================================================= class ScrapeToolOutput(TypedDict): """Output schema for medium_scrape tool.""" title: str author: AuthorInfo markdown_content: str html_content: NotRequired[str] tags: list[str] reading_time: int is_paywalled: bool source_tier: str url: str class BatchToolOutput(TypedDict): """Output schema for medium_batch tool.""" success: list[ScrapeToolOutput] failed: list[dict] stats: dict class ReportToolOutput(TypedDict): """Output schema for medium_report tool.""" title: str executive_summary: str key_insights: list[str] articles_analyzed: int generated_at: str # ============================================================================= # DATACLASSES - Options & Config # ============================================================================= @dataclass class ScrapeOptions: """Options for scraping operations.""" force_refresh: bool = False recursive_depth: int = 0 enable_enhancements: bool = False output_format: OutputFormat = OutputFormat.BOTH timeout_seconds: int = 30 max_retries: int = 3 @dataclass class HTTPConfig: """HTTP client configuration.""" max_connections: int = 100 max_keepalive_connections: int = 20 keepalive_expiry: float = 5.0 connect_timeout: float = 5.0 read_timeout: float = 30.0 http2: bool = True @dataclass class ResilienceConfig: """Circuit breaker and retry configuration.""" failure_threshold: int = 5 recovery_timeout: int = 300 rate_limit_requests: int = 10 rate_limit_period: int = 60 @dataclass class ScraperConfig: """Complete scraper configuration.""" max_workers: int = 5 headless: bool = True http: HTTPConfig = field(default_factory=HTTPConfig) resilience: ResilienceConfig = field(default_factory=ResilienceConfig) # ============================================================================= # TYPE ALIASES # ============================================================================= # Common callback signatures ProgressCallback = type[None] | type["async def(str) -> None"] # type: ignore ErrorHandler = type[None] | type["def(Exception, str) -> None"] # type: ignore # JSON-like structures JSONValue = str | int | float | bool | None | list["JSONValue"] | dict[str, "JSONValue"] JSONDict = dict[str, JSONValue]