Medium-MCP / src /types.py
Nikhil Pravin Pise
feat: implement comprehensive improvement plan (Phases 1-5)
e98cc10
"""
Type Definitions for Medium-MCP
This module provides TypedDict, dataclass, and enum definitions for
type-safe development with mypy strict mode enabled.
"""
from __future__ import annotations

from collections.abc import Awaitable, Callable
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Literal, NotRequired, TypedDict
# =============================================================================
# ENUMS
# =============================================================================
class OutputFormat(Enum):
    """Formats in which scraped content can be produced.

    Every member's value is the lowercase string spelling of its name.
    """

    MARKDOWN = "markdown"
    HTML = "html"
    BOTH = "both"
class ScrapeTier(Enum):
    """Identifiers for the scraping tier a result came from.

    Used to track the source of scraped data; each value is the lowercase
    string form of the member name.
    """

    CACHE = "cache"
    GRAPHQL = "graphql"
    HTTPX = "httpx"
    BROWSER = "browser"
    WAYBACK = "wayback"
    VISION = "vision"
    CROSS_SOURCE = "cross_source"
class CircuitState(Enum):
    """The three states a circuit breaker can be in."""

    CLOSED = "closed"
    OPEN = "open"
    HALF_OPEN = "half_open"
class LogLevel(Enum):
    """Named logging levels.

    Each member's value is the same uppercase string as its name.
    """

    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"
# =============================================================================
# TYPED DICTS - Author & Publication
# =============================================================================
class AuthorInfo(TypedDict):
"""Author information structure."""
name: str
username: str
bio: NotRequired[str]
avatar_url: NotRequired[str]
followers: NotRequired[int]
following: NotRequired[int]
class PublicationInfo(TypedDict):
"""Publication/publication information."""
name: str
slug: str
description: NotRequired[str]
followers: NotRequired[int]
url: NotRequired[str]
# =============================================================================
# TYPED DICTS - Article Data
# =============================================================================
class ArticleMetadata(TypedDict):
"""Metadata about an article."""
url: str
title: str
subtitle: NotRequired[str]
author: AuthorInfo
publication: NotRequired[str]
tags: list[str]
reading_time: int
claps: NotRequired[int]
responses: NotRequired[int]
is_paywalled: bool
published_at: NotRequired[str]
updated_at: NotRequired[str]
class ArticleContent(TypedDict):
"""Article content in various formats."""
markdown: str
html: NotRequired[str]
word_count: int
images: list[str]
code_blocks: NotRequired[list[str]]
class ScrapeResult(TypedDict):
"""Complete result from a scrape operation."""
metadata: ArticleMetadata
content: ArticleContent
source_tier: str
cached: bool
scraped_at: str
embedding: NotRequired[list[float]]
class ArticleRecord(TypedDict):
"""Database record for cached articles."""
url: str
title: str
author: str
markdown_content: str
html_content: NotRequired[str]
tags: str # JSON serialized
is_paywalled: bool
scraped_at: str
embedding: NotRequired[str] # JSON serialized
# =============================================================================
# TYPED DICTS - Search & Discovery
# =============================================================================
class SearchResult(TypedDict):
"""Individual search result."""
title: str
url: str
author: str
publication: NotRequired[str]
preview: NotRequired[str]
reading_time: NotRequired[int]
class TagFeed(TypedDict):
    """Dictionary shape for the feed of articles under one tag.

    All three keys are mandatory: the tag, its ``SearchResult`` entries,
    and an integer count.
    """

    tag: str
    articles: list[SearchResult]
    count: int  # presumably len(articles) — confirm with the producer
# =============================================================================
# TYPED DICTS - API Responses
# =============================================================================
class GraphQLPostResponse(TypedDict):
"""GraphQL API post response structure."""
id: str
title: str
content: dict # bodyModel
creator: AuthorInfo
tags: NotRequired[list[dict]]
clapCount: NotRequired[int]
class ApolloState(TypedDict):
    """Dictionary shape of the Apollo client ``__APOLLO_STATE__`` structure.

    Only the ``ROOT_QUERY`` key is declared. It is typed as
    ``dict[str, Any]`` rather than bare ``dict`` so the module passes mypy's
    strict-mode check for unparameterised generics.
    """

    ROOT_QUERY: dict[str, Any]
    # Additional dynamic keys are not declared here.
# =============================================================================
# TYPED DICTS - MCP Tool Outputs
# =============================================================================
class ScrapeToolOutput(TypedDict):
"""Output schema for medium_scrape tool."""
title: str
author: AuthorInfo
markdown_content: str
html_content: NotRequired[str]
tags: list[str]
reading_time: int
is_paywalled: bool
source_tier: str
url: str
class BatchToolOutput(TypedDict):
    """Dictionary shape returned by the ``medium_batch`` tool.

    ``success`` holds one ``ScrapeToolOutput`` per entry. ``failed`` and
    ``stats`` are free-form mappings whose keys this module does not pin
    down; they are typed as ``dict[str, Any]`` rather than bare ``dict`` so
    the module passes mypy's strict-mode check for unparameterised generics.
    """

    success: list[ScrapeToolOutput]
    failed: list[dict[str, Any]]
    stats: dict[str, Any]
class ReportToolOutput(TypedDict):
    """Dictionary shape returned by the ``medium_report`` tool.

    All five keys are mandatory.
    """

    title: str
    executive_summary: str
    key_insights: list[str]
    articles_analyzed: int
    generated_at: str  # timestamp as text; format not shown here
# =============================================================================
# DATACLASSES - Options & Config
# =============================================================================
@dataclass
class ScrapeOptions:
    """Per-call settings for a scraping operation.

    Every field has a default, so ``ScrapeOptions()`` is valid. By default
    nothing is force-refreshed, recursion depth is 0, enhancements are off,
    both output formats are requested, the timeout is 30 and up to 3
    retries are allowed.
    """

    force_refresh: bool = False
    recursive_depth: int = 0
    enable_enhancements: bool = False
    output_format: OutputFormat = OutputFormat.BOTH
    timeout_seconds: int = 30
    max_retries: int = 3
@dataclass
class HTTPConfig:
    """Settings for the HTTP client.

    Every field has a default, so ``HTTPConfig()`` is valid. The expiry and
    timeout fields are floats (presumably seconds — confirm against the
    client that consumes them).
    """

    # Connection pool sizing
    max_connections: int = 100
    max_keepalive_connections: int = 20
    keepalive_expiry: float = 5.0

    # Timeouts
    connect_timeout: float = 5.0
    read_timeout: float = 30.0

    # Protocol selection
    http2: bool = True
@dataclass
class ResilienceConfig:
    """Settings for the circuit breaker and rate limiting.

    Every field has a default, so ``ResilienceConfig()`` is valid.
    ``recovery_timeout`` and ``rate_limit_period`` are integers (presumably
    seconds — confirm against the code that consumes them).
    """

    # Circuit breaker
    failure_threshold: int = 5
    recovery_timeout: int = 300

    # Rate limiting
    rate_limit_requests: int = 10
    rate_limit_period: int = 60
@dataclass
class ScraperConfig:
    """Top-level scraper configuration.

    Combines two scalar settings with nested ``HTTPConfig`` and
    ``ResilienceConfig`` objects. The nested objects are created through
    ``default_factory``, so each ``ScraperConfig`` instance gets its own
    copies rather than sharing one mutable default.
    """

    max_workers: int = 5
    headless: bool = True

    # Nested configuration, built fresh for every instance
    http: HTTPConfig = field(default_factory=HTTPConfig)
    resilience: ResilienceConfig = field(default_factory=ResilienceConfig)
# =============================================================================
# TYPE ALIASES
# =============================================================================
# Common callback signatures
ProgressCallback = type[None] | type["async def(str) -> None"] # type: ignore
ErrorHandler = type[None] | type["def(Exception, str) -> None"] # type: ignore
# JSON-like structures
JSONValue = str | int | float | bool | None | list["JSONValue"] | dict[str, "JSONValue"]
JSONDict = dict[str, JSONValue]