"""
Type Definitions for Medium-MCP

This module provides TypedDict, dataclass, and enum definitions for
type-safe development with mypy strict mode enabled.
"""
from __future__ import annotations

from collections.abc import Awaitable, Callable
from dataclasses import dataclass, field
from enum import Enum
from typing import Literal, NotRequired, TypedDict
# =============================================================================
# ENUMS
# =============================================================================


class OutputFormat(Enum):
    """Formats in which scraped article content can be returned."""

    MARKDOWN = "markdown"  # markdown only
    HTML = "html"          # HTML only
    BOTH = "both"          # both representations together
class ScrapeTier(Enum):
    """Identifies which scraping tier produced a result, for source tracking."""

    CACHE = "cache"
    GRAPHQL = "graphql"
    HTTPX = "httpx"
    BROWSER = "browser"
    WAYBACK = "wayback"
    VISION = "vision"
    CROSS_SOURCE = "cross_source"
class CircuitState(Enum):
    """States of a circuit breaker (closed / open / half-open)."""

    CLOSED = "closed"
    OPEN = "open"
    HALF_OPEN = "half_open"
class LogLevel(Enum):
    """Symbolic logging levels; values match stdlib ``logging`` level names."""

    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"
| # ============================================================================= | |
| # TYPED DICTS - Author & Publication | |
| # ============================================================================= | |
| class AuthorInfo(TypedDict): | |
| """Author information structure.""" | |
| name: str | |
| username: str | |
| bio: NotRequired[str] | |
| avatar_url: NotRequired[str] | |
| followers: NotRequired[int] | |
| following: NotRequired[int] | |
| class PublicationInfo(TypedDict): | |
| """Publication/publication information.""" | |
| name: str | |
| slug: str | |
| description: NotRequired[str] | |
| followers: NotRequired[int] | |
| url: NotRequired[str] | |
| # ============================================================================= | |
| # TYPED DICTS - Article Data | |
| # ============================================================================= | |
| class ArticleMetadata(TypedDict): | |
| """Metadata about an article.""" | |
| url: str | |
| title: str | |
| subtitle: NotRequired[str] | |
| author: AuthorInfo | |
| publication: NotRequired[str] | |
| tags: list[str] | |
| reading_time: int | |
| claps: NotRequired[int] | |
| responses: NotRequired[int] | |
| is_paywalled: bool | |
| published_at: NotRequired[str] | |
| updated_at: NotRequired[str] | |
| class ArticleContent(TypedDict): | |
| """Article content in various formats.""" | |
| markdown: str | |
| html: NotRequired[str] | |
| word_count: int | |
| images: list[str] | |
| code_blocks: NotRequired[list[str]] | |
| class ScrapeResult(TypedDict): | |
| """Complete result from a scrape operation.""" | |
| metadata: ArticleMetadata | |
| content: ArticleContent | |
| source_tier: str | |
| cached: bool | |
| scraped_at: str | |
| embedding: NotRequired[list[float]] | |
| class ArticleRecord(TypedDict): | |
| """Database record for cached articles.""" | |
| url: str | |
| title: str | |
| author: str | |
| markdown_content: str | |
| html_content: NotRequired[str] | |
| tags: str # JSON serialized | |
| is_paywalled: bool | |
| scraped_at: str | |
| embedding: NotRequired[str] # JSON serialized | |
| # ============================================================================= | |
| # TYPED DICTS - Search & Discovery | |
| # ============================================================================= | |
| class SearchResult(TypedDict): | |
| """Individual search result.""" | |
| title: str | |
| url: str | |
| author: str | |
| publication: NotRequired[str] | |
| preview: NotRequired[str] | |
| reading_time: NotRequired[int] | |
class TagFeed(TypedDict):
    """Articles discovered for a single tag."""

    tag: str
    articles: list[SearchResult]
    count: int
| # ============================================================================= | |
| # TYPED DICTS - API Responses | |
| # ============================================================================= | |
| class GraphQLPostResponse(TypedDict): | |
| """GraphQL API post response structure.""" | |
| id: str | |
| title: str | |
| content: dict # bodyModel | |
| creator: AuthorInfo | |
| tags: NotRequired[list[dict]] | |
| clapCount: NotRequired[int] | |
class ApolloState(TypedDict):
    """Shape of the page-embedded ``__APOLLO_STATE__`` object.

    Only ``ROOT_QUERY`` is declared here; the real object also carries
    dynamically named cache keys that cannot be typed statically.
    """

    ROOT_QUERY: dict
| # ============================================================================= | |
| # TYPED DICTS - MCP Tool Outputs | |
| # ============================================================================= | |
| class ScrapeToolOutput(TypedDict): | |
| """Output schema for medium_scrape tool.""" | |
| title: str | |
| author: AuthorInfo | |
| markdown_content: str | |
| html_content: NotRequired[str] | |
| tags: list[str] | |
| reading_time: int | |
| is_paywalled: bool | |
| source_tier: str | |
| url: str | |
class BatchToolOutput(TypedDict):
    """Output schema for the ``medium_batch`` tool."""

    success: list[ScrapeToolOutput]
    failed: list[dict]
    stats: dict
class ReportToolOutput(TypedDict):
    """Output schema for the ``medium_report`` tool."""

    title: str
    executive_summary: str
    key_insights: list[str]
    articles_analyzed: int
    generated_at: str
# =============================================================================
# DATACLASSES - Options & Config
# =============================================================================


# NOTE(review): the @dataclass decorator was missing. The section is titled
# "DATACLASSES" and the class consists of defaulted, annotated fields, so
# without the decorator no __init__/__eq__/__repr__ is generated and the
# defaults are plain shared class attributes.
@dataclass
class ScrapeOptions:
    """Options for scraping operations."""

    force_refresh: bool = False
    recursive_depth: int = 0
    enable_enhancements: bool = False
    output_format: OutputFormat = OutputFormat.BOTH
    timeout_seconds: int = 30
    max_retries: int = 3
# NOTE(review): added the missing @dataclass decorator (see section header);
# it is required for the generated __init__ used by ScraperConfig's
# default_factory below.
@dataclass
class HTTPConfig:
    """HTTP client configuration (connection pool and timeouts)."""

    max_connections: int = 100
    max_keepalive_connections: int = 20
    keepalive_expiry: float = 5.0
    connect_timeout: float = 5.0
    read_timeout: float = 30.0
    http2: bool = True
# NOTE(review): added the missing @dataclass decorator (see section header).
@dataclass
class ResilienceConfig:
    """Circuit breaker, rate-limit, and retry configuration."""

    failure_threshold: int = 5
    recovery_timeout: int = 300
    rate_limit_requests: int = 10
    rate_limit_period: int = 60
# NOTE(review): added the missing @dataclass decorator. Without it,
# field(default_factory=...) is inert — the attributes would hold the
# Field sentinel objects instead of fresh HTTPConfig/ResilienceConfig
# instances per ScraperConfig.
@dataclass
class ScraperConfig:
    """Complete scraper configuration, aggregating the sub-configs."""

    max_workers: int = 5
    headless: bool = True
    # default_factory gives each ScraperConfig its own mutable sub-config.
    http: HTTPConfig = field(default_factory=HTTPConfig)
    resilience: ResilienceConfig = field(default_factory=ResilienceConfig)
| # ============================================================================= | |
| # TYPE ALIASES | |
| # ============================================================================= | |
| # Common callback signatures | |
| ProgressCallback = type[None] | type["async def(str) -> None"] # type: ignore | |
| ErrorHandler = type[None] | type["def(Exception, str) -> None"] # type: ignore | |
| # JSON-like structures | |
| JSONValue = str | int | float | bool | None | list["JSONValue"] | dict[str, "JSONValue"] | |
| JSONDict = dict[str, JSONValue] | |