"""
Type Definitions for Medium-MCP

This module provides TypedDict, dataclass, and enum definitions for
type-safe development with mypy strict mode enabled.
"""
from __future__ import annotations

from collections.abc import Awaitable, Callable
from dataclasses import dataclass, field
from enum import Enum
from typing import Literal, NotRequired, TypedDict
# =============================================================================
# ENUMS
# =============================================================================


class OutputFormat(Enum):
    """Formats in which scraped article content can be returned."""

    MARKDOWN = "markdown"  # markdown only
    HTML = "html"          # HTML only
    BOTH = "both"          # both representations together
class ScrapeTier(Enum):
    """Identifies which scraping tier produced a result, for source tracking."""

    CACHE = "cache"
    GRAPHQL = "graphql"
    HTTPX = "httpx"
    BROWSER = "browser"
    WAYBACK = "wayback"
    VISION = "vision"
    CROSS_SOURCE = "cross_source"
class CircuitState(Enum):
    """States of a circuit breaker (closed / open / half-open)."""

    CLOSED = "closed"
    OPEN = "open"
    HALF_OPEN = "half_open"
class LogLevel(Enum):
    """Symbolic logging levels; values match stdlib ``logging`` level names."""

    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"
| # ============================================================================= | |
| # TYPED DICTS - Author & Publication | |
| # ============================================================================= | |
| class AuthorInfo(TypedDict): | |
| """Author information structure.""" | |
| name: str | |
| username: str | |
| bio: NotRequired[str] | |
| avatar_url: NotRequired[str] | |
| followers: NotRequired[int] | |
| following: NotRequired[int] | |
| class PublicationInfo(TypedDict): | |
| """Publication/publication information.""" | |
| name: str | |
| slug: str | |
| description: NotRequired[str] | |
| followers: NotRequired[int] | |
| url: NotRequired[str] | |
| # ============================================================================= | |
| # TYPED DICTS - Article Data | |
| # ============================================================================= | |
| class ArticleMetadata(TypedDict): | |
| """Metadata about an article.""" | |
| url: str | |
| title: str | |
| subtitle: NotRequired[str] | |
| author: AuthorInfo | |
| publication: NotRequired[str] | |
| tags: list[str] | |
| reading_time: int | |
| claps: NotRequired[int] | |
| responses: NotRequired[int] | |
| is_paywalled: bool | |
| published_at: NotRequired[str] | |
| updated_at: NotRequired[str] | |
| class ArticleContent(TypedDict): | |
| """Article content in various formats.""" | |
| markdown: str | |
| html: NotRequired[str] | |
| word_count: int | |
| images: list[str] | |
| code_blocks: NotRequired[list[str]] | |
| class ScrapeResult(TypedDict): | |
| """Complete result from a scrape operation.""" | |
| metadata: ArticleMetadata | |
| content: ArticleContent | |
| source_tier: str | |
| cached: bool | |
| scraped_at: str | |
| embedding: NotRequired[list[float]] | |
| class ArticleRecord(TypedDict): | |
| """Database record for cached articles.""" | |
| url: str | |
| title: str | |
| author: str | |
| markdown_content: str | |
| html_content: NotRequired[str] | |
| tags: str # JSON serialized | |
| is_paywalled: bool | |
| scraped_at: str | |
| embedding: NotRequired[str] # JSON serialized | |
| # ============================================================================= | |
| # TYPED DICTS - Search & Discovery | |
| # ============================================================================= | |
| class SearchResult(TypedDict): | |
| """Individual search result.""" | |
| title: str | |
| url: str | |
| author: str | |
| publication: NotRequired[str] | |
| preview: NotRequired[str] | |
| reading_time: NotRequired[int] | |
class TagFeed(TypedDict):
    """Articles discovered for a single tag."""

    tag: str
    articles: list[SearchResult]
    count: int
| # ============================================================================= | |
| # TYPED DICTS - API Responses | |
| # ============================================================================= | |
| class GraphQLPostResponse(TypedDict): | |
| """GraphQL API post response structure.""" | |
| id: str | |
| title: str | |
| content: dict # bodyModel | |
| creator: AuthorInfo | |
| tags: NotRequired[list[dict]] | |
| clapCount: NotRequired[int] | |
class ApolloState(TypedDict):
    """Shape of the page-embedded ``__APOLLO_STATE__`` object.

    Only ``ROOT_QUERY`` is declared here; the real object also carries
    dynamically named cache keys that cannot be typed statically.
    """

    ROOT_QUERY: dict
| # ============================================================================= | |
| # TYPED DICTS - MCP Tool Outputs | |
| # ============================================================================= | |
| class ScrapeToolOutput(TypedDict): | |
| """Output schema for medium_scrape tool.""" | |
| title: str | |
| author: AuthorInfo | |
| markdown_content: str | |
| html_content: NotRequired[str] | |
| tags: list[str] | |
| reading_time: int | |
| is_paywalled: bool | |
| source_tier: str | |
| url: str | |
class BatchToolOutput(TypedDict):
    """Output schema for the ``medium_batch`` tool."""

    success: list[ScrapeToolOutput]
    failed: list[dict]
    stats: dict
class ReportToolOutput(TypedDict):
    """Output schema for the ``medium_report`` tool."""

    title: str
    executive_summary: str
    key_insights: list[str]
    articles_analyzed: int
    generated_at: str
# =============================================================================
# DATACLASSES - Options & Config
# =============================================================================


# NOTE(review): the @dataclass decorator was missing. The section is titled
# "DATACLASSES" and the class consists of defaulted, annotated fields, so
# without the decorator no __init__/__eq__/__repr__ is generated and the
# defaults are plain shared class attributes.
@dataclass
class ScrapeOptions:
    """Options for scraping operations."""

    force_refresh: bool = False
    recursive_depth: int = 0
    enable_enhancements: bool = False
    output_format: OutputFormat = OutputFormat.BOTH
    timeout_seconds: int = 30
    max_retries: int = 3
# NOTE(review): added the missing @dataclass decorator (see section header);
# it is required for the generated __init__ used by ScraperConfig's
# default_factory below.
@dataclass
class HTTPConfig:
    """HTTP client configuration (connection pool and timeouts)."""

    max_connections: int = 100
    max_keepalive_connections: int = 20
    keepalive_expiry: float = 5.0
    connect_timeout: float = 5.0
    read_timeout: float = 30.0
    http2: bool = True
# NOTE(review): added the missing @dataclass decorator (see section header).
@dataclass
class ResilienceConfig:
    """Circuit breaker, rate-limit, and retry configuration."""

    failure_threshold: int = 5
    recovery_timeout: int = 300
    rate_limit_requests: int = 10
    rate_limit_period: int = 60
# NOTE(review): added the missing @dataclass decorator. Without it,
# field(default_factory=...) is inert — the attributes would hold the
# Field sentinel objects instead of fresh HTTPConfig/ResilienceConfig
# instances per ScraperConfig.
@dataclass
class ScraperConfig:
    """Complete scraper configuration, aggregating the sub-configs."""

    max_workers: int = 5
    headless: bool = True
    # default_factory gives each ScraperConfig its own mutable sub-config.
    http: HTTPConfig = field(default_factory=HTTPConfig)
    resilience: ResilienceConfig = field(default_factory=ResilienceConfig)
| # ============================================================================= | |
| # TYPE ALIASES | |
| # ============================================================================= | |
| # Common callback signatures | |
| ProgressCallback = type[None] | type["async def(str) -> None"] # type: ignore | |
| ErrorHandler = type[None] | type["def(Exception, str) -> None"] # type: ignore | |
| # JSON-like structures | |
| JSONValue = str | int | float | bool | None | list["JSONValue"] | dict[str, "JSONValue"] | |
| JSONDict = dict[str, JSONValue] | |