""" Constants for Medium-MCP Centralized configuration values, magic numbers, and string literals. Eliminates scattered hardcoded values throughout the codebase. """ from __future__ import annotations from typing import Final # ============================================================================= # MEDIUM DOMAINS # ============================================================================= # Official Medium domains MEDIUM_DOMAINS: Final[frozenset[str]] = frozenset({ "medium.com", "www.medium.com", }) # Known Medium publication domains PUBLICATION_DOMAINS: Final[frozenset[str]] = frozenset({ "towardsdatascience.com", "betterprogramming.pub", "levelup.gitconnected.com", "javascript.plainenglish.io", "python.plainenglish.io", "aws.plainenglish.io", "blog.devgenius.io", "blog.stackademic.com", "medium.datadriveninvestor.com", "entrepreneurshandbook.co", "betterhumans.pub", "writingcooperative.com", "psiloveyou.xyz", "uxdesign.cc", "eand.co", "codeburst.io", "itnext.io", "blog.prototypr.io", "uxplanet.org", "hackernoon.com", "thebolditalic.com", "forge.medium.com", "marker.medium.com", "onezero.medium.com", "gen.medium.com", "elemental.medium.com", "debugger.medium.com", "zora.medium.com", "heated.medium.com", "humanparts.medium.com", }) # All valid Medium-related domains ALL_MEDIUM_DOMAINS: Final[frozenset[str]] = MEDIUM_DOMAINS | PUBLICATION_DOMAINS # ============================================================================= # URL PATTERNS # ============================================================================= # Post ID characteristics POST_ID_MIN_LENGTH: Final[int] = 10 POST_ID_MAX_LENGTH: Final[int] = 16 POST_ID_HEX_CHARS: Final[str] = "0123456789abcdef" # URL tracking domains to unwrap TRACKING_DOMAINS: Final[frozenset[str]] = frozenset({ "l.facebook.com", "lm.facebook.com", "t.co", "bit.ly", "tinyurl.com", "goo.gl", "ow.ly", "buff.ly", "lnkd.in", "is.gd", "rb.gy", }) # Cache/archive domains CACHE_DOMAINS: Final[frozenset[str]] = frozenset({ "webcache.googleusercontent.com", "web.archive.org", "archive.is", "archive.today", "archive.ph", "archive.vn", "cached.googleusercontent.com", }) # ============================================================================= # HTTP CONFIGURATION # ============================================================================= # Connection pooling DEFAULT_MAX_CONNECTIONS: Final[int] = 100 DEFAULT_KEEPALIVE_CONNECTIONS: Final[int] = 20 DEFAULT_KEEPALIVE_EXPIRY: Final[float] = 5.0 # Timeouts (in seconds) DEFAULT_CONNECT_TIMEOUT: Final[float] = 5.0 DEFAULT_READ_TIMEOUT: Final[float] = 30.0 DEFAULT_TOTAL_TIMEOUT: Final[float] = 60.0 # Retry configuration DEFAULT_MAX_RETRIES: Final[int] = 3 DEFAULT_RETRY_BACKOFF_FACTOR: Final[float] = 0.5 # Rate limiting DEFAULT_RATE_LIMIT_REQUESTS: Final[int] = 10 DEFAULT_RATE_LIMIT_PERIOD: Final[int] = 60 # seconds # ============================================================================= # SCRAPER CONFIGURATION # ============================================================================= # Worker pool DEFAULT_MAX_WORKERS: Final[int] = 5 DEFAULT_BATCH_SIZE: Final[int] = 20 # Content limits MAX_ARTICLE_SIZE_BYTES: Final[int] = 10 * 1024 * 1024 # 10 MB MAX_IMAGES_PER_ARTICLE: Final[int] = 50 MAX_RECURSIVE_DEPTH: Final[int] = 3 # Caching DEFAULT_CACHE_TTL_HOURS: Final[int] = 24 STALE_CACHE_THRESHOLD_DAYS: Final[int] = 7 # ============================================================================= # RESILIENCE # ============================================================================= # Circuit breaker DEFAULT_FAILURE_THRESHOLD: Final[int] = 5 DEFAULT_RECOVERY_TIMEOUT: Final[int] = 300 # 5 minutes DEFAULT_HALF_OPEN_REQUESTS: Final[int] = 1 # ============================================================================= # LLM CONFIGURATION # ============================================================================= # Model names GROQ_DEFAULT_MODEL: Final[str] = "llama-3.3-70b-versatile" OPENAI_DEFAULT_MODEL: Final[str] = "gpt-4o-mini" GEMINI_DEFAULT_MODEL: Final[str] = "gemini-2.0-flash-exp" # Token limits GROQ_MAX_TOKENS: Final[int] = 8000 OPENAI_MAX_TOKENS: Final[int] = 128000 GEMINI_MAX_TOKENS: Final[int] = 1000000 # Character limits (approximate) GROQ_MAX_CHARS: Final[int] = 32000 OPENAI_MAX_CHARS: Final[int] = 500000 GEMINI_MAX_CHARS: Final[int] = 4000000 # ============================================================================= # AUDIO (ELEVENLABS) # ============================================================================= # Default voices ELEVENLABS_DEFAULT_VOICE: Final[str] = "Rachel" ELEVENLABS_VOICES: Final[tuple[str, ...]] = ( "Rachel", "Drew", "Clyde", "Paul", "Domi", "Dave", "Fin", "Sarah", "Antoni", "Thomas", "Charlie", "George", "Emily", "Elli", "Callum", ) # Audio settings DEFAULT_AUDIO_STABILITY: Final[float] = 0.5 DEFAULT_AUDIO_SIMILARITY: Final[float] = 0.75 DEFAULT_AUDIO_STYLE: Final[float] = 0.0 # ============================================================================= # GRADIO UI # ============================================================================= # Server configuration GRADIO_DEFAULT_PORT: Final[int] = 7860 GRADIO_DEFAULT_HOST: Final[str] = "0.0.0.0" # UI limits MAX_URL_INPUT_LENGTH: Final[int] = 500 MAX_QUERY_INPUT_LENGTH: Final[int] = 200 MAX_BATCH_URLS: Final[int] = 20 # ============================================================================= # MCP SERVER # ============================================================================= MCP_SERVER_NAME: Final[str] = "Medium Scraper v3" MCP_SERVER_VERSION: Final[str] = "3.1.0" # ============================================================================= # USER AGENTS # ============================================================================= DEFAULT_USER_AGENT: Final[str] = ( "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " "AppleWebKit/537.36 (KHTML, like Gecko) " "Chrome/120.0.0.0 Safari/537.36" ) # User agent pool for rotation USER_AGENT_POOL: Final[tuple[str, ...]] = ( DEFAULT_USER_AGENT, "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15", )