Spaces:
Sleeping
Sleeping
| """ | |
| Merged Configuration for medium-mcp | |
| Combines Medium-Scraper Config and medium-mcp-server MCPConfig | |
| """ | |
| import os | |
| from dataclasses import dataclass | |
| from typing import Optional | |
| from fake_useragent import UserAgent | |
| from pathlib import Path | |
| # Import shared configuration (now in same directory) | |
| from src.shared_config import SharedConfig | |
class Config:
    """
    Scraper configuration exposed as class-level constants.

    Values are computed once, when the class body executes, from
    ``SharedConfig.from_env()`` and from environment variables. Call
    ``reload_config()`` to re-read the shared configuration and refresh the
    constants derived from it.
    """

    # Initialize shared config
    _shared = SharedConfig.from_env()

    # Paths (adjusted for medium-mcp structure): parent of this file's directory
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # For HuggingFace: use an in-memory database (ephemeral storage) when
    # the SPACE_ID environment variable is set
    DB_PATH = ":memory:" if os.getenv("SPACE_ID") else os.path.join(BASE_DIR, "articles.db")

    # API keys: shared config first, plain environment variable as fallback
    GROQ_API_KEY = _shared.groq_api_key or os.getenv("GROQ_API_KEY")
    GEMINI_API_KEY = _shared.gemini_api_key or os.getenv("GEMINI_API_KEY")

    # Scraping settings
    HEADLESS = True
    TIMEOUT_MS = _shared.default_timeout * 1000
    MAX_WORKERS = int(os.getenv("MAX_WORKERS", "2"))  # Reduced for HF (was 5)
    RECURSIVE_DEPTH = 1

    # Resilience (from shared config)
    MAX_RETRIES = _shared.max_retries
    CIRCUIT_BREAKER_THRESHOLD = _shared.circuit_breaker_threshold
    CIRCUIT_BREAKER_TIMEOUT = _shared.circuit_breaker_timeout

    # Proxy (optional)
    PROXY_URL = os.getenv("PROXY_URL")

    # Medium GraphQL API settings
    MEDIUM_AUTH_COOKIES = os.getenv("MEDIUM_COOKIES")
    COOKIES_PATH = os.path.join(BASE_DIR, "medium_cookies.json")  # Optional: for persistent cookies
    MEDIUM_API_TIMEOUT = _shared.http_timeout

    # HTTP settings (from shared config)
    HTTP_MAX_CONNECTIONS = _shared.max_connections
    HTTP_MAX_KEEPALIVE = _shared.max_keepalive_connections
    HTTP_KEEPALIVE_EXPIRY = _shared.keepalive_expiry

    # Dynamic user agent
    _ua = UserAgent()

    @classmethod
    def get_user_agent(cls) -> str:
        """Return a random User-Agent string, or a fixed fallback on failure."""
        try:
            return cls._ua.random
        except Exception:
            # Best-effort: any failure in the generator yields the static
            # fallback, without swallowing KeyboardInterrupt/SystemExit.
            return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"

    @classmethod
    def get_headers(cls) -> dict:
        """Return request headers with a freshly chosen User-Agent."""
        return {
            "User-Agent": cls.get_user_agent(),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "DNT": "1",
            "Connection": "keep-alive",
        }

    @classmethod
    def get_shared_config(cls) -> SharedConfig:
        """Return the SharedConfig instance currently backing this class."""
        return cls._shared

    @classmethod
    def reload_config(cls) -> None:
        """
        Re-read SharedConfig from the environment and refresh derived constants.

        Every class attribute computed from the shared config is updated so
        that none of them keeps a stale value. Attributes read directly from
        other environment variables (DB_PATH, PROXY_URL, MEDIUM_AUTH_COOKIES)
        are not re-read here.
        """
        shared = SharedConfig.from_env()
        cls._shared = shared

        cls.GROQ_API_KEY = shared.groq_api_key or os.getenv("GROQ_API_KEY")
        cls.GEMINI_API_KEY = shared.gemini_api_key or os.getenv("GEMINI_API_KEY")
        cls.TIMEOUT_MS = shared.default_timeout * 1000
        # NOTE(review): when MAX_WORKERS is unset, the fallback here is
        # shared.max_workers, whereas the class body falls back to "2" --
        # confirm which default is intended.
        cls.MAX_WORKERS = int(os.getenv("MAX_WORKERS", shared.max_workers))

        cls.MAX_RETRIES = shared.max_retries
        cls.CIRCUIT_BREAKER_THRESHOLD = shared.circuit_breaker_threshold
        cls.CIRCUIT_BREAKER_TIMEOUT = shared.circuit_breaker_timeout

        cls.MEDIUM_API_TIMEOUT = shared.http_timeout
        cls.HTTP_MAX_CONNECTIONS = shared.max_connections
        cls.HTTP_MAX_KEEPALIVE = shared.max_keepalive_connections
        cls.HTTP_KEEPALIVE_EXPIRY = shared.keepalive_expiry
@dataclass
class MCPConfig:
    """
    MCP Server configuration.

    A dataclass holding scraper, output and ElevenLabs settings, plus an
    optional reference to the SharedConfig it was built from. Use
    ``MCPConfig.from_env()`` to build an instance from the environment, or
    construct it directly with keyword arguments to override defaults.
    """

    # Scraper settings
    max_workers: int = 5
    max_batch_size: int = 20
    default_timeout: int = 30
    default_output_format: str = "both"

    # Output directories
    audio_output_dir: str = "./outputs"

    # ElevenLabs settings
    elevenlabs_model: str = "eleven_multilingual_v2"
    elevenlabs_output_format: str = "mp3_44100_192"
    elevenlabs_default_voice: str = "george"

    # Shared config reference (None when the instance was built by hand)
    shared: Optional["SharedConfig"] = None

    @classmethod
    def from_env(cls) -> "MCPConfig":
        """
        Build an MCPConfig from the environment.

        Worker, batch-size and timeout values come from
        ``SharedConfig.from_env(env_prefix="MCP_")``; the remaining fields are
        read from their own environment variables, falling back to the same
        defaults as the dataclass fields.
        """
        shared = SharedConfig.from_env(env_prefix="MCP_")
        return cls(
            max_workers=shared.max_workers,
            max_batch_size=shared.max_batch_size,
            default_timeout=shared.default_timeout,
            default_output_format=os.getenv("MCP_DEFAULT_FORMAT", "both"),
            audio_output_dir=os.getenv("MCP_AUDIO_DIR", "./outputs"),
            elevenlabs_model=os.getenv("ELEVENLABS_MODEL", "eleven_multilingual_v2"),
            elevenlabs_output_format=os.getenv("ELEVENLABS_FORMAT", "mp3_44100_192"),
            elevenlabs_default_voice=os.getenv("ELEVENLABS_VOICE", "george"),
            shared=shared,
        )

    def get_shared_config(self) -> "SharedConfig":
        """Return the attached SharedConfig, or load one from the environment."""
        if self.shared is not None:
            return self.shared
        return SharedConfig.from_env()
# Character limits, keyed by ElevenLabs model identifier.
ELEVENLABS_CHAR_LIMITS = {
    "eleven_multilingual_v2": 10_000,
    "eleven_flash_v2_5": 40_000,
    "eleven_turbo_v2_5": 40_000,
    "eleven_v3": 5_000,
}

# ElevenLabs output-format strings, keyed by quality tier name.
ELEVENLABS_OUTPUT_FORMATS = {
    "standard": "mp3_22050_32",
    "high": "mp3_44100_128",
    "premium": "mp3_44100_192",
}