Medium-MCP / src /config.py
Nikhil Pravin Pise
feat: Switch to Groq as primary LLM provider
40cdc42
"""
Merged Configuration for medium-mcp
Combines Medium-Scraper Config and medium-mcp-server MCPConfig
"""
import os
from dataclasses import dataclass
from typing import Optional
from fake_useragent import UserAgent
from pathlib import Path
# Import shared configuration (now in same directory)
from src.shared_config import SharedConfig
@dataclass
class Config:
    """
    Scraper configuration exposed as class-level constants.

    The values are resolved once, when the class body runs at import time,
    from a ``SharedConfig`` snapshot and from environment variables. Call
    ``reload_config()`` to re-read them later.

    The attributes are deliberately left unannotated: under ``@dataclass``
    only annotated names become instance fields, so these remain plain
    class attributes and the generated ``__init__`` takes no arguments.
    """

    # Snapshot of the shared settings taken when the class is defined.
    _shared = SharedConfig.from_env()

    # Paths (adjusted for medium-mcp structure): BASE_DIR is the parent of
    # the directory containing this file.
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # For HuggingFace: use an in-memory database (ephemeral storage) whenever
    # SPACE_ID is set; otherwise store articles.db under BASE_DIR.
    DB_PATH = (
        ":memory:" if os.getenv("SPACE_ID") else os.path.join(BASE_DIR, "articles.db")
    )

    # API keys: the shared config wins, the raw environment variable is the
    # fallback when the shared value is empty/None.
    GROQ_API_KEY = _shared.groq_api_key or os.getenv("GROQ_API_KEY")
    GEMINI_API_KEY = _shared.gemini_api_key or os.getenv("GEMINI_API_KEY")

    # Scraping settings
    HEADLESS = True
    # Shared timeout scaled by 1000 (presumably seconds -> milliseconds).
    TIMEOUT_MS = _shared.default_timeout * 1000
    MAX_WORKERS = int(os.getenv("MAX_WORKERS", "2"))  # Reduced for HF (was 5)
    RECURSIVE_DEPTH = 1

    # Resilience (from shared config)
    MAX_RETRIES = _shared.max_retries
    CIRCUIT_BREAKER_THRESHOLD = _shared.circuit_breaker_threshold
    CIRCUIT_BREAKER_TIMEOUT = _shared.circuit_breaker_timeout

    # Proxy (optional; None when PROXY_URL is not set)
    PROXY_URL = os.getenv("PROXY_URL")

    # Medium GraphQL API settings
    MEDIUM_AUTH_COOKIES = os.getenv("MEDIUM_COOKIES")
    # Optional: location for persistent cookies
    COOKIES_PATH = os.path.join(BASE_DIR, "medium_cookies.json")
    MEDIUM_API_TIMEOUT = _shared.http_timeout

    # HTTP settings (from shared config)
    HTTP_MAX_CONNECTIONS = _shared.max_connections
    HTTP_MAX_KEEPALIVE = _shared.max_keepalive_connections
    HTTP_KEEPALIVE_EXPIRY = _shared.keepalive_expiry

    # Dynamic User-Agent source, shared by every call to get_user_agent().
    _ua = UserAgent()

    @classmethod
    def get_user_agent(cls) -> str:
        """Return a random User-Agent string, or a fixed fallback on failure."""
        try:
            return cls._ua.random
        except Exception:
            # Best-effort lookup: any library error falls back to a static
            # UA. ``Exception`` (not a bare ``except``) so KeyboardInterrupt
            # and SystemExit still propagate.
            return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"

    @classmethod
    def get_headers(cls) -> dict:
        """Return default HTTP request headers with a fresh User-Agent."""
        return {
            "User-Agent": cls.get_user_agent(),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "DNT": "1",
            "Connection": "keep-alive",
        }

    @classmethod
    def get_shared_config(cls) -> SharedConfig:
        """Return the ``SharedConfig`` snapshot currently backing this class."""
        return cls._shared

    @classmethod
    def reload_config(cls):
        """
        Re-read the shared configuration and refresh the derived attributes.

        Every class attribute computed from ``_shared`` is updated so that
        the constants never disagree with ``get_shared_config()``. Values
        that do not come from the shared config or the API-key/worker
        environment variables (paths, proxy, cookies) are left unchanged.
        """
        shared = SharedConfig.from_env()
        cls._shared = shared

        cls.GROQ_API_KEY = shared.groq_api_key or os.getenv("GROQ_API_KEY")
        cls.GEMINI_API_KEY = shared.gemini_api_key or os.getenv("GEMINI_API_KEY")
        cls.TIMEOUT_MS = shared.default_timeout * 1000
        # NOTE(review): the fallback here is shared.max_workers, whereas the
        # class-level default above is "2" -- confirm which one is intended.
        cls.MAX_WORKERS = int(os.getenv("MAX_WORKERS", shared.max_workers))

        # Keep the remaining shared-derived constants in sync with the new
        # snapshot instead of leaving the import-time values in place.
        cls.MAX_RETRIES = shared.max_retries
        cls.CIRCUIT_BREAKER_THRESHOLD = shared.circuit_breaker_threshold
        cls.CIRCUIT_BREAKER_TIMEOUT = shared.circuit_breaker_timeout
        cls.MEDIUM_API_TIMEOUT = shared.http_timeout
        cls.HTTP_MAX_CONNECTIONS = shared.max_connections
        cls.HTTP_MAX_KEEPALIVE = shared.max_keepalive_connections
        cls.HTTP_KEEPALIVE_EXPIRY = shared.keepalive_expiry
@dataclass
class MCPConfig:
    """
    MCP Server configuration.

    Holds the server-level settings as dataclass fields and keeps a
    reference to the ``SharedConfig`` they were derived from (composition,
    not inheritance). Use ``from_env()`` to build an instance from the
    environment; the field defaults apply when constructing directly.
    """

    # Scraper settings (populated from the shared config by from_env())
    max_workers: int = 5
    max_batch_size: int = 20
    default_timeout: int = 30
    default_output_format: str = "both"

    # Output directories
    audio_output_dir: str = "./outputs"

    # ElevenLabs settings
    elevenlabs_model: str = "eleven_multilingual_v2"
    elevenlabs_output_format: str = "mp3_44100_192"
    elevenlabs_default_voice: str = "george"

    # Shared config reference; None when the instance was not built by
    # from_env().
    shared: Optional[SharedConfig] = None

    @classmethod
    def from_env(cls) -> "MCPConfig":
        """
        Build a configuration from environment variables.

        Worker, batch and timeout values come from a ``SharedConfig`` loaded
        with the ``MCP_`` prefix; the remaining fields are read directly
        from their environment variables, falling back to the same values
        as the field defaults.
        """
        shared = SharedConfig.from_env(env_prefix="MCP_")
        return cls(
            max_workers=shared.max_workers,
            max_batch_size=shared.max_batch_size,
            default_timeout=shared.default_timeout,
            default_output_format=os.getenv("MCP_DEFAULT_FORMAT", "both"),
            audio_output_dir=os.getenv("MCP_AUDIO_DIR", "./outputs"),
            elevenlabs_model=os.getenv("ELEVENLABS_MODEL", "eleven_multilingual_v2"),
            elevenlabs_output_format=os.getenv("ELEVENLABS_FORMAT", "mp3_44100_192"),
            elevenlabs_default_voice=os.getenv("ELEVENLABS_VOICE", "george"),
            shared=shared,
        )

    def get_shared_config(self) -> SharedConfig:
        """
        Return the attached ``SharedConfig``, loading one if none is set.

        The fallback object is created on every call and is not stored on
        the instance.
        """
        # Explicit None check: an Optional should not be tested by
        # truthiness, which would also reject a falsy-but-present object.
        if self.shared is not None:
            return self.shared
        # NOTE(review): unlike from_env(), this fallback does not pass
        # env_prefix="MCP_" -- confirm that is intended.
        return SharedConfig.from_env()
# ElevenLabs character limits, keyed by model identifier.
ELEVENLABS_CHAR_LIMITS = {
    "eleven_multilingual_v2": 10_000,
    "eleven_flash_v2_5": 40_000,
    "eleven_turbo_v2_5": 40_000,
    "eleven_v3": 5_000,
}

# Named quality presets mapped to ElevenLabs output-format identifiers.
# NOTE(review): the identifiers look like <codec>_<sample rate>_<bitrate>;
# confirm against the ElevenLabs API documentation.
ELEVENLABS_OUTPUT_FORMATS = {
    "standard": "mp3_22050_32",
    "high": "mp3_44100_128",
    "premium": "mp3_44100_192",
}