"""
Shared Configuration Module for Medium Agent Ecosystem
This module provides a centralized configuration system that both
Medium-Scraper and medium-mcp-server can import and extend.
Maintains backward compatibility while providing a single source of truth.
"""
import os
from dataclasses import dataclass, field
from typing import Optional, Dict, Any
from pathlib import Path
@dataclass
class SharedConfig:
    """
    Shared configuration for the Medium Agent ecosystem.
    Both Medium-Scraper and medium-mcp-server extend this base config.
    All settings can be overridden via environment variables.
    """

    # ========================================================================
    # Scraper Settings
    # ========================================================================
    max_workers: int = 5        # worker pool size for scraping jobs
    max_batch_size: int = 20    # maximum items processed per batch
    default_timeout: int = 30   # generic operation timeout, seconds
    max_concurrency: int = 5    # concurrent scrape tasks

    # ========================================================================
    # API Keys (None means the corresponding service is not configured)
    # ========================================================================
    groq_api_key: Optional[str] = None
    gemini_api_key: Optional[str] = None
    openai_api_key: Optional[str] = None
    elevenlabs_api_key: Optional[str] = None

    # ========================================================================
    # HTTP Settings (Connection Pooling)
    # ========================================================================
    max_connections: int = 100
    max_keepalive_connections: int = 20
    keepalive_expiry: float = 5.0  # seconds an idle keep-alive connection lives
    http_timeout: int = 30
    enable_http2: bool = True

    # ========================================================================
    # Rate Limiting
    # ========================================================================
    requests_per_minute: int = 60
    enable_rate_limiting: bool = True

    # ========================================================================
    # Database Settings
    # ========================================================================
    db_pool_size: int = 5
    db_timeout: int = 30
    enable_async_db: bool = True
    db_wal_mode: bool = True  # Write-Ahead Logging for better concurrency

    # ========================================================================
    # Resilience Settings
    # ========================================================================
    circuit_breaker_threshold: int = 5    # failures before the breaker opens
    circuit_breaker_timeout: int = 300    # seconds the breaker stays open
    max_retries: int = 3
    retry_backoff_base: float = 1.0       # Initial delay in seconds
    retry_backoff_multiplier: float = 2.0  # Exponential multiplier

    # ========================================================================
    # Logging Settings
    # ========================================================================
    log_level: str = "INFO"
    enable_structured_logging: bool = True
    log_format: str = "json"  # "json" or "console"

    # ========================================================================
    # Cache Settings
    # ========================================================================
    enable_caching: bool = True
    cache_ttl: int = 3600      # 1 hour default
    cache_max_size: int = 1000  # Max items in memory cache

    # ========================================================================
    # Paths
    # ========================================================================
    # default_factory so each instance resolves the path at creation time
    base_dir: Path = field(default_factory=lambda: Path(__file__).parent)
    db_path: Optional[Path] = None
    output_dir: Optional[Path] = None

    @classmethod
    def from_env(cls, env_prefix: str = "") -> "SharedConfig":
        """
        Load configuration from environment variables.

        A prefixed variable (e.g. "MCP_MAX_WORKERS") takes precedence over
        the bare name ("MAX_WORKERS"); unset variables keep the dataclass
        defaults.

        Args:
            env_prefix: Optional prefix for environment variables
                (e.g., "MCP_" or "SCRAPER_")

        Returns:
            SharedConfig instance with values from environment

        Raises:
            ValueError: if an int/float-typed variable holds a non-numeric
                string (propagated from int()/float()).
        """

        def get_env(key: str, default: Any = None, cast_type=str) -> Any:
            """Get environment variable with optional prefix and type casting."""
            env_key = f"{env_prefix}{key}" if env_prefix else key
            # Prefixed name first, then the bare name, then the default.
            value = os.getenv(env_key, os.getenv(key, default))
            if value is None:
                return default
            # Environment values arrive as strings; cast as requested.
            # Identity comparison: cast_type is a class object, not a value.
            if cast_type is bool:
                return str(value).lower() in ('true', '1', 'yes', 'on')
            if cast_type is int:
                return int(value)
            if cast_type is float:
                return float(value)
            return value

        return cls(
            # Scraper settings
            max_workers=get_env("MAX_WORKERS", 5, int),
            max_batch_size=get_env("MAX_BATCH_SIZE", 20, int),
            default_timeout=get_env("DEFAULT_TIMEOUT", 30, int),
            max_concurrency=get_env("MAX_CONCURRENCY", 5, int),
            # API Keys
            groq_api_key=get_env("GROQ_API_KEY"),
            gemini_api_key=get_env("GEMINI_API_KEY"),
            openai_api_key=get_env("OPENAI_API_KEY"),
            elevenlabs_api_key=get_env("ELEVENLABS_API_KEY"),
            # HTTP Settings
            max_connections=get_env("HTTP_MAX_CONNECTIONS", 100, int),
            max_keepalive_connections=get_env("HTTP_MAX_KEEPALIVE", 20, int),
            keepalive_expiry=get_env("HTTP_KEEPALIVE_EXPIRY", 5.0, float),
            http_timeout=get_env("HTTP_TIMEOUT", 30, int),
            enable_http2=get_env("ENABLE_HTTP2", True, bool),
            # Rate Limiting
            requests_per_minute=get_env("RATE_LIMIT_RPM", 60, int),
            enable_rate_limiting=get_env("ENABLE_RATE_LIMITING", True, bool),
            # Database
            db_pool_size=get_env("DB_POOL_SIZE", 5, int),
            db_timeout=get_env("DB_TIMEOUT", 30, int),
            enable_async_db=get_env("ENABLE_ASYNC_DB", True, bool),
            db_wal_mode=get_env("DB_WAL_MODE", True, bool),
            # Resilience
            circuit_breaker_threshold=get_env("CIRCUIT_BREAKER_THRESHOLD", 5, int),
            circuit_breaker_timeout=get_env("CIRCUIT_BREAKER_TIMEOUT", 300, int),
            max_retries=get_env("MAX_RETRIES", 3, int),
            retry_backoff_base=get_env("RETRY_BACKOFF_BASE", 1.0, float),
            retry_backoff_multiplier=get_env("RETRY_BACKOFF_MULTIPLIER", 2.0, float),
            # Logging
            log_level=get_env("LOG_LEVEL", "INFO"),
            enable_structured_logging=get_env("ENABLE_STRUCTURED_LOGGING", True, bool),
            log_format=get_env("LOG_FORMAT", "json"),
            # Cache
            enable_caching=get_env("ENABLE_CACHING", True, bool),
            cache_ttl=get_env("CACHE_TTL", 3600, int),
            cache_max_size=get_env("CACHE_MAX_SIZE", 1000, int),
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert configuration to a dictionary (Path values become str)."""
        return {
            k: str(v) if isinstance(v, Path) else v
            for k, v in self.__dict__.items()
        }

    def __repr__(self) -> str:
        """String representation with sensitive data masked."""
        safe_dict = self.to_dict()
        # Mask sensitive keys; only the first 8 characters are shown.
        sensitive_keys = ['groq_api_key', 'gemini_api_key', 'openai_api_key', 'elevenlabs_api_key']
        for key in sensitive_keys:
            if safe_dict.get(key):
                # Value is known truthy here, so truncate unconditionally.
                safe_dict[key] = safe_dict[key][:8] + "..."
        return f"SharedConfig({safe_dict})"
# Singleton instance for global access (optional)
# Module-level cache read/written by get_config() and set_config(); stays
# None until get_config() first builds a config from the environment.
_global_config: Optional[SharedConfig] = None
def get_config(reload: bool = False) -> SharedConfig:
    """
    Return the process-wide configuration singleton.

    Args:
        reload: If True, discard any cached instance and rebuild the
            configuration from the current environment.

    Returns:
        The shared SharedConfig instance.
    """
    global _global_config
    cached = _global_config
    if reload or cached is None:
        cached = SharedConfig.from_env()
        _global_config = cached
    return cached
def set_config(config: SharedConfig) -> None:
    """
    Install *config* as the process-wide configuration singleton.

    Args:
        config: SharedConfig instance that subsequent get_config()
            calls should return.
    """
    global _global_config
    _global_config = config