Spaces:
Running
Running
| """ | |
| Configuration management for the HF EDA MCP Server. | |
| This module provides configuration classes and utilities for managing | |
| server settings, authentication, caching, and performance parameters. | |
| """ | |
| import os | |
| import logging | |
| import sys | |
| from typing import Optional, Dict, Any | |
| from dataclasses import dataclass, field | |
@dataclass
class ServerConfig:
    """Settings for the HF EDA MCP Server.

    Every field has a default, so ``ServerConfig()`` yields a usable
    configuration; ``ServerConfig.from_env()`` overlays values taken from
    environment variables.

    Attributes:
        port: TCP port the server listens on.
        host: Interface address the server binds to.
        mcp_server: Whether the MCP server is enabled.
        share: Whether sharing is enabled.
        hf_token: HuggingFace token, or ``None`` when not configured.
        log_level: Name of the logging level (e.g. ``"INFO"``).
        log_format: Format string handed to the logging module.
        cache_dir: Cache directory path, or ``None`` when not configured.
        max_cache_size: Cache size limit in MB.
        max_sample_size: Upper bound on sample size.
        max_concurrent_requests: Upper bound on concurrent requests.
        request_timeout: Request timeout in seconds.
        gradio_settings: Extra Gradio settings; a fresh dict per instance.
    """

    # Server settings
    port: int = 7860
    host: str = "0.0.0.0"
    mcp_server: bool = True
    share: bool = False

    # Authentication settings
    hf_token: Optional[str] = None

    # Logging settings
    log_level: str = "INFO"
    log_format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

    # Cache settings
    cache_dir: Optional[str] = None
    max_cache_size: int = 1000  # MB

    # Performance settings
    max_sample_size: int = 50000
    max_concurrent_requests: int = 10
    request_timeout: int = 300  # seconds

    # Additional Gradio settings
    gradio_settings: Dict[str, Any] = field(default_factory=dict)

    @staticmethod
    def _env_int(name: str, default: int) -> int:
        """Read integer environment variable ``name``, or ``default`` if unset.

        Raises:
            ValueError: If the variable is set but is not a valid integer.
                The message names the variable so a bad deployment setting
                is easy to locate.
        """
        raw = os.getenv(name)
        if raw is None:
            return default
        try:
            return int(raw)
        except ValueError as exc:
            raise ValueError(
                f"Environment variable {name} must be an integer, got {raw!r}"
            ) from exc

    @classmethod
    def from_env(cls) -> "ServerConfig":
        """Create a configuration from environment variables.

        Starts from the class defaults and overrides each field whose
        ``HF_EDA_*`` variable (or ``HF_TOKEN``) is set. Boolean variables are
        true only when their value equals ``"true"`` case-insensitively.

        Returns:
            A new configuration instance.

        Raises:
            ValueError: If an integer-valued variable cannot be parsed.
        """
        config = cls()

        # Server settings
        config.port = cls._env_int("HF_EDA_PORT", config.port)
        config.host = os.getenv("HF_EDA_HOST", config.host)
        config.mcp_server = os.getenv("HF_EDA_MCP_ENABLED", "true").lower() == "true"
        config.share = os.getenv("HF_EDA_SHARE", "false").lower() == "true"

        # Authentication
        config.hf_token = os.getenv("HF_TOKEN")

        # Logging (normalised to upper case so it matches logging's level names)
        config.log_level = os.getenv("HF_EDA_LOG_LEVEL", config.log_level).upper()

        # Cache settings
        config.cache_dir = os.getenv("HF_EDA_CACHE_DIR")
        config.max_cache_size = cls._env_int(
            "HF_EDA_MAX_CACHE_SIZE", config.max_cache_size
        )

        # Performance settings
        config.max_sample_size = cls._env_int(
            "HF_EDA_MAX_SAMPLE_SIZE", config.max_sample_size
        )
        config.max_concurrent_requests = cls._env_int(
            "HF_EDA_MAX_CONCURRENT", config.max_concurrent_requests
        )
        config.request_timeout = cls._env_int(
            "HF_EDA_REQUEST_TIMEOUT", config.request_timeout
        )

        return config
def setup_logging(config: "ServerConfig") -> logging.Logger:
    """Set up logging configuration and return this module's logger.

    Configures the root logger with a stdout stream handler using
    ``config.log_level`` and ``config.log_format``, then raises the
    threshold of the ``gradio``, ``httpx`` and ``urllib3`` loggers to
    WARNING.

    An unrecognised ``config.log_level`` no longer aborts start-up: the
    level falls back to INFO and a warning is logged.

    Args:
        config: Configuration providing ``log_level`` and ``log_format``.

    Returns:
        The logger named after this module.
    """
    # Accept only names that resolve to a numeric logging level; a plain
    # getattr() would raise on typos and could return unrelated attributes.
    level = getattr(logging, str(config.log_level).upper(), None)
    level_is_valid = isinstance(level, int) and not isinstance(level, bool)
    if not level_is_valid:
        level = logging.INFO

    # Configure root logger
    logging.basicConfig(
        level=level,
        format=config.log_format,
        handlers=[
            logging.StreamHandler(sys.stdout),
        ],
    )

    # Create logger for this module
    logger = logging.getLogger(__name__)
    if not level_is_valid:
        logger.warning(
            "Unknown log level %r; falling back to INFO", config.log_level
        )

    # Set specific log levels for external libraries
    for library in ("gradio", "httpx", "urllib3"):
        logging.getLogger(library).setLevel(logging.WARNING)

    return logger
def validate_config(config: "ServerConfig") -> None:
    """Validate server configuration and log warnings for potential issues.

    Only the cache-directory check is fatal; every other finding is
    reported through this module's logger.

    Args:
        config: The configuration to validate.

    Raises:
        OSError: If ``config.cache_dir`` is set and cannot be created.
        PermissionError: If ``config.cache_dir`` exists but is not writable.
    """
    logger = logging.getLogger(__name__)

    # Validate port range
    if not (1024 <= config.port <= 65535):
        logger.warning(
            "Port %s may require elevated privileges or be invalid", config.port
        )

    # Check cache directory
    if config.cache_dir:
        try:
            os.makedirs(config.cache_dir, exist_ok=True)
        except OSError as e:
            logger.error(
                "Failed to create/access cache directory %s: %s",
                config.cache_dir,
                e,
            )
            raise
        # Checked outside the try block so a non-writable directory is
        # reported once rather than being caught and logged a second time.
        if not os.access(config.cache_dir, os.W_OK):
            logger.error("Cache directory %s is not writable", config.cache_dir)
            raise PermissionError(
                f"Cannot write to cache directory: {config.cache_dir}"
            )

    # Validate performance settings
    if config.max_sample_size > 100000:
        logger.warning(
            "Large max_sample_size (%s) may cause memory issues",
            config.max_sample_size,
        )
    if config.request_timeout < 30:
        logger.warning(
            "Short request timeout (%ss) may cause failures for large datasets",
            config.request_timeout,
        )

    # Check authentication
    if not config.hf_token:
        logger.warning(
            "No HuggingFace token configured - only public datasets will be accessible"
        )
        logger.info("Set HF_TOKEN environment variable to access private datasets")
    else:
        logger.info("HuggingFace token configured - private datasets accessible")
# Process-wide configuration; stays None until first requested or explicitly set
_global_config: Optional[ServerConfig] = None


def get_config() -> ServerConfig:
    """Return the shared configuration, building it from the environment on first use."""
    global _global_config
    if _global_config is not None:
        return _global_config
    # Nothing installed yet: populate the shared instance from environment variables.
    _global_config = ServerConfig.from_env()
    return _global_config
def set_config(config: ServerConfig) -> None:
    """Replace the shared configuration instance with ``config``."""
    global _global_config
    _global_config = config