Spaces:
Sleeping
Sleeping
| """ | |
| Hugging Face Spaces Configuration | |
| ================================ | |
| This module contains configuration settings optimized for deployment on | |
| Hugging Face Spaces. It handles cache directories, permissions, and | |
| environment-specific optimizations. | |
| Key Features: | |
| - Automatic cache directory setup in /tmp | |
| - Permission handling for HF Spaces environment | |
| - Model loading optimizations | |
| - Resource usage monitoring | |
| """ | |
| import os | |
| import logging | |
| from pathlib import Path | |
| # Configure logging for HF Spaces | |
| logging.basicConfig( | |
| level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" | |
| ) | |
| logger = logging.getLogger(__name__) | |
class HFSpacesConfig:
    """
    Configuration class for Hugging Face Spaces deployment.

    Detects whether the process is running inside an HF Space, prepares
    writable cache directories, and exports the environment variables the
    model/Streamlit stack reads, so the application works correctly in the
    HF Spaces environment.
    """

    def __init__(self) -> None:
        """Initialize HF Spaces configuration (detection, dirs, env vars)."""
        self.is_hf_spaces = self._detect_hf_spaces()
        self.cache_dirs = self._setup_cache_directories()
        self.env_vars = self._setup_environment_variables()

    def _detect_hf_spaces(self) -> bool:
        """
        Detect if running in Hugging Face Spaces environment.

        Returns:
            bool: True if running in HF Spaces.
        """
        # Any single indicator is treated as sufficient evidence: HF sets
        # SPACE_ID / SPACE_HOST in Spaces containers, and /tmp/huggingface
        # exists once the container (or a prior run) created the cache there.
        hf_indicators = [
            "SPACE_ID" in os.environ,
            "SPACE_HOST" in os.environ,
            "HF_HUB_ENDPOINT" in os.environ,
            os.path.exists("/tmp/huggingface"),
        ]
        is_hf = any(hf_indicators)
        # Lazy %-style args: the message is only formatted if INFO is enabled.
        logger.info("HF Spaces environment detected: %s", is_hf)
        return is_hf

    def _setup_cache_directories(self) -> dict:
        """
        Set up cache directories for HF Spaces.

        Returns:
            dict: Mapping of logical cache names to directory paths.
        """
        if self.is_hf_spaces:
            # Use /tmp for HF Spaces — the reliably writable location there.
            cache_dirs = {
                "hf_home": "/tmp/huggingface",
                "transformers_cache": "/tmp/huggingface/transformers",
                "torch_home": "/tmp/torch",
                "hub_cache": "/tmp/huggingface/hub",
                "xdg_cache": "/tmp",
                "vector_store": "./vector_store",
            }
        else:
            # Use standard locations for local development.
            cache_dirs = {
                "hf_home": os.path.expanduser("~/.cache/huggingface"),
                "transformers_cache": os.path.expanduser(
                    "~/.cache/huggingface/transformers"
                ),
                "torch_home": os.path.expanduser("~/.cache/torch"),
                "hub_cache": os.path.expanduser("~/.cache/huggingface/hub"),
                "xdg_cache": os.path.expanduser("~/.cache"),
                "vector_store": "./vector_store",
            }

        # Create directories up front; a failure (e.g. read-only location)
        # is logged but must not crash startup.
        for name, path in cache_dirs.items():
            try:
                Path(path).mkdir(parents=True, exist_ok=True)
                logger.info("Cache directory ready: %s -> %s", name, path)
            except OSError as e:
                logger.warning("Could not create cache directory %s: %s", name, e)
        return cache_dirs

    def _setup_environment_variables(self) -> dict:
        """
        Set up environment variables for HF Spaces.

        Returns:
            dict: The environment variable settings that were applied.
        """
        env_vars = {
            "HF_HOME": self.cache_dirs["hf_home"],
            "TRANSFORMERS_CACHE": self.cache_dirs["transformers_cache"],
            "TORCH_HOME": self.cache_dirs["torch_home"],
            "XDG_CACHE_HOME": self.cache_dirs["xdg_cache"],
            "HF_HUB_CACHE": self.cache_dirs["hub_cache"],
            "PYTHONPATH": "/app",
            "STREAMLIT_SERVER_PORT": "8501",
            "STREAMLIT_SERVER_ADDRESS": "0.0.0.0",
            "STREAMLIT_SERVER_HEADLESS": "true",
            "STREAMLIT_SERVER_ENABLE_CORS": "false",
            "STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION": "false",
            "STREAMLIT_LOGGER_LEVEL": "info",
        }
        # Applied unconditionally — the HF-hosted app relies on these being
        # present; NOTE(review): this also overrides any pre-existing values
        # in local development.
        for key, value in env_vars.items():
            os.environ[key] = value
            logger.info("Set environment variable: %s=%s", key, value)
        return env_vars

    def get_model_config(self) -> dict:
        """
        Get optimized model configuration for HF Spaces.

        Returns:
            dict: Model configuration settings (model names, chunk sizes,
            vector-store path, and the transformers cache directory).
        """
        return {
            "embedding_model": "all-MiniLM-L6-v2",
            "generative_model": "Qwen/Qwen2.5-1.5B-Instruct",
            "fallback_model": "distilgpt2",
            "chunk_sizes": [512, 1024, 2048],
            "vector_store_path": self.cache_dirs["vector_store"],
            "enable_guard_rails": True,
            "cache_dir": self.cache_dirs["transformers_cache"],
        }

    def get_guard_rail_config(self) -> dict:
        """
        Get guard rail configuration optimized for HF Spaces.

        Returns:
            dict: Guard rail configuration settings (length limits,
            confidence threshold, rate limiting, and detection toggles).
        """
        return {
            "max_query_length": 1000,
            "max_response_length": 5000,
            "min_confidence_threshold": 0.3,
            "rate_limit_requests": 10,
            "rate_limit_window": 60,  # seconds
            "enable_pii_detection": True,
            "enable_prompt_injection_detection": True,
        }

    def get_resource_limits(self) -> dict:
        """
        Get resource limits for HF Spaces environment.

        Returns:
            dict: Resource limit settings.
        """
        return {
            "max_memory_usage": 0.8,  # 80% of available memory
            "max_cpu_usage": 0.9,  # 90% of available CPU
            "max_concurrent_requests": 5,
            "model_timeout": 30,  # seconds
            "cache_cleanup_interval": 3600,  # 1 hour
        }

    def cleanup_cache(self) -> None:
        """
        Clean up cache directories to free space.

        Removes top-level files older than one hour from the transformers
        and torch caches. This is important for HF Spaces with limited
        storage; it is a no-op outside HF Spaces.
        """
        if not self.is_hf_spaces:
            return
        try:
            import time

            cutoff = time.time() - 3600  # anything older than 1 hour goes
            for cache_path in (
                self.cache_dirs["transformers_cache"],
                self.cache_dirs["torch_home"],
            ):
                if not os.path.isdir(cache_path):
                    continue
                for item in os.listdir(cache_path):
                    item_path = os.path.join(cache_path, item)
                    # Only top-level regular files are removed; nested model
                    # directories are left untouched.
                    if os.path.isfile(item_path) and os.path.getmtime(item_path) < cutoff:
                        os.remove(item_path)
                        logger.info("Cleaned up old cache file: %s", item_path)
            logger.info("Cache cleanup completed")
        except Exception as e:
            # Best-effort: cleanup failure should never take the app down.
            logger.warning("Cache cleanup failed: %s", e)
# Global configuration instance — created eagerly at import time so cache
# directories and environment variables are in place before any model loads.
hf_config = HFSpacesConfig()
def get_hf_config() -> HFSpacesConfig:
    """Return the module-level :class:`HFSpacesConfig` singleton."""
    return hf_config
def is_hf_spaces() -> bool:
    """Report whether the shared configuration detected an HF Spaces environment."""
    return hf_config.is_hf_spaces
def get_cache_dir() -> str:
    """Return the transformers cache directory for the current environment."""
    return hf_config.cache_dirs["transformers_cache"]