# convAI / hf_spaces_config.py
# (Hugging Face Hub upload metadata, preserved as comments: uploaded by
# sinhapiyush86, "Upload 15 files", revision afad319 verified)
"""
Hugging Face Spaces Configuration
================================
This module contains configuration settings optimized for deployment on
Hugging Face Spaces. It handles cache directories, permissions, and
environment-specific optimizations.
Key Features:
- Automatic cache directory setup in /tmp
- Permission handling for HF Spaces environment
- Model loading optimizations
- Resource usage monitoring
"""
import os
import logging
from pathlib import Path
# Configure logging for HF Spaces: timestamped, named, leveled records on stdout.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-level logger, named after this module per stdlib convention.
logger = logging.getLogger(__name__)
class HFSpacesConfig:
    """
    Configuration class for Hugging Face Spaces deployment.

    This class manages all environment-specific settings and ensures
    the application works correctly in the HF Spaces environment.

    Attributes (set in ``__init__``):
        is_hf_spaces: True when an HF Spaces environment was detected.
        cache_dirs: Mapping of logical cache names to directory paths.
        env_vars: Environment variables exported into ``os.environ``.
    """

    # Files older than this (seconds) are removed by cleanup_cache().
    _CACHE_MAX_AGE_SECONDS = 3600

    def __init__(self):
        """Initialize HF Spaces configuration.

        Detects the environment, creates cache directories, and exports
        environment variables — all as constructor side effects.
        """
        self.is_hf_spaces = self._detect_hf_spaces()
        self.cache_dirs = self._setup_cache_directories()
        self.env_vars = self._setup_environment_variables()

    def _detect_hf_spaces(self) -> bool:
        """
        Detect if running in Hugging Face Spaces environment.

        Any single indicator is sufficient: SPACE_ID/SPACE_HOST are set by
        the Spaces runtime, and /tmp/huggingface exists once a previous run
        created the cache tree.

        Returns:
            bool: True if running in HF Spaces
        """
        hf_indicators = [
            "SPACE_ID" in os.environ,
            "SPACE_HOST" in os.environ,
            "HF_HUB_ENDPOINT" in os.environ,
            os.path.exists("/tmp/huggingface"),
        ]
        is_hf = any(hf_indicators)
        logger.info(f"HF Spaces environment detected: {is_hf}")
        return is_hf

    def _setup_cache_directories(self) -> dict:
        """
        Set up cache directories for HF Spaces.

        Returns:
            dict: Cache directory paths keyed by logical name
            (hf_home, transformers_cache, torch_home, hub_cache,
            xdg_cache, vector_store).
        """
        if self.is_hf_spaces:
            # /tmp is the only reliably writable location on HF Spaces.
            hf_base = "/tmp/huggingface"
            cache_dirs = {
                "hf_home": hf_base,
                "transformers_cache": f"{hf_base}/transformers",
                "torch_home": "/tmp/torch",
                "hub_cache": f"{hf_base}/hub",
                "xdg_cache": "/tmp",
                "vector_store": "./vector_store",
            }
        else:
            # Standard ~/.cache locations for local development.
            hf_base = os.path.expanduser("~/.cache/huggingface")
            cache_dirs = {
                "hf_home": hf_base,
                "transformers_cache": f"{hf_base}/transformers",
                "torch_home": os.path.expanduser("~/.cache/torch"),
                "hub_cache": f"{hf_base}/hub",
                "xdg_cache": os.path.expanduser("~/.cache"),
                "vector_store": "./vector_store",
            }
        # Best-effort creation: a failure (e.g. read-only filesystem) is
        # logged, not fatal, so the application can still start.
        for name, path in cache_dirs.items():
            try:
                Path(path).mkdir(parents=True, exist_ok=True)
                logger.info(f"Cache directory ready: {name} -> {path}")
            except OSError as e:
                # mkdir only raises OSError subclasses; catch narrowly.
                logger.warning(f"Could not create cache directory {name}: {e}")
        return cache_dirs

    def _setup_environment_variables(self) -> dict:
        """
        Set up environment variables for HF Spaces.

        Exports cache locations plus Streamlit server settings into
        ``os.environ`` so downstream libraries pick them up. Existing
        values are overwritten.

        Returns:
            dict: Environment variable settings that were applied
        """
        env_vars = {
            # Cache locations (mirrors self.cache_dirs).
            "HF_HOME": self.cache_dirs["hf_home"],
            # NOTE(review): TRANSFORMERS_CACHE is deprecated in recent
            # transformers releases in favor of HF_HOME; kept for
            # backward compatibility with older library versions.
            "TRANSFORMERS_CACHE": self.cache_dirs["transformers_cache"],
            "TORCH_HOME": self.cache_dirs["torch_home"],
            "XDG_CACHE_HOME": self.cache_dirs["xdg_cache"],
            "HF_HUB_CACHE": self.cache_dirs["hub_cache"],
            "PYTHONPATH": "/app",
            # Streamlit settings for a headless container deployment.
            "STREAMLIT_SERVER_PORT": "8501",
            "STREAMLIT_SERVER_ADDRESS": "0.0.0.0",
            "STREAMLIT_SERVER_HEADLESS": "true",
            "STREAMLIT_SERVER_ENABLE_CORS": "false",
            "STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION": "false",
            "STREAMLIT_LOGGER_LEVEL": "info",
        }
        for key, value in env_vars.items():
            os.environ[key] = value
            logger.info(f"Set environment variable: {key}={value}")
        return env_vars

    def get_model_config(self) -> dict:
        """
        Get optimized model configuration for HF Spaces.

        Returns:
            dict: Model configuration settings (model names, chunk sizes,
            vector-store path, cache directory, guard-rail toggle).
        """
        return {
            "embedding_model": "all-MiniLM-L6-v2",
            "generative_model": "Qwen/Qwen2.5-1.5B-Instruct",
            "fallback_model": "distilgpt2",
            "chunk_sizes": [512, 1024, 2048],
            "vector_store_path": self.cache_dirs["vector_store"],
            "enable_guard_rails": True,
            "cache_dir": self.cache_dirs["transformers_cache"],
        }

    def get_guard_rail_config(self) -> dict:
        """
        Get guard rail configuration optimized for HF Spaces.

        Returns:
            dict: Guard rail configuration settings (length limits,
            confidence threshold, rate limiting, detection toggles).
        """
        return {
            "max_query_length": 1000,
            "max_response_length": 5000,
            "min_confidence_threshold": 0.3,
            "rate_limit_requests": 10,
            "rate_limit_window": 60,  # seconds
            "enable_pii_detection": True,
            "enable_prompt_injection_detection": True,
        }

    def get_resource_limits(self) -> dict:
        """
        Get resource limits for HF Spaces environment.

        Returns:
            dict: Resource limit settings
        """
        return {
            "max_memory_usage": 0.8,  # 80% of available memory
            "max_cpu_usage": 0.9,  # 90% of available CPU
            "max_concurrent_requests": 5,
            "model_timeout": 30,  # seconds
            "cache_cleanup_interval": 3600,  # 1 hour
        }

    def cleanup_cache(self):
        """
        Clean up cache directories to free space.

        Removes top-level *files* older than ``_CACHE_MAX_AGE_SECONDS``
        from the transformers and torch caches; subdirectories (e.g. hub
        snapshots) are left intact. This is important for HF Spaces with
        limited storage, so it is a no-op outside that environment.
        Best-effort: never raises.
        """
        if not self.is_hf_spaces:
            return
        try:
            import time  # local import: only needed on the cleanup path

            # Hoist the cutoff computation out of the loop.
            cutoff = time.time() - self._CACHE_MAX_AGE_SECONDS
            for cache_path in (
                self.cache_dirs["transformers_cache"],
                self.cache_dirs["torch_home"],
            ):
                if not os.path.isdir(cache_path):
                    continue
                for item in os.listdir(cache_path):
                    item_path = os.path.join(cache_path, item)
                    if os.path.isfile(item_path) and os.path.getmtime(item_path) < cutoff:
                        os.remove(item_path)
                        logger.info(f"Cleaned up old cache file: {item_path}")
            logger.info("Cache cleanup completed")
        except Exception as e:
            # Deliberately broad: cleanup must never crash the app.
            logger.warning(f"Cache cleanup failed: {e}")
# Global configuration instance.
# NOTE: constructed at import time, so importing this module creates cache
# directories and overwrites environment variables as a side effect.
hf_config: HFSpacesConfig = HFSpacesConfig()
def get_hf_config() -> HFSpacesConfig:
"""
Get the global HF Spaces configuration instance
Returns:
HFSpacesConfig: Configuration instance
"""
return hf_config
def is_hf_spaces() -> bool:
    """Report whether the app is running inside Hugging Face Spaces.

    Delegates to the global config singleton's detection result.
    """
    return hf_config.is_hf_spaces
def get_cache_dir() -> str:
    """Return the transformers cache directory for the current environment.

    Resolved from the global config singleton's directory map.
    """
    return hf_config.cache_dirs["transformers_cache"]