""" Hugging Face Spaces GPU configuration """ import os import torch # Set environment variables for Hugging Face Spaces os.environ.update({ 'TRANSFORMERS_CACHE': '/tmp/huggingface', 'HF_HOME': '/tmp/huggingface', 'TOKENIZERS_PARALLELISM': 'false', 'TRANSFORMERS_VERBOSITY': 'error', 'BITSANDBYTES_NOWELCOME': '1', 'PYTORCH_CUDA_ALLOC_CONF': 'max_split_size_mb:128' # Memory efficient attention }) # Create necessary directories for directory in ['/tmp/huggingface', '/tmp/vector_db', '/tmp/session_data', '/tmp/session_summaries']: os.makedirs(directory, exist_ok=True) # Hugging Face Spaces specific settings SPACES_CONFIG = { 'port': 7860, # Default port for Hugging Face Spaces 'host': '0.0.0.0', 'workers': 1, # Single worker for Hugging Face Spaces 'timeout': 180, # Increased timeout for model loading 'log_level': 'info' } # Model settings optimized for T4 GPU MODEL_CONFIG = { 'model_name': 'meta-llama/Llama-3.2-3B-Instruct', 'peft_model_path': 'nada013/mental-health-chatbot', 'use_4bit': True, 'device': 'cuda' if torch.cuda.is_available() else 'cpu', # Use GPU if available 'batch_size': 4, # Optimized for T4 GPU 'max_memory': {0: "14GB"} if torch.cuda.is_available() else None # T4 GPU memory limit }