File size: 1,336 Bytes
4d1131a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""

Hugging Face Spaces GPU configuration

"""
import os
import torch

# Environment variables required on Hugging Face Spaces: caches must live
# under /tmp (the only writable path), tokenizer thread forking is disabled,
# library chatter is silenced, and the CUDA allocator is hinted to use small
# splits to reduce fragmentation on modest GPUs.
# NOTE(review): TRANSFORMERS_CACHE is reportedly deprecated in newer
# transformers releases in favor of HF_HOME (also set here) — confirm before
# removing either key.
_SPACES_ENV = {
    'TRANSFORMERS_CACHE': '/tmp/huggingface',
    'HF_HOME': '/tmp/huggingface',
    'TOKENIZERS_PARALLELISM': 'false',
    'TRANSFORMERS_VERBOSITY': 'error',
    'BITSANDBYTES_NOWELCOME': '1',
    'PYTORCH_CUDA_ALLOC_CONF': 'max_split_size_mb:128',
}
for _key, _value in _SPACES_ENV.items():
    os.environ[_key] = _value

# Pre-create the writable working directories the app relies on; on Spaces,
# /tmp is the guaranteed-writable location. exist_ok makes restarts idempotent.
_REQUIRED_DIRS = (
    '/tmp/huggingface',
    '/tmp/vector_db',
    '/tmp/session_data',
    '/tmp/session_summaries',
)
for _path in _REQUIRED_DIRS:
    os.makedirs(_path, exist_ok=True)

# Hugging Face Spaces specific settings
SPACES_CONFIG = {
    'port': 7860,  # Default port for Hugging Face Spaces
    'host': '0.0.0.0',
    'workers': 1,  # Single worker for Hugging Face Spaces
    'timeout': 180,  # Increased timeout for model loading
    'log_level': 'info'
}

# Model settings optimized for T4 GPU
MODEL_CONFIG = {
    'model_name': 'meta-llama/Llama-3.2-3B-Instruct',
    'peft_model_path': 'nada013/mental-health-chatbot',
    'use_4bit': True,
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',  # Use GPU if available
    'batch_size': 4,  # Optimized for T4 GPU
    'max_memory': {0: "14GB"} if torch.cuda.is_available() else None  # T4 GPU memory limit
}