|
|
""" |
|
|
Hugging Face Spaces GPU configuration |
|
|
""" |
|
|
import os |
|
|
import torch |
|
|
|
|
|
|
|
|
# Point all Hugging Face caches at /tmp (the writable area on Spaces) and
# quiet down library logging/warnings.
# NOTE(review): these only take effect if this module runs before
# transformers / bitsandbytes / the CUDA allocator read them — confirm
# import order in the entrypoint.
os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface'  # legacy cache var, kept alongside HF_HOME
os.environ['HF_HOME'] = '/tmp/huggingface'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'  # silence tokenizers fork warning
os.environ['TRANSFORMERS_VERBOSITY'] = 'error'  # only surface transformers errors
os.environ['BITSANDBYTES_NOWELCOME'] = '1'  # suppress bitsandbytes banner
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'  # limit allocator block size
|
|
|
|
|
|
|
|
# Make sure every writable working directory exists before anything
# tries to use it (idempotent: exist_ok skips already-present dirs).
_REQUIRED_DIRS = (
    '/tmp/huggingface',
    '/tmp/vector_db',
    '/tmp/session_data',
    '/tmp/session_summaries',
)
for _dir in _REQUIRED_DIRS:
    os.makedirs(_dir, exist_ok=True)
|
|
|
|
|
|
|
|
# Server settings for the Spaces runtime.  Port 7860 is the port a
# Hugging Face Space expects the app to listen on; host 0.0.0.0 binds
# all interfaces inside the container.
SPACES_CONFIG = dict(
    port=7860,
    host='0.0.0.0',
    workers=1,       # single worker — the model is loaded once per process
    timeout=180,     # seconds; generation requests can be slow
    log_level='info',
)
|
|
|
|
|
|
|
|
# Model-loading parameters.  CUDA availability is probed once at import
# time and reused for the GPU-dependent fields.
_HAS_CUDA = torch.cuda.is_available()

MODEL_CONFIG = {
    'model_name': 'meta-llama/Llama-3.2-3B-Instruct',
    'peft_model_path': 'nada013/mental-health-chatbot',  # PEF/LoRA adapter repo
    'use_4bit': True,  # 4-bit quantized loading
    'device': 'cuda' if _HAS_CUDA else 'cpu',
    'batch_size': 4,
    # Cap GPU 0 at 14GB when CUDA is present; None on CPU-only hosts.
    'max_memory': {0: "14GB"} if _HAS_CUDA else None,
}