|
|
""" |
|
|
Hugging Face Spaces GPU configuration |
|
|
""" |
|
|
import os |
|
|
import torch |
|
|
|
|
|
|
|
|
# Point all Hugging Face caches at /tmp (the writable area on Spaces) and
# quiet down library logging/warnings.
# NOTE(review): these only take effect if this module runs before
# transformers / bitsandbytes / the CUDA allocator read them — confirm
# import order in the entrypoint.
os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface'  # legacy cache var, kept alongside HF_HOME
os.environ['HF_HOME'] = '/tmp/huggingface'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'  # silence tokenizers fork warning
os.environ['TRANSFORMERS_VERBOSITY'] = 'error'  # only surface transformers errors
os.environ['BITSANDBYTES_NOWELCOME'] = '1'  # suppress bitsandbytes banner
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'  # limit allocator block size
|
|
|
|
|
|
|
|
# Make sure every writable working directory exists before anything
# tries to use it (idempotent: exist_ok skips already-present dirs).
_REQUIRED_DIRS = (
    '/tmp/huggingface',
    '/tmp/vector_db',
    '/tmp/session_data',
    '/tmp/session_summaries',
)
for _dir in _REQUIRED_DIRS:
    os.makedirs(_dir, exist_ok=True)
|
|
|
|
|
|
|
|
# Server settings for the Spaces runtime.  Port 7860 is the port a
# Hugging Face Space expects the app to listen on; host 0.0.0.0 binds
# all interfaces inside the container.
SPACES_CONFIG = dict(
    port=7860,
    host='0.0.0.0',
    workers=1,       # single worker — the model is loaded once per process
    timeout=180,     # seconds; generation requests can be slow
    log_level='info',
)
|
|
|
|
|
|
|
|
# Model-loading parameters.  CUDA availability is probed once at import
# time and reused for the GPU-dependent fields.
_HAS_CUDA = torch.cuda.is_available()

MODEL_CONFIG = {
    'model_name': 'meta-llama/Llama-3.2-3B-Instruct',
    'peft_model_path': 'nada013/mental-health-chatbot',  # PEF/LoRA adapter repo
    'use_4bit': True,  # 4-bit quantized loading
    'device': 'cuda' if _HAS_CUDA else 'cpu',
    'batch_size': 4,
    # Cap GPU 0 at 14GB when CUDA is present; None on CPU-only hosts.
    'max_memory': {0: "14GB"} if _HAS_CUDA else None,
}