Spaces:
Paused
Paused
| # Service URLs Configuration | |
| LLM_ENGINE_URL=http://localhost:8001 | |
| RAG_ENGINE_URL=http://localhost:8002 | |
| # LLM Engine Server Configuration | |
| LLM_ENGINE_HOST=0.0.0.0 | |
| LLM_ENGINE_PORT=8001 | |
| # RAG Engine Server Configuration (if running locally) | |
| RAG_ENGINE_HOST=0.0.0.0 | |
| RAG_ENGINE_PORT=8002 | |
| # Base Paths Configuration | |
| BAS_MODEL_PATH=/path/to/your/model | |
| BAS_RESOURCES=/path/to/resources | |
| # CUDA Memory Management | |
| PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128,garbage_collection_threshold:0.8,expandable_segments:True | |
| # Other CUDA runtime settings (kernel launch synchronisation and visible devices) | |
| CUDA_LAUNCH_BLOCKING=0 | |
| CUDA_VISIBLE_DEVICES=0 | |
| # Logging Configuration | |
| # Valid levels: DEBUG, INFO, WARNING, ERROR, CRITICAL | |
| LOG_LEVEL=INFO | |
| # GPU Configuration (optional) | |
| # CUDA_VISIBLE_DEVICES=0,1 # Comma-separated GPU indices to expose; use instead of the single-GPU value set earlier | |
| # Memory Configuration (optional) | |
| # MAX_GPU_MEMORY=16Gi # Maximum GPU memory to use | |
| # MAX_CPU_MEMORY=32Gi # Maximum CPU memory to use | |
| # Security (if needed) | |
| # API_KEY=your-api-key-here | |
| # SSL_CERT_PATH=/path/to/cert | |
| # SSL_KEY_PATH=/path/to/key | |
| # Development Settings | |
| # DEBUG=True # Enable debug mode | |
| # RELOAD=False # Set to True to enable auto-reload during development | |
| # Model Default Parameters (optional) | |
| # DEFAULT_MAX_NEW_TOKENS=50 | |
| # DEFAULT_TEMPERATURE=1.0 | |
| # DEFAULT_TOP_K=50 | |
| # DEFAULT_TOP_P=1.0 | |
| # Cache Settings (optional) | |
| # CACHE_DIR=/path/to/cache | |
| # MAX_CACHE_SIZE=10Gi | |
| # Monitoring (optional) | |
| # ENABLE_METRICS=True | |
| # PROMETHEUS_PORT=9090 | |