```env
# Model configuration
MODEL_REPO=lmstudio-community/gemma-3n-E4B-it-text-GGUF
MODEL_FILENAME=gemma-3n-E4B-it-Q8_0.gguf
MODEL_PATH=./models/gemma-3n-E4B-it-Q8_0.gguf
HUGGINGFACE_TOKEN=

# Model parameters - optimized for Docker containers
N_CTX=4096
N_GPU_LAYERS=0
N_THREADS=4
N_BATCH=512
USE_MLOCK=false
USE_MMAP=true
F16_KV=true
SEED=42

# Server settings
HOST=0.0.0.0
GRADIO_PORT=7860
API_PORT=8000

# Generation settings
MAX_NEW_TOKENS=256
TEMPERATURE=0.1

# File upload settings (MAX_FILE_SIZE is in bytes; 10485760 = 10 MB)
MAX_FILE_SIZE=10485760
```
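For reference, here is a minimal sketch of how these variables might be consumed at startup. It assumes the app loads the `.env` file with python-dotenv and runs the GGUF model through llama-cpp-python; both libraries are assumptions, not confirmed by this file. Most names map directly onto `llama_cpp.Llama` arguments, except `F16_KV`, which only older llama-cpp-python releases expose (as `f16_kv`), so it is omitted here.

```python
# Sketch only: assumes python-dotenv and llama-cpp-python are installed.
import os

from dotenv import load_dotenv
from llama_cpp import Llama

load_dotenv()  # reads the .env file shown above into the process environment


def env_bool(name: str, default: str) -> bool:
    """Interpret 'true'/'false' strings from the .env file as booleans."""
    return os.getenv(name, default).strip().lower() == "true"


# Map the model parameters onto llama_cpp.Llama constructor arguments.
llm = Llama(
    model_path=os.getenv("MODEL_PATH", "./models/gemma-3n-E4B-it-Q8_0.gguf"),
    n_ctx=int(os.getenv("N_CTX", "4096")),
    n_gpu_layers=int(os.getenv("N_GPU_LAYERS", "0")),  # 0 = CPU-only, suits plain Docker
    n_threads=int(os.getenv("N_THREADS", "4")),
    n_batch=int(os.getenv("N_BATCH", "512")),
    use_mlock=env_bool("USE_MLOCK", "false"),
    use_mmap=env_bool("USE_MMAP", "true"),
    seed=int(os.getenv("SEED", "42")),
)

# Generation settings feed the completion call rather than the constructor.
out = llm(
    "Summarize GGUF in one sentence.",
    max_tokens=int(os.getenv("MAX_NEW_TOKENS", "256")),
    temperature=float(os.getenv("TEMPERATURE", "0.1")),
)
print(out["choices"][0]["text"])
```

The server settings (`HOST`, `GRADIO_PORT`, `API_PORT`) and `MAX_FILE_SIZE` are not consumed by the model itself; they would be read the same way (`os.getenv`) by whatever Gradio/API entry point this project uses.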