---
# Docker Compose definition for the GGUF LLM app served on port 7860.
version: '3.8'

services:
  llm-app:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "7860:7860"
    environment:
      # Model source: Hugging Face repo and GGUF file name.
      - MODEL_REPO=lmstudio-community/gemma-3n-E4B-it-text-GGUF
      - MODEL_FILENAME=gemma-3n-E4B-it-Q8_0.gguf
      # Inference runtime parameters (N_GPU_LAYERS=0 -> CPU-only).
      - N_CTX=4096
      - N_GPU_LAYERS=0
      - N_THREADS=4
      - MAX_NEW_TOKENS=256
      - TEMPERATURE=0.1
    volumes:
      # Optional: mount models directory to persist downloaded models
      - ./models:/app/models
    restart: unless-stopped
    # NOTE(review): mem_limit is honored by `docker compose` (non-swarm) runs;
    # under swarm mode use deploy.resources.limits.memory instead — confirm
    # the intended deployment target.
    mem_limit: 8g
    # Uncomment below for GPU support (requires NVIDIA Container Toolkit):
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]