---
# Docker Compose stack with a single service, `text-gen`, running the
# Hugging Face text-generation-inference image. The model to load is
# selected through the MODEL_ID environment variable.
version: '3.8'

services:
  text-gen:
    image: ghcr.io/huggingface/text-generation-inference:1.3
    ports:
      # HOST:CONTAINER - host port 8080 is published to container port 80.
      # Kept quoted so the mapping is always read as a string.
      - "8080:80"
    environment:
      - MODEL_ID=HuggingFaceH4/zephyr-7b-beta
      # Both cache variables point at /cache, the path mounted under `volumes`.
      - HF_HOME=/cache
      - HUGGINGFACE_HUB_CACHE=/cache
    volumes:
      # Bind-mounts ./cache (relative to this file) to /cache in the container.
      - ./cache:/cache
    deploy:
      resources:
        limits:
          # Adjust if you know how much memory you can use.
          # NOTE(review): whether `deploy.resources` is enforced outside Swarm
          # depends on the Compose implementation in use - confirm.
          memory: 16G