version: "3.8"

services:
  ml-backend:
    container_name: ml-backend
    image: humansignal/ml-backend:v0
    build:
      context: .
      args:
        TEST_ENV: ${TEST_ENV}
    environment:
      # Model settings
      - MODEL_NAME=google/flan-t5-base
      - MAX_LENGTH=512
      - GENERATION_MAX_LENGTH=128
      - NUM_RETURN_SEQUENCES=1
      # LoRA settings
      - LORA_R=16
      - LORA_ALPHA=16
      - LORA_DROPOUT=0.1
      - LORA_TARGET_MODULES=q,v
      # Training settings
      - NUM_EPOCHS=16
      - LEARNING_RATE=1e-5
      - BATCH_SIZE=1
      - MAX_STEPS=100
      - SAVE_STEPS=50
      # Other settings
      - BASIC_AUTH_USER=
      - BASIC_AUTH_PASS=
      - LOG_LEVEL=DEBUG
      - WORKERS=1
      - THREADS=8
      - MODEL_DIR=/data/models
      - HF_CHECKPOINT_DIR=/data/checkpoints
    ports:
      - "9090:9090"
    volumes:
      - "./data/server:/data"  # TODO: verify if the file is needed