# LaelaZ's picture
# Deploy Emotion Spectrum API to HF Spaces (Docker)
# 43a2563 verified
# Convenience targets. Everything defaults to OFFLINE=1 so it runs with no
# model download and no network access.
# ---- User-tunable knobs: override on the command line, e.g. `make serve PORT=9000`.
# Interpreter used for every Python invocation in this file.
PY ?= python3
# Host-side port used by the `serve` and `docker-run` recipes.
PORT ?= 8000
# Offline switch (defaults to 1). Exported so that every command started by a
# recipe sees it in its environment.
OFFLINE ?= 1
export OFFLINE

# A bare `make` prints the help text.
.DEFAULT_GOAL := help

# These names are commands, not files: declare them phony so a stray file
# called e.g. `test` or `clean` can never make the target look up to date.
.PHONY: help
.PHONY: install install-ml install-dev
.PHONY: demo serve run
.PHONY: test lint
.PHONY: bench bench-table
.PHONY: docker-build docker-run
.PHONY: compose-up compose-down
.PHONY: clean
# Self-documenting help: scan every parsed makefile for lines of the form
# "target: [prereqs] ## text" and print the target name (cyan, left-aligned in
# 16 columns) followed by its text.
#   * grep -h suppresses the "file:" prefix grep adds when $(MAKEFILE_LIST)
#     holds more than one makefile; with the prefix, awk's first field would
#     be the file name instead of the target.
#   * Digits are accepted in target names so such targets are not silently
#     left out of the listing.
#   * "$$" is Make's escape for a literal "$" handed to the shell/awk.
help: ## Show this help
	@grep -hE '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
		awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-16s\033[0m %s\n", $$1, $$2}'
# Install the packages listed in requirements.txt into the interpreter named
# by $(PY), using that interpreter's own pip module.
install: ## Install lean runtime deps (offline-capable)
	$(PY) -m pip install --requirement requirements.txt
# Install the packages listed in requirements-dev.txt via $(PY)'s pip module.
install-dev: ## Install runtime + test deps
	$(PY) -m pip install --requirement requirements-dev.txt
# Install the packages listed in requirements-ml.txt via $(PY)'s pip module.
install-ml: ## Install the heavy ML stack for a REAL model run (OFFLINE=0)
	$(PY) -m pip install --requirement requirements-ml.txt
# `demo` has no recipe of its own; it only depends on `serve`.
demo: serve ## Serve the API + demo UI locally (alias for `serve`)

# Announce the URL, then start uvicorn on app.main:app with auto-reload,
# bound to all interfaces (0.0.0.0) on $(PORT).
serve: ## Run the API (UI at /demo, docs at /docs, metrics at /metrics)
	@echo "API + UI on http://localhost:$(PORT)/demo (OFFLINE=$(OFFLINE))"
	$(PY) -m uvicorn app.main:app \
		--host 0.0.0.0 \
		--port $(PORT) \
		--reload

# `run` likewise only depends on `serve`.
run: serve ## Alias for `serve`
# Invoke pytest through $(PY) with reduced output verbosity.
test: ## Run the test suite offline
	$(PY) -m pytest --quiet
# Load-test parameters shared by `bench` and `bench-table`. They are defined
# once so the two targets cannot drift apart, and can be overridden per run,
# e.g. `make bench BENCH_REQUESTS=500`.
BENCH_REQUESTS    ?= 3000
BENCH_CONCURRENCY ?= 8
BENCH_WARMUP      ?= 100
# Internal: the flag string both targets hand to scripts/loadtest.py.
bench_args := --requests $(BENCH_REQUESTS) --concurrency $(BENCH_CONCURRENCY) --warmup $(BENCH_WARMUP)

# Run the load-test script with the shared parameters.
bench: ## Run the load test (self-hosted, offline) and print a summary
	$(PY) scripts/loadtest.py $(bench_args)

# Same run as `bench`, with --markdown appended.
bench-table: ## Print the README benchmark table row (markdown)
	$(PY) scripts/loadtest.py $(bench_args) --markdown
# Image tag used by both docker targets. It is defined once so build and run
# always refer to the same image; override with `make docker-run DOCKER_IMAGE=...`.
DOCKER_IMAGE ?= distilbert-emotion-api:local

# Build the image from the Dockerfile in the current directory.
docker-build: ## Build the slim runtime image
	docker build -t $(DOCKER_IMAGE) .

# Rebuild first (prerequisite), then run a throwaway container (--rm) with
# OFFLINE passed through and host $(PORT) mapped to container port 8000.
# NOTE(review): 8000 is presumably the port the image's server listens on;
# confirm against the Dockerfile.
docker-run: docker-build ## Run the image locally on $(PORT)
	docker run --rm -e OFFLINE=$(OFFLINE) -p $(PORT):8000 $(DOCKER_IMAGE)
# Bring the compose project up attached to the terminal (no -d), building
# images first (--build).
compose-up: ## Start API + Prometheus + Grafana
	docker compose up --build
# Counterpart to compose-up: runs `docker compose down` for the same project.
compose-down: ## Stop the observability stack
	docker compose down
# Delete tool caches. Recipes run under /bin/sh by default, which has no
# recursive "**" glob: there "**/__pycache__" behaves like "*/__pycache__"
# and misses both ./__pycache__ and anything nested deeper. find walks the
# whole tree instead; -prune stops it descending into a directory it is about
# to delete.
clean: ## Remove caches
	rm -rf .pytest_cache .ruff_cache
	find . -type d -name __pycache__ -prune -exec rm -rf {} +