# Convenience targets. Everything defaults to OFFLINE=1 so it runs with no
# model download and no network access.

# --- User-tunable knobs: override on the command line or via the environment ---

# Interpreter used for every Python invocation in this file.
PY ?= python3

# Port used by `serve` and published on the host by `docker-run`.
PORT ?= 8000

# Defaults to 1 (no model download, no network access). Exported so that the
# commands run by recipes see it in their environment.
OFFLINE ?= 1
export OFFLINE

# A bare `make` prints the target listing.
.DEFAULT_GOAL := help

# Every target here is a command name rather than a file to build, so declare
# them phony; a stray file called e.g. `test` or `clean` can then never mask one.
.PHONY: help
.PHONY: install install-ml install-dev
.PHONY: demo serve run
.PHONY: test lint
.PHONY: bench bench-table
.PHONY: docker-build docker-run compose-up compose-down
.PHONY: clean

# Self-documenting help: lists every `target: ... ## description` line found in
# the parsed makefiles. A single awk pass is used (instead of grep piped to awk)
# so that no "file:" prefix is added when $(MAKEFILE_LIST) holds more than one
# makefile; such a prefix would be printed in place of the target name.
# Target names may contain letters, digits, `_` and `-`.
help: ## Show this help
	@awk -F ':.*## ' \
		'/^[a-zA-Z0-9_-]+:.*## / {printf "  \033[36m%-16s\033[0m %s\n", $$1, $$2}' \
		$(MAKEFILE_LIST)

# Dependency installation. Each target feeds one requirements file to pip,
# run through $(PY) so the packages land in that interpreter's environment.

install: ## Install lean runtime deps (offline-capable)
	$(PY) -m pip install --requirement requirements.txt

install-dev: ## Install runtime + test deps
	$(PY) -m pip install --requirement requirements-dev.txt

install-ml: ## Install the heavy ML stack for a REAL model run (OFFLINE=0)
	$(PY) -m pip install --requirement requirements-ml.txt

demo: serve ## Serve the API + demo UI locally (alias for `serve`)

# Local development server: announces the URL, then starts uvicorn on $(PORT),
# bound to all interfaces, with --reload enabled.
serve: ## Run the API (UI at /demo, docs at /docs, metrics at /metrics)
	@printf '%s\n' "API + UI on http://localhost:$(PORT)/demo  (OFFLINE=$(OFFLINE))"
	$(PY) -m uvicorn app.main:app \
		--host 0.0.0.0 \
		--port $(PORT) \
		--reload

run: serve ## Alias for `serve`

# Runs pytest in quiet mode; OFFLINE is exported at the top of this file, so the
# suite sees it in its environment.
test: ## Run the test suite offline
	$(PY) -m pytest -q

# `lint` is declared in .PHONY but previously had no rule, so `make lint`
# reported "Nothing to be done" and exited 0 without checking anything.
# NOTE(review): ruff is inferred from the .ruff_cache directory that `clean`
# removes; confirm it is listed in requirements-dev.txt.
lint: ## Lint the code base with ruff
	$(PY) -m ruff check .

# Load-test parameters shared by both bench targets, so the printed summary and
# the README table row are always measured with the same settings. Override
# per run, e.g. `make bench BENCH_ARGS="--requests 500 --concurrency 2"`.
BENCH_ARGS ?= --requests 3000 --concurrency 8 --warmup 100

bench: ## Run the load test (self-hosted, offline) and print a summary
	$(PY) scripts/loadtest.py $(BENCH_ARGS)

bench-table: ## Print the README benchmark table row (markdown)
	$(PY) scripts/loadtest.py $(BENCH_ARGS) --markdown

# Container targets. The image is tagged distilbert-emotion-api:local.

docker-build: ## Build the slim runtime image
	docker build --tag distilbert-emotion-api:local .

# Rebuilds the image first, then runs it with host port $(PORT) mapped to
# container port 8000 and the current OFFLINE value passed into the container.
docker-run: docker-build ## Run the image locally on $(PORT)
	docker run --rm \
		--env OFFLINE=$(OFFLINE) \
		--publish $(PORT):8000 \
		distilbert-emotion-api:local

# Compose targets: `up` rebuilds images before starting and stays in the
# foreground; `down` stops and removes what `up` created.
compose-up: ## Start API + Prometheus + Grafana
	docker compose up --build

compose-down: ## Stop the observability stack
	docker compose down

# Removes tool caches and Python bytecode directories.
# `**/__pycache__` is not a recursive glob under make's default /bin/sh (it acts
# like `*/__pycache__`), so nested bytecode caches were left behind. `find`
# walks the whole tree instead. Hidden directories (.git, .venv, ...) are pruned,
# matching the way shell globs skip dot-directories.
clean: ## Remove caches
	rm -rf .pytest_cache .ruff_cache
	find . -name '.?*' -prune -o -type d -name __pycache__ -prune -exec rm -rf {} +