# NOTE: a web-page status banner ("Spaces: Sleeping") was captured here during extraction; it is not part of the Makefile.
# ECE Compliance RAG — Developer Workflow
#
# Usage:
#   make check     Ollama running? Model loaded? Corpus & indexes OK?
#   make corpus    Build all domain compilations from sources
#   make index     Build all PageIndex trees (LLM-heavy)
#   make test      Staged pipeline validation
#   make app       Launch Streamlit UI
#   make help      Show all targets
# Every target in this file is a command, not a file to be produced, so all of
# them are declared phony. This matters in practice: the benchmark recipes run
# benchmark/run_benchmark.py, so a directory named `benchmark` exists next to
# the Makefile and would otherwise make `make benchmark` look up to date.
# One line per section of the file, in file order.
.PHONY: help check check-ollama download \
        corpus corpus-legislation corpus-licensing corpus-reform corpus-ero \
        dry-run index index-legislation index-licensing index-ero index-cross-regulator index-reform \
        propagate-urls test benchmark benchmark-quick benchmark-compare \
        mlx-stop app app-qwen app-gemma app-sonnet rebuild
# Command prefix used by the recipes to run project Python scripts via uv.
# The absolute path is the Homebrew location on Apple Silicon; override on the
# command line if uv lives elsewhere, e.g. `make check UV="uv run python"`.
UV := /opt/homebrew/bin/uv run python

# Run every recipe with bash instead of make's default /bin/sh.
SHELL := /bin/bash

# Exported into the environment of every recipe. Python reads this variable to
# disable output buffering, so script output appears as it is produced.
PYTHONUNBUFFERED := 1
export PYTHONUNBUFFERED
# ── Help ────────────────────────────────────────────────────────────────────
# Print a categorized summary of the targets defined in this file.
# The text is maintained by hand: when a target is added or renamed, update
# the matching line here as well.
help:
	@echo ""
	@echo "ECE Compliance RAG — Developer Workflow"
	@echo "========================================"
	@echo ""
	@echo "Health checks (fast, no LLM):"
	@echo "  make check                  Corpus, indexes, mlx-lm server"
	@echo "  make check-ollama           Just Ollama connectivity (legacy)"
	@echo ""
	@echo "Source acquisition (network, no LLM):"
	@echo "  make download               Fetch HTML/PDF sources"
	@echo ""
	@echo "Corpus building (local, no LLM):"
	@echo "  make corpus                 Build all domain compilations"
	@echo "  make corpus-legislation     Just rebuild legislation.md from HTML"
	@echo "  make corpus-licensing       Just rebuild licensing-criteria.md"
	@echo "  make corpus-reform          Just rebuild reform-context.md"
	@echo "  make corpus-ero             Just rebuild the ERO corpus from HTML sources"
	@echo ""
	@echo "Index building (LLM-heavy — slow):"
	@echo "  make dry-run                Run the index builder with --dry-run"
	@echo "  make index                  Build all 5 PageIndex trees (auto-runs propagate-urls)"
	@echo "  make index-legislation      Just rebuild legislation index"
	@echo "  make index-licensing        Just rebuild licensing-criteria index"
	@echo "  make index-ero              Just rebuild ero index"
	@echo "  make index-cross-regulator  Just rebuild cross-regulator index"
	@echo "  make index-reform           Just rebuild reform-context index"
	@echo "  make propagate-urls         Propagate Source: URLs down index trees (no LLM)"
	@echo ""
	@echo "Testing:"
	@echo "  make test                   Staged pipeline test (1 LLM call)"
	@echo "  make benchmark              Full 20-question benchmark (~30 min)"
	@echo "  make benchmark-quick        3 questions (~5 min)"
	@echo "  make benchmark-compare      Benchmark with --compare qwen,sonnet"
	@echo ""
	@echo "App:"
	@echo "  make app                    Launch Streamlit UI (default: Qwen 2.5 14B)"
	@echo "  make app-qwen               Launch with Qwen 2.5 14B (~9 GB, local)"
	@echo "  make app-gemma              Launch with Gemma 4 26B MoE (~17 GB, local)"
	@echo "  make app-sonnet             Launch with Claude Sonnet (API, fast)"
	@echo "  make mlx-stop               Stop the mlx-lm server"
	@echo ""
	@echo "Full rebuild:"
	@echo "  make rebuild                corpus → index → test (end to end)"
	@echo ""
# ── Health checks ───────────────────────────────────────────────────────────
# Both targets run scripts/check.py through $(UV); they differ only in the
# --ollama flag passed by check-ollama.
check:
	@$(UV) scripts/check.py

check-ollama:
	@$(UV) scripts/check.py --ollama
# ── Source acquisition ──────────────────────────────────────────────────────
# Runs the two conversion scripts in sequence, announcing each step. Make
# stops at the first script that exits non-zero, so the PDF step only runs
# once the legislation step has succeeded.
download:
	@echo "Downloading legislation HTML sources..."
	@$(UV) scripts/convert_legislation_html.py
	@printf '\n%s\n' "Converting PDF sources to Markdown..."
	@$(UV) scripts/convert_sources.py
# ── Corpus building ─────────────────────────────────────────────────────────
# Aggregate target: builds the four per-domain corpora listed as
# prerequisites, then prints a blank line and a completion message.
corpus: corpus-legislation corpus-licensing corpus-reform corpus-ero
	@printf '\n%s\n' "All corpus files built."
# Per-domain corpus builders. Each one announces itself and then runs its
# builder script through $(UV). Note that corpus-legislation runs the same
# script as the first step of `download`.
corpus-legislation:
	@echo "Building legislation corpus from HTML..." && \
	$(UV) scripts/convert_legislation_html.py

corpus-licensing:
	@echo "Building licensing-criteria corpus from education.govt.nz..." && \
	$(UV) scripts/build_licensing_html.py

corpus-reform:
	@echo "Building reform-context corpus..." && \
	$(UV) scripts/build_reform_compilation.py

corpus-ero:
	@echo "Building ERO corpus from HTML sources..." && \
	$(UV) scripts/build_ero_compilation.py
# ── Index building ──────────────────────────────────────────────────────────
# dry-run: invoke the index builder with --dry-run.
dry-run:
	@$(UV) scripts/build_indexes.py --dry-run

# index: run the index builder with no domain argument, then re-enter make to
# run propagate-urls. The sub-make is kept on its own recipe line so that
# `make -n index` does not execute the builder itself.
index:
	@$(UV) scripts/build_indexes.py
	@$(MAKE) --no-print-directory propagate-urls
# Canned recipe shared by the per-domain index targets below.
#   $(1)  domain name, passed to both scripts
# Step 1 builds the index for that domain; step 2 applies source-URL
# propagation to the same domain. Make stops if step 1 fails.
define build-domain-index
@$(UV) scripts/build_indexes.py $(1)
@$(UV) scripts/propagate_source_urls.py --apply $(1)
endef

# Target names are short forms; the argument is the domain name the scripts
# receive (e.g. index-licensing -> licensing-criteria).
index-legislation:
	$(call build-domain-index,legislation)

index-licensing:
	$(call build-domain-index,licensing-criteria)

index-ero:
	$(call build-domain-index,ero)

index-cross-regulator:
	$(call build-domain-index,cross-regulator)

index-reform:
	$(call build-domain-index,reform-context)
# Push "Source:" URLs down each index tree so that deep nodes inherit the URL
# of their nearest ancestor. Makes no API calls. `make index` runs this
# automatically; it can also be run on its own against existing indexes.
propagate-urls:
	@$(UV) scripts/propagate_source_urls.py --apply
# ── Testing ─────────────────────────────────────────────────────────────────
# Shared prefix for the benchmark targets. Recursively expanded, so a UV value
# given on the command line is honoured.
run_benchmark = $(UV) benchmark/run_benchmark.py

# Staged pipeline test.
test:
	@$(UV) test_pipeline.py

# Benchmark runner with no arguments.
benchmark:
	@$(run_benchmark)

# Benchmark restricted to question ids 1, 4 and 5.
benchmark-quick:
	@$(run_benchmark) --ids 1,4,5

# Benchmark in comparison mode for the qwen and sonnet presets.
benchmark-compare:
	@$(run_benchmark) --compare qwen,sonnet
# ── App ─────────────────────────────────────────────────────────────────────
# Port of the local mlx-lm server; used both to probe it and to start it.
MLX_PORT := 8080

# Model presets. The sizes quoted by `make help` are ~9 GB for Qwen and
# ~17 GB for Gemma.
MODEL_QWEN  := mlx-community/Qwen2.5-14B-Instruct-4bit
MODEL_GEMMA := mlx-community/gemma-4-26b-a4b-it-4bit

# Model used by `make app`. Defaults to the Qwen preset; override with:
#   make app MLX_MODEL=your-model-here
MLX_MODEL ?= $(MODEL_QWEN)
# Stop any running mlx-lm server (handy when swapping models by hand).
# pkill exits non-zero when no process matched; that case is reported rather
# than treated as a failure, so the target always succeeds.
mlx-stop:
	@echo "Stopping mlx-lm server..."
	@if pkill -f "mlx_lm.*server" 2>/dev/null; then \
		echo "Server stopped."; \
	else \
		echo "No server running."; \
	fi
# Upper bound on readiness polls after launching the server. There is one poll
# per second while the port refuses connections, so this is roughly a timeout
# in seconds. Override with: make app MLX_WAIT_SECS=300
MLX_WAIT_SECS ?= 120

# Canned recipe: make sure an mlx-lm server for model $(1) is running on
# $(MLX_PORT), restarting it if a different model is currently served.
#
# The whole body is one shell command (note the trailing backslashes), so
# RUNNING_MODEL and the other shell variables persist across the steps:
#   1. Ask /v1/models which model is loaded. Any failure (no server, timeout,
#      unparsable JSON) leaves RUNNING_MODEL empty.
#   2. If the requested model is already served, do nothing.
#   3. Otherwise kill any process matching "mlx_lm.*server" if one answered,
#      start a new server in the background with its output discarded, and
#      poll /v1/models until it answers.
#
# Polling replaces a fixed `sleep 10` that printed "Server ready." whether or
# not the server had come up. On timeout only a warning is printed and the
# recipe still succeeds, so callers proceed exactly as they did before.
define start-mlx-server
@RUNNING_MODEL=$$(curl -s --max-time 2 http://localhost:$(MLX_PORT)/v1/models 2>/dev/null | python3 -c "import sys,json; d=json.load(sys.stdin); print(d['data'][0]['id'])" 2>/dev/null || echo ""); \
if [ "$$RUNNING_MODEL" = "$(1)" ]; then \
    echo "mlx-lm server already running $(1)"; \
else \
    if [ -n "$$RUNNING_MODEL" ]; then \
        echo "Stopping server (was running $$RUNNING_MODEL)..."; \
        pkill -f "mlx_lm.*server" 2>/dev/null; sleep 2; \
    fi; \
    echo "Starting mlx-lm server ($(1))..."; \
    ECE_MLX_MODEL="$(1)" /opt/homebrew/bin/uv run python -m mlx_lm server --model "$(1)" --port $(MLX_PORT) > /dev/null 2>&1 & \
    echo "Waiting for server..."; \
    READY=""; TRIES=0; \
    while [ "$$TRIES" -lt "$(MLX_WAIT_SECS)" ]; do \
        if curl -s -f --max-time 2 http://localhost:$(MLX_PORT)/v1/models > /dev/null 2>&1; then \
            READY=1; break; \
        fi; \
        sleep 1; TRIES=$$((TRIES + 1)); \
    done; \
    if [ -n "$$READY" ]; then \
        echo "Server ready."; \
    else \
        echo "WARNING: mlx-lm server did not answer after $(MLX_WAIT_SECS) polls; continuing anyway." >&2; \
    fi; \
fi
endef
# Canned recipe shared by the locally-served app targets.
#   $(1)  model id exported to the app as ECE_MLX_MODEL
#   $(2)  label shown in the launch message
define launch-streamlit
@echo "Launching Streamlit UI ($(2))..."
@ECE_MLX_MODEL=$(1) /opt/homebrew/bin/uv run streamlit run app.py --server.headless true
endef

# Each target first ensures the mlx-lm server is serving the chosen model
# (start-mlx-server), then starts Streamlit headless with that model id.
app: ## Launch with current MLX_MODEL (default: Qwen)
	$(call start-mlx-server,$(MLX_MODEL))
	$(call launch-streamlit,$(MLX_MODEL),$(MLX_MODEL))

app-qwen: ## Launch with Qwen 2.5 14B
	$(call start-mlx-server,$(MODEL_QWEN))
	$(call launch-streamlit,$(MODEL_QWEN),Qwen 2.5 14B)

app-gemma: ## Launch with Gemma 4 26B MoE
	$(call start-mlx-server,$(MODEL_GEMMA))
	$(call launch-streamlit,$(MODEL_GEMMA),Gemma 4 26B MoE)
# Unlike the other app targets this one does not start the mlx-lm server; it
# launches Streamlit with ECE_MODEL=sonnet (note: ECE_MODEL, not
# ECE_MLX_MODEL) in the environment.
app-sonnet: ## Launch with Claude Sonnet (API — no local server needed)
	@echo "Launching Streamlit UI (Claude Sonnet — API)..."
	@ECE_MODEL=sonnet /opt/homebrew/bin/uv run streamlit run app.py --server.headless true
# ── Full rebuild ────────────────────────────────────────────────────────────
# End-to-end rebuild: corpus, then index, then test.
# The stages are run as sequential sub-makes rather than listed as
# prerequisites: prerequisites of one target carry no ordering between
# themselves, so `make -j rebuild` could start indexing or testing before the
# corpus is built. Each stage must succeed before the next one starts.
rebuild:
	@$(MAKE) --no-print-directory corpus
	@$(MAKE) --no-print-directory index
	@$(MAKE) --no-print-directory test
	@echo ""
	@echo "Full rebuild complete."