# Task runner for the Hopcroft skill-classification project.
#
# Every target below is a command (none of them produces a file named after
# itself), so each one is declared .PHONY. Running `make` with no arguments
# prints the list of documented targets (see the `help` rule).
#
# Convention: a line starting with `## ` directly documents the next target;
# the `help` rule extracts those lines, so keep ordinary notes on `# ` lines.

#################################################################################
# GLOBALS                                                                       #
#################################################################################

PROJECT_NAME = Hopcroft
PYTHON_VERSION = 3.10
PYTHON_INTERPRETER = python

#################################################################################
# COMMANDS                                                                      #
#################################################################################

## Install Python dependencies
.PHONY: requirements
requirements:
	$(PYTHON_INTERPRETER) -m pip install -U pip
	$(PYTHON_INTERPRETER) -m pip install -r requirements.txt

# The second find only succeeds on __pycache__ directories that are empty once
# the compiled files have been removed by the first one.
## Delete all compiled Python files
.PHONY: clean
clean:
	find . -type f -name "*.py[co]" -delete
	find . -type d -name "__pycache__" -delete

## Lint using ruff
.PHONY: lint
lint:
	ruff format --check
	ruff check

## Format source code with ruff
.PHONY: format
format:
	ruff check --fix
	ruff format

#################################################################################
# PROJECT RULES                                                                 #
#################################################################################

## Download dataset from Hugging Face
.PHONY: data
data:
	$(PYTHON_INTERPRETER) -m hopcroft_skill_classification_tool_competition.dataset

## Extract features from raw data
.PHONY: features
features:
	$(PYTHON_INTERPRETER) -m hopcroft_skill_classification_tool_competition.features

#################################################################################
# TRAINING RULES                                                                #
#################################################################################

## Train Random Forest baseline with TF-IDF features (cleaned data)
.PHONY: train-baseline-tfidf
train-baseline-tfidf:
	$(PYTHON_INTERPRETER) -m hopcroft_skill_classification_tool_competition.modeling.train baseline

## Train Random Forest baseline with Embedding features (cleaned data)
.PHONY: train-baseline-embeddings
train-baseline-embeddings:
	$(PYTHON_INTERPRETER) -c "from hopcroft_skill_classification_tool_competition.modeling.train import run_baseline_train; run_baseline_train(feature_type='embedding', use_cleaned=True)"

## Train Random Forest with SMOTE and TF-IDF features (cleaned data)
.PHONY: train-smote-tfidf
train-smote-tfidf:
	$(PYTHON_INTERPRETER) -c "from hopcroft_skill_classification_tool_competition.modeling.train import run_smote_experiment, load_data; X, Y = load_data(feature_type='tfidf', use_cleaned=True); run_smote_experiment(X, Y, feature_type='tfidf')"

## Train Random Forest with SMOTE and Embedding features (cleaned data)
.PHONY: train-smote-embeddings
train-smote-embeddings:
	$(PYTHON_INTERPRETER) -c "from hopcroft_skill_classification_tool_competition.modeling.train import run_smote_experiment, load_data; X, Y = load_data(feature_type='embedding', use_cleaned=True); run_smote_experiment(X, Y, feature_type='embedding')"

#################################################################################
# TESTING RULES                                                                 #
#################################################################################

## Run all unit tests
.PHONY: test-unit
test-unit:
	pytest tests/unit/ -v -m unit

## Run all integration tests
.PHONY: test-integration
test-integration:
	pytest tests/integration/ -v -m integration

## Run all system tests
.PHONY: test-system
test-system:
	pytest tests/system/ -v -m system

## Run all tests (unit, integration, system)
.PHONY: test-all
test-all:
	pytest tests/ -v --ignore=tests/behavioral --ignore=tests/deepchecks

## Run tests with coverage report
.PHONY: test-coverage
test-coverage:
	pytest tests/ --cov=hopcroft_skill_classification_tool_competition --cov-report=html --cov-report=term

## Run fast tests only (exclude slow tests)
.PHONY: test-fast
test-fast:
	pytest tests/ -v -m "not slow" --ignore=tests/behavioral --ignore=tests/deepchecks

## Run behavioral tests
.PHONY: test-behavioral
test-behavioral:
	pytest tests/behavioral/ -v --ignore=tests/behavioral/test_model_training.py

## Run Great Expectations validation
.PHONY: validate-gx
validate-gx:
	$(PYTHON_INTERPRETER) -m hopcroft_skill_classification_tool_competition.tests.test_gx

## Run Deepchecks validation
.PHONY: validate-deepchecks
validate-deepchecks:
	$(PYTHON_INTERPRETER) tests/deepchecks/run_all_deepchecks.py

## Run all validation and tests
.PHONY: test-complete
test-complete: test-all validate-gx validate-deepchecks test-behavioral

#################################################################################
# Self Documenting Commands                                                     #
#################################################################################

.DEFAULT_GOAL := help

# Python one-liner (exported to the environment so the recipe can pass it to
# `python -c`). It reads the Makefile from stdin and pairs every `## text` line
# with the first following line of the form `name:`, where name consists of
# letters, `_` and `-` only; `.PHONY:` lines are skipped because of the dot.
define PRINT_HELP_PYSCRIPT
import re, sys; \
lines = '\n'.join([line for line in sys.stdin]); \
matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
print('Available rules:\n'); \
print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
endef
export PRINT_HELP_PYSCRIPT

# Declared phony so that a stray file named `help` cannot shadow the default goal.
## Show this help message
.PHONY: help
help:
	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)

################################################################################
# API COMMANDS                                                                 #
################################################################################

## Run API in development mode
.PHONY: api-dev
api-dev:
	fastapi dev hopcroft_skill_classification_tool_competition/main.py

## Run API in production mode
.PHONY: api-run
api-run:
	fastapi run hopcroft_skill_classification_tool_competition/main.py

# The curl calls below use --fail so that an HTTP error status (>= 400) makes
# the target exit non-zero instead of reporting success on an error page.

## Test API health check (requires running API)
.PHONY: test-api-health
test-api-health:
	@echo "Testing API health endpoint..."
	curl --fail -X GET "http://127.0.0.1:8000/health"

## Test API POST /predict (requires running API)
.PHONY: test-api-predict
test-api-predict:
	@echo "Testing prediction endpoint..."
	curl --fail -X POST "http://127.0.0.1:8000/predict" -H "Content-Type: application/json" -d '{"issue_text": "Fix critical bug in authentication and login flow with OAuth2", "repo_name": "my-repo"}'

## Test API GET /predictions (requires running API)
.PHONY: test-api-list
test-api-list:
	@echo "Testing list predictions endpoint..."
	curl --fail "http://127.0.0.1:8000/predictions?limit=5"

# RUN_ID must be supplied on the command line; the guard aborts before curl
# runs when it is empty.
## Test API GET /predictions/{run_id} (requires running API and valid run_id)
.PHONY: test-api-get-prediction
test-api-get-prediction:
	@echo "Testing get specific prediction endpoint..."
	@echo "Usage: make test-api-get-prediction RUN_ID=<run_id>"
	@if [ -z "$(RUN_ID)" ]; then echo "Error: RUN_ID not set. Example: make test-api-get-prediction RUN_ID=abc123"; exit 1; fi
	curl --fail "http://127.0.0.1:8000/predictions/$(RUN_ID)"

# printf is used instead of echo because the handling of "\n" by echo differs
# between shells.
## Run all API tests (requires running API)
.PHONY: test-api-all
test-api-all: test-api-health test-api-predict test-api-list
	@printf '\n All API tests completed!\n'