| ################################################################################# | |
| # GLOBALS # | |
| ################################################################################# | |
# Project-wide settings. All three are plain constants, so they are assigned
# once with `:=`. Any of them can still be overridden on the command line,
# e.g. `make PYTHON_INTERPRETER=python3 <target>`.
PROJECT_NAME := Hopcroft
PYTHON_VERSION := 3.10
# Interpreter used by every rule that runs Python code.
PYTHON_INTERPRETER := python
| ################################################################################# | |
| # COMMANDS # | |
| ################################################################################# | |
## Install Python dependencies
# Upgrades pip itself first, then installs everything listed in
# requirements.txt. Both steps go through the configured interpreter.
.PHONY: requirements
requirements:
	${PYTHON_INTERPRETER} -m pip install --upgrade pip
	${PYTHON_INTERPRETER} -m pip install --requirement requirements.txt
## Delete all compiled Python files
# Step 1 removes byte-compiled files (*.pyc / *.pyo) anywhere under the
# current directory. Step 2 removes every __pycache__ directory.
# The directories are removed with `rm -rf` instead of find's `-delete`:
# `-delete` refuses to remove a directory that still contains anything
# (for example cache files that are not *.pyc), which made the target fail.
# `-prune` keeps find from descending into a directory it is about to remove.
.PHONY: clean
clean:
	find . -type f -name "*.py[co]" -delete
	find . -type d -name "__pycache__" -prune -exec rm -rf {} +
## Lint using ruff
# Read-only checks: the first command fails if any file would be reformatted,
# the second runs the linter. Neither command modifies files.
lint:
	ruff format --check
	ruff check
.PHONY: lint
## Format source code with ruff
# Applies the linter's automatic fixes first, then reformats the sources.
format:
	ruff check --fix
	ruff format
.PHONY: format
| ################################################################################# | |
| # PROJECT RULES # | |
| ################################################################################# | |
## Download dataset from Hugging Face
# Runs the package's `dataset` module as a script with the configured interpreter.
.PHONY: data
data:
	${PYTHON_INTERPRETER} -m hopcroft_skill_classification_tool_competition.dataset
## Extract features from raw data
# Runs the package's `features` module as a script with the configured interpreter.
.PHONY: features
features:
	${PYTHON_INTERPRETER} -m hopcroft_skill_classification_tool_competition.features
| ################################################################################# | |
| # TRAINING RULES # | |
| ################################################################################# | |
## Train Random Forest baseline with TF-IDF features (cleaned data)
# Invokes the training module as a script and passes it the single
# positional argument `baseline`.
.PHONY: train-baseline-tfidf
train-baseline-tfidf:
	${PYTHON_INTERPRETER} -m hopcroft_skill_classification_tool_competition.modeling.train baseline
## Train Random Forest baseline with Embedding features (cleaned data)
# Calls run_baseline_train() directly through `python -c`, asking for
# embedding features and the cleaned dataset.
.PHONY: train-baseline-embeddings
train-baseline-embeddings:
	${PYTHON_INTERPRETER} -c "from hopcroft_skill_classification_tool_competition.modeling.train import run_baseline_train; run_baseline_train(use_cleaned=True, feature_type='embedding')"
## Train Random Forest with SMOTE and TF-IDF features (cleaned data)
# One-liner: load the cleaned TF-IDF data with load_data(), then hand the two
# returned values to run_smote_experiment().
.PHONY: train-smote-tfidf
train-smote-tfidf:
	${PYTHON_INTERPRETER} -c "from hopcroft_skill_classification_tool_competition.modeling.train import load_data, run_smote_experiment; features, labels = load_data(feature_type='tfidf', use_cleaned=True); run_smote_experiment(features, labels, feature_type='tfidf')"
## Train Random Forest with SMOTE and Embedding features (cleaned data)
# One-liner: load the cleaned embedding data with load_data(), then hand the
# two returned values to run_smote_experiment().
.PHONY: train-smote-embeddings
train-smote-embeddings:
	${PYTHON_INTERPRETER} -c "from hopcroft_skill_classification_tool_competition.modeling.train import load_data, run_smote_experiment; features, labels = load_data(feature_type='embedding', use_cleaned=True); run_smote_experiment(features, labels, feature_type='embedding')"
| ################################################################################# | |
| # TESTING RULES # | |
| ################################################################################# | |
## Run all unit tests
# Verbose pytest run limited to tests/unit/ and to tests carrying the `unit` marker.
.PHONY: test-unit
test-unit:
	pytest -v -m unit tests/unit/
## Run all integration tests
# Verbose pytest run limited to tests/integration/ and to tests carrying the
# `integration` marker.
.PHONY: test-integration
test-integration:
	pytest -v -m integration tests/integration/
## Run all system tests
# Verbose pytest run limited to tests/system/ and to tests carrying the `system` marker.
.PHONY: test-system
test-system:
	pytest -v -m system tests/system/
## Run all tests (unit, integration, system)
# Collects everything under tests/ except the behavioral and deepchecks
# directories, which have their own targets.
.PHONY: test-all
test-all:
	pytest -v --ignore=tests/behavioral --ignore=tests/deepchecks tests/
## Run tests with coverage report
# Measures coverage of the hopcroft_skill_classification_tool_competition
# package and reports it both as HTML and on the terminal.
# NOTE(review): unlike test-all and test-fast, this run does not pass
# --ignore for tests/behavioral and tests/deepchecks. Confirm that is intended.
.PHONY: test-coverage
test-coverage:
	pytest --cov=hopcroft_skill_classification_tool_competition --cov-report=html --cov-report=term tests/
## Run fast tests only (exclude slow tests)
# Same selection as test-all, minus every test carrying the `slow` marker.
.PHONY: test-fast
test-fast:
	pytest -v -m "not slow" --ignore=tests/behavioral --ignore=tests/deepchecks tests/
## Run behavioral tests
# Runs tests/behavioral/ verbosely, skipping test_model_training.py.
.PHONY: test-behavioral
test-behavioral:
	pytest -v --ignore=tests/behavioral/test_model_training.py tests/behavioral/
## Run Great Expectations validation
# Runs the package's `tests.test_gx` module as a script with the configured interpreter.
.PHONY: validate-gx
validate-gx:
	${PYTHON_INTERPRETER} -m hopcroft_skill_classification_tool_competition.tests.test_gx
## Run Deepchecks validation
# Executes the deepchecks driver script by path; the path is relative to the
# directory make is run from.
.PHONY: validate-deepchecks
validate-deepchecks:
	${PYTHON_INTERPRETER} tests/deepchecks/run_all_deepchecks.py
## Run all validation and tests
# Aggregate target with no recipe of its own: it only pulls in the
# prerequisites listed below.
.PHONY: test-complete
test-complete: \
    test-all \
    validate-gx \
    validate-deepchecks \
    test-behavioral
| ################################################################################# | |
| # Self Documenting Commands # | |
| ################################################################################# | |
# Running plain `make` prints the list of documented rules.
.DEFAULT_GOAL := help

# Python one-liner that builds the help text. It reads the makefile from
# stdin, finds every line starting with two hashes and a space, and pairs it
# with the next rule name that follows. The name is printed left-aligned in a
# 28-character column followed by the description. The column is wider than
# the longest rule name (25 characters) so that a name never runs straight
# into its description.
define PRINT_HELP_PYSCRIPT
import re, sys; \
lines = '\n'.join([line for line in sys.stdin]); \
matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
print('Available rules:\n'); \
print('\n'.join(['{:28}{}'.format(*reversed(match)) for match in matches]))
endef
# Exported so the script is also available to recipes as an environment variable.
export PRINT_HELP_PYSCRIPT

# `help` is a command, not a file: declaring it phony keeps the target working
# even if a file named `help` exists in the directory.
.PHONY: help
help:
	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
| ################################################################################ | |
| # API COMMANDS # | |
| ################################################################################ | |
## Run API in development mode
# Starts the application defined in main.py through `fastapi dev`.
api-dev:
	fastapi dev hopcroft_skill_classification_tool_competition/main.py
.PHONY: api-dev
## Run API in production mode
# Starts the application defined in main.py through `fastapi run`.
api-run:
	fastapi run hopcroft_skill_classification_tool_competition/main.py
.PHONY: api-run
## Test API health check (requires running API)
# Sends a GET request to the local /health endpoint.
# `--fail` makes curl exit non-zero on an HTTP error status (>= 400). Without
# it the target reported success even when the API answered with an error.
# GET is curl's default method, so no explicit -X is needed.
.PHONY: test-api-health
test-api-health:
	@echo "Testing API health endpoint..."
	curl --fail "http://127.0.0.1:8000/health"
## Test API POST /predict (requires running API)
# Posts a fixed sample issue as JSON to the local /predict endpoint.
# `--fail` makes curl exit non-zero on an HTTP error status (>= 400), so a
# rejected or failing request makes the target fail instead of passing silently.
.PHONY: test-api-predict
test-api-predict:
	@echo "Testing prediction endpoint..."
	curl --fail -X POST "http://127.0.0.1:8000/predict" -H "Content-Type: application/json" -d '{"issue_text": "Fix critical bug in authentication and login flow with OAuth2", "repo_name": "my-repo"}'
## Test API GET /predictions (requires running API)
# Requests the local /predictions endpoint with limit=5.
# `--fail` makes curl exit non-zero on an HTTP error status (>= 400), so an
# error response makes the target fail instead of passing silently.
.PHONY: test-api-list
test-api-list:
	@echo "Testing list predictions endpoint..."
	curl --fail "http://127.0.0.1:8000/predictions?limit=5"
## Test API GET /predictions/{run_id} (requires running API and valid run_id)
# Fetches one stored prediction. RUN_ID must be given on the command line:
#     make test-api-get-prediction RUN_ID=<your_run_id>
# The guard aborts with exit status 1 and a message on stderr when RUN_ID is empty.
# `--fail` makes curl exit non-zero on an HTTP error status (>= 400), so an
# error response (e.g. for an unknown run id) fails the target.
.PHONY: test-api-get-prediction
test-api-get-prediction:
	@echo "Testing get specific prediction endpoint..."
	@echo "Usage: make test-api-get-prediction RUN_ID=<your_run_id>"
	@if [ -z "$(RUN_ID)" ]; then echo "Error: RUN_ID not set. Example: make test-api-get-prediction RUN_ID=abc123" >&2; exit 1; fi
	curl --fail "http://127.0.0.1:8000/predictions/$(RUN_ID)"
## Run all API tests (requires running API)
# Aggregate target: runs the health, predict and list checks, then prints a
# closing message. test-api-get-prediction is not included because it needs a RUN_ID.
# printf is used for the message because the handling of "\n" by `echo`
# differs between shells (some print the backslash sequence literally).
.PHONY: test-api-all
test-api-all: test-api-health test-api-predict test-api-list
	@printf '\n All API tests completed!\n'