# File size: 7,102 Bytes | 225af6a
#################################################################################
# GLOBALS #
#################################################################################
# Project-wide settings. All three are plain literals, so they are assigned
# with simple (immediate) expansion.
PROJECT_NAME := Hopcroft
PYTHON_VERSION := 3.10
# Command used to launch Python in the recipes that go through this variable.
PYTHON_INTERPRETER := python
#################################################################################
# COMMANDS #
#################################################################################
## Install Python dependencies
# Upgrades pip itself first, then installs the packages listed in
# requirements.txt using the configured interpreter.
requirements:
	${PYTHON_INTERPRETER} -m pip install --upgrade pip
	${PYTHON_INTERPRETER} -m pip install --requirement requirements.txt
.PHONY: requirements
## Delete all compiled Python files
# Step 1 removes stray *.pyc / *.pyo files anywhere under the current directory.
# Step 2 removes every __pycache__ directory together with whatever it still
# contains. `-prune` stops find from descending into a directory that is about
# to be removed; `rm -rf` is used instead of find's `-delete` because `-delete`
# fails on a directory that is not empty.
.PHONY: clean
clean:
	find . -type f -name "*.py[co]" -delete
	find . -type d -name "__pycache__" -prune -exec rm -rf {} +
## Lint using ruff
# Runs ruff's formatter in --check mode, then ruff's linter.
lint:
	ruff format --check
	ruff check
.PHONY: lint
## Format source code with ruff
# Runs the linter with --fix first, then the ruff formatter.
format:
	ruff check --fix
	ruff format
.PHONY: format
#################################################################################
# PROJECT RULES #
#################################################################################
## Download dataset from Hugging Face
# Executes the package's `dataset` module as a script (python -m).
data:
	${PYTHON_INTERPRETER} -m hopcroft_skill_classification_tool_competition.dataset
.PHONY: data
## Extract features from raw data
# Executes the package's `features` module as a script (python -m).
features:
	${PYTHON_INTERPRETER} -m hopcroft_skill_classification_tool_competition.features
.PHONY: features
#################################################################################
# TRAINING RULES #
#################################################################################
## Train Random Forest baseline with TF-IDF features (cleaned data)
# Runs the `modeling.train` module as a script, passing `baseline` as its
# single command-line argument.
train-baseline-tfidf:
	${PYTHON_INTERPRETER} -m hopcroft_skill_classification_tool_competition.modeling.train baseline
.PHONY: train-baseline-tfidf
## Train Random Forest baseline with Embedding features (cleaned data)
# Imports run_baseline_train from `modeling.train` in an inline Python snippet
# and calls it with feature_type='embedding' and use_cleaned=True.
train-baseline-embeddings:
	${PYTHON_INTERPRETER} -c "from hopcroft_skill_classification_tool_competition.modeling.train import run_baseline_train; run_baseline_train(feature_type='embedding', use_cleaned=True)"
.PHONY: train-baseline-embeddings
## Train Random Forest with SMOTE and TF-IDF features (cleaned data)
# Inline Python snippet: loads X, Y via load_data(feature_type='tfidf',
# use_cleaned=True) and hands them to run_smote_experiment.
train-smote-tfidf:
	${PYTHON_INTERPRETER} -c "from hopcroft_skill_classification_tool_competition.modeling.train import run_smote_experiment, load_data; X, Y = load_data(feature_type='tfidf', use_cleaned=True); run_smote_experiment(X, Y, feature_type='tfidf')"
.PHONY: train-smote-tfidf
## Train Random Forest with SMOTE and Embedding features (cleaned data)
# Inline Python snippet: loads X, Y via load_data(feature_type='embedding',
# use_cleaned=True) and hands them to run_smote_experiment.
train-smote-embeddings:
	${PYTHON_INTERPRETER} -c "from hopcroft_skill_classification_tool_competition.modeling.train import run_smote_experiment, load_data; X, Y = load_data(feature_type='embedding', use_cleaned=True); run_smote_experiment(X, Y, feature_type='embedding')"
.PHONY: train-smote-embeddings
#################################################################################
# TESTING RULES #
#################################################################################
## Run all unit tests
# Collects from tests/unit/ and selects tests with the `unit` marker (-m).
test-unit:
	pytest --verbose -m unit tests/unit/
.PHONY: test-unit
## Run all integration tests
# Collects from tests/integration/ and selects tests with the `integration`
# marker (-m).
test-integration:
	pytest --verbose -m integration tests/integration/
.PHONY: test-integration
## Run all system tests
# Collects from tests/system/ and selects tests with the `system` marker (-m).
test-system:
	pytest --verbose -m system tests/system/
.PHONY: test-system
## Run all tests (unit, integration, system)
# Runs everything under tests/ except the behavioral and deepchecks
# directories, which are excluded with --ignore.
test-all:
	pytest --verbose --ignore=tests/behavioral --ignore=tests/deepchecks tests/
.PHONY: test-all
## Run tests with coverage report
# Measures coverage of the hopcroft_skill_classification_tool_competition
# package and requests both an HTML report and a terminal report.
# NOTE(review): unlike test-all and test-fast, this does not pass --ignore for
# tests/behavioral or tests/deepchecks; confirm that is intended.
test-coverage:
	pytest --cov=hopcroft_skill_classification_tool_competition --cov-report=html --cov-report=term tests/
.PHONY: test-coverage
## Run fast tests only (exclude slow tests)
# Same selection as test-all, further restricted by the marker expression
# "not slow".
test-fast:
	pytest --verbose -m "not slow" --ignore=tests/behavioral --ignore=tests/deepchecks tests/
.PHONY: test-fast
## Run behavioral tests
# Runs tests/behavioral/ while skipping test_model_training.py via --ignore.
test-behavioral:
	pytest --verbose --ignore=tests/behavioral/test_model_training.py tests/behavioral/
.PHONY: test-behavioral
## Run Great Expectations validation
# Executes the package's `tests.test_gx` module as a script (python -m).
validate-gx:
	${PYTHON_INTERPRETER} -m hopcroft_skill_classification_tool_competition.tests.test_gx
.PHONY: validate-gx
## Run Deepchecks validation
# Runs the script tests/deepchecks/run_all_deepchecks.py by file path.
validate-deepchecks:
	${PYTHON_INTERPRETER} tests/deepchecks/run_all_deepchecks.py
.PHONY: validate-deepchecks
## Run all validation and tests
# Aggregate target: it has no recipe of its own and only pulls in the
# prerequisites listed below.
test-complete: \
    test-all \
    validate-gx \
    validate-deepchecks \
    test-behavioral
.PHONY: test-complete
#################################################################################
# Self Documenting Commands #
#################################################################################
# Running `make` with no arguments prints the rule listing.
.DEFAULT_GOAL := help

# Inline Python program used by the `help` rule. It reads the makefile text
# from stdin, pairs each line of the form "## <description>" with the first
# following line that begins with a target name (letters, `_`, `-`) and a
# colon, then prints "<target padded to 25 columns><description>" per pair.
# Do not put comments inside the define: its body is handed to Python as-is.
define PRINT_HELP_PYSCRIPT
import re, sys; \
lines = '\n'.join([line for line in sys.stdin]); \
matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
print('Available rules:\n'); \
print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
endef
export PRINT_HELP_PYSCRIPT

# `help` is a command, not a file: declare it phony so that a file or
# directory named `help` can never make the rule look up to date.
.PHONY: help
help:
	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
################################################################################
# API COMMANDS #
################################################################################
## Run API in development mode
# Starts the app defined in main.py through the `fastapi dev` subcommand.
api-dev:
	fastapi dev hopcroft_skill_classification_tool_competition/main.py
.PHONY: api-dev
## Run API in production mode
# Starts the app defined in main.py through the `fastapi run` subcommand.
api-run:
	fastapi run hopcroft_skill_classification_tool_competition/main.py
.PHONY: api-run
## Test API health check (requires running API)
# Sends GET /health to the server on 127.0.0.1:8000.
# --fail makes curl exit non-zero when the server answers with an HTTP error
# status (>= 400); without it curl exits 0 and this check would pass even when
# the endpoint is broken.
.PHONY: test-api-health
test-api-health:
	@echo "Testing API health endpoint..."
	curl --fail -X GET "http://127.0.0.1:8000/health"
## Test API POST /predict (requires running API)
# Posts a fixed sample JSON payload (issue_text + repo_name) to /predict on
# 127.0.0.1:8000.
# --fail makes curl exit non-zero on an HTTP error status (>= 400) so a
# rejected or failing request fails this target instead of passing silently.
.PHONY: test-api-predict
test-api-predict:
	@echo "Testing prediction endpoint..."
	curl --fail -X POST "http://127.0.0.1:8000/predict" -H "Content-Type: application/json" -d '{"issue_text": "Fix critical bug in authentication and login flow with OAuth2", "repo_name": "my-repo"}'
## Test API GET /predictions (requires running API)
# Requests /predictions?limit=5 from the server on 127.0.0.1:8000.
# --fail makes curl exit non-zero on an HTTP error status (>= 400) so an
# error response fails this target instead of passing silently.
.PHONY: test-api-list
test-api-list:
	@echo "Testing list predictions endpoint..."
	curl --fail "http://127.0.0.1:8000/predictions?limit=5"
## Test API GET /predictions/{run_id} (requires running API and valid run_id)
# Requires RUN_ID to be supplied on the command line, e.g.
#   make test-api-get-prediction RUN_ID=abc123
# The shell guard aborts with exit status 1 when RUN_ID is empty, before any
# request is sent.
# --fail makes curl exit non-zero on an HTTP error status (>= 400), so an
# error response for the given id fails the target instead of passing.
.PHONY: test-api-get-prediction
test-api-get-prediction:
	@echo "Testing get specific prediction endpoint..."
	@echo "Usage: make test-api-get-prediction RUN_ID=<your_run_id>"
	@if [ -z "$(RUN_ID)" ]; then echo "Error: RUN_ID not set. Example: make test-api-get-prediction RUN_ID=abc123"; exit 1; fi
	curl --fail "http://127.0.0.1:8000/predictions/$(RUN_ID)"
## Run all API tests (requires running API)
# Aggregate target: runs the health, predict and list checks as prerequisites,
# then prints a completion banner.
# printf is used instead of echo because whether echo expands "\n" depends on
# the shell that make invokes; printf always treats it as a newline.
.PHONY: test-api-all
test-api-all: test-api-health test-api-predict test-api-list
	@printf '\n All API tests completed!\n'