DaCrow13
Deploy to HF Spaces (Clean)
225af6a
#################################################################################
# GLOBALS #
#################################################################################
# Project-wide settings. All three are plain literals, so immediate assignment
# (:=) yields exactly the same values as deferred assignment (=).
# PROJECT_NAME and PYTHON_VERSION are not referenced by any rule visible in
# this file.
PROJECT_NAME := Hopcroft
PYTHON_VERSION := 3.10
# Interpreter used by every `$(PYTHON_INTERPRETER) ...` recipe. It can be
# overridden from the command line, e.g. `make data PYTHON_INTERPRETER=python3`.
PYTHON_INTERPRETER := python
#################################################################################
# COMMANDS #
#################################################################################
# pip is always driven through the configured interpreter. Deferred (=)
# assignment keeps the reference to PYTHON_INTERPRETER late-bound, so the
# expanded commands are exactly `$(PYTHON_INTERPRETER) -m pip ...`.
pip_cmd = $(PYTHON_INTERPRETER) -m pip

# Step 1 upgrades pip itself; step 2 installs everything listed in
# requirements.txt (path relative to the directory make is run from).
## Install Python dependencies
.PHONY: requirements
requirements:
	$(pip_cmd) install -U pip
	$(pip_cmd) install -r requirements.txt
# Two passes over the current directory tree: first the *.pyc / *.pyo files,
# then the __pycache__ directories themselves.
# NOTE(review): find's -delete cannot remove a non-empty directory, so the
# second pass presumably relies on the first having emptied each __pycache__;
# confirm nothing other than .pyc/.pyo files is stored there.
## Delete all compiled Python files
.PHONY: clean
clean:
	find . -name '*.py[co]' -type f -delete
	find . -name '__pycache__' -type d -delete
# Runs `ruff format --check` followed by `ruff check`, with no explicit paths.
# Each recipe line is a separate command; make stops at the first one that
# exits non-zero.
## Lint using ruff
.PHONY: lint
lint:
	ruff format --check
	ruff check
# Runs `ruff check --fix` first and `ruff format` second, with no explicit
# paths. The second command only runs if the first exits successfully.
## Format source code with ruff
.PHONY: format
format:
	ruff check --fix
	ruff format
#################################################################################
# PROJECT RULES #
#################################################################################
# Executes the package's `dataset` module as a script (`python -m ...`).
# What is downloaded and where it is written is decided by that module, not by
# this rule; the rule declares no prerequisites or output files.
## Download dataset from Hugging Face
.PHONY: data
data:
	$(PYTHON_INTERPRETER) -m hopcroft_skill_classification_tool_competition.dataset
# Executes the package's `features` module as a script (`python -m ...`).
# The rule does not depend on `data`, so make will not fetch the dataset first.
## Extract features from raw data
.PHONY: features
features:
	$(PYTHON_INTERPRETER) -m hopcroft_skill_classification_tool_competition.features
#################################################################################
# TRAINING RULES #
#################################################################################
# Dotted path of the training module shared by every rule in this section.
# Deferred (=) assignment; each recipe below expands to exactly the same shell
# command as if the path were written out in full.
train_module = hopcroft_skill_classification_tool_competition.modeling.train

# The TF-IDF baseline goes through the module's command-line entry point with
# the single argument `baseline`. The other three rules import functions from
# the module and call them inline via `python -c`.

## Train Random Forest baseline with TF-IDF features (cleaned data)
.PHONY: train-baseline-tfidf
train-baseline-tfidf:
	$(PYTHON_INTERPRETER) -m $(train_module) baseline

## Train Random Forest baseline with Embedding features (cleaned data)
.PHONY: train-baseline-embeddings
train-baseline-embeddings:
	$(PYTHON_INTERPRETER) -c "from $(train_module) import run_baseline_train; run_baseline_train(feature_type='embedding', use_cleaned=True)"

## Train Random Forest with SMOTE and TF-IDF features (cleaned data)
.PHONY: train-smote-tfidf
train-smote-tfidf:
	$(PYTHON_INTERPRETER) -c "from $(train_module) import run_smote_experiment, load_data; X, Y = load_data(feature_type='tfidf', use_cleaned=True); run_smote_experiment(X, Y, feature_type='tfidf')"

## Train Random Forest with SMOTE and Embedding features (cleaned data)
.PHONY: train-smote-embeddings
train-smote-embeddings:
	$(PYTHON_INTERPRETER) -c "from $(train_module) import run_smote_experiment, load_data; X, Y = load_data(feature_type='embedding', use_cleaned=True); run_smote_experiment(X, Y, feature_type='embedding')"
#################################################################################
# TESTING RULES #
#################################################################################
# Runs pytest verbosely on tests/unit/, passing the marker expression `unit`
# via -m. `pytest` is resolved from PATH, not through $(PYTHON_INTERPRETER).
## Run all unit tests
.PHONY: test-unit
test-unit:
	pytest tests/unit/ -v -m unit
# Runs pytest verbosely on tests/integration/, passing the marker expression
# `integration` via -m. `pytest` is resolved from PATH.
## Run all integration tests
.PHONY: test-integration
test-integration:
	pytest tests/integration/ -v -m integration
# Runs pytest verbosely on tests/system/, passing the marker expression
# `system` via -m. `pytest` is resolved from PATH.
## Run all system tests
.PHONY: test-system
test-system:
	pytest tests/system/ -v -m system
# Directories skipped by the general-purpose runs (test-all and test-fast).
# test-coverage does NOT use these options and therefore collects from all of
# tests/.
pytest_ignore_opts = --ignore=tests/behavioral --ignore=tests/deepchecks

## Run all tests (unit, integration, system)
.PHONY: test-all
test-all:
	pytest tests/ -v $(pytest_ignore_opts)

# Coverage is measured for the project package and reported both as HTML and
# on the terminal.
## Run tests with coverage report
.PHONY: test-coverage
test-coverage:
	pytest tests/ \
		--cov=hopcroft_skill_classification_tool_competition \
		--cov-report=html \
		--cov-report=term

# Same selection as test-all, further narrowed by the marker expression
# "not slow".
## Run fast tests only (exclude slow tests)
.PHONY: test-fast
test-fast:
	pytest tests/ -v -m "not slow" $(pytest_ignore_opts)
# Runs pytest verbosely on tests/behavioral/ while explicitly skipping
# tests/behavioral/test_model_training.py via --ignore.
## Run behavioral tests
.PHONY: test-behavioral
test-behavioral:
	pytest tests/behavioral/ -v --ignore=tests/behavioral/test_model_training.py
# Executes the `tests.test_gx` submodule of the project package as a script.
# NOTE(review): the pytest rules use the top-level tests/ directory, whereas
# this rule expects a `tests` package inside
# hopcroft_skill_classification_tool_competition; confirm that module exists.
## Run Great Expectations validation
.PHONY: validate-gx
validate-gx:
	$(PYTHON_INTERPRETER) -m hopcroft_skill_classification_tool_competition.tests.test_gx
# Runs the script tests/deepchecks/run_all_deepchecks.py directly with the
# configured interpreter (path relative to the directory make is run from).
## Run Deepchecks validation
.PHONY: validate-deepchecks
validate-deepchecks:
	$(PYTHON_INTERPRETER) tests/deepchecks/run_all_deepchecks.py
# Aggregate rule with no recipe of its own: it only pulls in the four rules
# listed as prerequisites.
## Run all validation and tests
.PHONY: test-complete
test-complete: \
    test-all \
    validate-gx \
    validate-deepchecks \
    test-behavioral
#################################################################################
# Self Documenting Commands #
#################################################################################
# Running `make` with no arguments prints the rule overview.
.DEFAULT_GOAL := help

# Inline Python program used by `help`. It reads the makefile from stdin and,
# for every line of the form `## <description>`, finds the next line that
# starts with a rule name ([a-zA-Z_-]+) followed by a colon. It then prints the
# rule name padded to 25 columns followed by the description.
#
# Layout contract: stdin lines are joined with an extra newline, so the pattern
# needs at least one line (here the .PHONY line) between a description and its
# target line. A description placed directly above its target would be paired
# with a later rule instead.
#
# Do not put comments inside the define: they would become part of the script.
define PRINT_HELP_PYSCRIPT
import re, sys; \
lines = '\n'.join([line for line in sys.stdin]); \
matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
print('Available rules:\n'); \
print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
endef
export PRINT_HELP_PYSCRIPT

# `help` is a command, not a file. Without this declaration a file or directory
# named `help` in the project root would make the rule look up to date and
# silently disable the default goal.
.PHONY: help
help:
	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
################################################################################
# API COMMANDS #
################################################################################
# Application file handed to the FastAPI CLI by both rules below (path relative
# to the directory make is run from).
api_app = hopcroft_skill_classification_tool_competition/main.py

## Run API in development mode
.PHONY: api-dev
api-dev:
	fastapi dev $(api_app)

## Run API in production mode
.PHONY: api-run
api-run:
	fastapi run $(api_app)
# Sends GET /health to the API at the hard-coded address 127.0.0.1:8000.
# --fail makes curl exit non-zero when the server answers with an HTTP error
# status (>= 400). Without it curl exits 0 for any completed request, so the
# check would pass even when the API reports itself unhealthy.
## Test API health check (requires running API)
.PHONY: test-api-health
test-api-health:
	@echo "Testing API health endpoint..."
	curl --fail -X GET "http://127.0.0.1:8000/health"
# Posts one fixed sample issue as JSON to /predict on 127.0.0.1:8000.
# The command is split over continuation lines; the shell receives the same
# arguments as if it were written on a single line.
## Test API POST /predict (requires running API)
.PHONY: test-api-predict
test-api-predict:
	@echo "Testing prediction endpoint..."
	curl -X POST "http://127.0.0.1:8000/predict" \
		-H "Content-Type: application/json" \
		-d '{"issue_text": "Fix critical bug in authentication and login flow with OAuth2", "repo_name": "my-repo"}'
# Requests /predictions with the query parameter limit=5 from the API at the
# hard-coded address 127.0.0.1:8000 (curl's default method, no extra headers).
## Test API GET /predictions (requires running API)
.PHONY: test-api-list
test-api-list:
	@echo "Testing list predictions endpoint..."
	curl "http://127.0.0.1:8000/predictions?limit=5"
# Fetches a single prediction by id. RUN_ID must be supplied on the make
# command line; the guard aborts with exit status 1 before curl runs when it is
# empty. The banner and usage lines are printed in either case.
## Test API GET /predictions/{run_id} (requires running API and valid run_id)
.PHONY: test-api-get-prediction
test-api-get-prediction:
	@echo "Testing get specific prediction endpoint..."
	@echo "Usage: make test-api-get-prediction RUN_ID=<your_run_id>"
	@[ -n "$(RUN_ID)" ] || { echo "Error: RUN_ID not set. Example: make test-api-get-prediction RUN_ID=abc123"; exit 1; }
	curl "http://127.0.0.1:8000/predictions/$(RUN_ID)"
# Aggregate rule: runs the health, predict and list checks (test-api-get-prediction
# is not included because it needs a RUN_ID), then prints a closing message.
# printf is used instead of `echo "\n..."` because whether echo interprets
# backslash escapes depends on the shell; some print a literal `\n`.
## Run all API tests (requires running API)
.PHONY: test-api-all
test-api-all: test-api-health test-api-predict test-api-list
	@printf '\n All API tests completed!\n'