File size: 7,102 Bytes
225af6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#################################################################################
# GLOBALS                                                                       #
#################################################################################

# Project identifier (not referenced by any rule visible in this file).
PROJECT_NAME := Hopcroft
# Python version label (not referenced by any rule visible in this file).
PYTHON_VERSION := 3.10
# Executable used by every recipe that launches Python; can be overridden on
# the command line, e.g. `make PYTHON_INTERPRETER=python3 data`.
PYTHON_INTERPRETER := python

#################################################################################
# COMMANDS                                                                      #
#################################################################################

## Install Python dependencies
# Upgrades pip itself first, then installs everything listed in
# requirements.txt using the configured interpreter.
.PHONY: requirements
requirements:
	${PYTHON_INTERPRETER} -m pip install --upgrade pip
	${PYTHON_INTERPRETER} -m pip install --requirement requirements.txt

## Delete all compiled Python files
# Pass 1 removes *.pyc / *.pyo files anywhere below the current directory.
# Pass 2 removes the __pycache__ directories themselves; find's -delete only
# removes a directory that is already empty, hence the ordering.
.PHONY: clean
clean:
	find . -type f -name '*.py[co]' -delete
	find . -type d -name '__pycache__' -delete

## Lint using ruff
# Runs `ruff format --check` followed by `ruff check`; neither invocation is
# given a flag that rewrites files. Each line is a separate command, so make
# stops at the first one that exits non-zero.
.PHONY: lint
lint:
	ruff format --check
	ruff check

## Format source code with ruff
# Applies ruff's automatic lint fixes (`ruff check --fix`) first, then
# reformats the code with `ruff format`.
.PHONY: format
format:
	ruff check --fix
	ruff format

#################################################################################
# PROJECT RULES                                                                 #
#################################################################################

## Download dataset from Hugging Face
# Runs the package's `dataset` module as a script via `python -m`.
.PHONY: data
data:
	$(PYTHON_INTERPRETER) -m hopcroft_skill_classification_tool_competition.dataset

## Extract features from raw data
# Runs the package's `features` module as a script via `python -m`.
# NOTE(review): no prerequisite on `data` is declared; presumably the dataset
# must already be present - confirm.
.PHONY: features
features:
	$(PYTHON_INTERPRETER) -m hopcroft_skill_classification_tool_competition.features

#################################################################################
# TRAINING RULES                                                                #
#################################################################################

## Train Random Forest baseline with TF-IDF features (cleaned data)
# Runs the `modeling.train` module as a script with the single positional
# argument `baseline`.
.PHONY: train-baseline-tfidf
train-baseline-tfidf:
	$(PYTHON_INTERPRETER) -m hopcroft_skill_classification_tool_competition.modeling.train baseline

## Train Random Forest baseline with Embedding features (cleaned data)
# Uses an inline `python -c` program: imports run_baseline_train from
# modeling.train and calls it with feature_type='embedding', use_cleaned=True.
.PHONY: train-baseline-embeddings
train-baseline-embeddings:
	$(PYTHON_INTERPRETER) -c "from hopcroft_skill_classification_tool_competition.modeling.train import run_baseline_train; run_baseline_train(feature_type='embedding', use_cleaned=True)"

## Train Random Forest with SMOTE and TF-IDF features (cleaned data)
# Inline `python -c` program: load_data(feature_type='tfidf', use_cleaned=True)
# produces X and Y, which are then handed to run_smote_experiment with
# feature_type='tfidf'.
.PHONY: train-smote-tfidf
train-smote-tfidf:
	$(PYTHON_INTERPRETER) -c "from hopcroft_skill_classification_tool_competition.modeling.train import run_smote_experiment, load_data; X, Y = load_data(feature_type='tfidf', use_cleaned=True); run_smote_experiment(X, Y, feature_type='tfidf')"

## Train Random Forest with SMOTE and Embedding features (cleaned data)
# Inline `python -c` program: load_data(feature_type='embedding',
# use_cleaned=True) produces X and Y, which are then handed to
# run_smote_experiment with feature_type='embedding'.
.PHONY: train-smote-embeddings
train-smote-embeddings:
	$(PYTHON_INTERPRETER) -c "from hopcroft_skill_classification_tool_competition.modeling.train import run_smote_experiment, load_data; X, Y = load_data(feature_type='embedding', use_cleaned=True); run_smote_experiment(X, Y, feature_type='embedding')"

#################################################################################
# TESTING RULES                                                                 #
#################################################################################

## Run all unit tests
# Collects from tests/unit/ and keeps only tests selected by the marker
# expression `unit`, with verbose per-test output.
.PHONY: test-unit
test-unit:
	pytest tests/unit/ --verbose -m unit

## Run all integration tests
# Collects from tests/integration/ and keeps only tests selected by the marker
# expression `integration`, with verbose per-test output.
.PHONY: test-integration
test-integration:
	pytest tests/integration/ --verbose -m integration

## Run all system tests
# Collects from tests/system/ and keeps only tests selected by the marker
# expression `system`, with verbose per-test output.
.PHONY: test-system
test-system:
	pytest tests/system/ --verbose -m system

## Run all tests (unit, integration, system)
# Runs everything under tests/ except tests/behavioral and tests/deepchecks,
# which are covered by the test-behavioral and validate-deepchecks targets.
.PHONY: test-all
test-all:
	pytest tests/ --verbose --ignore=tests/behavioral --ignore=tests/deepchecks

## Run tests with coverage report
# Measures coverage of the hopcroft_skill_classification_tool_competition
# package and emits both an HTML report and a terminal summary.
# NOTE(review): unlike test-all and test-fast, no --ignore options are passed,
# so tests/behavioral and tests/deepchecks are collected too - confirm this is
# intended.
.PHONY: test-coverage
test-coverage:
	pytest tests/ --cov=hopcroft_skill_classification_tool_competition --cov-report=html --cov-report=term

## Run fast tests only (exclude slow tests)
# Same directory exclusions as test-all, additionally deselecting every test
# matched by the `slow` marker.
.PHONY: test-fast
test-fast:
	pytest tests/ --verbose -m 'not slow' --ignore=tests/behavioral --ignore=tests/deepchecks

## Run behavioral tests
# Runs tests/behavioral/ verbosely, leaving out test_model_training.py.
# NOTE(review): the reason that file is excluded is not visible here.
.PHONY: test-behavioral
test-behavioral:
	pytest tests/behavioral/ --verbose --ignore=tests/behavioral/test_model_training.py

## Run Great Expectations validation
# Runs the module hopcroft_skill_classification_tool_competition.tests.test_gx
# as a script via `python -m`.
# NOTE(review): this module path lives inside the package, not under the
# top-level tests/ directory used by the pytest targets - confirm it exists.
.PHONY: validate-gx
validate-gx:
	$(PYTHON_INTERPRETER) -m hopcroft_skill_classification_tool_competition.tests.test_gx

## Run Deepchecks validation
# Executes the script tests/deepchecks/run_all_deepchecks.py directly with the
# configured interpreter (path is relative to the directory make runs in).
.PHONY: validate-deepchecks
validate-deepchecks:
	$(PYTHON_INTERPRETER) tests/deepchecks/run_all_deepchecks.py

## Run all validation and tests
# Aggregate target with no recipe of its own: it only requires test-all,
# validate-gx, validate-deepchecks and test-behavioral. By default make stops
# at the first prerequisite that fails (unless invoked with -k).
.PHONY: test-complete
test-complete: test-all validate-gx validate-deepchecks test-behavioral

#################################################################################
# Self Documenting Commands                                                     #
#################################################################################

# Running plain `make` prints the rule list instead of executing a rule.
.DEFAULT_GOAL := help

# Inline Python program used by the `help` rule. It reads the makefile text on
# stdin, pairs every double-hash description comment with the first target
# name that follows it, and prints one "target  description" row per pair.
# The name column is at least 25 characters wide and always leaves two spaces
# after the longest target, so a 25-character name such as
# train-baseline-embeddings does not run straight into its description.
# Target names are matched with [a-zA-Z_-]+, so names containing digits or
# dots would not be listed.
define PRINT_HELP_PYSCRIPT
import re, sys; \
lines = '\n'.join([line for line in sys.stdin]); \
matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
width = max([25] + [len(name) + 2 for _, name in matches]); \
print('Available rules:\n'); \
print('\n'.join([name.ljust(width) + desc for desc, name in matches]))
endef
export PRINT_HELP_PYSCRIPT

# `help` is a command, not a file: declaring it phony keeps a stray file named
# "help" from making the target look up to date.
.PHONY: help
help:
	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)

################################################################################
# API COMMANDS                                                                 #
################################################################################

## Run API in development mode
# Launches hopcroft_skill_classification_tool_competition/main.py through the
# FastAPI CLI's `dev` command; the process stays in the foreground.
.PHONY: api-dev
api-dev:
	fastapi dev hopcroft_skill_classification_tool_competition/main.py

## Run API in production mode
# Launches hopcroft_skill_classification_tool_competition/main.py through the
# FastAPI CLI's `run` command; the process stays in the foreground.
.PHONY: api-run
api-run:
	fastapi run hopcroft_skill_classification_tool_competition/main.py

## Test API health check (requires running API)
# Sends GET /health to the API on 127.0.0.1:8000.
# --fail makes curl exit non-zero on an HTTP 4xx/5xx response, so this target
# actually fails when the API reports an error instead of always succeeding.
# GET is curl's default method, so no explicit -X is needed.
.PHONY: test-api-health
test-api-health:
	@echo "Testing API health endpoint..."
	curl --fail "http://127.0.0.1:8000/health"

## Test API POST /predict (requires running API)
# Posts a fixed JSON sample (issue_text + repo_name) to /predict on
# 127.0.0.1:8000.
# --fail makes curl exit non-zero on an HTTP 4xx/5xx response, so a rejected or
# failing prediction request fails the target instead of passing silently.
.PHONY: test-api-predict
test-api-predict:
	@echo "Testing prediction endpoint..."
	curl --fail -X POST "http://127.0.0.1:8000/predict" -H "Content-Type: application/json" -d '{"issue_text": "Fix critical bug in authentication and login flow with OAuth2", "repo_name": "my-repo"}'

## Test API GET /predictions (requires running API)
# Requests /predictions?limit=5 from the API on 127.0.0.1:8000.
# --fail makes curl exit non-zero on an HTTP 4xx/5xx response, so the target
# fails when the endpoint reports an error.
.PHONY: test-api-list
test-api-list:
	@echo "Testing list predictions endpoint..."
	curl --fail "http://127.0.0.1:8000/predictions?limit=5"

## Test API GET /predictions/{run_id} (requires running API and valid run_id)
# Requires RUN_ID to be passed on the make command line; the shell guard below
# aborts with exit status 1 when it is empty, before any request is sent.
# --fail makes curl exit non-zero on an HTTP 4xx/5xx response, so an unknown
# run id fails the target instead of passing silently.
.PHONY: test-api-get-prediction
test-api-get-prediction:
	@echo "Testing get specific prediction endpoint..."
	@echo "Usage: make test-api-get-prediction RUN_ID=<your_run_id>"
	@if [ -z "$(RUN_ID)" ]; then echo "Error: RUN_ID not set. Example: make test-api-get-prediction RUN_ID=abc123"; exit 1; fi
	curl --fail "http://127.0.0.1:8000/predictions/$(RUN_ID)"

## Run all API tests (requires running API)
# Aggregates test-api-health, test-api-predict and test-api-list, then prints a
# completion message. test-api-get-prediction is not part of the list; it
# needs a RUN_ID argument.
# printf is used instead of echo because echo's handling of "\n" differs
# between shells (bash's builtin prints it literally).
.PHONY: test-api-all
test-api-all: test-api-health test-api-predict test-api-list
	@printf '\n All API tests completed!\n'