"""
This file contains tests for the API of your model. To run these tests, first install the test requirements:
```bash
pip install -r requirements-test.txt
```
Then execute `pytest` in the directory of this file.
- Change `NewModel` to the name of the class in your model.py file (here: `HuggingFaceNER`).
- Change the `request` and `expected_response` variables to match the input and output of your model.
"""
import pytest
import json
from model import HuggingFaceNER
import unittest.mock as mock
@pytest.fixture
def client():
    """Yield a test client for the app built by ``_wsgi.init_app`` around ``HuggingFaceNER``."""
    # Imported lazily so the app module is only loaded when a test asks for the client.
    from _wsgi import init_app

    wsgi_app = init_app(model_class=HuggingFaceNER)
    wsgi_app.config['TESTING'] = True
    with wsgi_app.test_client() as test_client:
        yield test_client
def test_predict(client):
    """POST one task to ``/predict`` and compare model version and the first two spans.

    Only ``model_version`` and each region's ``value`` (offsets and labels) are
    compared; the scores listed in ``expected_response`` are not asserted.
    """
    # Your labeling configuration here
    label_config = '''
    <View>
      <Text name="text" value="$text"/>
      <Labels name="ner" toName="text">
        <Label value="Person"/>
        <Label value="Location"/>
        <Label value="Time"/>
      </Labels>
    </View>
    '''
    task = {'data': {'text': 'President Obama is speaking at 3pm today in New York.'}}
    request = {'tasks': [task], 'label_config': label_config}

    expected_response = {
        'results': [{
            'model_version': 'HuggingFaceNER-v0.0.1',
            'result': [
                {
                    'from_name': 'ner',
                    'score': 0.9974774718284607,
                    'to_name': 'text',
                    'type': 'labels',
                    'value': {'end': 15, 'labels': ['PER'], 'start': 10},
                },
                {
                    'from_name': 'ner',
                    'score': 0.9994751214981079,
                    'to_name': 'text',
                    'type': 'labels',
                    'value': {'end': 52, 'labels': ['LOC'], 'start': 44},
                },
            ],
            'score': 0.9984762966632843,
        }]
    }

    http_response = client.post(
        '/predict',
        data=json.dumps(request),
        content_type='application/json',
    )
    assert http_response.status_code == 200

    actual = json.loads(http_response.data)['results'][0]
    wanted = expected_response['results'][0]
    assert actual['model_version'] == wanted['model_version']
    # Explicit indices (not zip) so a response with fewer than two regions still fails.
    for position in (0, 1):
        assert actual['result'][position]['value'] == wanted['result'][position]['value']
def get_labeled_tasks_mock(self, project_id):
    """Stand-in for ``HuggingFaceNER._get_tasks``: return one task with NER annotations.

    Mocks the list of labeled Label Studio tasks; ``self`` and ``project_id``
    are accepted only to match the patched method's signature and are unused.
    """
    # (start, end, label) character spans inside the task text below.
    spans = (
        (10, 15, 'Person'),
        (44, 52, 'Location'),
        (31, 40, 'Time'),
    )
    regions = [
        {
            'from_name': 'ner',
            'to_name': 'text',
            'type': 'labels',
            'value': {'start': begin, 'end': stop, 'labels': [tag]},
        }
        for begin, stop, tag in spans
    ]
    task = {
        'id': '0',
        'data': {'text': 'President Obama is speaking at 3pm today in New York'},
        'annotations': [{'result': regions}],
    }
    return [task]
@pytest.fixture
def mock_start_training():
    """Patch ``HuggingFaceNER.START_TRAINING_EACH_N_UPDATES`` to 1 to trigger training in the test."""
    threshold_patch = mock.patch.object(
        HuggingFaceNER,
        'START_TRAINING_EACH_N_UPDATES',
        new=1,
    )
    with threshold_patch:
        yield
@pytest.fixture
def mock_get_labeled_tasks():
    """Replace ``HuggingFaceNER._get_tasks`` with ``get_labeled_tasks_mock`` while the test runs."""
    tasks_patch = mock.patch.object(
        HuggingFaceNER,
        '_get_tasks',
        new=get_labeled_tasks_mock,
    )
    with tasks_patch:
        yield
@pytest.fixture
def mock_baseline_model_name_for_train():
    """Point ``model.BASELINE_MODEL_NAME`` at ``distilbert/distilbert-base-uncased`` while the test runs."""
    baseline_patch = mock.patch(
        'model.BASELINE_MODEL_NAME',
        new='distilbert/distilbert-base-uncased',
    )
    with baseline_patch:
        yield
def test_fit(client, mock_get_labeled_tasks, mock_start_training, mock_baseline_model_name_for_train):
    """Trigger training through ``/webhook`` and check a fine-tuned model is written.

    Steps:
      1. POST an ``ANNOTATION_CREATED`` event to ``/webhook`` and expect HTTP 201.
      2. Check that model weights exist under ``MODEL_DIR/finetuned_model``.
      3. POST a task to ``/predict`` and expect HTTP 200.

    The ``finetuned_model`` directory is removed afterwards even when an
    assertion fails, so a broken run does not leak artifacts into later runs.
    """
    import os
    import shutil
    from model import MODEL_DIR

    # Your labeling configuration here
    label_config = '''
    <View>
      <Text name="text" value="$text"/>
      <Labels name="ner" toName="text">
        <Label value="Person"/>
        <Label value="Location"/>
        <Label value="Time"/>
      </Labels>
    </View>
    '''
    results_dir = os.path.join(MODEL_DIR, 'finetuned_model')
    # Recent `transformers` releases save weights as `model.safetensors` by
    # default instead of `pytorch_model.bin`, so either file is accepted.
    # NOTE(review): confirm against how model.py saves the fine-tuned model.
    weight_files = ('pytorch_model.bin', 'model.safetensors')

    try:
        request = {
            'action': 'ANNOTATION_CREATED',
            'project': {
                'id': 12345,
                'label_config': label_config,
            },
            'annotation': {
                'project': 12345,
            },
        }
        response = client.post('/webhook', data=json.dumps(request), content_type='application/json')
        assert response.status_code == 201

        # assert new model is created in the finetuned_model directory
        assert any(
            os.path.exists(os.path.join(results_dir, filename))
            for filename in weight_files
        )

        # now let's test whether the model is trained by running predict
        request = {
            'tasks': [{
                'data': {
                    'text': 'President Obama is speaking at 3pm today in New York.'
                }
            }],
            'label_config': label_config,
        }
        response = client.post('/predict', data=json.dumps(request), content_type='application/json')
        assert response.status_code == 200

        # TODO: we also need to check the prediction results to make sure the model is trained correctly
        # but the training needs to be deterministic to make the test stable
        # assert response is as expected
    finally:
        # Remove the finetuned_model directory after testing. ignore_errors keeps a
        # missing directory (e.g. training never ran) from masking the real failure.
        shutil.rmtree(results_dir, ignore_errors=True)
def test_fit_missing_annotation(monkeypatch):
    """Check that ``fit()`` does not raise when the payload has no ``annotation`` key."""
    ner_model = HuggingFaceNER()

    # Mock label_interface to avoid AttributeError, and make
    # get_first_tag_occurence return fake values.
    fake_interface = mock.MagicMock()
    fake_interface.get_first_tag_occurence.return_value = ('Labels', 'Text', 'text_field_name')
    ner_model.label_interface = fake_interface

    def fake_get_tasks(project_id):
        # One fake task without any annotations.
        return [
            {
                "id": "1",
                "data": {"text_field_name": "Hello world"},
                "annotations": [],
            }
        ]

    # Patched on the instance, so the stub takes no ``self``.
    monkeypatch.setattr(ner_model, "_get_tasks", fake_get_tasks)

    # Payload with the annotation missing; only the project is present.
    payload = {
        "action": "ANNOTATION_UPDATED",
        "project": {"id": 123, "name": "Test Project"},
    }

    try:
        ner_model.fit(event="ANNOTATION_UPDATED", data=payload)
    except Exception as e:
        pytest.fail(f"fit() raised an exception when annotation is missing: {e}")