File size: 1,924 Bytes
19b8775 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
"""
Basic tests of the stanza data objects, especially the setter/getter routines
"""
import pytest
import stanza
from stanza.models.common.doc import Document, Sentence, Word
from stanza.tests import *
# Module-level marker: tags every test in this file with the `pipeline` mark
pytestmark = pytest.mark.pipeline
# Shared inputs and expected values for the tests in this module
EN_DOC = "This is a test document. Pretty cool!"

# Expected "<upos>_<xpos>" string for each word of EN_DOC,
# grouped into one inner tuple per sentence
EN_DOC_UPOS_XPOS = (
    ('PRON_DT', 'AUX_VBZ', 'DET_DT', 'NOUN_NN', 'NOUN_NN', 'PUNCT_.'),
    ('ADV_RB', 'ADJ_JJ', 'PUNCT_.'),
)

EN_DOC2 = "Chris Manning wrote a sentence. Then another."
@pytest.fixture(scope="module")
def nlp_pipeline():
    """
    Module-scoped fixture: an English stanza Pipeline loaded from TEST_MODELS_DIR
    """
    return stanza.Pipeline(dir=TEST_MODELS_DIR, lang='en')
def test_readonly(nlp_pipeline):
    """
    A Document property registered with only a default value can be read but not assigned
    """
    default_value = 123
    Document.add_property('some_property', default_value)

    parsed = nlp_pipeline(EN_DOC)
    assert parsed.some_property == default_value

    # no setter was registered above, so assignment is expected to fail
    with pytest.raises(ValueError):
        parsed.some_property = 456
def test_getter(nlp_pipeline):
    """
    A Word property registered with a custom getter is computed from the word's upos and xpos
    """
    def combined_tags(word):
        return f"{word.upos}_{word.xpos}"

    Word.add_property('upos_xpos', getter=combined_tags)

    parsed = nlp_pipeline(EN_DOC)
    # one inner tuple per sentence, mirroring the layout of EN_DOC_UPOS_XPOS
    observed = tuple(
        tuple(word.upos_xpos for word in sentence.words)
        for sentence in parsed.sentences
    )
    assert EN_DOC_UPOS_XPOS == observed
def test_setter_getter(nlp_pipeline):
    """
    A Sentence property with both a getter and a setter translates between
    the public string label and the integer stored in _classname
    """
    labels = ('ok', 'good', 'bad')
    int2str = dict(enumerate(labels))
    str2int = {label: index for index, label in int2str.items()}

    def read_label(sentence):
        # an unset _classname maps to None rather than a label
        if sentence._classname is None:
            return None
        return int2str[sentence._classname]

    def write_label(sentence, value):
        sentence._classname = str2int[value]

    Sentence.add_property('classname', getter=read_label, setter=write_label)

    parsed = nlp_pipeline(EN_DOC)
    first = parsed.sentences[0]

    # writing through the property stores the integer code
    first.classname = 'good'
    assert first._classname == 1

    # don't try this at home
    # (poking the backing field directly must be visible through the getter)
    first._classname = 2
    assert first.classname == 'bad'
def test_backpointer(nlp_pipeline):
    """
    The .sent attribute of entities, words and tokens refers to the very
    Sentence object that is held in doc.sentences
    """
    doc = nlp_pipeline(EN_DOC2)

    first_entity = doc.ents[0]
    assert first_entity.sent is doc.sentences[0]

    all_words = list(doc.iter_words())
    assert all_words[0].sent is doc.sentences[0]

    all_tokens = list(doc.iter_tokens())
    assert all_tokens[-1].sent is doc.sentences[-1]
|