|
|
""" |
|
|
Basic tests of the stanza data objects, especially the setter/getter routines |
|
|
""" |
|
|
import pytest |
|
|
|
|
|
import stanza |
|
|
from stanza.models.common.doc import Document, Sentence, Word |
|
|
from stanza.tests import * |
|
|
|
|
|
# Module-level pytest mark: tags every test in this file with the `pipeline`
# marker so the whole module can be selected or deselected via `-m pipeline`.
pytestmark = pytest.mark.pipeline
|
|
|
|
|
|
|
|
# Two-sentence English input used by the property tests below.
EN_DOC = "This is a test document. Pretty cool!"

# Expected "<upos>_<xpos>" string for each word of EN_DOC: one inner tuple per
# sentence, one entry per word, in document order.
EN_DOC_UPOS_XPOS = (
    ('PRON_DT', 'AUX_VBZ', 'DET_DT', 'NOUN_NN', 'NOUN_NN', 'PUNCT_.'),
    ('ADV_RB', 'ADJ_JJ', 'PUNCT_.'),
)

# Second English input, used by the back-pointer test (which reads doc.ents).
EN_DOC2 = "Chris Manning wrote a sentence. Then another."
|
|
|
|
|
@pytest.fixture(scope="module")
def nlp_pipeline():
    """Return an English stanza Pipeline loaded from TEST_MODELS_DIR.

    The fixture is module-scoped, so the pipeline is constructed once and
    the same instance is handed to every test in this file.
    """
    return stanza.Pipeline(dir=TEST_MODELS_DIR, lang='en')
|
|
|
|
|
def test_readonly(nlp_pipeline):
    """A Document property added without a setter reads back but cannot be assigned."""
    Document.add_property('some_property', 123)

    document = nlp_pipeline(EN_DOC)
    assert document.some_property == 123

    # No setter was supplied to add_property, so assignment must raise.
    with pytest.raises(ValueError):
        document.some_property = 456
|
|
|
|
|
|
|
|
def test_getter(nlp_pipeline):
    """A getter-only Word property is computed from each word's upos and xpos."""

    def joined_tags(word):
        # Combine the two tag fields into the "<upos>_<xpos>" form.
        return f"{word.upos}_{word.xpos}"

    Word.add_property('upos_xpos', getter=joined_tags)

    parsed = nlp_pipeline(EN_DOC)

    # Collect the property per sentence, mirroring the nested-tuple layout
    # of the expected constant.
    observed = tuple(
        tuple(item.upos_xpos for item in sent.words)
        for sent in parsed.sentences
    )
    assert EN_DOC_UPOS_XPOS == observed
|
|
|
|
|
def test_setter_getter(nlp_pipeline):
    """A Sentence property with both getter and setter translates in each direction.

    The public `classname` value is a string label, while the backing
    `_classname` attribute holds the matching integer code.
    """
    int2str = {0: 'ok', 1: 'good', 2: 'bad'}
    # Reverse lookup derived from the forward table: label -> integer code.
    str2int = {label: code for code, label in int2str.items()}

    def read_classname(sentence):
        code = sentence._classname
        return None if code is None else int2str[code]

    def write_classname(sentence, value):
        sentence._classname = str2int[value]

    Sentence.add_property('classname', getter=read_classname, setter=write_classname)

    first = nlp_pipeline(EN_DOC).sentences[0]

    # Writing the label through the property stores the integer code.
    first.classname = 'good'
    assert first._classname == 1

    # Writing the integer code directly is visible as a label via the getter.
    first._classname = 2
    assert first.classname == 'bad'
|
|
|
|
|
def test_backpointer(nlp_pipeline):
    """Entities, words and tokens expose `sent` pointing at their own Sentence object."""
    parsed = nlp_pipeline(EN_DOC2)
    sentences = parsed.sentences

    # The first entity must reference the first sentence (identity, not equality).
    first_entity = parsed.ents[0]
    assert first_entity.sent is sentences[0]

    # The first word of the document belongs to the first sentence.
    words = list(parsed.iter_words())
    assert words[0].sent is sentences[0]

    # The last token of the document belongs to the last sentence.
    tokens = list(parsed.iter_tokens())
    assert tokens[-1].sent is sentences[-1]
|
|
|