Albin Thörn Cleland
Clean initial commit with LFS
19b8775
import pytest
from stanza.models.constituency import tree_reader
from stanza.models.constituency.tree_reader import MixedTreeError, UnclosedTreeError, UnlabeledTreeError
from stanza.tests import *
pytestmark = [pytest.mark.pipeline, pytest.mark.travis]
def test_simple():
"""
Tests reading two simple trees from the same text
"""
text = "(VB Unban) (NNP Opal)"
trees = tree_reader.read_trees(text)
assert len(trees) == 2
assert trees[0].is_preterminal()
assert trees[0].label == 'VB'
assert trees[0].children[0].label == 'Unban'
assert trees[1].is_preterminal()
assert trees[1].label == 'NNP'
assert trees[1].children[0].label == 'Opal'
def test_newlines():
"""
The same test should work if there are newlines
"""
text = "(VB Unban)\n\n(NNP Opal)"
trees = tree_reader.read_trees(text)
assert len(trees) == 2
def test_parens():
"""
Parens should be escaped in the tree files and escaped when written
"""
text = "(-LRB- -LRB-) (-RRB- -RRB-)"
trees = tree_reader.read_trees(text)
assert len(trees) == 2
assert trees[0].label == '-LRB-'
assert trees[0].children[0].label == '('
assert "{}".format(trees[0]) == '(-LRB- -LRB-)'
assert trees[1].label == '-RRB-'
assert trees[1].children[0].label == ')'
assert "{}".format(trees[1]) == '(-RRB- -RRB-)'
def test_complicated():
"""
A more complicated tree that should successfully read
"""
text="( (SBARQ (WHNP (WP Who)) (SQ (VP (VBZ sits) (PP (IN in) (NP (DT this) (NN seat))))) (. ?)))"
trees = tree_reader.read_trees(text)
assert len(trees) == 1
tree = trees[0]
assert not tree.is_leaf()
assert not tree.is_preterminal()
assert tree.label == 'ROOT'
assert len(tree.children) == 1
assert tree.children[0].label == 'SBARQ'
assert len(tree.children[0].children) == 3
assert [x.label for x in tree.children[0].children] == ['WHNP', 'SQ', '.']
# etc etc
def test_one_word():
"""
Check that one node trees are correctly read
probably not super relevant for the parsing use case
"""
text="(FOO) (BAR)"
trees = tree_reader.read_trees(text)
assert len(trees) == 2
assert trees[0].is_leaf()
assert trees[0].label == 'FOO'
assert trees[1].is_leaf()
assert trees[1].label == 'BAR'
def test_missing_close_parens():
"""
Test the unclosed error condition
"""
text = "(Foo) \n (Bar \n zzz"
try:
trees = tree_reader.read_trees(text)
raise AssertionError("Expected an exception")
except UnclosedTreeError as e:
assert e.line_num == 1
def test_mixed_tree():
"""
Test the mixed error condition
"""
text = "(Foo) \n (Bar) \n (Unban (Mox) Opal)"
try:
trees = tree_reader.read_trees(text)
raise AssertionError("Expected an exception")
except MixedTreeError as e:
assert e.line_num == 2
trees = tree_reader.read_trees(text, broken_ok=True)
assert len(trees) == 3
def test_unlabeled_tree():
"""
Test the unlabeled error condition
"""
text = "(ROOT ((Foo) (Bar)))"
try:
trees = tree_reader.read_trees(text)
raise AssertionError("Expected an exception")
except UnlabeledTreeError as e:
assert e.line_num == 0
trees = tree_reader.read_trees(text, broken_ok=True)
assert len(trees) == 1