stanza-digphil / stanza /tests /tokenization /test_tokenize_files.py
Albin Thörn Cleland
Clean initial commit with LFS
19b8775
import pytest
from stanza.models.tokenization import tokenize_files
from stanza.tests import TEST_MODELS_DIR
# Module-level pytest marks: pytest applies every mark listed in `pytestmark`
# to each test collected from this file, so the tests here can be selected or
# deselected with `-m pipeline` / `-m travis`.
pytestmark = [pytest.mark.pipeline, pytest.mark.travis]
# Exact text the test below expects to read back from the tokenizer's output
# file. The literal opens with a newline purely for layout; `.lstrip()` drops
# that leading whitespace, while the trailing newline before the closing
# quotes is kept as part of the expected value.
EXPECTED = """
This is a test . This is a second sentence .
I took my daughter ice skating
""".lstrip()
def test_tokenize_files(tmp_path):
    """Run the tokenize_files entry point on a small text file and check its output.

    A two-paragraph English snippet is written under pytest's ``tmp_path``,
    ``tokenize_files.main`` is invoked with command-line style arguments
    pointing at the test model directory, and the contents of the produced
    output file are compared against ``EXPECTED``.
    """
    source_path = tmp_path / "input.txt"
    source_path.write_text("This is a test. This is a second sentence.\n\nI took my daughter ice skating")

    result_path = tmp_path / "output.txt"

    # Same argv the script would receive from the command line; the paths are
    # converted to plain strings, as they would be when typed in a shell.
    cli_args = [
        str(source_path),
        "--lang", "en",
        "--output_file", str(result_path),
        "--model_dir", TEST_MODELS_DIR,
    ]
    tokenize_files.main(cli_args)

    tokenized = result_path.read_text()
    assert EXPECTED == tokenized