"""Check tokenize_files.main by running it on a small English text file."""

import pytest

from stanza.models.tokenization import tokenize_files
from stanza.tests import TEST_MODELS_DIR

# Apply the pipeline and travis markers to every test in this module.
pytestmark = [pytest.mark.pipeline, pytest.mark.travis]

# Exact text the output file must contain; lstrip() drops the whitespace that
# follows the opening quotes.
# NOTE(review): the line breaks inside this literal were reconstructed as one
# sentence per line -- confirm against the tokenizer's actual output.
EXPECTED = """
This is a test .
This is a second sentence .
I took my daughter ice skating
""".lstrip()


def test_tokenize_files(tmp_path):
    """Run tokenize_files.main on a temporary file and compare with EXPECTED.

    Args:
        tmp_path: pytest-provided temporary directory that holds both the
            input text and the output file.
    """
    # Write the raw text that will be passed to the command-line entry point.
    source_path = tmp_path / "input.txt"
    source_path.write_text("This is a test. This is a second sentence.\n\nI took my daughter ice skating")

    # main() takes an argv-style list of strings, so paths are converted.
    result_path = tmp_path / "output.txt"
    cli_args = [
        str(source_path),
        "--lang", "en",
        "--output_file", str(result_path),
        "--model_dir", TEST_MODELS_DIR,
    ]
    tokenize_files.main(cli_args)

    # The whole output file must match the expected text exactly.
    produced = result_path.read_text()
    assert EXPECTED == produced