cot-anc / tests /test_sentence_split.py
BART-ender's picture
Deploy Thought Anchors
fda8fb3 verified
raw
history blame contribute delete
626 Bytes
from __future__ import annotations
from app.analysis.sentence_split import normalize_trace_text, split_sentences
def test_sentence_split_preserves_spacing_and_newlines() -> None:
    """Verify that sentence spans keep their trailing whitespace.

    The input is a trace wrapped in ``<think>`` tags.  The test expects:

    * the normalized text to be the tag-free inner content, with the
      single space and the blank line between sentences left intact;
    * each span's ``text`` to carry the whitespace that follows its
      sentence (a space after the first, ``"\\n\\n"`` after the second);
    * the second span's ``start_char`` to equal the length of the first
      span's text.
    """
    trace = "<think>First sentence. Second sentence?\n\nThird sentence!</think>"
    cleaned = normalize_trace_text(trace)
    pieces = split_sentences(cleaned)

    expected_texts = [
        "First sentence. ",
        "Second sentence?\n\n",
        "Third sentence!",
    ]
    # The second span should begin right where the first one ends.
    expected_second_start = len("First sentence. ")

    assert cleaned == "First sentence. Second sentence?\n\nThird sentence!"
    assert [piece.text for piece in pieces] == expected_texts
    assert pieces[1].start_char == expected_second_start