from src.flashcard_generator.text_processing import (
    clean_text,
    split_into_chunks,
    token_count,
)


def test_clean_text_normalizes_whitespace_and_hyphenation():
    """Check that clean_text rejoins a hyphenated line break and tidies whitespace.

    The input has a word split as ``net-\\nworks`` and a blank line followed
    by a leading space; the expected output is a single clean sentence.
    """
    raw = "Neural net-\nworks\n\n learn patterns."
    expected = "Neural networks learn patterns."

    assert clean_text(raw) == expected


def test_split_into_chunks_keeps_content():
    """Check that chunking a long text yields several chunks and drops no tokens.

    The same sentence is repeated 120 times, split with ``min_tokens=40`` and
    ``max_tokens=80``, and the per-chunk token counts are compared against
    the token count of the whole text.
    """
    sentence = "Photosynthesis converts light energy into chemical energy."
    text = " ".join(sentence for _ in range(120))

    chunks = split_into_chunks(text, min_tokens=40, max_tokens=80)
    sizes = [token_count(piece) for piece in chunks]

    # The text is long enough that it must be split at least once.
    assert len(chunks) > 1
    # No tokens may be lost or duplicated across chunk boundaries.
    assert sum(sizes) == token_count(text)
    # NOTE(review): the bound is 120, looser than max_tokens=80 -- presumably
    # the splitter is allowed to overshoot max_tokens; confirm the intended cap.
    assert all(size <= 120 for size in sizes)