Spaces:
Running
Running
| from src.flashcard_generator.text_processing import clean_text, split_into_chunks, token_count | |
def test_clean_text_normalizes_whitespace_and_hyphenation():
    """Check that clean_text rejoins a hyphen-broken word and collapses whitespace.

    The input splits "networks" across a line break with a trailing hyphen
    and separates the remaining words with blank lines and extra spaces;
    the cleaned result is expected to be a single, single-spaced sentence.
    """
    raw = "Neural net-\nworks\n\n learn patterns."
    expected = "Neural networks learn patterns."

    assert clean_text(raw) == expected
def test_split_into_chunks_keeps_content():
    """Check that split_into_chunks splits long text without losing tokens.

    A single sentence repeated 120 times is chunked with ``min_tokens=40``
    and ``max_tokens=80``. The test requires more than one chunk, that the
    per-chunk token counts add up to the token count of the whole text, and
    that no chunk exceeds 120 tokens.
    """
    sentence = "Photosynthesis converts light energy into chemical energy."
    repeated = [sentence] * 120
    text = " ".join(repeated)

    chunks = split_into_chunks(text, min_tokens=40, max_tokens=80)
    assert len(chunks) > 1

    # Count each chunk once and reuse the numbers for both checks below.
    sizes = [token_count(piece) for piece in chunks]

    # Nothing may be dropped or duplicated by the split.
    assert sum(sizes) == token_count(text)

    # NOTE(review): the ceiling here is 120 although max_tokens=80 was
    # requested -- presumably the chunker may overshoot to finish a
    # sentence; confirm against split_into_chunks before tightening.
    assert all(size <= 120 for size in sizes)