ai_flashcard_generator / tests /test_text_processing.py
pranshu dhiman
Initial commit with Docker and Streamlit
46b701f
Raw
History Blame Contribute Delete
669 Bytes
from src.flashcard_generator.text_processing import clean_text, split_into_chunks, token_count
def test_clean_text_normalizes_whitespace_and_hyphenation():
    """Check that clean_text rejoins a hyphen-split word and collapses whitespace."""
    # "net-\nworks" is a word broken across a line; "\n\n " is a whitespace run.
    raw = "Neural net-\nworks\n\n learn patterns."
    expected = "Neural networks learn patterns."
    assert clean_text(raw) == expected
def test_split_into_chunks_keeps_content():
    """Check that chunking a long text yields several chunks and loses no tokens."""
    sentence = "Photosynthesis converts light energy into chemical energy."
    # Repeat the sentence 120 times so the text is long enough to need splitting.
    text = " ".join(sentence for _ in range(120))

    chunks = split_into_chunks(text, min_tokens=40, max_tokens=80)

    assert len(chunks) > 1

    # Per-chunk token counts, measured once and reused by both checks below.
    sizes = [token_count(chunk) for chunk in chunks]

    # The chunks together must account for every token of the input.
    assert sum(sizes) == token_count(text)

    # NOTE(review): the bound here is 120 while max_tokens is 80 -- presumably
    # deliberate slack for chunks that overflow; confirm against the splitter.
    assert all(size <= 120 for size in sizes)