Spaces:
Sleeping
Sleeping
| from .IChunkGenerator import IChunkGenerator | |
| import nltk | |
| from nltk.tokenize import sent_tokenize | |
class ChunkGenerator(IChunkGenerator):
    """Chunk generator that packs whole sentences into word-bounded chunks."""

    def chunk_text(self, text: str, max_words: int = 100) -> list:
        """Group the sentences of *text* into chunks of at most *max_words* words.

        The text is split into sentences with NLTK's ``sent_tokenize`` and the
        sentences are packed, in order, into chunks. Words are counted by
        whitespace splitting. Sentences are never split, so a single sentence
        longer than ``max_words`` is emitted as a chunk of its own; that is
        the only case in which a chunk exceeds the budget.

        Args:
            text: The text to split into chunks.
            max_words: Upper bound on whitespace-separated words per chunk.

        Returns:
            A list of chunk strings, each made of whole sentences joined by a
            single space. Empty if the tokenizer yields no sentences.
        """
        chunks, chunk = [], []
        word_count = 0
        for sentence in sent_tokenize(text):
            sentence_words = len(sentence.split())
            # Close the current chunk *before* adding a sentence that would
            # push it past the budget, so max_words acts as a real upper
            # bound. The `chunk` guard avoids emitting an empty chunk when the
            # very first sentence is already over the limit.
            if chunk and word_count + sentence_words > max_words:
                chunks.append(" ".join(chunk))
                chunk = []
                word_count = 0
            chunk.append(sentence)
            word_count += sentence_words
        # Flush whatever is left after the last sentence.
        if chunk:
            chunks.append(" ".join(chunk))
        return chunks