Spaces:
Sleeping
Sleeping
Update app/backend/text_processing.py
Browse files
app/backend/text_processing.py
CHANGED
|
@@ -6,7 +6,7 @@ import re
|
|
| 6 |
|
| 7 |
|
| 8 |
def sent_tokenize(text):
|
| 9 |
-
"""Simple sentence tokenizer using regex
|
| 10 |
# Split on sentence endings followed by whitespace and capital letter
|
| 11 |
sentences = re.split(r'(?<=[.!?])\s+(?=[A-Z])', text)
|
| 12 |
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def sent_tokenize(text):
|
| 9 |
+
"""Simple sentence tokenizer using regex (simpler alternative to NLTK)"""
|
| 10 |
# Split on sentence endings followed by whitespace and capital letter
|
| 11 |
sentences = re.split(r'(?<=[.!?])\s+(?=[A-Z])', text)
|
| 12 |
|