Spaces:
Sleeping
Sleeping
Update services/preprocessing.py
Browse files
services/preprocessing.py
CHANGED
|
@@ -3,6 +3,10 @@ import re
|
|
| 3 |
def clean_text(text):
|
| 4 |
text = text.lower()
|
| 5 |
text = re.sub(r"http\S+", "", text)
|
| 6 |
-
text = re.sub(r"
|
| 7 |
-
text = re.sub(r
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
# Patterns used by clean_text, compiled once at import time.
_URL_RE = re.compile(r"http\S+")
# Input is lowercased before this runs, so only a-z needs to be kept.
_NON_LETTER_RE = re.compile(r"[^a-z\s]")
_REPEATED_CHAR_RE = re.compile(r"(.)\1+")
_WHITESPACE_RE = re.compile(r"\s+")


def clean_text(text) -> str:
    """Normalize raw text into lowercase, letters-only, single-spaced form.

    Steps, in order:
      1. Lowercase the text.
      2. Drop every substring that starts with ``http`` and runs up to the
         next whitespace character.
      3. Replace every character that is neither an ASCII letter nor
         whitespace with a space.
      4. Squeeze any run of the same character down to exactly two
         (e.g. ``"sooooo"`` -> ``"soo"``).
      5. Collapse whitespace runs to one space and strip both ends.

    Args:
        text: The string to clean. ``None`` is treated as empty text.

    Returns:
        The cleaned string; ``""`` when ``text`` is ``None`` or nothing
        survives cleaning.
    """
    # A missing value would otherwise fail on .lower() with AttributeError.
    if text is None:
        return ""

    text = text.lower()
    text = _URL_RE.sub("", text)
    text = _NON_LETTER_RE.sub(" ", text)
    text = _REPEATED_CHAR_RE.sub(r"\1\1", text)
    return _WHITESPACE_RE.sub(" ", text).strip()
|
| 10 |
+
|
| 11 |
+
def is_valid(text, *, min_words: int = 4) -> bool:
    """Return whether ``text`` contains enough whitespace-separated words.

    Args:
        text: The string to check. ``None`` or an empty string is never valid.
        min_words: Minimum number of words required. The default of 4 keeps
            the original "more than 3 words" rule.

    Returns:
        ``True`` when ``text`` has at least ``min_words`` words.
    """
    # Covers None as well as "", neither of which has any words to count.
    if not text:
        return False
    return len(text.split()) >= min_words
|