noranisa committed on
Commit
19e8b7e
·
verified ·
1 Parent(s): e56b60e

Update services/preprocessing.py

Browse files
Files changed (1) hide show
  1. services/preprocessing.py +7 -3
services/preprocessing.py CHANGED
@@ -3,6 +3,10 @@ import re
3
  def clean_text(text):
4
  text = text.lower()
5
  text = re.sub(r"http\S+", "", text)
6
- text = re.sub(r"@\w+", "", text)
7
- text = re.sub(r"[^a-zA-Z\s]", "", text)
8
- return text.strip()
 
 
 
 
 
3
  def clean_text(text):
4
  text = text.lower()
5
  text = re.sub(r"http\S+", "", text)
6
+ text = re.sub(r"[^a-zA-Z\s]", " ", text)
7
+ text = re.sub(r'(.)\1+', r'\1\1', text)
8
+ text = re.sub(r'\s+', ' ', text).strip()
9
+ return text
10
+
11
+ def is_valid(text):
12
+ return len(text.split()) > 3