Spaces:
Sleeping
Sleeping
Update preprocessing.py
Browse files
- preprocessing.py +1 -1
preprocessing.py
CHANGED
|
@@ -71,7 +71,7 @@ def clean_text(text):
|
|
| 71 |
# Remove URLs, emails, and other patterns
|
| 72 |
text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)
|
| 73 |
text = re.sub(r"\s+", " ", text) # Replace multiple spaces with a single space
|
| 74 |
-
text = re.sub(r"[^\u0600-\u06FF\s]", "", text) # Keep only Persian characters and spaces
|
| 75 |
return text.strip()
|
| 76 |
|
| 77 |
|
|
|
|
| 71 |
# Remove URLs, emails, and other patterns
|
| 72 |
text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)
|
| 73 |
text = re.sub(r"\s+", " ", text) # Replace multiple spaces with a single space
|
| 74 |
+
# text = re.sub(r"[^\u0600-\u06FF\s]", "", text) # Keep only Persian characters and spaces
|
| 75 |
return text.strip()
|
| 76 |
|
| 77 |
|