Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,24 +8,21 @@ import re
|
|
| 8 |
from io import BytesIO
|
| 9 |
|
| 10 |
def preprocess_data(df):
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
columns_to_drop = ["Timestamp", "sender", "User id", "User Name", "User Email", "User Phone No", "Platform", "Message Type"]
|
| 17 |
-
df = df.drop(columns=columns_to_drop, axis=1)
|
| 18 |
-
|
| 19 |
-
# Lowercase the "Message" column and rename it to "texts"
|
| 20 |
-
df['Message'] = df['Message'].str.lower()
|
| 21 |
-
df.rename(columns={'Message': 'texts'}, inplace=True)
|
| 22 |
-
|
| 23 |
# Convert the 'texts' column to string
|
| 24 |
df['texts'] = df['texts'].astype(str)
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
| 26 |
# Remove URL from text
|
| 27 |
df['texts'] = df['texts'].apply(lambda text: re.sub(r'https?://\S+|www\.\S+', '', text))
|
| 28 |
|
|
|
|
| 29 |
# Remove emojis from text
|
| 30 |
def remove_emoji(string):
|
| 31 |
emoji_pattern = re.compile("["
|
|
|
|
| 8 |
from io import BytesIO
|
| 9 |
|
| 10 |
def preprocess_data(df):
|
| 11 |
+
print("Preprocessing data...")
|
| 12 |
+
|
| 13 |
+
# Renaming the 'Queries' column to 'texts'
|
| 14 |
+
df.rename(columns={'Queries': 'texts'}, inplace=True)
|
| 15 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# Convert the 'texts' column to string
|
| 17 |
df['texts'] = df['texts'].astype(str)
|
| 18 |
+
|
| 19 |
+
# Lowercase the 'texts' column
|
| 20 |
+
df['texts'] = df['texts'].str.lower()
|
| 21 |
+
|
| 22 |
# Remove URL from text
|
| 23 |
df['texts'] = df['texts'].apply(lambda text: re.sub(r'https?://\S+|www\.\S+', '', text))
|
| 24 |
|
| 25 |
+
|
| 26 |
# Remove emojis from text
|
| 27 |
def remove_emoji(string):
|
| 28 |
emoji_pattern = re.compile("["
|