tanish78 committed on
Commit
148df09
·
verified ·
1 Parent(s): a8b9aeb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -13
app.py CHANGED
@@ -8,24 +8,21 @@ import re
8
  from io import BytesIO
9
 
10
  def preprocess_data(df):
11
- #Keep rows with "user" in the "sender" column
12
- df = df[df['sender'].str.contains('user', case=False)]
13
- df = df[df['Message Type'].isin(['TEXT']) | df['Message Type'].isin(['POSTBACK'])]
14
-
15
- # Remove unnecessary columns
16
- columns_to_drop = ["Timestamp", "sender", "User id", "User Name", "User Email", "User Phone No", "Platform", "Message Type"]
17
- df = df.drop(columns=columns_to_drop, axis=1)
18
-
19
- # Lowercase the "Message" column and rename it to "texts"
20
- df['Message'] = df['Message'].str.lower()
21
- df.rename(columns={'Message': 'texts'}, inplace=True)
22
-
23
  # Convert the 'texts' column to string
24
  df['texts'] = df['texts'].astype(str)
25
-
 
 
 
26
  # Remove URL from text
27
  df['texts'] = df['texts'].apply(lambda text: re.sub(r'https?://\S+|www\.\S+', '', text))
28
 
 
29
  # Remove emojis from text
30
  def remove_emoji(string):
31
  emoji_pattern = re.compile("["
 
8
  from io import BytesIO
9
 
10
  def preprocess_data(df):
11
+ print("Preprocessing data...")
12
+
13
+ # Renaming the 'Queries' column to 'texts'
14
+ df.rename(columns={'Queries': 'texts'}, inplace=True)
15
+
 
 
 
 
 
 
 
16
  # Convert the 'texts' column to string
17
  df['texts'] = df['texts'].astype(str)
18
+
19
+ # Lowercase the 'texts' column
20
+ df['texts'] = df['texts'].str.lower()
21
+
22
  # Remove URL from text
23
  df['texts'] = df['texts'].apply(lambda text: re.sub(r'https?://\S+|www\.\S+', '', text))
24
 
25
+
26
  # Remove emojis from text
27
  def remove_emoji(string):
28
  emoji_pattern = re.compile("["