sabirbagwan committed on
Commit
d4e8bc1
·
1 Parent(s): b4b05bc

Update preprocess.py

Browse files
Files changed (1) hide show
  1. preprocess.py +6 -3
preprocess.py CHANGED
@@ -38,9 +38,9 @@ def preprocess(data):
38
 
39
  df = pd.DataFrame({'date': dates, 'message': messages})
40
 
41
- df['date'] = df['date'].astype(str).str.replace('[', '')
42
- df['date'] = df['date'].astype(str).str.replace(']', '')
43
- df['date'] = df['date'].astype(str).str.replace('-', '')
44
 
45
 
46
  # group_name = df.loc[df['message'].str.contains('added you')]['message'].str.split(':').str[0]
@@ -57,8 +57,11 @@ def preprocess(data):
57
  # df['sender'] = df.message.str.split(':').str[0]
58
  # df['message'] = df.message.str.split(':').str[1]
59
 
 
 
60
  df[['sender', 'message']] = df['message'].str.split(':', n=1, expand=True)
61
 
 
62
  # df['message'] = df['message'].str.encode('utf-8')
63
  df = df[['date', 'sender', 'message']]
64
 
 
38
 
39
  df = pd.DataFrame({'date': dates, 'message': messages})
40
 
41
+ df['date'] = df['date'].astype(str).str.replace('[', '', regex=False)
42
+ df['date'] = df['date'].astype(str).str.replace(']', '', regex=False)
43
+ df['date'] = df['date'].astype(str).str.replace('-', '', regex=False)
44
 
45
 
46
  # group_name = df.loc[df['message'].str.contains('added you')]['message'].str.split(':').str[0]
 
57
  # df['sender'] = df.message.str.split(':').str[0]
58
  # df['message'] = df.message.str.split(':').str[1]
59
 
60
+ # df[['sender', 'message']] = df['message'].str.split(':', n=1, expand=True)
61
+ df['message'] = df['message'].astype(str) # Convert column to string type
62
  df[['sender', 'message']] = df['message'].str.split(':', n=1, expand=True)
63
 
64
+
65
  # df['message'] = df['message'].str.encode('utf-8')
66
  df = df[['date', 'sender', 'message']]
67