Spaces:
Sleeping
Sleeping
Commit
·
d4e8bc1
1
Parent(s):
b4b05bc
Update preprocess.py
Browse files
- preprocess.py +6 -3
preprocess.py
CHANGED
|
@@ -38,9 +38,9 @@ def preprocess(data):
|
|
| 38 |
|
| 39 |
df = pd.DataFrame({'date': dates, 'message': messages})
|
| 40 |
|
| 41 |
-
df['date'] = df['date'].astype(str).str.replace('[', '')
|
| 42 |
-
df['date'] = df['date'].astype(str).str.replace(']', '')
|
| 43 |
-
df['date'] = df['date'].astype(str).str.replace('-', '')
|
| 44 |
|
| 45 |
|
| 46 |
# group_name = df.loc[df['message'].str.contains('added you')]['message'].str.split(':').str[0]
|
|
@@ -57,8 +57,11 @@ def preprocess(data):
|
|
| 57 |
# df['sender'] = df.message.str.split(':').str[0]
|
| 58 |
# df['message'] = df.message.str.split(':').str[1]
|
| 59 |
|
|
|
|
|
|
|
| 60 |
df[['sender', 'message']] = df['message'].str.split(':', n=1, expand=True)
|
| 61 |
|
|
|
|
| 62 |
# df['message'] = df['message'].str.encode('utf-8')
|
| 63 |
df = df[['date', 'sender', 'message']]
|
| 64 |
|
|
|
|
| 38 |
|
| 39 |
df = pd.DataFrame({'date': dates, 'message': messages})
|
| 40 |
|
| 41 |
+
df['date'] = df['date'].astype(str).str.replace('[', '', regex=False)
|
| 42 |
+
df['date'] = df['date'].astype(str).str.replace(']', '', regex=False)
|
| 43 |
+
df['date'] = df['date'].astype(str).str.replace('-', '', regex=False)
|
| 44 |
|
| 45 |
|
| 46 |
# group_name = df.loc[df['message'].str.contains('added you')]['message'].str.split(':').str[0]
|
|
|
|
| 57 |
# df['sender'] = df.message.str.split(':').str[0]
|
| 58 |
# df['message'] = df.message.str.split(':').str[1]
|
| 59 |
|
| 60 |
+
# df[['sender', 'message']] = df['message'].str.split(':', n=1, expand=True)
|
| 61 |
+
df['message'] = df['message'].astype(str) # Convert column to string type
|
| 62 |
df[['sender', 'message']] = df['message'].str.split(':', n=1, expand=True)
|
| 63 |
|
| 64 |
+
|
| 65 |
# df['message'] = df['message'].str.encode('utf-8')
|
| 66 |
df = df[['date', 'sender', 'message']]
|
| 67 |
|