SurajJha21 committed on
Commit
2aa76f6
·
verified ·
1 Parent(s): 684eb5f

Update preprocessor.py

Browse files
Files changed (1) hide show
  1. preprocessor.py +4 -8
preprocessor.py CHANGED
@@ -2,17 +2,13 @@ import re
2
  import pandas as pd
3
 
4
  def preprocess(data):
5
- pattern = '\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s-\s'
6
-
7
-
8
  messages_and_dates = re.findall(pattern, data)
9
- messages = [message_group[0] for message_group in messages_and_dates] # Extract messages (first group)
10
- dates = [message_group[1] for message_group in messages_and_dates] # Extract dates (second group)
11
 
12
- df = pd.DataFrame({'user_message': messages, 'date': pd.to_datetime(dates, format='%d/%m/%Y, %H:%M - ')})
13
-
14
 
15
-
16
 
17
  users = []
18
  messages = []
 
2
  import pandas as pd
3
 
4
  def preprocess(data):
5
+ pattern = r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}-\s' # Capture date and time in separate groups
 
 
6
  messages_and_dates = re.findall(pattern, data)
 
 
7
 
8
+ messages = [message_group[0] for message_group in messages_and_dates]
9
+ dates = [message_group[1] or "NA" for message_group in messages_and_dates] # Assign "NA" for empty strings
10
 
11
+ df = pd.DataFrame({'user_message': messages, 'date': pd.to_datetime(dates, format='%d/%m/%Y, %H:%M - ')})
12
 
13
  users = []
14
  messages = []