import pandas as pd
import numpy as np
import re

# Timestamp prefix that introduces every message in the export, e.g.
# "12/05/2021, 10:30 PM - ".  The groups capture day, month, year, hour,
# minute and the AM/PM marker so the stamp can be rebuilt in one canonical
# form before parsing.  "\s?" also matches the narrow no-break space (U+202F).
_TIMESTAMP_RE = re.compile(
    r'(\d{1,2})/(\d{1,2})/(\d{2,4}),\s(\d{1,2}):(\d{2})\s?([AaPp][Mm])\s-\s'
)

# "<author>: <text>".  Anchored at the start of the body and applied once, so
# only the FIRST colon-plus-whitespace separates author from text; any later
# "something: " inside the text stays part of the message.
_AUTHOR_RE = re.compile(r'(.+?):\s(.*)', re.DOTALL)

# Canonical layout produced by _canonical_timestamp().
_TIMESTAMP_FORMAT = '%d/%m/%Y, %I:%M %p'


def _canonical_timestamp(match):
    """Rebuild a matched timestamp prefix as ``D/M/YYYY, H:MM AM|PM``."""
    day, month, year, hour, minute, meridiem = match.groups()
    if len(year) == 2:
        # Same pivot strptime's %y uses: 00-68 -> 20xx, 69-99 -> 19xx.
        year = ('20' if int(year) <= 68 else '19') + year
    return '{}/{}/{}, {}:{} {}'.format(
        day, month, year, hour, minute, meridiem.upper()
    )


def _split_author(body):
    """Return ``(user, text)``; bodies with no author are group notifications."""
    found = _AUTHOR_RE.match(body)
    if found is None:
        return 'group_notification', body
    return found.group(1), found.group(2)


def preprocess(data: str) -> pd.DataFrame:
    """Parse a raw chat export into a tidy DataFrame.

    Each message is expected to start with a day-first, 12-hour timestamp
    prefix such as ``"12/05/2021, 10:30 PM - "`` (the layout resembles a
    WhatsApp text export -- NOTE(review): confirm against the real source).
    Text before the first timestamp is discarded; text between two
    timestamps, including newlines, belongs to the earlier message.

    Args:
        data: Full text of the export.

    Returns:
        DataFrame with one row per message and the columns ``date``,
        ``user``, ``message``, ``year``, ``month`` (month name), ``day``,
        ``hour``, ``min`` and ``month_num``.  Rows without an
        ``"<author>: "`` prefix get the user ``'group_notification'``.

    Raises:
        ValueError: From ``pd.to_datetime`` when a matched timestamp is not
            a valid date/time (e.g. a 3-digit year or month 13).
    """
    stamps = list(_TIMESTAMP_RE.finditer(data))

    # A message body runs from the end of its timestamp to the start of the
    # next one (or to the end of the input for the last message).
    body_ends = [nxt.start() for nxt in stamps[1:]] + [len(data)]
    bodies = [data[cur.end():end] for cur, end in zip(stamps, body_ends)]

    df = pd.DataFrame({'date': [_canonical_timestamp(m) for m in stamps]})
    df['date'] = pd.to_datetime(df['date'], format=_TIMESTAMP_FORMAT)

    authors_and_texts = [_split_author(body) for body in bodies]
    df['user'] = [author for author, _ in authors_and_texts]
    df['message'] = [text for _, text in authors_and_texts]

    # Calendar breakdown used by downstream aggregations.
    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month_name()
    df['day'] = df['date'].dt.day
    df['hour'] = df['date'].dt.hour
    df['min'] = df['date'].dt.minute
    df['month_num'] = df['date'].dt.month
    return df