File size: 1,682 Bytes
f03a6ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from datetime import datetime
import re
import pandas as pd
def preprocess(data: str) -> pd.DataFrame:
    """Parse an exported chat transcript into a DataFrame of messages.

    Every line shaped like ``dd/mm/yy, hh:mm AM - sender: text`` (12-hour
    clock) or ``dd/mm/yy, HH:MM - sender: text`` (24-hour clock) becomes one
    row. Lines that do not start with such a timestamp -- for example the
    continuation lines of a multi-line message -- are not captured.

    The first matching line is always discarded.
    NOTE(review): presumably this skips a leading system notice that the
    export places at the top of the file -- confirm against real exports.

    Args:
        data: Full text of the exported chat.

    Returns:
        A DataFrame with the columns ``DATE/TIME``, ``user``, ``message``,
        ``year``, ``month`` (month name), ``day``, ``hour``, ``minute``,
        ``only_date``, ``month_num``, ``day_name`` and ``period`` (an
        hour-range label such as ``"9-10"``, ``"23-00"`` or ``"00-1"``).
        ``user`` is missing for lines that have no ``"sender: "`` prefix.
        The frame is empty, but keeps every column, when fewer than two
        lines match.

    Raises:
        ValueError: If a matched timestamp is not a valid day/month/year
            date or clock time.
    """
    # The AM/PM marker is optional *together with* its leading whitespace,
    # so 24-hour lines ("14:05 - ...") match as well as 12-hour ones.
    line_pattern = (
        r'(\d{1,2}/\d{1,2}/\d{2}),\s(\d{1,2}:\d{2})'
        r'(?:\s(AM|PM|am|pm))?\s-\s(.+)'
    )
    entries = re.findall(line_pattern, data)[1:]

    timestamps = []
    senders = []
    texts = []
    for date_part, time_part, meridiem, body in entries:
        if meridiem:
            stamp = datetime.strptime(
                f"{date_part} {time_part} {meridiem}", "%d/%m/%y %I:%M %p"
            )
        else:
            # No AM/PM marker: interpret the time on a 24-hour clock.
            stamp = datetime.strptime(
                f"{date_part} {time_part}", "%d/%m/%y %H:%M"
            )

        # Split on the first ": " only, so the message text may itself
        # contain further colons.
        sender, separator, text = body.partition(': ')
        if not separator:
            sender, text = None, body

        timestamps.append(stamp)
        senders.append(sender)
        texts.append(text)

    df1 = pd.DataFrame(
        {'DATE/TIME': timestamps, 'user': senders, 'message': texts}
    )
    # Force a datetime dtype: with zero rows the column would otherwise not
    # be datetime-like and every `.dt` access below would raise.
    df1['DATE/TIME'] = pd.to_datetime(df1['DATE/TIME'])

    moments = df1['DATE/TIME'].dt
    df1['year'] = moments.year
    df1['month'] = moments.month_name()
    df1['day'] = moments.day
    df1['hour'] = moments.hour
    df1['minute'] = moments.minute
    df1['only_date'] = moments.date
    df1['month_num'] = moments.month
    df1['day_name'] = moments.day_name()

    df1['period'] = [_hour_period(hour) for hour in df1['hour']]

    return df1


def _hour_period(hour):
    """Return the "start-end" label of the one-hour slot beginning at *hour*."""
    if hour == 23:
        return f"{hour}-00"
    if hour == 0:
        return f"00-{hour + 1}"
    return f"{hour}-{hour + 1}"