Spaces:
Runtime error
Runtime error
| import re | |
| import pandas as pd | |
def _format_period(hour, ampm):
    """Return the one-hour bucket label that starts at ``hour``.

    Args:
        hour: Hour on a 12-hour clock.
        ampm: ``'am'`` or ``'pm'`` marker belonging to ``hour``.

    Returns:
        str: Label such as ``"9 am - 10 am"`` or ``"11 pm - 12 am"``.
    """
    # Wrap 12 -> 1 while keeping 12 itself (never 0) as the hour after 11.
    next_hour = (hour + 1) % 12 or 12
    # On a 12-hour clock the marker flips when stepping from 11 to 12,
    # not when stepping from 12 to 1.
    if hour == 11:
        next_ampm = {'am': 'pm', 'pm': 'am'}.get(ampm, ampm)
    else:
        next_ampm = ampm
    return f"{hour} {ampm} - {next_hour} {next_ampm}"


def parse_whatsapp_chat(data: str) -> pd.DataFrame:
    """
    Parse a WhatsApp chat export into a structured pandas DataFrame.

    Only lines shaped like ``dd/mm/yyyy, h:mm am - User: message`` (lowercase
    ``am``/``pm``) are captured. Lines that do not match the pattern, such as
    the continuation lines of a multi-line message, are not included.

    Args:
        data (str): The raw WhatsApp chat log as a string.

    Returns:
        pd.DataFrame: One row per matched message with the columns
            ``Date``, ``Time``, ``User``, ``Message``, ``only_date``, ``Year``,
            ``Month``, ``month_num``, ``day_name``, ``Hour_Minute``, ``AMPM``,
            ``Hour`` (int), ``Minute`` (str) and ``period``. When nothing
            matches, an empty DataFrame with the same columns is returned.

    Raises:
        ValueError: If a matched date string is not a valid ``%d/%m/%Y`` date
            (raised by ``pd.to_datetime``).
    """
    # Groups: date, time (with am/pm), user, first line of the message.
    pattern = r"(\d{2}/\d{2}/\d{4}),\s(\d{1,2}:\d{2}\s?[ap]m)\s-\s([\w\s+]+):\s(.*)"
    matches = re.findall(pattern, data)

    df = pd.DataFrame(matches, columns=['Date', 'Time', 'User', 'Message'])

    # Calendar components derived from the parsed date.
    df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y')
    df['only_date'] = df['Date'].dt.date
    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.strftime('%B')
    df['month_num'] = df['Date'].dt.month
    df['day_name'] = df['Date'].dt.strftime('%A')

    # Split 'Time' into the clock part and the am/pm marker.
    df[['Hour_Minute', 'AMPM']] = df['Time'].str.extract(
        r'(\d{1,2}:\d{2})\s?(am|pm)', expand=True
    )
    # str.extract always yields one column per capture group, even for an
    # empty frame; str.split(expand=True) yields zero columns there, which
    # made the two-column assignment fail when no message matched.
    df[['Hour', 'Minute']] = df['Hour_Minute'].str.extract(
        r'(\d{1,2}):(\d{2})', expand=True
    )
    df['Hour'] = df['Hour'].astype(int)

    # Build the "H am/pm - H am/pm" bucket labels. A row-wise DataFrame.apply
    # returns a DataFrame (not a Series) on an empty frame, so the labels are
    # assembled explicitly to keep the empty case working.
    periods = [_format_period(hour, ampm) for hour, ampm in zip(df['Hour'], df['AMPM'])]
    df['period'] = pd.Series(periods, index=df.index, dtype=df['AMPM'].dtype)
    return df