SURESHBEEKHANI's picture
Upload 11 files
7089d06 verified
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt # type: ignore
from src.preprocessor import parse_whatsapp_chat
from src.helper import (
fetch_stats,
top_active_users,
create_wordcloud,
most_common_word,
emojis_analysis,
monthly_timeline,
daily_timeline,
week_activity_map,
month_activity_map,
activity_heatmap,
perform_sentiment_analysis
)
# Application Title
st.title("📊WhatsApp Chat Analyzer📊")
# Sidebar: File Uploader
st.sidebar.header("Upload Chat File")
uploaded_file = st.sidebar.file_uploader("Upload a WhatsApp chat file (.txt format)", type=["txt"])
# Check if a file is uploaded
if uploaded_file:
try:
# Decode and parse the uploaded file
file_data = uploaded_file.getvalue().decode("utf-8")
df = parse_whatsapp_chat(file_data)
# Display success message and show a preview of the data
st.success("✅Chat Data Loaded Successfully!✅")
st.dataframe(df.head()) # Show the first few rows of the DataFrame
# Sidebar: User selection for analysis
unique_users = sorted([user for user in df["User"].unique() if user != "group_notification"])
unique_users.insert(0, "Overall") # Add "Overall" option for global stats
selected_user = st.sidebar.selectbox("Select User for Analysis", unique_users)
# Analysis Section: Triggered by button
if st.sidebar.button("Show Analysis"):
# Fetch and display basic statistics
num_messages, words, num_media_messages, num_links = fetch_stats(df, selected_user)
st.subheader("📈Key Chat Statistics📈")
col1, col2, col3, col4 = st.columns(4)
col1.metric("Total Messages", num_messages)
col2.metric("Total Words", words)
col3.metric("Media Shared", num_media_messages)
col4.metric("Links Shared", num_links)
# Monthly Timeline Analysis
st.subheader("📅Monthly Timeline📅")
timeline = monthly_timeline(selected_user, df)
fig, ax = plt.subplots()
ax.plot(timeline["time"], timeline["Message"], color="green")
ax.set_title("Message Trends Over Time (Monthly)") # Plot title
ax.set_xlabel("Time (Month-Year)") # X-axis label
ax.set_ylabel("Number of Messages") # Y-axis label
plt.xticks(rotation="vertical")
plt.legend() # Show the legend for better clarity
plt.grid(True)
st.pyplot(fig)
# Daily Timeline
st.subheader("📅Daily Timeline📅")
daily_data = daily_timeline(selected_user, df) # Get daily timeline data
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(daily_data['only_date'], daily_data['Message'], color='blue') # Line chart
ax.set_title("Message Trends Over Time (Daily)") # Title for the plot
ax.set_xlabel("Date (Day-Month-Year)") # Label for x-axis
ax.set_ylabel("Number of Messages") # Label for y-axis
plt.xticks(rotation=80) # Rotate x-axis labels for better visibility
st.pyplot(fig)
# Activity Map
st.subheader("📅Activity Map📅")
col1, col2 = st.columns(2)
with col1:
st.header("Most Busy Day")
busy_day = week_activity_map(selected_user, df)
fig, ax = plt.subplots()
ax.bar(busy_day.index, busy_day.values, color='purple')
ax.set_title("Weekly Activity: Most Busy Day")
plt.xticks(rotation='vertical')
st.pyplot(fig)
with col2:
st.header("Most Busy Month")
busy_month = month_activity_map(selected_user, df)
fig, ax = plt.subplots()
ax.bar(busy_month.index, busy_month.values, color='orange')
ax.set_title("Monthly Activity: Most Busy Month")
plt.xticks(rotation='vertical')
st.pyplot(fig)
# Weekly Activity Heatmap
st.title("📅Weekly Activity Map📅")
user_heatmap = activity_heatmap(selected_user, df)
fig, ax = plt.subplots()
sns.heatmap(user_heatmap, ax=ax, cmap="coolwarm")
ax.set_title("Heatmap of Weekly Activity")
st.pyplot(fig)
# Most Active Users (Overall Analysis only)
if selected_user == "Overall":
st.subheader("📊 Most Active Users 🏆")
top_users, user_percentages = top_active_users(df)
# Display bar chart and percentage table for active users
col1, col2 = st.columns(2)
with col1:
# Bar chart for user activity
fig, ax = plt.subplots()
colors = ['#4C72B0', '#55A868', '#F1A340', '#C44E52', '#8172B2']
ax.bar(
top_users.index,
top_users.values,
color=[colors[i % len(colors)] for i in range(len(top_users))],
)
ax.set_title("Top Active Users")
plt.xticks(rotation='vertical')
st.pyplot(fig)
with col2:
# Table showing user activity percentages
st.dataframe(user_percentages)
# Word Cloud Visualization
st.subheader("🌟 Word Cloud 🌟")
wordcloud = create_wordcloud(selected_user, df)
fig, ax = plt.subplots()
ax.imshow(wordcloud, interpolation="bilinear")
ax.axis("off") # Hide axes for better visualization
ax.set_title("Word Cloud Representation")
st.pyplot(fig)
# Most Common Words
st.subheader("📋 Most Common Words 📋")
common_words = most_common_word(selected_user, df)
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(common_words[0], common_words[1], color="skyblue", edgecolor="black")
ax.set_title("Most Common Words Used")
ax.set_xlabel("Words")
ax.set_ylabel("Frequency")
ax.tick_params(axis="x", rotation=45)
st.pyplot(fig)
# Emoji Analysis
st.subheader("😊 Emoji Analysis 😊")
emoji_df = emojis_analysis(selected_user, df)
col1, col2 = st.columns(2)
with col1:
# Show emoji frequencies in a table
st.dataframe(emoji_df)
with col2:
# Pie chart for top emojis
fig, ax = plt.subplots()
ax.pie(emoji_df[1].head(), labels=emoji_df[0].head(), autopct="%0.2f%%")
ax.set_title("Top Emojis Distribution")
st.pyplot(fig)
# Sentiment Analysis
if st.sidebar.button("Show Sentiment"):
st.subheader("😊 Sentiment Analysis 😊")
sentiment_counts, sentiment_data = perform_sentiment_analysis(df, selected_user)
# Use simple for loop to display sentiment metrics
col1, col2 = st.columns(2)
with col1:
st.write("### Sentiment Summary")
st.metric("Positive Messages", sentiment_counts.get("Positive", 0))
st.metric("Negative Messages", sentiment_counts.get("Negative", 0))
st.metric("Neutral Messages", sentiment_counts.get("Neutral", 0))
with col2:
# Pie chart of sentiment distribution
st.write("### Sentiment Distribution")
fig, ax = plt.subplots()
ax.pie(
sentiment_counts,
labels=sentiment_counts.index,
autopct="%0.2f%%",
colors=["green", "red", "gray"]
)
ax.set_title("Sentiment Distribution")
st.pyplot(fig)
# Optional: Show sentiment trends over time
st.write("### Sentiment Over Time")
sentiment_data["Date"] = pd.to_datetime(sentiment_data["Date"]) # Ensure Date is datetime
sentiment_trends = sentiment_data.groupby(["Date", "Sentiment"]).size().unstack(fill_value=0)
fig, ax = plt.subplots(figsize=(10, 5))
sentiment_trends.plot(ax=ax, marker="o", linewidth=2)
ax.set_title("Sentiment Trends Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Message Count")
plt.grid(True)
st.pyplot(fig)
# Show Summary Features
if st.sidebar.button("Show Summary"):
# Create two columns for displaying the summaries side by side
col1, col2 = st.columns(2)
with col1:
# Summary Feature 1: Basic Statistics
num_messages, words, num_media_messages, num_links = fetch_stats(df, selected_user)
st.subheader("📋Chat Summary")
st.write(f"**Total Messages**: {num_messages}")
st.write(f"**Total Words**: {words}")
st.write(f"**Media Shared**: {num_media_messages}")
st.write(f"**Links Shared**: {num_links}")
with col2:
# Summary Feature 2: Sentiment Summary
sentiment_counts, _ = perform_sentiment_analysis(df, selected_user)
st.subheader("😊Sentiment Summary")
st.write(f"**Positive Messages**: {sentiment_counts.get('Positive', 0)}")
st.write(f"**Negative Messages**: {sentiment_counts.get('Negative', 0)}")
st.write(f"**Neutral Messages**: {sentiment_counts.get('Neutral', 0)}")
except Exception as e:
st.error(f"🚨 Error: {e}")
else:
st.warning("⚠️ Please upload a WhatsApp chat file to begin.")