File size: 10,262 Bytes
7089d06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt  # type: ignore
from src.preprocessor import parse_whatsapp_chat
from src.helper import (
    fetch_stats,
    top_active_users,
    create_wordcloud,
    most_common_word,
    emojis_analysis,
    monthly_timeline,
    daily_timeline,
    week_activity_map,
    month_activity_map,
    activity_heatmap,
    perform_sentiment_analysis
)

# Page heading displayed at the top of the main area
st.title("📊WhatsApp Chat Analyzer📊")

# Sidebar: section heading followed by the uploader for the exported chat.
# The uploader only accepts files with a .txt extension.
with st.sidebar:
    st.header("Upload Chat File")
    uploaded_file = st.file_uploader(
        "Upload a WhatsApp chat file (.txt format)",
        type=["txt"],
    )

# Colour used for each sentiment label in the pie chart. Looking the colour up
# by label keeps a slice's meaning stable regardless of the order in which the
# labels appear in the counts.
_SENTIMENT_COLORS = {"Positive": "green", "Negative": "red", "Neutral": "gray"}


def _show_figure(fig) -> None:
    """Render a matplotlib figure in the app and then release it.

    Figures created through ``plt.subplots`` stay registered with pyplot until
    they are closed. This script builds fresh figures on every run, so each one
    is closed right after it has been drawn to stop them piling up.

    Args:
        fig: The matplotlib figure to display.
    """
    st.pyplot(fig)
    plt.close(fig)


def _render_analysis(df: pd.DataFrame, selected_user: str) -> None:
    """Draw the statistics, timelines, activity maps, word and emoji views.

    Args:
        df: Parsed chat data as returned by ``parse_whatsapp_chat``.
        selected_user: A user name from the chat, or ``"Overall"``.
    """
    # Fetch and display basic statistics
    num_messages, words, num_media_messages, num_links = fetch_stats(df, selected_user)

    st.subheader("📈Key Chat Statistics📈")
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Total Messages", num_messages)
    col2.metric("Total Words", words)
    col3.metric("Media Shared", num_media_messages)
    col4.metric("Links Shared", num_links)

    # Monthly Timeline Analysis
    st.subheader("📅Monthly Timeline📅")
    timeline = monthly_timeline(selected_user, df)
    fig, ax = plt.subplots()
    # The line carries a label so that the legend below has an entry to show.
    ax.plot(timeline["time"], timeline["Message"], color="green", label="Messages")
    ax.set_title("Message Trends Over Time (Monthly)")
    ax.set_xlabel("Time (Month-Year)")
    ax.set_ylabel("Number of Messages")
    ax.tick_params(axis="x", rotation=90)
    ax.legend()
    ax.grid(True)
    _show_figure(fig)

    # Daily Timeline
    st.subheader("📅Daily Timeline📅")
    daily_data = daily_timeline(selected_user, df)
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(daily_data['only_date'], daily_data['Message'], color='blue')
    ax.set_title("Message Trends Over Time (Daily)")
    ax.set_xlabel("Date (Day-Month-Year)")
    ax.set_ylabel("Number of Messages")
    ax.tick_params(axis="x", rotation=80)  # Tilt labels so dates do not overlap
    _show_figure(fig)

    # Activity Map
    st.subheader("📅Activity Map📅")
    col1, col2 = st.columns(2)

    with col1:
        st.header("Most Busy Day")
        busy_day = week_activity_map(selected_user, df)
        fig, ax = plt.subplots()
        ax.bar(busy_day.index, busy_day.values, color='purple')
        ax.set_title("Weekly Activity: Most Busy Day")
        ax.tick_params(axis="x", rotation=90)
        _show_figure(fig)

    with col2:
        st.header("Most Busy Month")
        busy_month = month_activity_map(selected_user, df)
        fig, ax = plt.subplots()
        ax.bar(busy_month.index, busy_month.values, color='orange')
        ax.set_title("Monthly Activity: Most Busy Month")
        ax.tick_params(axis="x", rotation=90)
        _show_figure(fig)

    # Weekly Activity Heatmap
    st.title("📅Weekly Activity Map📅")
    user_heatmap = activity_heatmap(selected_user, df)
    fig, ax = plt.subplots()
    sns.heatmap(user_heatmap, ax=ax, cmap="coolwarm")
    ax.set_title("Heatmap of Weekly Activity")
    _show_figure(fig)

    # Most Active Users (Overall Analysis only)
    if selected_user == "Overall":
        st.subheader("📊 Most Active Users 🏆")
        top_users, user_percentages = top_active_users(df)

        # Bar chart on the left, percentage table on the right
        col1, col2 = st.columns(2)

        with col1:
            fig, ax = plt.subplots()
            colors = ['#4C72B0', '#55A868', '#F1A340', '#C44E52', '#8172B2']
            ax.bar(
                top_users.index,
                top_users.values,
                # Cycle through the palette when there are more bars than colours
                color=[colors[i % len(colors)] for i in range(len(top_users))],
            )
            ax.set_title("Top Active Users")
            ax.tick_params(axis="x", rotation=90)
            _show_figure(fig)

        with col2:
            st.dataframe(user_percentages)

    # Word Cloud Visualization
    st.subheader("🌟 Word Cloud 🌟")
    wordcloud = create_wordcloud(selected_user, df)
    fig, ax = plt.subplots()
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")  # Hide axes for better visualization
    ax.set_title("Word Cloud Representation")
    _show_figure(fig)

    # Most Common Words
    st.subheader("📋 Most Common Words 📋")
    common_words = most_common_word(selected_user, df)
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(common_words[0], common_words[1], color="skyblue", edgecolor="black")
    ax.set_title("Most Common Words Used")
    ax.set_xlabel("Words")
    ax.set_ylabel("Frequency")
    ax.tick_params(axis="x", rotation=45)
    _show_figure(fig)

    # Emoji Analysis
    st.subheader("😊 Emoji Analysis 😊")
    emoji_df = emojis_analysis(selected_user, df)

    col1, col2 = st.columns(2)
    with col1:
        # Show emoji frequencies in a table
        st.dataframe(emoji_df)

    with col2:
        if len(emoji_df) == 0:
            # An empty result has nothing to plot (and may not even have the
            # columns 0 and 1 read below), so say so instead of failing the
            # whole page through the outer error handler.
            st.info("No emojis found for this selection.")
        else:
            # Pie chart for top emojis
            fig, ax = plt.subplots()
            ax.pie(emoji_df[1].head(), labels=emoji_df[0].head(), autopct="%0.2f%%")
            ax.set_title("Top Emojis Distribution")
            _show_figure(fig)


def _render_sentiment(df: pd.DataFrame, selected_user: str) -> None:
    """Draw the sentiment metrics, distribution pie, and trend over time.

    Args:
        df: Parsed chat data as returned by ``parse_whatsapp_chat``.
        selected_user: A user name from the chat, or ``"Overall"``.
    """
    st.subheader("😊 Sentiment Analysis 😊")
    sentiment_counts, sentiment_data = perform_sentiment_analysis(df, selected_user)

    col1, col2 = st.columns(2)
    with col1:
        st.write("### Sentiment Summary")
        st.metric("Positive Messages", sentiment_counts.get("Positive", 0))
        st.metric("Negative Messages", sentiment_counts.get("Negative", 0))
        st.metric("Neutral Messages", sentiment_counts.get("Neutral", 0))

    with col2:
        # Pie chart of sentiment distribution
        st.write("### Sentiment Distribution")
        fig, ax = plt.subplots()
        ax.pie(
            sentiment_counts,
            labels=sentiment_counts.index,
            autopct="%0.2f%%",
            # Pick each colour from its label; a fixed positional list would
            # paint the wrong sentiment whenever the index order differs.
            colors=[
                _SENTIMENT_COLORS.get(label, "lightgray")
                for label in sentiment_counts.index
            ],
        )
        ax.set_title("Sentiment Distribution")
        _show_figure(fig)

    # Sentiment trends over time
    st.write("### Sentiment Over Time")
    # Work on a derived frame rather than writing into the helper's result.
    dated = sentiment_data.assign(Date=pd.to_datetime(sentiment_data["Date"]))
    sentiment_trends = dated.groupby(["Date", "Sentiment"]).size().unstack(fill_value=0)

    fig, ax = plt.subplots(figsize=(10, 5))
    sentiment_trends.plot(ax=ax, marker="o", linewidth=2)
    ax.set_title("Sentiment Trends Over Time")
    ax.set_xlabel("Date")
    ax.set_ylabel("Message Count")
    ax.grid(True)
    _show_figure(fig)


def _render_summary(df: pd.DataFrame, selected_user: str) -> None:
    """Show the basic statistics and sentiment counts side by side as text.

    Args:
        df: Parsed chat data as returned by ``parse_whatsapp_chat``.
        selected_user: A user name from the chat, or ``"Overall"``.
    """
    col1, col2 = st.columns(2)

    with col1:
        # Summary Feature 1: Basic Statistics
        num_messages, words, num_media_messages, num_links = fetch_stats(df, selected_user)
        st.subheader("📋Chat Summary")
        st.write(f"**Total Messages**: {num_messages}")
        st.write(f"**Total Words**: {words}")
        st.write(f"**Media Shared**: {num_media_messages}")
        st.write(f"**Links Shared**: {num_links}")

    with col2:
        # Summary Feature 2: Sentiment Summary
        sentiment_counts, _ = perform_sentiment_analysis(df, selected_user)
        st.subheader("😊Sentiment Summary")
        st.write(f"**Positive Messages**: {sentiment_counts.get('Positive', 0)}")
        st.write(f"**Negative Messages**: {sentiment_counts.get('Negative', 0)}")
        st.write(f"**Neutral Messages**: {sentiment_counts.get('Neutral', 0)}")


# Check if a file is uploaded
if uploaded_file:
    try:
        # Decode and parse the uploaded file. "utf-8-sig" reads plain UTF-8
        # unchanged and drops a leading byte-order mark if there is one, so it
        # does not stay attached to the first line of the chat.
        file_data = uploaded_file.getvalue().decode("utf-8-sig")
        df = parse_whatsapp_chat(file_data)

        # Display success message and show a preview of the data
        st.success("✅Chat Data Loaded Successfully!✅")
        st.dataframe(df.head())

        # Sidebar: user selection, with "Overall" first for whole-chat stats
        unique_users = sorted(
            user for user in df["User"].unique() if user != "group_notification"
        )
        unique_users.insert(0, "Overall")
        selected_user = st.sidebar.selectbox("Select User for Analysis", unique_users)

        # Each section is drawn only on the run in which its button was pressed.
        if st.sidebar.button("Show Analysis"):
            _render_analysis(df, selected_user)

        if st.sidebar.button("Show Sentiment"):
            _render_sentiment(df, selected_user)

        if st.sidebar.button("Show Summary"):
            _render_summary(df, selected_user)

    except Exception as e:
        # Top-level boundary of the script: report the failure in the page
        # rather than leaving the user with a raw traceback.
        st.error(f"🚨 Error: {e}")
else:
    st.warning("⚠️ Please upload a WhatsApp chat file to begin.")