Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import pandas as pd | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt # type: ignore | |
| from src.preprocessor import parse_whatsapp_chat | |
| from src.helper import ( | |
| fetch_stats, | |
| top_active_users, | |
| create_wordcloud, | |
| most_common_word, | |
| emojis_analysis, | |
| monthly_timeline, | |
| daily_timeline, | |
| week_activity_map, | |
| month_activity_map, | |
| activity_heatmap, | |
| perform_sentiment_analysis | |
| ) | |
| # Application Title | |
| st.title("📊WhatsApp Chat Analyzer📊") | |
| # Sidebar: File Uploader | |
| st.sidebar.header("Upload Chat File") | |
| uploaded_file = st.sidebar.file_uploader("Upload a WhatsApp chat file (.txt format)", type=["txt"]) | |
| # Check if a file is uploaded | |
| if uploaded_file: | |
| try: | |
| # Decode and parse the uploaded file | |
| file_data = uploaded_file.getvalue().decode("utf-8") | |
| df = parse_whatsapp_chat(file_data) | |
| # Display success message and show a preview of the data | |
| st.success("✅Chat Data Loaded Successfully!✅") | |
| st.dataframe(df.head()) # Show the first few rows of the DataFrame | |
| # Sidebar: User selection for analysis | |
| unique_users = sorted([user for user in df["User"].unique() if user != "group_notification"]) | |
| unique_users.insert(0, "Overall") # Add "Overall" option for global stats | |
| selected_user = st.sidebar.selectbox("Select User for Analysis", unique_users) | |
| # Analysis Section: Triggered by button | |
| if st.sidebar.button("Show Analysis"): | |
| # Fetch and display basic statistics | |
| num_messages, words, num_media_messages, num_links = fetch_stats(df, selected_user) | |
| st.subheader("📈Key Chat Statistics📈") | |
| col1, col2, col3, col4 = st.columns(4) | |
| col1.metric("Total Messages", num_messages) | |
| col2.metric("Total Words", words) | |
| col3.metric("Media Shared", num_media_messages) | |
| col4.metric("Links Shared", num_links) | |
| # Monthly Timeline Analysis | |
| st.subheader("📅Monthly Timeline📅") | |
| timeline = monthly_timeline(selected_user, df) | |
| fig, ax = plt.subplots() | |
| ax.plot(timeline["time"], timeline["Message"], color="green") | |
| ax.set_title("Message Trends Over Time (Monthly)") # Plot title | |
| ax.set_xlabel("Time (Month-Year)") # X-axis label | |
| ax.set_ylabel("Number of Messages") # Y-axis label | |
| plt.xticks(rotation="vertical") | |
| plt.legend() # Show the legend for better clarity | |
| plt.grid(True) | |
| st.pyplot(fig) | |
| # Daily Timeline | |
| st.subheader("📅Daily Timeline📅") | |
| daily_data = daily_timeline(selected_user, df) # Get daily timeline data | |
| fig, ax = plt.subplots(figsize=(12, 6)) | |
| ax.plot(daily_data['only_date'], daily_data['Message'], color='blue') # Line chart | |
| ax.set_title("Message Trends Over Time (Daily)") # Title for the plot | |
| ax.set_xlabel("Date (Day-Month-Year)") # Label for x-axis | |
| ax.set_ylabel("Number of Messages") # Label for y-axis | |
| plt.xticks(rotation=80) # Rotate x-axis labels for better visibility | |
| st.pyplot(fig) | |
| # Activity Map | |
| st.subheader("📅Activity Map📅") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.header("Most Busy Day") | |
| busy_day = week_activity_map(selected_user, df) | |
| fig, ax = plt.subplots() | |
| ax.bar(busy_day.index, busy_day.values, color='purple') | |
| ax.set_title("Weekly Activity: Most Busy Day") | |
| plt.xticks(rotation='vertical') | |
| st.pyplot(fig) | |
| with col2: | |
| st.header("Most Busy Month") | |
| busy_month = month_activity_map(selected_user, df) | |
| fig, ax = plt.subplots() | |
| ax.bar(busy_month.index, busy_month.values, color='orange') | |
| ax.set_title("Monthly Activity: Most Busy Month") | |
| plt.xticks(rotation='vertical') | |
| st.pyplot(fig) | |
| # Weekly Activity Heatmap | |
| st.title("📅Weekly Activity Map📅") | |
| user_heatmap = activity_heatmap(selected_user, df) | |
| fig, ax = plt.subplots() | |
| sns.heatmap(user_heatmap, ax=ax, cmap="coolwarm") | |
| ax.set_title("Heatmap of Weekly Activity") | |
| st.pyplot(fig) | |
| # Most Active Users (Overall Analysis only) | |
| if selected_user == "Overall": | |
| st.subheader("📊 Most Active Users 🏆") | |
| top_users, user_percentages = top_active_users(df) | |
| # Display bar chart and percentage table for active users | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Bar chart for user activity | |
| fig, ax = plt.subplots() | |
| colors = ['#4C72B0', '#55A868', '#F1A340', '#C44E52', '#8172B2'] | |
| ax.bar( | |
| top_users.index, | |
| top_users.values, | |
| color=[colors[i % len(colors)] for i in range(len(top_users))], | |
| ) | |
| ax.set_title("Top Active Users") | |
| plt.xticks(rotation='vertical') | |
| st.pyplot(fig) | |
| with col2: | |
| # Table showing user activity percentages | |
| st.dataframe(user_percentages) | |
| # Word Cloud Visualization | |
| st.subheader("🌟 Word Cloud 🌟") | |
| wordcloud = create_wordcloud(selected_user, df) | |
| fig, ax = plt.subplots() | |
| ax.imshow(wordcloud, interpolation="bilinear") | |
| ax.axis("off") # Hide axes for better visualization | |
| ax.set_title("Word Cloud Representation") | |
| st.pyplot(fig) | |
| # Most Common Words | |
| st.subheader("📋 Most Common Words 📋") | |
| common_words = most_common_word(selected_user, df) | |
| fig, ax = plt.subplots(figsize=(10, 6)) | |
| ax.bar(common_words[0], common_words[1], color="skyblue", edgecolor="black") | |
| ax.set_title("Most Common Words Used") | |
| ax.set_xlabel("Words") | |
| ax.set_ylabel("Frequency") | |
| ax.tick_params(axis="x", rotation=45) | |
| st.pyplot(fig) | |
| # Emoji Analysis | |
| st.subheader("😊 Emoji Analysis 😊") | |
| emoji_df = emojis_analysis(selected_user, df) | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Show emoji frequencies in a table | |
| st.dataframe(emoji_df) | |
| with col2: | |
| # Pie chart for top emojis | |
| fig, ax = plt.subplots() | |
| ax.pie(emoji_df[1].head(), labels=emoji_df[0].head(), autopct="%0.2f%%") | |
| ax.set_title("Top Emojis Distribution") | |
| st.pyplot(fig) | |
| # Sentiment Analysis | |
| if st.sidebar.button("Show Sentiment"): | |
| st.subheader("😊 Sentiment Analysis 😊") | |
| sentiment_counts, sentiment_data = perform_sentiment_analysis(df, selected_user) | |
| # Use simple for loop to display sentiment metrics | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.write("### Sentiment Summary") | |
| st.metric("Positive Messages", sentiment_counts.get("Positive", 0)) | |
| st.metric("Negative Messages", sentiment_counts.get("Negative", 0)) | |
| st.metric("Neutral Messages", sentiment_counts.get("Neutral", 0)) | |
| with col2: | |
| # Pie chart of sentiment distribution | |
| st.write("### Sentiment Distribution") | |
| fig, ax = plt.subplots() | |
| ax.pie( | |
| sentiment_counts, | |
| labels=sentiment_counts.index, | |
| autopct="%0.2f%%", | |
| colors=["green", "red", "gray"] | |
| ) | |
| ax.set_title("Sentiment Distribution") | |
| st.pyplot(fig) | |
| # Optional: Show sentiment trends over time | |
| st.write("### Sentiment Over Time") | |
| sentiment_data["Date"] = pd.to_datetime(sentiment_data["Date"]) # Ensure Date is datetime | |
| sentiment_trends = sentiment_data.groupby(["Date", "Sentiment"]).size().unstack(fill_value=0) | |
| fig, ax = plt.subplots(figsize=(10, 5)) | |
| sentiment_trends.plot(ax=ax, marker="o", linewidth=2) | |
| ax.set_title("Sentiment Trends Over Time") | |
| ax.set_xlabel("Date") | |
| ax.set_ylabel("Message Count") | |
| plt.grid(True) | |
| st.pyplot(fig) | |
| # Show Summary Features | |
| if st.sidebar.button("Show Summary"): | |
| # Create two columns for displaying the summaries side by side | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Summary Feature 1: Basic Statistics | |
| num_messages, words, num_media_messages, num_links = fetch_stats(df, selected_user) | |
| st.subheader("📋Chat Summary") | |
| st.write(f"**Total Messages**: {num_messages}") | |
| st.write(f"**Total Words**: {words}") | |
| st.write(f"**Media Shared**: {num_media_messages}") | |
| st.write(f"**Links Shared**: {num_links}") | |
| with col2: | |
| # Summary Feature 2: Sentiment Summary | |
| sentiment_counts, _ = perform_sentiment_analysis(df, selected_user) | |
| st.subheader("😊Sentiment Summary") | |
| st.write(f"**Positive Messages**: {sentiment_counts.get('Positive', 0)}") | |
| st.write(f"**Negative Messages**: {sentiment_counts.get('Negative', 0)}") | |
| st.write(f"**Neutral Messages**: {sentiment_counts.get('Neutral', 0)}") | |
| except Exception as e: | |
| st.error(f"🚨 Error: {e}") | |
| else: | |
| st.warning("⚠️ Please upload a WhatsApp chat file to begin.") | |