Spaces:
Sleeping
Sleeping
Commit
Β·
a544a7a
1
Parent(s):
0fe37af
Add Prime Bank Analytics Dashboard
Browse files- .gitignore +31 -0
- .streamlit/config.toml +0 -0
- app.py +204 -0
- create_test_data.py +35 -0
- src/__init__.py +0 -0
- src/data_processor.py +205 -0
- src/visualizations.py +380 -0
- test.py +34 -0
.gitignore
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment variables
|
| 2 |
+
.env
|
| 3 |
+
.env.local
|
| 4 |
+
|
| 5 |
+
# Data files
|
| 6 |
+
*.csv
|
| 7 |
+
*.txt
|
| 8 |
+
data/raw/*
|
| 9 |
+
data/processed/*
|
| 10 |
+
|
| 11 |
+
# Python
|
| 12 |
+
__pycache__/
|
| 13 |
+
*.py[cod]
|
| 14 |
+
*$py.class
|
| 15 |
+
*.so
|
| 16 |
+
.Python
|
| 17 |
+
|
| 18 |
+
# Streamlit
|
| 19 |
+
.streamlit/secrets.toml
|
| 20 |
+
|
| 21 |
+
# IDE
|
| 22 |
+
.vscode/
|
| 23 |
+
.idea/
|
| 24 |
+
|
| 25 |
+
# OS
|
| 26 |
+
.DS_Store
|
| 27 |
+
Thumbs.db
|
| 28 |
+
|
| 29 |
+
# Keep empty directories
|
| 30 |
+
!data/raw/.gitkeep
|
| 31 |
+
!data/processed/.gitkeep
|
.streamlit/config.toml
ADDED
|
File without changes
|
app.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st
import pandas as pd
from src.data_processor import DataProcessor
from src.visualizations import *

# ---- Page setup ----------------------------------------------------------
st.set_page_config(
    page_title="Prime Bank Analytics Dashboard",
    page_icon="🏦",
    layout="wide"
)
st.title("🏦 Prime Bank Social Media Analytics Dashboard")

# ---- Sidebar: configuration ----------------------------------------------
with st.sidebar:
    st.header("⚙️ Configuration")

    # Optional OpenAI key; GPT analysis can only be enabled when a key is set.
    api_key = st.text_input(
        "OpenAI API Key (optional):",
        type="password",
        help="Enter your OpenAI API key for advanced GPT analysis"
    )

    if api_key:
        st.success("✅ API Key configured")
        use_gpt = st.checkbox("Enable GPT Analysis", value=True)
    else:
        st.info("💡 Running without GPT features")
        use_gpt = False

    st.markdown("---")
    st.markdown("### About")
    st.markdown("Upload CSV files from social media platforms and TXT files with reviews to analyze Prime Bank's online presence.")

# Processor is built once per rerun; the key is passed only when GPT is on.
processor = DataProcessor(openai_api_key=api_key if use_gpt else None)

# ---- File upload ----------------------------------------------------------
st.markdown("### 📁 Upload Your Data Files")

col1, col2 = st.columns(2)
with col1:
    csv_files = st.file_uploader(
        "Upload CSV files (Facebook, Twitter, etc.)",
        type=['csv'],
        accept_multiple_files=True,
        help="Upload one or more CSV files containing social media data"
    )
with col2:
    txt_file = st.file_uploader(
        "Upload TXT file (Manual reviews)",
        type=['txt'],
        help="Upload a text file with reviews, one per line"
    )

with st.expander("📥 Need sample data to test?"):
    st.markdown("""
    Download these sample files to test the dashboard:
    - [Sample CSV Data](https://example.com)
    - [Sample TXT Reviews](https://example.com)

    Or create test data by running:
    ```bash
    python create_test_data.py
    ```
    """)

# ---- Main pipeline: runs as soon as at least one file is uploaded ---------
if csv_files or txt_file:
    with st.spinner('Processing files...'):
        all_data = []

        if csv_files:
            st.write(f"📊 Processing {len(csv_files)} CSV file(s)...")
            csv_data = processor.process_csv_files(csv_files)
            if not csv_data.empty:
                all_data.append(csv_data)
                st.success(f"✅ Loaded {len(csv_data)} rows from CSV files")

        if txt_file:
            st.write("📄 Processing TXT file...")
            txt_data = processor.process_txt_file(txt_file)
            if not txt_data.empty:
                all_data.append(txt_data)
                st.success(f"✅ Loaded {len(txt_data)} reviews from TXT file")

        if all_data:
            combined_df = pd.concat(all_data, ignore_index=True)

            with st.spinner('Analyzing sentiment and emotions...'):
                processed_df = processor.process_all_data(combined_df)

            # Only posts that actually mention Prime Bank feed the charts.
            prime_df = processed_df[processed_df['prime_mentions'] > 0]
            st.success(f"✅ Analysis complete! Found {len(prime_df)} posts mentioning Prime Bank out of {len(processed_df)} total posts")

            # ---- Key metrics row ----
            st.header("📈 Key Metrics")
            metrics = create_summary_metrics(processed_df)
            for metric_col, (label, value) in zip(st.columns(4), metrics.items()):
                with metric_col:
                    st.metric(label, value)

            # ---- Three side-by-side charts ----
            st.header("📊 Analysis")
            chart_specs = [
                (create_sentiment_pie, "No Prime Bank mentions found for sentiment analysis"),
                (create_emotion_bar, "No Prime Bank mentions found for emotion analysis"),
                (create_category_donut, "No Prime Bank mentions found for category analysis"),
            ]
            for chart_col, (builder, empty_msg) in zip(st.columns(3), chart_specs):
                with chart_col:
                    if len(prime_df) > 0:
                        st.plotly_chart(builder(prime_df), use_container_width=True)
                    else:
                        st.info(empty_msg)

            # ---- Top viral posts ----
            st.header("🔥 Top Viral Posts Mentioning Prime Bank")
            if len(prime_df) > 0:
                top_posts = prime_df.nlargest(5, 'viral_score')[['text', 'sentiment', 'emotion', 'category', 'prime_mentions']]
                for idx, row in top_posts.iterrows():
                    with st.expander(f"Post #{idx+1} - {row['sentiment']} | {row['emotion']}"):
                        st.write(row['text'])
                        c1, c2, c3, c4 = st.columns(4)
                        c1.metric("Sentiment", row['sentiment'])
                        c2.metric("Emotion", row['emotion'])
                        c3.metric("Category", row['category'])
                        c4.metric("Mentions", row['prime_mentions'])
            else:
                st.info("No posts mentioning Prime Bank found")

            # ---- Raw data + export ----
            with st.expander("📋 View All Data"):
                st.dataframe(processed_df)

            csv = processed_df.to_csv(index=False)
            st.download_button(
                label="📥 Download Processed Data",
                data=csv,
                file_name="prime_bank_analysis.csv",
                mime="text/csv"
            )
else:
    # Nothing uploaded yet: show usage instructions.
    st.info("👆 Please upload CSV files and/or TXT file to begin analysis")

    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("""
        ### 📁 CSV Files Should Contain:
        - A text column (text/content/message)
        - Optional: date, likes, shares
        - Can upload multiple files
        """)
    with col2:
        st.markdown("""
        ### 📄 TXT File Format:
        - One review per line
        - Plain text format
        - Manual reviews/comments
        """)
    with col3:
        st.markdown("""
        ### 🎯 Analysis Includes:
        - Sentiment (Positive/Negative)
        - Emotions (Joy/Frustration)
        - Categories (Inquiry/Complaint)
        """)
create_test_data.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd

# Ten hand-written posts covering praise, complaints and questions, so every
# sentiment/emotion/category branch of the dashboard gets exercised.
_texts = [
    'Prime Bank has the best customer service! Love their mobile app.',
    'Worst experience at Prime Bank branch today. Waited 2 hours!',
    'How do I apply for a loan at Prime Bank?',
    'Prime Bank ATM is not working again. So frustrated!',
    'Thank you Prime Bank staff for helping with my account.',
    'What are Prime Bank interest rates?',
    'Prime Bank online banking is confusing.',
    'Excellent service at Prime Bank downtown branch!',
    'Prime Bank charged me hidden fees. Very disappointed.',
    'Can someone explain Prime Bank credit card benefits?',
]

sample_data = pd.DataFrame({
    'text': _texts,
    'date': pd.date_range('2024-01-01', periods=10),
    'likes': [45, 12, 5, 89, 34, 8, 15, 67, 102, 22],
    'shares': [5, 2, 1, 15, 8, 1, 3, 12, 25, 4],
})

# Write the CSV fixture next to the script.
sample_data.to_csv('test_social_media_data.csv', index=False)
print("✅ Created test_social_media_data.csv")

# Plain-text reviews, one per line, matching the TXT upload format.
reviews = """Prime Bank provides exceptional service. Highly recommend!
Terrible experience with Prime Bank customer support.
Prime Bank mobile app keeps crashing. Please fix this!
Love the new features in Prime Bank online banking.
Why does Prime Bank charge so many fees?"""

with open('test_reviews.txt', 'w') as f:
    f.write(reviews)
print("✅ Created test_reviews.txt")
src/__init__.py
ADDED
|
File without changes
|
src/data_processor.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd
import re
import numpy as np
import json

# ---- Optional dependencies ------------------------------------------------
# Every NLP/GPT backend is optional; the processor degrades gracefully when
# one is missing instead of failing to import.
try:
    import openai
    OPENAI_AVAILABLE = True
except ImportError:
    OPENAI_AVAILABLE = False
    print("OpenAI not installed. GPT features will be disabled.")

try:
    import nltk
    from nltk.sentiment import SentimentIntensityAnalyzer
    nltk.download('vader_lexicon', quiet=True)
    NLTK_AVAILABLE = True
except ImportError:
    NLTK_AVAILABLE = False
    print("NLTK not installed. Using TextBlob only.")

# TextBlob used to be a hard import while openai/nltk were optional; make it
# optional too so the module imports even without it (sentiment then falls
# back to 'Neutral').
try:
    from textblob import TextBlob
    TEXTBLOB_AVAILABLE = True
except ImportError:
    TEXTBLOB_AVAILABLE = False
    print("TextBlob not installed. Sentiment fallback will return Neutral.")


class DataProcessor:
    """Loads social-media CSV/TXT data and annotates each row with sentiment,
    emotion, a post category, Prime Bank mention counts and a viral score.

    GPT analysis is enabled only when an API key is supplied AND the openai
    package is installed.
    """

    def __init__(self, openai_api_key=None):
        """Set up the optional sentiment backends and keyword tables.

        Args:
            openai_api_key: optional OpenAI key enabling GPT features.
        """
        self.processed_data = None

        # VADER analyzer; construction can fail at runtime (e.g. the
        # vader_lexicon download did not succeed), so keep a None fallback.
        # (Was a bare `except:`; narrowed to Exception.)
        self.sia = None
        if NLTK_AVAILABLE:
            try:
                self.sia = SentimentIntensityAnalyzer()
            except Exception:
                self.sia = None

        # GPT only when both the key and the library are present.
        self.use_gpt = False
        if openai_api_key and OPENAI_AVAILABLE:
            openai.api_key = openai_api_key
            self.use_gpt = True

        # Banking-specific keyword buckets (topic patterns).
        self.banking_keywords = {
            'service_quality': ['customer service', 'staff', 'support', 'help', 'assistance'],
            'transaction': ['transfer', 'deposit', 'withdraw', 'payment', 'transaction'],
            'account': ['account', 'savings', 'checking', 'balance'],
            'loan': ['loan', 'mortgage', 'credit', 'interest rate'],
            'digital': ['app', 'online banking', 'mobile', 'website', 'digital'],
            'branch': ['branch', 'atm', 'location', 'queue', 'waiting']
        }

    def process_csv_files(self, uploaded_files):
        """Read each uploaded CSV, tag rows with their source file name and
        concatenate them. Unreadable files are skipped with a log message.

        Returns an empty DataFrame when no file could be read.
        """
        all_dataframes = []

        for uploaded_file in uploaded_files:
            try:
                df = pd.read_csv(uploaded_file)
                df['source_file'] = uploaded_file.name
                all_dataframes.append(df)
            except Exception as e:
                print(f"Error reading {uploaded_file.name}: {e}")

        if all_dataframes:
            combined_df = pd.concat(all_dataframes, ignore_index=True)
            return combined_df
        return pd.DataFrame()

    def process_txt_file(self, txt_file):
        """Turn a UTF-8 text upload (one review per line) into a DataFrame
        with 'text' and 'source_file' columns; blank lines are dropped."""
        content = txt_file.read().decode('utf-8')
        reviews = content.split('\n')

        df = pd.DataFrame({
            'text': [review.strip() for review in reviews if review.strip()],
            'source_file': txt_file.name
        })
        return df

    def analyze_sentiment(self, text):
        """Return a (label, score) pair for *text*.

        Uses VADER when available, otherwise TextBlob, otherwise
        ('Neutral', 0). Empty/NaN input is always ('Neutral', 0).
        """
        if pd.isna(text) or str(text).strip() == '':
            return 'Neutral', 0

        text_str = str(text)

        # Preferred backend: VADER compound score with the usual +-0.05 band.
        if self.sia:
            scores = self.sia.polarity_scores(text_str)
            compound = scores['compound']

            if compound >= 0.05:
                return 'Positive', compound
            elif compound <= -0.05:
                return 'Negative', compound
            else:
                return 'Neutral', compound

        # Fallback: TextBlob polarity with a wider +-0.1 neutral band.
        # (Was a bare `except:`; narrowed to Exception.)
        if TEXTBLOB_AVAILABLE:
            try:
                blob = TextBlob(text_str)
                polarity = blob.sentiment.polarity

                if polarity > 0.1:
                    return 'Positive', polarity
                elif polarity < -0.1:
                    return 'Negative', polarity
                else:
                    return 'Neutral', polarity
            except Exception:
                return 'Neutral', 0
        return 'Neutral', 0

    def detect_emotion(self, text):
        """Keyword-vote emotion detector: Joy / Frustration / Confusion,
        or 'Neutral' when no keyword matches."""
        if pd.isna(text):
            return 'Neutral'

        text_lower = str(text).lower()

        # Substring matches, so e.g. '?' anywhere counts toward Confusion.
        emotions = {
            'Joy': ['happy', 'excellent', 'amazing', 'great', 'wonderful', 'fantastic', 'love', 'best', 'thank you'],
            'Frustration': ['frustrated', 'angry', 'terrible', 'horrible', 'worst', 'hate', 'annoyed', 'disappointed'],
            'Confusion': ['confused', 'unclear', "don't understand", 'what', 'how', 'why', '?', 'help me']
        }

        emotion_scores = {}
        for emotion, keywords in emotions.items():
            score = sum(keyword in text_lower for keyword in keywords)
            emotion_scores[emotion] = score

        if max(emotion_scores.values()) > 0:
            return max(emotion_scores, key=emotion_scores.get)
        return 'Neutral'

    def categorize_post(self, text):
        """Classify a post as Inquiry / Complaint / Praise / Other.

        Order matters: Inquiry wins over Complaint/Praise when both match.
        """
        if pd.isna(text):
            return 'Other'

        text_lower = str(text).lower()

        if '?' in text_lower or any(word in text_lower for word in ['how', 'what', 'when', 'where']):
            return 'Inquiry'
        elif any(word in text_lower for word in ['complaint', 'problem', 'issue', 'bad', 'terrible']):
            return 'Complaint'
        elif any(word in text_lower for word in ['thank', 'great', 'excellent', 'love', 'best']):
            return 'Praise'
        else:
            return 'Other'

    def count_prime_mentions(self, text):
        """Count Prime Bank mentions via regex patterns.

        NOTE: patterns overlap ('prime\\s*bank' also matches 'primebank'),
        so 'primebank' counts twice and '@primebank' three times; callers
        mostly use this as a >0 flag plus a viral-score weight.
        """
        if pd.isna(text):
            return 0

        text_lower = str(text).lower()
        patterns = [
            r'prime\s*bank',
            r'primebank',
            r'@primebank'
        ]

        total_mentions = 0
        for pattern in patterns:
            mentions = len(re.findall(pattern, text_lower))
            total_mentions += mentions

        return total_mentions

    def process_all_data(self, df):
        """Annotate *df* with sentiment, emotion, category, mention count
        and viral score. Mutates and returns the same DataFrame.

        If no recognizable text column exists the frame is returned as-is.
        """
        # Normalize the text column name to 'text'.
        text_columns = ['text', 'content', 'message', 'review', 'comment', 'post']
        text_col = None

        for col in text_columns:
            if col in df.columns:
                text_col = col
                break

        if text_col and text_col != 'text':
            df['text'] = df[text_col]

        if 'text' not in df.columns:
            return df

        # Apply all analyses row by row.
        df[['sentiment', 'polarity']] = df['text'].apply(
            lambda x: pd.Series(self.analyze_sentiment(x))
        )

        df['emotion'] = df['text'].apply(self.detect_emotion)
        df['category'] = df['text'].apply(self.categorize_post)
        df['prime_mentions'] = df['text'].apply(self.count_prime_mentions)

        # Viral score: 10 per mention + likes + 2x shares (when present).
        df['viral_score'] = df['prime_mentions'] * 10
        if 'likes' in df.columns:
            df['viral_score'] += df['likes'].fillna(0)
        if 'shares' in df.columns:
            df['viral_score'] += df['shares'].fillna(0) * 2

        return df
src/visualizations.py
ADDED
|
@@ -0,0 +1,380 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import plotly.express as px
|
| 2 |
+
import plotly.graph_objects as go
|
| 3 |
+
from plotly.subplots import make_subplots
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
def create_sentiment_pie(df):
    """Build a pie chart of the 'sentiment' column distribution."""
    counts = df['sentiment'].value_counts()

    # Fixed palette so colors stay stable across reruns.
    palette = {
        'Positive': '#2ecc71',
        'Negative': '#e74c3c',
        'Neutral': '#95a5a6'
    }

    fig = px.pie(
        values=counts.values,
        names=counts.index,
        title="Sentiment Distribution",
        color_discrete_map=palette
    )
    fig.update_traces(
        textposition='inside',
        textinfo='percent+label',
        hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}<extra></extra>'
    )
    return fig
def create_emotion_bar(df):
    """Build a bar chart of the 'emotion' column distribution.

    Bars are colored per emotion via `color_discrete_map`.
    """
    emotion_counts = df['emotion'].value_counts()

    # One fixed color per emotion label.
    color_map = {
        'Joy': '#f39c12',
        'Frustration': '#e74c3c',
        'Confusion': '#3498db',
        'Anxiety': '#9b59b6',
        'Neutral': '#95a5a6'
    }

    # (Removed a dead `colors` list that was computed but never used —
    # px.bar colors the bars through color_discrete_map.)
    fig = px.bar(
        x=emotion_counts.index,
        y=emotion_counts.values,
        title="Emotion Detection",
        labels={'x': 'Emotion', 'y': 'Count'},
        color=emotion_counts.index,
        color_discrete_map=color_map
    )

    fig.update_layout(
        showlegend=False,
        xaxis_tickangle=-45,
        yaxis=dict(gridcolor='rgba(0,0,0,0.1)')
    )

    return fig
def create_category_donut(df):
    """Build a donut chart of post categories with the total in the hole."""
    category_counts = df['category'].value_counts()

    # Default plotly qualitative colors, fixed for stability.
    color_sequence = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

    fig = px.pie(
        values=category_counts.values,
        names=category_counts.index,
        title="Post Categories",
        hole=0.4,
        color_discrete_sequence=color_sequence
    )

    fig.update_traces(
        textposition='inside',
        textinfo='percent+label',
        hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}<extra></extra>'
    )

    # Total row count rendered in the donut hole.
    fig.add_annotation(
        text=f"Total<br>{len(df)}",
        x=0.5, y=0.5,
        xref="paper", yref="paper",
        showarrow=False,
        font=dict(size=20)
    )

    return fig
def create_mentions_timeline(df):
    """Build a dual-axis timeline of daily mentions vs. positive posts.

    Returns None when no recognizable date column exists, no date parses,
    or chart construction fails (failure is logged, not raised).
    """
    date_columns = ['date', 'created_at', 'timestamp', 'Date', 'post_date']
    date_col = None

    # Use the first known date column present.
    for col in date_columns:
        if col in df.columns:
            date_col = col
            break

    if not date_col:
        return None

    try:
        # Parse into a local series instead of mutating the caller's frame
        # (the original wrote a 'date_parsed' column onto df as a side effect).
        parsed = pd.to_datetime(df[date_col], errors='coerce')
        valid_mask = parsed.notna()

        if not valid_mask.any():
            return None

        df_valid = df[valid_mask].copy()
        df_valid['date_parsed'] = parsed[valid_mask]

        # Daily totals: mention sum and count of Positive posts.
        timeline_df = df_valid.groupby(df_valid['date_parsed'].dt.date).agg({
            'prime_mentions': 'sum',
            'sentiment': lambda x: (x == 'Positive').sum()
        }).reset_index()

        timeline_df.columns = ['date', 'mentions', 'positive_posts']

        # Two y-axes: mentions on the left, positive posts on the right.
        fig = make_subplots(specs=[[{"secondary_y": True}]])

        fig.add_trace(
            go.Scatter(
                x=timeline_df['date'],
                y=timeline_df['mentions'],
                name='Total Mentions',
                line=dict(color='#3498db', width=3),
                mode='lines+markers'
            ),
            secondary_y=False,
        )

        fig.add_trace(
            go.Scatter(
                x=timeline_df['date'],
                y=timeline_df['positive_posts'],
                name='Positive Posts',
                line=dict(color='#2ecc71', width=2, dash='dot'),
                mode='lines+markers'
            ),
            secondary_y=True,
        )

        fig.update_xaxes(title_text="Date")
        fig.update_yaxes(title_text="Number of Mentions", secondary_y=False)
        fig.update_yaxes(title_text="Positive Posts", secondary_y=True)

        fig.update_layout(
            title="Prime Bank Mentions Over Time",
            hovermode='x unified',
            showlegend=True,
            legend=dict(
                yanchor="top",
                y=0.99,
                xanchor="left",
                x=0.01
            )
        )

        return fig

    except Exception as e:
        # Best-effort chart: log and let the caller render nothing.
        print(f"Error creating timeline: {e}")
        return None
def create_summary_metrics(df):
    """Return the four headline dashboard metrics as formatted strings.

    Keys: total posts, posts mentioning Prime Bank, total mentions, and the
    positive-sentiment rate among mentioning posts (0 when there are none).
    """
    total_posts = len(df)
    mention_mask = df['prime_mentions'] > 0
    prime_posts = int(mention_mask.sum())
    total_mentions = df['prime_mentions'].sum()

    # Share of Positive posts among those that mention the bank.
    if prime_posts > 0:
        positives = (df.loc[mention_mask, 'sentiment'] == 'Positive').sum()
        positive_rate = positives / prime_posts * 100
    else:
        positive_rate = 0

    return {
        'Total Posts Analyzed': f"{total_posts:,}",
        'Posts Mentioning Prime Bank': f"{prime_posts:,}",
        'Total Prime Bank Mentions': f"{total_mentions:,}",
        'Positive Sentiment Rate': f"{positive_rate:.1f}%"
    }
def create_viral_posts_chart(df, top_n=10):
    """Build a horizontal bar chart of the top-N posts by viral score,
    colored by sentiment, with the full text available on hover."""
    top_viral = df.nlargest(top_n, 'viral_score')

    # Axis labels show at most 50 characters of the post text.
    def _preview(x):
        return x[:50] + '...' if len(str(x)) > 50 else x

    top_viral['text_truncated'] = top_viral['text'].apply(_preview)

    fig = px.bar(
        top_viral,
        x='viral_score',
        y='text_truncated',
        orientation='h',
        title=f'Top {top_n} Viral Posts',
        color='sentiment',
        color_discrete_map={
            'Positive': '#2ecc71',
            'Negative': '#e74c3c',
            'Neutral': '#95a5a6'
        },
        hover_data=['text', 'emotion', 'category']
    )

    fig.update_layout(
        yaxis={'categoryorder': 'total ascending'},
        xaxis_title="Viral Score",
        yaxis_title="Post Preview",
        showlegend=True
    )

    return fig
def create_word_frequency_chart(df, top_n=15):
    """Build a horizontal bar chart of the most frequent words in posts
    that mention Prime Bank; returns None when there is nothing to show."""
    from collections import Counter
    import re

    prime_posts = df[df['prime_mentions'] > 0]['text'].dropna()
    if len(prime_posts) == 0:
        return None

    corpus = ' '.join(prime_posts.astype(str)).lower()

    # Common function words plus the brand tokens themselves.
    stop_words = {
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
        'of', 'with', 'is', 'was', 'are', 'were', 'been', 'be', 'have', 'has',
        'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may',
        'might', 'must', 'can', 'prime', 'bank', 'primebank', 'i', 'me', 'my',
        'we', 'you', 'your', 'they', 'their', 'this', 'that', 'these', 'those'
    }

    # Keep alphabetic words longer than 3 chars that aren't stop words.
    tokens = [
        w for w in re.findall(r'\b[a-z]+\b', corpus)
        if w not in stop_words and len(w) > 3
    ]

    word_freq = Counter(tokens).most_common(top_n)
    if not word_freq:
        return None

    freq_df = pd.DataFrame(word_freq, columns=['Word', 'Frequency'])

    fig = px.bar(
        freq_df,
        x='Frequency',
        y='Word',
        orientation='h',
        title=f'Top {top_n} Words in Prime Bank Posts',
        color='Frequency',
        color_continuous_scale='Blues'
    )
    fig.update_layout(
        yaxis={'categoryorder': 'total ascending'},
        showlegend=False
    )
    return fig
def create_sentiment_by_category(df):
    """Create stacked bar chart of sentiment by category"""
    # Only posts that mention Prime Bank participate in the breakdown.
    prime_df = df[df['prime_mentions'] > 0]
    if len(prime_df) == 0:
        return None

    # Row-normalised crosstab -> percentage of each sentiment per category.
    pct = pd.crosstab(
        prime_df['category'],
        prime_df['sentiment'],
        normalize='index'
    ) * 100

    palette = {'Positive': '#2ecc71', 'Negative': '#e74c3c', 'Neutral': '#95a5a6'}

    fig = go.Figure()
    # One trace per sentiment that actually appears in the data.
    for label in ('Positive', 'Negative', 'Neutral'):
        if label not in pct.columns:
            continue
        fig.add_trace(go.Bar(
            name=label,
            x=pct.index,
            y=pct[label],
            marker_color=palette.get(label, '#95a5a6'),
            hovertemplate='%{x}<br>%{y:.1f}%<extra></extra>'
        ))

    fig.update_layout(
        barmode='stack',
        title='Sentiment Distribution by Post Category',
        xaxis_title='Category',
        yaxis_title='Percentage',
        yaxis=dict(tickformat='.0f', ticksuffix='%'),
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    return fig
|
| 335 |
+
|
| 336 |
+
def create_priority_matrix(df):
    """Create scatter plot showing priority posts"""
    # Copy so the urgency column added below never touches the caller's frame.
    prime_df = df[df['prime_mentions'] > 0].copy()
    if len(prime_df) == 0:
        return None

    # Urgency heuristic: negative sentiment and complaints each add 2,
    # frustrated/anxious emotion adds 1 (max score 5).
    urgency = pd.Series(0, index=prime_df.index)
    urgency[prime_df['sentiment'] == 'Negative'] += 2
    urgency[prime_df['category'] == 'Complaint'] += 2
    urgency[prime_df['emotion'].isin(['Frustration', 'Anxiety'])] += 1
    prime_df['urgency'] = urgency

    fig = px.scatter(
        prime_df,
        x='viral_score',
        y='urgency',
        size='prime_mentions',
        color='sentiment',
        hover_data=['text', 'emotion', 'category'],
        title='Priority Matrix: Viral Score vs Urgency',
        color_discrete_map={
            'Positive': '#2ecc71',
            'Negative': '#e74c3c',
            'Neutral': '#95a5a6'
        }
    )

    # Dashed quadrant dividers: fixed urgency threshold, median reach.
    fig.add_hline(y=2.5, line_dash="dash", line_color="gray", opacity=0.5)
    fig.add_vline(
        x=prime_df['viral_score'].median(),
        line_dash="dash", line_color="gray", opacity=0.5
    )

    # Quadrant labels pinned to the plot corners (paper coordinates).
    fig.add_annotation(x=0.95, y=0.95, text="High Priority",
                       xref="paper", yref="paper", showarrow=False)
    fig.add_annotation(x=0.05, y=0.95, text="Monitor",
                       xref="paper", yref="paper", showarrow=False)

    fig.update_layout(
        xaxis_title="Viral Score (Reach)",
        yaxis_title="Urgency Score",
        showlegend=True
    )

    return fig
|
test.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# test.py - Test if all packages are installed correctly
"""Smoke test: report in the Streamlit UI whether each dependency imports."""
import streamlit as st

st.write("Testing if Streamlit works!")

# Each probe catches only ImportError: the original bare `except:` clauses
# also swallowed SystemExit/KeyboardInterrupt and hid unrelated bugs.
try:
    import pandas as pd
    st.success("✅ Pandas imported successfully")
except ImportError:
    st.error("❌ Pandas import failed")

try:
    import plotly
    st.success("✅ Plotly imported successfully")
except ImportError:
    st.error("❌ Plotly import failed")

try:
    from textblob import TextBlob
    st.success("✅ TextBlob imported successfully")
except ImportError:
    st.error("❌ TextBlob import failed")

try:
    import nltk
    st.success("✅ NLTK imported successfully")
except ImportError:
    st.error("❌ NLTK import failed")

try:
    import openai
    st.success("✅ OpenAI imported successfully")
except ImportError:
    st.error("❌ OpenAI import failed")