FarhinSadia committed on
Commit
a544a7a
Β·
1 Parent(s): 0fe37af

Add Prime Bank Analytics Dashboard

Browse files
Files changed (8) hide show
  1. .gitignore +31 -0
  2. .streamlit/config.toml +0 -0
  3. app.py +204 -0
  4. create_test_data.py +35 -0
  5. src/__init__.py +0 -0
  6. src/data_processor.py +205 -0
  7. src/visualizations.py +380 -0
  8. test.py +34 -0
.gitignore ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment variables
2
+ .env
3
+ .env.local
4
+
5
+ # Data files
6
+ *.csv
7
+ *.txt
8
+ data/raw/*
9
+ data/processed/*
10
+
11
+ # Python
12
+ __pycache__/
13
+ *.py[cod]
14
+ *$py.class
15
+ *.so
16
+ .Python
17
+
18
+ # Streamlit
19
+ .streamlit/secrets.toml
20
+
21
+ # IDE
22
+ .vscode/
23
+ .idea/
24
+
25
+ # OS
26
+ .DS_Store
27
+ Thumbs.db
28
+
29
+ # Keep empty directories
30
+ !data/raw/.gitkeep
31
+ !data/processed/.gitkeep
.streamlit/config.toml ADDED
File without changes
app.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit dashboard: upload social-media CSV exports and review TXT
files, then analyze Prime Bank mentions, sentiment, emotion and virality."""

import streamlit as st
import pandas as pd

from src.data_processor import DataProcessor
# Explicit names instead of the original `from src.visualizations import *`
# so this script's dependencies are visible at a glance.
from src.visualizations import (
    create_sentiment_pie,
    create_emotion_bar,
    create_category_donut,
    create_summary_metrics,
)

# Page config
st.set_page_config(
    page_title="Prime Bank Analytics Dashboard",
    page_icon="🏦",
    layout="wide"
)

# Title
st.title("🏦 Prime Bank Social Media Analytics Dashboard")

# Sidebar for configuration
with st.sidebar:
    st.header("βš™οΈ Configuration")

    # API Key input (optional; enables GPT features downstream)
    api_key = st.text_input(
        "OpenAI API Key (optional):",
        type="password",
        help="Enter your OpenAI API key for advanced GPT analysis"
    )

    if api_key:
        st.success("βœ… API Key configured")
        use_gpt = st.checkbox("Enable GPT Analysis", value=True)
    else:
        st.info("πŸ’‘ Running without GPT features")
        use_gpt = False

    st.markdown("---")
    st.markdown("### About")
    st.markdown("Upload CSV files from social media platforms and TXT files with reviews to analyze Prime Bank's online presence.")

# Initialize processor with or without API key
processor = DataProcessor(openai_api_key=api_key if use_gpt else None)

# Main content
st.markdown("### πŸ“ Upload Your Data Files")

# File upload section
col1, col2 = st.columns(2)

with col1:
    csv_files = st.file_uploader(
        "Upload CSV files (Facebook, Twitter, etc.)",
        type=['csv'],
        accept_multiple_files=True,
        help="Upload one or more CSV files containing social media data"
    )

with col2:
    txt_file = st.file_uploader(
        "Upload TXT file (Manual reviews)",
        type=['txt'],
        help="Upload a text file with reviews, one per line"
    )

# Add sample data download option
with st.expander("πŸ“ Need sample data to test?"):
    st.markdown("""
    Download these sample files to test the dashboard:
    - [Sample CSV Data](https://example.com)
    - [Sample TXT Reviews](https://example.com)

    Or create test data by running:
    ```bash
    python create_test_data.py
    ```
    """)

# Process files when uploaded
if csv_files or txt_file:
    with st.spinner('Processing files...'):
        all_data = []

        # Process CSV files
        if csv_files:
            st.write(f"πŸ“Š Processing {len(csv_files)} CSV file(s)...")
            csv_data = processor.process_csv_files(csv_files)
            if not csv_data.empty:
                all_data.append(csv_data)
                st.success(f"βœ… Loaded {len(csv_data)} rows from CSV files")

        # Process TXT file
        if txt_file:
            st.write("πŸ“ Processing TXT file...")
            txt_data = processor.process_txt_file(txt_file)
            if not txt_data.empty:
                all_data.append(txt_data)
                st.success(f"βœ… Loaded {len(txt_data)} reviews from TXT file")

        # Combine all data
        if all_data:
            combined_df = pd.concat(all_data, ignore_index=True)

            # Enrich with sentiment / emotion / category / mention columns
            with st.spinner('Analyzing sentiment and emotions...'):
                processed_df = processor.process_all_data(combined_df)

            # Filter for Prime Bank mentions
            prime_df = processed_df[processed_df['prime_mentions'] > 0]

            st.success(f"βœ… Analysis complete! Found {len(prime_df)} posts mentioning Prime Bank out of {len(processed_df)} total posts")

            # Display metrics. Zip columns with the metric items instead
            # of indexing a hard-coded list of 4 columns, so this keeps
            # working if create_summary_metrics grows or shrinks.
            st.header("πŸ“Š Key Metrics")
            metrics = create_summary_metrics(processed_df)

            for metric_col, (label, value) in zip(st.columns(len(metrics)), metrics.items()):
                with metric_col:
                    st.metric(label, value)

            # Display charts
            st.header("πŸ“ˆ Analysis")

            # First row of charts: one (column, chart builder, empty-state
            # message) triple per chart keeps the three branches in sync.
            col1, col2, col3 = st.columns(3)
            chart_specs = [
                (col1, create_sentiment_pie, "No Prime Bank mentions found for sentiment analysis"),
                (col2, create_emotion_bar, "No Prime Bank mentions found for emotion analysis"),
                (col3, create_category_donut, "No Prime Bank mentions found for category analysis"),
            ]
            for chart_col, chart_fn, empty_msg in chart_specs:
                with chart_col:
                    if len(prime_df) > 0:
                        st.plotly_chart(chart_fn(prime_df), use_container_width=True)
                    else:
                        st.info(empty_msg)

            # Top Viral Posts
            st.header("πŸ”₯ Top Viral Posts Mentioning Prime Bank")

            if len(prime_df) > 0:
                top_posts = prime_df.nlargest(5, 'viral_score')[['text', 'sentiment', 'emotion', 'category', 'prime_mentions']]

                # Number posts by rank (1..5). The original used the
                # DataFrame index label (idx+1), which is an arbitrary row
                # id after filtering and nlargest, so the top post could be
                # shown as e.g. "Post #103".
                for rank, (_, row) in enumerate(top_posts.iterrows(), start=1):
                    with st.expander(f"Post #{rank} - {row['sentiment']} | {row['emotion']}"):
                        st.write(row['text'])
                        col1, col2, col3, col4 = st.columns(4)
                        col1.metric("Sentiment", row['sentiment'])
                        col2.metric("Emotion", row['emotion'])
                        col3.metric("Category", row['category'])
                        col4.metric("Mentions", row['prime_mentions'])
            else:
                st.info("No posts mentioning Prime Bank found")

            # Data table
            with st.expander("πŸ“‹ View All Data"):
                st.dataframe(processed_df)

            # Download processed data
            csv = processed_df.to_csv(index=False)
            st.download_button(
                label="πŸ“₯ Download Processed Data",
                data=csv,
                file_name="prime_bank_analysis.csv",
                mime="text/csv"
            )

else:
    # No files uploaded yet
    st.info("πŸ‘† Please upload CSV files and/or TXT file to begin analysis")

    # Show instructions
    col1, col2, col3 = st.columns(3)

    with col1:
        st.markdown("""
        ### πŸ“Š CSV Files Should Contain:
        - A text column (text/content/message)
        - Optional: date, likes, shares
        - Can upload multiple files
        """)

    with col2:
        st.markdown("""
        ### πŸ“ TXT File Format:
        - One review per line
        - Plain text format
        - Manual reviews/comments
        """)

    with col3:
        st.markdown("""
        ### 🎯 Analysis Includes:
        - Sentiment (Positive/Negative)
        - Emotions (Joy/Frustration)
        - Categories (Inquiry/Complaint)
        """)
create_test_data.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Generate small fixture files for manually exercising the dashboard."""

import pandas as pd

# Ten sample social-media posts mixing praise, complaints and questions.
POSTS = [
    'Prime Bank has the best customer service! Love their mobile app.',
    'Worst experience at Prime Bank branch today. Waited 2 hours!',
    'How do I apply for a loan at Prime Bank?',
    'Prime Bank ATM is not working again. So frustrated!',
    'Thank you Prime Bank staff for helping with my account.',
    'What are Prime Bank interest rates?',
    'Prime Bank online banking is confusing.',
    'Excellent service at Prime Bank downtown branch!',
    'Prime Bank charged me hidden fees. Very disappointed.',
    'Can someone explain Prime Bank credit card benefits?',
]

sample_data = pd.DataFrame({
    'text': POSTS,
    'date': pd.date_range('2024-01-01', periods=10),
    'likes': [45, 12, 5, 89, 34, 8, 15, 67, 102, 22],
    'shares': [5, 2, 1, 15, 8, 1, 3, 12, 25, 4],
})

# Write the CSV fixture next to the script.
sample_data.to_csv('test_social_media_data.csv', index=False)
print("βœ… Created test_social_media_data.csv")

# Write the TXT fixture: one review per line, no trailing newline
# (same bytes as the original triple-quoted literal).
REVIEWS = [
    'Prime Bank provides exceptional service. Highly recommend!',
    'Terrible experience with Prime Bank customer support.',
    'Prime Bank mobile app keeps crashing. Please fix this!',
    'Love the new features in Prime Bank online banking.',
    'Why does Prime Bank charge so many fees?',
]

with open('test_reviews.txt', 'w') as f:
    f.write('\n'.join(REVIEWS))
print("βœ… Created test_reviews.txt")
src/__init__.py ADDED
File without changes
src/data_processor.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import re
3
+ from textblob import TextBlob
4
+ import numpy as np
5
+ import json
6
+
7
+ # Try to import optional dependencies
8
+ try:
9
+ import openai
10
+ OPENAI_AVAILABLE = True
11
+ except ImportError:
12
+ OPENAI_AVAILABLE = False
13
+ print("OpenAI not installed. GPT features will be disabled.")
14
+
15
+ try:
16
+ import nltk
17
+ from nltk.sentiment import SentimentIntensityAnalyzer
18
+ nltk.download('vader_lexicon', quiet=True)
19
+ NLTK_AVAILABLE = True
20
+ except ImportError:
21
+ NLTK_AVAILABLE = False
22
+ print("NLTK not installed. Using TextBlob only.")
23
+
24
+ class DataProcessor:
25
+ def __init__(self, openai_api_key=None):
26
+ self.processed_data = None
27
+
28
+ # Initialize VADER if available
29
+ if NLTK_AVAILABLE:
30
+ try:
31
+ self.sia = SentimentIntensityAnalyzer()
32
+ except:
33
+ self.sia = None
34
+ else:
35
+ self.sia = None
36
+
37
+ # Set up OpenAI if key provided and library available
38
+ self.use_gpt = False
39
+ if openai_api_key and OPENAI_AVAILABLE:
40
+ openai.api_key = openai_api_key
41
+ self.use_gpt = True
42
+
43
+ # Banking-specific patterns
44
+ self.banking_keywords = {
45
+ 'service_quality': ['customer service', 'staff', 'support', 'help', 'assistance'],
46
+ 'transaction': ['transfer', 'deposit', 'withdraw', 'payment', 'transaction'],
47
+ 'account': ['account', 'savings', 'checking', 'balance'],
48
+ 'loan': ['loan', 'mortgage', 'credit', 'interest rate'],
49
+ 'digital': ['app', 'online banking', 'mobile', 'website', 'digital'],
50
+ 'branch': ['branch', 'atm', 'location', 'queue', 'waiting']
51
+ }
52
+
53
+ def process_csv_files(self, uploaded_files):
54
+ """Process multiple CSV files"""
55
+ all_dataframes = []
56
+
57
+ for uploaded_file in uploaded_files:
58
+ try:
59
+ df = pd.read_csv(uploaded_file)
60
+ df['source_file'] = uploaded_file.name
61
+ all_dataframes.append(df)
62
+ except Exception as e:
63
+ print(f"Error reading {uploaded_file.name}: {e}")
64
+
65
+ if all_dataframes:
66
+ combined_df = pd.concat(all_dataframes, ignore_index=True)
67
+ return combined_df
68
+ return pd.DataFrame()
69
+
70
+ def process_txt_file(self, txt_file):
71
+ """Process text file with reviews"""
72
+ content = txt_file.read().decode('utf-8')
73
+ reviews = content.split('\n')
74
+
75
+ df = pd.DataFrame({
76
+ 'text': [review.strip() for review in reviews if review.strip()],
77
+ 'source_file': txt_file.name
78
+ })
79
+ return df
80
+
81
+ def analyze_sentiment(self, text):
82
+ """Analyze sentiment - use VADER if available, else TextBlob"""
83
+ if pd.isna(text) or str(text).strip() == '':
84
+ return 'Neutral', 0
85
+
86
+ text_str = str(text)
87
+
88
+ # Try VADER first if available
89
+ if self.sia:
90
+ scores = self.sia.polarity_scores(text_str)
91
+ compound = scores['compound']
92
+
93
+ if compound >= 0.05:
94
+ return 'Positive', compound
95
+ elif compound <= -0.05:
96
+ return 'Negative', compound
97
+ else:
98
+ return 'Neutral', compound
99
+
100
+ # Fallback to TextBlob
101
+ try:
102
+ blob = TextBlob(text_str)
103
+ polarity = blob.sentiment.polarity
104
+
105
+ if polarity > 0.1:
106
+ return 'Positive', polarity
107
+ elif polarity < -0.1:
108
+ return 'Negative', polarity
109
+ else:
110
+ return 'Neutral', polarity
111
+ except:
112
+ return 'Neutral', 0
113
+
114
+ def detect_emotion(self, text):
115
+ """Detect emotion in text"""
116
+ if pd.isna(text):
117
+ return 'Neutral'
118
+
119
+ text_lower = str(text).lower()
120
+
121
+ # Emotion keywords
122
+ emotions = {
123
+ 'Joy': ['happy', 'excellent', 'amazing', 'great', 'wonderful', 'fantastic', 'love', 'best', 'thank you'],
124
+ 'Frustration': ['frustrated', 'angry', 'terrible', 'horrible', 'worst', 'hate', 'annoyed', 'disappointed'],
125
+ 'Confusion': ['confused', 'unclear', "don't understand", 'what', 'how', 'why', '?', 'help me']
126
+ }
127
+
128
+ emotion_scores = {}
129
+ for emotion, keywords in emotions.items():
130
+ score = sum(keyword in text_lower for keyword in keywords)
131
+ emotion_scores[emotion] = score
132
+
133
+ if max(emotion_scores.values()) > 0:
134
+ return max(emotion_scores, key=emotion_scores.get)
135
+ return 'Neutral'
136
+
137
+ def categorize_post(self, text):
138
+ """Categorize post type"""
139
+ if pd.isna(text):
140
+ return 'Other'
141
+
142
+ text_lower = str(text).lower()
143
+
144
+ if '?' in text_lower or any(word in text_lower for word in ['how', 'what', 'when', 'where']):
145
+ return 'Inquiry'
146
+ elif any(word in text_lower for word in ['complaint', 'problem', 'issue', 'bad', 'terrible']):
147
+ return 'Complaint'
148
+ elif any(word in text_lower for word in ['thank', 'great', 'excellent', 'love', 'best']):
149
+ return 'Praise'
150
+ else:
151
+ return 'Other'
152
+
153
+ def count_prime_mentions(self, text):
154
+ """Count Prime Bank mentions"""
155
+ if pd.isna(text):
156
+ return 0
157
+
158
+ text_lower = str(text).lower()
159
+ patterns = [
160
+ r'prime\s*bank',
161
+ r'primebank',
162
+ r'@primebank'
163
+ ]
164
+
165
+ total_mentions = 0
166
+ for pattern in patterns:
167
+ mentions = len(re.findall(pattern, text_lower))
168
+ total_mentions += mentions
169
+
170
+ return total_mentions
171
+
172
+ def process_all_data(self, df):
173
+ """Apply all processing to dataframe"""
174
+ # Find text column
175
+ text_columns = ['text', 'content', 'message', 'review', 'comment', 'post']
176
+ text_col = None
177
+
178
+ for col in text_columns:
179
+ if col in df.columns:
180
+ text_col = col
181
+ break
182
+
183
+ if text_col and text_col != 'text':
184
+ df['text'] = df[text_col]
185
+
186
+ if 'text' not in df.columns:
187
+ return df
188
+
189
+ # Apply all analyses
190
+ df[['sentiment', 'polarity']] = df['text'].apply(
191
+ lambda x: pd.Series(self.analyze_sentiment(x))
192
+ )
193
+
194
+ df['emotion'] = df['text'].apply(self.detect_emotion)
195
+ df['category'] = df['text'].apply(self.categorize_post)
196
+ df['prime_mentions'] = df['text'].apply(self.count_prime_mentions)
197
+
198
+ # Calculate viral score
199
+ df['viral_score'] = df['prime_mentions'] * 10
200
+ if 'likes' in df.columns:
201
+ df['viral_score'] += df['likes'].fillna(0)
202
+ if 'shares' in df.columns:
203
+ df['viral_score'] += df['shares'].fillna(0) * 2
204
+
205
+ return df
src/visualizations.py ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import plotly.express as px
2
+ import plotly.graph_objects as go
3
+ from plotly.subplots import make_subplots
4
+ import pandas as pd
5
+
6
def create_sentiment_pie(df):
    """Pie chart of the 'sentiment' label distribution in *df*.

    Returns a plotly Figure; expects a 'sentiment' column with labels
    like 'Positive' / 'Negative' / 'Neutral'.
    """
    sentiment_counts = df['sentiment'].value_counts()

    fig = px.pie(
        values=sentiment_counts.values,
        names=sentiment_counts.index,
        # `color=` is required for color_discrete_map to apply in px.pie;
        # without it the map below was silently ignored and default
        # colors were used.
        color=sentiment_counts.index,
        title="Sentiment Distribution",
        color_discrete_map={
            'Positive': '#2ecc71',
            'Negative': '#e74c3c',
            'Neutral': '#95a5a6'
        }
    )

    fig.update_traces(
        textposition='inside',
        textinfo='percent+label',
        hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}<extra></extra>'
    )

    return fig
28
+
29
def create_emotion_bar(df):
    """Bar chart of detected emotions (one bar per 'emotion' label).

    Returns a plotly Figure.
    """
    emotion_counts = df['emotion'].value_counts()

    # Fixed palette per emotion; labels missing from the map fall back
    # to plotly defaults. (The original also built an unused `colors`
    # list here — px.bar consumes the mapping directly.)
    color_map = {
        'Joy': '#f39c12',
        'Frustration': '#e74c3c',
        'Confusion': '#3498db',
        'Anxiety': '#9b59b6',
        'Neutral': '#95a5a6'
    }

    fig = px.bar(
        x=emotion_counts.index,
        y=emotion_counts.values,
        title="Emotion Detection",
        labels={'x': 'Emotion', 'y': 'Count'},
        color=emotion_counts.index,
        color_discrete_map=color_map
    )

    fig.update_layout(
        showlegend=False,
        xaxis_tickangle=-45,
        yaxis=dict(gridcolor='rgba(0,0,0,0.1)')
    )

    return fig
60
+
61
def create_category_donut(df):
    """Donut chart of post categories with the total post count centered.

    Returns a plotly Figure.
    """
    counts = df['category'].value_counts()

    fig = px.pie(
        values=counts.values,
        names=counts.index,
        title="Post Categories",
        hole=0.4,
        color_discrete_sequence=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd'],
    )

    fig.update_traces(
        textposition='inside',
        textinfo='percent+label',
        hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}<extra></extra>',
    )

    # Overall post count displayed in the donut hole.
    fig.add_annotation(
        text=f"Total<br>{len(df)}",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=20),
    )

    return fig
93
+
94
def create_mentions_timeline(df):
    """Dual-axis timeline: total mentions vs. positive posts per day.

    Returns a plotly Figure, or None when no recognizable date column
    exists or no dates parse. Unlike the original, the caller's
    DataFrame is NOT mutated (it used to gain a 'date_parsed' column).
    Chart construction is best-effort: any failure is logged and None
    is returned.
    """
    date_columns = ['date', 'created_at', 'timestamp', 'Date', 'post_date']
    date_col = next((col for col in date_columns if col in df.columns), None)

    if not date_col:
        return None

    try:
        # Parse into a local Series instead of writing into df.
        parsed = pd.to_datetime(df[date_col], errors='coerce')
        valid_mask = parsed.notna()

        if not valid_mask.any():
            return None

        df_valid = df[valid_mask].copy()
        df_valid['date_parsed'] = parsed[valid_mask]

        # Per-day totals: summed mentions and count of positive posts.
        timeline_df = df_valid.groupby(df_valid['date_parsed'].dt.date).agg({
            'prime_mentions': 'sum',
            'sentiment': lambda x: (x == 'Positive').sum()
        }).reset_index()

        timeline_df.columns = ['date', 'mentions', 'positive_posts']

        # Two y-axes: mentions (left) and positive posts (right).
        fig = make_subplots(specs=[[{"secondary_y": True}]])

        fig.add_trace(
            go.Scatter(
                x=timeline_df['date'],
                y=timeline_df['mentions'],
                name='Total Mentions',
                line=dict(color='#3498db', width=3),
                mode='lines+markers'
            ),
            secondary_y=False,
        )

        fig.add_trace(
            go.Scatter(
                x=timeline_df['date'],
                y=timeline_df['positive_posts'],
                name='Positive Posts',
                line=dict(color='#2ecc71', width=2, dash='dot'),
                mode='lines+markers'
            ),
            secondary_y=True,
        )

        fig.update_xaxes(title_text="Date")
        fig.update_yaxes(title_text="Number of Mentions", secondary_y=False)
        fig.update_yaxes(title_text="Positive Posts", secondary_y=True)

        fig.update_layout(
            title="Prime Bank Mentions Over Time",
            hovermode='x unified',
            showlegend=True,
            legend=dict(
                yanchor="top",
                y=0.99,
                xanchor="left",
                x=0.01
            )
        )

        return fig

    except Exception as e:
        # Deliberate best-effort: a chart failure must not crash the app.
        print(f"Error creating timeline: {e}")
        return None
175
+
176
def create_summary_metrics(df):
    """Return a dict of display-ready headline metrics for the dashboard.

    Keys are human-readable labels; values are pre-formatted strings
    (thousands separators, percent with one decimal).
    """
    prime_df = df[df['prime_mentions'] > 0]
    prime_posts = len(prime_df)

    # Share of Prime-Bank posts carrying a 'Positive' sentiment label;
    # zero when nothing mentions the bank (avoids division by zero).
    positive_rate = (
        (prime_df['sentiment'] == 'Positive').sum() / prime_posts * 100
        if prime_posts
        else 0
    )

    return {
        'Total Posts Analyzed': f"{len(df):,}",
        'Posts Mentioning Prime Bank': f"{prime_posts:,}",
        'Total Prime Bank Mentions': f"{df['prime_mentions'].sum():,}",
        'Positive Sentiment Rate': f"{positive_rate:.1f}%"
    }
197
+
198
def create_viral_posts_chart(df, top_n=10):
    """Horizontal bar chart of the *top_n* posts ranked by viral_score.

    Returns a plotly Figure colored by sentiment, with the full text in
    the hover data and a truncated preview on the y axis.
    """
    # nlargest already returns a new frame; copy() makes the column
    # assignment below unambiguously safe.
    top_viral = df.nlargest(top_n, 'viral_score').copy()

    def _preview(value):
        # Coerce to str BEFORE slicing. The original measured len(str(x))
        # but sliced the raw cell, which raises on non-string cells
        # (e.g. NaN) longer than 50 characters and leaked non-strings
        # into the display column otherwise.
        text = str(value)
        return text[:50] + '...' if len(text) > 50 else text

    top_viral['text_truncated'] = top_viral['text'].apply(_preview)

    fig = px.bar(
        top_viral,
        x='viral_score',
        y='text_truncated',
        orientation='h',
        title=f'Top {top_n} Viral Posts',
        color='sentiment',
        color_discrete_map={
            'Positive': '#2ecc71',
            'Negative': '#e74c3c',
            'Neutral': '#95a5a6'
        },
        hover_data=['text', 'emotion', 'category']
    )

    fig.update_layout(
        yaxis={'categoryorder': 'total ascending'},
        xaxis_title="Viral Score",
        yaxis_title="Post Preview",
        showlegend=True
    )

    return fig
232
+
233
def create_word_frequency_chart(df, top_n=15):
    """Horizontal bar chart of the most frequent words in posts that
    mention Prime Bank.

    Returns a plotly Figure, or None when no Prime Bank posts exist or
    no words survive the stop-word filter.
    """
    from collections import Counter
    import re

    prime_texts = df[df['prime_mentions'] > 0]['text'].dropna()

    if len(prime_texts) == 0:
        return None

    # Single lowercased corpus across all matching posts.
    corpus = ' '.join(prime_texts.astype(str)).lower()

    # Common filler words plus the bank's own name.
    stop_words = {
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
        'of', 'with', 'is', 'was', 'are', 'were', 'been', 'be', 'have', 'has',
        'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may',
        'might', 'must', 'can', 'prime', 'bank', 'primebank', 'i', 'me', 'my',
        'we', 'you', 'your', 'they', 'their', 'this', 'that', 'these', 'those'
    }

    # Keep alphabetic tokens longer than 3 chars that are not stop words.
    tokens = [
        word for word in re.findall(r'\b[a-z]+\b', corpus)
        if word not in stop_words and len(word) > 3
    ]

    most_common = Counter(tokens).most_common(top_n)
    if not most_common:
        return None

    freq_df = pd.DataFrame(most_common, columns=['Word', 'Frequency'])

    fig = px.bar(
        freq_df,
        x='Frequency',
        y='Word',
        orientation='h',
        title=f'Top {top_n} Words in Prime Bank Posts',
        color='Frequency',
        color_continuous_scale='Blues'
    )

    fig.update_layout(
        yaxis={'categoryorder': 'total ascending'},
        showlegend=False
    )

    return fig
286
+
287
def create_sentiment_by_category(df):
    """Stacked percentage bar chart: sentiment mix within each category.

    Only posts that mention Prime Bank are considered. Returns a plotly
    Figure, or None when no post mentions the bank.
    """
    prime_df = df[df['prime_mentions'] > 0]

    if prime_df.empty:
        return None

    # Row-normalized crosstab -> per-category sentiment percentages.
    pct = pd.crosstab(
        prime_df['category'],
        prime_df['sentiment'],
        normalize='index'
    ) * 100

    palette = {'Positive': '#2ecc71', 'Negative': '#e74c3c', 'Neutral': '#95a5a6'}

    fig = go.Figure()
    for label in ('Positive', 'Negative', 'Neutral'):
        if label not in pct.columns:
            continue
        fig.add_trace(go.Bar(
            name=label,
            x=pct.index,
            y=pct[label],
            marker_color=palette.get(label, '#95a5a6'),
            hovertemplate='%{x}<br>%{y:.1f}%<extra></extra>'
        ))

    fig.update_layout(
        barmode='stack',
        title='Sentiment Distribution by Post Category',
        xaxis_title='Category',
        yaxis_title='Percentage',
        yaxis=dict(tickformat='.0f', ticksuffix='%'),
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    return fig
335
+
336
def create_priority_matrix(df):
    """Scatter of viral reach (x) vs. urgency (y) for Prime Bank posts.

    Returns a plotly Figure with quadrant guides, or None when no post
    mentions the bank.
    """
    prime_df = df[df['prime_mentions'] > 0].copy()

    if prime_df.empty:
        return None

    # Urgency heuristic: negative sentiment +2, complaint +2,
    # frustrated/anxious emotion +1.
    prime_df['urgency'] = 0
    prime_df.loc[prime_df['sentiment'] == 'Negative', 'urgency'] += 2
    prime_df.loc[prime_df['category'] == 'Complaint', 'urgency'] += 2
    prime_df.loc[prime_df['emotion'].isin(['Frustration', 'Anxiety']), 'urgency'] += 1

    fig = px.scatter(
        prime_df,
        x='viral_score',
        y='urgency',
        size='prime_mentions',
        color='sentiment',
        hover_data=['text', 'emotion', 'category'],
        title='Priority Matrix: Viral Score vs Urgency',
        color_discrete_map={
            'Positive': '#2ecc71',
            'Negative': '#e74c3c',
            'Neutral': '#95a5a6'
        }
    )

    # Quadrant guides: fixed urgency threshold, median viral score.
    fig.add_hline(y=2.5, line_dash="dash", line_color="gray", opacity=0.5)
    fig.add_vline(x=prime_df['viral_score'].median(), line_dash="dash", line_color="gray", opacity=0.5)

    # Quadrant labels in paper coordinates.
    fig.add_annotation(x=0.95, y=0.95, text="High Priority", xref="paper", yref="paper", showarrow=False)
    fig.add_annotation(x=0.05, y=0.95, text="Monitor", xref="paper", yref="paper", showarrow=False)

    fig.update_layout(
        xaxis_title="Viral Score (Reach)",
        yaxis_title="Urgency Score",
        showlegend=True
    )

    return fig
test.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# test.py - Test if all packages are installed correctly
import streamlit as st
import importlib

st.write("Testing if Streamlit works!")

# One (module, display label) pair per dependency; a data-driven loop
# replaces five copy-pasted try/except blocks. `except Exception:`
# (not the original bare `except:`) so KeyboardInterrupt/SystemExit
# still propagate, while broken installs are still reported.
for package, label in [
    ('pandas', 'Pandas'),
    ('plotly', 'Plotly'),
    ('textblob', 'TextBlob'),
    ('nltk', 'NLTK'),
    ('openai', 'OpenAI'),
]:
    try:
        importlib.import_module(package)
        st.success(f"βœ… {label} imported successfully")
    except Exception:
        st.error(f"❌ {label} import failed")