FarhinSadia committed on
Commit
4521b4e
·
1 Parent(s): 66aadcd

Add Hugging Face configuration

Browse files
Files changed (2) hide show
  1. README_HF.md +13 -0
  2. app_hf.py +297 -0
README_HF.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Bank User Sentiment Analysis
3
+ emoji: 🏦
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: streamlit
7
+ sdk_version: 1.35.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # Bank User Sentiment Analysis
13
+ Analyzing customer sentiment for Prime Bank
app_hf.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ import glob
5
+ from src.data_processor import DataProcessor
6
+ from src.insights_generator import InsightsGenerator
7
+ from src.visualizations import *
8
+ from dotenv import load_dotenv
9
+
10
+ # Load environment variables from .env file
11
+ load_dotenv()
12
+
13
+ # --- Page Configuration ---
14
+ st.set_page_config(
15
+ page_title="Prime Bank Analytics Dashboard",
16
+ page_icon="🏦",
17
+ layout="wide"
18
+ )
19
+
20
+ # --- Helper function to identify text column ---
21
def find_text_column(df):
    """Pick the column of ``df`` most likely to contain free-form text.

    Candidates are tried in this order:

    1. An exact match against a list of well-known text column names.
    2. A string-typed column whose name contains a text keyword.
    3. A string-typed column whose first few non-null values average
       more than 20 characters.
    4. Any column whose name contains a text keyword, whatever its dtype
       (kept as a last resort so a keyword match is never lost).

    Args:
        df: DataFrame to inspect.

    Returns:
        The label of the chosen column, or None when ``df`` is empty or no
        column qualifies.
    """
    if df.empty:
        return None

    known_names = (
        'text', 'Text', 'content', 'Content', 'message', 'Message',
        'review', 'Review', 'comment', 'Comment', 'post', 'Post',
        'review_text', 'Review Text', 'post_text', 'Post Text',
        'comment_text', 'Comment Text', 'description', 'Description',
    )
    for name in known_names:
        if name in df.columns:
            return name

    def is_textual(series):
        # Covers both classic object columns and pandas' dedicated string dtypes.
        return (
            pd.api.types.is_object_dtype(series)
            or pd.api.types.is_string_dtype(series)
        )

    keywords = ('text', 'content', 'review', 'comment', 'post')
    columns = list(df.columns)
    # str() because column labels are not guaranteed to be strings.
    # Positions (not labels) are used below so duplicate labels stay unambiguous.
    keyword_positions = [
        pos for pos, col in enumerate(columns)
        if any(keyword in str(col).lower() for keyword in keywords)
    ]

    # Prefer keyword matches that actually hold strings, so a numeric
    # identifier such as "post_id" does not shadow a real text column.
    for pos in keyword_positions:
        if is_textual(df.iloc[:, pos]):
            return columns[pos]

    # Fall back to content: a string column whose sampled values are long.
    for pos, col in enumerate(columns):
        series = df.iloc[:, pos]
        if not is_textual(series):
            continue
        sample = series.dropna().head()
        if sample.empty:
            continue
        try:
            mean_length = sample.astype(str).str.len().mean()
        except (TypeError, ValueError):
            continue
        if mean_length > 20:
            return col

    # Last resort: a keyword-named column even though it is not string-typed.
    if keyword_positions:
        return columns[keyword_positions[0]]
    return None
47
+
48
+ # --- Caching for Performance ---
49
@st.cache_data
def load_and_process_data():
    """Load uploaded files, process them, and build the insights dictionary.

    CSV/TXT files are read from ``data/uploads``. CSV files whose name
    contains "post" (plus any CSV matching neither "post" nor "comment")
    are treated as posts; CSV files whose name contains "comment" and all
    TXT files are treated as comments. A separate
    ``perfected_data/all_posts_with_comments.txt`` file, when present, is
    loaded line by line and preferred as the input for AI recommendations.

    Both directories are created if missing. Read errors are reported with
    ``st.error`` and the offending file is skipped.

    Returns:
        tuple: ``(processed_posts_df, all_text_df, insights,
        processed_perfected_df)``. When there is nothing to analyse the
        tuple is ``(empty DataFrame, empty DataFrame, None, empty DataFrame)``.

    NOTE(review): the function takes no arguments and is wrapped in
    ``st.cache_data``, so newly uploaded files are presumably not picked up
    until the cache is cleared - confirm this is intended.
    """
    DATA_DIR = 'data/uploads'
    PERFECTED_DATA_DIR = 'perfected_data'  # Folder for perfected data
    PERFECTED_FILE_NAME = 'all_posts_with_comments.txt'

    def empty_result():
        # Shape of the "nothing to analyse" return value.
        return pd.DataFrame(), pd.DataFrame(), None, pd.DataFrame()

    def has_ext(path, ext):
        # Case-insensitive so "POSTS.CSV" is not silently ignored.
        return path.lower().endswith(ext)

    def read_text_lines(path):
        # One row per non-blank line of a UTF-8 text file, in a 'text' column.
        with open(path, 'r', encoding='utf-8') as handle:
            stripped = (line.strip() for line in handle)
            return pd.DataFrame({'text': [line for line in stripped if line]})

    def drop_blank_text(df):
        # Remove rows whose 'text' is missing or whitespace-only.
        return df[df['text'].notna() & (df['text'].str.strip() != '')]

    def prime_only(df):
        # Rows with at least one Prime Bank mention; empty frame if unavailable.
        if df.empty or 'prime_mentions' not in df.columns:
            return pd.DataFrame()
        return df[df['prime_mentions'] > 0].copy()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(DATA_DIR, exist_ok=True)
    os.makedirs(PERFECTED_DATA_DIR, exist_ok=True)

    all_files = glob.glob(os.path.join(DATA_DIR, '*'))
    perfected_data_file = os.path.join(PERFECTED_DATA_DIR, PERFECTED_FILE_NAME)

    if not all_files and not os.path.exists(perfected_data_file):
        return empty_result()

    csv_files = [f for f in all_files if has_ext(f, '.csv')]
    post_files = [f for f in csv_files if 'post' in os.path.basename(f).lower()]
    comment_files = [f for f in csv_files if 'comment' in os.path.basename(f).lower()]
    txt_files = [f for f in all_files if has_ext(f, '.txt')]
    other_csv_files = [f for f in csv_files if f not in post_files and f not in comment_files]

    def read_files(files_list, file_type):
        # Read every file, normalise its text column to 'text', tag and stack.
        dfs = []
        for f in files_list:
            try:
                if has_ext(f, '.csv'):
                    df = pd.read_csv(f, on_bad_lines='skip')
                else:  # txt
                    df = read_text_lines(f)

                text_col = find_text_column(df)
                if not text_col:
                    continue
                if text_col != 'text':
                    df = df.rename(columns={text_col: 'text'})

                df['source_file'] = os.path.basename(f)
                df['file_type'] = file_type
                df = drop_blank_text(df)
                if not df.empty:
                    dfs.append(df)
            except Exception as e:
                # Best effort: report the bad file and keep loading the rest.
                st.error(f"Error reading {os.path.basename(f)}: {e}")
        return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()

    # Load regular data for general insights
    raw_posts_df = read_files(post_files + other_csv_files, 'post')
    raw_comments_df = read_files(comment_files + txt_files, 'comment')

    # Load perfected data specifically for AI recommendations
    perfected_df = pd.DataFrame()
    if os.path.exists(perfected_data_file):
        try:
            perfected_df = read_text_lines(perfected_data_file)
            perfected_df['source_file'] = PERFECTED_FILE_NAME
            perfected_df['file_type'] = 'perfected'
            perfected_df = drop_blank_text(perfected_df)
        except Exception as e:
            st.error(f"Error reading perfected data file: {e}")

    # --- Pass the API key from the environment (.env) to the processors ---
    openai_key = os.getenv("OPENAI_API_KEY")

    processor = DataProcessor(openai_api_key=openai_key)
    processed_posts_df = processor.process_all_data(raw_posts_df)
    processed_comments_df = processor.process_all_data(raw_comments_df)
    processed_perfected_df = processor.process_all_data(perfected_df) if not perfected_df.empty else pd.DataFrame()

    all_text_df = pd.concat([processed_posts_df, processed_comments_df], ignore_index=True)
    if all_text_df.empty:
        return empty_result()

    insight_gen = InsightsGenerator(openai_api_key=openai_key)
    insights = insight_gen.generate_all_insights(posts_df=processed_posts_df, all_text_df=all_text_df)

    # AI recommendations: prefer the perfected data, fall back to all text.
    prime_source_df = prime_only(processed_perfected_df)
    if prime_source_df.empty:
        prime_source_df = prime_only(all_text_df)

    if not prime_source_df.empty:
        insights['ai_recommendations'] = insight_gen.generate_ai_recommendations(prime_source_df)
    else:
        insights['ai_recommendations'] = {}

    return processed_posts_df, all_text_df, insights, processed_perfected_df
137
+
138
+ # --- Main Application ---
139
st.title("🏦 Prime Bank Social Media Analytics")

posts_df, all_text_df, insights, perfected_df = load_and_process_data()

# Stop early when neither data source produced rows, or when no insights
# dictionary was built; everything below needs both.
no_rows_loaded = all_text_df.empty and perfected_df.empty
if no_rows_loaded or insights is None:
    st.error("No data files found or processed in 'data/uploads' or 'perfected_data'. Please add CSV or TXT files.")
    st.info("Ensure filenames contain 'post' for post data or 'comment' for comment data for best results, and ensure 'all_posts_with_comments.txt' exists in 'perfected_data' for AI recommendations.")
    st.stop()
147
+
148
+ # Filter for Prime Bank mentions (for general insights)
149
# Keep only rows with at least one Prime Bank mention. The column is treated
# as optional: without it the filtered frames are simply empty.
prime_posts_df = posts_df[posts_df['prime_mentions'] > 0].copy() if 'prime_mentions' in posts_df.columns else pd.DataFrame()
prime_all_text_df = all_text_df[all_text_df['prime_mentions'] > 0].copy() if 'prime_mentions' in all_text_df.columns else pd.DataFrame()

# --- KPI Section ---
st.header("📈 Prime Bank Key Performance Indicators")
# Guarded like the filters above so a missing column yields 0 mentions
# instead of a KeyError.
total_mentions = all_text_df['prime_mentions'].sum() if 'prime_mentions' in all_text_df.columns else 0
total_posts_with_mentions = len(prime_posts_df)
new_metrics = create_summary_metrics(all_text_df)

kpi1, kpi2, kpi3, kpi4 = st.columns(4)
kpi1.metric("Total Mentions (Posts & Comments)", f"{int(total_mentions):,}")
kpi2.metric("Posts Mentioning Prime Bank", f"{total_posts_with_mentions:,}")
kpi3.metric("Bank Sentiment Score", new_metrics['Bank Sentiment Score'], help="Positive Mentions - Negative Mentions. A positive score is good.")
kpi4.metric("Engagement-Weighted Sentiment", new_metrics['Engagement-Weighted Sentiment'], help="A combined score of sentiment polarity and virality (likes, shares, etc.). Higher is better.")
st.markdown("---")
164
+
165
+ # --- Tabbed Interface ---
166
# One tab per dashboard section; the order here fixes the on-screen order.
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
    "Sentiment & Virality (Posts)",
    "Emotion & Categories (All Text)",
    "Strategic Overview",
    "🤖 AI Recommendations",
    "Action Items",
    "Full Data View",
])
174
+
175
+ # --- Tab 1: Posts Only Analysis ---
176
with tab1:
    st.header("Sentiment & Virality Analysis (Posts Only)")
    if prime_posts_df.empty:
        st.info("No posts mentioning Prime Bank were found in the data.")
    else:
        # Narrow column for the pie chart, wide column for the viral-posts chart.
        sentiment_col, viral_col = st.columns([1, 2])
        with sentiment_col:
            st.subheader("Sentiment of Posts")
            sentiment_fig = create_sentiment_pie(prime_posts_df)
            st.plotly_chart(sentiment_fig, use_container_width=True)
        with viral_col:
            st.subheader("Top Viral Posts")
            viral_fig = create_viral_posts_chart(prime_posts_df)
            if not viral_fig:
                st.info("No viral score data (likes, shares, comments) found to display chart.")
            else:
                st.plotly_chart(viral_fig, use_container_width=True)
192
+
193
+ # --- Tab 2: All Text Analysis ---
194
with tab2:
    st.header("Emotion & Category Analysis (Posts & Comments)")
    if prime_all_text_df.empty:
        st.info("No text mentioning Prime Bank was found in the data.")
    else:
        # Two equal columns: emotions on the left, categories on the right.
        emotion_col, category_col = st.columns(2)
        with emotion_col:
            st.subheader("Emotion Detection")
            emotion_fig = create_emotion_bar(prime_all_text_df)
            st.plotly_chart(emotion_fig, use_container_width=True)
        with category_col:
            st.subheader("Post & Comment Categories")
            category_fig = create_category_donut(prime_all_text_df)
            st.plotly_chart(category_fig, use_container_width=True)
206
+
207
+ # --- Tab 3: Strategic Overview ---
208
with tab3:
    st.header("Strategic Overview")
    st.write("High-level insights into market position and geographic distribution.")
    comparison_col, map_col = st.columns(2)
    with comparison_col:
        comparison_fig = create_bank_comparison_chart(all_text_df)
        if not comparison_fig:
            st.info("Not enough data to compare bank mentions.")
        else:
            st.plotly_chart(comparison_fig, use_container_width=True)
    with map_col:
        # When no map is produced this column is intentionally left empty.
        location_fig = create_geolocation_map(all_text_df)
        if location_fig:
            st.plotly_chart(location_fig, use_container_width=True)
224
+
225
+ # --- Tab 4: AI Recommendations ---
226
with tab4:
    st.header("🤖 AI-Powered Strategic Recommendations")
    st.write("Automatically generated advice based on an analysis of customer feedback.")
    if insights and insights.get('ai_recommendations'):
        recs = insights['ai_recommendations']

        # (subheader, expander label, key in recs, expanded by default)
        recommendation_sections = (
            ("For Customer Complaints", "Show AI Insight on Complaints", 'Complaint', True),
            ("For Customer Suggestions", "Show AI Insight on Suggestions", 'Suggestion', False),
            ("For Customer Praise", "Show AI Insight on Praise", 'Praise', False),
            ("For Customer Inquiries", "Show AI Insight on Inquiries", 'Inquiry', False),
        )
        for subheader, expander_label, rec_key, expanded in recommendation_sections:
            st.subheader(subheader)
            with st.expander(expander_label, expanded=expanded):
                st.markdown(f"💡 {recs.get(rec_key, 'No recommendation available.')}")
    else:
        st.info("No AI recommendations could be generated. This may be due to a lack of data or a missing OpenAI API key.")
249
+
250
+ # --- Tab 5: Action Items ---
251
with tab5:
    st.header("Posts & Comments That Need Attention")
    st.write("A prioritized list of negative or inquiry-based comments mentioning Prime Bank.")
    if not prime_all_text_df.empty:
        # Anything negative, or categorised as a complaint or inquiry.
        is_negative = prime_all_text_df['sentiment'] == 'Negative'
        needs_reply = prime_all_text_df['category'].isin(['Complaint', 'Inquiry'])
        attention_df = prime_all_text_df[is_negative | needs_reply].copy()

        if not attention_df.empty:
            # Additive priority: negative sentiment weighs most, then
            # complaints, then inquiries.
            attention_df['priority_score'] = (
                (attention_df['sentiment'] == 'Negative') * 2 +
                (attention_df['category'] == 'Complaint') * 1.5 +
                (attention_df['category'] == 'Inquiry') * 1
            )
            attention_df = attention_df.sort_values(by='priority_score', ascending=False)

            display_columns = ['text', 'sentiment', 'category', 'emotion', 'viral_score']
            # Use 'link' if present, otherwise 'url', otherwise no source column.
            link_col = next((name for name in ('link', 'url') if name in attention_df.columns), None)

            if link_col:
                # NOTE(review): st.dataframe presumably shows this markdown
                # string verbatim rather than as a clickable link - confirm.
                attention_df['Source'] = attention_df[link_col].apply(
                    lambda url: f"[Open Post ↗]({url})" if pd.notna(url) else "No Link"
                )
                display_columns.insert(1, 'Source')

            # Optional columns (e.g. 'emotion', 'viral_score') may be absent;
            # show only those that exist instead of raising a KeyError.
            display_columns = [name for name in display_columns if name in attention_df.columns]

            st.dataframe(attention_df[display_columns], use_container_width=True, hide_index=True)
        else:
            st.success("✅ No negative comments or inquiries found that require attention.")
    else:
        st.info("No data mentioning Prime Bank to analyze for action items.")
286
+
287
+ # --- Tab 6: Data View ---
288
with tab6:
    st.header("Explore the Raw and Processed Data")

    # Posts: preview of the first 100 processed rows.
    if not posts_df.empty:
        st.subheader("Processed Posts Data")
        posts_preview = posts_df.head(100)
        st.dataframe(posts_preview)

    # Comments: rows tagged 'comment' at load time; empty frame when the
    # 'file_type' column is missing.
    if 'file_type' in all_text_df.columns:
        comments_df = all_text_df[all_text_df['file_type'] == 'comment']
    else:
        comments_df = pd.DataFrame()

    if not comments_df.empty:
        st.subheader("Processed Comments & Reviews Data")
        comments_preview = comments_df.head(100)
        st.dataframe(comments_preview)