Spaces:
Sleeping
Sleeping
| #app.py | |
| import streamlit as st | |
| import pandas as pd | |
| import os | |
| import glob | |
| from src.data_processor import DataProcessor | |
| from src.insights_generator import InsightsGenerator | |
| from src.visualizations import * | |
| from dotenv import load_dotenv | |
| from parse_raw_data import run_parser | |
| # Load environment variables from .env file | |
| load_dotenv() | |
| # --- Page Configuration --- | |
| st.set_page_config( | |
| page_title="Prime Bank Analytics Dashboard", | |
| page_icon="π¦", | |
| layout="wide" | |
| ) | |
| # --- Helper function to identify text column --- | |
| def find_text_column(df): | |
| if df.empty: | |
| return None | |
| text_columns = [ | |
| 'text', 'Text', 'content', 'Content', 'message', 'Message', | |
| 'review', 'Review', 'comment', 'Comment', 'post', 'Post', | |
| 'review_text', 'Review Text', 'post_text', 'Post Text', | |
| 'comment_text', 'Comment Text', 'description', 'Description' | |
| ] | |
| for col in text_columns: | |
| if col in df.columns: | |
| return col | |
| for col in df.columns: | |
| col_lower = col.lower() | |
| if any(keyword in col_lower for keyword in ['text', 'content', 'review', 'comment', 'post']): | |
| return col | |
| for col in df.columns: | |
| if df[col].dtype == 'object': | |
| sample = df[col].dropna().head() | |
| if not sample.empty: | |
| try: | |
| if sample.astype(str).str.len().mean() > 20: | |
| return col | |
| except: | |
| continue | |
| return None | |
| # --- Caching for Performance --- | |
| # @st.cache_data # Temporarily disabled to fix API key issue | |
| def load_and_process_data(): | |
| DATA_DIR = 'data/uploads' | |
| PERFECTED_DATA_DIR = 'perfected_data' | |
| run_parser() | |
| if not os.path.exists(DATA_DIR): | |
| os.makedirs(DATA_DIR) | |
| if not os.path.exists(PERFECTED_DATA_DIR): | |
| os.makedirs(PERFECTED_DATA_DIR) | |
| all_files = glob.glob(os.path.join(DATA_DIR, '*')) | |
| # --- CHANGED: Look for a CSV file for perfected data to preserve links and other metadata --- | |
| perfected_data_file = os.path.join(PERFECTED_DATA_DIR, 'all_posts_with_comments.csv') | |
| if not all_files and not os.path.exists(perfected_data_file): | |
| return pd.DataFrame(), pd.DataFrame(), None, pd.DataFrame() | |
| post_files = [f for f in all_files if 'post' in os.path.basename(f).lower() and f.endswith('.csv')] | |
| comment_files = [f for f in all_files if 'comment' in os.path.basename(f).lower() and f.endswith('.csv')] | |
| txt_files = [f for f in all_files if f.endswith('.txt')] | |
| other_csv_files = [f for f in all_files if f.endswith('.csv') and f not in post_files and f not in comment_files] | |
| def read_files(files_list, file_type): | |
| dfs = [] | |
| for f in files_list: | |
| try: | |
| if f.endswith('.csv'): | |
| df = pd.read_csv(f, on_bad_lines='skip') | |
| else: # txt | |
| with open(f, 'r', encoding='utf-8') as file: | |
| df = pd.DataFrame({'text': [p.strip() for p in file.read().split('\n') if p.strip()]}) | |
| text_col = find_text_column(df) | |
| if not text_col: | |
| continue | |
| if text_col != 'text': | |
| df = df.rename(columns={text_col: 'text'}) | |
| df['source_file'] = os.path.basename(f) | |
| df['file_type'] = file_type | |
| df = df[df['text'].notna() & (df['text'].str.strip() != '')] | |
| if not df.empty: | |
| dfs.append(df) | |
| except Exception as e: | |
| st.error(f"Error reading {os.path.basename(f)}: {e}") | |
| return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame() | |
| # Load regular data for general insights | |
| raw_posts_df = read_files(post_files + other_csv_files, 'post') | |
| raw_comments_df = read_files(comment_files + txt_files, 'comment') | |
| # --- MODIFIED: Load perfected data from CSV for AI recommendations & Action Items --- | |
| perfected_df = pd.DataFrame() | |
| if os.path.exists(perfected_data_file): | |
| try: | |
| perfected_df = pd.read_csv(perfected_data_file, on_bad_lines='skip') | |
| perfected_df['source_file'] = os.path.basename(perfected_data_file) | |
| perfected_df['file_type'] = 'perfected' | |
| # Ensure text column is standardized | |
| text_col = find_text_column(perfected_df) | |
| if text_col and text_col != 'text': | |
| perfected_df = perfected_df.rename(columns={text_col: 'text'}) | |
| perfected_df = perfected_df[perfected_df['text'].notna() & (perfected_df['text'].str.strip() != '')] | |
| except Exception as e: | |
| st.error(f"Error reading perfected data file '{perfected_data_file}': {e}") | |
| # --- Pass the API key from your .env file to the processors --- | |
| openai_key = "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" | |
| processor = DataProcessor(openai_api_key=openai_key) | |
| insight_gen = InsightsGenerator(openai_api_key=openai_key) | |
| # Process all data sets | |
| processed_posts_df = processor.process_all_data(raw_posts_df) | |
| processed_comments_df = processor.process_all_data(raw_comments_df) | |
| processed_perfected_df = processor.process_all_data(perfected_df) if not perfected_df.empty else pd.DataFrame() | |
| # Create combined dataframe for general dashboard (excluding perfected data) | |
| all_text_df = pd.concat([processed_posts_df, processed_comments_df], ignore_index=True) | |
| if all_text_df.empty and processed_perfected_df.empty: | |
| return pd.DataFrame(), pd.DataFrame(), None, pd.DataFrame() | |
| # Generate general insights from regular data | |
| insights = insight_gen.generate_all_insights(posts_df=processed_posts_df, all_text_df=all_text_df) | |
| # --- MODIFIED: Generate AI Recommendations ONLY from perfected data --- | |
| insights['ai_recommendations'] = {} | |
| if not processed_perfected_df.empty and 'prime_mentions' in processed_perfected_df.columns: | |
| prime_perfected_df = processed_perfected_df[processed_perfected_df['prime_mentions'] > 0].copy() | |
| if not prime_perfected_df.empty: | |
| insights['ai_recommendations'] = insight_gen.generate_ai_recommendations(prime_perfected_df) | |
| return processed_posts_df, all_text_df, insights, processed_perfected_df | |
| # --- Main Application --- | |
| st.title("π¦ Prime Bank Social Media Analytics") | |
| # Manual API key input if not found in environment | |
| if not os.getenv("OPENAI_API_KEY"): | |
| api_key = st.sidebar.text_input("π Enter OpenAI API Key:", type="password", help="Key not found in environment") | |
| if api_key: | |
| os.environ["OPENAI_API_KEY"] = api_key | |
| # --- MODIFIED: Renamed variable for clarity --- | |
| posts_df, all_text_df, insights, processed_perfected_df = load_and_process_data() | |
| if all_text_df.empty and processed_perfected_df.empty: | |
| st.error("No data files found. Please add CSV/TXT files to 'data/uploads' or 'all_posts_with_comments.csv' to 'perfected_data'.") | |
| st.info("The 'AI Recommendations' and 'Action Items' tabs are powered exclusively by 'perfected_data/all_posts_with_comments.csv'.") | |
| st.stop() | |
| # Filter regular data for Prime Bank mentions for general insights | |
| prime_posts_df = posts_df[posts_df['prime_mentions'] > 0].copy() if 'prime_mentions' in posts_df.columns else pd.DataFrame() | |
| prime_all_text_df = all_text_df[all_text_df['prime_mentions'] > 0].copy() if 'prime_mentions' in all_text_df.columns else pd.DataFrame() | |
| # --- KPI Section (uses general data from 'uploads' folder) --- | |
| st.header("π Prime Bank Key Performance Indicators") | |
| total_mentions = all_text_df['prime_mentions'].sum() if 'prime_mentions' in all_text_df.columns else 0 | |
| total_posts_with_mentions = len(prime_posts_df) | |
| new_metrics = create_summary_metrics(all_text_df) | |
| kpi1, kpi2, kpi3, kpi4 = st.columns(4) | |
| kpi1.metric("Total Mentions (Posts & Comments)", f"{int(total_mentions):,}") | |
| kpi2.metric("Posts Mentioning Prime Bank", f"{total_posts_with_mentions:,}") | |
| kpi3.metric("Bank Sentiment Score", new_metrics['Bank Sentiment Score'], help="Positive Mentions - Negative Mentions. A positive score is good.") | |
| kpi4.metric("Engagement-Weighted Sentiment", new_metrics['Engagement-Weighted Sentiment'], help="A combined score of sentiment polarity and virality (likes, shares, etc.). Higher is better.") | |
| st.markdown("---") | |
| # --- Tabbed Interface --- | |
| tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([ | |
| "Sentiment & Virality (Posts)", | |
| "Emotion & Categories (All Text)", | |
| "Strategic Overview", | |
| "π€ AI Recommendations", | |
| "Action Items", | |
| "Full Data View" | |
| ]) | |
| # --- Tab 1 to 3 use GENERAL data from 'uploads' folder --- | |
| with tab1: | |
| st.header("Sentiment & Virality Analysis (Posts Only)") | |
| if not prime_posts_df.empty: | |
| col1, col2 = st.columns([1, 2]) | |
| with col1: | |
| st.subheader("Sentiment of Posts") | |
| st.plotly_chart(create_sentiment_pie(prime_posts_df), use_container_width=True) | |
| with col2: | |
| st.subheader("Top Viral Posts") | |
| viral_chart = create_viral_posts_chart(prime_posts_df) | |
| if viral_chart: | |
| st.plotly_chart(viral_chart, use_container_width=True) | |
| else: | |
| st.info("No viral score data (likes, shares, comments) found to display chart.") | |
| else: | |
| st.info("No posts mentioning Prime Bank were found in the data.") | |
| with tab2: | |
| st.header("Emotion & Category Analysis (Posts & Comments)") | |
| if not prime_all_text_df.empty: | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.subheader("Emotion Detection") | |
| st.plotly_chart(create_emotion_bar(prime_all_text_df), use_container_width=True) | |
| with col2: | |
| st.subheader("Post & Comment Categories") | |
| st.plotly_chart(create_category_donut(prime_all_text_df), use_container_width=True) | |
| else: | |
| st.info("No text mentioning Prime Bank was found in the data.") | |
| with tab3: | |
| st.header("Strategic Overview") | |
| st.write("High-level insights into market position and geographic distribution.") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| bank_comp_chart = create_bank_comparison_chart(all_text_df) | |
| if bank_comp_chart: | |
| st.plotly_chart(bank_comp_chart, use_container_width=True) | |
| else: | |
| st.info("Not enough data to compare bank mentions.") | |
| with col2: | |
| geo_map = create_geolocation_map(all_text_df) | |
| if geo_map: | |
| st.plotly_chart(geo_map, use_container_width=True) | |
| else: | |
| # Message is handled inside the create_geolocation_map function | |
| pass | |
| # --- Tab 4: AI Recommendations (uses PERFECTED data) --- | |
| with tab4: | |
| st.header("π€ AI-Powered Strategic Recommendations") | |
| st.info("These recommendations are generated exclusively from the curated data in `perfected_data/all_posts_with_comments.csv`.") | |
| if insights and 'ai_recommendations' in insights and insights['ai_recommendations']: | |
| recs = insights['ai_recommendations'] | |
| st.subheader("For Customer Complaints") | |
| with st.expander("Show AI Insight on Complaints", expanded=True): | |
| st.markdown(f"π‘ {recs.get('Complaint', 'No specific recommendation available.')}") | |
| st.subheader("For Customer Suggestions") | |
| with st.expander("Show AI Insight on Suggestions"): | |
| st.markdown(f"π‘ {recs.get('Suggestion', 'No specific recommendation available.')}") | |
| st.subheader("For Customer Praise") | |
| with st.expander("Show AI Insight on Praise"): | |
| st.markdown(f"π‘ {recs.get('Praise', 'No specific recommendation available.')}") | |
| st.subheader("For Customer Inquiries") | |
| with st.expander("Show AI Insight on Inquiries"): | |
| st.markdown(f"π‘ {recs.get('Inquiry', 'No specific recommendation available.')}") | |
| else: | |
| st.warning("No AI recommendations could be generated. This requires a valid `perfected_data/all_posts_with_comments.csv` file with relevant data and a configured OpenAI API key.") | |
| # --- Tab 5: Action Items (uses PERFECTED data) --- | |
| # --- Tab 5: Action Items (uses PERFECTED data) --- | |
| with tab5: | |
| st.header("Posts & Comments That Need Attention") | |
| st.info("This prioritized list is generated exclusively from the curated data in `perfected_data/all_posts_with_comments.csv`.") | |
| if not processed_perfected_df.empty and 'prime_mentions' in processed_perfected_df.columns: | |
| prime_perfected_df = processed_perfected_df[processed_perfected_df['prime_mentions'] > 0].copy() | |
| attention_df = prime_perfected_df[ | |
| (prime_perfected_df['sentiment'] == 'Negative') | | |
| (prime_perfected_df['category'].isin(['Complaint', 'Inquiry'])) | |
| ].copy() | |
| if not attention_df.empty: | |
| # Calculate priority score | |
| attention_df['priority_score'] = ( | |
| (attention_df['sentiment'] == 'Negative') * 2 + | |
| (attention_df['category'] == 'Complaint') * 1.5 + | |
| (attention_df['category'] == 'Inquiry') * 1 | |
| ) | |
| attention_df.sort_values(by=['priority_score', 'viral_score'], ascending=[False, False], inplace=True) | |
| # Prepare display columns and add clickable source link | |
| display_columns = ['text', 'sentiment', 'category', 'emotion', 'viral_score'] | |
| link_col = None | |
| if 'link' in attention_df.columns: | |
| link_col = 'link' | |
| elif 'url' in attention_df.columns: | |
| link_col = 'url' | |
| if link_col: | |
| # Check if links are real or just placeholders | |
| attention_df['Source'] = attention_df[link_col].apply( | |
| lambda url: "Customer Post" if pd.notna(url) and 'example.com' in str(url) | |
| else (f"[Open Post β]({url})" if pd.notna(url) and str(url).startswith('http') | |
| else "Direct Feedback") | |
| ) | |
| display_columns.insert(1, 'Source') | |
| st.dataframe(attention_df[display_columns], use_container_width=True, hide_index=True) | |
| else: | |
| st.success("β No negative comments or inquiries found in the perfected data that require attention.") | |
| else: | |
| st.warning("No data found in `perfected_data/all_posts_with_comments.csv` to analyze for action items.") | |
| # --- Tab 6: Data View (shows both general and perfected data separately) --- | |
| with tab6: | |
| st.header("Explore the Raw and Processed Data") | |
| if not processed_perfected_df.empty: | |
| st.subheader("Processed Perfected Data (from `perfected_data`)") | |
| st.dataframe(processed_perfected_df.head(100)) | |
| if not posts_df.empty: | |
| st.subheader("Processed Posts Data (from `data/uploads`)") | |
| st.dataframe(posts_df.head(100)) | |
| comments_df = all_text_df[all_text_df['file_type'] == 'comment'] if 'file_type' in all_text_df.columns else pd.DataFrame() | |
| if not comments_df.empty: | |
| st.subheader("Processed Comments & Reviews Data (from `data/uploads`)") | |
| st.dataframe(comments_df.head(100)) |