Spaces:

Tanmoy-AI
/

customer-connect

Sleeping

App Files Files Community

FarhinSadia committed on Jun 21, 2025

Commit

4521b4e

1 Parent(s): 66aadcd

Add Hugging Face configuration

Browse files

Files changed (2) hide show

README_HF.md +13 -0
app_hf.py +297 -0

README_HF.md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: Bank User Sentiment Analysis
+emoji: 🏦
+colorFrom: blue
+colorTo: green
+sdk: streamlit
+sdk_version: 1.35.0
+app_file: app.py
+pinned: false
+---
+# Bank User Sentiment Analysis
+Analyzing customer sentiment for Prime Bank

app_hf.py ADDED Viewed

	@@ -0,0 +1,297 @@

+import streamlit as st
+import pandas as pd
+import os
+import glob
+from src.data_processor import DataProcessor
+from src.insights_generator import InsightsGenerator
+from src.visualizations import *
+from dotenv import load_dotenv
+# Load environment variables (e.g. OPENAI_API_KEY, read later via os.getenv) from a local .env file
+load_dotenv()
+# --- Page Configuration: this is the first st.* call the script makes ---
+st.set_page_config(
+    page_title="Prime Bank Analytics Dashboard",  # title shown for the page
+    page_icon="🏦",
+    layout="wide"  # wide layout rather than Streamlit's default centered one
+)
+# --- Helper function to identify text column ---
+def find_text_column(df):
+    if df.empty:
+        return None
+    text_columns = [
+        'text', 'Text', 'content', 'Content', 'message', 'Message',
+        'review', 'Review', 'comment', 'Comment', 'post', 'Post',
+        'review_text', 'Review Text', 'post_text', 'Post Text',
+        'comment_text', 'Comment Text', 'description', 'Description'
+    ]
+    for col in text_columns:
+        if col in df.columns:
+            return col
+    for col in df.columns:
+        col_lower = col.lower()
+        if any(keyword in col_lower for keyword in ['text', 'content', 'review', 'comment', 'post']):
+            return col
+    for col in df.columns:
+        if df[col].dtype == 'object':
+            sample = df[col].dropna().head()
+            if not sample.empty:
+                try:
+                    if sample.astype(str).str.len().mean() > 20:
+                        return col
+                except:
+                    continue
+    return None
+# --- Caching for Performance ---
+@st.cache_data
+def load_and_process_data():
+    DATA_DIR = 'data/uploads'
+    PERFECTED_DATA_DIR = 'perfected_data'  # New folder for perfected data
+    if not os.path.exists(DATA_DIR):
+        os.makedirs(DATA_DIR)
+    if not os.path.exists(PERFECTED_DATA_DIR):
+        os.makedirs(PERFECTED_DATA_DIR)
+    all_files = glob.glob(os.path.join(DATA_DIR, '*'))
+    perfected_data_file = os.path.join(PERFECTED_DATA_DIR, 'all_posts_with_comments.txt')
+    if not all_files and not os.path.exists(perfected_data_file):
+        return pd.DataFrame(), pd.DataFrame(), None, pd.DataFrame()
+    post_files = [f for f in all_files if 'post' in os.path.basename(f).lower() and f.endswith('.csv')]
+    comment_files = [f for f in all_files if 'comment' in os.path.basename(f).lower() and f.endswith('.csv')]
+    txt_files = [f for f in all_files if f.endswith('.txt')]
+    other_csv_files = [f for f in all_files if f.endswith('.csv') and f not in post_files and f not in comment_files]
+    def read_files(files_list, file_type):
+        dfs = []
+        for f in files_list:
+            try:
+                if f.endswith('.csv'):
+                    df = pd.read_csv(f, on_bad_lines='skip')
+                else:  # txt
+                    with open(f, 'r', encoding='utf-8') as file:
+                        df = pd.DataFrame({'text': [p.strip() for p in file.read().split('\n') if p.strip()]})
+                text_col = find_text_column(df)
+                if not text_col:
+                    continue
+                if text_col != 'text':
+                    df = df.rename(columns={text_col: 'text'})
+                df['source_file'] = os.path.basename(f)
+                df['file_type'] = file_type
+                df = df[df['text'].notna() & (df['text'].str.strip() != '')]
+                if not df.empty:
+                    dfs.append(df)
+            except Exception as e:
+                st.error(f"Error reading {os.path.basename(f)}: {e}")
+        return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
+    # Load regular data for general insights
+    raw_posts_df = read_files(post_files + other_csv_files, 'post')
+    raw_comments_df = read_files(comment_files + txt_files, 'comment')
+    # Load perfected data specifically for AI recommendations
+    perfected_df = pd.DataFrame()
+    if os.path.exists(perfected_data_file):
+        try:
+            with open(perfected_data_file, 'r', encoding='utf-8') as f:
+                perfected_df = pd.DataFrame({'text': [p.strip() for p in f.read().split('\n') if p.strip()]})
+            perfected_df['source_file'] = 'all_posts_with_comments.txt'
+            perfected_df['file_type'] = 'perfected'
+            perfected_df = perfected_df[perfected_df['text'].notna() & (perfected_df['text'].str.strip() != '')]
+        except Exception as e:
+            st.error(f"Error reading perfected data file: {e}")
+    # --- Pass the API key from your .env file to the processors ---
+    openai_key = os.getenv("OPENAI_API_KEY")
+    processor = DataProcessor(openai_api_key=openai_key)
+    processed_posts_df = processor.process_all_data(raw_posts_df)
+    processed_comments_df = processor.process_all_data(raw_comments_df)
+    processed_perfected_df = processor.process_all_data(perfected_df) if not perfected_df.empty else pd.DataFrame()
+    all_text_df = pd.concat([processed_posts_df, processed_comments_df], ignore_index=True)
+    if all_text_df.empty:
+        return pd.DataFrame(), pd.DataFrame(), None, pd.DataFrame()
+    insight_gen = InsightsGenerator(openai_api_key=openai_key)
+    insights = insight_gen.generate_all_insights(posts_df=processed_posts_df, all_text_df=all_text_df)
+    # Generate AI Recommendations using perfected data if available
+    prime_perfected_df = processed_perfected_df[processed_perfected_df['prime_mentions'] > 0].copy() if 'prime_mentions' in processed_perfected_df.columns and not processed_perfected_df.empty else pd.DataFrame()
+    if not prime_perfected_df.empty:
+        insights['ai_recommendations'] = insight_gen.generate_ai_recommendations(prime_perfected_df)
+    else:
+        prime_all_text_df = all_text_df[all_text_df['prime_mentions'] > 0].copy() if 'prime_mentions' in all_text_df.columns else pd.DataFrame()
+        if not prime_all_text_df.empty:
+            insights['ai_recommendations'] = insight_gen.generate_ai_recommendations(prime_all_text_df)
+        else:
+            insights['ai_recommendations'] = {}
+    return processed_posts_df, all_text_df, insights, processed_perfected_df
+# --- Main Application ---
+st.title("🏦 Prime Bank Social Media Analytics")
+posts_df, all_text_df, insights, perfected_df = load_and_process_data()
+if all_text_df.empty and perfected_df.empty or insights is None:
+    st.error("No data files found or processed in 'data/uploads' or 'perfected_data'. Please add CSV or TXT files.")
+    st.info("Ensure filenames contain 'post' for post data or 'comment' for comment data for best results, and ensure 'all_posts_with_comments.txt' exists in 'perfected_data' for AI recommendations.")
+    st.stop()
+# Filter for Prime Bank mentions (for general insights)
+prime_posts_df = posts_df[posts_df['prime_mentions'] > 0].copy() if 'prime_mentions' in posts_df.columns else pd.DataFrame()
+prime_all_text_df = all_text_df[all_text_df['prime_mentions'] > 0].copy() if 'prime_mentions' in all_text_df.columns else pd.DataFrame()
+# --- KPI Section ---
+st.header("📈 Prime Bank Key Performance Indicators")
+total_mentions = all_text_df['prime_mentions'].sum()
+total_posts_with_mentions = len(prime_posts_df)
+new_metrics = create_summary_metrics(all_text_df)
+kpi1, kpi2, kpi3, kpi4 = st.columns(4)
+kpi1.metric("Total Mentions (Posts & Comments)", f"{int(total_mentions):,}")
+kpi2.metric("Posts Mentioning Prime Bank", f"{total_posts_with_mentions:,}")
+kpi3.metric("Bank Sentiment Score", new_metrics['Bank Sentiment Score'], help="Positive Mentions - Negative Mentions. A positive score is good.")
+kpi4.metric("Engagement-Weighted Sentiment", new_metrics['Engagement-Weighted Sentiment'], help="A combined score of sentiment polarity and virality (likes, shares, etc.). Higher is better.")
+st.markdown("---")
+# --- Tabbed Interface ---
+tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
+    "Sentiment & Virality (Posts)",
+    "Emotion & Categories (All Text)",
+    "Strategic Overview",
+    "🤖 AI Recommendations",
+    "Action Items",
+    "Full Data View"
+])
+# --- Tab 1: Posts Only Analysis ---
+with tab1:
+    st.header("Sentiment & Virality Analysis (Posts Only)")
+    if not prime_posts_df.empty:
+        col1, col2 = st.columns([1, 2])
+        with col1:
+            st.subheader("Sentiment of Posts")
+            st.plotly_chart(create_sentiment_pie(prime_posts_df), use_container_width=True)
+        with col2:
+            st.subheader("Top Viral Posts")
+            viral_chart = create_viral_posts_chart(prime_posts_df)
+            if viral_chart:
+                st.plotly_chart(viral_chart, use_container_width=True)
+            else:
+                st.info("No viral score data (likes, shares, comments) found to display chart.")
+    else:
+        st.info("No posts mentioning Prime Bank were found in the data.")
+# --- Tab 2: All Text Analysis ---
+with tab2:
+    st.header("Emotion & Category Analysis (Posts & Comments)")
+    if not prime_all_text_df.empty:
+        col1, col2 = st.columns(2)
+        with col1:
+            st.subheader("Emotion Detection")
+            st.plotly_chart(create_emotion_bar(prime_all_text_df), use_container_width=True)
+        with col2:
+            st.subheader("Post & Comment Categories")
+            st.plotly_chart(create_category_donut(prime_all_text_df), use_container_width=True)
+    else:
+        st.info("No text mentioning Prime Bank was found in the data.")
+# --- Tab 3: Strategic Overview ---
+with tab3:
+    st.header("Strategic Overview")
+    st.write("High-level insights into market position and geographic distribution.")
+    col1, col2 = st.columns(2)
+    with col1:
+        bank_comp_chart = create_bank_comparison_chart(all_text_df)
+        if bank_comp_chart:
+            st.plotly_chart(bank_comp_chart, use_container_width=True)
+        else:
+            st.info("Not enough data to compare bank mentions.")
+    with col2:
+        geo_map = create_geolocation_map(all_text_df)
+        if geo_map:
+            st.plotly_chart(geo_map, use_container_width=True)
+        else:
+            pass
+# --- Tab 4: AI Recommendations ---
+with tab4:
+    st.header("🤖 AI-Powered Strategic Recommendations")
+    st.write("Automatically generated advice based on an analysis of customer feedback.")
+    if insights and insights.get('ai_recommendations'):
+        recs = insights['ai_recommendations']
+        st.subheader("For Customer Complaints")
+        with st.expander("Show AI Insight on Complaints", expanded=True):
+            st.markdown(f"💡 {recs.get('Complaint', 'No recommendation available.')}")
+        st.subheader("For Customer Suggestions")
+        with st.expander("Show AI Insight on Suggestions"):
+            st.markdown(f"💡 {recs.get('Suggestion', 'No recommendation available.')}")
+        st.subheader("For Customer Praise")
+        with st.expander("Show AI Insight on Praise"):
+            st.markdown(f"💡 {recs.get('Praise', 'No recommendation available.')}")
+        st.subheader("For Customer Inquiries")
+        with st.expander("Show AI Insight on Inquiries"):
+            st.markdown(f"💡 {recs.get('Inquiry', 'No recommendation available.')}")
+    else:
+        st.info("No AI recommendations could be generated. This may be due to a lack of data or a missing OpenAI API key.")
+# --- Tab 5: Action Items ---
+with tab5:
+    st.header("Posts & Comments That Need Attention")
+    st.write("A prioritized list of negative or inquiry-based comments mentioning Prime Bank.")
+    if not prime_all_text_df.empty:
+        attention_df = prime_all_text_df[
+            (prime_all_text_df['sentiment'] == 'Negative') |
+            (prime_all_text_df['category'].isin(['Complaint', 'Inquiry']))
+        ].copy()
+        if not attention_df.empty:
+            attention_df['priority_score'] = (
+                (attention_df['sentiment'] == 'Negative') * 2 +
+                (attention_df['category'] == 'Complaint') * 1.5 +
+                (attention_df['category'] == 'Inquiry') * 1
+            )
+            attention_df.sort_values(by='priority_score', ascending=False, inplace=True)
+            display_columns = ['text', 'sentiment', 'category', 'emotion', 'viral_score']
+            link_col = None
+            if 'link' in attention_df.columns:
+                link_col = 'link'
+            elif 'url' in attention_df.columns:
+                link_col = 'url'
+            if link_col:
+                attention_df['Source'] = attention_df[link_col].apply(
+                    lambda url: f"[Open Post ↗]({url})" if pd.notna(url) else "No Link"
+                )
+                display_columns.insert(1, 'Source')
+            st.dataframe(attention_df[display_columns], use_container_width=True, hide_index=True)
+        else:
+            st.success("✅ No negative comments or inquiries found that require attention.")
+    else:
+        st.info("No data mentioning Prime Bank to analyze for action items.")
+# --- Tab 6: Data View ---
+with tab6:
+    st.header("Explore the Raw and Processed Data")
+    if not posts_df.empty:
+        st.subheader("Processed Posts Data")
+        st.dataframe(posts_df.head(100))
+    comments_df = all_text_df[all_text_df['file_type'] == 'comment'] if 'file_type' in all_text_df.columns else pd.DataFrame()
+    if not comments_df.empty:
+        st.subheader("Processed Comments & Reviews Data")
+        st.dataframe(comments_df.head(100))