# Commit 24b804f β€” "updating names" (Danialebrat)
"""
Sentiment Analysis Page
Analyze content performance across all sentiment types with advanced filtering.
Data is fetched on-demand: user sets filters then clicks "Fetch Data".
Global filters (platform/brand/date) from the sidebar are pre-populated.
"""
import streamlit as st
import pandas as pd
import sys
from pathlib import Path
parent_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(parent_dir))
from visualizations.sentiment_charts import SentimentCharts
from visualizations.distribution_charts import DistributionCharts
from visualizations.content_cards import ContentCards
from agents.content_summary_agent import ContentSummaryAgent
def render_sentiment_analysis(data_loader):
    """
    Render the Sentiment Analysis page.

    Flow: the global sidebar filters pre-populate the platform/brand
    selectors; the user refines sentiment/intent/ranking options and clicks
    "Fetch Data", which runs one targeted Snowflake query via *data_loader*.
    The fetched contents + sampled comments are cached in
    ``st.session_state`` ('sa_contents' / 'sa_comments') keyed by the full
    filter combination ('sa_fetch_key'), so Streamlit reruns reuse the data
    instead of re-querying.

    Args:
        data_loader: SentimentDataLoader instance; must expose
            ``load_sa_data(platform, brand, top_n, min_comments, sort_by,
            sentiments, intents, date_range)`` returning
            ``(contents_df, comments_df)``.
    """
    st.title("πŸ” Custom Sentiment Queries")
    st.markdown("Analyze content performance based on sentiment patterns and user feedback")
    st.markdown("---")

    sentiment_charts = SentimentCharts()
    distribution_charts = DistributionCharts()
    # NOTE(review): temperature=1 assumed intentional for this model β€” confirm.
    summary_agent = ContentSummaryAgent(model="gpt-5-nano", temperature=1)

    # Per-content AI summaries must survive Streamlit reruns.
    if 'content_summaries' not in st.session_state:
        st.session_state.content_summaries = {}

    # ── Get filter options from the already-loaded (lightweight) dashboard df ─
    dashboard_df = st.session_state.get('dashboard_df')
    if dashboard_df is None or dashboard_df.empty:
        st.warning("Dashboard data not loaded yet. Please wait for the app to initialise.")
        return

    available_platforms = sorted(dashboard_df['platform'].dropna().unique().tolist())
    available_brands = sorted(dashboard_df['brand'].dropna().unique().tolist())

    # ── Pre-populate from global sidebar filters ───────────────────────────────
    global_filters = st.session_state.get('global_filters', {})
    global_platforms = global_filters.get('platforms', [])
    global_brands = global_filters.get('brands', [])
    global_date_range = global_filters.get('date_range')

    # ── Platform & Brand selection ─────────────────────────────────────────────
    st.markdown("### 🎯 Select Platform and Brand")
    st.info(
        "⚑ **Performance**: Choose a platform and brand, set optional filters, "
        "then click **Fetch Data** to run a targeted Snowflake query."
    )
    filter_col1, filter_col2 = st.columns(2)
    with filter_col1:
        default_platform_idx = 0
        # Only the first global selection can pre-select a single-select box.
        if global_platforms and global_platforms[0] in available_platforms:
            default_platform_idx = available_platforms.index(global_platforms[0]) + 1  # +1 for blank
        selected_platform = st.selectbox(
            "Platform *",
            options=[''] + available_platforms,
            index=default_platform_idx,
            help="Select the platform to analyse"
        )
    with filter_col2:
        default_brand_idx = 0
        if global_brands and global_brands[0] in available_brands:
            default_brand_idx = available_brands.index(global_brands[0]) + 1
        selected_brand = st.selectbox(
            "Brand *",
            options=[''] + available_brands,
            index=default_brand_idx,
            help="Select the brand to analyse"
        )

    # Both selections are mandatory; until then show a data-availability
    # summary built from the cheap dashboard dataframe and stop.
    if not selected_platform or not selected_brand:
        st.warning("⚠️ Please select both **Platform** and **Brand** to continue.")
        st.markdown("---")
        # Quick summary from dashboard data
        st.markdown("### πŸ“Š Available Data Summary")
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Total Comments", f"{len(dashboard_df):,}")
        with col2:
            st.metric("Platforms", len(available_platforms))
            with st.expander("View Platforms"):
                for p in available_platforms:
                    cnt = (dashboard_df['platform'] == p).sum()
                    st.write(f"- **{p}**: {cnt:,} comments")
        with col3:
            st.metric("Brands", len(available_brands))
            with st.expander("View Brands"):
                for b in available_brands:
                    cnt = (dashboard_df['brand'] == b).sum()
                    st.write(f"- **{b}**: {cnt:,} comments")
        return

    st.markdown("---")

    # ── Content filters ────────────────────────────────────────────────────────
    st.markdown("### πŸ” Content Filters")
    # Build available sentiment / intent options from dashboard_df filtered to
    # selected platform+brand (fast β€” no text columns involved)
    mask = (dashboard_df['platform'] == selected_platform) & (dashboard_df['brand'] == selected_brand)
    preview_df = dashboard_df[mask]

    filter_col1, filter_col2, filter_col3, filter_col4 = st.columns(4)
    with filter_col1:
        # FIX: drop NaN before sorting β€” a NaN in the column puts a float into
        # the list and sorted() raises TypeError on mixed str/float. Matches
        # the dropna() used for available_platforms/brands above.
        sentiment_options = sorted(preview_df['sentiment_polarity'].dropna().unique().tolist())
        selected_sentiments = st.multiselect(
            "Sentiment",
            options=sentiment_options,
            default=[],
            help="Filter by dominant sentiment. Leave empty for all."
        )
    with filter_col2:
        # 'intent' holds comma-separated values; split/explode to distinct tags.
        intent_list = (
            preview_df['intent']
            .str.split(',').explode().str.strip()
            .dropna().unique().tolist()
        )
        selected_intents = st.multiselect(
            "Intent",
            options=sorted(i for i in intent_list if i),
            default=[],
            help="Filter contents that have comments with these intents"
        )
    with filter_col3:
        top_n = st.selectbox(
            "Top N Contents",
            options=[5, 10, 15, 20, 25],
            index=1,
            help="Number of contents to display"
        )
    with filter_col4:
        filter_active = bool(selected_sentiments or selected_intents)
        st.metric(
            "Filters Active",
            "βœ“ Yes" if filter_active else "βœ— No",
            help="Sentiment or intent filters applied" if filter_active else "Showing all sentiments"
        )

    st.markdown("---")

    # ── Advanced ranking controls ──────────────────────────────────────────────
    with st.expander("βš™οΈ Advanced Ranking Controls", expanded=False):
        adv_col1, adv_col2 = st.columns(2)
        with adv_col1:
            min_comments = st.slider(
                "Minimum Comments Required",
                min_value=1, max_value=50, value=10, step=1,
                help="Exclude contents with fewer comments than this threshold."
            )
        with adv_col2:
            # Options are (value, label) pairs; format_func shows the label.
            sort_by = st.selectbox(
                "Sort By",
                options=[
                    ('severity_score', '🎯 Severity Score (Balanced) β€” Recommended'),
                    ('sentiment_percentage', 'πŸ“Š Sentiment Percentage'),
                    ('sentiment_count', 'πŸ”’ Sentiment Count (Absolute)'),
                    ('total_comments', 'πŸ’¬ Total Comments (Volume)'),
                ],
                format_func=lambda x: x[1],
                index=0
            )
        sort_by_value = sort_by[0]
        sentiment_label = "selected sentiments" if selected_sentiments else "negative sentiments"
        info_map = {
            'severity_score': f"πŸ“˜ **Severity Score** = Sentiment % Γ— √(Total Comments). Balances {sentiment_label} % with volume.",
            'sentiment_percentage': f"πŸ“˜ Ranks by highest % of {sentiment_label}. May include low-volume contents.",
            'sentiment_count': f"πŸ“˜ Ranks by absolute number of {sentiment_label} comments.",
            'total_comments': "πŸ“˜ Ranks by total comment volume, regardless of sentiment.",
        }
        st.info(info_map.get(sort_by_value, ""))

    # Date range for the query (inherit from global filters if set)
    if global_date_range and len(global_date_range) == 2:
        query_date_range = global_date_range
    else:
        query_date_range = None

    # ── Fetch button ───────────────────────────────────────────────────────────
    # fetch_key captures every input that affects the query; if it matches the
    # stored key, the cached result is reused instead of re-querying Snowflake.
    fetch_key = (
        selected_platform, selected_brand, top_n, min_comments, sort_by_value,
        tuple(sorted(selected_sentiments)), tuple(sorted(selected_intents)),
        str(query_date_range)
    )
    fetch_col, info_col = st.columns([1, 3])
    with fetch_col:
        fetch_clicked = st.button("πŸš€ Fetch Data", use_container_width=True, type="primary")
    # Auto-fetch if the key hasn't changed and we already have data
    has_data = (
        'sa_contents' in st.session_state
        and st.session_state.get('sa_fetch_key') == fetch_key
        and not st.session_state['sa_contents'].empty
    )
    with info_col:
        if has_data:
            n_contents = len(st.session_state['sa_contents'])
            n_comments = len(st.session_state.get('sa_comments', []))
            st.success(f"βœ… Showing **{n_contents}** contents with **{n_comments:,}** sampled comments")
        elif fetch_clicked:
            pass  # spinner shown below
        else:
            st.info("πŸ‘† Click **Fetch Data** to run a targeted Snowflake query with the settings above.")

    if fetch_clicked:
        with st.spinner("Fetching data from Snowflake…"):
            contents_df, comments_df = data_loader.load_sa_data(
                platform=selected_platform,
                brand=selected_brand,
                top_n=top_n,
                min_comments=min_comments,
                sort_by=sort_by_value,
                sentiments=selected_sentiments or None,
                intents=selected_intents or None,
                date_range=query_date_range,
            )
        st.session_state['sa_contents'] = contents_df
        st.session_state['sa_comments'] = comments_df
        st.session_state['sa_fetch_key'] = fetch_key
        st.session_state['sa_platform'] = selected_platform
        st.session_state['sa_brand'] = selected_brand
        # Reset pagination on new fetch
        st.session_state['sentiment_page'] = 1
        st.rerun()

    # ── Nothing fetched yet ────────────────────────────────────────────────────
    if not has_data and not fetch_clicked:
        return

    filtered_contents = st.session_state.get('sa_contents', pd.DataFrame())
    comments_df = st.session_state.get('sa_comments', pd.DataFrame())
    if filtered_contents.empty:
        st.warning("No content data found with the selected filters. Try adjusting and re-fetching.")
        return

    # ── Summary stats ──────────────────────────────────────────────────────────
    st.markdown("### πŸ“Š Summary")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Contents Analysed", len(filtered_contents))
    with col2:
        # 'selected_sentiment_percentage' only exists when the query ran with
        # an explicit sentiment filter; fall back to the negative % otherwise.
        if 'selected_sentiment_percentage' in filtered_contents.columns:
            avg_pct = filtered_contents['selected_sentiment_percentage'].mean()
            label = "Selected Sentiment %" if selected_sentiments else "Avg Negative %"
            st.metric(label, f"{avg_pct:.1f}%")
        else:
            st.metric("Avg Negative %", f"{filtered_contents['negative_percentage'].mean():.1f}%")
    with col3:
        st.metric("Total Comments", int(filtered_contents['total_comments'].sum()))
    with col4:
        st.metric("Total Replies Needed", int(filtered_contents['reply_required_count'].sum()))

    st.markdown("---")

    # ── Engagement scatter ─────────────────────────────────────────────────────
    st.markdown("### πŸ“ˆ Content Engagement Analysis")
    scatter = distribution_charts.create_engagement_scatter(
        filtered_contents, title="Content Engagement vs. Sentiment"
    )
    st.plotly_chart(scatter, use_container_width=True, key="engagement_scatter_chart")

    st.markdown("---")

    # ── Paginated content cards ────────────────────────────────────────────────
    st.markdown("### πŸ” Detailed Content Analysis")
    if 'sentiment_page' not in st.session_state:
        st.session_state.sentiment_page = 1
    items_per_page = 5
    total_contents = len(filtered_contents)
    # Ceiling division without math.ceil.
    total_pages = (total_contents + items_per_page - 1) // items_per_page

    if total_contents > items_per_page:
        st.info(f"πŸ“„ Page {st.session_state.sentiment_page} of {total_pages} ({total_contents} total contents)")
        col_prev, col_info, col_next = st.columns([1, 2, 1])
        with col_prev:
            if st.button("⬅️ Previous", key="prev_top",
                         disabled=st.session_state.sentiment_page == 1):
                st.session_state.sentiment_page -= 1
                st.rerun()
        with col_info:
            st.markdown(
                f"<div style='text-align:center;padding-top:8px;'>"
                f"Page {st.session_state.sentiment_page} / {total_pages}</div>",
                unsafe_allow_html=True
            )
        with col_next:
            if st.button("Next ➑️", key="next_top",
                         disabled=st.session_state.sentiment_page >= total_pages):
                st.session_state.sentiment_page += 1
                st.rerun()
        st.markdown("---")

    start_idx = (st.session_state.sentiment_page - 1) * items_per_page
    end_idx = min(start_idx + items_per_page, total_contents)
    paginated = filtered_contents.iloc[start_idx:end_idx]

    # One card per content, with sampled-comment charts, on-demand AI
    # summaries, and raw comment expanders.
    for idx, (_, content_row) in enumerate(paginated.iterrows(), start_idx + 1):
        ContentCards.display_content_card(content_row, rank=idx)

        # Comments from the sampled set (pre-fetched, no extra Snowflake call)
        if not comments_df.empty and 'content_sk' in comments_df.columns:
            content_comments = comments_df[comments_df['content_sk'] == content_row['content_sk']]
        else:
            content_comments = pd.DataFrame()

        if content_comments.empty:
            st.info("No sampled comment details available for this content.")
        else:
            viz_col1, viz_col2 = st.columns(2)
            with viz_col1:
                pie = sentiment_charts.create_sentiment_pie_chart(
                    content_comments, title="Sentiment Distribution (sample)"
                )
                st.plotly_chart(pie, use_container_width=True,
                                key=f"sentiment_pie_{content_row['content_sk']}")
            with viz_col2:
                bar = distribution_charts.create_intent_bar_chart(
                    content_comments, title="Intent Distribution (sample)", orientation='h'
                )
                st.plotly_chart(bar, use_container_width=True,
                                key=f"intent_bar_{content_row['content_sk']}")

        # AI Analysis
        st.markdown("#### πŸ€– AI-Powered Analysis")
        content_sk = content_row['content_sk']
        st.markdown("**Select analysis type:**")
        btn_col1, btn_col2, btn_col3 = st.columns(3)
        with btn_col1:
            gen_neg = st.button("πŸ“‰ Negative Summary", key=f"ai_negative_{content_sk}",
                                use_container_width=True)
        with btn_col2:
            gen_combined = st.button("πŸ“Š Combined Summary", key=f"ai_combined_{content_sk}",
                                     use_container_width=True)
        with btn_col3:
            gen_pos = st.button("πŸ“ˆ Positive Summary", key=f"ai_positive_{content_sk}",
                                use_container_width=True)

        summary_type = None
        if gen_neg:
            summary_type = 'negative'
        elif gen_pos:
            summary_type = 'positive'
        elif gen_combined:
            summary_type = 'combined'

        key_neg = f"{content_sk}_negative"
        key_pos = f"{content_sk}_positive"
        key_com = f"{content_sk}_combined"

        # Render when a new summary was requested OR any cached one exists.
        if summary_type or any(k in st.session_state.content_summaries for k in (key_neg, key_pos, key_com)):
            if summary_type:
                summary_key = f"{content_sk}_{summary_type}"
                with st.spinner(f"Analysing {summary_type} comments with AI…"):
                    result = summary_agent.process({
                        'content_sk': content_sk,
                        'content_description': content_row['content_description'],
                        'comments': content_comments,
                        'sentiment_type': summary_type
                    })
                st.session_state.content_summaries[summary_key] = result

            for label, key in [('Negative', key_neg), ('Combined', key_com), ('Positive', key_pos)]:
                if key not in st.session_state.content_summaries:
                    continue
                result = st.session_state.content_summaries[key]
                # FIX: .get() instead of ['success'] β€” a malformed agent payload
                # previously raised KeyError; now it falls through to st.error.
                if result.get('success'):
                    summary = result['summary']
                    with st.expander(f"πŸ“Š AI Analysis Report β€” {label}", expanded=True):
                        st.markdown("### Executive Summary")
                        st.info(summary['executive_summary'])
                        if summary['main_themes']:
                            st.markdown("### 🎯 Main Themes")
                            for theme in summary['main_themes']:
                                emoji = {'positive': '😊', 'negative': '😟', 'mixed': 'πŸ€”'}.get(
                                    theme.get('sentiment', 'mixed'), 'πŸ€”')
                                st.markdown(f"**{emoji} {theme.get('theme')}** ({theme.get('sentiment','mixed').title()})\n- {theme.get('description','')}")
                        col_p, col_c = st.columns(2)
                        with col_p:
                            st.markdown("### βœ… Praise Points")
                            for pt in summary.get('praise_points', []):
                                st.markdown(f"- {pt}")
                        with col_c:
                            st.markdown("### ⚠️ Key Complaints")
                            for pt in summary.get('key_complaints', []):
                                st.markdown(f"- {pt}")
                        col_f, col_i = st.columns(2)
                        with col_f:
                            st.markdown("### ❓ FAQs")
                            for q in summary.get('frequently_asked_questions', []):
                                st.markdown(f"- {q}")
                        with col_i:
                            st.markdown("### πŸ’‘ Insights")
                            for ins in summary.get('unexpected_insights', []):
                                st.markdown(f"- {ins}")
                        if summary.get('action_recommendations'):
                            st.markdown("### 🎯 Recommended Actions")
                            for action in summary['action_recommendations']:
                                priority = action.get('priority', 'medium').upper()
                                emoji = {'HIGH': 'πŸ”΄', 'MEDIUM': '🟑', 'LOW': '🟒'}.get(priority, '🟑')
                                st.markdown(f"{emoji} **[{priority}]** {action.get('action','')}")
                    # Sibling of the report expander β€” Streamlit disallows
                    # nesting an expander inside another expander.
                    with st.expander("ℹ️ Analysis Metadata"):
                        meta = result.get('metadata', {})
                        mc1, mc2, mc3 = st.columns(3)
                        mc1.metric("Comments Analysed", meta.get('total_comments_analyzed', 0))
                        mc2.metric("Model Used", meta.get('model_used', 'N/A'))
                        mc3.metric("Tokens Used", meta.get('tokens_used', 0))
                else:
                    st.error(f"❌ AI analysis failed: {result.get('error','Unknown error')}")
                    if st.button("πŸ”„ Retry", key=f"retry_{key}"):
                        del st.session_state.content_summaries[key]
                        st.rerun()

        # Comment expansion (text already loaded from fetch)
        st.markdown("#### πŸ’¬ View Comments by Sentiment")
        if not content_comments.empty:
            neg_comments = content_comments[
                content_comments['sentiment_polarity'].isin(['negative', 'very_negative'])
            ]
            pos_comments = content_comments[
                content_comments['sentiment_polarity'].isin(['positive', 'very_positive'])
            ]
            col_neg, col_pos = st.columns(2)
            with col_neg:
                with st.expander(f"πŸ“‰ Negative Comments ({len(neg_comments)} sampled)", expanded=False):
                    if not neg_comments.empty:
                        for _, comment in neg_comments.iterrows():
                            ContentCards.display_comment_card(comment, show_original=True)
                    else:
                        st.info("No negative comments in sample.")
            with col_pos:
                with st.expander(f"πŸ“ˆ Positive Comments ({len(pos_comments)} sampled)", expanded=False):
                    if not pos_comments.empty:
                        for _, comment in pos_comments.iterrows():
                            ContentCards.display_comment_card(comment, show_original=True)
                    else:
                        st.info("No positive comments in sample.")
        else:
            st.info("No comments available for this content in the current sample.")

        st.markdown("---")

    # ── Bottom pagination ──────────────────────────────────────────────────────
    if total_contents > items_per_page:
        col_prev_b, col_info_b, col_next_b = st.columns([1, 2, 1])
        with col_prev_b:
            if st.button("⬅️ Previous", key="prev_bottom",
                         disabled=st.session_state.sentiment_page == 1):
                st.session_state.sentiment_page -= 1
                st.rerun()
        with col_info_b:
            st.markdown(
                f"<div style='text-align:center;padding-top:8px;'>"
                f"Page {st.session_state.sentiment_page} / {total_pages}</div>",
                unsafe_allow_html=True
            )
        with col_next_b:
            if st.button("Next ➑️", key="next_bottom",
                         disabled=st.session_state.sentiment_page >= total_pages):
                st.session_state.sentiment_page += 1
                st.rerun()
        st.markdown("---")

    # ── Insights & recommendations (using sampled comments) ───────────────────
    st.markdown("### πŸ’‘ Insights & Recommendations")
    from utils.data_processor import SentimentDataProcessor
    processor = SentimentDataProcessor()
    all_sampled = comments_df[
        comments_df['content_sk'].isin(filtered_contents['content_sk'])
    ] if not comments_df.empty else pd.DataFrame()

    insight_col1, insight_col2 = st.columns(2)
    with insight_col1:
        st.markdown("#### 🎯 Common Intent Patterns")
        if not all_sampled.empty:
            intent_dist = processor.get_intent_distribution(all_sampled)
            for _, row in intent_dist.sort_values('count', ascending=False).head(5).iterrows():
                st.markdown(f"- **{row['intent']}**: {row['count']} ({row['percentage']:.1f}%)")
    with insight_col2:
        st.markdown("#### 🌐 Platform Breakdown")
        if not all_sampled.empty:
            for platform, count in all_sampled['platform'].value_counts().items():
                pct = count / len(all_sampled) * 100
                st.markdown(f"- **{platform.title()}**: {count} comments ({pct:.1f}%)")

    st.markdown("---")

    # ── Action items ───────────────────────────────────────────────────────────
    st.markdown("### βœ… Recommended Actions")
    action_items = []
    total_replies = int(filtered_contents['reply_required_count'].sum())
    if total_replies > 0:
        action_items.append(f"πŸ”΄ **High Priority**: {total_replies} comments require immediate response")
    critical = filtered_contents[filtered_contents['negative_percentage'] > 50]
    if not critical.empty:
        action_items.append(
            f"🚨 **Critical**: {len(critical)} content(s) have >50% negative sentiment β€” investigate root causes"
        )
    if not all_sampled.empty:
        # 'intent' is comma-separated, so substring containment is the check.
        feedback_cnt = all_sampled['intent'].str.contains('feedback_negative', na=False).sum()
        if feedback_cnt:
            action_items.append(f"πŸ’¬ **Feedback**: {feedback_cnt} negative-feedback comments β€” consider product improvements")
        question_cnt = all_sampled['intent'].str.contains('question', na=False).sum()
        if question_cnt:
            action_items.append(f"❓ **Questions**: {question_cnt} questions β€” improve FAQ or support docs")
    if action_items:
        for item in action_items:
            st.markdown(item)
    else:
        st.success("No critical action items at this time.")

    st.markdown("---")

    # ── Export ─────────────────────────────────────────────────────────────────
    st.markdown("### πŸ’Ύ Export Data")
    col1, col2 = st.columns([1, 3])
    with col1:
        base_cols = ['content_sk', 'content_description', 'permalink_url',
                     'total_comments', 'reply_required_count', 'dominant_sentiment']
        # Optional columns depend on whether a sentiment filter was applied.
        for extra in ['selected_sentiment_count', 'selected_sentiment_percentage',
                      'negative_count', 'negative_percentage']:
            if extra in filtered_contents.columns:
                base_cols.append(extra)
        export_cols = [c for c in base_cols if c in filtered_contents.columns]
        csv = filtered_contents[export_cols].to_csv(index=False)
        st.download_button(
            label="πŸ“₯ Download as CSV",
            data=csv,
            file_name=f"sentiment_analysis_top{top_n}.csv",
            mime="text/csv"
        )
    with col2:
        st.info("Download the data for further analysis or reporting.")