# Commit 24b804f β€” "updating names" (Danialebrat)
"""
Sentiment Analysis Page
Analyze content performance across all sentiment types with advanced filtering.
Data is fetched on-demand: user sets filters then clicks "Fetch Data".
Global filters (platform/brand/date) from the sidebar are pre-populated.
"""
import streamlit as st
import pandas as pd
import sys
from pathlib import Path
parent_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(parent_dir))
from visualizations.sentiment_charts import SentimentCharts
from visualizations.distribution_charts import DistributionCharts
from visualizations.content_cards import ContentCards
from agents.content_summary_agent import ContentSummaryAgent
def render_sentiment_analysis(data_loader):
    """
    Render the Sentiment Analysis page.

    Flow: the global sidebar filters pre-populate the platform/brand
    selectors; the user refines sentiment/intent/ranking options and clicks
    "Fetch Data", which runs one targeted Snowflake query via *data_loader*.
    The fetched contents + sampled comments are cached in
    ``st.session_state`` ('sa_contents' / 'sa_comments') keyed by the full
    filter combination ('sa_fetch_key'), so Streamlit reruns reuse the data
    instead of re-querying.

    Args:
        data_loader: SentimentDataLoader instance; must expose
            ``load_sa_data(platform, brand, top_n, min_comments, sort_by,
            sentiments, intents, date_range)`` returning
            ``(contents_df, comments_df)``.
    """
    st.title("πŸ” Custom Sentiment Queries")
    st.markdown("Analyze content performance based on sentiment patterns and user feedback")
    st.markdown("---")

    sentiment_charts = SentimentCharts()
    distribution_charts = DistributionCharts()
    # NOTE(review): temperature=1 assumed intentional for this model β€” confirm.
    summary_agent = ContentSummaryAgent(model="gpt-5-nano", temperature=1)

    # Per-content AI summaries must survive Streamlit reruns.
    if 'content_summaries' not in st.session_state:
        st.session_state.content_summaries = {}

    # ── Get filter options from the already-loaded (lightweight) dashboard df ─
    dashboard_df = st.session_state.get('dashboard_df')
    if dashboard_df is None or dashboard_df.empty:
        st.warning("Dashboard data not loaded yet. Please wait for the app to initialise.")
        return

    available_platforms = sorted(dashboard_df['platform'].dropna().unique().tolist())
    available_brands = sorted(dashboard_df['brand'].dropna().unique().tolist())

    # ── Pre-populate from global sidebar filters ───────────────────────────────
    global_filters = st.session_state.get('global_filters', {})
    global_platforms = global_filters.get('platforms', [])
    global_brands = global_filters.get('brands', [])
    global_date_range = global_filters.get('date_range')

    # ── Platform & Brand selection ─────────────────────────────────────────────
    st.markdown("### 🎯 Select Platform and Brand")
    st.info(
        "⚑ **Performance**: Choose a platform and brand, set optional filters, "
        "then click **Fetch Data** to run a targeted Snowflake query."
    )
    filter_col1, filter_col2 = st.columns(2)
    with filter_col1:
        default_platform_idx = 0
        # Only the first global selection can pre-select a single-select box.
        if global_platforms and global_platforms[0] in available_platforms:
            default_platform_idx = available_platforms.index(global_platforms[0]) + 1  # +1 for blank
        selected_platform = st.selectbox(
            "Platform *",
            options=[''] + available_platforms,
            index=default_platform_idx,
            help="Select the platform to analyse"
        )
    with filter_col2:
        default_brand_idx = 0
        if global_brands and global_brands[0] in available_brands:
            default_brand_idx = available_brands.index(global_brands[0]) + 1
        selected_brand = st.selectbox(
            "Brand *",
            options=[''] + available_brands,
            index=default_brand_idx,
            help="Select the brand to analyse"
        )

    # Both selections are mandatory; until then show a data-availability
    # summary built from the cheap dashboard dataframe and stop.
    if not selected_platform or not selected_brand:
        st.warning("⚠️ Please select both **Platform** and **Brand** to continue.")
        st.markdown("---")
        # Quick summary from dashboard data
        st.markdown("### πŸ“Š Available Data Summary")
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Total Comments", f"{len(dashboard_df):,}")
        with col2:
            st.metric("Platforms", len(available_platforms))
            with st.expander("View Platforms"):
                for p in available_platforms:
                    cnt = (dashboard_df['platform'] == p).sum()
                    st.write(f"- **{p}**: {cnt:,} comments")
        with col3:
            st.metric("Brands", len(available_brands))
            with st.expander("View Brands"):
                for b in available_brands:
                    cnt = (dashboard_df['brand'] == b).sum()
                    st.write(f"- **{b}**: {cnt:,} comments")
        return

    st.markdown("---")

    # ── Content filters ────────────────────────────────────────────────────────
    st.markdown("### πŸ” Content Filters")
    # Build available sentiment / intent options from dashboard_df filtered to
    # selected platform+brand (fast β€” no text columns involved)
    mask = (dashboard_df['platform'] == selected_platform) & (dashboard_df['brand'] == selected_brand)
    preview_df = dashboard_df[mask]

    filter_col1, filter_col2, filter_col3, filter_col4 = st.columns(4)
    with filter_col1:
        # FIX: drop NaN before sorting β€” a NaN in the column puts a float into
        # the list and sorted() raises TypeError on mixed str/float. Matches
        # the dropna() used for available_platforms/brands above.
        sentiment_options = sorted(preview_df['sentiment_polarity'].dropna().unique().tolist())
        selected_sentiments = st.multiselect(
            "Sentiment",
            options=sentiment_options,
            default=[],
            help="Filter by dominant sentiment. Leave empty for all."
        )
    with filter_col2:
        # 'intent' holds comma-separated values; split/explode to distinct tags.
        intent_list = (
            preview_df['intent']
            .str.split(',').explode().str.strip()
            .dropna().unique().tolist()
        )
        selected_intents = st.multiselect(
            "Intent",
            options=sorted(i for i in intent_list if i),
            default=[],
            help="Filter contents that have comments with these intents"
        )
    with filter_col3:
        top_n = st.selectbox(
            "Top N Contents",
            options=[5, 10, 15, 20, 25],
            index=1,
            help="Number of contents to display"
        )
    with filter_col4:
        filter_active = bool(selected_sentiments or selected_intents)
        st.metric(
            "Filters Active",
            "βœ“ Yes" if filter_active else "βœ— No",
            help="Sentiment or intent filters applied" if filter_active else "Showing all sentiments"
        )

    st.markdown("---")

    # ── Advanced ranking controls ──────────────────────────────────────────────
    with st.expander("βš™οΈ Advanced Ranking Controls", expanded=False):
        adv_col1, adv_col2 = st.columns(2)
        with adv_col1:
            min_comments = st.slider(
                "Minimum Comments Required",
                min_value=1, max_value=50, value=10, step=1,
                help="Exclude contents with fewer comments than this threshold."
            )
        with adv_col2:
            # Options are (value, label) pairs; format_func shows the label.
            sort_by = st.selectbox(
                "Sort By",
                options=[
                    ('severity_score', '🎯 Severity Score (Balanced) β€” Recommended'),
                    ('sentiment_percentage', 'πŸ“Š Sentiment Percentage'),
                    ('sentiment_count', 'πŸ”’ Sentiment Count (Absolute)'),
                    ('total_comments', 'πŸ’¬ Total Comments (Volume)'),
                ],
                format_func=lambda x: x[1],
                index=0
            )
        sort_by_value = sort_by[0]
        sentiment_label = "selected sentiments" if selected_sentiments else "negative sentiments"
        info_map = {
            'severity_score': f"πŸ“˜ **Severity Score** = Sentiment % Γ— √(Total Comments). Balances {sentiment_label} % with volume.",
            'sentiment_percentage': f"πŸ“˜ Ranks by highest % of {sentiment_label}. May include low-volume contents.",
            'sentiment_count': f"πŸ“˜ Ranks by absolute number of {sentiment_label} comments.",
            'total_comments': "πŸ“˜ Ranks by total comment volume, regardless of sentiment.",
        }
        st.info(info_map.get(sort_by_value, ""))

    # Date range for the query (inherit from global filters if set)
    if global_date_range and len(global_date_range) == 2:
        query_date_range = global_date_range
    else:
        query_date_range = None

    # ── Fetch button ───────────────────────────────────────────────────────────
    # fetch_key captures every input that affects the query; if it matches the
    # stored key, the cached result is reused instead of re-querying Snowflake.
    fetch_key = (
        selected_platform, selected_brand, top_n, min_comments, sort_by_value,
        tuple(sorted(selected_sentiments)), tuple(sorted(selected_intents)),
        str(query_date_range)
    )
    fetch_col, info_col = st.columns([1, 3])
    with fetch_col:
        fetch_clicked = st.button("πŸš€ Fetch Data", use_container_width=True, type="primary")
    # Auto-fetch if the key hasn't changed and we already have data
    has_data = (
        'sa_contents' in st.session_state
        and st.session_state.get('sa_fetch_key') == fetch_key
        and not st.session_state['sa_contents'].empty
    )
    with info_col:
        if has_data:
            n_contents = len(st.session_state['sa_contents'])
            n_comments = len(st.session_state.get('sa_comments', []))
            st.success(f"βœ… Showing **{n_contents}** contents with **{n_comments:,}** sampled comments")
        elif fetch_clicked:
            pass  # spinner shown below
        else:
            st.info("πŸ‘† Click **Fetch Data** to run a targeted Snowflake query with the settings above.")

    if fetch_clicked:
        with st.spinner("Fetching data from Snowflake…"):
            contents_df, comments_df = data_loader.load_sa_data(
                platform=selected_platform,
                brand=selected_brand,
                top_n=top_n,
                min_comments=min_comments,
                sort_by=sort_by_value,
                sentiments=selected_sentiments or None,
                intents=selected_intents or None,
                date_range=query_date_range,
            )
        st.session_state['sa_contents'] = contents_df
        st.session_state['sa_comments'] = comments_df
        st.session_state['sa_fetch_key'] = fetch_key
        st.session_state['sa_platform'] = selected_platform
        st.session_state['sa_brand'] = selected_brand
        # Reset pagination on new fetch
        st.session_state['sentiment_page'] = 1
        st.rerun()

    # ── Nothing fetched yet ────────────────────────────────────────────────────
    if not has_data and not fetch_clicked:
        return

    filtered_contents = st.session_state.get('sa_contents', pd.DataFrame())
    comments_df = st.session_state.get('sa_comments', pd.DataFrame())
    if filtered_contents.empty:
        st.warning("No content data found with the selected filters. Try adjusting and re-fetching.")
        return

    # ── Summary stats ──────────────────────────────────────────────────────────
    st.markdown("### πŸ“Š Summary")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Contents Analysed", len(filtered_contents))
    with col2:
        # 'selected_sentiment_percentage' only exists when the query ran with
        # an explicit sentiment filter; fall back to the negative % otherwise.
        if 'selected_sentiment_percentage' in filtered_contents.columns:
            avg_pct = filtered_contents['selected_sentiment_percentage'].mean()
            label = "Selected Sentiment %" if selected_sentiments else "Avg Negative %"
            st.metric(label, f"{avg_pct:.1f}%")
        else:
            st.metric("Avg Negative %", f"{filtered_contents['negative_percentage'].mean():.1f}%")
    with col3:
        st.metric("Total Comments", int(filtered_contents['total_comments'].sum()))
    with col4:
        st.metric("Total Replies Needed", int(filtered_contents['reply_required_count'].sum()))

    st.markdown("---")

    # ── Engagement scatter ─────────────────────────────────────────────────────
    st.markdown("### πŸ“ˆ Content Engagement Analysis")
    scatter = distribution_charts.create_engagement_scatter(
        filtered_contents, title="Content Engagement vs. Sentiment"
    )
    st.plotly_chart(scatter, use_container_width=True, key="engagement_scatter_chart")

    st.markdown("---")

    # ── Paginated content cards ────────────────────────────────────────────────
    st.markdown("### πŸ” Detailed Content Analysis")
    if 'sentiment_page' not in st.session_state:
        st.session_state.sentiment_page = 1
    items_per_page = 5
    total_contents = len(filtered_contents)
    # Ceiling division without math.ceil.
    total_pages = (total_contents + items_per_page - 1) // items_per_page

    if total_contents > items_per_page:
        st.info(f"πŸ“„ Page {st.session_state.sentiment_page} of {total_pages} ({total_contents} total contents)")
        col_prev, col_info, col_next = st.columns([1, 2, 1])
        with col_prev:
            if st.button("⬅️ Previous", key="prev_top",
                         disabled=st.session_state.sentiment_page == 1):
                st.session_state.sentiment_page -= 1
                st.rerun()
        with col_info:
            st.markdown(
                f"<div style='text-align:center;padding-top:8px;'>"
                f"Page {st.session_state.sentiment_page} / {total_pages}</div>",
                unsafe_allow_html=True
            )
        with col_next:
            if st.button("Next ➑️", key="next_top",
                         disabled=st.session_state.sentiment_page >= total_pages):
                st.session_state.sentiment_page += 1
                st.rerun()
        st.markdown("---")

    start_idx = (st.session_state.sentiment_page - 1) * items_per_page
    end_idx = min(start_idx + items_per_page, total_contents)
    paginated = filtered_contents.iloc[start_idx:end_idx]

    # One card per content, with sampled-comment charts, on-demand AI
    # summaries, and raw comment expanders.
    for idx, (_, content_row) in enumerate(paginated.iterrows(), start_idx + 1):
        ContentCards.display_content_card(content_row, rank=idx)

        # Comments from the sampled set (pre-fetched, no extra Snowflake call)
        if not comments_df.empty and 'content_sk' in comments_df.columns:
            content_comments = comments_df[comments_df['content_sk'] == content_row['content_sk']]
        else:
            content_comments = pd.DataFrame()

        if content_comments.empty:
            st.info("No sampled comment details available for this content.")
        else:
            viz_col1, viz_col2 = st.columns(2)
            with viz_col1:
                pie = sentiment_charts.create_sentiment_pie_chart(
                    content_comments, title="Sentiment Distribution (sample)"
                )
                st.plotly_chart(pie, use_container_width=True,
                                key=f"sentiment_pie_{content_row['content_sk']}")
            with viz_col2:
                bar = distribution_charts.create_intent_bar_chart(
                    content_comments, title="Intent Distribution (sample)", orientation='h'
                )
                st.plotly_chart(bar, use_container_width=True,
                                key=f"intent_bar_{content_row['content_sk']}")

        # AI Analysis
        st.markdown("#### πŸ€– AI-Powered Analysis")
        content_sk = content_row['content_sk']
        st.markdown("**Select analysis type:**")
        btn_col1, btn_col2, btn_col3 = st.columns(3)
        with btn_col1:
            gen_neg = st.button("πŸ“‰ Negative Summary", key=f"ai_negative_{content_sk}",
                                use_container_width=True)
        with btn_col2:
            gen_combined = st.button("πŸ“Š Combined Summary", key=f"ai_combined_{content_sk}",
                                     use_container_width=True)
        with btn_col3:
            gen_pos = st.button("πŸ“ˆ Positive Summary", key=f"ai_positive_{content_sk}",
                                use_container_width=True)

        summary_type = None
        if gen_neg:
            summary_type = 'negative'
        elif gen_pos:
            summary_type = 'positive'
        elif gen_combined:
            summary_type = 'combined'

        key_neg = f"{content_sk}_negative"
        key_pos = f"{content_sk}_positive"
        key_com = f"{content_sk}_combined"

        # Render when a new summary was requested OR any cached one exists.
        if summary_type or any(k in st.session_state.content_summaries for k in (key_neg, key_pos, key_com)):
            if summary_type:
                summary_key = f"{content_sk}_{summary_type}"
                with st.spinner(f"Analysing {summary_type} comments with AI…"):
                    result = summary_agent.process({
                        'content_sk': content_sk,
                        'content_description': content_row['content_description'],
                        'comments': content_comments,
                        'sentiment_type': summary_type
                    })
                st.session_state.content_summaries[summary_key] = result

            for label, key in [('Negative', key_neg), ('Combined', key_com), ('Positive', key_pos)]:
                if key not in st.session_state.content_summaries:
                    continue
                result = st.session_state.content_summaries[key]
                # FIX: .get() instead of ['success'] β€” a malformed agent payload
                # previously raised KeyError; now it falls through to st.error.
                if result.get('success'):
                    summary = result['summary']
                    with st.expander(f"πŸ“Š AI Analysis Report β€” {label}", expanded=True):
                        st.markdown("### Executive Summary")
                        st.info(summary['executive_summary'])
                        if summary['main_themes']:
                            st.markdown("### 🎯 Main Themes")
                            for theme in summary['main_themes']:
                                emoji = {'positive': '😊', 'negative': '😟', 'mixed': 'πŸ€”'}.get(
                                    theme.get('sentiment', 'mixed'), 'πŸ€”')
                                st.markdown(f"**{emoji} {theme.get('theme')}** ({theme.get('sentiment','mixed').title()})\n- {theme.get('description','')}")
                        col_p, col_c = st.columns(2)
                        with col_p:
                            st.markdown("### βœ… Praise Points")
                            for pt in summary.get('praise_points', []):
                                st.markdown(f"- {pt}")
                        with col_c:
                            st.markdown("### ⚠️ Key Complaints")
                            for pt in summary.get('key_complaints', []):
                                st.markdown(f"- {pt}")
                        col_f, col_i = st.columns(2)
                        with col_f:
                            st.markdown("### ❓ FAQs")
                            for q in summary.get('frequently_asked_questions', []):
                                st.markdown(f"- {q}")
                        with col_i:
                            st.markdown("### πŸ’‘ Insights")
                            for ins in summary.get('unexpected_insights', []):
                                st.markdown(f"- {ins}")
                        if summary.get('action_recommendations'):
                            st.markdown("### 🎯 Recommended Actions")
                            for action in summary['action_recommendations']:
                                priority = action.get('priority', 'medium').upper()
                                emoji = {'HIGH': 'πŸ”΄', 'MEDIUM': '🟑', 'LOW': '🟒'}.get(priority, '🟑')
                                st.markdown(f"{emoji} **[{priority}]** {action.get('action','')}")
                    # Sibling of the report expander β€” Streamlit disallows
                    # nesting an expander inside another expander.
                    with st.expander("ℹ️ Analysis Metadata"):
                        meta = result.get('metadata', {})
                        mc1, mc2, mc3 = st.columns(3)
                        mc1.metric("Comments Analysed", meta.get('total_comments_analyzed', 0))
                        mc2.metric("Model Used", meta.get('model_used', 'N/A'))
                        mc3.metric("Tokens Used", meta.get('tokens_used', 0))
                else:
                    st.error(f"❌ AI analysis failed: {result.get('error','Unknown error')}")
                    if st.button("πŸ”„ Retry", key=f"retry_{key}"):
                        del st.session_state.content_summaries[key]
                        st.rerun()

        # Comment expansion (text already loaded from fetch)
        st.markdown("#### πŸ’¬ View Comments by Sentiment")
        if not content_comments.empty:
            neg_comments = content_comments[
                content_comments['sentiment_polarity'].isin(['negative', 'very_negative'])
            ]
            pos_comments = content_comments[
                content_comments['sentiment_polarity'].isin(['positive', 'very_positive'])
            ]
            col_neg, col_pos = st.columns(2)
            with col_neg:
                with st.expander(f"πŸ“‰ Negative Comments ({len(neg_comments)} sampled)", expanded=False):
                    if not neg_comments.empty:
                        for _, comment in neg_comments.iterrows():
                            ContentCards.display_comment_card(comment, show_original=True)
                    else:
                        st.info("No negative comments in sample.")
            with col_pos:
                with st.expander(f"πŸ“ˆ Positive Comments ({len(pos_comments)} sampled)", expanded=False):
                    if not pos_comments.empty:
                        for _, comment in pos_comments.iterrows():
                            ContentCards.display_comment_card(comment, show_original=True)
                    else:
                        st.info("No positive comments in sample.")
        else:
            st.info("No comments available for this content in the current sample.")

        st.markdown("---")

    # ── Bottom pagination ──────────────────────────────────────────────────────
    if total_contents > items_per_page:
        col_prev_b, col_info_b, col_next_b = st.columns([1, 2, 1])
        with col_prev_b:
            if st.button("⬅️ Previous", key="prev_bottom",
                         disabled=st.session_state.sentiment_page == 1):
                st.session_state.sentiment_page -= 1
                st.rerun()
        with col_info_b:
            st.markdown(
                f"<div style='text-align:center;padding-top:8px;'>"
                f"Page {st.session_state.sentiment_page} / {total_pages}</div>",
                unsafe_allow_html=True
            )
        with col_next_b:
            if st.button("Next ➑️", key="next_bottom",
                         disabled=st.session_state.sentiment_page >= total_pages):
                st.session_state.sentiment_page += 1
                st.rerun()
        st.markdown("---")

    # ── Insights & recommendations (using sampled comments) ───────────────────
    st.markdown("### πŸ’‘ Insights & Recommendations")
    from utils.data_processor import SentimentDataProcessor
    processor = SentimentDataProcessor()
    all_sampled = comments_df[
        comments_df['content_sk'].isin(filtered_contents['content_sk'])
    ] if not comments_df.empty else pd.DataFrame()

    insight_col1, insight_col2 = st.columns(2)
    with insight_col1:
        st.markdown("#### 🎯 Common Intent Patterns")
        if not all_sampled.empty:
            intent_dist = processor.get_intent_distribution(all_sampled)
            for _, row in intent_dist.sort_values('count', ascending=False).head(5).iterrows():
                st.markdown(f"- **{row['intent']}**: {row['count']} ({row['percentage']:.1f}%)")
    with insight_col2:
        st.markdown("#### 🌐 Platform Breakdown")
        if not all_sampled.empty:
            for platform, count in all_sampled['platform'].value_counts().items():
                pct = count / len(all_sampled) * 100
                st.markdown(f"- **{platform.title()}**: {count} comments ({pct:.1f}%)")

    st.markdown("---")

    # ── Action items ───────────────────────────────────────────────────────────
    st.markdown("### βœ… Recommended Actions")
    action_items = []
    total_replies = int(filtered_contents['reply_required_count'].sum())
    if total_replies > 0:
        action_items.append(f"πŸ”΄ **High Priority**: {total_replies} comments require immediate response")
    critical = filtered_contents[filtered_contents['negative_percentage'] > 50]
    if not critical.empty:
        action_items.append(
            f"🚨 **Critical**: {len(critical)} content(s) have >50% negative sentiment β€” investigate root causes"
        )
    if not all_sampled.empty:
        # 'intent' is comma-separated, so substring containment is the check.
        feedback_cnt = all_sampled['intent'].str.contains('feedback_negative', na=False).sum()
        if feedback_cnt:
            action_items.append(f"πŸ’¬ **Feedback**: {feedback_cnt} negative-feedback comments β€” consider product improvements")
        question_cnt = all_sampled['intent'].str.contains('question', na=False).sum()
        if question_cnt:
            action_items.append(f"❓ **Questions**: {question_cnt} questions β€” improve FAQ or support docs")
    if action_items:
        for item in action_items:
            st.markdown(item)
    else:
        st.success("No critical action items at this time.")

    st.markdown("---")

    # ── Export ─────────────────────────────────────────────────────────────────
    st.markdown("### πŸ’Ύ Export Data")
    col1, col2 = st.columns([1, 3])
    with col1:
        base_cols = ['content_sk', 'content_description', 'permalink_url',
                     'total_comments', 'reply_required_count', 'dominant_sentiment']
        # Optional columns depend on whether a sentiment filter was applied.
        for extra in ['selected_sentiment_count', 'selected_sentiment_percentage',
                      'negative_count', 'negative_percentage']:
            if extra in filtered_contents.columns:
                base_cols.append(extra)
        export_cols = [c for c in base_cols if c in filtered_contents.columns]
        csv = filtered_contents[export_cols].to_csv(index=False)
        st.download_button(
            label="πŸ“₯ Download as CSV",
            data=csv,
            file_name=f"sentiment_analysis_top{top_n}.csv",
            mime="text/csv"
        )
    with col2:
        st.info("Download the data for further analysis or reporting.")