# Danialebrat's picture
# Deploying sentiment analysis project
# 9858829
"""
Demographic visualization charts for sentiment analysis
Handles age, timezone, and experience level visualizations
"""
import plotly.graph_objects as go
import plotly.express as px
import json
from pathlib import Path
class DemographicCharts:
    """
    Creates demographic-related visualizations for musora_app data

    Each ``create_*`` method takes an already-aggregated pandas DataFrame and
    returns a ``plotly.graph_objects.Figure``. The DataFrames passed in are
    never modified. An empty DataFrame produces a placeholder figure that
    shows a "no data" message instead of a chart.
    """

    # Display order of the categorical x-axes.
    AGE_ORDER = ['18-24', '25-34', '35-44', '45-54', '55+']
    EXPERIENCE_GROUP_ORDER = ['Beginner (0-3)', 'Intermediate (4-7)', 'Advanced (8-10)']

    def __init__(self):
        """
        Initialize with configuration

        Reads ``config/viz_config.json`` located one directory above the
        directory containing this file and caches the values the charts use.

        Raises:
            OSError: If the configuration file cannot be opened
            json.JSONDecodeError: If the file is not valid JSON
            KeyError: If an expected configuration key is missing
        """
        config_path = Path(__file__).parent.parent / "config" / "viz_config.json"
        # Explicit encoding so decoding does not depend on the platform locale.
        with open(config_path, 'r', encoding='utf-8') as f:
            self.config = json.load(f)

        self.sentiment_colors = self.config['color_schemes']['sentiment_polarity']
        self.sentiment_order = self.config['sentiment_order']
        self.chart_height = self.config['dashboard']['chart_height']

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _sorted_by_category(df, column, order):
        """
        Return a sorted copy of df with `column` converted to an ordered categorical

        Args:
            df: Source DataFrame (left untouched)
            column: Name of the column to order by
            order: List of category values in display order

        Returns:
            New DataFrame sorted by `column` following `order`. Values that
            are not listed in `order` become NaN in the returned copy.
        """
        # assign() builds a new frame, so the caller's DataFrame keeps its
        # original dtype and row order.
        ordered = df.assign(
            **{column: pd.Categorical(df[column], categories=order, ordered=True)}
        )
        return ordered.sort_values(column)

    @staticmethod
    def _count_labels(df, separator):
        """
        Build the "<count><separator>(<percentage>%)" label for every row

        Args:
            df: DataFrame with count and percentage columns
            separator: Text placed between the count and the percentage

        Returns:
            list of str, one label per row in row order
        """
        # Iterating the columns keeps each column's own dtype. A row-wise
        # DataFrame.apply upcasts integer counts to float when every column
        # is numeric, which renders "12.0" instead of "12".
        return [
            f"{count}{separator}({percentage:.1f}%)"
            for count, percentage in zip(df['count'], df['percentage'])
        ]

    def _create_count_bar_chart(self, df, category_field, title, xaxis_title,
                                fill_color, outline_color):
        """
        Create a vertical bar chart of comment counts per category

        Args:
            df: DataFrame with `category_field`, count, percentage columns
            category_field: Column plotted on the x-axis
            title: Chart title
            xaxis_title: Label of the x-axis
            fill_color: Bar fill color
            outline_color: Bar outline color

        Returns:
            plotly.graph_objects.Figure
        """
        fig = go.Figure()
        fig.add_trace(go.Bar(
            x=df[category_field],
            y=df['count'],
            text=self._count_labels(df, '<br>'),
            textposition='auto',
            marker=dict(
                color=fill_color,
                line=dict(color=outline_color, width=1)
            ),
            hovertemplate='<b>%{x}</b><br>Comments: %{y}<br>Percentage: %{customdata:.1f}%<extra></extra>',
            customdata=df['percentage']
        ))
        fig.update_layout(
            title=title,
            xaxis_title=xaxis_title,
            yaxis_title="Number of Comments",
            height=self.chart_height,
            showlegend=False,
            hovermode='x'
        )
        return fig

    def _create_sentiment_stack_chart(self, df, category_field, title,
                                      hover_label, xaxis_config):
        """
        Create a stacked bar chart with one trace per sentiment

        Args:
            df: DataFrame with `category_field`, sentiment_polarity, percentage columns
            category_field: Column plotted on the x-axis
            title: Chart title
            hover_label: Name shown in front of the x value in the hover text
            xaxis_config: dict of x-axis layout settings

        Returns:
            plotly.graph_objects.Figure
        """
        fig = go.Figure()

        # One trace per sentiment, added in the configured order; sentiments
        # without any rows are skipped.
        for sentiment in self.sentiment_order:
            rows = df[df['sentiment_polarity'] == sentiment]
            if rows.empty:
                continue
            fig.add_trace(go.Bar(
                name=sentiment.replace('_', ' ').title(),
                x=rows[category_field],
                y=rows['percentage'],
                marker=dict(color=self.sentiment_colors.get(sentiment, '#999999')),
                hovertemplate=(
                    '<b>%{fullData.name}</b><br>'
                    + hover_label
                    + ': %{x}<br>Percentage: %{y:.1f}%<extra></extra>'
                )
            ))

        fig.update_layout(
            title=title,
            xaxis=xaxis_config,
            yaxis=dict(
                title="Percentage (%)",
                range=[0, 100]
            ),
            barmode='stack',
            height=self.chart_height,
            hovermode='x unified',
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
            )
        )
        return fig

    # ------------------------------------------------------------------
    # Age
    # ------------------------------------------------------------------

    def create_age_distribution_chart(self, age_dist_df, title="Age Distribution"):
        """
        Create bar chart for age group distribution

        Args:
            age_dist_df: DataFrame with age_group, count, percentage columns
            title: Chart title

        Returns:
            plotly.graph_objects.Figure
        """
        if age_dist_df.empty:
            return self._create_empty_chart(title, "No demographic data available")

        ordered_df = self._sorted_by_category(age_dist_df, 'age_group', self.AGE_ORDER)
        return self._create_count_bar_chart(
            ordered_df, 'age_group', title, "Age Group", '#4A90E2', '#2E5C8A'
        )

    def create_age_sentiment_chart(self, age_sentiment_df, title="Sentiment by Age Group"):
        """
        Create stacked bar chart showing sentiment distribution for each age group

        Args:
            age_sentiment_df: DataFrame with age_group, sentiment_polarity, count, percentage
            title: Chart title

        Returns:
            plotly.graph_objects.Figure
        """
        if age_sentiment_df.empty:
            return self._create_empty_chart(title, "No demographic data available")

        xaxis_config = dict(
            title="Age Group",
            categoryorder='array',
            categoryarray=self.AGE_ORDER
        )
        return self._create_sentiment_stack_chart(
            age_sentiment_df, 'age_group', title, 'Age', xaxis_config
        )

    # ------------------------------------------------------------------
    # Timezone / region
    # ------------------------------------------------------------------

    def create_timezone_chart(self, timezone_df, title="Top Timezones", top_n=15):
        """
        Create horizontal bar chart for top timezones

        The first `top_n` rows are plotted as given; the DataFrame is not
        re-sorted here.

        Args:
            timezone_df: DataFrame with timezone, count, percentage columns
            title: Chart title
            top_n: Number of top timezones to display

        Returns:
            plotly.graph_objects.Figure
        """
        if timezone_df.empty:
            return self._create_empty_chart(title, "No timezone data available")

        # Take top N and reverse for better display (highest at top)
        display_df = timezone_df.head(top_n).iloc[::-1]

        fig = go.Figure()
        fig.add_trace(go.Bar(
            y=display_df['timezone'],
            x=display_df['count'],
            orientation='h',
            text=self._count_labels(display_df, ' '),
            textposition='auto',
            marker=dict(
                color='#50C878',
                line=dict(color='#2E7D4E', width=1)
            ),
            hovertemplate='<b>%{y}</b><br>Comments: %{x}<br>Percentage: %{customdata:.1f}%<extra></extra>',
            customdata=display_df['percentage']
        ))
        fig.update_layout(
            title=title,
            xaxis_title="Number of Comments",
            yaxis_title="Timezone",
            # Height follows the number of bars actually drawn, which can be
            # smaller than top_n when fewer timezones are available.
            height=max(self.chart_height, len(display_df) * 25),
            showlegend=False,
            hovermode='y'
        )
        return fig

    def create_region_distribution_chart(self, region_df, title="Distribution by Region"):
        """
        Create pie chart for timezone region distribution

        Args:
            region_df: DataFrame with timezone_region, count, percentage columns
            title: Chart title

        Returns:
            plotly.graph_objects.Figure
        """
        if region_df.empty:
            return self._create_empty_chart(title, "No region data available")

        # Define color palette for regions
        colors = px.colors.qualitative.Set3

        fig = go.Figure()
        fig.add_trace(go.Pie(
            labels=region_df['timezone_region'],
            values=region_df['count'],
            textinfo='label+percent',
            hovertemplate='<b>%{label}</b><br>Comments: %{value}<br>Percentage: %{percent}<extra></extra>',
            marker=dict(colors=colors)
        ))
        fig.update_layout(
            title=title,
            height=self.chart_height,
            showlegend=True,
            legend=dict(
                orientation="v",
                yanchor="middle",
                y=0.5,
                xanchor="left",
                x=1
            )
        )
        return fig

    def create_region_sentiment_chart(self, region_sentiment_df, title="Sentiment by Region"):
        """
        Create stacked bar chart showing sentiment distribution for each region

        Args:
            region_sentiment_df: DataFrame with timezone_region, sentiment_polarity, count, percentage
            title: Chart title

        Returns:
            plotly.graph_objects.Figure
        """
        if region_sentiment_df.empty:
            return self._create_empty_chart(title, "No region sentiment data available")

        return self._create_sentiment_stack_chart(
            region_sentiment_df, 'timezone_region', title, 'Region', dict(title="Region")
        )

    # ------------------------------------------------------------------
    # Experience
    # ------------------------------------------------------------------

    def create_experience_distribution_chart(self, exp_df, title="Experience Level Distribution", use_groups=False):
        """
        Create bar chart for experience level distribution

        Args:
            exp_df: DataFrame with experience_level/experience_group, count, percentage columns
            title: Chart title
            use_groups: If True, display grouped experience levels

        Returns:
            plotly.graph_objects.Figure
        """
        if exp_df.empty:
            return self._create_empty_chart(title, "No experience data available")

        if use_groups:
            field = 'experience_group'
            xaxis_title = "Experience Group"
            ordered_df = self._sorted_by_category(exp_df, field, self.EXPERIENCE_GROUP_ORDER)
        else:
            field = 'experience_level'
            xaxis_title = "Experience Level"
            # Plain sort on the column's own values.
            ordered_df = exp_df.sort_values(field)

        return self._create_count_bar_chart(
            ordered_df, field, title, xaxis_title, '#9B59B6', '#6C3483'
        )

    def create_experience_sentiment_chart(self, exp_sentiment_df, title="Sentiment by Experience Level", use_groups=False):
        """
        Create stacked bar chart showing sentiment distribution for each experience level

        Args:
            exp_sentiment_df: DataFrame with experience_level/experience_group, sentiment_polarity, count, percentage
            title: Chart title
            use_groups: If True, use grouped experience levels

        Returns:
            plotly.graph_objects.Figure
        """
        if exp_sentiment_df.empty:
            return self._create_empty_chart(title, "No experience sentiment data available")

        if use_groups:
            field = 'experience_group'
            xaxis_config = dict(
                title="Experience Group",
                categoryorder='array',
                categoryarray=self.EXPERIENCE_GROUP_ORDER
            )
        else:
            field = 'experience_level'
            xaxis_config = dict(title="Experience Level")

        return self._create_sentiment_stack_chart(
            exp_sentiment_df, field, title, 'Experience', xaxis_config
        )

    # ------------------------------------------------------------------
    # Cross-demographic
    # ------------------------------------------------------------------

    def create_demographics_heatmap(self, df, row_field, col_field, title="Demographics Heatmap"):
        """
        Create heatmap for cross-demographic analysis

        The `count` column is summed for every (row_field, col_field) pair;
        pairs that do not occur are shown as 0.

        Args:
            df: DataFrame with the two demographic fields and a count column
            row_field: Field for rows (e.g., 'age_group')
            col_field: Field for columns (e.g., 'experience_group')
            title: Chart title

        Returns:
            plotly.graph_objects.Figure
        """
        if df.empty:
            return self._create_empty_chart(title, "No data available for heatmap")

        # Create pivot table
        pivot = df.pivot_table(
            index=row_field,
            columns=col_field,
            values='count',
            aggfunc='sum',
            fill_value=0
        )

        fig = go.Figure(data=go.Heatmap(
            z=pivot.values,
            x=pivot.columns,
            y=pivot.index,
            colorscale='Blues',
            text=pivot.values,
            texttemplate='%{text}',
            textfont={"size": 10},
            hovertemplate='<b>%{y}</b> × <b>%{x}</b><br>Comments: %{z}<extra></extra>'
        ))
        fig.update_layout(
            title=title,
            xaxis_title=col_field.replace('_', ' ').title(),
            yaxis_title=row_field.replace('_', ' ').title(),
            height=self.chart_height
        )
        return fig

    def _create_empty_chart(self, title, message):
        """
        Create an empty chart with a message

        Args:
            title: Chart title
            message: Message to display

        Returns:
            plotly.graph_objects.Figure
        """
        fig = go.Figure()
        # Paper coordinates place the message in the middle of the figure
        # regardless of axis ranges.
        fig.add_annotation(
            text=message,
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font=dict(size=14, color="gray")
        )
        fig.update_layout(
            title=title,
            height=self.chart_height,
            xaxis=dict(visible=False),
            yaxis=dict(visible=False)
        )
        return fig
# Import pandas for use in methods (needed for Categorical)
import pandas as pd