File size: 8,006 Bytes
00a8f8d
 
a544a7a
 
 
 
00a8f8d
 
a544a7a
 
 
 
 
 
 
 
00a8f8d
a544a7a
00a8f8d
a544a7a
 
 
 
 
00a8f8d
a544a7a
00a8f8d
 
a544a7a
00a8f8d
a544a7a
 
 
 
 
 
 
00a8f8d
 
a544a7a
00a8f8d
 
a544a7a
 
 
 
 
 
 
 
 
 
00a8f8d
a544a7a
 
 
00a8f8d
 
a544a7a
 
00a8f8d
 
a544a7a
 
 
00a8f8d
a544a7a
 
 
 
 
 
 
00a8f8d
a544a7a
00a8f8d
a544a7a
00a8f8d
 
a544a7a
 
00a8f8d
a544a7a
 
00a8f8d
 
 
 
 
 
 
 
 
 
a544a7a
00a8f8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a544a7a
00a8f8d
 
 
 
 
 
 
 
 
a544a7a
00a8f8d
a544a7a
00a8f8d
 
 
a544a7a
00a8f8d
a544a7a
 
00a8f8d
 
a544a7a
00a8f8d
 
a544a7a
 
00a8f8d
 
 
 
 
a544a7a
 
00a8f8d
 
 
 
 
 
 
 
 
a544a7a
00a8f8d
 
 
a544a7a
00a8f8d
a544a7a
00a8f8d
 
a544a7a
00a8f8d
 
 
 
 
 
 
 
 
 
a544a7a
00a8f8d
a544a7a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# src/visualizations.py

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
from collections import Counter
import re

def create_sentiment_pie(df):
    """Create the sentiment distribution pie chart.

    Args:
        df: DataFrame with a ``sentiment`` column. The values ``Positive``,
            ``Negative`` and ``Neutral`` are given fixed colors.

    Returns:
        A Plotly figure with one slice per distinct sentiment value.
    """
    sentiment_counts = df['sentiment'].value_counts()
    fig = px.pie(
        values=sentiment_counts.values,
        names=sentiment_counts.index,
        # color_discrete_map is keyed on the values passed as `color`;
        # without this argument the fixed palette below is not applied.
        color=sentiment_counts.index,
        title="Sentiment Distribution",
        color_discrete_map={'Positive': '#2ecc71', 'Negative': '#e74c3c', 'Neutral': '#95a5a6'},
    )
    fig.update_traces(
        textposition='inside',
        textinfo='percent+label',
        hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}<extra></extra>',
    )
    return fig

def create_emotion_bar(df):
    """Build a bar chart of how often each emotion occurs.

    Args:
        df: DataFrame with an ``emotion`` column.

    Returns:
        A Plotly bar figure with one bar per distinct emotion, colored from a
        fixed palette and drawn without a legend.
    """
    palette = {
        'Joy': '#f39c12',
        'Frustration': '#e74c3c',
        'Confusion': '#3498db',
        'Anxiety': '#9b59b6',
        'Neutral': '#95a5a6',
    }
    counts = df['emotion'].value_counts()
    emotions = counts.index

    chart = px.bar(
        x=emotions,
        y=counts.values,
        title="Emotion Detection",
        labels={'x': 'Emotion', 'y': 'Count'},
        color=emotions,
        color_discrete_map=palette,
    )
    chart.update_layout(
        showlegend=False,
        xaxis_tickangle=-45,
        yaxis={'gridcolor': 'rgba(0,0,0,0.1)'},
    )
    return chart

def create_category_donut(df):
    """Build a donut chart of post categories with the row total in the hole.

    Args:
        df: DataFrame with a ``category`` column.

    Returns:
        A Plotly pie figure with a 0.4 hole and a centered "Total" annotation
        showing ``len(df)``.
    """
    slice_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']
    counts = df['category'].value_counts()

    donut = px.pie(
        names=counts.index,
        values=counts.values,
        title="Post Categories",
        hole=0.4,
        color_discrete_sequence=slice_colors,
    )
    donut.update_traces(
        textposition='inside',
        textinfo='percent+label',
        hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}<extra></extra>',
    )
    # Paper coordinates (0.5, 0.5) place the label in the middle of the hole.
    donut.add_annotation(
        text=f"Total<br>{len(df)}",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font={'size': 20},
    )
    return donut

def create_mentions_timeline(df):
    """Create a timeline of Prime Bank mentions if a date column exists.

    The first of ``date``, ``created_at``, ``timestamp``, ``Date`` or
    ``post_date`` found in ``df`` is parsed as the post date. Rows whose date
    cannot be parsed are ignored. Per calendar day the chart plots the sum of
    ``prime_mentions`` (primary axis) and the number of rows whose
    ``sentiment`` equals ``'Positive'`` (secondary axis).

    Args:
        df: DataFrame with ``prime_mentions`` and ``sentiment`` columns plus
            one of the date columns listed above. It is not modified.

    Returns:
        A dual-axis Plotly figure, or ``None`` when there is no date column,
        no date could be parsed, or building the chart raised an error.
    """
    date_columns = ['date', 'created_at', 'timestamp', 'Date', 'post_date']
    date_col = next((col for col in date_columns if col in df.columns), None)
    if date_col is None:
        return None

    try:
        # Keep the parsed dates in a local Series so the caller's frame does
        # not gain a 'date_parsed' column as a side effect.
        parsed = pd.to_datetime(df[date_col], errors='coerce')
        valid = parsed.notna()
        if not valid.any():
            return None

        timeline_df = (
            df[valid]
            .groupby(parsed[valid].dt.date)
            .agg({'prime_mentions': 'sum', 'sentiment': lambda x: (x == 'Positive').sum()})
            .reset_index()
        )
        timeline_df.columns = ['date', 'mentions', 'positive_posts']

        fig = make_subplots(specs=[[{"secondary_y": True}]])
        fig.add_trace(
            go.Scatter(
                x=timeline_df['date'], y=timeline_df['mentions'], name='Total Mentions',
                line=dict(color='#3498db', width=3), mode='lines+markers',
            ),
            secondary_y=False,
        )
        fig.add_trace(
            go.Scatter(
                x=timeline_df['date'], y=timeline_df['positive_posts'], name='Positive Posts',
                line=dict(color='#2ecc71', width=2, dash='dot'), mode='lines+markers',
            ),
            secondary_y=True,
        )
        fig.update_xaxes(title_text="Date")
        fig.update_yaxes(title_text="Number of Mentions", secondary_y=False)
        fig.update_yaxes(title_text="Positive Posts", secondary_y=True)
        fig.update_layout(
            title="Prime Bank Mentions Over Time",
            hovermode='x unified',
            showlegend=True,
            legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
        )
        return fig
    except Exception as e:
        # Best effort: a chart failure must not take down the caller.
        print(f"Error creating timeline: {e}")
        return None

def create_viral_posts_chart(df, top_n=10):
    """Build a horizontal bar chart of the posts with the highest viral score.

    Args:
        df: DataFrame with ``viral_score``, ``text``, ``sentiment``,
            ``emotion`` and ``category`` columns.
        top_n: Number of highest-scoring rows to plot.

    Returns:
        A Plotly bar figure, or ``None`` when ``df`` is empty or has no
        ``viral_score`` column.
    """
    if df.empty or 'viral_score' not in df.columns:
        return None

    def _preview(value):
        # Values whose string form exceeds 50 characters are cut to 50 and
        # given an ellipsis; anything shorter is passed through untouched.
        if len(str(value)) > 50:
            return value[:50] + '...'
        return value

    ranked = df.nlargest(top_n, 'viral_score')
    ranked = ranked.assign(text_truncated=ranked['text'].apply(_preview))

    sentiment_colors = {'Positive': '#2ecc71', 'Negative': '#e74c3c', 'Neutral': '#95a5a6'}
    chart = px.bar(
        ranked,
        x='viral_score',
        y='text_truncated',
        orientation='h',
        title=f'Top {top_n} Viral Posts',
        color='sentiment',
        color_discrete_map=sentiment_colors,
        hover_data=['text', 'emotion', 'category'],
    )
    chart.update_layout(
        yaxis={'categoryorder': 'total ascending'},
        xaxis_title="Viral Score",
        yaxis_title="Post Preview",
        showlegend=True,
    )
    return chart

# --- MODIFIED FUNCTION ---
def create_summary_metrics(df):
    """Calculate summary metrics for display.

    Only rows with ``prime_mentions > 0`` contribute. Two metrics are
    produced:

    * ``Bank Sentiment Score``: count of ``'Positive'`` rows minus count of
      ``'Negative'`` rows, formatted with an explicit sign and thousands
      separators (e.g. ``'+1,234'``).
    * ``Engagement-Weighted Sentiment``: sum of ``polarity * viral_score``,
      formatted with thousands separators and two decimals.

    A metric whose input columns are missing, or for which no row
    qualifies, is reported as zero. Both values are always strings, so every
    return path has the same shape.

    Args:
        df: DataFrame, ideally with ``prime_mentions``, ``sentiment``,
            ``polarity`` and ``viral_score`` columns.

    Returns:
        Dict mapping the two metric names to their formatted string values.
    """
    bank_sentiment_score = 0
    ew_sentiment = 0.0

    if not df.empty and 'prime_mentions' in df.columns:
        prime_df = df[df['prime_mentions'] > 0]

        if 'sentiment' in prime_df.columns:
            sentiment = prime_df['sentiment']
            positive_mentions = int((sentiment == 'Positive').sum())
            negative_mentions = int((sentiment == 'Negative').sum())
            bank_sentiment_score = positive_mentions - negative_mentions

        if 'polarity' in prime_df.columns and 'viral_score' in prime_df.columns:
            ew_sentiment = float((prime_df['polarity'] * prime_df['viral_score']).sum())

    return {
        'Bank Sentiment Score': f"{bank_sentiment_score:+,}",  # always signed
        'Engagement-Weighted Sentiment': f"{ew_sentiment:,.2f}",
    }

# --- NEW FUNCTION 1 ---
def create_bank_comparison_chart(df):
    """Build a bar chart counting how often each bank is mentioned.

    Args:
        df: DataFrame with an ``all_banks_mentioned`` column; each cell is
            exploded so that list-like cells contribute one mention per item.

    Returns:
        A Plotly bar figure with one bar per bank, or ``None`` when ``df`` is
        empty, lacks the column, or contains no mentions at all.
    """
    if df.empty or 'all_banks_mentioned' not in df.columns:
        return None

    exploded = df['all_banks_mentioned'].explode()
    mentions = exploded.dropna()
    if mentions.size == 0:
        return None

    bank_counts = mentions.value_counts().reset_index()
    bank_counts.columns = ['Bank', 'Mentions']

    # Turn identifiers such as "prime_bank" into display names ("Prime Bank").
    display_names = bank_counts['Bank'].str.replace('_', ' ')
    bank_counts['Bank'] = display_names.str.title()

    chart = px.bar(
        bank_counts,
        x='Bank',
        y='Mentions',
        color='Bank',
        text='Mentions',
        title='Bank Mention Comparison',
    )
    chart.update_layout(xaxis_title=None, yaxis_title="Total Mentions", showlegend=False)
    chart.update_traces(textposition='outside')
    return chart

# --- NEW FUNCTION 2 ---
def create_geolocation_map(df, mapbox_token=None):
    """Create a map showing where Prime Bank mentions are coming from.

    Each row's ``location`` is looked up in a fixed table of city
    coordinates. Rows whose location is not in the table are dropped, and the
    remaining rows are counted per city and plotted as sized, colored
    markers.

    Args:
        df: DataFrame with a ``location`` column. It is not modified.
        mapbox_token: Accepted for interface compatibility; currently unused
            by this function.

    Returns:
        A Plotly scatter-mapbox figure, or ``None`` when ``df`` is empty, has
        no ``location`` column, or none of its locations are in the table.
    """
    # Messages go through print(), like the other helpers in this module:
    # this file does not import streamlit, so calling `st.info` here raised
    # NameError instead of returning None.
    if 'location' not in df.columns or df.empty:
        print("No 'location' column found in data to generate map.")
        return None

    geo_mapping = {
        'Dhaka': {'lat': 23.8103, 'lon': 90.4125},
        'Chittagong': {'lat': 22.3569, 'lon': 91.7832},
        'Sylhet': {'lat': 24.8949, 'lon': 91.8687},
        'Rajshahi': {'lat': 24.3745, 'lon': 88.6042},
        'Khulna': {'lat': 22.8456, 'lon': 89.5403},
        'Barisal': {'lat': 22.7010, 'lon': 90.3535},
        'Rangpur': {'lat': 25.7439, 'lon': 89.2752},
    }

    # Work on a copy so the caller's frame never gains lat/lon columns.
    df_loc = df.copy()
    df_loc['lat'] = df_loc['location'].map(lambda x: geo_mapping.get(x, {}).get('lat'))
    df_loc['lon'] = df_loc['location'].map(lambda x: geo_mapping.get(x, {}).get('lon'))
    df_loc = df_loc.dropna(subset=['lat', 'lon'])

    if df_loc.empty:
        print("No valid locations found in data to plot on map.")
        return None

    location_counts = df_loc.groupby(['location', 'lat', 'lon']).size().reset_index(name='mentions')

    fig = px.scatter_mapbox(
        location_counts, lat="lat", lon="lon", size="mentions", color="mentions",
        hover_name="location", hover_data={"lat": False, "lon": False, "mentions": True},
        color_continuous_scale=px.colors.cyclical.IceFire, size_max=30, zoom=5,
        center={"lat": 23.6850, "lon": 90.3563},
        title="Geographic Hotspots for Prime Bank Mentions",
        mapbox_style="carto-positron",
    )
    fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
    return fig