# -*- coding: utf-8 -*-
"""
Visualization utilities for Gradio app.
Creates Plotly visualizations for different tabs.
"""
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from functools import lru_cache
import hashlib
def create_kpi_display(kpi_metrics):
    """
    Build the KPI summary string shown in the app.

    Args:
        kpi_metrics: Mapping providing 'total_customers',
            'total_transactions' and 'avg_revenue'.

    Returns:
        A string that starts and ends with a newline and contains, one per
        line, each formatted value followed by its caption: the two counts
        with thousands separators, the average revenue with a pound sign
        and two decimals.

    Raises:
        KeyError: If one of the three keys is missing from kpi_metrics.
    """
    # The empty first and last entries reproduce the leading and trailing
    # newline of the rendered block.
    rows = [
        "",
        f"{kpi_metrics['total_customers']:,}",
        "Tổng số khách hàng",
        f"{kpi_metrics['total_transactions']:,}",
        "Tổng số giao dịch",
        f"£{kpi_metrics['avg_revenue']:.2f}",
        "Doanh thu trung bình/giao dịch",
        "",
    ]
    return "\n".join(rows)
def plot_revenue_over_time(df, date_start=None, date_end=None):
    """
    Plot total revenue per calendar day as a filled line chart.

    Args:
        df: DataFrame with an "InvoiceDate" column that supports the ``.dt``
            accessor and a "TotalPrice" column to be summed per day.
        date_start: Lower bound compared against "InvoiceDate" with ``>=``.
            Skipped when falsy (e.g. None or an empty string).
        date_end: Upper bound compared against "InvoiceDate" with ``<=``.
            Skipped when falsy.

    Returns:
        Plotly figure with a single 'Doanh thu' line trace.
    """
    # No defensive copy: nothing below mutates the frame, and each boolean
    # filter already produces a new object, so copying the full input first
    # only costs memory and time.
    data = df

    # Truthiness (not `is not None`) is kept on purpose so that an empty
    # string coming from the UI still means "no bound".
    if date_start:
        data = data[data["InvoiceDate"] >= date_start]
    if date_end:
        # NOTE(review): if date_end is a bare date, rows stamped later on
        # that same day are presumably excluded by this comparison --
        # confirm whether the end day should be fully included.
        data = data[data["InvoiceDate"] <= date_end]

    # Collapse timestamps to calendar dates and total the revenue per day.
    daily_revenue = data.groupby(data["InvoiceDate"].dt.date)["TotalPrice"].sum()

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=daily_revenue.index,
        y=daily_revenue.values,
        mode='lines',
        line=dict(color='#2E86AB', width=2),
        fill='tozeroy',
        name='Doanh thu',
    ))
    fig.update_layout(
        title="Doanh thu theo ngày",
        xaxis_title="Ngày",
        yaxis_title="Doanh thu (GBP)",
        hovermode='x unified',
        height=400,
        template='plotly_white',
    )
    return fig
def plot_hourly_daily_heatmap(df):
    """
    Draw a heatmap of row counts per (day of week, hour of day).

    Args:
        df: DataFrame with "DayOfWeek" and "HourOfDay" columns. Each
            "DayOfWeek" value is used as a position in a seven-entry label
            list running from 'Thứ 2' to 'Chủ nhật' (presumably 0 = Monday;
            verify against the code that derives the column).

    Returns:
        Plotly figure whose rows are the days present in the data and whose
        columns are the hours present in the data.
    """
    # Rows counted per (day, hour) pair, then pivoted so hours become
    # columns; combinations that never occur are filled with 0.
    pair_counts = df.groupby(["DayOfWeek", "HourOfDay"]).size()
    grid = pair_counts.unstack(fill_value=0)

    weekday_labels = ["Thứ 2", "Thứ 3", "Thứ 4", "Thứ 5", "Thứ 6", "Thứ 7", "Chủ nhật"]
    row_labels = [weekday_labels[day] for day in grid.index]

    heatmap = go.Heatmap(
        z=grid.values,
        x=grid.columns,
        y=row_labels,
        colorscale='Viridis',
        name='Số giao dịch',
    )
    fig = go.Figure(data=heatmap)

    layout_options = dict(
        title="Heatmap thời gian mua hàng: Giờ trong ngày x Ngày trong tuần",
        xaxis_title="Giờ trong ngày",
        yaxis_title="Ngày trong tuần",
        height=400,
        template='plotly_white',
    )
    fig.update_layout(**layout_options)
    return fig
def plot_elbow_silhouette(inertias, silhouette_scores, k_range=range(2, 11)):
    """
    Show the elbow curve and the silhouette curve side by side.

    Args:
        inertias: Inertia values, one per K in k_range.
        silhouette_scores: Silhouette scores, one per K in k_range.
        k_range: Iterable of the K values the two series were computed for.

    Returns:
        Plotly figure with two subplots; the K with the highest silhouette
        score is marked with an annotation on the right-hand subplot.
    """
    ks = list(k_range)

    # (trace name / y-axis title, values, line colour) for columns 1 and 2.
    panels = (
        ('Inertia', inertias, '#2E86AB'),
        ('Silhouette Score', silhouette_scores, '#2ECC71'),
    )

    fig = make_subplots(
        rows=1,
        cols=2,
        subplot_titles=("Phương pháp Elbow", "Silhouette Score"),
    )

    for col, (label, values, colour) in enumerate(panels, start=1):
        curve = go.Scatter(
            x=ks,
            y=values,
            mode='lines+markers',
            name=label,
            line=dict(color=colour, width=2),
            marker=dict(size=8),
        )
        fig.add_trace(curve, row=1, col=col)

    # Position of the highest silhouette score and the K it belongs to.
    top_idx = np.argmax(silhouette_scores)
    top_k = ks[top_idx]

    fig.add_annotation(
        x=top_k,
        y=silhouette_scores[top_idx],
        text=f"Tốt nhất: K={top_k}",
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor="red",
        bgcolor="yellow",
        bordercolor="red",
        borderwidth=2,
        row=1,
        col=2,
    )

    # Both subplots share the x-axis title; each y-axis carries its
    # series label.
    for col, (label, _values, _colour) in enumerate(panels, start=1):
        fig.update_xaxes(title_text="Số lượng clusters (K)", row=1, col=col)
        fig.update_yaxes(title_text=label, row=1, col=col)

    fig.update_layout(height=400, showlegend=False, template='plotly_white')
    return fig
def plot_clusters_pca_2d(pca_features, cluster_labels, k):
"""
Plot clusters in 2D PCA space with minimal hover data for performance.
Args:
pca_features: DataFrame with PCA features
cluster_labels: Array of cluster labels
k: Number of clusters
Returns:
Plotly figure
"""
df_plot = pca_features.copy()
df_plot['Cluster'] = cluster_labels
# Minimal hover data for faster rendering
fig = px.scatter(
df_plot,
x='PC1', y='PC2',
color='Cluster',
hover_data={'PC1': ':.2f', 'PC2': ':.2f'},
color_continuous_scale='Viridis',
title=f'Phân cụm K-Means (K={k}) - Không gian PCA',
labels={'Cluster': 'Cluster'},
)
fig.update_traces(
marker=dict(size=4, opacity=0.7),
hovertemplate='