xxnithicxx's picture
update tab 2
1f4d19b
# -*- coding: utf-8 -*-
"""
Visualization utilities for Gradio app.
Creates Plotly visualizations for different tabs.
"""
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from functools import lru_cache
import hashlib
def create_kpi_display(kpi_metrics):
    """
    Render the headline KPI figures as a row of three HTML cards.

    Args:
        kpi_metrics: Mapping providing ``total_customers``,
            ``total_transactions`` and ``avg_revenue``. The first two are
            formatted with a thousands separator, the last with two decimals
            and a pound sign.

    Returns:
        HTML string: one flex container holding one card per KPI.

    Raises:
        KeyError: If any of the three keys is missing.
    """
    # (accent colour, formatted value, caption) for each card, left to right.
    card_specs = (
        ("#2E86AB", f"{kpi_metrics['total_customers']:,}", "Tổng số khách hàng"),
        ("#A23B72", f"{kpi_metrics['total_transactions']:,}", "Tổng số giao dịch"),
        ("#F18F01", f"£{kpi_metrics['avg_revenue']:.2f}", "Doanh thu trung bình/giao dịch"),
    )

    card_open = (
        '<div style="text-align: center; padding: 20px; background: #f0f0f0; '
        'border-radius: 8px; flex: 1;">'
    )
    caption_open = '<div style="font-size: 14px; color: #666; margin-top: 5px;">'

    # The leading empty entry reproduces the newline the markup starts with.
    rows = [
        "",
        '<div style="display: flex; justify-content: space-around; gap: 20px; margin: 20px 0;">',
    ]
    for accent, value, caption in card_specs:
        rows += [
            card_open,
            f'<div style="font-size: 28px; font-weight: bold; color: {accent};">',
            value,
            "</div>",
            caption_open,
            caption,
            "</div>",
            "</div>",
        ]
    rows.append("</div>")

    # Every row, including the last one, is newline-terminated.
    return "\n".join(rows) + "\n"
def plot_revenue_over_time(df, date_start=None, date_end=None):
    """
    Plot total revenue per calendar day as a filled line chart.

    Args:
        df: DataFrame with a datetime-like ``InvoiceDate`` column (the ``.dt``
            accessor is used on it) and a ``TotalPrice`` column. It is only
            read, never modified.
        date_start: Inclusive lower bound compared against ``InvoiceDate``.
            Any falsy value (``None``, ``""``) disables the bound.
        date_end: Inclusive upper bound compared against ``InvoiceDate``.
            Any falsy value disables the bound.
            NOTE(review): the comparison uses the full timestamp, so a
            date-only bound presumably stops at midnight of that day —
            confirm what the caller passes.

    Returns:
        Plotly figure
    """
    # Boolean indexing and groupby both build new objects, so the input is
    # never mutated and no defensive deep copy of the whole frame is needed.
    # Only the two columns the chart uses are carried through the filters.
    revenue = df[["InvoiceDate", "TotalPrice"]]

    # Truthiness (not ``is not None``) keeps empty-string bounds meaning "no filter".
    if date_start:
        revenue = revenue[revenue["InvoiceDate"] >= date_start]
    if date_end:
        revenue = revenue[revenue["InvoiceDate"] <= date_end]

    # Collapse timestamps to calendar dates and sum the revenue of each day.
    daily_revenue = revenue.groupby(revenue["InvoiceDate"].dt.date)["TotalPrice"].sum()

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=daily_revenue.index,
        y=daily_revenue.values,
        mode='lines',
        line=dict(color='#2E86AB', width=2),
        fill='tozeroy',
        name='Doanh thu'
    ))
    fig.update_layout(
        title="Doanh thu theo ngày",
        xaxis_title="Ngày",
        yaxis_title="Doanh thu (GBP)",
        hovermode='x unified',
        height=400,
        template='plotly_white'
    )
    return fig
def plot_hourly_daily_heatmap(df):
    """
    Draw a heatmap of row counts per (day of week, hour of day) pair.

    Args:
        df: DataFrame with ``DayOfWeek`` and ``HourOfDay`` columns.
            ``DayOfWeek`` values are used as positions into a seven-entry
            label list that starts at Monday.

    Returns:
        Plotly figure
    """
    weekday_labels = ["Thứ 2", "Thứ 3", "Thứ 4", "Thứ 5", "Thứ 6", "Thứ 7", "Chủ nhật"]

    # Rows: days present in the data; columns: hours present in the data.
    # Day/hour combinations that never occur are filled with 0.
    pair_counts = df.groupby(["DayOfWeek", "HourOfDay"]).size()
    counts = pair_counts.unstack(fill_value=0)

    row_labels = [weekday_labels[day] for day in counts.index]

    heatmap = go.Heatmap(
        z=counts.values,
        x=counts.columns,
        y=row_labels,
        colorscale='Viridis',
        name='Số giao dịch',
    )
    fig = go.Figure(data=heatmap)

    layout_options = dict(
        title="Heatmap thời gian mua hàng: Giờ trong ngày x Ngày trong tuần",
        xaxis_title="Giờ trong ngày",
        yaxis_title="Ngày trong tuần",
        height=400,
        template='plotly_white',
    )
    fig.update_layout(**layout_options)
    return fig
def plot_elbow_silhouette(inertias, silhouette_scores, k_range=range(2, 11)):
    """
    Show the elbow curve and the silhouette curve side by side.

    The K with the highest silhouette score is called out with an annotation
    on the right-hand panel.

    Args:
        inertias: Inertia values, one per K in ``k_range``
        silhouette_scores: Silhouette scores, one per K in ``k_range``
        k_range: Iterable of the K values that were evaluated

    Returns:
        Plotly figure
    """
    ks = list(k_range)
    k_axis_title = "Số lượng clusters (K)"

    def line_with_markers(values, label, colour):
        # Both panels use the same trace styling; only data, name and colour differ.
        return go.Scatter(
            x=ks, y=values,
            mode='lines+markers',
            name=label,
            line=dict(color=colour, width=2),
            marker=dict(size=8),
        )

    figure = make_subplots(
        rows=1, cols=2,
        subplot_titles=("Phương pháp Elbow", "Silhouette Score"),
    )

    # Left panel: elbow curve.
    figure.add_trace(line_with_markers(inertias, 'Inertia', '#2E86AB'), row=1, col=1)

    # Right panel: silhouette curve, with the position of its maximum.
    top_pos = np.argmax(silhouette_scores)
    top_k = ks[top_pos]
    figure.add_trace(
        line_with_markers(silhouette_scores, 'Silhouette Score', '#2ECC71'),
        row=1, col=2,
    )

    callout_style = dict(
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor="red",
        bgcolor="yellow",
        bordercolor="red",
        borderwidth=2,
    )
    figure.add_annotation(
        x=top_k, y=silhouette_scores[top_pos],
        text=f"Tốt nhất: K={top_k}",
        row=1, col=2,
        **callout_style,
    )

    figure.update_xaxes(title_text=k_axis_title, row=1, col=1)
    figure.update_yaxes(title_text="Inertia", row=1, col=1)
    figure.update_xaxes(title_text=k_axis_title, row=1, col=2)
    figure.update_yaxes(title_text="Silhouette Score", row=1, col=2)

    figure.update_layout(height=400, showlegend=False, template='plotly_white')
    return figure
def plot_clusters_pca_2d(pca_features, cluster_labels, k):
    """
    Scatter-plot the samples in the PC1/PC2 plane, coloured by cluster label.

    Args:
        pca_features: DataFrame with ``PC1`` and ``PC2`` columns. It is copied
            before the label column is added, so the caller's frame is not
            modified.
        cluster_labels: Cluster label per row of ``pca_features``
        k: Number of clusters (used in the title only)

    Returns:
        Plotly figure
    """
    df_plot = pca_features.copy()
    df_plot['Cluster'] = cluster_labels

    fig = px.scatter(
        df_plot,
        x='PC1', y='PC2',
        color='Cluster',
        # The hover template below reads the label from customdata[0].
        # hover_data only names PC1/PC2, which are already the x/y columns,
        # so the label has to be placed in customdata explicitly.
        custom_data=['Cluster'],
        hover_data={'PC1': ':.2f', 'PC2': ':.2f'},
        color_continuous_scale='Viridis',
        title=f'Phân cụm K-Means (K={k}) - Không gian PCA',
        labels={'Cluster': 'Cluster'},
    )
    # Small, semi-transparent markers and a short fixed hover template keep
    # the hover payload minimal.
    fig.update_traces(
        marker=dict(size=4, opacity=0.7),
        hovertemplate='<b>Cluster %{customdata[0]}</b><br>PC1: %{x:.2f}<br>PC2: %{y:.2f}<extra></extra>'
    )
    fig.update_layout(
        height=500,
        template='plotly_white',
        hovermode='closest',
    )
    return fig
def plot_radar_chart(cluster_means, k, cluster_idx=0, features_to_plot=None):
    """
    Draw the radar profile of one cluster over a set of features.

    Each feature is min-max scaled to [0, 1] across all clusters before
    plotting, so the radius shows where this cluster sits relative to the
    others. A feature that is constant across clusters is plotted at 0.

    Args:
        cluster_means: DataFrame of per-cluster feature means, indexed by
            cluster label
        k: Number of clusters (used in the title only)
        cluster_idx: Index label of the cluster to draw (default: 0). If it is
            not in the index, the figure is returned without a trace.
        features_to_plot: Columns to use as radar axes (default: the eight
            features that have a Vietnamese caption below)

    Returns:
        Plotly figure
    """
    # Vietnamese axis captions; features without an entry fall back to their column name.
    axis_captions = {
        "Sum_Quantity": "Khối lượng mua",
        "Sum_TotalPrice": "Tổng chi tiêu",
        "Mean_UnitPrice": "Mức giá ưa thích",
        "Count_Invoice": "Tần suất mua",
        "Count_Stock": "Đa dạng sản phẩm",
        "Mean_TotalPriceSumPerInvoice": "Giá trị/giao dịch",
        "Mean_TotalPriceMeanPerStock": "Chi tiêu/sản phẩm",
        "Mean_StockCountPerInvoice": "Sản phẩm/giao dịch",
    }
    palette = ["#2E86AB", "#A23B72", "#F18F01", "#C73E1D"]

    if features_to_plot is None:
        # The default axes are exactly the captioned features, in that order.
        features_to_plot = list(axis_captions)

    # Column-wise min-max scaling over all clusters.
    selected = cluster_means[features_to_plot]
    lows = selected.min()
    spans = selected.max() - lows
    # A zero span gives 0/0 = NaN, which is mapped to 0.
    scaled = selected.sub(lows).div(spans).fillna(0)

    theta_labels = [axis_captions.get(name, name) for name in features_to_plot]

    figure = go.Figure()
    if cluster_idx in scaled.index:
        profile = scaled.loc[cluster_idx]
        # Colours repeat once there are more clusters than palette entries.
        stroke = palette[cluster_idx % len(palette)]
        figure.add_trace(go.Scatterpolar(
            r=profile.tolist(),
            theta=theta_labels,
            fill='toself',
            name=f'Cluster {cluster_idx}',
            line=dict(color=stroke, width=2),
            marker=dict(size=8),
        ))

    radial_axis = dict(visible=True, range=[0, 1], tickformat='.0%')
    figure.update_layout(
        polar=dict(radialaxis=radial_axis),
        title=f'Chi tiết Cluster {cluster_idx} - Biểu đồ Radar (K={k})',
        height=500,
        showlegend=True,
        template='plotly_white',
    )
    return figure
def create_cluster_stats_table(cluster_means, k):
    """
    Build the display table of cluster statistics.

    Despite the function name suggesting markup, the result is a DataFrame,
    not an HTML string.

    Args:
        cluster_means: DataFrame with cluster means (presumably one row per
            cluster). It is not modified.
        k: Number of clusters. Not used by this function; kept so existing
            callers keep working.

    Returns:
        New pandas DataFrame with values rounded to 2 decimals and the index
        named "Cluster".
    """
    # round() already returns a new frame, so a separate deep copy beforehand
    # would only duplicate the data a second time.
    table = cluster_means.round(2)
    # Attach a renamed index instead of setting ``.name`` in place: the rounded
    # frame may share its Index object with the input, and the caller's index
    # name must stay untouched.
    table.index = table.index.rename("Cluster")
    return table