# -*- coding: utf-8 -*-
"""
Visualization utilities for the Gradio app.

Builds the Plotly figures (and small display helpers) used by the
different tabs of the application.
"""

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from functools import lru_cache
import hashlib


def create_kpi_display(kpi_metrics):
    """
    Format the headline KPI values for display.

    Args:
        kpi_metrics: Mapping providing 'total_customers',
            'total_transactions' and 'avg_revenue'.

    Returns:
        A string with each KPI value followed by its label, one item
        per line.

    Raises:
        KeyError: If one of the three expected keys is missing.
    """
    # NOTE(review): the template below contains no markup even though the
    # result is used as HTML; if wrapping elements were lost at some point,
    # restore them here.
    html = f"""
{kpi_metrics['total_customers']:,}
Tổng số khách hàng
{kpi_metrics['total_transactions']:,}
Tổng số giao dịch
£{kpi_metrics['avg_revenue']:.2f}
Doanh thu trung bình/giao dịch
"""
    return html


def plot_revenue_over_time(df, date_start=None, date_end=None):
    """
    Plot total daily revenue as a filled line chart.

    Args:
        df: DataFrame with a datetime 'InvoiceDate' column and a
            'TotalPrice' column.
        date_start: Optional inclusive lower bound on 'InvoiceDate'.
        date_end: Optional inclusive upper bound on 'InvoiceDate'.

    Returns:
        Plotly figure.
    """
    # No defensive copy is needed: boolean indexing and groupby both
    # produce new objects, so the caller's frame is never modified.
    data = df

    # Truthiness (not `is not None`) is deliberate so that empty values
    # such as "" are treated as "no bound", as before.
    if date_start:
        data = data[data["InvoiceDate"] >= date_start]
    if date_end:
        data = data[data["InvoiceDate"] <= date_end]

    # Aggregate revenue per calendar day.
    daily_revenue = data.groupby(data["InvoiceDate"].dt.date)["TotalPrice"].sum()

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=daily_revenue.index,
        y=daily_revenue.values,
        mode='lines',
        line=dict(color='#2E86AB', width=2),
        fill='tozeroy',
        name='Doanh thu',
    ))
    fig.update_layout(
        title="Doanh thu theo ngày",
        xaxis_title="Ngày",
        yaxis_title="Doanh thu (GBP)",
        hovermode='x unified',
        height=400,
        template='plotly_white',
    )
    return fig


def plot_hourly_daily_heatmap(df):
    """
    Create a heatmap of row counts by hour of day and day of week.

    Args:
        df: DataFrame with 'DayOfWeek' and 'HourOfDay' columns.
            'DayOfWeek' values are used as positions into a 7-entry
            list of day labels, so they must be integers in 0..6.

    Returns:
        Plotly figure.
    """
    # Rows: day of week, columns: hour of day, values: number of rows.
    heatmap_data = df.groupby(["DayOfWeek", "HourOfDay"]).size().unstack(fill_value=0)

    day_names = ["Thứ 2", "Thứ 3", "Thứ 4", "Thứ 5", "Thứ 6", "Thứ 7", "Chủ nhật"]

    fig = go.Figure(data=go.Heatmap(
        z=heatmap_data.values,
        x=heatmap_data.columns,
        y=[day_names[i] for i in heatmap_data.index],
        colorscale='Viridis',
        name='Số giao dịch',
    ))
    fig.update_layout(
        title="Heatmap thời gian mua hàng: Giờ trong ngày x Ngày trong tuần",
        xaxis_title="Giờ trong ngày",
        yaxis_title="Ngày trong tuần",
        height=400,
        template='plotly_white',
    )
    return fig


def plot_elbow_silhouette(inertias, silhouette_scores, k_range=range(2, 11)):
    """
    Plot the elbow curve and the silhouette scores side by side.

    Args:
        inertias: Inertia values, one per K in ``k_range``.
        silhouette_scores: Silhouette scores, one per K in ``k_range``.
        k_range: Iterable of the K values that were evaluated.

    Returns:
        Plotly figure with two subplots. The K with the highest
        silhouette score is annotated; the annotation is omitted when
        ``silhouette_scores`` is empty.
    """
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=("Phương pháp Elbow", "Silhouette Score"),
    )

    k_list = list(k_range)

    # Elbow plot
    fig.add_trace(
        go.Scatter(
            x=k_list,
            y=inertias,
            mode='lines+markers',
            name='Inertia',
            line=dict(color='#2E86AB', width=2),
            marker=dict(size=8),
        ),
        row=1, col=1,
    )

    # Silhouette plot
    fig.add_trace(
        go.Scatter(
            x=k_list,
            y=silhouette_scores,
            mode='lines+markers',
            name='Silhouette Score',
            line=dict(color='#2ECC71', width=2),
            marker=dict(size=8),
        ),
        row=1, col=2,
    )

    # Highlight the best K. np.argmax raises on an empty sequence, so the
    # annotation is only added when there is at least one score.
    if len(silhouette_scores) > 0:
        best_k_idx = int(np.argmax(silhouette_scores))
        best_k = k_list[best_k_idx]
        fig.add_annotation(
            x=best_k,
            y=silhouette_scores[best_k_idx],
            text=f"Tốt nhất: K={best_k}",
            showarrow=True,
            arrowhead=2,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="red",
            bgcolor="yellow",
            bordercolor="red",
            borderwidth=2,
            row=1, col=2,
        )

    fig.update_xaxes(title_text="Số lượng clusters (K)", row=1, col=1)
    fig.update_yaxes(title_text="Inertia", row=1, col=1)
    fig.update_xaxes(title_text="Số lượng clusters (K)", row=1, col=2)
    fig.update_yaxes(title_text="Silhouette Score", row=1, col=2)

    fig.update_layout(height=400, showlegend=False, template='plotly_white')
    return fig


def plot_clusters_pca_2d(pca_features, cluster_labels, k):
    """
    Plot clusters in 2D PCA space with a minimal hover label.

    Args:
        pca_features: DataFrame with 'PC1' and 'PC2' columns.
        cluster_labels: Cluster label per row of ``pca_features``.
        k: Number of clusters (used in the title only).

    Returns:
        Plotly figure.
    """
    df_plot = pca_features.copy()
    df_plot['Cluster'] = cluster_labels

    fig = px.scatter(
        df_plot,
        x='PC1',
        y='PC2',
        color='Cluster',
        # The hover template below reads customdata[0]; the cluster label
        # has to be attached explicitly because columns already mapped to
        # x / y / color are not copied into customdata.
        custom_data=['Cluster'],
        hover_data={'PC1': ':.2f', 'PC2': ':.2f'},
        color_continuous_scale='Viridis',
        title=f'Phân cụm K-Means (K={k}) - Không gian PCA',
        labels={'Cluster': 'Cluster'},
    )

    # Small markers and a short template keep rendering fast.
    # Plotly hover text uses <br> for line breaks.
    fig.update_traces(
        marker=dict(size=4, opacity=0.7),
        hovertemplate='Cluster %{customdata[0]}<br>PC1: %{x:.2f}<br>PC2: %{y:.2f}',
    )
    fig.update_layout(
        height=500,
        template='plotly_white',
        hovermode='closest',
    )
    return fig


def plot_radar_chart(cluster_means, k, cluster_idx=0, features_to_plot=None):
    """
    Create a radar chart for one cluster.

    Args:
        cluster_means: DataFrame of per-cluster feature means, indexed
            by cluster label.
        k: Number of clusters (used in the title only).
        cluster_idx: Index label of the cluster to display (default: 0).
        features_to_plot: Columns of ``cluster_means`` to plot. Defaults
            to the eight features listed below.

    Returns:
        Plotly figure. If ``cluster_idx`` is not in the index of
        ``cluster_means`` the figure has no trace.
    """
    if features_to_plot is None:
        features_to_plot = [
            "Sum_Quantity",
            "Sum_TotalPrice",
            "Mean_UnitPrice",
            "Count_Invoice",
            "Count_Stock",
            "Mean_TotalPriceSumPerInvoice",
            "Mean_TotalPriceMeanPerStock",
            "Mean_StockCountPerInvoice",
        ]

    data_selected = cluster_means[features_to_plot].copy()

    # Min-max scale each feature (column) across all clusters to [0, 1].
    # A feature with the same value in every cluster gives 0/0 = NaN,
    # which is mapped to 0.
    global_min = data_selected.min()
    global_max = data_selected.max()
    data_normalized = (data_selected - global_min) / (global_max - global_min)
    data_normalized = data_normalized.fillna(0)

    # Display labels for the default features; unknown features fall
    # back to their column name.
    feature_labels = {
        "Sum_Quantity": "Khối lượng mua",
        "Sum_TotalPrice": "Tổng chi tiêu",
        "Mean_UnitPrice": "Mức giá ưa thích",
        "Count_Invoice": "Tần suất mua",
        "Count_Stock": "Đa dạng sản phẩm",
        "Mean_TotalPriceSumPerInvoice": "Giá trị/giao dịch",
        "Mean_TotalPriceMeanPerStock": "Chi tiêu/sản phẩm",
        "Mean_StockCountPerInvoice": "Sản phẩm/giao dịch",
    }
    categories = [feature_labels.get(f, f) for f in features_to_plot]

    colors = ["#2E86AB", "#A23B72", "#F18F01", "#C73E1D"]

    fig = go.Figure()

    # Only draw a trace when the requested cluster exists.
    if cluster_idx in data_normalized.index:
        cluster_row = data_normalized.loc[cluster_idx]
        color = colors[cluster_idx % len(colors)]  # cycle through the palette
        fig.add_trace(go.Scatterpolar(
            r=cluster_row.tolist(),
            theta=categories,
            fill='toself',
            name=f'Cluster {cluster_idx}',
            line=dict(color=color, width=2),
            marker=dict(size=8),
        ))

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 1],
                tickformat='.0%',
            )
        ),
        title=f'Chi tiết Cluster {cluster_idx} - Biểu đồ Radar (K={k})',
        height=500,
        showlegend=True,
        template='plotly_white',
    )
    return fig


def create_cluster_stats_table(cluster_means, k):
    """
    Prepare the cluster statistics for tabular display.

    Args:
        cluster_means: DataFrame with cluster means.
        k: Number of clusters (currently unused; kept for interface
            compatibility).

    Returns:
        A new pandas DataFrame with values rounded to 2 decimals and
        the index named "Cluster". The input frame is not modified.
    """
    # round() and rename_axis() both return new objects, so the caller's
    # frame (including its index name) is left untouched.
    return cluster_means.round(2).rename_axis("Cluster")