Spaces:

yoon-gu
/

drift-detection-ui

Sleeping

Yoon-gu Hwang Claude committed on Oct 24, 2025

Commit

1ee2788

1 Parent(s): 403ebf2

월별 데이터 드리프트 감지 및 분석 대시보드 추가

- Frouros 라이브러리를 이용한 데이터 드리프트 감지 (KS Test, Wasserstein Distance)
- 4개의 탭으로 구성된 종합 드리프트 분석 대시보드:
1. Time Series + Drift Markers: 드리프트 발생 지점 표시
2. Monthly Drift Scores: 월별 드리프트 점수 (KS Statistic, WD)
3. Drift Heatmap: 전체 메트릭 드리프트 히트맵
4. Data Tables: 원본 데이터 및 모델 정보

- 1월을 기준(baseline)으로 2-8월 드리프트 비교
- p-value < 0.05 기준으로 드리프트 자동 감지
- 시각화: 바 차트, 라인 차트, 히트맵 등 다양한 방식 제공

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (3) hide show

app.py +250 -23
pyproject.toml +3 -0
requirements.txt +3 -0

app.py CHANGED Viewed

@@ -2,9 +2,13 @@ import sqlite3
 import gradio as gr
 import pandas as pd
 import plotly.express as px
 from datetime import datetime, timedelta
 import os
 import subprocess
 # Initialize database if it doesn't exist
 if not os.path.exists('drift_detection.db'):
@@ -87,6 +91,59 @@ def load_model_info():
     return df
 def create_metric_chart(df, metric='precision'):
     """Create Plotly line chart for selected metric over time by model"""
     if df.empty:
@@ -163,15 +220,170 @@ def create_metric_chart(df, metric='precision'):
     return fig
 def update_chart(metric):
     """Update chart based on selected metric"""
     df = load_drift_data()
     chart = create_metric_chart(df, metric)
     return chart
 # Create Gradio interface
 with gr.Blocks(title="Drift Detection Dashboard", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# Drift Detection Dashboard")
     with gr.Row():
         metric_dropdown = gr.Dropdown(
@@ -182,44 +394,59 @@ with gr.Blocks(title="Drift Detection Dashboard", theme=gr.themes.Soft()) as dem
                 ("Wasserstein Distance", "wd_value")
             ],
             value="precision",
-            label="Metric",
             scale=1
         )
-    with gr.Row():
-        plot_output = gr.Plot()
-    with gr.Row():
-        with gr.Column(scale=2):
-            dataframe_output = gr.Dataframe(
-                value=load_drift_data(),
-                interactive=False,
-                wrap=True,
-                label="Drift Records"
-            )
-        with gr.Column(scale=1):
-            model_info_output = gr.Dataframe(
-                value=load_model_info(),
-                interactive=False,
-                wrap=True,
-                label="Model Info"
-            )
     # Event handlers
     metric_dropdown.change(
-        fn=update_chart,
         inputs=[metric_dropdown],
-        outputs=[plot_output]
     )
-    # Load initial data and chart
     def load_initial_data():
         df = load_drift_data()
-        return df, create_metric_chart(df, 'precision')
     demo.load(
         fn=load_initial_data,
-        outputs=[dataframe_output, plot_output]
     )
 if __name__ == "__main__":

 import gradio as gr
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from datetime import datetime, timedelta
 import os
 import subprocess
+import numpy as np
+from frouros.detectors.data_drift import KSTest
+from scipy.stats import wasserstein_distance
 # Initialize database if it doesn't exist
 if not os.path.exists('drift_detection.db'):
     return df
+def split_data_by_month(df):
+    """Return a copy of ``df`` tagged with the calendar month of each row.
+
+    The ``prediction_date`` column is parsed with ``pd.to_datetime`` and a
+    ``month`` column holding the matching monthly ``Period`` is added. The
+    caller's frame is not modified.
+    """
+    tagged = df.copy()
+    parsed_dates = pd.to_datetime(tagged['prediction_date'])
+    tagged['prediction_date'] = parsed_dates
+    tagged['month'] = parsed_dates.dt.to_period('M')
+    return tagged
+def detect_drift_ks_test(reference_data, current_data, alpha=0.05):
+    """Compare two samples with a Kolmogorov-Smirnov test.
+
+    Args:
+        reference_data: Baseline sample the detector is fitted on.
+        current_data: Sample compared against the baseline.
+        alpha: Significance level; drift is flagged when the p-value is
+            strictly below it. Defaults to 0.05.
+
+    Returns:
+        dict with ``p_value``, ``statistic`` and ``drift_detected`` (a plain
+        ``bool``).
+    """
+    detector = KSTest()
+    detector.fit(X=reference_data)
+    # compare() yields a pair; only the first element (the test result) is used.
+    result, _ = detector.compare(X=current_data)
+    return {
+        'p_value': result.p_value,
+        'statistic': result.statistic,
+        # bool() keeps the flag a builtin bool whatever scalar type the comparison yields.
+        'drift_detected': bool(result.p_value < alpha)
+    }
+def calculate_monthly_drift(df, metric='precision'):
+    """Score each month's drift in ``metric`` against the earliest month.
+
+    The chronologically first month present in the data is the baseline
+    (January only if the data starts in January). Every later month is
+    compared to it with a KS test and the Wasserstein distance. Missing
+    metric values are dropped first so that NaN cannot leak into either
+    statistic.
+
+    Args:
+        df: Frame with a ``prediction_date`` column and a ``metric`` column.
+        metric: Name of the column to analyse.
+
+    Returns:
+        DataFrame with one row per compared month and the columns ``month``,
+        ``month_name``, ``ks_statistic``, ``p_value``, ``drift_detected``,
+        ``wasserstein_distance`` and ``sample_size``. Empty when fewer than
+        two months are present or no usable values remain.
+    """
+    df_with_month = split_data_by_month(df)
+    # Group once instead of re-filtering the whole frame for every month.
+    values_by_month = {
+        month: values.dropna().to_numpy()
+        for month, values in df_with_month.groupby('month')[metric]
+    }
+    if len(values_by_month) < 2:
+        return pd.DataFrame()
+    baseline_month, *later_months = sorted(values_by_month)
+    baseline_data = values_by_month[baseline_month]
+    if len(baseline_data) == 0:
+        return pd.DataFrame()
+    drift_results = []
+    for month in later_months:
+        current_data = values_by_month[month]
+        if len(current_data) == 0:
+            # Nothing left to compare once missing values are removed.
+            continue
+        ks_result = detect_drift_ks_test(baseline_data, current_data)
+        wd = wasserstein_distance(baseline_data, current_data)
+        drift_results.append({
+            'month': str(month),
+            'month_name': month.strftime('%Y-%m'),
+            'ks_statistic': ks_result['statistic'],
+            'p_value': ks_result['p_value'],
+            'drift_detected': ks_result['drift_detected'],
+            'wasserstein_distance': wd,
+            'sample_size': len(current_data)
+        })
+    return pd.DataFrame(drift_results)
 def create_metric_chart(df, metric='precision'):
     """Create Plotly line chart for selected metric over time by model"""
     if df.empty:
     return fig
+def create_drift_markers_chart(df, metric='precision'):
+    """Build the metric time-series chart with a marker on each drifted month.
+
+    Args:
+        df: Drift records passed through to ``create_metric_chart`` and
+            ``calculate_monthly_drift``.
+        metric: Name of the metric column to plot and test.
+
+    Returns:
+        The figure from ``create_metric_chart`` with a dashed red vertical
+        line and a label at the start of every month flagged as drifted.
+    """
+    fig = create_metric_chart(df, metric)
+    if df.empty:
+        # No rows means no months to test; return the base chart as is.
+        return fig
+    drift_df = calculate_monthly_drift(df, metric)
+    if drift_df.empty:
+        return fig
+    for _, row in drift_df[drift_df['drift_detected']].iterrows():
+        # First day of the drifted month.
+        month_date = pd.Period(row['month']).to_timestamp()
+        # NOTE(review): add_vline(..., annotation_text=...) with a Timestamp x
+        # is reported to raise TypeError on date axes in some plotly releases,
+        # so the line and its label are added separately.
+        fig.add_vline(
+            x=month_date,
+            line_dash="dash",
+            line_color="red",
+            line_width=2
+        )
+        fig.add_annotation(
+            x=month_date,
+            y=1,
+            xref="x",
+            yref="paper",
+            xanchor="center",
+            yanchor="bottom",
+            showarrow=False,
+            text=f"Drift Detected<br>{row['month_name']}",
+            font=dict(size=9, color="red")
+        )
+    return fig
+def create_monthly_drift_chart(df, metric='precision'):
+    """Plot per-month drift scores for ``metric``.
+
+    KS statistics are drawn as bars (red when drift was detected, blue
+    otherwise, labelled with the p-value) and the Wasserstein distance as a
+    line on a secondary y-axis. When no drift scores are available, a figure
+    carrying only an explanatory annotation is returned.
+    """
+    scores = calculate_monthly_drift(df, metric)
+    if scores.empty:
+        placeholder = go.Figure()
+        return placeholder.add_annotation(
+            text="Not enough data for drift detection",
+            xref="paper", yref="paper",
+            x=0.5, y=0.5, showarrow=False
+        )
+    month_labels = scores['month_name']
+    bar_colors = ['red' if flagged else 'blue' for flagged in scores['drift_detected']]
+    p_value_labels = [f"p={value:.4f}" for value in scores['p_value']]
+    # Bars: KS statistic on the primary axis.
+    ks_bars = go.Bar(
+        x=month_labels,
+        y=scores['ks_statistic'],
+        name='KS Statistic',
+        marker_color=bar_colors,
+        text=p_value_labels,
+        textposition='outside'
+    )
+    # Line: Wasserstein distance on the secondary axis.
+    wd_line = go.Scatter(
+        x=month_labels,
+        y=scores['wasserstein_distance'],
+        name='Wasserstein Distance',
+        yaxis='y2',
+        mode='lines+markers',
+        line=dict(color='orange', width=2),
+        marker=dict(size=8)
+    )
+    fig = go.Figure(data=[ks_bars, wd_line])
+    secondary_axis = dict(
+        title='Wasserstein Distance',
+        overlaying='y',
+        side='right'
+    )
+    fig.update_layout(
+        title=f'Monthly Drift Detection for {metric.capitalize()}',
+        xaxis_title='Month',
+        yaxis_title='KS Statistic',
+        yaxis2=secondary_axis,
+        height=500,
+        hovermode='x unified',
+        showlegend=True
+    )
+    return fig
+def create_drift_heatmap(df):
+    """Build a metric-by-month heatmap of drift intensity.
+
+    Each cell is ``1 - p_value`` from the KS test, so stronger evidence of
+    drift maps to a higher value (red on the reversed RdYlGn scale). Cells
+    with no drift result are left as gaps rather than drawn as "no drift".
+    The colour range is pinned to 0..1 so the same p-value always gets the
+    same colour.
+
+    Args:
+        df: Drift records passed to ``calculate_monthly_drift`` per metric.
+
+    Returns:
+        A Plotly figure; when no metric yields drift results, a figure
+        carrying only an explanatory annotation.
+    """
+    # Insertion order fixes the row order of the heatmap.
+    metric_labels = {
+        'precision': 'Precision',
+        'recall': 'Recall',
+        'js_value': 'JS Divergence',
+        'wd_value': 'WD Value',
+    }
+    # metric -> {month_name -> drift row}, so each cell is a dict lookup.
+    drift_by_metric = {}
+    for metric in metric_labels:
+        drift_df = calculate_monthly_drift(df, metric)
+        if not drift_df.empty:
+            drift_by_metric[metric] = (
+                drift_df.drop_duplicates('month_name')
+                .set_index('month_name')
+                .to_dict('index')
+            )
+    if not drift_by_metric:
+        return go.Figure().add_annotation(
+            text="Not enough data for drift heatmap",
+            xref="paper", yref="paper",
+            x=0.5, y=0.5, showarrow=False
+        )
+    months = sorted({month for rows in drift_by_metric.values() for month in rows})
+    z_data = []
+    hover_text = []
+    for metric in metric_labels:
+        rows = drift_by_metric.get(metric, {})
+        row_z = []
+        row_hover = []
+        for month in months:
+            cell = rows.get(month)
+            if cell is None:
+                # None renders as a gap; 0 would be coloured as "no drift".
+                row_z.append(None)
+                row_hover.append("No data")
+                continue
+            # Invert the p-value so that drift shows up as a high value.
+            row_z.append(1 - cell['p_value'])
+            row_hover.append(
+                f"KS: {cell['ks_statistic']:.4f}<br>" +
+                f"p-value: {cell['p_value']:.4f}<br>" +
+                f"WD: {cell['wasserstein_distance']:.4f}<br>" +
+                f"Drift: {'Yes' if cell['drift_detected'] else 'No'}"
+            )
+        z_data.append(row_z)
+        hover_text.append(row_hover)
+    fig = go.Figure(data=go.Heatmap(
+        z=z_data,
+        x=months,
+        y=list(metric_labels.values()),
+        colorscale='RdYlGn_r',  # Red for drift, green for no drift
+        zmin=0,
+        zmax=1,
+        text=hover_text,
+        hovertemplate='%{y}<br>%{x}<br>%{text}<extra></extra>',
+        colorbar=dict(title="Drift<br>Intensity")
+    ))
+    fig.update_layout(
+        title='Drift Detection Heatmap (All Metrics)',
+        xaxis_title='Month',
+        yaxis_title='Metric',
+        height=400
+    )
+    return fig
 def update_chart(metric):
     """Update chart based on selected metric"""
     df = load_drift_data()
     chart = create_metric_chart(df, metric)
     return chart
+def update_all_drift_visualizations(metric):
+    """Rebuild the three drift figures for the selected metric.
+
+    Returns a tuple of (drift-marker time series, monthly drift scores,
+    drift heatmap), built from freshly loaded drift data.
+    """
+    records = load_drift_data()
+    return (
+        create_drift_markers_chart(records, metric),
+        create_monthly_drift_chart(records, metric),
+        create_drift_heatmap(records),
+    )
 # Create Gradio interface
 with gr.Blocks(title="Drift Detection Dashboard", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# Drift Detection Dashboard")
+    gr.Markdown("모델별 메트릭 시계열 및 월별 데이터 드리프트 분석")
     with gr.Row():
         metric_dropdown = gr.Dropdown(
                 ("Wasserstein Distance", "wd_value")
             ],
             value="precision",
+            label="Metric to Analyze",
             scale=1
         )
+    with gr.Tabs():
+        with gr.Tab("📈 Time Series + Drift Markers"):
+            gr.Markdown("### 시계열 차트 (드리프트 발생 지점 표시)")
+            drift_markers_plot = gr.Plot()
+        with gr.Tab("📊 Monthly Drift Scores"):
+            gr.Markdown("### 월별 드리프트 점수 (1월 대비)")
+            monthly_drift_plot = gr.Plot()
+        with gr.Tab("🔥 Drift Heatmap"):
+            gr.Markdown("### 전체 메트릭 드리프트 히트맵")
+            heatmap_plot = gr.Plot()
+        with gr.Tab("📋 Data Tables"):
+            gr.Markdown("### 원본 데이터")
+            with gr.Row():
+                with gr.Column(scale=2):
+                    dataframe_output = gr.Dataframe(
+                        value=load_drift_data(),
+                        interactive=False,
+                        wrap=True,
+                        label="Drift Records"
+                    )
+                with gr.Column(scale=1):
+                    model_info_output = gr.Dataframe(
+                        value=load_model_info(),
+                        interactive=False,
+                        wrap=True,
+                        label="Model Info"
+                    )
     # Event handlers
     metric_dropdown.change(
+        fn=update_all_drift_visualizations,
         inputs=[metric_dropdown],
+        outputs=[drift_markers_plot, monthly_drift_plot, heatmap_plot]
     )
+    # Load initial data
     def load_initial_data():
         df = load_drift_data()
+        drift_markers = create_drift_markers_chart(df, 'precision')
+        monthly_drift = create_monthly_drift_chart(df, 'precision')
+        heatmap = create_drift_heatmap(df)
+        return drift_markers, monthly_drift, heatmap
     demo.load(
         fn=load_initial_data,
+        outputs=[drift_markers_plot, monthly_drift_plot, heatmap_plot]
     )
 if __name__ == "__main__":

pyproject.toml CHANGED Viewed

@@ -5,7 +5,10 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
     "gradio>=5.49.1",
     "pandas>=2.3.3",
     "plotly>=6.3.1",
 ]

 readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
+    "frouros>=0.9.0",
     "gradio>=5.49.1",
+    "numpy>=2.1.3",
     "pandas>=2.3.3",
     "plotly>=6.3.1",
+    "scipy>=1.14.1",
 ]

requirements.txt CHANGED Viewed

@@ -1,2 +1,5 @@
 pandas
 plotly

 pandas
 plotly
+frouros
+scipy
+numpy