Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse files- __pycache__/app.cpython-311.pyc +2 -2
- app.py +282 -135
- logs/privacy_audit_detailed.log +10 -0
__pycache__/app.cpython-311.pyc
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5a5f57c1929a7fa923f92f6a6522741c51bb6b424ff1abf62a5084a0f3d0d9e
|
| 3 |
+
size 126179
|
app.py
CHANGED
|
@@ -1002,150 +1002,297 @@ except Exception as e:
|
|
| 1002 |
auditor = None
|
| 1003 |
|
| 1004 |
def create_safe_distance_plot(result: Dict[str, Any]) -> go.Figure:
|
| 1005 |
-
"""Create
|
| 1006 |
try:
|
| 1007 |
if "error" in result:
|
| 1008 |
-
|
| 1009 |
-
fig.add_annotation(
|
| 1010 |
-
text=f"Audit Error: {result.get('error', 'Unknown error')}<br>Step: {result.get('step_failed', 'Unknown')}",
|
| 1011 |
-
x=0.5, y=0.5, showarrow=False,
|
| 1012 |
-
font=dict(size=16, color="red"),
|
| 1013 |
-
align="center"
|
| 1014 |
-
)
|
| 1015 |
-
fig.update_layout(
|
| 1016 |
-
title="Audit Failed - No Visualization Available",
|
| 1017 |
-
xaxis=dict(visible=False),
|
| 1018 |
-
yaxis=dict(visible=False)
|
| 1019 |
-
)
|
| 1020 |
-
return fig
|
| 1021 |
-
|
| 1022 |
-
stats = result.get("distance_statistics", {})
|
| 1023 |
-
|
| 1024 |
-
# Create comprehensive dashboard
|
| 1025 |
-
from plotly.subplots import make_subplots
|
| 1026 |
-
|
| 1027 |
-
fig = make_subplots(
|
| 1028 |
-
rows=2, cols=2,
|
| 1029 |
-
subplot_titles=("Distance Statistics", "Risk Assessment", "Data Quality", "Audit Summary"),
|
| 1030 |
-
specs=[[{"type": "bar"}, {"type": "indicator"}],
|
| 1031 |
-
[{"type": "bar"}, {"type": "table"}]]
|
| 1032 |
-
)
|
| 1033 |
|
| 1034 |
-
|
| 1035 |
-
metrics = ["Mean", "Median", "Std Dev", "Min", "Max"]
|
| 1036 |
-
values = [
|
| 1037 |
-
stats.get("mean_nearest_distance", 0),
|
| 1038 |
-
stats.get("median_nearest_distance", 0),
|
| 1039 |
-
stats.get("std_nearest_distance", 0),
|
| 1040 |
-
stats.get("min_nearest_distance", 0),
|
| 1041 |
-
stats.get("max_nearest_distance", 0)
|
| 1042 |
-
]
|
| 1043 |
-
|
| 1044 |
-
fig.add_trace(
|
| 1045 |
-
go.Bar(
|
| 1046 |
-
x=metrics,
|
| 1047 |
-
y=values,
|
| 1048 |
-
name="Distance Stats",
|
| 1049 |
-
marker_color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd'],
|
| 1050 |
-
text=[f"{v:.4f}" for v in values],
|
| 1051 |
-
textposition='auto'
|
| 1052 |
-
),
|
| 1053 |
-
row=1, col=1
|
| 1054 |
-
)
|
| 1055 |
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1059 |
|
| 1060 |
-
risk_colors = {
|
| 1061 |
-
"EXCEPTIONAL": "#2ca02c", "VERY LOW": "#8dd3c7", "LOW": "#ffd92f",
|
| 1062 |
-
"MEDIUM": "#ff7f0e", "HIGH": "#d62728", "VERY HIGH": "#8b0000",
|
| 1063 |
-
"CRITICAL": "#4b0082", "UNKNOWN": "#gray"
|
| 1064 |
-
}
|
| 1065 |
|
| 1066 |
-
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
|
| 1074 |
-
|
| 1075 |
-
|
| 1076 |
-
|
| 1077 |
-
|
| 1078 |
-
|
| 1079 |
-
|
| 1080 |
-
|
| 1081 |
-
|
| 1082 |
-
|
| 1083 |
-
|
| 1084 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1085 |
}
|
| 1086 |
-
|
| 1087 |
-
|
| 1088 |
-
|
| 1089 |
-
|
| 1090 |
-
# Data quality metrics
|
| 1091 |
-
quality_metrics = ["Zero Distances", "Close Matches", "Total Samples"]
|
| 1092 |
-
quality_values = [
|
| 1093 |
-
stats.get("zero_distance_count", 0),
|
| 1094 |
-
stats.get("small_distance_count", 0),
|
| 1095 |
-
result.get("dataset_info", {}).get("real_samples_used", 0)
|
| 1096 |
-
]
|
| 1097 |
-
|
| 1098 |
-
fig.add_trace(
|
| 1099 |
-
go.Bar(
|
| 1100 |
-
x=quality_metrics,
|
| 1101 |
-
y=quality_values,
|
| 1102 |
-
name="Quality Metrics",
|
| 1103 |
-
marker_color=['#d62728', '#ff7f0e', '#1f77b4'],
|
| 1104 |
-
text=quality_values,
|
| 1105 |
-
textposition='auto'
|
| 1106 |
-
),
|
| 1107 |
-
row=2, col=1
|
| 1108 |
-
)
|
| 1109 |
-
|
| 1110 |
-
# Summary table
|
| 1111 |
-
audit_meta = result.get("audit_metadata", {})
|
| 1112 |
-
summary_data = [
|
| 1113 |
-
["Metric", "Value"],
|
| 1114 |
-
["Audit ID", result.get("audit_id", "N/A")],
|
| 1115 |
-
["ε (95% confidence)", f"{epsilon:.6f}"],
|
| 1116 |
-
["Risk Level", risk_level],
|
| 1117 |
-
["Distance Metric", audit_meta.get("distance_metric", "N/A")],
|
| 1118 |
-
["Duration (s)", str(audit_meta.get("duration_seconds", "N/A"))],
|
| 1119 |
-
["Dimensions", str(result.get("dataset_info", {}).get("dimensions", "N/A"))]
|
| 1120 |
-
]
|
| 1121 |
-
|
| 1122 |
-
fig.add_trace(
|
| 1123 |
-
go.Table(
|
| 1124 |
-
header=dict(values=["Metric", "Value"], fill_color="lightblue", font=dict(size=12)),
|
| 1125 |
-
cells=dict(values=list(zip(*summary_data[1:])), fill_color="white", font=dict(size=11))
|
| 1126 |
-
),
|
| 1127 |
-
row=2, col=2
|
| 1128 |
-
)
|
| 1129 |
-
|
| 1130 |
-
fig.update_layout(
|
| 1131 |
-
title="Privacy Audit Dashboard",
|
| 1132 |
-
height=600,
|
| 1133 |
-
showlegend=False,
|
| 1134 |
-
template="plotly_white"
|
| 1135 |
-
)
|
| 1136 |
|
| 1137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1138 |
|
| 1139 |
-
|
| 1140 |
-
|
| 1141 |
-
|
| 1142 |
-
|
| 1143 |
-
|
| 1144 |
-
|
| 1145 |
-
|
| 1146 |
-
|
| 1147 |
-
)
|
| 1148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1149 |
|
| 1150 |
def create_safe_epsilon_plot(result: Dict[str, Any]) -> go.Figure:
|
| 1151 |
"""Create safe epsilon plot with error handling"""
|
|
|
|
| 1002 |
auditor = None
|
| 1003 |
|
| 1004 |
def create_safe_distance_plot(result: Dict[str, Any]) -> go.Figure:
    """Build the privacy-audit dashboard figure, falling back to an error figure.

    Args:
        result: Audit result mapping. When it contains an ``"error"`` key the
            error figure is returned instead of the dashboard.

    Returns:
        The comprehensive dashboard figure, or an error figure when the audit
        reported an error or the dashboard could not be built.
    """
    try:
        if "error" in result:
            return _create_error_figure(result)

        return _create_comprehensive_dashboard(result)

    except Exception as e:
        # UI boundary: a plotting failure is turned into an error figure
        # instead of propagating. logger.exception keeps the traceback so the
        # failure can still be diagnosed from the log.
        logger.exception("Distance plot creation failed: %s", e)
        # Name the failing stage so the error figure does not show
        # "Step: Unknown" for failures that happened while plotting.
        return _create_error_figure({"error": str(e), "step_failed": "visualization"})
|
| 1015 |
+
|
| 1016 |
+
def _create_error_figure(result: Dict[str, Any]) -> go.Figure:
    """Create a figure that shows an audit error message instead of a plot.

    Args:
        result: Mapping that may contain ``"error"`` (the message) and
            ``"step_failed"`` (the stage that failed). Missing keys are shown
            as ``"Unknown error"`` / ``"Unknown"``.

    Returns:
        A figure with hidden axes and a single centred annotation.
    """
    from html import escape

    # Plotly annotation text is parsed as a small HTML subset, so escape the
    # dynamic parts; otherwise fragments such as "<NA>" inside an exception
    # message are interpreted as tags and dropped from the display.
    error_text = escape(str(result.get('error', 'Unknown error')), quote=False)
    step_text = escape(str(result.get('step_failed', 'Unknown')), quote=False)

    fig = go.Figure()
    fig.add_annotation(
        text=f"<b>Audit Error</b><br>{error_text}<br><span style='font-size:12px'>Step: {step_text}</span>",
        # Position relative to the plotting area rather than to data
        # coordinates: the figure has no traces, so (0.5, 0.5) in data space
        # is not the centre of the canvas.
        xref="paper", yref="paper",
        x=0.5, y=0.5, showarrow=False,
        font=dict(size=16, color="#dc3545"),
        align="center",
        bgcolor="rgba(220, 53, 69, 0.1)",
        bordercolor="#dc3545",
        borderwidth=2
    )
    fig.update_layout(
        title="Privacy Audit Failed",
        xaxis=dict(visible=False),
        yaxis=dict(visible=False),
        plot_bgcolor="white",
        paper_bgcolor="white"
    )
    return fig
|
| 1036 |
+
|
| 1037 |
+
def _create_comprehensive_dashboard(result: Dict[str, Any]) -> go.Figure:
    """Assemble the 2x3 privacy dashboard from the individual panel builders.

    Args:
        result: Audit result mapping handed unchanged to every panel builder.

    Returns:
        The populated figure with the shared layout applied.
    """
    from plotly.subplots import make_subplots

    panel_titles = (
        "Distance Distribution Analysis",
        "Privacy Risk Assessment",
        "Data Quality Indicators",
        "Dataset Overview",
        "Privacy Bounds Comparison",
        "Processing Pipeline Status"
    )
    # The gauge needs an "indicator" cell; every other panel is a cartesian one.
    top_row = [{"type": "bar"}, {"type": "indicator"}, {"type": "scatter"}]
    bottom_row = [{"type": "bar"}, {"type": "bar"}, {"type": "bar"}]

    fig = make_subplots(
        rows=2, cols=3,
        subplot_titles=panel_titles,
        specs=[top_row, bottom_row],
        vertical_spacing=0.15,
        horizontal_spacing=0.1
    )

    # Each builder draws into its own fixed (row, col) cell of the grid.
    panel_builders = (
        _add_distance_analysis,
        _add_risk_assessment,
        _add_quality_indicators,
        _add_dataset_overview,
        _add_privacy_bounds,
        _add_processing_status,
    )
    for build_panel in panel_builders:
        build_panel(fig, result)

    dashboard_title = {
        "text": "<b>Privacy Audit Dashboard</b><br><sub>Comprehensive Analysis of Synthetic Data Privacy</sub>",
        "x": 0.5,
        "xanchor": "center",
        "font": {"size": 20, "color": "#2c3e50"}
    }
    fig.update_layout(
        title=dashboard_title,
        height=700,
        showlegend=False,
        plot_bgcolor="white",
        paper_bgcolor="#f8f9fa",
        font=dict(family="Arial, sans-serif", size=11, color="#2c3e50"),
        margin=dict(t=120, b=50, l=50, r=50)
    )

    return fig
|
| 1084 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1085 |
|
| 1086 |
+
def _add_distance_analysis(fig, result: Dict[str, Any]):
    """Draw the nearest-distance summary bars into row 1, column 1 of ``fig``.

    Args:
        fig: Subplot figure to draw into (modified in place).
        result: Audit result mapping; values are read from its
            ``"distance_statistics"`` entry, with 0 used for missing keys.
    """
    distance_stats = result.get("distance_statistics", {})

    # (bar label, statistics key, bar colour) for each bar, in display order.
    bar_specs = (
        ("Mean Distance", "mean_nearest_distance", '#3498db'),
        ("Median Distance", "median_nearest_distance", '#2ecc71'),
        ("Standard Dev", "std_nearest_distance", '#f39c12'),
        ("25th Percentile", "q25_nearest_distance", '#9b59b6'),
        ("75th Percentile", "q75_nearest_distance", '#e74c3c'),
    )
    labels = [label for label, _, _ in bar_specs]
    heights = [distance_stats.get(key, 0) for _, key, _ in bar_specs]
    palette = [colour for _, _, colour in bar_specs]

    bar_text = []
    for height in heights:
        # Values that are not positive are labelled with a plain "0".
        bar_text.append(f"{height:.6f}" if height > 0 else "0")

    distance_bars = go.Bar(
        x=labels,
        y=heights,
        marker_color=palette,
        text=bar_text,
        textposition='outside',
        textfont=dict(size=10, color="#2c3e50"),
        hovertemplate="<b>%{x}</b><br>Value: %{y:.6f}<extra></extra>",
        name="Distance Metrics"
    )
    fig.add_trace(distance_bars, row=1, col=1)

    fig.update_xaxes(title_text="Distance Metrics", row=1, col=1, tickangle=45)
    fig.update_yaxes(title_text="Distance Value", row=1, col=1, tickformat=".2e")
|
| 1120 |
+
|
| 1121 |
+
def _add_risk_assessment(fig, result: Dict[str, Any]):
    """Draw the epsilon gauge into row 1, column 2 of ``fig``.

    Args:
        fig: Subplot figure to draw into (modified in place).
        result: Audit result mapping; ``risk_level`` and ``primary_epsilon``
            are read from its ``"privacy_assessment"`` entry, defaulting to
            ``"UNKNOWN"`` and 0.
    """
    assessment = result.get("privacy_assessment", {})
    risk_level = assessment.get("risk_level", "UNKNOWN")
    epsilon = assessment.get("primary_epsilon", 0)

    risk_colors = {
        "EXCEPTIONAL": "#27ae60", "VERY LOW": "#2ecc71", "LOW": "#f1c40f",
        "MEDIUM": "#e67e22", "HIGH": "#e74c3c", "VERY HIGH": "#c0392b",
        "CRITICAL": "#8e44ad", "UNKNOWN": "#7f8c8d"
    }
    # Unrecognised risk levels fall back to the neutral grey.
    accent = risk_colors.get(risk_level, "#7f8c8d")

    # The gauge spans at least 0..5 and grows to 1.5x epsilon for larger values.
    if epsilon > 0:
        max_range = max(5.0, epsilon * 1.5)
    else:
        max_range = 5.0

    # (lower, upper, colour, name) for the shaded background bands.
    bands = (
        (0, 0.01, "#d5f4e6", "Exceptional"),
        (0.01, 0.1, "#a9dfbf", "Very Low"),
        (0.1, 0.5, "#fcf3cf", "Low"),
        (0.5, 1.0, "#f8c471", "Medium"),
        (1.0, 2.0, "#f1948a", "High"),
        (2.0, max_range, "#e8daef", "Critical"),
    )
    gauge_steps = [
        {"range": [low, high], "color": shade, "name": band_name}
        for low, high, shade, band_name in bands
    ]

    gauge = {
        "axis": {
            "range": [0, max_range],
            "tickwidth": 1,
            "tickcolor": "#2c3e50",
            "tickfont": {"size": 10}
        },
        "bar": {"color": accent, "thickness": 0.8},
        "steps": gauge_steps,
        "threshold": {
            "line": {"color": "#2c3e50", "width": 3},
            "thickness": 0.9,
            "value": 1.0  # Reference line at ε = 1.0
        }
    }
    gauge_title = {
        "text": f"<b>ε-DP Privacy Budget</b><br><span style='font-size:14px'>{risk_level} Risk</span>",
        "font": {"size": 16}
    }

    indicator = go.Indicator(
        mode="gauge+number+delta",
        value=epsilon,
        title=gauge_title,
        number={"font": {"size": 24, "color": accent}},
        delta={"reference": 1.0, "valueformat": ".6f"},
        gauge=gauge
    )
    fig.add_trace(indicator, row=1, col=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1171 |
|
| 1172 |
+
def _add_quality_indicators(fig, result: Dict[str, Any]):
    """Draw the safe / near-match / exact-match breakdown into row 1, column 3.

    Args:
        fig: Subplot figure to draw into (modified in place).
        result: Audit result mapping. ``zero_distance_count`` and
            ``small_distance_count`` are read from ``"distance_statistics"``
            (default 0) and ``real_samples_used`` from ``"dataset_info"``
            (default 1).
    """
    stats = result.get("distance_statistics", {})

    total_samples = result.get("dataset_info", {}).get("real_samples_used", 1)
    zero_distances = stats.get("zero_distance_count", 0)
    small_distances = stats.get("small_distance_count", 0)

    # NOTE(review): the subtraction below presumes small_distance_count
    # includes the zero-distance matches -- confirm against the auditor.
    # Counts are clamped at zero so that inconsistent inputs cannot produce
    # negative bars or negative hover counts.
    exact_count = zero_distances
    near_count = max(small_distances - zero_distances, 0)
    safe_count = max(total_samples - exact_count - near_count, 0)

    if total_samples > 0:
        memorization_pct = (exact_count / total_samples) * 100
        near_memorization_pct = (near_count / total_samples) * 100
    else:
        memorization_pct = 0
        near_memorization_pct = 0
    safe_samples_pct = max(100 - memorization_pct - near_memorization_pct, 0)

    categories = ["Safe Samples", "Near Matches", "Exact Matches"]
    percentages = [safe_samples_pct, near_memorization_pct, memorization_pct]
    colors = ['#27ae60', '#f39c12', '#e74c3c']

    # Horizontal bars keep the category labels readable.
    fig.add_trace(
        go.Bar(
            y=categories,
            x=percentages,
            orientation='h',
            marker_color=colors,
            text=[f"{p:.1f}%" for p in percentages],
            textposition='auto',
            hovertemplate="<b>%{y}</b><br>Percentage: %{x:.1f}%<br>Count: %{customdata}<extra></extra>",
            customdata=[safe_count, near_count, exact_count],
            name="Data Quality"
        ),
        row=1, col=3
    )

    fig.update_xaxes(title_text="Percentage of Samples", row=1, col=3, range=[0, 100])
    fig.update_yaxes(title_text="Sample Categories", row=1, col=3)
|
| 1208 |
|
| 1209 |
+
def _add_dataset_overview(fig, result: Dict[str, Any]):
    """Draw the dataset size bars into row 2, column 1 of ``fig``.

    Args:
        fig: Subplot figure to draw into (modified in place).
        result: Audit result mapping; counts are read from its
            ``"dataset_info"`` entry, with 0 used for missing keys.
    """
    info = result.get("dataset_info", {})

    # Bar label -> key in the dataset_info mapping, in display order.
    label_to_key = {
        "Real Samples": "real_samples_used",
        "Synthetic Samples": "synthetic_samples",
        "Dimensions": "dimensions",
        "Common Features": "common_features",
    }
    labels = list(label_to_key)
    counts = [info.get(key, 0) for key in label_to_key.values()]

    # Thousands separators keep large sample counts legible on the bars.
    count_labels = [f"{count:,}" for count in counts]

    overview_bars = go.Bar(
        x=labels,
        y=counts,
        marker_color=['#3498db', '#9b59b6', '#e74c3c', '#2ecc71'],
        text=count_labels,
        textposition='outside',
        hovertemplate="<b>%{x}</b><br>Count: %{y:,}<extra></extra>",
        name="Dataset Metrics"
    )
    fig.add_trace(overview_bars, row=2, col=1)

    fig.update_xaxes(title_text="Dataset Characteristics", row=2, col=1, tickangle=45)
    fig.update_yaxes(title_text="Count", row=2, col=1)
|
| 1236 |
+
|
| 1237 |
+
def _add_privacy_bounds(fig, result: Dict[str, Any]):
    """Draw the epsilon lower bounds per confidence level into row 2, column 2.

    Args:
        fig: Subplot figure to draw into (modified in place).
        result: Audit result mapping; ``eps_lb_90``, ``eps_lb_95`` and
            ``eps_lb_99`` are read from its ``"epsilon_bounds"`` entry, with 0
            used for missing keys.
    """
    epsilon_bounds = result.get("epsilon_bounds", {})

    confidence_levels = ["90%", "95%", "99%"]
    epsilon_values = [
        epsilon_bounds.get("eps_lb_90", 0),
        epsilon_bounds.get("eps_lb_95", 0),
        epsilon_bounds.get("eps_lb_99", 0)
    ]

    # One colour per confidence level.
    colors = ['#52c41a', '#1890ff', '#722ed1']

    fig.add_trace(
        go.Bar(
            x=confidence_levels,
            y=epsilon_values,
            marker_color=colors,
            text=[f"ε = {v:.6f}" for v in epsilon_values],
            textposition='outside',
            hovertemplate="<b>%{x} Confidence</b><br>ε Lower Bound: %{y:.6f}<extra></extra>",
            name="Privacy Bounds"
        ),
        row=2, col=2
    )

    # A log axis cannot place zero (or negative) values, so only use it when
    # every bound is strictly positive; otherwise fall back to a linear axis
    # so that all three bars are positioned meaningfully.
    axis_type = "log" if all(v > 0 for v in epsilon_values) else "linear"

    fig.update_xaxes(title_text="Confidence Level", row=2, col=2)
    fig.update_yaxes(title_text="ε Lower Bound", row=2, col=2, type=axis_type)
|
| 1266 |
+
|
| 1267 |
+
def _add_processing_status(fig, result: Dict[str, Any]):
    """Draw per-dataset preprocessing completion bars into row 2, column 3.

    Args:
        fig: Subplot figure to draw into (modified in place).
        result: Audit result mapping; the ``steps_completed`` lists are read
            from ``"preprocessing_reports"`` -> ``"real_dataset"`` /
            ``"synthetic_dataset"``, with missing entries counted as no steps.
    """
    reports = result.get("preprocessing_reports", {})
    real_report = reports.get("real_dataset", {})
    synth_report = reports.get("synthetic_dataset", {})

    total_steps = 6  # Expected number of processing steps
    step_counts = [
        len(real_report.get("steps_completed", [])),
        len(synth_report.get("steps_completed", []))
    ]

    datasets = ["Real Dataset", "Synthetic Dataset"]
    # Cap at 100 so a report with more steps than expected still fits the
    # fixed 0-100 axis below.
    completion = [min(count / total_steps * 100, 100.0) for count in step_counts]
    # Decide "complete" from the integer step count rather than comparing a
    # float percentage for equality.
    colors = ['#28a745' if count >= total_steps else '#ffc107' for count in step_counts]

    fig.add_trace(
        go.Bar(
            x=datasets,
            y=completion,
            marker_color=colors,
            # Show the real step count; deriving it back from the float
            # percentage with int() can truncate (e.g. 1 step shown as 0/6).
            text=[
                f"{pct:.0f}%<br>({count}/{total_steps})"
                for pct, count in zip(completion, step_counts)
            ],
            textposition='auto',
            hovertemplate="<b>%{x}</b><br>Processing: %{y:.0f}% Complete<extra></extra>",
            name="Processing Status"
        ),
        row=2, col=3
    )

    fig.update_xaxes(title_text="Dataset Type", row=2, col=3)
    fig.update_yaxes(title_text="Processing Completion %", row=2, col=3, range=[0, 100])
|
| 1296 |
|
| 1297 |
def create_safe_epsilon_plot(result: Dict[str, Any]) -> go.Figure:
|
| 1298 |
"""Create safe epsilon plot with error handling"""
|
logs/privacy_audit_detailed.log
CHANGED
|
@@ -27,3 +27,13 @@
|
|
| 27 |
2025-09-07 02:43:44,053 - app - INFO - <module>:999 - Privacy auditor initialized successfully
|
| 28 |
2025-09-07 02:43:44,583 - httpx - INFO - _send_single_request:1038 - HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
|
| 29 |
2025-09-07 02:43:44,859 - httpx - INFO - _send_single_request:1038 - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
2025-09-07 02:43:44,053 - app - INFO - <module>:999 - Privacy auditor initialized successfully
|
| 28 |
2025-09-07 02:43:44,583 - httpx - INFO - _send_single_request:1038 - HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
|
| 29 |
2025-09-07 02:43:44,859 - httpx - INFO - _send_single_request:1038 - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
|
| 30 |
+
2025-09-07 02:50:11,253 - app - INFO - <module>:51 - Privacy Auditor Starting - 2025-09-07 02:50:11
|
| 31 |
+
2025-09-07 02:50:11,254 - app - INFO - __init__:265 - Initialized Privacy Auditor - Session: df0d8164
|
| 32 |
+
2025-09-07 02:50:11,254 - app - INFO - __init__:266 - Configuration: {'confidence_level': 0.95, 'subsample_size': None, 'categorical_encoding': 'onehot', 'numerical_scaling': 'standard', 'distance_metric': 'euclidean', 'enable_preprocessing_report': True, 'max_file_size_mb': 500, 'timeout_seconds': 300, 'enable_data_validation': True, 'chunk_size': 10000, 'max_categories_onehot': 50}
|
| 33 |
+
2025-09-07 02:50:11,254 - app - INFO - <module>:999 - Privacy auditor initialized successfully
|
| 34 |
+
2025-09-07 02:50:11,639 - httpx - INFO - _send_single_request:1038 - HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
|
| 35 |
+
2025-09-07 02:50:11,964 - httpx - INFO - _send_single_request:1038 - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
|
| 36 |
+
2025-09-07 02:50:30,254 - app - INFO - <module>:51 - Privacy Auditor Starting - 2025-09-07 02:50:30
|
| 37 |
+
2025-09-07 02:50:30,255 - app - INFO - __init__:265 - Initialized Privacy Auditor - Session: 2d9998de
|
| 38 |
+
2025-09-07 02:50:30,255 - app - INFO - __init__:266 - Configuration: {'confidence_level': 0.95, 'subsample_size': None, 'categorical_encoding': 'onehot', 'numerical_scaling': 'standard', 'distance_metric': 'euclidean', 'enable_preprocessing_report': True, 'max_file_size_mb': 500, 'timeout_seconds': 300, 'enable_data_validation': True, 'chunk_size': 10000, 'max_categories_onehot': 50}
|
| 39 |
+
2025-09-07 02:50:30,255 - app - INFO - <module>:999 - Privacy auditor initialized successfully
|