Update app.py
Browse files
app.py
CHANGED
|
@@ -15,64 +15,6 @@ data = pd.DataFrame({
|
|
| 15 |
|
| 16 |
scenario_map = {"0": "No cleansing", "A": "Urgent cleansing", "B": "Urgent+Low urgency cleansing"}
|
| 17 |
|
| 18 |
-
# --- Traffic light plotting utility ---
|
| 19 |
-
def plot_dimension(dim_col, scenario):
|
| 20 |
-
value = data.set_index("scenario").loc[scenario, dim_col]
|
| 21 |
-
fig, ax = plt.subplots(figsize=(4, 7)) # Wider and taller
|
| 22 |
-
# Traffic light backgrounds
|
| 23 |
-
ax.axhspan(0, 0.6, color="#FF4D4F", alpha=0.30)
|
| 24 |
-
ax.axhspan(0.6, 0.8, color="#FFE58F", alpha=0.30)
|
| 25 |
-
ax.axhspan(0.8, 0.95, color="#52C41A", alpha=0.30)
|
| 26 |
-
# Value marker (smaller, thinner)
|
| 27 |
-
ax.axhline(value, color='black', lw=2, xmin=0.25, xmax=0.75)
|
| 28 |
-
# Value text - annotate, inside the plot area, with white outline for readability
|
| 29 |
-
ax.annotate(
|
| 30 |
-
f"{value:.3f}",
|
| 31 |
-
xy=(0.5, value),
|
| 32 |
-
xycoords=('axes fraction', 'data'),
|
| 33 |
-
ha='center', va='bottom',
|
| 34 |
-
fontsize=22, weight='bold',
|
| 35 |
-
color='black',
|
| 36 |
-
bbox=dict(facecolor='white', edgecolor='none', alpha=0.8, boxstyle='round,pad=0.2')
|
| 37 |
-
)
|
| 38 |
-
# Style
|
| 39 |
-
ax.set_xlim(0, 1)
|
| 40 |
-
ax.set_ylim(0, 0.95)
|
| 41 |
-
ax.set_xticks([])
|
| 42 |
-
ax.set_yticks([0, 0.6, 0.8, 0.95])
|
| 43 |
-
ax.set_yticklabels(["0.0", "0.6", "0.8", "1.0"], fontsize=15)
|
| 44 |
-
for spine in ax.spines.values():
|
| 45 |
-
spine.set_visible(False)
|
| 46 |
-
ax.spines['left'].set_visible(True)
|
| 47 |
-
ax.spines['left'].set_linewidth(2)
|
| 48 |
-
# Add extra space above title to prevent overlap
|
| 49 |
-
plt.subplots_adjust(top=0.88)
|
| 50 |
-
ax.set_title(
|
| 51 |
-
f"{dim_col.replace('_', ' ').title()}\n({scenario_map[scenario]})",
|
| 52 |
-
fontsize=15, weight='bold', pad=10
|
| 53 |
-
)
|
| 54 |
-
plt.tight_layout()
|
| 55 |
-
return fig
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
# --- Scenario selector callback ---
|
| 60 |
-
def show_plots(scenario):
|
| 61 |
-
fig1 = plot_dimension("consistency_score", scenario)
|
| 62 |
-
fig2 = plot_dimension("overall_representativity_score", scenario)
|
| 63 |
-
fig3 = plot_dimension("integrity_score", scenario)
|
| 64 |
-
# Convert figs to images for Gradio
|
| 65 |
-
img_list = []
|
| 66 |
-
for fig in [fig1, fig2, fig3]:
|
| 67 |
-
buf = io.BytesIO()
|
| 68 |
-
fig.savefig(buf, format='png', bbox_inches='tight')
|
| 69 |
-
buf.seek(0)
|
| 70 |
-
img = Image.open(buf)
|
| 71 |
-
img_list.append(img)
|
| 72 |
-
plt.close(fig)
|
| 73 |
-
return img_list
|
| 74 |
-
|
| 75 |
-
# --- Button for quality text (scenario A only) ---
|
| 76 |
QUALITY_TEXT = """
|
| 77 |
### Overall Data Quality Analysis
|
| 78 |
|
|
@@ -171,13 +113,70 @@ After low urgency cleansing (Scenario B), the score is 0.757 (declined, Δ = -0.
|
|
| 171 |
No enumerator bias has been found.
|
| 172 |
"""
|
| 173 |
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
if col and val:
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
return df
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
# --- Gradio UI ---
|
| 182 |
with gr.Blocks() as demo:
|
| 183 |
gr.Markdown("## Data Quality Scenario Explorer")
|
|
@@ -195,30 +194,23 @@ with gr.Blocks() as demo:
|
|
| 195 |
out3 = gr.Image(label="Integrity Score Traffic Light")
|
| 196 |
scenario.change(show_plots, scenario, [out1, out2, out3])
|
| 197 |
|
| 198 |
-
# Button for analysis (scenario A)
|
| 199 |
with gr.Row():
|
| 200 |
-
|
| 201 |
-
analysis_text = gr.Markdown(visible=
|
| 202 |
-
|
| 203 |
-
if selected_scenario == "A":
|
| 204 |
-
return gr.update(value=QUALITY_TEXT, visible=True)
|
| 205 |
-
else:
|
| 206 |
-
return gr.update(value="Select scenario 'A' (Urgent cleansing) to view the analysis.", visible=True)
|
| 207 |
-
analysis_btn.click(show_analysis, scenario, analysis_text)
|
| 208 |
|
| 209 |
-
# Table with filter
|
| 210 |
with gr.Row():
|
| 211 |
gr.Markdown("### Data Consistency Issues Deep Dive (Table 1.2)")
|
| 212 |
with gr.Row():
|
| 213 |
-
filter_col = gr.Textbox(label="Column
|
| 214 |
-
filter_val = gr.Textbox(label="Value
|
| 215 |
-
table_out = gr.Dataframe(label="
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
demo.load(lambda: show_plots("0"), outputs=[out1, out2, out3])
|
| 221 |
|
| 222 |
if __name__ == "__main__":
|
| 223 |
demo.launch()
|
| 224 |
|
|
|
|
|
|
| 15 |
|
| 16 |
scenario_map = {"0": "No cleansing", "A": "Urgent cleansing", "B": "Urgent+Low urgency cleansing"}
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
QUALITY_TEXT = """
|
| 19 |
### Overall Data Quality Analysis
|
| 20 |
|
|
|
|
| 113 |
No enumerator bias has been found.
|
| 114 |
"""
|
| 115 |
|
| 116 |
+
def plot_dimension(dim_col, scenario):
    """Draw a traffic-light gauge for one score column in one scenario.

    The gauge has three translucent horizontal bands (red, yellow, green), a
    short black marker at the score, and the score printed above the marker.

    Args:
        dim_col: Name of the score column to read from the module-level
            ``data`` frame, e.g. ``"consistency_score"``.
        scenario: Scenario key. It is looked up both in the ``scenario``
            column of ``data`` and in ``scenario_map``.

    Returns:
        The matplotlib figure. It is not closed here; the caller owns it.

    Raises:
        KeyError: If ``scenario`` or ``dim_col`` is not present in ``data``,
            or ``scenario`` is not a key of ``scenario_map``.
    """
    value = data.set_index("scenario").loc[scenario, dim_col]
    fig, ax = plt.subplots(figsize=(4, 7))

    # Traffic-light background bands.
    ax.axhspan(0, 0.6, color="#FF4D4F", alpha=0.30)
    ax.axhspan(0.6, 0.8, color="#FFE58F", alpha=0.30)
    ax.axhspan(0.8, 0.95, color="#52C41A", alpha=0.30)

    # Score marker: a short horizontal line centred in the axes.
    ax.axhline(value, color='black', lw=2, xmin=0.35, xmax=0.65)

    # Score label: x is in axes fraction (always centred), y is in data units.
    ax.annotate(
        f"{value:.3f}",
        xy=(0.5, value),
        xycoords=('axes fraction', 'data'),
        ha='center', va='bottom',
        fontsize=22, weight='bold',
        color='black',
        bbox=dict(facecolor='white', edgecolor='none', alpha=0.8, boxstyle='round,pad=0.2')
    )

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 0.95)
    ax.set_xticks([])
    # NOTE(review): the top tick sits at 0.95 but is labelled "1.0", and the
    # axis stops at 0.95 - confirm this is intentional; a score above 0.95
    # would fall outside the axes.
    ax.set_yticks([0, 0.6, 0.8, 0.95])
    ax.set_yticklabels(["0.0", "0.6", "0.8", "1.0"], fontsize=15)

    # Keep only a thick left spine as the gauge's scale.
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.spines['left'].set_visible(True)
    ax.spines['left'].set_linewidth(2)

    # Use the figure's own methods rather than plt.*: the pyplot functions act
    # on the implicit "current figure", which is shared global state and can
    # be a different figure when UI callbacks run concurrently.
    fig.subplots_adjust(top=0.88)
    ax.set_title(
        f"{dim_col.replace('_', ' ').title()}\n({scenario_map[scenario]})",
        fontsize=15, weight='bold', pad=10
    )
    fig.tight_layout()
    return fig
|
| 148 |
+
|
| 149 |
+
def show_plots(scenario):
    """Render the three traffic-light gauges for a scenario as images.

    Args:
        scenario: Scenario key, forwarded unchanged to ``plot_dimension``.

    Returns:
        A list of three images, in the order consistency,
        overall representativity, integrity. Each is opened from an in-memory
        PNG buffer (``Image`` is presumably ``PIL.Image`` - confirm against
        the file's imports).
    """
    dimensions = (
        "consistency_score",
        "overall_representativity_score",
        "integrity_score",
    )
    img_list = []
    for dim_col in dimensions:
        fig = plot_dimension(dim_col, scenario)
        try:
            buf = io.BytesIO()
            fig.savefig(buf, format='png', bbox_inches='tight')
            buf.seek(0)
            img_list.append(Image.open(buf))
        finally:
            # pyplot keeps every figure alive until it is closed, so close
            # even when rendering fails; otherwise each failed callback would
            # leak a figure in a long-running app.
            plt.close(fig)
    return img_list
|
| 162 |
+
|
| 163 |
+
# --- Simple table filter function ---
|
| 164 |
+
def filter_table(col, val):
    """Load ``issues_log.csv`` and optionally filter it by a substring match.

    Args:
        col: Name of the column to search. If empty, no filter is applied.
        val: Text to look for. If empty, no filter is applied.

    Returns:
        The full table when ``col`` or ``val`` is empty; otherwise the rows
        whose ``col`` value, converted to a string, contains ``val``
        (case-insensitive). If ``col`` is not a column of the table, a
        one-row frame with a single ``error`` column is returned instead.
    """
    df = pd.read_csv("issues_log.csv")
    if not (col and val):
        return df
    if col not in df.columns:
        return pd.DataFrame({"error": [f"Column '{col}' not in table."]})
    # regex=False: the value is free text typed by the user. With the default
    # regex matching, input such as "(" or "[" raises re.error instead of
    # simply matching nothing.
    mask = df[col].astype(str).str.contains(
        str(val), case=False, na=False, regex=False
    )
    return df[mask]
|
| 173 |
|
| 174 |
+
def get_quality_text(selected_scenario):
    """Return the markdown shown in the analysis panel for a scenario.

    Args:
        selected_scenario: Scenario key currently selected in the UI.

    Returns:
        ``QUALITY_TEXT`` when the key is ``"A"``; for any other value, a
        one-line hint asking the user to select that scenario.
    """
    if selected_scenario == "A":
        return QUALITY_TEXT
    # Plain literal: the message has no placeholders, so no f-string is needed.
    return "Select scenario 'Urgent cleansing' to see the detailed data quality analysis."
|
| 179 |
+
|
| 180 |
# --- Gradio UI ---
|
| 181 |
with gr.Blocks() as demo:
|
| 182 |
gr.Markdown("## Data Quality Scenario Explorer")
|
|
|
|
| 194 |
out3 = gr.Image(label="Integrity Score Traffic Light")
|
| 195 |
scenario.change(show_plots, scenario, [out1, out2, out3])
|
| 196 |
|
|
|
|
| 197 |
with gr.Row():
|
| 198 |
+
gr.Markdown("### Overall Data Quality Analysis")
|
| 199 |
+
analysis_text = gr.Markdown(value=get_quality_text("0"), visible=True)
|
| 200 |
+
scenario.change(get_quality_text, scenario, analysis_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
|
|
|
| 202 |
with gr.Row():
|
| 203 |
gr.Markdown("### Data Consistency Issues Deep Dive (Table 1.2)")
|
| 204 |
with gr.Row():
|
| 205 |
+
filter_col = gr.Textbox(label="Column (optional)")
|
| 206 |
+
filter_val = gr.Textbox(label="Value (optional)")
|
| 207 |
+
table_out = gr.Dataframe(label="Filtered Table 1.2 (issues_log.csv)")
|
| 208 |
+
|
| 209 |
+
filter_col.change(filter_table, [filter_col, filter_val], table_out)
|
| 210 |
+
filter_val.change(filter_table, [filter_col, filter_val], table_out)
|
| 211 |
+
demo.load(lambda: filter_table("", ""), outputs=table_out)
|
|
|
|
| 212 |
|
| 213 |
if __name__ == "__main__":
|
| 214 |
demo.launch()
|
| 215 |
|
| 216 |
+
|