Spaces:

leonardoimpact
/

Data_Validation_Process

Sleeping

App Files Community

fortuala committed on Aug 5, 2025

Commit

730ab8c

verified ·

1 Parent(s): 8b7e968

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -80

app.py CHANGED Viewed

@@ -15,64 +15,6 @@ data = pd.DataFrame({
 scenario_map = {"0": "No cleansing", "A": "Urgent cleansing", "B": "Urgent+Low urgency cleansing"}
-# --- Traffic light plotting utility ---
-def plot_dimension(dim_col, scenario):
-    value = data.set_index("scenario").loc[scenario, dim_col]
-    fig, ax = plt.subplots(figsize=(4, 7))  # Wider and taller
-    # Traffic light backgrounds
-    ax.axhspan(0, 0.6, color="#FF4D4F", alpha=0.30)
-    ax.axhspan(0.6, 0.8, color="#FFE58F", alpha=0.30)
-    ax.axhspan(0.8, 0.95, color="#52C41A", alpha=0.30)
-    # Value marker (smaller, thinner)
-    ax.axhline(value, color='black', lw=2, xmin=0.25, xmax=0.75)
-    # Value text - annotate, inside the plot area, with white outline for readability
-    ax.annotate(
-        f"{value:.3f}",
-        xy=(0.5, value),
-        xycoords=('axes fraction', 'data'),
-        ha='center', va='bottom',
-        fontsize=22, weight='bold',
-        color='black',
-        bbox=dict(facecolor='white', edgecolor='none', alpha=0.8, boxstyle='round,pad=0.2')
-    )
-    # Style
-    ax.set_xlim(0, 1)
-    ax.set_ylim(0, 0.95)
-    ax.set_xticks([])
-    ax.set_yticks([0, 0.6, 0.8, 0.95])
-    ax.set_yticklabels(["0.0", "0.6", "0.8", "1.0"], fontsize=15)
-    for spine in ax.spines.values():
-        spine.set_visible(False)
-    ax.spines['left'].set_visible(True)
-    ax.spines['left'].set_linewidth(2)
-    # Add extra space above title to prevent overlap
-    plt.subplots_adjust(top=0.88)
-    ax.set_title(
-        f"{dim_col.replace('_', ' ').title()}\n({scenario_map[scenario]})",
-        fontsize=15, weight='bold', pad=10
-    )
-    plt.tight_layout()
-    return fig
-# --- Scenario selector callback ---
-def show_plots(scenario):
-    fig1 = plot_dimension("consistency_score", scenario)
-    fig2 = plot_dimension("overall_representativity_score", scenario)
-    fig3 = plot_dimension("integrity_score", scenario)
-    # Convert figs to images for Gradio
-    img_list = []
-    for fig in [fig1, fig2, fig3]:
-        buf = io.BytesIO()
-        fig.savefig(buf, format='png', bbox_inches='tight')
-        buf.seek(0)
-        img = Image.open(buf)
-        img_list.append(img)
-        plt.close(fig)
-    return img_list
-# --- Button for quality text (scenario A only) ---
 QUALITY_TEXT = """
 ### Overall Data Quality Analysis
@@ -171,13 +113,70 @@ After low urgency cleansing (Scenario B), the score is 0.757 (declined, Δ = -0.
 No enumerator bias has been found.
 """
-# --- Table 1.2 loader & filter ---
-def load_and_filter_table(col=None, val=None):
-    df = pd.read_csv("issues_log.csv")  # Your table_1_2 file
     if col and val:
-        df = df[df[col].astype(str).str.contains(str(val), case=False, na=False)]
     return df
 # --- Gradio UI ---
 with gr.Blocks() as demo:
     gr.Markdown("## Data Quality Scenario Explorer")
@@ -195,30 +194,23 @@ with gr.Blocks() as demo:
         out3 = gr.Image(label="Integrity Score Traffic Light")
     scenario.change(show_plots, scenario, [out1, out2, out3])
-    # Button for analysis (scenario A)
     with gr.Row():
-        analysis_btn = gr.Button("Show Overall Data Quality Analysis (Scenario A Only)")
-        analysis_text = gr.Markdown(visible=False)
-    def show_analysis(selected_scenario):
-        if selected_scenario == "A":
-            return gr.update(value=QUALITY_TEXT, visible=True)
-        else:
-            return gr.update(value="Select scenario 'A' (Urgent cleansing) to view the analysis.", visible=True)
-    analysis_btn.click(show_analysis, scenario, analysis_text)
-    # Table with filter
     with gr.Row():
         gr.Markdown("### Data Consistency Issues Deep Dive (Table 1.2)")
     with gr.Row():
-        filter_col = gr.Textbox(label="Column to Filter (optional)", value="")
-        filter_val = gr.Textbox(label="Value to Filter (optional)", value="")
-        table_out = gr.Dataframe(label="table_1_2.csv Filtered Results")
-    filter_col.change(lambda col, val: load_and_filter_table(col, val), [filter_col, filter_val], table_out)
-    filter_val.change(lambda col, val: load_and_filter_table(col, val), [filter_col, filter_val], table_out)
-    # Default show plots for initial scenario
-    demo.load(lambda: show_plots("0"), outputs=[out1, out2, out3])
 if __name__ == "__main__":
     demo.launch()

 scenario_map = {"0": "No cleansing", "A": "Urgent cleansing", "B": "Urgent+Low urgency cleansing"}
 QUALITY_TEXT = """
 ### Overall Data Quality Analysis
 No enumerator bias has been found.
 """
+def plot_dimension(dim_col, scenario):
+    """Draw a traffic-light gauge for one score column of one scenario.
+
+    Reads the value of column `dim_col` from the row of the module-level
+    `data` frame whose "scenario" entry equals `scenario`, draws it as a
+    short horizontal marker over red / yellow / green bands, and returns
+    the Matplotlib figure. The caller is responsible for closing the figure.
+    """
+    value = data.set_index("scenario").loc[scenario, dim_col]
+    fig, ax = plt.subplots(figsize=(4, 7))
+    # Background bands: red below 0.6, yellow 0.6-0.8, green 0.8-0.95.
+    ax.axhspan(0, 0.6, color="#FF4D4F", alpha=0.30)
+    ax.axhspan(0.6, 0.8, color="#FFE58F", alpha=0.30)
+    ax.axhspan(0.8, 0.95, color="#52C41A", alpha=0.30)
+    # Marker line spans only the middle 30% of the axes width.
+    ax.axhline(value, color='black', lw=2, xmin=0.35, xmax=0.65)
+    # Numeric label: x is in axes-fraction units (centred), y is in data units.
+    ax.annotate(
+        f"{value:.3f}",
+        xy=(0.5, value),
+        xycoords=('axes fraction', 'data'),
+        ha='center', va='bottom',
+        fontsize=22, weight='bold',
+        color='black',
+        bbox=dict(facecolor='white', edgecolor='none', alpha=0.8, boxstyle='round,pad=0.2')
+    )
+    ax.set_xlim(0, 1)
+    ax.set_ylim(0, 0.95)
+    ax.set_xticks([])
+    # NOTE(review): the tick at 0.95 is labelled "1.0" and the axis stops at
+    # 0.95, so a score above 0.95 would fall outside the visible range.
+    # Confirm this is intended.
+    ax.set_yticks([0, 0.6, 0.8, 0.95])
+    ax.set_yticklabels(["0.0", "0.6", "0.8", "1.0"], fontsize=15)
+    # Hide every spine, then re-enable only the left one as the scale axis.
+    for spine in ax.spines.values():
+        spine.set_visible(False)
+    ax.spines['left'].set_visible(True)
+    ax.spines['left'].set_linewidth(2)
+    # NOTE(review): tight_layout() below recomputes the subplot parameters,
+    # so this adjustment is presumably overridden; verify before relying on it.
+    plt.subplots_adjust(top=0.88)
+    # Title: prettified column name on line one, scenario label on line two.
+    ax.set_title(
+        f"{dim_col.replace('_', ' ').title()}\n({scenario_map[scenario]})",
+        fontsize=15, weight='bold', pad=10
+    )
+    plt.tight_layout()
+    return fig
+def show_plots(scenario):
+    """Render the three score gauges for `scenario` and return them as images.
+
+    Returns a list of three images in the order: consistency score,
+    overall representativity score, integrity score.
+    """
+    fig1 = plot_dimension("consistency_score", scenario)
+    fig2 = plot_dimension("overall_representativity_score", scenario)
+    fig3 = plot_dimension("integrity_score", scenario)
+    img_list = []
+    for fig in [fig1, fig2, fig3]:
+        # Render each figure to an in-memory PNG and reopen it as an image
+        # (presumably PIL.Image; the import is not visible here).
+        buf = io.BytesIO()
+        fig.savefig(buf, format='png', bbox_inches='tight')
+        buf.seek(0)
+        img = Image.open(buf)
+        img_list.append(img)
+        # Close the figure once it has been rendered.
+        plt.close(fig)
+    return img_list
+# --- Simple table filter function ---
+def filter_table(col, val):
+    df = pd.read_csv("issues_log.csv")
     if col and val:
+        if col in df.columns:
+            mask = df[col].astype(str).str.contains(str(val), case=False, na=False)
+            return df[mask]
+        else:
+            return pd.DataFrame({"error": [f"Column '{col}' not in table."]})
     return df
+def get_quality_text(selected_scenario):
+    if selected_scenario == "A":
+        return QUALITY_TEXT
+    else:
+        return f"Select scenario 'Urgent cleansing' to see the detailed data quality analysis."
 # --- Gradio UI ---
 with gr.Blocks() as demo:
     gr.Markdown("## Data Quality Scenario Explorer")
         out3 = gr.Image(label="Integrity Score Traffic Light")
     scenario.change(show_plots, scenario, [out1, out2, out3])
     with gr.Row():
+        gr.Markdown("### Overall Data Quality Analysis")
+        analysis_text = gr.Markdown(value=get_quality_text("0"), visible=True)
+    scenario.change(get_quality_text, scenario, analysis_text)
     with gr.Row():
         gr.Markdown("### Data Consistency Issues Deep Dive (Table 1.2)")
     with gr.Row():
+        filter_col = gr.Textbox(label="Column (optional)")
+        filter_val = gr.Textbox(label="Value (optional)")
+        table_out = gr.Dataframe(label="Filtered Table 1.2 (issues_log.csv)")
+    filter_col.change(filter_table, [filter_col, filter_val], table_out)
+    filter_val.change(filter_table, [filter_col, filter_val], table_out)
+    demo.load(lambda: filter_table("", ""), outputs=table_out)
 if __name__ == "__main__":
     demo.launch()