fortuala committed on
Commit
730ab8c
·
verified ·
1 Parent(s): 8b7e968

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -80
app.py CHANGED
@@ -15,64 +15,6 @@ data = pd.DataFrame({
15
 
16
  scenario_map = {"0": "No cleansing", "A": "Urgent cleansing", "B": "Urgent+Low urgency cleansing"}
17
 
18
- # --- Traffic light plotting utility ---
19
- def plot_dimension(dim_col, scenario):
20
- value = data.set_index("scenario").loc[scenario, dim_col]
21
- fig, ax = plt.subplots(figsize=(4, 7)) # Wider and taller
22
- # Traffic light backgrounds
23
- ax.axhspan(0, 0.6, color="#FF4D4F", alpha=0.30)
24
- ax.axhspan(0.6, 0.8, color="#FFE58F", alpha=0.30)
25
- ax.axhspan(0.8, 0.95, color="#52C41A", alpha=0.30)
26
- # Value marker (smaller, thinner)
27
- ax.axhline(value, color='black', lw=2, xmin=0.25, xmax=0.75)
28
- # Value text - annotate, inside the plot area, with white outline for readability
29
- ax.annotate(
30
- f"{value:.3f}",
31
- xy=(0.5, value),
32
- xycoords=('axes fraction', 'data'),
33
- ha='center', va='bottom',
34
- fontsize=22, weight='bold',
35
- color='black',
36
- bbox=dict(facecolor='white', edgecolor='none', alpha=0.8, boxstyle='round,pad=0.2')
37
- )
38
- # Style
39
- ax.set_xlim(0, 1)
40
- ax.set_ylim(0, 0.95)
41
- ax.set_xticks([])
42
- ax.set_yticks([0, 0.6, 0.8, 0.95])
43
- ax.set_yticklabels(["0.0", "0.6", "0.8", "1.0"], fontsize=15)
44
- for spine in ax.spines.values():
45
- spine.set_visible(False)
46
- ax.spines['left'].set_visible(True)
47
- ax.spines['left'].set_linewidth(2)
48
- # Add extra space above title to prevent overlap
49
- plt.subplots_adjust(top=0.88)
50
- ax.set_title(
51
- f"{dim_col.replace('_', ' ').title()}\n({scenario_map[scenario]})",
52
- fontsize=15, weight='bold', pad=10
53
- )
54
- plt.tight_layout()
55
- return fig
56
-
57
-
58
-
59
- # --- Scenario selector callback ---
60
- def show_plots(scenario):
61
- fig1 = plot_dimension("consistency_score", scenario)
62
- fig2 = plot_dimension("overall_representativity_score", scenario)
63
- fig3 = plot_dimension("integrity_score", scenario)
64
- # Convert figs to images for Gradio
65
- img_list = []
66
- for fig in [fig1, fig2, fig3]:
67
- buf = io.BytesIO()
68
- fig.savefig(buf, format='png', bbox_inches='tight')
69
- buf.seek(0)
70
- img = Image.open(buf)
71
- img_list.append(img)
72
- plt.close(fig)
73
- return img_list
74
-
75
- # --- Button for quality text (scenario A only) ---
76
  QUALITY_TEXT = """
77
  ### Overall Data Quality Analysis
78
 
@@ -171,13 +113,70 @@ After low urgency cleansing (Scenario B), the score is 0.757 (declined, Δ = -0.
171
  No enumerator bias has been found.
172
  """
173
 
174
- # --- Table 1.2 loader & filter ---
175
- def load_and_filter_table(col=None, val=None):
176
- df = pd.read_csv("issues_log.csv") # Your table_1_2 file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  if col and val:
178
- df = df[df[col].astype(str).str.contains(str(val), case=False, na=False)]
 
 
 
 
179
  return df
180
 
 
 
 
 
 
 
181
  # --- Gradio UI ---
182
  with gr.Blocks() as demo:
183
  gr.Markdown("## Data Quality Scenario Explorer")
@@ -195,30 +194,23 @@ with gr.Blocks() as demo:
195
  out3 = gr.Image(label="Integrity Score Traffic Light")
196
  scenario.change(show_plots, scenario, [out1, out2, out3])
197
 
198
- # Button for analysis (scenario A)
199
  with gr.Row():
200
- analysis_btn = gr.Button("Show Overall Data Quality Analysis (Scenario A Only)")
201
- analysis_text = gr.Markdown(visible=False)
202
- def show_analysis(selected_scenario):
203
- if selected_scenario == "A":
204
- return gr.update(value=QUALITY_TEXT, visible=True)
205
- else:
206
- return gr.update(value="Select scenario 'A' (Urgent cleansing) to view the analysis.", visible=True)
207
- analysis_btn.click(show_analysis, scenario, analysis_text)
208
 
209
- # Table with filter
210
  with gr.Row():
211
  gr.Markdown("### Data Consistency Issues Deep Dive (Table 1.2)")
212
  with gr.Row():
213
- filter_col = gr.Textbox(label="Column to Filter (optional)", value="")
214
- filter_val = gr.Textbox(label="Value to Filter (optional)", value="")
215
- table_out = gr.Dataframe(label="table_1_2.csv Filtered Results")
216
- filter_col.change(lambda col, val: load_and_filter_table(col, val), [filter_col, filter_val], table_out)
217
- filter_val.change(lambda col, val: load_and_filter_table(col, val), [filter_col, filter_val], table_out)
218
-
219
- # Default show plots for initial scenario
220
- demo.load(lambda: show_plots("0"), outputs=[out1, out2, out3])
221
 
222
  if __name__ == "__main__":
223
  demo.launch()
224
 
 
 
15
 
16
  scenario_map = {"0": "No cleansing", "A": "Urgent cleansing", "B": "Urgent+Low urgency cleansing"}
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  QUALITY_TEXT = """
19
  ### Overall Data Quality Analysis
20
 
 
113
  No enumerator bias has been found.
114
  """
115
 
116
+ def plot_dimension(dim_col, scenario):
117
+ value = data.set_index("scenario").loc[scenario, dim_col]
118
+ fig, ax = plt.subplots(figsize=(4, 7))
119
+ ax.axhspan(0, 0.6, color="#FF4D4F", alpha=0.30)
120
+ ax.axhspan(0.6, 0.8, color="#FFE58F", alpha=0.30)
121
+ ax.axhspan(0.8, 0.95, color="#52C41A", alpha=0.30)
122
+ ax.axhline(value, color='black', lw=2, xmin=0.35, xmax=0.65)
123
+ ax.annotate(
124
+ f"{value:.3f}",
125
+ xy=(0.5, value),
126
+ xycoords=('axes fraction', 'data'),
127
+ ha='center', va='bottom',
128
+ fontsize=22, weight='bold',
129
+ color='black',
130
+ bbox=dict(facecolor='white', edgecolor='none', alpha=0.8, boxstyle='round,pad=0.2')
131
+ )
132
+ ax.set_xlim(0, 1)
133
+ ax.set_ylim(0, 0.95)
134
+ ax.set_xticks([])
135
+ ax.set_yticks([0, 0.6, 0.8, 0.95])
136
+ ax.set_yticklabels(["0.0", "0.6", "0.8", "1.0"], fontsize=15)
137
+ for spine in ax.spines.values():
138
+ spine.set_visible(False)
139
+ ax.spines['left'].set_visible(True)
140
+ ax.spines['left'].set_linewidth(2)
141
+ plt.subplots_adjust(top=0.88)
142
+ ax.set_title(
143
+ f"{dim_col.replace('_', ' ').title()}\n({scenario_map[scenario]})",
144
+ fontsize=15, weight='bold', pad=10
145
+ )
146
+ plt.tight_layout()
147
+ return fig
148
+
149
+ def show_plots(scenario):
150
+ fig1 = plot_dimension("consistency_score", scenario)
151
+ fig2 = plot_dimension("overall_representativity_score", scenario)
152
+ fig3 = plot_dimension("integrity_score", scenario)
153
+ img_list = []
154
+ for fig in [fig1, fig2, fig3]:
155
+ buf = io.BytesIO()
156
+ fig.savefig(buf, format='png', bbox_inches='tight')
157
+ buf.seek(0)
158
+ img = Image.open(buf)
159
+ img_list.append(img)
160
+ plt.close(fig)
161
+ return img_list
162
+
163
+ # --- Simple table filter function ---
164
+ def filter_table(col, val):
165
+ df = pd.read_csv("issues_log.csv")
166
  if col and val:
167
+ if col in df.columns:
168
+ mask = df[col].astype(str).str.contains(str(val), case=False, na=False)
169
+ return df[mask]
170
+ else:
171
+ return pd.DataFrame({"error": [f"Column '{col}' not in table."]})
172
  return df
173
 
174
+ def get_quality_text(selected_scenario):
175
+ if selected_scenario == "A":
176
+ return QUALITY_TEXT
177
+ else:
178
+ return f"Select scenario 'Urgent cleansing' to see the detailed data quality analysis."
179
+
180
  # --- Gradio UI ---
181
  with gr.Blocks() as demo:
182
  gr.Markdown("## Data Quality Scenario Explorer")
 
194
  out3 = gr.Image(label="Integrity Score Traffic Light")
195
  scenario.change(show_plots, scenario, [out1, out2, out3])
196
 
 
197
  with gr.Row():
198
+ gr.Markdown("### Overall Data Quality Analysis")
199
+ analysis_text = gr.Markdown(value=get_quality_text("0"), visible=True)
200
+ scenario.change(get_quality_text, scenario, analysis_text)
 
 
 
 
 
201
 
 
202
  with gr.Row():
203
  gr.Markdown("### Data Consistency Issues Deep Dive (Table 1.2)")
204
  with gr.Row():
205
+ filter_col = gr.Textbox(label="Column (optional)")
206
+ filter_val = gr.Textbox(label="Value (optional)")
207
+ table_out = gr.Dataframe(label="Filtered Table 1.2 (issues_log.csv)")
208
+
209
+ filter_col.change(filter_table, [filter_col, filter_val], table_out)
210
+ filter_val.change(filter_table, [filter_col, filter_val], table_out)
211
+ demo.load(lambda: filter_table("", ""), outputs=table_out)
 
212
 
213
  if __name__ == "__main__":
214
  demo.launch()
215
 
216
+