with gr.Tab("3️⃣ Correlation Explorer"):
    # Selector limited to the categorical columns; it is not passed to the
    # click handler registered at the end of this tab.
    col_select = gr.Dropdown(
        label="Select Column",
        choices=categorical_cols.tolist(),
    )

    # The two columns to compare; any column of the DataFrame may be chosen.
    col1 = gr.Dropdown(
        label="Column 1",
        choices=df.columns.tolist(),
    )
    col2 = gr.Dropdown(
        label="Column 2",
        choices=df.columns.tolist(),
    )

    # Trigger plus the two outputs: a text summary and a crosstab table.
    corr_btn = gr.Button("Compute Correlation")
    corr_text = gr.Textbox(label="Correlation Result")
    confusion_out = gr.Dataframe(label="Categorical Crosstab (if applicable)")

    # compute_correlation returns a (text, table-or-None) pair matching the
    # two outputs listed here.
    corr_btn.click(
        fn=compute_correlation,
        inputs=[col1, col2],
        outputs=[corr_text, confusion_out],
    )
|
|
|
|
def _keyword_frequency_scores(series):
    """Score each cell by how common its comma-separated keywords are.

    Every non-null cell is rendered as text and split on commas. Each
    keyword is counted across the whole column, and a cell's score is the
    sum of the column-wide counts of its own non-empty keywords. Null cells
    score 0.

    Args:
        series: The column to score.

    Returns:
        A Series aligned with ``series`` holding one numeric score per row.
    """
    # Cast to plain objects first so category-dtype columns accept the ""
    # placeholder and non-string cells can be split like text.
    as_objects = series.astype(object)

    keywords = (
        as_objects.dropna().astype(str).str.split(",").explode().str.strip()
    )
    keyword_counts = keywords.value_counts()

    text_cells = as_objects.where(as_objects.notna(), "").astype(str)

    def score(cell):
        parts = (part.strip() for part in cell.split(","))
        return sum(keyword_counts.get(part, 0) for part in parts if part)

    return text_cells.apply(score)


def compute_correlation(col1, col2):
    """Describe the association between two columns of the module-level ``df``.

    The measure depends on whether each column name appears in the
    module-level ``numeric_cols`` / ``categorical_cols`` collections:

    * both numeric: Pearson correlation via ``Series.corr``;
    * both categorical: a crosstab plus the value of ``cramers_v`` on it
      (``cramers_v`` is defined elsewhere in the file);
    * otherwise: each categorical column is replaced by its
      keyword-frequency score and the two resulting series are correlated.

    Args:
        col1: Name of the first column, or ``None`` if nothing is selected.
        col2: Name of the second column, or ``None`` if nothing is selected.

    Returns:
        A ``(message, table)`` pair. ``table`` is the crosstab DataFrame in
        the categorical/categorical case and ``None`` otherwise.
    """
    # An empty selection would otherwise surface as a KeyError from df[None].
    if col1 is None or col2 is None:
        return "Please select both columns.", None

    first = df[col1]
    second = df[col2]

    if col1 in numeric_cols and col2 in numeric_cols:
        corr = first.corr(second)
        return f"Pearson Correlation = {corr:.4f}", None

    first_is_categorical = col1 in categorical_cols
    second_is_categorical = col2 in categorical_cols

    if first_is_categorical and second_is_categorical:
        confusion = pd.crosstab(first, second)
        v = cramers_v(confusion)
        return f"Cramér’s V = {v:.4f}", confusion

    # Mixed case: turn the categorical side(s) into numeric scores so that
    # Series.corr can be applied.
    if first_is_categorical:
        first = _keyword_frequency_scores(first)
    if second_is_categorical:
        second = _keyword_frequency_scores(second)

    corr = first.corr(second)
    return f"Keyword-Frequency Based Correlation = {corr:.4f}", None
|
|
|
|