crime / old /deprecated.py
venkatl's picture
Upload folder using huggingface_hub
54ba97b verified
with gr.Tab("3️⃣ Correlation Explorer"):
    # Single-column picker limited to the categorical columns.
    # It is not wired to any event inside this tab block.
    col_select = gr.Dropdown(
        label="Select Column",
        choices=categorical_cols.tolist(),
    )

    # The two columns to compare; any dataframe column may be chosen.
    col1 = gr.Dropdown(
        label="Column 1",
        choices=df.columns.tolist(),
    )
    col2 = gr.Dropdown(
        label="Column 2",
        choices=df.columns.tolist(),
    )

    corr_btn = gr.Button("Compute Correlation")

    # Output widgets: a one-line summary and an optional crosstab table.
    corr_text = gr.Textbox(label="Correlation Result")
    confusion_out = gr.Dataframe(label="Categorical Crosstab (if applicable)")

    # On click, pass the two selected column names to compute_correlation
    # and route its two return values to the text box and the table.
    corr_btn.click(
        fn=compute_correlation,
        inputs=[col1, col2],
        outputs=[corr_text, confusion_out],
    )
def _keyword_frequency_scores(series):
    """Score each row by how frequent its comma-separated keywords are.

    Every cell is split on commas and each keyword is stripped of
    surrounding whitespace; empty keywords are ignored. A row's score is the
    sum, over its keywords, of how many times that keyword occurs in the
    whole column. Missing cells score 0.

    Args:
        series: pandas Series holding the keyword column.

    Returns:
        An int64 pandas Series of scores sharing ``series``'s index.
    """
    from collections import Counter

    # Tokenise each cell exactly once. str() keeps non-string cells (numbers,
    # booleans, categorical values) from breaking the split, and the notna()
    # mask treats every missing-value marker as "no keywords".
    token_rows = [
        [part.strip() for part in str(value).split(",") if part.strip()]
        if is_present
        else []
        for value, is_present in zip(series, series.notna())
    ]
    counts = Counter(token for row in token_rows for token in row)
    return pd.Series(
        [sum(counts[token] for token in row) for row in token_rows],
        index=series.index,
        dtype="int64",
    )


def compute_correlation(col1, col2):
    """Describe the association between two columns of the module-level ``df``.

    The measure depends on which of the module-level ``numeric_cols`` /
    ``categorical_cols`` collections the column names belong to:

    * both numeric      -> ``Series.corr`` of the two columns
    * both categorical  -> ``cramers_v`` of their crosstab (crosstab returned)
    * anything else     -> each categorical column is replaced by its
      keyword-frequency score, then ``Series.corr`` is applied

    Args:
        col1: Name of the first column, or None if nothing is selected.
        col2: Name of the second column, or None if nothing is selected.

    Returns:
        A ``(message, table)`` tuple. ``table`` is the crosstab DataFrame in
        the categorical/categorical case and None otherwise.
    """
    # A dropdown with no selection yields None; report it instead of letting
    # df[None] raise a KeyError.
    if col1 is None or col2 is None:
        return "Please select both Column 1 and Column 2.", None

    c1 = df[col1]
    c2 = df[col2]

    # Case 1: numeric vs numeric
    if col1 in numeric_cols and col2 in numeric_cols:
        corr = c1.corr(c2)
        return f"Pearson Correlation = {corr:.4f}", None

    # Case 2: categorical vs categorical -> Cramér's V on the crosstab
    if col1 in categorical_cols and col2 in categorical_cols:
        confusion = pd.crosstab(c1, c2)
        v = cramers_v(confusion)
        return f"Cramér’s V = {v:.4f}", confusion

    # Case 3: mixed types. Turn whichever side is categorical into a numeric
    # keyword-frequency score so the two sides can be correlated.
    if col1 in categorical_cols:
        c1 = _keyword_frequency_scores(c1)
    if col2 in categorical_cols:
        c2 = _keyword_frequency_scores(c2)

    corr = c1.corr(c2)
    return f"Keyword-Frequency Based Correlation = {corr:.4f}", None