Spaces:

venkatl
/

crime

Sleeping

App Files Files Community

crime / old /deprecated.py

venkatl

Upload folder using huggingface_hub

54ba97b verified 5 months ago

raw

history blame contribute delete

2.07 kB

	# Tab 3: choose two columns of `df` and display the association computed
	# by `compute_correlation` when the button is pressed.
	with gr.Tab("3️⃣ Correlation Explorer"):
		# NOTE(review): col_select is rendered but is not passed to any event
		# handler in this block — confirm whether it is still needed.
		col_select = gr.Dropdown(
			label="Select Column",
			choices=categorical_cols.tolist(),
		)

		# The two columns handed to compute_correlation.
		col1 = gr.Dropdown(
			label="Column 1",
			choices=df.columns.tolist(),
		)
		col2 = gr.Dropdown(
			label="Column 2",
			choices=df.columns.tolist(),
		)

		corr_btn = gr.Button("Compute Correlation")

		# Outputs: a one-line textual result plus an optional crosstab table.
		corr_text = gr.Textbox(label="Correlation Result")
		confusion_out = gr.Dataframe(label="Categorical Crosstab (if applicable)")

		corr_btn.click(
			fn=compute_correlation,
			inputs=[col1, col2],
			outputs=[corr_text, confusion_out],
		)


	def _keyword_frequency(series):
		"""Score each row of *series* by how common its comma-separated keywords are.

		Every non-missing value is converted to ``str`` and split on commas; the
		stripped, non-empty pieces are the row's keywords. A row's score is the
		sum, over its keywords, of how many times each keyword occurs in the
		whole series. Missing values score 0.

		Returns an int64 Series aligned to ``series.index``.
		"""
		# Local import so this block does not depend on a file-level import.
		from collections import Counter

		# Mask missing values instead of fillna(""): fillna raises on
		# categorical dtype when "" is not an existing category.
		present = series.notna()
		row_tokens = [
			# str(value) keeps per-row scoring consistent with the counts even
			# when a cell holds a non-string (e.g. an int in an object column).
			[tok.strip() for tok in str(value).split(",") if tok.strip()] if ok else []
			for value, ok in zip(series, present)
		]
		counts = Counter(tok for tokens in row_tokens for tok in tokens)
		scores = [sum(counts[tok] for tok in tokens) for tokens in row_tokens]
		return pd.Series(scores, index=series.index, dtype="int64")


	def compute_correlation(col1, col2):
		"""Describe the association between two columns of the module-level ``df``.

		Relies on ``df``, ``numeric_cols``, ``categorical_cols`` and ``cramers_v``
		being defined elsewhere at module scope (not visible in this block).

		Args:
			col1: Label of the first column in ``df``.
			col2: Label of the second column in ``df``.

		Returns:
			A ``(message, table)`` pair. ``message`` is a formatted result string.
			``table`` is the ``pd.crosstab`` of the two columns when both are
			categorical, otherwise ``None``.
		"""
		c1 = df[col1]
		c2 = df[col2]

		# Case 1: numeric vs numeric
		if col1 in numeric_cols and col2 in numeric_cols:
			corr = c1.corr(c2)
			return f"Pearson Correlation = {corr:.4f}", None

		# Case 2: categorical vs categorical → Cramér’s V
		if col1 in categorical_cols and col2 in categorical_cols:
			confusion = pd.crosstab(c1, c2)
			v = cramers_v(confusion)
			return f"Cramér’s V = {v:.4f}", confusion

		# Case 3: keyword frequency vs numeric/categorical
		# Replace a categorical column by its per-row keyword-frequency score so
		# it can be correlated numerically with the other column.
		if col1 in categorical_cols:
			c1 = _keyword_frequency(c1)

		if col2 in categorical_cols:
			c2 = _keyword_frequency(c2)

		# NOTE(review): a column that is in neither numeric_cols nor
		# categorical_cols reaches this point unchanged — confirm that
		# Series.corr can handle its dtype.
		corr = c1.corr(c2)
		return f"Keyword-Frequency Based Correlation = {corr:.4f}", None