# NOTE: hosting-page header captured together with the file (not program code):
#   hfariborzi's picture / Update app.py / 337939b verified
import streamlit as st
import pandas as pd
import numpy as np
def composite_correlations(R, composite_idx, var_names=None, augment=False):
    """Correlate a unit-weighted composite with every variable in ``R``.

    The composite is the unweighted sum of the variables selected by
    ``composite_idx``.  Its variance is computed as
    ``k + k * (k - 1) * rbar`` where ``k`` is the number of selected
    variables and ``rbar`` the mean of the upper-triangle correlations among
    them; this formula takes the diagonal of ``R`` to be 1.  Each variable's
    correlation with the composite is the sum of its correlations with the
    selected variables (including itself when it is one of them) divided by
    the composite's standard deviation.

    Parameters
    ----------
    R : array-like
        Square correlation matrix, shape ``(p, p)``.
    composite_idx : sequence of int
        Positional indices of the variables forming the composite.
    var_names : sequence of str, optional
        Labels for the ``p`` variables.  When given, results are returned as
        pandas objects labelled with these names.
    augment : bool, default False
        When True, also return ``R`` extended by one row/column holding the
        composite's correlations (labelled ``"Composite"`` if ``var_names``
        is given).

    Returns
    -------
    r_comp : numpy.ndarray or pandas.Series
        Correlation of the composite with each of the ``p`` variables
        (a Series named ``"Composite"`` when ``var_names`` is given).
    R_aug : numpy.ndarray or pandas.DataFrame
        Only when ``augment`` is True: the ``(p + 1, p + 1)`` augmented
        matrix, returned as the second element of a tuple.

    Raises
    ------
    ValueError
        If ``R`` is not a square 2-D matrix, if ``composite_idx`` is empty,
        or if the composite variance is zero or negative (the correlations
        would be undefined).
    """
    R_mat = np.asarray(R, dtype=float)
    if R_mat.ndim != 2 or R_mat.shape[0] != R_mat.shape[1]:
        raise ValueError("R must be a square 2-D correlation matrix.")

    # A list (rather than e.g. a tuple) guarantees that NumPy treats the
    # indices as a selection along one axis.
    items = list(composite_idx)
    k = len(items)
    if k == 0:
        raise ValueError("composite_idx must contain at least one index.")
    n_all = R_mat.shape[0]

    # Mean off-diagonal correlation among the composite's items; a single
    # item has no off-diagonal entries, so its mean is taken as 0.
    within = R_mat[np.ix_(items, items)]
    upper = within[np.triu_indices(k, k=1)]
    rbar = upper.mean() if upper.size else 0.0

    comp_var = k + k * (k - 1) * rbar
    # NaN compares False here, so missing data still propagates as NaN
    # instead of being reported as a variance problem.
    if comp_var <= 0:
        raise ValueError(
            "Composite variance is zero or negative; "
            "its correlations are undefined."
        )

    r_vals = R_mat[items, :].sum(axis=0) / np.sqrt(comp_var)

    labelled = var_names is not None
    r_comp = (
        pd.Series(r_vals, index=var_names, name="Composite")
        if labelled
        else r_vals
    )
    if not augment:
        return r_comp

    # Build the augmented matrix once with NumPy, then label it if requested.
    R_aug = np.empty((n_all + 1, n_all + 1))
    R_aug[:n_all, :n_all] = R_mat
    R_aug[n_all, :n_all] = r_vals
    R_aug[:n_all, n_all] = r_vals
    R_aug[n_all, n_all] = 1.0
    if labelled:
        labels = list(var_names) + ["Composite"]
        R_aug = pd.DataFrame(R_aug, index=labels, columns=labels)
    return r_comp, R_aug
import os

# Hugging Face Spaces may provide an unwritable home directory, so HOME is
# redirected to the current working directory; Streamlit's usage-stats
# gathering is switched off as well so that it makes no write attempts.
os.environ.update(
    {
        "HOME": os.getcwd(),
        "STREAMLIT_BROWSER_GATHER_USAGE_STATS": "false",
    }
)
# Streamlit UI: upload a correlation matrix, pick two sets of variables, and
# report each unit-weighted composite's correlations with every variable and
# with the other composite.
st.title("Composite-Correlation Calculator")
st.markdown(
    """
Upload a CSV containing your (possibly lower-triangular) correlation matrix.
The app will fill in missing cells by symmetry, set diagonals to 1,
then let you select two sets of variables to form two unit-weighted composites.
"""
)

uploaded = st.file_uploader("Upload correlation matrix (CSV)", type=["csv"])

if uploaded is not None:
    # 1) Read the CSV; its first column supplies the row labels.
    try:
        df = pd.read_csv(uploaded, index_col=0)
    except Exception:
        st.error("Failed to read CSV. Make sure the first column contains row labels.")
        st.stop()

    if df.shape[0] != df.shape[1]:
        st.error("Matrix must be square.")
        st.stop()

    # Report non-numeric cells as a readable message instead of letting the
    # float conversion fail with a raw traceback.
    try:
        mat = df.values.astype(float)
    except (TypeError, ValueError):
        st.error("Matrix contains non-numeric cells. Every entry must be a number or blank.")
        st.stop()

    st.success(f"Loaded a {df.shape[0]}×{df.shape[1]} matrix.")

    # 2) Symmetrize: each missing cell takes the value of its mirror cell,
    #    then the diagonal is forced to 1.
    mat = np.where(np.isnan(mat), mat.T, mat)
    np.fill_diagonal(mat, 1.0)
    if np.isnan(mat).any():
        # A cell absent from both triangles cannot be filled by symmetry.
        st.warning(
            "Some cells are missing in both triangles of the matrix; "
            "results that depend on them will be NaN."
        )
    df_sym = pd.DataFrame(mat, index=df.index, columns=df.columns)

    st.write("**Symmetrized & filled diagonal:**")
    st.dataframe(df_sym)

    # 3) Variable selection for the two composites (first three by default).
    all_vars = list(df_sym.columns)
    default_vars = all_vars[:3]
    cols1, cols2 = st.columns(2)
    with cols1:
        group1 = st.multiselect(
            "Select variables for Composite 1",
            options=all_vars,
            default=default_vars,
            key='g1',
        )
    with cols2:
        group2 = st.multiselect(
            "Select variables for Composite 2",
            options=all_vars,
            default=default_vars,
            key='g2',
        )

    if len(group1) < 2 or len(group2) < 2:
        st.warning("Pick at least 2 variables for each composite.")
    elif st.button("Compute composites and their correlations"):
        idx1 = [all_vars.index(v) for v in group1]
        idx2 = [all_vars.index(v) for v in group2]
        R_mat = df_sym.values

        def composite_variance(idx):
            """Return k + k*(k-1)*rbar for the k variables in ``idx``.

            ``rbar`` is the mean upper-triangle correlation among them; the
            square root of this value is the denominator used for the
            composite's correlations.
            """
            k = len(idx)
            sub = R_mat[np.ix_(idx, idx)]
            upper = sub[np.triu_indices(k, k=1)]
            rbar = upper.mean() if upper.size else 0.0
            return k + k * (k - 1) * rbar

        var1 = composite_variance(idx1)
        var2 = composite_variance(idx2)
        # A non-positive variance would turn every correlation into NaN/inf;
        # stop with an explanation instead.  (NaN variances compare False
        # here and flow through as NaN results, as flagged by the warning
        # above.)
        if var1 <= 0 or var2 <= 0:
            st.error(
                "A selected composite has zero or negative variance, so its "
                "correlations are undefined. Check the matrix or choose "
                "different variables."
            )
            st.stop()

        # Each composite vs. all variables.
        r_comp1 = composite_correlations(
            df_sym.values, composite_idx=idx1, var_names=all_vars, augment=False
        )
        r_comp2 = composite_correlations(
            df_sym.values, composite_idx=idx2, var_names=all_vars, augment=False
        )

        # Composite vs. composite: sum of R[i, j] for i in idx1, j in idx2,
        # divided by the product of the two composite denominators.
        numer_cc = R_mat[np.ix_(idx1, idx2)].sum()
        r_cc = numer_cc / (np.sqrt(var1) * np.sqrt(var2))

        st.subheader("Composite 1 vs. Each Variable")
        st.dataframe(r_comp1.to_frame(name="Comp1"))
        st.subheader("Composite 2 vs. Each Variable")
        st.dataframe(r_comp2.to_frame(name="Comp2"))
        st.subheader("Composite1 vs Composite2 Correlation")
        st.write(f"**r = {r_cc:.4f}**")

        st.subheader("Augmented Correlation Matrix (with Composites)")
        # Original matrix plus one row/column per composite.
        n_vars = len(all_vars)
        labels = all_vars + ["Comp1", "Comp2"]
        R_aug2 = pd.DataFrame(
            np.zeros((n_vars + 2, n_vars + 2)), index=labels, columns=labels
        )
        R_aug2.iloc[:n_vars, :n_vars] = R_mat
        R_aug2.loc["Comp1", all_vars] = r_comp1.values
        R_aug2.loc[all_vars, "Comp1"] = r_comp1.values
        R_aug2.loc["Comp1", "Comp1"] = 1.0
        R_aug2.loc["Comp2", all_vars] = r_comp2.values
        R_aug2.loc[all_vars, "Comp2"] = r_comp2.values
        R_aug2.loc["Comp2", "Comp2"] = 1.0
        R_aug2.loc["Comp1", "Comp2"] = r_cc
        R_aug2.loc["Comp2", "Comp1"] = r_cc
        st.dataframe(R_aug2)
else:
    st.info("🤖 Upload a CSV file to get started.")