import streamlit as st
import pandas as pd
import numpy as np
def composite_correlations(R, composite_idx, var_names=None, augment=False):
    """Correlate a unit-weighted composite with every column of ``R``.

    The numerator for column ``j`` is the sum of ``R[i, j]`` over the
    composite items ``i``.  The denominator is
    ``sqrt(n + n * (n - 1) * rbar)``, where ``n`` is the number of items and
    ``rbar`` is the mean of the upper-triangle correlations among the items.
    The diagonal of ``R`` is not read for the denominator: each item
    contributes exactly 1, i.e. ``R`` is treated as a correlation matrix
    with a unit diagonal.

    Parameters
    ----------
    R : array-like
        Correlation matrix; converted with ``np.asarray(R, dtype=float)``.
    composite_idx : sequence of int
        Positions of the variables that make up the composite.
    var_names : sequence, optional
        Labels for the columns of ``R``.  When given, the correlations are
        returned as a ``pandas.Series`` named ``"Composite"`` and the
        augmented matrix as a ``pandas.DataFrame``.
    augment : bool, default False
        When true, also return ``R`` extended by one row and one column
        holding the composite's correlations (with 1.0 on the new diagonal
        cell).

    Returns
    -------
    r_comp : numpy.ndarray or pandas.Series
        Correlation of the composite with each column of ``R``.
    (r_comp, R_aug) : tuple
        Returned instead of ``r_comp`` alone when ``augment`` is true.

    Raises
    ------
    ValueError
        If ``composite_idx`` is empty (the composite would be undefined and
        the computation would reduce to ``0 / 0``).

    Notes
    -----
    If ``n + n * (n - 1) * rbar`` is not positive, the division is still
    carried out and the result contains NaN/inf values.
    """
    R_mat = np.asarray(R, dtype=float)
    items = list(composite_idx)
    n = len(items)
    if n == 0:
        raise ValueError("composite_idx must select at least one variable.")

    # Correlations among the composite's own items.
    R_yy = R_mat[np.ix_(items, items)]
    upper = np.triu_indices(n, k=1)
    # A single-item composite has no off-diagonal cells; rbar = 0 then makes
    # the denominator exactly 1.
    rbar = R_yy[upper].mean() if upper[0].size > 0 else 0.0
    denom = np.sqrt(n + n * (n - 1) * rbar)
    numer = R_mat[items, :].sum(axis=0)
    r_comp = numer / denom

    if var_names is not None:
        r_comp = pd.Series(r_comp, index=var_names, name="Composite")
    if not augment:
        return r_comp

    # Build the augmented matrix positionally, then label it if requested.
    n_all = R_mat.shape[0]
    r_values = np.asarray(r_comp)
    R_aug = np.zeros((n_all + 1, n_all + 1))
    R_aug[:n_all, :n_all] = R_mat
    R_aug[n_all, :n_all] = r_values
    R_aug[:n_all, n_all] = r_values
    R_aug[n_all, n_all] = 1.0
    if var_names is not None:
        labels = list(var_names) + ["Composite"]
        R_aug = pd.DataFrame(R_aug, index=labels, columns=labels)
    return r_comp, R_aug
import os
# Environment overrides for hosted deployment:
#   HOME  -> current working directory, because the home directory on
#            Huggingface Spaces may be unwritable;
#   STREAMLIT_BROWSER_GATHER_USAGE_STATS -> 'false', to disable streamlit
#            usage stats and avoid write attempts.
os.environ.update(
    {
        'HOME': os.getcwd(),
        'STREAMLIT_BROWSER_GATHER_USAGE_STATS': 'false',
    }
)
# Streamlit UI


def _composite_variance(R_mat, idx):
    """Return ``n + n * (n - 1) * rbar`` for the items at positions ``idx``.

    ``rbar`` is the mean of the upper-triangle entries of the items'
    sub-matrix (0.0 when there is only one item).  The square root of this
    value is the denominator used for composite correlations.
    """
    n_items = len(idx)
    sub = R_mat[np.ix_(idx, idx)]
    upper = np.triu_indices(n_items, k=1)
    rbar = sub[upper].mean() if upper[0].size > 0 else 0.0
    return n_items + n_items * (n_items - 1) * rbar


st.title("Composite-Correlation Calculator")
st.markdown(
    """
Upload a CSV containing your (possibly lower-triangular) correlation matrix.
The app will fill in missing cells by symmetry, set diagonals to 1,
then let you select two sets of variables to form two unit-weighted composites.
"""
)

uploaded = st.file_uploader("Upload correlation matrix (CSV)", type=["csv"])

if uploaded is not None:
    # 1) read and label
    try:
        df = pd.read_csv(uploaded, index_col=0)
    except Exception:  # UI boundary: every parse failure gets the same message
        st.error("Failed to read CSV. Make sure the first column contains row labels.")
        st.stop()

    if df.shape[0] != df.shape[1]:
        st.error("Matrix must be square.")
        st.stop()

    # Non-numeric cells would otherwise surface as an uncaught exception.
    try:
        mat = df.values.astype(float)
    except (TypeError, ValueError):
        st.error("Matrix must contain only numeric values.")
        st.stop()

    st.success(f"Loaded a {df.shape[0]}×{df.shape[1]} matrix.")

    # 2) symmetrize and fill diagonal
    # Each missing cell takes the value of its mirror cell across the diagonal.
    mat = np.where(np.isnan(mat), mat.T, mat)
    np.fill_diagonal(mat, 1.0)
    df_sym = pd.DataFrame(mat, index=df.index, columns=df.columns)
    st.write("**Symmetrized & filled diagonal:**")
    st.dataframe(df_sym)

    # Cells absent from both halves cannot be filled by symmetry.
    if np.isnan(mat).any():
        st.warning(
            "Some correlations are missing from both halves of the matrix; "
            "results that involve them will be NaN."
        )

    all_vars = list(df_sym.columns)
    default_vars = all_vars[:3]
    cols1, cols2 = st.columns(2)
    with cols1:
        group1 = st.multiselect(
            "Select variables for Composite 1",
            options=all_vars,
            default=default_vars,
            key='g1',
        )
    with cols2:
        group2 = st.multiselect(
            "Select variables for Composite 2",
            options=all_vars,
            default=default_vars,
            key='g2',
        )

    if len(group1) < 2 or len(group2) < 2:
        st.warning("Pick at least 2 variables for each composite.")
    elif st.button("Compute composites and their correlations"):
        idx1 = [all_vars.index(v) for v in group1]
        idx2 = [all_vars.index(v) for v in group2]
        R_mat = df_sym.values

        # Guard against composites whose correlations are undefined; without
        # this the results below would silently be NaN or inf.
        var1 = _composite_variance(R_mat, idx1)
        var2 = _composite_variance(R_mat, idx2)
        if not (var1 > 0 and var2 > 0):
            st.error(
                "A composite has zero, negative, or undefined variance "
                "(its items cancel out or include missing correlations), "
                "so its correlations cannot be computed."
            )
            st.stop()

        # compute each composite vs all vars
        r_comp1 = composite_correlations(
            R_mat, composite_idx=idx1, var_names=all_vars, augment=False
        )
        r_comp2 = composite_correlations(
            R_mat, composite_idx=idx2, var_names=all_vars, augment=False
        )

        # compute composite vs composite:
        # sum of R[i, j] for i in idx1, j in idx2, over both denominators
        numer_cc = R_mat[np.ix_(idx1, idx2)].sum()
        r_cc = numer_cc / (np.sqrt(var1) * np.sqrt(var2))

        st.subheader("Composite 1 vs. Each Variable")
        st.dataframe(r_comp1.to_frame(name="Comp1"))
        st.subheader("Composite 2 vs. Each Variable")
        st.dataframe(r_comp2.to_frame(name="Comp2"))
        st.subheader("Composite1 vs Composite2 Correlation")
        st.write(f"**r = {r_cc:.4f}**")

        st.subheader("Augmented Correlation Matrix (with Composites)")
        # Build the augmented matrix by position so that a variable that is
        # itself named "Comp1"/"Comp2" cannot be overwritten by label lookup.
        n_vars = len(all_vars)
        labels = all_vars + ["Comp1", "Comp2"]
        aug = np.zeros((n_vars + 2, n_vars + 2))
        aug[:n_vars, :n_vars] = R_mat
        for offset, r_comp in enumerate((r_comp1, r_comp2)):
            pos = n_vars + offset
            aug[pos, :n_vars] = r_comp.values
            aug[:n_vars, pos] = r_comp.values
            aug[pos, pos] = 1.0
        aug[n_vars, n_vars + 1] = r_cc
        aug[n_vars + 1, n_vars] = r_cc
        st.dataframe(pd.DataFrame(aug, index=labels, columns=labels))
else:
    st.info("🤖 Upload a CSV file to get started.")
|