Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
def composite_correlations(R, composite_idx, var_names=None, augment=False):
    """Correlate a unit-weighted composite with every variable in ``R``.

    The composite is the plain sum of the variables selected by
    ``composite_idx``.  Its correlation with variable *j* is the sum of
    ``R[i, j]`` over the composite members, divided by
    ``sqrt(n + n*(n-1)*rbar)``, where ``n`` is the number of members and
    ``rbar`` is the mean of the upper-triangle correlations among them.
    That denominator counts each member's diagonal entry as 1, so ``R`` is
    expected to be a symmetric correlation matrix with a unit diagonal.

    Parameters
    ----------
    R : array-like, shape (p, p)
        Correlation matrix; converted with ``np.asarray(R, dtype=float)``.
    composite_idx : iterable of int
        Positions (rows/columns of ``R``) of the variables in the composite.
    var_names : sequence, optional
        Labels for the ``p`` variables.  When given, the correlations are
        returned as a ``pandas.Series`` named ``"Composite"`` and the
        augmented matrix as a labelled ``pandas.DataFrame``.
    augment : bool, default False
        When true, also return ``R`` extended by one row/column holding the
        composite's correlations (with 1.0 in the new diagonal cell).

    Returns
    -------
    r_comp : numpy.ndarray or pandas.Series
        Correlation of the composite with each of the ``p`` variables.
    (r_comp, R_aug) : tuple
        Returned instead of ``r_comp`` alone when ``augment`` is true;
        ``R_aug`` has shape ``(p + 1, p + 1)``.

    Raises
    ------
    ValueError
        If ``R`` is not a square 2-D matrix, if ``composite_idx`` is empty,
        or if the composite's variance is zero or negative (the correlation
        would be undefined).
    """
    R_mat = np.asarray(R, dtype=float)
    if R_mat.ndim != 2 or R_mat.shape[0] != R_mat.shape[1]:
        raise ValueError("R must be a square 2-D matrix.")

    members = list(composite_idx)
    n = len(members)
    if n == 0:
        raise ValueError("composite_idx must select at least one variable.")
    n_all = R_mat.shape[0]

    # Correlations among the composite's own members.
    R_yy = R_mat[np.ix_(members, members)]
    upper = R_yy[np.triu_indices(n, k=1)]
    # A single-member composite has no off-diagonal cells; treat rbar as 0.
    rbar = upper.mean() if upper.size else 0.0

    variance = n + n * (n - 1) * rbar
    # Only reject genuinely invalid values; NaN inputs still propagate as NaN.
    if variance <= 0:
        raise ValueError(
            f"Composite variance must be positive, got {variance}; "
            "check that R is a valid correlation matrix."
        )

    values = R_mat[members, :].sum(axis=0) / np.sqrt(variance)
    r_comp = values
    if var_names is not None:
        r_comp = pd.Series(values, index=var_names, name="Composite")
    if not augment:
        return r_comp

    # Border the original matrix with the composite's row and column.
    R_aug = np.zeros((n_all + 1, n_all + 1))
    R_aug[:n_all, :n_all] = R_mat
    R_aug[n_all, :n_all] = values
    R_aug[:n_all, n_all] = values
    R_aug[n_all, n_all] = 1.0
    if var_names is not None:
        labels = list(var_names) + ["Composite"]
        R_aug = pd.DataFrame(R_aug, index=labels, columns=labels)
    return r_comp, R_aug
| import os | |
# Environment tweaks for hosted deployment:
#   * HOME is redirected to the current working directory because the real
#     home directory may not be writable on Hugging Face Spaces.
#   * Usage-stats gathering is switched off to avoid write attempts.
# NOTE(review): these assignments run after `import streamlit` at the top of
# the file -- confirm that Streamlit still picks them up at that point.
os.environ.update(
    HOME=os.getcwd(),
    STREAMLIT_BROWSER_GATHER_USAGE_STATS='false',
)
# --- Streamlit UI: page title, short instructions, and the CSV upload widget ---
st.title("Composite-Correlation Calculator")

# Instruction text shown under the title.
intro_text = """
Upload a CSV containing your (possibly lower-triangular) correlation matrix.
The app will fill in missing cells by symmetry, set diagonals to 1,
then let you select two sets of variables to form two unit-weighted composites.
"""
st.markdown(intro_text)

# `uploaded` stays None until the user provides a file.
uploaded = st.file_uploader(
    "Upload correlation matrix (CSV)",
    type=["csv"],
)
if uploaded is not None:
    # 1) Read the upload; the first CSV column supplies the row labels.
    try:
        df = pd.read_csv(uploaded, index_col=0)
    except Exception:
        # UI boundary: report any parse failure to the user instead of crashing.
        st.error("Failed to read CSV. Make sure the first column contains row labels.")
        st.stop()

    if df.shape[0] != df.shape[1]:
        st.error("Matrix must be square.")
        st.stop()

    # Non-numeric cells would otherwise surface as an unhandled exception.
    try:
        mat = df.values.astype(float)
    except (TypeError, ValueError):
        st.error("Matrix must contain only numeric values.")
        st.stop()

    st.success(f"Loaded a {df.shape[0]}×{df.shape[1]} matrix.")

    # 2) Symmetrize: a missing cell takes the value of its mirror cell, and
    #    the diagonal is forced to 1.
    mat = np.where(np.isnan(mat), mat.T, mat)
    np.fill_diagonal(mat, 1.0)
    df_sym = pd.DataFrame(mat, index=df.index, columns=df.columns)
    st.write("**Symmetrized & filled diagonal:**")
    st.dataframe(df_sym)

    # Cells missing in BOTH triangles cannot be recovered by symmetry.
    if np.isnan(mat).any():
        st.warning(
            "Some correlations are missing in both triangles; "
            "results that depend on them will be NaN."
        )

    # 3) Let the user choose the members of each composite.
    all_vars = list(df_sym.columns)
    default_vars = all_vars[:3]
    cols1, cols2 = st.columns(2)
    with cols1:
        group1 = st.multiselect(
            "Select variables for Composite 1",
            options=all_vars,
            default=default_vars,
            key='g1',
        )
    with cols2:
        group2 = st.multiselect(
            "Select variables for Composite 2",
            options=all_vars,
            default=default_vars,
            key='g2',
        )

    if len(group1) < 2 or len(group2) < 2:
        st.warning("Pick at least 2 variables for each composite.")
    elif st.button("Compute composites and their correlations"):
        idx1 = [all_vars.index(v) for v in group1]
        idx2 = [all_vars.index(v) for v in group2]
        R_mat = df_sym.values
        n_vars = len(all_vars)

        def composite_variance(idx):
            """Return n + n*(n-1)*rbar for the variables at positions ``idx``."""
            sub = R_mat[np.ix_(idx, idx)]
            upper = sub[np.triu_indices(len(idx), k=1)]
            rbar = upper.mean() if upper.size else 0.0
            return len(idx) + len(idx) * (len(idx) - 1) * rbar

        var1 = composite_variance(idx1)
        var2 = composite_variance(idx2)
        # A zero or negative variance makes every composite correlation
        # undefined (sqrt of a negative / division by zero).
        if var1 <= 0 or var2 <= 0:
            st.error(
                "A selected composite has zero or negative variance; "
                "check that the uploaded matrix is a valid correlation matrix."
            )
            st.stop()

        # Each composite against every individual variable.
        r_comp1 = composite_correlations(
            R_mat, composite_idx=idx1, var_names=all_vars, augment=False
        )
        r_comp2 = composite_correlations(
            R_mat, composite_idx=idx2, var_names=all_vars, augment=False
        )

        # Composite against composite: sum of R[i, j] for i in group 1 and
        # j in group 2, divided by the product of the two denominators.
        numer_cc = R_mat[np.ix_(idx1, idx2)].sum()
        r_cc = numer_cc / (np.sqrt(var1) * np.sqrt(var2))

        st.subheader("Composite 1 vs. Each Variable")
        st.dataframe(r_comp1.to_frame(name="Comp1"))
        st.subheader("Composite 2 vs. Each Variable")
        st.dataframe(r_comp2.to_frame(name="Comp2"))
        st.subheader("Composite1 vs Composite2 Correlation")
        st.write(f"**r = {r_cc:.4f}**")

        st.subheader("Augmented Correlation Matrix (with Composites)")
        # Fill by position so an uploaded variable that happens to be called
        # "Comp1"/"Comp2" cannot collide with the two new rows/columns.
        c1, c2 = n_vars, n_vars + 1
        aug = np.zeros((n_vars + 2, n_vars + 2))
        aug[:n_vars, :n_vars] = R_mat
        aug[c1, :n_vars] = aug[:n_vars, c1] = r_comp1.values
        aug[c2, :n_vars] = aug[:n_vars, c2] = r_comp2.values
        aug[c1, c1] = aug[c2, c2] = 1.0
        aug[c1, c2] = aug[c2, c1] = r_cc
        labels = all_vars + ["Comp1", "Comp2"]
        R_aug2 = pd.DataFrame(aug, index=labels, columns=labels)
        st.dataframe(R_aug2)
else:
    st.info("🤖 Upload a CSV file to get started.")