File size: 5,765 Bytes
3b6c3ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60f9996
4c26f88
 
da2973a
4c26f88
 
 
3b6c3ea
 
4c26f88
 
 
 
 
 
 
3b6c3ea
 
 
 
60f9996
3b6c3ea
 
 
 
 
 
 
 
 
 
 
 
4c26f88
 
3b6c3ea
 
 
 
 
4c26f88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7044da
4c26f88
 
 
 
337939b
4c26f88
 
337939b
4c26f88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b6c3ea
4c26f88
 
 
 
 
 
 
 
 
 
3b6c3ea
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import streamlit as st
import pandas as pd
import numpy as np

def composite_correlations(R, composite_idx, var_names=None, augment=False):
    """Correlate a unit-weighted composite with every variable in ``R``.

    The composite is formed from the variables at positions ``composite_idx``.
    Its correlation with variable ``j`` is the sum of the correlations between
    each composite item and ``j``, divided by ``sqrt(n + n*(n-1)*rbar)``, where
    ``n`` is the number of items and ``rbar`` is the mean of the upper-triangle
    correlations among the items.  The denominator formula assumes ``R`` is
    symmetric with a unit diagonal.  For a variable that is itself one of the
    items, its own diagonal entry is part of the numerator.

    Parameters
    ----------
    R : array-like
        Square correlation matrix; converted to a float ndarray.
    composite_idx : sequence of int
        Positions (rows/columns of ``R``) of the items forming the composite.
    var_names : sequence of str, optional
        Labels for the variables in ``R``.  When given, results are returned
        as pandas objects labelled with these names.
    augment : bool, default False
        When true, also return ``R`` extended by one row/column holding the
        composite.

    Returns
    -------
    r_comp : numpy.ndarray or pandas.Series
        Correlation of the composite with each variable (a Series named
        ``"Composite"`` when ``var_names`` is given).
    (r_comp, R_aug) : tuple
        Only when ``augment`` is true.  ``R_aug`` is an ndarray, or a
        DataFrame labelled ``var_names + ["Composite"]`` when ``var_names``
        is given.

    Raises
    ------
    ValueError
        If ``R`` is not a square 2-D matrix, if ``composite_idx`` is empty,
        or if the composite's variance is not positive (the correlations
        would be undefined).
    """
    R_mat = np.asarray(R, dtype=float)
    if R_mat.ndim != 2 or R_mat.shape[0] != R_mat.shape[1]:
        raise ValueError("R must be a square 2-D matrix.")

    items = list(composite_idx)
    n = len(items)
    if n == 0:
        raise ValueError("composite_idx must contain at least one variable.")
    n_all = R_mat.shape[0]

    # Correlations among the composite's own items.
    R_yy = R_mat[np.ix_(items, items)]
    upper = R_yy[np.triu_indices(n, k=1)]
    # A single-item composite has no off-diagonal cells; treat rbar as 0 so
    # the variance below reduces to 1.
    rbar = upper.mean() if upper.size else 0.0

    variance = n + n * (n - 1) * rbar
    # Fail loudly instead of returning inf/NaN from a zero or negative
    # variance (possible with perfectly negative items or a non-PSD matrix).
    if variance <= 0:
        raise ValueError(
            "Composite variance is not positive; "
            "its correlations are undefined for these items."
        )

    corr = R_mat[items, :].sum(axis=0) / np.sqrt(variance)

    if var_names is not None:
        r_comp = pd.Series(corr, index=var_names, name="Composite")
    else:
        r_comp = corr
    if not augment:
        return r_comp

    # Build the augmented matrix once as an ndarray, then label it if asked.
    R_aug = np.zeros((n_all + 1, n_all + 1))
    R_aug[:n_all, :n_all] = R_mat
    R_aug[n_all, :n_all] = corr
    R_aug[:n_all, n_all] = corr
    R_aug[n_all, n_all] = 1.0
    if var_names is not None:
        labels = list(var_names) + ["Composite"]
        R_aug = pd.DataFrame(R_aug, index=labels, columns=labels)
    return r_comp, R_aug

import os

# Process-environment overrides applied at script start:
#   HOME  -> the current working directory, because on Huggingface Spaces the
#            home directory may be unwritable.
#   STREAMLIT_BROWSER_GATHER_USAGE_STATS -> 'false', so usage-stat collection
#            does not attempt to write anything.
# NOTE(review): these run after `import streamlit` at the top of the file;
# confirm the overrides still take effect early enough for the deployment.
os.environ.update(
    HOME=os.getcwd(),
    STREAMLIT_BROWSER_GATHER_USAGE_STATS='false',
)

# Streamlit UI
#
# Flow: upload a CSV correlation matrix -> validate and symmetrize it ->
# pick two groups of variables -> show each composite's correlation with
# every variable, the composite-vs-composite correlation, and the matrix
# augmented with both composites.
st.title("Composite-Correlation Calculator")
st.markdown(
    """
    Upload a CSV containing your (possibly lower-triangular) correlation matrix.  
    The app will fill in missing cells by symmetry, set diagonals to 1,  
    then let you select two sets of variables to form two unit-weighted composites.
    """
)

uploaded = st.file_uploader("Upload correlation matrix (CSV)", type=["csv"])
if uploaded is not None:
    # 1) read and label
    try:
        df = pd.read_csv(uploaded, index_col=0)
    except Exception:
        # UI boundary: whatever the parser raised, report it to the user
        # instead of showing a traceback.
        st.error("Failed to read CSV. Make sure the first column contains row labels.")
        st.stop()
    if df.shape[0] != df.shape[1]:
        st.error("Matrix must be square.")
        st.stop()

    # Convert before announcing success so that text cells produce a readable
    # error rather than an unhandled exception.
    try:
        mat = df.values.astype(float)
    except (TypeError, ValueError):
        st.error(
            "Matrix contains non-numeric cells. "
            "Leave unknown correlations empty and use numbers everywhere else."
        )
        st.stop()

    st.success(f"Loaded a {df.shape[0]}×{df.shape[1]} matrix.")

    # Filling by symmetry works on positions, so it is only meaningful when
    # rows and columns list the variables in the same order.
    if [str(lbl) for lbl in df.index] != [str(lbl) for lbl in df.columns]:
        st.warning(
            "Row labels do not match column labels; the matrix is symmetrized "
            "by position and variables are named after the columns."
        )

    # 2) symmetrize and fill diagonal
    mat = np.where(np.isnan(mat), mat.T, mat)
    np.fill_diagonal(mat, 1.0)
    df_sym = pd.DataFrame(mat, index=df.index, columns=df.columns)
    st.write("**Symmetrized & filled diagonal:**")
    st.dataframe(df_sym)

    # Cells missing in both triangles cannot be recovered by symmetry.
    n_missing = int(np.isnan(mat).sum())
    if n_missing:
        st.warning(
            f"{n_missing} cell(s) are missing in both halves of the matrix; "
            "results that depend on them will be NaN."
        )

    all_vars = list(df_sym.columns)
    cols1, cols2 = st.columns(2)
    with cols1:
        group1 = st.multiselect(
            "Select variables for Composite 1",
            options=all_vars,
            default=all_vars[: min(3, len(all_vars))],
            key='g1'
        )
    with cols2:
        group2 = st.multiselect(
            "Select variables for Composite 2",
            options=all_vars,
            default=all_vars[: min(3, len(all_vars))],
            key='g2'
        )

    ok1 = len(group1) >= 2
    ok2 = len(group2) >= 2
    if not ok1 or not ok2:
        st.warning("Pick at least 2 variables for each composite.")
    else:
        if st.button("Compute composites and their correlations"):
            idx1 = [all_vars.index(v) for v in group1]
            idx2 = [all_vars.index(v) for v in group2]
            R_mat = df_sym.values
            n_vars = len(all_vars)

            try:
                # Composite 1 vs all variables, plus the matrix augmented
                # with Composite 1 as an extra last variable.
                r_comp1, R_aug1 = composite_correlations(
                    R_mat, composite_idx=idx1, var_names=all_vars, augment=True
                )
                # Running Composite 2 against the augmented matrix yields its
                # correlation with every variable AND (last entry) with
                # Composite 1, so the denominator formula lives in one place.
                r_comp2_ext = composite_correlations(
                    R_aug1.values,
                    composite_idx=idx2,
                    var_names=all_vars + ["Comp1"],
                    augment=False,
                )
            except ValueError as exc:
                st.error(f"Could not compute composites: {exc}")
                st.stop()

            r_comp2 = r_comp2_ext.iloc[:n_vars]
            r_cc = float(r_comp2_ext.iloc[n_vars])
            if not np.isfinite(r_cc):
                st.warning(
                    "The composite-to-composite correlation is undefined "
                    "(missing cells or a composite with non-positive variance)."
                )

            st.subheader("Composite 1 vs. Each Variable")
            st.dataframe(r_comp1.to_frame(name="Comp1"))
            st.subheader("Composite 2 vs. Each Variable")
            st.dataframe(r_comp2.to_frame(name="Comp2"))
            st.subheader("Composite1 vs Composite2 Correlation")
            st.write(f"**r = {r_cc:.4f}**")

            st.subheader("Augmented Correlation Matrix (with Composites)")
            # Fill by position so a variable that happens to be called
            # "Comp1"/"Comp2" cannot be confused with the composite rows.
            c1, c2 = n_vars, n_vars + 1
            aug = np.zeros((n_vars + 2, n_vars + 2))
            aug[:n_vars, :n_vars] = R_mat
            aug[c1, :n_vars] = r_comp1.values
            aug[:n_vars, c1] = r_comp1.values
            aug[c2, :n_vars] = r_comp2.values
            aug[:n_vars, c2] = r_comp2.values
            aug[c1, c1] = 1.0
            aug[c2, c2] = 1.0
            aug[c1, c2] = r_cc
            aug[c2, c1] = r_cc
            labels = all_vars + ["Comp1", "Comp2"]
            R_aug2 = pd.DataFrame(aug, index=labels, columns=labels)
            st.dataframe(R_aug2)
else:
    st.info("🤖 Upload a CSV file to get started.")