Spaces:

Eric2mangel
/

Partial_correlations

Sleeping

App Files Files Community

Eric2mangel committed on Dec 3, 2025

Commit

15bcfd9

verified ·

1 Parent(s): 77cd14a

Upload 2 files

Browse files

Files changed (2) hide show

app.py +179 -0
requirements.txt +6 -3

app.py ADDED Viewed

	@@ -0,0 +1,179 @@

+import streamlit as st
+import seaborn as sns
+import pandas as pd
+import numpy as np
+from sklearn.linear_model import LinearRegression
+import itertools
+import matplotlib.pyplot as plt
+# ------------------------------
+# Streamlit configuration
+# ------------------------------
+# Use the wide page layout, then build the top of the sidebar:
+# a title, a markdown separator ("---") and a "Configuration" header.
+st.set_page_config(layout="wide")
+st.sidebar.title("Exploration des corrélations partielles")
+st.sidebar.markdown("---")
+st.sidebar.header("Configuration")
+# Sélection dataset
+available_datasets = []
+for name in sns.get_dataset_names():
+    try:
+        df_test = sns.load_dataset(name).dropna()
+        numeric_cols = df_test.select_dtypes(include=['float64', 'int64']).columns
+        if len(numeric_cols) >= 3:
+            available_datasets.append(name)
+    except:
+        pass
+dataset_name = st.sidebar.selectbox("Sélectionnez un jeu de données Seaborn :", available_datasets)
+df = sns.load_dataset(dataset_name).dropna()
+# Colonnes numériques uniquement
+df_numeric = df.select_dtypes(include=['float64', 'int64'])
+# Choix Pearson / Spearman
+corr_type = st.sidebar.radio(
+    "Type de corrélation :",
+    ["Pearson", "Spearman"],
+    index=0
+)
+# Choix variables à éliminer
+all_vars = list(df_numeric.columns)
+control_vars = st.sidebar.multiselect(
+    "Variables dont vous voulez éliminer l'influence :",
+    all_vars
+)
+# Variables restantes
+vars_remaining = [v for v in all_vars if v not in control_vars]
+if len(vars_remaining) < 2:
+    st.error("Il faut au moins deux variables restantes pour afficher une corrélation.")
+    st.stop()
+# ------------------------------
+# Prétraitement Spearman (si sélectionné)
+# ------------------------------
+if corr_type == "Spearman":
+    df_for_corr = df_numeric.rank()
+else:
+    df_for_corr = df_numeric.copy()
+# ------------------------------
+# Matrice brute
+# ------------------------------
+corr_raw = df_for_corr[vars_remaining].corr(method=("spearman" if corr_type=="Spearman" else "pearson"))
+# ------------------------------
+# Fonction corrélation partielle
+# ------------------------------
+def partial_corr(df, controls):
+    vars_to_corr = [v for v in df.columns if v not in controls]
+    partial_corr_matrix = pd.DataFrame(
+        np.zeros((len(vars_to_corr), len(vars_to_corr))),
+        columns=vars_to_corr,
+        index=vars_to_corr
+    )
+    for x, y in itertools.product(vars_to_corr, repeat=2):
+        if x == y:
+            partial_corr_matrix.loc[x, y] = 1.0
+            continue
+        X = df[[x]]
+        Y = df[[y]]
+        if len(controls) > 0:
+            Z = df[controls]
+            model_x = LinearRegression().fit(Z, X)
+            X_res = X - model_x.predict(Z)
+            model_y = LinearRegression().fit(Z, Y)
+            Y_res = Y - model_y.predict(Z)
+            r = np.corrcoef(X_res.T, Y_res.T)[0, 1]
+        else:
+            r = df[[x, y]].corr(method=("spearman" if corr_type=="Spearman" else "pearson")).iloc[0, 1]
+        partial_corr_matrix.loc[x, y] = r
+    return partial_corr_matrix
+# ------------------------------
+# Matrice partielle
+# ------------------------------
+corr_partial = partial_corr(df_for_corr, control_vars)
+# ------------------------------
+# Calcul des différences
+# ------------------------------
+# Extraire uniquement les paires uniques (triangle supérieur sans diagonale)
+diff_data = []
+n = len(vars_remaining)
+for i in range(n):
+    for j in range(i+1, n):
+        var1 = vars_remaining[i]
+        var2 = vars_remaining[j]
+        raw_val = corr_raw.loc[var1, var2]
+        partial_val = corr_partial.loc[var1, var2]
+        diff = partial_val - raw_val
+        diff_data.append({
+            'Paire': f"{var1} - {var2}",
+            'Différence': diff
+        })
+df_diff = pd.DataFrame(diff_data).sort_values('Différence', ascending=True)
+# ------------------------------
+# ONGLET PRINCIPAL
+# ------------------------------
+tab1, tab2 = st.tabs(["📊 Matrices", "📄 Données"])
+# ----------- TAB 1 -----------
+with tab1:
+    col1, col2 = st.columns(2)
+    # Heatmap corrélation brute (triangle inférieur)
+    with col1:
+        st.write(f"**Corrélation brute ({corr_type})**")
+        mask_raw = np.triu(np.ones_like(corr_raw, dtype=bool))
+        fig, ax = plt.subplots(figsize=(5.5, 4))
+        sns.heatmap(corr_raw, annot=True, cmap="coolwarm", center=0, ax=ax,
+                    mask=mask_raw, square=True, vmin=-1, vmax=1, cbar_kws={'shrink': 0.75}, annot_kws={'size': 9})
+        plt.tight_layout()
+        st.pyplot(fig)
+    # Heatmap corrélation partielle (triangle inférieur)
+    with col2:
+        st.write(f"**Corrélation partielle ({corr_type})**")
+        mask_partial = np.triu(np.ones_like(corr_partial, dtype=bool))
+        fig2, ax2 = plt.subplots(figsize=(5.5, 4))
+        sns.heatmap(corr_partial, annot=True, cmap="coolwarm", center=0, ax=ax2,
+                    mask=mask_partial, square=True, vmin=-1, vmax=1, cbar_kws={'shrink': 0.75}, annot_kws={'size': 9})
+        plt.tight_layout()
+        st.pyplot(fig2)
+    # Graphique des différences (pleine largeur en dessous)
+    st.write("**Différences (Partielle - Brute)**")
+    fig3, ax3 = plt.subplots(figsize=(12, 2.2))
+    colors = ['#d7191c' if x < 0 else '#2b83ba' for x in df_diff['Différence']]
+    ax3.barh(df_diff['Paire'], df_diff['Différence'], color=colors, height=0.55)
+    ax3.axvline(0, color='black', linewidth=0.8, linestyle='--')
+    ax3.set_xlabel('Différence de corrélation', fontsize=8)
+    ax3.tick_params(axis='both', labelsize=7.5)
+    ax3.grid(axis='x', alpha=0.3, linestyle=':')
+    plt.tight_layout()
+    st.pyplot(fig3)
+    st.caption("🔵 Corrélation renforcée après contrôle | 🔴 Corrélation affaiblie après contrôle")
+# ----------- TAB 2 -----------
+with tab2:
+    st.write("**Aperçu des données (10 premières lignes)**")
+    st.dataframe(df_numeric.head(10))

requirements.txt CHANGED Viewed

@@ -1,3 +1,6 @@
-altair
-pandas
-streamlit

+streamlit
+pandas
+numpy
+seaborn
+matplotlib
+scikit-learn