Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import seaborn as sns | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.linear_model import LinearRegression | |
| import itertools | |
| import matplotlib.pyplot as plt | |
# ------------------------------
# Streamlit configuration
# ------------------------------
st.set_page_config(layout="wide")
st.sidebar.title("Exploration des corrélations partielles")
st.sidebar.markdown("---")
st.sidebar.header("Configuration")


@st.cache_data
def _list_usable_datasets(min_numeric_cols=3):
    """Return the Seaborn dataset names that have enough numeric columns.

    Each dataset is loaded, rows containing missing values are dropped, and
    the dataset is kept when at least ``min_numeric_cols`` float64/int64
    columns remain. The result is cached so the datasets are not reloaded on
    every Streamlit rerun.
    """
    usable = []
    for name in sns.get_dataset_names():
        try:
            candidate = sns.load_dataset(name).dropna()
        except Exception:
            # Best effort: a dataset that cannot be loaded is simply not
            # offered. Unlike a bare ``except:``, this lets KeyboardInterrupt
            # and SystemExit propagate.
            continue
        numeric_cols = candidate.select_dtypes(include=['float64', 'int64']).columns
        if len(numeric_cols) >= min_numeric_cols:
            usable.append(name)
    return usable


# Dataset selection
available_datasets = _list_usable_datasets()
if not available_datasets:
    # Without this guard the selectbox returns None and load_dataset fails.
    st.error("Aucun jeu de données Seaborn utilisable n'a pu être chargé.")
    st.stop()
dataset_name = st.sidebar.selectbox("Sélectionnez un jeu de données Seaborn :", available_datasets)
df = sns.load_dataset(dataset_name).dropna()
# Numeric columns only
df_numeric = df.select_dtypes(include=['float64', 'int64'])
# Correlation flavour: Pearson or Spearman
corr_type = st.sidebar.radio("Type de corrélation :", ["Pearson", "Spearman"], index=0)

# Variables whose influence should be removed
all_vars = df_numeric.columns.tolist()
control_vars = st.sidebar.multiselect(
    "Variables dont vous voulez éliminer l'influence :",
    all_vars,
)

# Everything that is not controlled for remains to be correlated
vars_remaining = [name for name in all_vars if name not in control_vars]
if len(vars_remaining) <= 1:
    st.error("Il faut au moins deux variables restantes pour afficher une corrélation.")
    st.stop()

# ------------------------------
# Spearman preprocessing (only when selected): work on ranks
# ------------------------------
use_spearman = corr_type == "Spearman"
df_for_corr = df_numeric.rank() if use_spearman else df_numeric.copy()

# ------------------------------
# Raw correlation matrix of the remaining variables
# ------------------------------
corr_method = "spearman" if use_spearman else "pearson"
corr_raw = df_for_corr[vars_remaining].corr(method=corr_method)
| # ------------------------------ | |
| # Fonction corrélation partielle | |
| # ------------------------------ | |
def partial_corr(df, controls):
    """Return the matrix of partial correlations between non-control columns.

    Every column of ``df`` that is not listed in ``controls`` is regressed
    (ordinary least squares with an intercept) on the control columns, and
    the Pearson correlation of the residuals is returned. With no controls
    this is the plain Pearson correlation matrix.

    For a Spearman-style partial correlation, pass a rank-transformed frame:
    Pearson on ranks is Spearman, so the function does not need to know which
    flavour the caller selected.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data, one column per variable.
    controls : list
        Names of the columns whose linear influence is removed.

    Returns
    -------
    pandas.DataFrame
        Square, symmetric matrix indexed and labelled by the non-control
        columns in their original order, with 1.0 on the diagonal.
    """
    controls = list(controls)
    vars_to_corr = [v for v in df.columns if v not in controls]
    if not vars_to_corr:
        return pd.DataFrame(index=vars_to_corr, columns=vars_to_corr, dtype=float)

    if controls:
        values = df[vars_to_corr].to_numpy(dtype=float)
        # Design matrix: intercept column followed by the control variables.
        design = np.column_stack(
            [np.ones(len(df)), df[controls].to_numpy(dtype=float)]
        )
        # One least-squares solve fits every variable at once, instead of
        # refitting the same regression for each ordered pair of variables.
        coef, *_ = np.linalg.lstsq(design, values, rcond=None)
        residuals = values - design @ coef
        corr = np.atleast_2d(np.corrcoef(residuals, rowvar=False)).copy()
    else:
        corr = df[vars_to_corr].corr(method="pearson").to_numpy(copy=True)

    # A variable is perfectly correlated with itself by definition, even when
    # its (residual) variance is zero and the computed value would be NaN.
    np.fill_diagonal(corr, 1.0)
    return pd.DataFrame(corr, index=vars_to_corr, columns=vars_to_corr)
# ------------------------------
# Partial correlation matrix
# ------------------------------
corr_partial = partial_corr(df_for_corr, control_vars)

# ------------------------------
# Differences between partial and raw correlations
# ------------------------------
# One row per unordered pair of remaining variables (upper triangle, no
# diagonal), sorted from the most negative to the most positive change.
df_diff = pd.DataFrame(
    [
        {
            'Paire': f"{first} - {second}",
            'Différence': corr_partial.loc[first, second] - corr_raw.loc[first, second],
        }
        for first, second in itertools.combinations(vars_remaining, 2)
    ]
).sort_values('Différence', ascending=True)
# ------------------------------
# MAIN TABS
# ------------------------------
def _show_lower_triangle_heatmap(matrix, title):
    """Write ``title`` and draw the lower triangle of ``matrix`` as a heatmap.

    The figure is closed once Streamlit has rendered it, so figures do not
    accumulate in matplotlib's registry across script reruns.
    """
    st.write(title)
    # Mask the upper triangle and the diagonal; only the lower triangle shows.
    hidden_cells = np.triu(np.ones_like(matrix, dtype=bool))
    fig, ax = plt.subplots(figsize=(5, 4))
    sns.heatmap(
        matrix, annot=True, cmap="coolwarm", center=0, ax=ax,
        mask=hidden_cells, square=True, vmin=-1, vmax=1,
        cbar_kws={'shrink': 0.75}, annot_kws={'size': 9},
    )
    ax.tick_params(axis='both', labelsize=8)
    # tight_layout() is deliberately not called here, and the figure is not
    # stretched to the container width, so both heatmaps keep a fixed size.
    st.pyplot(fig, use_container_width=False)
    plt.close(fig)


tab1, tab2 = st.tabs(["📊 Matrices", "📄 Données"])

# ----------- TAB 1 -----------
with tab1:
    col1, col2 = st.columns(2)

    # Column 1: raw correlation
    with col1:
        _show_lower_triangle_heatmap(corr_raw, f"**Corrélation brute ({corr_type})**")

    # Column 2: partial correlation
    with col2:
        _show_lower_triangle_heatmap(corr_partial, f"**Corrélation partielle ({corr_type})**")

    # Difference chart, full width below the two heatmaps
    st.write("**Différences (Partielle - Brute)**")
    fig3, ax3 = plt.subplots(figsize=(12, 2.2))
    # Red for a negative difference, blue otherwise.
    # NOTE(review): the caption below calls blue "reinforced", but a positive
    # signed difference on a negative correlation is a weaker correlation;
    # comparing absolute values may be what is intended. Confirm.
    colors = ['#d7191c' if x < 0 else '#2b83ba' for x in df_diff['Différence']]
    ax3.barh(df_diff['Paire'], df_diff['Différence'], color=colors, height=0.55)
    ax3.axvline(0, color='black', linewidth=0.8, linestyle='--')
    ax3.set_xlabel('Différence de corrélation', fontsize=8)
    ax3.tick_params(axis='both', labelsize=7.5)
    ax3.grid(axis='x', alpha=0.3, linestyle=':')
    fig3.tight_layout()
    st.pyplot(fig3)
    plt.close(fig3)
    st.caption("🔵 Corrélation renforcée après contrôle | 🔴 Corrélation affaiblie après contrôle")

# ----------- TAB 2 -----------
with tab2:
    st.write("**Aperçu des données (10 premières lignes)**")
    st.dataframe(df_numeric.head(10))