Spaces:
Build error
Build error
Update src/dashboard_app.py
Browse files- src/dashboard_app.py +461 -271
src/dashboard_app.py
CHANGED
|
@@ -29,75 +29,54 @@ from sklearn.model_selection import train_test_split
|
|
| 29 |
from sklearn.metrics import mean_squared_error, r2_score
|
| 30 |
import scipy.stats as stats
|
| 31 |
import numpy as np
|
|
|
|
| 32 |
|
| 33 |
# --- Configuration de la Page Streamlit ---
|
| 34 |
st.set_page_config(layout="wide", page_title="Suite d'Analyse Interactive", page_icon="📊")
|
| 35 |
|
| 36 |
# --- Configuration Initiale ---
|
| 37 |
-
|
| 38 |
-
# Charger les variables d'environnement (pour local, HF utilise les Secrets)
|
| 39 |
load_dotenv()
|
| 40 |
api_key = os.getenv("GOOGLE_API_KEY")
|
| 41 |
|
| 42 |
-
# --- CORRECTION CHEMINS ---
|
| 43 |
-
# Le script est dans src/, les ressources (template, excel) sont à la racine (un niveau au-dessus)
|
| 44 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 45 |
-
# Chemin vers la racine de l'application (où se trouvent template.html et sample_excel.xlsx)
|
| 46 |
app_root_dir = os.path.join(script_dir, '..') # Aller un niveau au-dessus de 'src'
|
| 47 |
|
| 48 |
-
#
|
| 49 |
-
# st.sidebar.info(f"DEBUG: Script directory (script_dir): {script_dir}")
|
| 50 |
-
# st.sidebar.info(f"DEBUG: App root directory (app_root_dir): {app_root_dir}")
|
| 51 |
-
# ----------------
|
| 52 |
-
|
| 53 |
-
# Charger le template HTML depuis la racine de l'application
|
| 54 |
TEMPLATE_FILE = "report_template.html"
|
| 55 |
template = None
|
| 56 |
try:
|
| 57 |
-
# Dire à Jinja de chercher dans le dossier racine de l'app
|
| 58 |
env = Environment(loader=FileSystemLoader(app_root_dir))
|
| 59 |
template = env.get_template(TEMPLATE_FILE)
|
| 60 |
-
# st.sidebar.success("Template HTML chargé avec succès.") # Debug success
|
| 61 |
except TemplateNotFound:
|
| 62 |
-
st.error(f"Erreur chargement template : '{TEMPLATE_FILE}' NON TROUVÉ dans '{app_root_dir}'. Vérifiez qu'il est bien à la racine du dépôt
|
| 63 |
except Exception as e:
|
| 64 |
-
st.error(f"Erreur
|
| 65 |
|
| 66 |
-
# Affichage état API Key
|
| 67 |
if not api_key:
|
| 68 |
-
st.sidebar.warning("⚠️ Clé API Google Gemini
|
| 69 |
-
print("⚠️ Clé API Google Gemini non trouvée.")
|
| 70 |
else:
|
| 71 |
-
# st.sidebar.success("✔️ Clé API Google Gemini trouvée.") # Optionnel
|
| 72 |
-
|
| 73 |
-
|
| 74 |
|
| 75 |
# --- Fonctions Utilitaires ---
|
| 76 |
-
# (generate_html_report, get_safe_index, init_analysis_state - Pas de changements ici, sauf dépendance au 'template' chargé)
|
| 77 |
def generate_html_report(data, num_submissions, columns, tables_html="", charts_html=""):
|
| 78 |
-
|
| 79 |
-
if template is None:
|
| 80 |
-
# L'erreur est déjà affichée lors du chargement initial
|
| 81 |
-
return "Erreur: Template HTML manquant ou non chargé. Impossible de générer le rapport."
|
| 82 |
last_sync = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
| 83 |
try:
|
| 84 |
context = {
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
'tables': tables_html,
|
| 89 |
-
'charts': charts_html,
|
| 90 |
-
'data_preview': data.head().to_html(classes='table table-sm table-striped', index=False, border=0) if data is not None else "<p>Aperçu indisponible.</p>"
|
| 91 |
}
|
| 92 |
-
|
| 93 |
-
return html_content
|
| 94 |
except Exception as e:
|
| 95 |
-
st.error(f"Erreur
|
| 96 |
-
return f"Erreur
|
| 97 |
|
| 98 |
def get_safe_index(options, value, default_index=0):
|
| 99 |
if not options or value is None: return default_index
|
| 100 |
-
options_list = list(options)
|
| 101 |
try: return options_list.index(value)
|
| 102 |
except (ValueError, TypeError): return default_index
|
| 103 |
|
|
@@ -106,10 +85,8 @@ def init_analysis_state(analysis_index, param_key, default_value):
|
|
| 106 |
if param_key not in st.session_state.analyses[analysis_index]['params']:
|
| 107 |
st.session_state.analyses[analysis_index]['params'][param_key] = default_value
|
| 108 |
|
| 109 |
-
|
| 110 |
# --- Titre et Description ---
|
| 111 |
st.markdown("<h1 style='text-align: center;'>📊 Suite d'Analyse de Données Interactive</h1>", unsafe_allow_html=True)
|
| 112 |
-
# (Description HTML inchangée)
|
| 113 |
st.markdown(
|
| 114 |
"""
|
| 115 |
<div style='background-color: #f0f2f6; border-left: 5px solid #1f77b4; padding: 15px; border-radius: 5px; margin-bottom: 20px;'>
|
|
@@ -121,6 +98,7 @@ st.markdown(
|
|
| 121 |
st.caption(f"Heure du serveur : {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
| 122 |
|
| 123 |
# --- Initialisation du Session State ---
|
|
|
|
| 124 |
if 'dataframe_to_export' not in st.session_state: st.session_state.dataframe_to_export = None
|
| 125 |
if 'analyses' not in st.session_state: st.session_state.analyses = []
|
| 126 |
if 'show_advanced_analysis' not in st.session_state: st.session_state.show_advanced_analysis = False
|
|
@@ -129,10 +107,8 @@ if 'last_header_preference' not in st.session_state: st.session_state.last_heade
|
|
| 129 |
if 'html_report_content' not in st.session_state: st.session_state.html_report_content = None
|
| 130 |
if 'html_report_filename' not in st.session_state: st.session_state.html_report_filename = "rapport.html"
|
| 131 |
if 'data_source_info' not in st.session_state: st.session_state.data_source_info = "Aucune donnée chargée"
|
| 132 |
-
# Specific state for Gemini chat
|
| 133 |
if "gemini_chat_history" not in st.session_state: st.session_state.gemini_chat_history = []
|
| 134 |
|
| 135 |
-
|
| 136 |
# --- Création des Onglets ---
|
| 137 |
app_tab, manual_tab, chat_tab = st.tabs(["📊 Application Principale", "📘 Manuel d'Utilisation", "💬 Chat IA (Gemini)"])
|
| 138 |
|
|
@@ -147,154 +123,120 @@ with app_tab:
|
|
| 147 |
|
| 148 |
# --- Chargement des Données ---
|
| 149 |
st.subheader("1. Chargement des Données")
|
| 150 |
-
uploaded_file = st.file_uploader(
|
| 151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
header_param = 0 if use_header else None
|
| 153 |
|
| 154 |
data = None
|
| 155 |
data_source_info = "Aucune donnée chargée"
|
| 156 |
load_error = False
|
| 157 |
trigger_reload = False
|
|
|
|
| 158 |
|
| 159 |
-
#
|
| 160 |
-
current_data_id = None
|
| 161 |
if uploaded_file is not None:
|
| 162 |
current_data_id = f"{uploaded_file.name}-{uploaded_file.size}"
|
| 163 |
if st.session_state.data_loaded_id != current_data_id or st.session_state.last_header_preference != use_header:
|
| 164 |
trigger_reload = True
|
| 165 |
-
elif st.session_state.data_loaded_id != "local_default" or st.session_state.last_header_preference != use_header:
|
| 166 |
-
current_data_id = "local_default"
|
| 167 |
-
trigger_reload = True
|
| 168 |
else:
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
|
|
|
|
|
|
| 176 |
st.session_state.html_report_content = None
|
| 177 |
st.session_state.html_report_filename = "rapport.html"
|
| 178 |
-
st.session_state.analyses = [] # Reset analyses
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
-
|
| 181 |
-
try:
|
| 182 |
-
st.info(f"Chargement de '{uploaded_file.name}'...")
|
| 183 |
-
if uploaded_file.name.endswith('.csv'):
|
| 184 |
-
data = pd.read_csv(uploaded_file, header=header_param)
|
| 185 |
-
elif uploaded_file.name.endswith('.xlsx'):
|
| 186 |
-
data = pd.read_excel(uploaded_file, header=header_param) # Requires openpyxl
|
| 187 |
-
st.session_state.dataframe_to_export = data
|
| 188 |
-
data_source_info = f"Fichier chargé : {uploaded_file.name}"
|
| 189 |
-
st.session_state.data_loaded_id = current_data_id
|
| 190 |
-
st.session_state.last_header_preference = use_header
|
| 191 |
-
load_error = False
|
| 192 |
-
st.success(f"'{uploaded_file.name}' chargé.")
|
| 193 |
-
st.rerun()
|
| 194 |
-
except Exception as e:
|
| 195 |
-
st.error(f"Erreur chargement upload '{uploaded_file.name}': {e}")
|
| 196 |
-
if '.xlsx' in uploaded_file.name: st.warning("Vérifiez que 'openpyxl' est dans requirements.txt.", icon="💡")
|
| 197 |
-
data = None
|
| 198 |
-
st.session_state.dataframe_to_export = None
|
| 199 |
-
st.session_state.data_loaded_id = None
|
| 200 |
-
data_source_info = "Erreur chargement upload"
|
| 201 |
-
load_error = True
|
| 202 |
-
elif current_data_id == "local_default":
|
| 203 |
-
try:
|
| 204 |
-
# --- CORRECTION CHEMIN --- Utiliser app_root_dir
|
| 205 |
-
default_filename = "sample_excel.xlsx"
|
| 206 |
-
local_file_path = os.path.join(app_root_dir, default_filename)
|
| 207 |
-
st.info(f"Chargement fichier local par défaut '{default_filename}'...")
|
| 208 |
-
# --- DEBUG PATH ---
|
| 209 |
-
# st.sidebar.info(f"DEBUG: Trying default file at: {local_file_path}")
|
| 210 |
-
# -----------------
|
| 211 |
-
data = pd.read_excel(local_file_path, header=header_param) # Requires openpyxl
|
| 212 |
-
st.session_state.dataframe_to_export = data
|
| 213 |
-
data_source_info = "Fichier local par défaut"
|
| 214 |
-
st.session_state.data_loaded_id = current_data_id
|
| 215 |
-
st.session_state.last_header_preference = use_header
|
| 216 |
-
load_error = False
|
| 217 |
-
st.success(f"Fichier local '{default_filename}' chargé.")
|
| 218 |
-
st.rerun()
|
| 219 |
-
except FileNotFoundError:
|
| 220 |
-
st.warning(f"Fichier local par défaut '{default_filename}' NON TROUVÉ dans '{app_root_dir}'. Chargez un fichier.", icon="⚠️")
|
| 221 |
-
data = None
|
| 222 |
-
st.session_state.dataframe_to_export = None
|
| 223 |
-
st.session_state.data_loaded_id = None
|
| 224 |
-
data_source_info = "Fichier local non trouvé"
|
| 225 |
-
load_error = True
|
| 226 |
-
except Exception as e:
|
| 227 |
-
st.error(f"Erreur chargement fichier local '{default_filename}': {e}")
|
| 228 |
-
st.warning("Vérifiez que 'openpyxl' est dans requirements.txt.", icon="💡")
|
| 229 |
-
data = None
|
| 230 |
-
st.session_state.dataframe_to_export = None
|
| 231 |
-
st.session_state.data_loaded_id = None
|
| 232 |
-
data_source_info = "Erreur fichier local"
|
| 233 |
-
load_error = True
|
| 234 |
-
|
| 235 |
-
# Mise à jour finale de l'état
|
| 236 |
st.session_state.data_source_info = data_source_info
|
|
|
|
| 237 |
data = st.session_state.get('dataframe_to_export', None)
|
| 238 |
|
| 239 |
-
# Définition des colonnes (
|
| 240 |
-
# ... (la longue section de détection/conversion de type reste la même) ...
|
| 241 |
categorical_columns = []
|
| 242 |
numerical_columns = []
|
| 243 |
datetime_columns = []
|
| 244 |
all_columns = []
|
| 245 |
-
|
| 246 |
if data is not None:
|
| 247 |
all_columns = data.columns.tolist()
|
| 248 |
data_processed = data.copy()
|
| 249 |
-
# (Conversion booléens)
|
| 250 |
for col in data_processed.select_dtypes(include=['bool']).columns:
|
| 251 |
try: data_processed[col] = data_processed[col].astype(str)
|
| 252 |
except Exception: pass
|
| 253 |
-
# (Conversion object -> num/date)
|
| 254 |
for col in data_processed.select_dtypes(include=['object']).columns:
|
| 255 |
-
try:
|
| 256 |
converted_num = pd.to_numeric(data_processed[col], errors='coerce')
|
| 257 |
-
|
| 258 |
-
try:
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
continue
|
| 263 |
-
except Exception: pass # Ignorer si l'heuristique échoue
|
| 264 |
except (ValueError, TypeError, AttributeError): pass
|
| 265 |
-
try:
|
| 266 |
converted_date = pd.to_datetime(data_processed[col], errors='coerce', infer_datetime_format=True)
|
| 267 |
if converted_date.notna().any():
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
try: # Encapsuler l'heuristique
|
| 271 |
original_col_sample = data[col].dropna().unique()
|
| 272 |
if len(original_col_sample) > 0:
|
| 273 |
sample_numeric = pd.to_numeric(original_col_sample[:min(len(original_col_sample), 50)], errors='coerce')
|
| 274 |
non_nan_numeric = sample_numeric[~np.isnan(sample_numeric)]
|
| 275 |
if len(non_nan_numeric) > 0:
|
| 276 |
-
# Check if they look like integers AND are large
|
| 277 |
is_int_like = np.all(np.mod(non_nan_numeric, 1) == 0)
|
| 278 |
is_large = np.nanmax(non_nan_numeric) > 10000
|
| 279 |
-
if is_int_like and is_large:
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
if not is_likely_id: data_processed[col] = converted_date
|
| 283 |
except (ValueError, TypeError, OverflowError): pass
|
| 284 |
|
| 285 |
numerical_columns = data_processed.select_dtypes(include=['number']).columns.tolist()
|
| 286 |
datetime_columns = data_processed.select_dtypes(include=['datetime', 'datetimetz']).columns.tolist()
|
| 287 |
categorical_columns = data_processed.select_dtypes(exclude=['number', 'datetime', 'datetimetz', 'timedelta']).columns.tolist()
|
| 288 |
-
else:
|
| 289 |
-
all_columns, categorical_columns, numerical_columns, datetime_columns = [], [], [], []
|
| 290 |
-
|
| 291 |
|
| 292 |
-
# Renommage Colonnes (
|
| 293 |
-
# ... (section renommage) ...
|
| 294 |
st.subheader("2. Renommer Colonnes (Optionnel)")
|
| 295 |
current_columns_for_rename = all_columns
|
| 296 |
if data is not None and current_columns_for_rename:
|
| 297 |
-
# ... (widgets selectbox, text_input, button) ...
|
| 298 |
rename_key_suffix = st.session_state.data_loaded_id if st.session_state.data_loaded_id else "no_data"
|
| 299 |
col_to_rename = st.selectbox(
|
| 300 |
"Colonne à renommer :", current_columns_for_rename, index=0,
|
|
@@ -305,7 +247,6 @@ with app_tab:
|
|
| 305 |
key=f"rename_text_{rename_key_suffix}"
|
| 306 |
)
|
| 307 |
if st.button("Appliquer Renommage", key=f"rename_button_{rename_key_suffix}"):
|
| 308 |
-
# ... (logique de renommage et st.rerun()) ...
|
| 309 |
data_to_modify = st.session_state.dataframe_to_export
|
| 310 |
if data_to_modify is not None and col_to_rename and new_name and col_to_rename in data_to_modify.columns:
|
| 311 |
if new_name in data_to_modify.columns and new_name != col_to_rename: st.error(f"Le nom '{new_name}' existe déjà.")
|
|
@@ -315,74 +256,77 @@ with app_tab:
|
|
| 315 |
st.success(f"'{col_to_rename}' renommée en '{new_name}'. Rafraîchissement...")
|
| 316 |
st.rerun()
|
| 317 |
else: st.warning("Le nouveau nom ne peut pas être vide.")
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
|
|
|
|
|
|
|
|
|
| 321 |
|
| 322 |
-
# Exportation (
|
| 323 |
-
# ... (section exportation avec les 3 boutons) ...
|
| 324 |
st.subheader("3. Exporter")
|
| 325 |
df_to_export = st.session_state.get('dataframe_to_export', None)
|
| 326 |
if df_to_export is not None:
|
| 327 |
-
# ... (calcul nom de fichier) ...
|
| 328 |
-
# ... (boutons CSV, Excel) ...
|
| 329 |
-
# ... (bouton Préparer/Télécharger HTML qui utilise generate_html_report) ...
|
| 330 |
export_key_suffix = st.session_state.data_loaded_id if st.session_state.data_loaded_id else "no_data"
|
| 331 |
source_for_filename = st.session_state.get('data_source_info', 'donnees')
|
| 332 |
if "Fichier chargé :" in source_for_filename: base_name = source_for_filename.split(":")[-1].strip(); export_filename_base = f"export_{os.path.splitext(base_name)[0]}"
|
| 333 |
-
|
| 334 |
-
else: export_filename_base = "export_donnees"
|
| 335 |
export_filename_base = "".join(c if c.isalnum() or c in ('_', '-') else '_' for c in export_filename_base)
|
| 336 |
|
| 337 |
col_export1, col_export2, col_export3 = st.columns(3)
|
| 338 |
-
# CSV
|
| 339 |
-
with col_export1:
|
| 340 |
try:
|
| 341 |
csv_data = df_to_export.to_csv(index=False).encode('utf-8')
|
| 342 |
st.download_button("Exporter CSV", csv_data, f"{export_filename_base}.csv", "text/csv", key=f"dl_csv_{export_key_suffix}")
|
| 343 |
except Exception as e: st.error(f"Erreur Export CSV: {e}")
|
| 344 |
-
# Excel
|
| 345 |
-
with col_export2:
|
| 346 |
try:
|
| 347 |
excel_buffer = io.BytesIO()
|
| 348 |
with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer: df_to_export.to_excel(writer, index=False, sheet_name='Data')
|
| 349 |
st.download_button("Exporter Excel", excel_buffer.getvalue(), f"{export_filename_base}.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", key=f"dl_excel_{export_key_suffix}")
|
| 350 |
except Exception as e:
|
| 351 |
-
st.error(f"Erreur Export Excel: {e}")
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
with col_export3:
|
| 355 |
-
if template: # Check if template loaded successfully
|
| 356 |
if st.button("Préparer Rapport HTML", key=f"prep_html_{export_key_suffix}"):
|
| 357 |
-
# ... (logique de génération HTML inchangée) ...
|
| 358 |
with st.spinner("Génération rapport..."):
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
|
| 387 |
if st.session_state.get('html_report_content'):
|
| 388 |
st.download_button("Télécharger Rapport HTML", st.session_state.html_report_content, st.session_state.html_report_filename, "text/html", key=f"dl_html_{export_key_suffix}", on_click=lambda: st.session_state.update(html_report_content=None))
|
|
@@ -398,127 +342,373 @@ with app_tab:
|
|
| 398 |
data_source_info = st.session_state.get('data_source_info', "Aucune donnée chargée")
|
| 399 |
|
| 400 |
if data is not None:
|
| 401 |
-
#
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
st.subheader("🛠️ Construire les Analyses")
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
st.subheader("🔍 Analyses Configurées")
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
st.markdown("---")
|
| 428 |
st.subheader("🔬 Analyses Statistiques Avancées")
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
|
| 434 |
|
| 435 |
# ==============================================================================
|
| 436 |
-
# ONGLET MANUEL D'UTILISATION
|
| 437 |
# ==============================================================================
|
| 438 |
with manual_tab:
|
| 439 |
-
# (Contenu Markdown inchangé, mais vérifier les instructions sur l'emplacement des fichiers par défaut)
|
| 440 |
st.markdown("## 📘 Manuel d'Utilisation - Suite d'Analyse Interactive")
|
| 441 |
st.markdown("""
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
-
|
| 445 |
-
|
| 446 |
-
...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
""")
|
| 448 |
|
| 449 |
# ==============================================================================
|
| 450 |
-
# ONGLET CHAT IA
|
| 451 |
# ==============================================================================
|
| 452 |
with chat_tab:
|
| 453 |
st.markdown("## 💬 Chat IA (Assisté par Google Gemini)")
|
| 454 |
-
|
| 455 |
if not api_key:
|
| 456 |
-
# L'avertissement est déjà dans la sidebar, on peut être plus concis ici
|
| 457 |
st.info("Chat IA désactivé. Configurez `GOOGLE_API_KEY` dans les Secrets HF.")
|
| 458 |
else:
|
| 459 |
-
st.info("Posez des questions générales sur l'analyse
|
| 460 |
-
|
| 461 |
-
# --- AJOUT DEBUGGING IA ---
|
| 462 |
model_chat = None
|
| 463 |
try:
|
| 464 |
genai.configure(api_key=api_key)
|
| 465 |
model_chat = genai.GenerativeModel('gemini-1.5-flash-latest')
|
| 466 |
-
|
| 467 |
-
except Exception as e:
|
| 468 |
-
st.error(f"Erreur lors de l'initialisation de l'API Gemini: {e}")
|
| 469 |
-
st.warning("Vérifiez votre clé API et la configuration des Secrets HF.")
|
| 470 |
|
| 471 |
if model_chat:
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
st.markdown(message["content"])
|
| 476 |
-
|
| 477 |
-
# Input utilisateur
|
| 478 |
-
if user_question := st.chat_input("Votre question à l'IA..."):
|
| 479 |
st.session_state.gemini_chat_history.append({"role": "user", "content": user_question})
|
| 480 |
-
with st.chat_message("user"):
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
# Préparation contexte (Code inchangé)
|
| 484 |
-
# ... (récupération num_cols, cat_cols, date_cols, analyses_context) ...
|
| 485 |
data_context_chat = st.session_state.get('dataframe_to_export', None)
|
| 486 |
num_cols_context = numerical_columns if data_context_chat is not None else []
|
| 487 |
cat_cols_context = categorical_columns if data_context_chat is not None else []
|
| 488 |
date_cols_context = datetime_columns if data_context_chat is not None else []
|
| 489 |
analyses_context = list(set(a['type'].replace('_', ' ').title() for a in st.session_state.get('analyses', [])))
|
| 490 |
source_info_context = st.session_state.get('data_source_info', 'Inconnue')
|
| 491 |
-
context_prompt = f"""
|
| 492 |
-
|
| 493 |
-
Question de l'utilisateur : "{user_question}"
|
| 494 |
-
Ta réponse :
|
| 495 |
-
"""
|
| 496 |
-
|
| 497 |
-
# Génération réponse avec try/except amélioré
|
| 498 |
try:
|
| 499 |
-
with st.spinner("L'IA réfléchit..."):
|
| 500 |
-
response = model_chat.generate_content(context_prompt)
|
| 501 |
-
|
| 502 |
-
# Vérifier si une réponse a été générée
|
| 503 |
if response and response.text:
|
| 504 |
-
with st.chat_message("assistant"):
|
| 505 |
-
st.markdown(response.text)
|
| 506 |
st.session_state.gemini_chat_history.append({"role": "assistant", "content": response.text})
|
| 507 |
else:
|
| 508 |
-
|
| 509 |
-
error_msg_ai
|
| 510 |
-
st.error(error_msg_ai)
|
| 511 |
-
st.session_state.gemini_chat_history.append({"role": "assistant", "content": f"({error_msg_ai})"})
|
| 512 |
-
|
| 513 |
-
|
| 514 |
except Exception as e:
|
| 515 |
-
# Afficher l'erreur spécifique de l'API Gemini
|
| 516 |
error_message = f"Erreur communication API Gemini: {e}"
|
| 517 |
-
st.error(error_message)
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
else: # model_chat n'a pas pu être initialisé
|
| 521 |
-
st.error("Le modèle de Chat IA n'est pas disponible.")
|
| 522 |
-
|
| 523 |
|
| 524 |
# --- Fin du Script ---
|
|
|
|
| 29 |
from sklearn.metrics import mean_squared_error, r2_score
|
| 30 |
import scipy.stats as stats
|
| 31 |
import numpy as np
|
| 32 |
+
from sklearn.preprocessing import StandardScaler # Importer StandardScaler pour ACP et K-Means
|
| 33 |
|
| 34 |
# --- Configuration de la Page Streamlit ---
|
| 35 |
st.set_page_config(layout="wide", page_title="Suite d'Analyse Interactive", page_icon="📊")
|
| 36 |
|
| 37 |
# --- Configuration Initiale ---
|
|
|
|
|
|
|
| 38 |
load_dotenv()
|
| 39 |
api_key = os.getenv("GOOGLE_API_KEY")
|
| 40 |
|
|
|
|
|
|
|
| 41 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
|
|
|
| 42 |
app_root_dir = os.path.join(script_dir, '..') # Aller un niveau au-dessus de 'src'
|
| 43 |
|
| 44 |
+
# Charger le template HTML depuis la racine
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
TEMPLATE_FILE = "report_template.html"
|
| 46 |
template = None
|
| 47 |
try:
|
|
|
|
| 48 |
env = Environment(loader=FileSystemLoader(app_root_dir))
|
| 49 |
template = env.get_template(TEMPLATE_FILE)
|
|
|
|
| 50 |
except TemplateNotFound:
|
| 51 |
+
st.error(f"Erreur chargement template : '{TEMPLATE_FILE}' NON TROUVÉ dans '{app_root_dir}'. Vérifiez qu'il est bien à la racine du dépôt. Export HTML indisponible.")
|
| 52 |
except Exception as e:
|
| 53 |
+
st.error(f"Erreur chargement template '{TEMPLATE_FILE}' depuis '{app_root_dir}': {e}. Export HTML indisponible.")
|
| 54 |
|
| 55 |
+
# Affichage état API Key
|
| 56 |
if not api_key:
|
| 57 |
+
st.sidebar.warning("⚠️ Clé API Google Gemini manquante. Chat AI désactivé.", icon="🔑")
|
|
|
|
| 58 |
else:
|
| 59 |
+
# st.sidebar.success("✔️ Clé API Google Gemini trouvée.") # Optionnel
|
| 60 |
+
pass
|
|
|
|
| 61 |
|
| 62 |
# --- Fonctions Utilitaires ---
|
|
|
|
| 63 |
def generate_html_report(data, num_submissions, columns, tables_html="", charts_html=""):
|
| 64 |
+
if template is None: return "Erreur: Template HTML manquant."
|
|
|
|
|
|
|
|
|
|
| 65 |
last_sync = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
| 66 |
try:
|
| 67 |
context = {
|
| 68 |
+
'last_sync': last_sync, 'num_submissions': num_submissions, 'columns': columns,
|
| 69 |
+
'tables': tables_html, 'charts': charts_html,
|
| 70 |
+
'data_preview': data.head().to_html(classes='table table-sm table-striped', index=False, border=0) if data is not None else "<p>Aperçu indisponible.</p>"
|
|
|
|
|
|
|
|
|
|
| 71 |
}
|
| 72 |
+
return template.render(context)
|
|
|
|
| 73 |
except Exception as e:
|
| 74 |
+
st.error(f"Erreur rendu template: {e}")
|
| 75 |
+
return f"Erreur génération rapport: {e}"
|
| 76 |
|
| 77 |
def get_safe_index(options, value, default_index=0):
|
| 78 |
if not options or value is None: return default_index
|
| 79 |
+
options_list = list(options);
|
| 80 |
try: return options_list.index(value)
|
| 81 |
except (ValueError, TypeError): return default_index
|
| 82 |
|
|
|
|
| 85 |
if param_key not in st.session_state.analyses[analysis_index]['params']:
|
| 86 |
st.session_state.analyses[analysis_index]['params'][param_key] = default_value
|
| 87 |
|
|
|
|
| 88 |
# --- Titre et Description ---
|
| 89 |
st.markdown("<h1 style='text-align: center;'>📊 Suite d'Analyse de Données Interactive</h1>", unsafe_allow_html=True)
|
|
|
|
| 90 |
st.markdown(
|
| 91 |
"""
|
| 92 |
<div style='background-color: #f0f2f6; border-left: 5px solid #1f77b4; padding: 15px; border-radius: 5px; margin-bottom: 20px;'>
|
|
|
|
| 98 |
st.caption(f"Heure du serveur : {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
| 99 |
|
| 100 |
# --- Initialisation du Session State ---
|
| 101 |
+
# (Identique à avant)
|
| 102 |
if 'dataframe_to_export' not in st.session_state: st.session_state.dataframe_to_export = None
|
| 103 |
if 'analyses' not in st.session_state: st.session_state.analyses = []
|
| 104 |
if 'show_advanced_analysis' not in st.session_state: st.session_state.show_advanced_analysis = False
|
|
|
|
| 107 |
if 'html_report_content' not in st.session_state: st.session_state.html_report_content = None
|
| 108 |
if 'html_report_filename' not in st.session_state: st.session_state.html_report_filename = "rapport.html"
|
| 109 |
if 'data_source_info' not in st.session_state: st.session_state.data_source_info = "Aucune donnée chargée"
|
|
|
|
| 110 |
if "gemini_chat_history" not in st.session_state: st.session_state.gemini_chat_history = []
|
| 111 |
|
|
|
|
| 112 |
# --- Création des Onglets ---
|
| 113 |
app_tab, manual_tab, chat_tab = st.tabs(["📊 Application Principale", "📘 Manuel d'Utilisation", "💬 Chat IA (Gemini)"])
|
| 114 |
|
|
|
|
| 123 |
|
| 124 |
# --- Chargement des Données ---
|
| 125 |
st.subheader("1. Chargement des Données")
|
| 126 |
+
uploaded_file = st.file_uploader(
|
| 127 |
+
"Déposez votre fichier CSV ou Excel ici",
|
| 128 |
+
type=["csv", "xlsx"],
|
| 129 |
+
key="file_uploader",
|
| 130 |
+
help="Chargez votre propre jeu de données pour l'analyse."
|
| 131 |
+
)
|
| 132 |
+
use_header = st.checkbox("La première ligne est l'en-tête", value=st.session_state.last_header_preference, key="header_toggle")
|
| 133 |
header_param = 0 if use_header else None
|
| 134 |
|
| 135 |
data = None
|
| 136 |
data_source_info = "Aucune donnée chargée"
|
| 137 |
load_error = False
|
| 138 |
trigger_reload = False
|
| 139 |
+
current_data_id = None # Initialisation
|
| 140 |
|
| 141 |
+
# --- LOGIQUE DE DÉCLENCHEMENT DU RECHARGEMENT (Upload uniquement) ---
|
|
|
|
| 142 |
if uploaded_file is not None:
|
| 143 |
current_data_id = f"{uploaded_file.name}-{uploaded_file.size}"
|
| 144 |
if st.session_state.data_loaded_id != current_data_id or st.session_state.last_header_preference != use_header:
|
| 145 |
trigger_reload = True
|
|
|
|
|
|
|
|
|
|
| 146 |
else:
|
| 147 |
+
# Si aucun fichier n'est uploadé, on ne fait rien ici.
|
| 148 |
+
# Si l'utilisateur retire le fichier, l'état précédent est conservé
|
| 149 |
+
# jusqu'au prochain upload ou rafraîchissement de page.
|
| 150 |
+
# On pourrait ajouter un bouton "Vider les données" si nécessaire.
|
| 151 |
+
pass
|
| 152 |
+
|
| 153 |
+
# Recharger/Charger si nécessaire (Seulement si trigger_reload est True ET uploaded_file existe)
|
| 154 |
+
if trigger_reload and uploaded_file is not None:
|
| 155 |
+
st.sidebar.info("🔄 Chargement du fichier uploadé...")
|
| 156 |
st.session_state.html_report_content = None
|
| 157 |
st.session_state.html_report_filename = "rapport.html"
|
| 158 |
+
st.session_state.analyses = [] # Reset analyses pour le nouveau fichier
|
| 159 |
+
|
| 160 |
+
try:
|
| 161 |
+
st.info(f"Traitement de '{uploaded_file.name}'...")
|
| 162 |
+
if uploaded_file.name.endswith('.csv'):
|
| 163 |
+
data = pd.read_csv(uploaded_file, header=header_param)
|
| 164 |
+
elif uploaded_file.name.endswith('.xlsx'):
|
| 165 |
+
data = pd.read_excel(uploaded_file, header=header_param) # Requires openpyxl
|
| 166 |
+
|
| 167 |
+
st.session_state.dataframe_to_export = data
|
| 168 |
+
data_source_info = f"Fichier chargé : {uploaded_file.name}"
|
| 169 |
+
st.session_state.data_loaded_id = current_data_id
|
| 170 |
+
st.session_state.last_header_preference = use_header
|
| 171 |
+
load_error = False
|
| 172 |
+
st.success(f"'{uploaded_file.name}' chargé avec succès.")
|
| 173 |
+
st.rerun()
|
| 174 |
+
|
| 175 |
+
except Exception as e:
|
| 176 |
+
st.error(f"Erreur lors du chargement du fichier '{uploaded_file.name}': {e}")
|
| 177 |
+
if '.xlsx' in uploaded_file.name: st.warning("Vérifiez que 'openpyxl' est dans requirements.txt.", icon="💡")
|
| 178 |
+
data = None; st.session_state.dataframe_to_export = None
|
| 179 |
+
st.session_state.data_loaded_id = None
|
| 180 |
+
data_source_info = "Erreur de chargement"
|
| 181 |
+
load_error = True
|
| 182 |
+
# --- FIN DU BLOC DE RECHARGEMENT ---
|
| 183 |
+
|
| 184 |
+
# Récupérer les données de la session si elles n'ont pas été rechargées
|
| 185 |
+
if not trigger_reload:
|
| 186 |
+
data = st.session_state.get('dataframe_to_export', None)
|
| 187 |
+
data_source_info = st.session_state.get('data_source_info', "Aucune donnée chargée")
|
| 188 |
|
| 189 |
+
# Mise à jour finale de l'info source
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
st.session_state.data_source_info = data_source_info
|
| 191 |
+
# S'assurer que 'data' reflète bien l'état actuel
|
| 192 |
data = st.session_state.get('dataframe_to_export', None)
|
| 193 |
|
| 194 |
+
# --- Définition des colonnes (RESTAURÉ - code inchangé) ---
|
|
|
|
| 195 |
categorical_columns = []
|
| 196 |
numerical_columns = []
|
| 197 |
datetime_columns = []
|
| 198 |
all_columns = []
|
|
|
|
| 199 |
if data is not None:
    all_columns = data.columns.tolist()
    # Conversions below are applied to a copy and only drive the
    # numeric / datetime / categorical classification of the columns.
    data_processed = data.copy()

    # Boolean columns are stored as text so they end up in the categorical list.
    for col in data_processed.select_dtypes(include=['bool']).columns:
        try:
            data_processed[col] = data_processed[col].astype(str)
        except Exception:
            pass  # best effort: keep the original dtype if the cast fails

    for col in data_processed.select_dtypes(include=['object']).columns:
        # --- Numeric attempt ---
        try:
            converted_num = pd.to_numeric(data_processed[col], errors='coerce')
            original_looks_numeric = False
            try:
                # Missing values are ignored by the heuristic: NaN stringifies
                # to 'nan', which would otherwise reject a numeric-looking
                # column as soon as it has a single empty cell.
                non_missing = data[col].dropna()
                original_looks_numeric = non_missing.astype(str).str.match(
                    r'^-?(\d+(\.\d+)?|\d+\.?\d*)$'
                ).all()
            except Exception:
                pass  # heuristic failure simply means "not numeric"
            if original_looks_numeric and converted_num.notna().any():
                data_processed[col] = converted_num
                continue
        except (ValueError, TypeError, AttributeError):
            pass

        # --- Datetime attempt ---
        try:
            # `infer_datetime_format` is intentionally not passed: it is
            # deprecated and has no effect since pandas 2.0.
            converted_date = pd.to_datetime(data_processed[col], errors='coerce')
            if converted_date.notna().any():
                # Large integer-like values are taken to be identifiers rather
                # than dates, so the column is left untouched in that case.
                is_likely_id = False
                try:
                    original_col_sample = data[col].dropna().unique()
                    if len(original_col_sample) > 0:
                        sample_numeric = pd.to_numeric(
                            original_col_sample[:min(len(original_col_sample), 50)],
                            errors='coerce',
                        )
                        non_nan_numeric = sample_numeric[~np.isnan(sample_numeric)]
                        if len(non_nan_numeric) > 0:
                            is_int_like = np.all(np.mod(non_nan_numeric, 1) == 0)
                            is_large = np.nanmax(non_nan_numeric) > 10000
                            if is_int_like and is_large:
                                is_likely_id = True
                except Exception:
                    pass  # heuristic failure: treat the column as not an ID
                if not is_likely_id:
                    data_processed[col] = converted_date
        except (ValueError, TypeError, OverflowError):
            pass

    numerical_columns = data_processed.select_dtypes(include=['number']).columns.tolist()
    datetime_columns = data_processed.select_dtypes(include=['datetime', 'datetimetz']).columns.tolist()
    categorical_columns = data_processed.select_dtypes(exclude=['number', 'datetime', 'datetimetz', 'timedelta']).columns.tolist()
|
| 234 |
+
# else: # Si data is None, les listes restent vides
|
|
|
|
|
|
|
| 235 |
|
| 236 |
+
# --- Renommage des Colonnes (RESTAURÉ - code inchangé) ---
|
|
|
|
| 237 |
st.subheader("2. Renommer Colonnes (Optionnel)")
|
| 238 |
current_columns_for_rename = all_columns
|
| 239 |
if data is not None and current_columns_for_rename:
|
|
|
|
| 240 |
rename_key_suffix = st.session_state.data_loaded_id if st.session_state.data_loaded_id else "no_data"
|
| 241 |
col_to_rename = st.selectbox(
|
| 242 |
"Colonne à renommer :", current_columns_for_rename, index=0,
|
|
|
|
| 247 |
key=f"rename_text_{rename_key_suffix}"
|
| 248 |
)
|
| 249 |
if st.button("Appliquer Renommage", key=f"rename_button_{rename_key_suffix}"):
|
|
|
|
| 250 |
data_to_modify = st.session_state.dataframe_to_export
|
| 251 |
if data_to_modify is not None and col_to_rename and new_name and col_to_rename in data_to_modify.columns:
|
| 252 |
if new_name in data_to_modify.columns and new_name != col_to_rename: st.error(f"Le nom '{new_name}' existe déjà.")
|
|
|
|
| 256 |
st.success(f"'{col_to_rename}' renommée en '{new_name}'. Rafraîchissement...")
|
| 257 |
st.rerun()
|
| 258 |
else: st.warning("Le nouveau nom ne peut pas être vide.")
|
| 259 |
+
elif data_to_modify is None: st.error("Impossible de renommer: Aucune donnée chargée.")
|
| 260 |
+
elif not col_to_rename: st.warning("Sélectionnez une colonne.")
|
| 261 |
+
elif not new_name: st.warning("Entrez un nouveau nom.")
|
| 262 |
+
elif col_to_rename not in data_to_modify.columns: st.error(f"Colonne '{col_to_rename}' non trouvée.")
|
| 263 |
+
else:
|
| 264 |
+
st.info("Chargez des données pour renommer les colonnes.")
|
| 265 |
|
| 266 |
+
# --- Exportation (RESTAURÉ - code inchangé) ---
|
|
|
|
| 267 |
st.subheader("3. Exporter")
|
| 268 |
df_to_export = st.session_state.get('dataframe_to_export', None)
|
| 269 |
if df_to_export is not None:
|
|
|
|
|
|
|
|
|
|
| 270 |
export_key_suffix = st.session_state.data_loaded_id if st.session_state.data_loaded_id else "no_data"
|
| 271 |
source_for_filename = st.session_state.get('data_source_info', 'donnees')
|
| 272 |
if "Fichier chargé :" in source_for_filename: base_name = source_for_filename.split(":")[-1].strip(); export_filename_base = f"export_{os.path.splitext(base_name)[0]}"
|
| 273 |
+
else: export_filename_base = "export_donnees" # Fallback
|
|
|
|
| 274 |
export_filename_base = "".join(c if c.isalnum() or c in ('_', '-') else '_' for c in export_filename_base)
|
| 275 |
|
| 276 |
col_export1, col_export2, col_export3 = st.columns(3)
|
| 277 |
+
# CSV export: the frame is serialized without its index and offered as UTF-8 bytes.
with col_export1:
    try:
        csv_text = df_to_export.to_csv(index=False)
        csv_data = csv_text.encode('utf-8')
        st.download_button(
            "Exporter CSV",
            csv_data,
            f"{export_filename_base}.csv",
            "text/csv",
            key=f"dl_csv_{export_key_suffix}",
        )
    except Exception as e:
        st.error(f"Erreur Export CSV: {e}")

# Excel export: the workbook is written to an in-memory buffer via openpyxl.
with col_export2:
    try:
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_to_export.to_excel(writer, index=False, sheet_name='Data')
        excel_bytes = excel_buffer.getvalue()
        st.download_button(
            "Exporter Excel",
            excel_bytes,
            f"{export_filename_base}.xlsx",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            key=f"dl_excel_{export_key_suffix}",
        )
    except Exception as e:
        st.error(f"Erreur Export Excel: {e}")
        st.warning("Vérifiez 'openpyxl' dans reqs.", icon="💡")
|
| 289 |
+
# HTML report: a button renders every executed analysis into HTML fragments,
# stores the encoded report in session state, and a second button downloads it.
with col_export3:
    if template:
        if st.button("Préparer Rapport HTML", key=f"prep_html_{export_key_suffix}"):
            with st.spinner("Génération rapport..."):
                try:
                    # Local import: headings are built from column names and
                    # parameter values, which must be escaped before being
                    # embedded in markup.
                    from html import escape as _escape_html

                    data_for_report = st.session_state.dataframe_to_export
                    if data_for_report is not None:
                        num_submissions_report = data_for_report['_index'].nunique() if '_index' in data_for_report.columns else len(data_for_report)
                        columns_for_report = data_for_report.columns.tolist()
                        tables_html_list, charts_html_list = [], []
                        for analysis in st.session_state.get('analyses', []):
                            result = analysis.get('result')
                            analysis_type = analysis.get('type')
                            # 'executed_params' exists with value None until the
                            # analysis has run, so `.get(key, default)` would
                            # return None here; fall back explicitly.
                            params = analysis.get('executed_params') or analysis.get('params') or {}
                            analysis_id_rep = analysis.get('id', -1) + 1
                            if result is not None:
                                title = _escape_html(f"Analyse {analysis_id_rep}: {analysis_type.replace('_', ' ').title()}")
                                param_details_list = []
                                for k, v in params.items():
                                    if v is not None and v != [] and v != 'None':
                                        # Long lists are abbreviated to first/last element plus length.
                                        v_repr = f"[{v[0]}, ..., {v[-1]}] ({len(v)})" if isinstance(v, list) and len(v) > 3 else str(v)
                                        param_details_list.append(f"{k.replace('_', ' ').title()} = {v_repr}")
                                param_details = _escape_html("; ".join(param_details_list))
                                # Only the <small> wrapper is markup; its content is escaped above.
                                full_title = f"{title} <small>({param_details})</small>" if param_details else title
                                try:
                                    if analysis_type in ['aggregated_table', 'descriptive_stats'] and isinstance(result, pd.DataFrame):
                                        table_html = result.to_html(index=(analysis_type == 'descriptive_stats'), classes='table table-striped table-hover table-sm', border=0)
                                        tables_html_list.append(f"<h3>{full_title}</h3>{table_html}")
                                    elif analysis_type == 'graph' and isinstance(result, go.Figure):
                                        chart_html = result.to_html(full_html=False, include_plotlyjs='cdn')
                                        charts_html_list.append(f"<h3>{full_title}</h3>{chart_html}")
                                except Exception as e_render:
                                    # A single failing result must not abort the whole report.
                                    st.warning(f"Erreur rendu résultat Analyse {analysis_id_rep}: {e_render}")
                        tables_html = "\n<hr/>\n".join(tables_html_list) if tables_html_list else "<p>Aucun tableau généré.</p>"
                        charts_html = "\n<hr/>\n".join(charts_html_list) if charts_html_list else "<p>Aucun graphique généré.</p>"
                        html_content = generate_html_report(data_for_report, num_submissions_report, columns_for_report, tables_html, charts_html)
                        if "Erreur:" not in html_content:
                            st.session_state.html_report_content = html_content.encode('utf-8')
                            st.session_state.html_report_filename = f"rapport_{export_filename_base}.html"
                            st.success("Rapport prêt.")
                        else:
                            st.error("Échec génération contenu HTML.")
                    else:
                        st.error("Pas de données pour le rapport.")
                except Exception as e_report:
                    st.error(f"Erreur préparation rapport: {e_report}")

        # The download button is shown only while a prepared report is stored;
        # clicking it clears the stored content.
        if st.session_state.get('html_report_content'):
            st.download_button("Télécharger Rapport HTML", st.session_state.html_report_content, st.session_state.html_report_filename, "text/html", key=f"dl_html_{export_key_suffix}", on_click=lambda: st.session_state.update(html_report_content=None))
|
|
|
|
| 342 |
data_source_info = st.session_state.get('data_source_info', "Aucune donnée chargée")
|
| 343 |
|
| 344 |
if data is not None:
|
| 345 |
+
# --- AFFICHAGE INFOS DONNÉES (RESTAURÉ) ---
|
| 346 |
+
st.info(f"**Source de données active :** {data_source_info}")
|
| 347 |
+
try:
|
| 348 |
+
if '_index' in data.columns: num_submissions = data['_index'].nunique(); display_text = f"Nb soumissions uniques ('_index') : **{num_submissions}**"
|
| 349 |
+
else: num_submissions = len(data); display_text = f"Nb total enregistrements : **{num_submissions}**"
|
| 350 |
+
st.markdown(f"<div style='font-size: 1.1em;'>{display_text}</div>", unsafe_allow_html=True)
|
| 351 |
+
st.write(f"Dimensions : **{data.shape[0]} lignes x {data.shape[1]} colonnes**")
|
| 352 |
+
|
| 353 |
+
with st.expander("Afficher aperçu données (5 premières lignes)"):
|
| 354 |
+
st.dataframe(data.head(), use_container_width=True)
|
| 355 |
+
with st.expander("Afficher détails colonnes (Types détectés)"):
|
| 356 |
+
cols_df = pd.DataFrame({'Nom Colonne': all_columns})
|
| 357 |
+
col_types = []
|
| 358 |
+
for col in all_columns:
|
| 359 |
+
if col in numerical_columns: col_types.append(f"Numérique ({data[col].dtype})")
|
| 360 |
+
elif col in datetime_columns: col_types.append(f"Date/Heure ({data[col].dtype})")
|
| 361 |
+
elif col in categorical_columns: col_types.append(f"Catégoriel ({data[col].dtype})")
|
| 362 |
+
else: col_types.append(f"Inconnu ({data[col].dtype})")
|
| 363 |
+
cols_df['Type Détecté'] = col_types
|
| 364 |
+
cols_df['Num'] = cols_df['Nom Colonne'].isin(numerical_columns)
|
| 365 |
+
cols_df['Cat'] = cols_df['Nom Colonne'].isin(categorical_columns)
|
| 366 |
+
cols_df['Date'] = cols_df['Nom Colonne'].isin(datetime_columns)
|
| 367 |
+
st.dataframe(cols_df.set_index('Nom Colonne'), use_container_width=True)
|
| 368 |
+
except Exception as e_display: st.error(f"Erreur affichage infos données: {e_display}")
|
| 369 |
+
|
| 370 |
+
# --- SECTION AJOUT ANALYSES (RESTAURÉ) ---
|
| 371 |
st.subheader("🛠️ Construire les Analyses")
|
| 372 |
+
st.write("Ajoutez des blocs d'analyse pour explorer vos données.")
|
| 373 |
+
col_add1, col_add2, col_add3 = st.columns(3)
analysis_key_suffix = "data_loaded"  # data is not None here

# One entry per button: (column, label, key prefix, help text, analysis type).
_add_analysis_buttons = (
    (col_add1, "➕ Tableau Agrégé", "add_agg", "Stats groupées (ex: moyenne par catégorie).", 'aggregated_table'),
    (col_add2, "➕ Graphique", "add_graph", "Visualisation interactive.", 'graph'),
    (col_add3, "➕ Stats Descriptives", "add_desc", "Résumé statistique (moyenne, médiane...).", 'descriptive_stats'),
)
for _column, _label, _key_prefix, _help_text, _analysis_kind in _add_analysis_buttons:
    with _column:
        if st.button(_label, key=f"{_key_prefix}_{analysis_key_suffix}", help=_help_text):
            # Next id is one past the highest existing id (0 when the list is empty).
            new_id = max((a.get('id', -1) for a in st.session_state.analyses), default=-1) + 1
            st.session_state.analyses.append({
                'type': _analysis_kind,
                'params': {},
                'result': None,
                'id': new_id,
                'executed_params': None,
            })
            st.rerun()
|
| 390 |
+
|
| 391 |
+
# --- AFFICHAGE ET CONFIG ANALYSES (RESTAURÉ - Bloc entier) ---
|
| 392 |
st.subheader("🔍 Analyses Configurées")
|
| 393 |
+
indices_to_remove = []
|
| 394 |
+
data_available = True # data is not None here
|
| 395 |
+
conf_categorical_columns = categorical_columns
|
| 396 |
+
conf_numerical_columns = numerical_columns
|
| 397 |
+
conf_datetime_columns = datetime_columns
|
| 398 |
+
conf_all_columns = all_columns
|
| 399 |
+
columns_defined = bool(conf_all_columns)
|
| 400 |
+
|
| 401 |
+
if not st.session_state.analyses:
|
| 402 |
+
st.info("Cliquez sur '➕ Ajouter...' ci-dessus pour commencer.")
|
| 403 |
+
|
| 404 |
+
if data_available and columns_defined:
|
| 405 |
+
for i, analysis in enumerate(st.session_state.analyses):
|
| 406 |
+
analysis_id = analysis.get('id', i)
|
| 407 |
+
analysis_container = st.container(border=True)
|
| 408 |
+
with analysis_container:
|
| 409 |
+
cols_header = st.columns([0.95, 0.05])
|
| 410 |
+
with cols_header[0]: st.subheader(f"Analyse {i+1}: {analysis['type'].replace('_', ' ').title()}")
|
| 411 |
+
with cols_header[1]:
    if st.button("🗑️", key=f"remove_analysis_{analysis_id}", help="Supprimer"):
        # Delete right away: st.rerun() stops the current run immediately, so
        # recording the index for a later clean-up pass would never take
        # effect (the pending list is rebuilt empty on every run).
        del st.session_state.analyses[i]
        st.rerun()
|
| 414 |
+
|
| 415 |
+
# --- CONFIGURATION TABLEAU AGRÉGÉ (Restauré) ---
|
| 416 |
+
if analysis['type'] == 'aggregated_table':
|
| 417 |
+
st.markdown("##### Configuration Tableau Agrégé")
|
| 418 |
+
if not conf_categorical_columns or not conf_numerical_columns: st.warning("Nécessite colonnes Catégorielles ET Numériques.")
|
| 419 |
+
else:
|
| 420 |
+
init_analysis_state(i, 'group_by_columns', []); init_analysis_state(i, 'agg_column', conf_numerical_columns[0]); init_analysis_state(i, 'agg_method', 'count')
|
| 421 |
+
col_agg1, col_agg2, col_agg3 = st.columns(3)
|
| 422 |
+
with col_agg1: default_groupby = [col for col in analysis['params'].get('group_by_columns', []) if col in conf_categorical_columns]; st.session_state.analyses[i]['params']['group_by_columns'] = st.multiselect(f"Regrouper par :", conf_categorical_columns, default=default_groupby, key=f"agg_table_groupby_{analysis_id}")
|
| 423 |
+
with col_agg2: agg_col_index = get_safe_index(conf_numerical_columns, analysis['params'].get('agg_column')); st.session_state.analyses[i]['params']['agg_column'] = st.selectbox(f"Calculer sur :", conf_numerical_columns, index=agg_col_index, key=f"agg_table_agg_col_{analysis_id}")
|
| 424 |
+
with col_agg3: agg_method_options = ('count', 'mean', 'sum', 'median', 'min', 'max', 'std', 'nunique'); agg_method_index = get_safe_index(agg_method_options, analysis['params'].get('agg_method', 'count')); st.session_state.analyses[i]['params']['agg_method'] = st.selectbox(f"Avec fonction :", agg_method_options, index=agg_method_index, key=f"agg_table_agg_method_{analysis_id}")
|
| 425 |
+
if st.button(f"Exécuter Tableau Agrégé {i+1}", key=f"run_agg_table_{analysis_id}"):
|
| 426 |
+
current_params = st.session_state.analyses[i]['params'].copy(); group_by_cols = current_params['group_by_columns']; agg_col = current_params['agg_column']; agg_method = current_params['agg_method']
|
| 427 |
+
if group_by_cols and agg_col and agg_method:
|
| 428 |
+
try: # ... (logique d'agrégation et renommage colonne résultat) ...
|
| 429 |
+
if all(c in data.columns for c in group_by_cols) and agg_col in data.columns:
|
| 430 |
+
st.info(f"Exécution agrégation: {agg_method}({agg_col}) groupé par {group_by_cols}")
|
| 431 |
+
aggregated_data = data.groupby(group_by_cols, as_index=False)[agg_col].agg(agg_method)
|
| 432 |
+
agg_col_name_new = f'{agg_col}_{agg_method}'
|
| 433 |
+
if agg_col in aggregated_data.columns: aggregated_data = aggregated_data.rename(columns={agg_col: agg_col_name_new})
|
| 434 |
+
st.session_state.analyses[i]['result'] = aggregated_data
|
| 435 |
+
st.session_state.analyses[i]['executed_params'] = current_params
|
| 436 |
+
st.rerun()
|
| 437 |
+
else: st.error("Colonnes invalides.")
|
| 438 |
+
except Exception as e: st.error(f"Erreur Agrégation {i+1}: {e}"); st.session_state.analyses[i]['result'] = None; st.session_state.analyses[i]['executed_params'] = current_params
|
| 439 |
+
else: st.warning("Sélectionnez les 3 options.")
|
| 440 |
+
|
| 441 |
+
# --- CONFIGURATION GRAPHIQUE (Restauré) ---
|
| 442 |
+
elif analysis['type'] == 'graph':
|
| 443 |
+
st.markdown("##### Configuration Graphique")
|
| 444 |
+
if not conf_all_columns: st.warning("Aucune colonne disponible.")
|
| 445 |
+
else:
|
| 446 |
+
if 0 <= i < len(st.session_state.analyses): # Safety check
|
| 447 |
+
# Initialisation état graphique
|
| 448 |
+
init_analysis_state(i, 'chart_type', 'Bar Chart'); init_analysis_state(i, 'group_by_columns_graph', []); init_analysis_state(i, 'agg_column_graph', conf_numerical_columns[0] if conf_numerical_columns else None); init_analysis_state(i, 'agg_method_graph', 'count')
|
| 449 |
+
init_analysis_state(i, 'x_column', conf_categorical_columns[0] if conf_categorical_columns else (conf_datetime_columns[0] if conf_datetime_columns else conf_all_columns[0])); init_analysis_state(i, 'y_column', conf_numerical_columns[0] if conf_numerical_columns else None); init_analysis_state(i, 'color_column', 'None'); init_analysis_state(i, 'size_column', 'None'); init_analysis_state(i, 'facet_column', 'None'); init_analysis_state(i, 'hover_data_cols', [])
|
| 450 |
+
# Selecteur Type Graphique
|
| 451 |
+
chart_type_options = ('Bar Chart', 'Line Chart', 'Scatter Plot', 'Histogram', 'Box Plot', 'Violin Plot', 'Heatmap', 'Density Contour', 'Area Chart', 'Funnel Chart', 'Timeline (Gantt)', 'Sunburst', 'Treemap', '3D Scatter Plot', 'Pair Plot (SPLOM)'); chart_type_index = get_safe_index(chart_type_options, st.session_state.analyses[i]['params'].get('chart_type')); st.session_state.analyses[i]['params']['chart_type'] = st.selectbox(f"Type graphique:", chart_type_options, index=chart_type_index, key=f"graph_type_{analysis_id}"); graph_analysis_type = st.session_state.analyses[i]['params']['chart_type']
|
| 452 |
+
# Détermination source données (originale ou agrégée)
|
| 453 |
+
plot_data_source_df = data; is_aggregated = False; agg_warning = None; current_group_by = st.session_state.analyses[i]['params'].get('group_by_columns_graph', []); current_agg_col = st.session_state.analyses[i]['params'].get('agg_column_graph'); current_agg_method = st.session_state.analyses[i]['params'].get('agg_method_graph'); aggregation_enabled = bool(current_group_by)
|
| 454 |
+
if aggregation_enabled: # Tentative agrégation si configurée
|
| 455 |
+
# ... (vérifications et try/except pour agréger) ...
|
| 456 |
+
if not current_group_by: agg_warning = "Sélectionnez colonne(s) pour 'Agréger par'." # ... autres warnings ...
|
| 457 |
+
elif not all(c in data.columns for c in current_group_by) or current_agg_col not in data.columns: agg_warning = "Colonnes agrégation invalides."
|
| 458 |
+
else:
|
| 459 |
+
try: temp_aggregated_data_graph = data.groupby(current_group_by, as_index=False)[current_agg_col].agg(current_agg_method); agg_col_name_new = f'{current_agg_col}_{current_agg_method}'; temp_aggregated_data_graph = temp_aggregated_data_graph.rename(columns={current_agg_col: agg_col_name_new}); plot_data_source_df = temp_aggregated_data_graph; is_aggregated = True
|
| 460 |
+
except Exception as agg_e: agg_warning = f"Erreur agrégation: {agg_e}"; plot_data_source_df = data; is_aggregated = False
|
| 461 |
+
chart_columns = plot_data_source_df.columns.tolist() if plot_data_source_df is not None else []
|
| 462 |
+
# Widgets Axes & Mappages
|
| 463 |
+
if not chart_columns: st.warning("Colonnes pour graphique non déterminées.")
|
| 464 |
+
else:
|
| 465 |
+
st.markdown("###### Axes & Mappages"); col1_axes, col2_axes, col3_axes = st.columns(3)
|
| 466 |
+
# Axe X
|
| 467 |
+
with col1_axes: default_x = analysis['params'].get('x_column'); if default_x not in chart_columns: default_x = chart_columns[0]; x_col_index = get_safe_index(chart_columns, default_x); st.session_state.analyses[i]['params']['x_column'] = st.selectbox(f"Axe X:", chart_columns, index=x_col_index, key=f"graph_x_{analysis_id}")
|
| 468 |
+
# Axe Y
|
| 469 |
+
with col2_axes: y_options = chart_columns; y_disabled = graph_analysis_type == 'Histogram'; y_label = "Axe Y"; default_y = analysis['params'].get('y_column'); if graph_analysis_type == 'Timeline (Gantt)': y_options = conf_categorical_columns; y_label = "Tâche/Groupe (Y)"; y_disabled=False; # ... (logique défaut Y) ...; y_col_index = get_safe_index(y_options, default_y) if default_y else 0; st.session_state.analyses[i]['params']['y_column'] = st.selectbox(y_label, y_options, index=y_col_index, key=f"graph_y_{analysis_id}", disabled=y_disabled)
|
| 470 |
+
# Couleur, Taille
|
| 471 |
+
with col3_axes: mapping_options_cat = ['None'] + conf_categorical_columns; mapping_options_num = ['None'] + conf_numerical_columns; default_color = analysis['params'].get('color_column', 'None'); if default_color not in mapping_options_cat + mapping_options_num: default_color = 'None'; color_col_index = get_safe_index(mapping_options_cat + mapping_options_num, default_color); st.session_state.analyses[i]['params']['color_column'] = st.selectbox(f"Couleur (Opt.):", mapping_options_cat + mapping_options_num, index=color_col_index, key=f"graph_color_{analysis_id}"); default_size = analysis['params'].get('size_column', 'None'); if default_size not in mapping_options_num: default_size = 'None'; size_col_index = get_safe_index(mapping_options_num, default_size); size_disabled = graph_analysis_type not in ['Scatter Plot', '3D Scatter Plot']; st.session_state.analyses[i]['params']['size_column'] = st.selectbox(f"Taille (Opt., Num.):", mapping_options_num, index=size_col_index, key=f"graph_size_{analysis_id}", disabled=size_disabled)
|
| 472 |
+
# Facet, Hover
|
| 473 |
+
col1_extra, col2_extra = st.columns(2); mapping_options_all = ['None'] + conf_all_columns
|
| 474 |
+
with col1_extra: default_facet = analysis['params'].get('facet_column', 'None'); if default_facet not in mapping_options_cat: default_facet = 'None'; facet_col_index = get_safe_index(mapping_options_cat, default_facet); facet_disabled = graph_analysis_type in ['Heatmap', 'Density Contour', 'Pair Plot (SPLOM)', 'Sunburst', 'Treemap']; st.session_state.analyses[i]['params']['facet_column'] = st.selectbox(f"Diviser par (Facet, Opt.):", mapping_options_cat, index=facet_col_index, key=f"graph_facet_{analysis_id}", disabled=facet_disabled)
|
| 475 |
+
with col2_extra: default_hover = analysis['params'].get('hover_data_cols', []); valid_default_hover = [c for c in default_hover if c in conf_all_columns]; st.session_state.analyses[i]['params']['hover_data_cols'] = st.multiselect("Infos survol (Hover):", conf_all_columns, default=valid_default_hover, key=f"graph_hover_{analysis_id}")
|
| 476 |
+
# Options Agrégation (collapsible)
|
| 477 |
+
with st.expander("Options d'agrégation (si besoin)", expanded=aggregation_enabled):
|
| 478 |
+
if not conf_categorical_columns or not conf_numerical_columns: st.caption("Nécessite cols Catégorielles ET Numériques.")
|
| 479 |
+
else: # ... (widgets agrégation) ...
|
| 480 |
+
if aggregation_enabled and agg_warning: st.warning(f"Avert. Aggr: {agg_warning}", icon="⚠️")
|
| 481 |
+
elif is_aggregated: st.caption(f"Utilisation données agrégées ({plot_data_source_df.shape[0]} l.).")
|
| 482 |
+
else: st.caption("Utilisation données originales.")
|
| 483 |
+
# Bouton Exécuter Graphique
|
| 484 |
+
if st.button(f"Exécuter Graphique {i+1}", key=f"run_graph_{analysis_id}"):
|
| 485 |
+
with st.spinner(f"Génération '{graph_analysis_type}'..."):
|
| 486 |
+
current_params = st.session_state.analyses[i]['params'].copy(); final_x = current_params['x_column']; final_y = current_params['y_column'] if graph_analysis_type != 'Histogram' else None; final_color = current_params['color_column'] if current_params['color_column'] != 'None' else None; final_size = current_params['size_column'] if current_params['size_column'] != 'None' else None; final_facet = current_params['facet_column'] if current_params['facet_column'] != 'None' else None; final_hover = current_params['hover_data_cols'] if current_params['hover_data_cols'] else None
|
| 487 |
+
# Validation colonnes
|
| 488 |
+
required_plot_cols = [final_x]; required_map_cols = [c for c in [final_color, final_size, final_facet] if c] + (final_hover or [])
|
| 489 |
+
if final_y and graph_analysis_type != 'Histogram': required_plot_cols.append(final_y)
|
| 490 |
+
if graph_analysis_type == 'Timeline (Gantt)': st.error("Timeline nécessite config Début/Fin."); required_plot_cols = [] # Invalide
|
| 491 |
+
plot_cols_exist = all(c in plot_data_source_df.columns for c in required_plot_cols if c)
|
| 492 |
+
map_cols_exist = all(c in data.columns for c in required_map_cols if c)
|
| 493 |
+
if not final_x: st.error("Axe X requis.")
|
| 494 |
+
elif not plot_cols_exist: st.error(f"Colonnes axes non trouvées dans source {'agrégée' if is_aggregated else 'originale'}.")
|
| 495 |
+
elif not map_cols_exist: st.warning(f"Colonnes mappage non trouvées dans données originales, ignorées.", icon="⚠️")
|
| 496 |
+
else: # Tentative de plot
|
| 497 |
+
try:
|
| 498 |
+
fig = None; px_args = {'data_frame': plot_data_source_df, 'x': final_x} # ... (assemblage px_args) ...
|
| 499 |
+
title = f"{graph_analysis_type}: {final_y or ''} vs {final_x}" # ... (assemblage titre) ...; px_args['title'] = title
|
| 500 |
+
# Logique Plotting par type (Bar, Line, Scatter, Histo, Box, Violin, Heatmap, Density, Area, Funnel, 3D, Pair Plot)
|
| 501 |
+
if graph_analysis_type == 'Bar Chart': fig = px.bar(**px_args)
|
| 502 |
+
elif graph_analysis_type == 'Line Chart': fig = px.line(**px_args)
|
| 503 |
+
# ... (tous les autres types de graphiques) ...
|
| 504 |
+
elif graph_analysis_type == 'Pair Plot (SPLOM)': # Exemple
|
| 505 |
+
splom_dims = [c for c in data.columns if c in conf_numerical_columns];
|
| 506 |
+
if len(splom_dims)>=2: splom_args={'data_frame':data, 'dimensions':splom_dims, 'title':'Pair Plot'}; # ... (ajout couleur si cat) ...; fig=px.scatter_matrix(**splom_args)
|
| 507 |
+
else: st.warning("Pair Plot requiert >= 2 cols numériques.")
|
| 508 |
+
|
| 509 |
+
if fig is not None: fig.update_layout(title_x=0.5); st.session_state.analyses[i]['result'] = fig; st.session_state.analyses[i]['executed_params'] = current_params; st.rerun()
|
| 510 |
+
except Exception as e: st.error(f"Erreur graph {i+1}: {e}"); st.session_state.analyses[i]['result'] = None; st.session_state.analyses[i]['executed_params'] = current_params
|
| 511 |
+
|
| 512 |
+
# --- CONFIGURATION STATS DESCRIPTIVES (Restauré) ---
|
| 513 |
+
elif analysis['type'] == 'descriptive_stats':
|
| 514 |
+
st.markdown("##### Configuration Stats Descriptives")
|
| 515 |
+
desc_col_options = conf_all_columns
|
| 516 |
+
if not desc_col_options: st.warning("Aucune colonne disponible.")
|
| 517 |
+
else:
|
| 518 |
+
init_analysis_state(i, 'selected_columns_desc', [])
|
| 519 |
+
default_desc_cols = analysis['params'].get('selected_columns_desc', []); valid_default_desc = [col for col in default_desc_cols if col in desc_col_options]
|
| 520 |
+
if not valid_default_desc: valid_default_desc = [c for c in conf_numerical_columns + conf_datetime_columns if c in desc_col_options] or desc_col_options[:min(len(desc_col_options), 5)]
|
| 521 |
+
st.session_state.analyses[i]['params']['selected_columns_desc'] = st.multiselect(f"Analyser colonnes :", desc_col_options, default=valid_default_desc, key=f"desc_stats_columns_{analysis_id}")
|
| 522 |
+
# Run descriptive statistics on the selected columns and store the result
# (or the failure) on the analysis entry.
if st.button(f"Exécuter Stats Descriptives {i+1}", key=f"run_desc_stats_{analysis_id}"):
    current_params = st.session_state.analyses[i]['params'].copy()
    selected_cols = current_params['selected_columns_desc']
    if selected_cols:
        try:
            # Keep only the columns that still exist in the current data.
            valid_cols = [col for col in selected_cols if col in data.columns]
            if valid_cols:
                st.info(f"Calcul stats descr pour: {', '.join(valid_cols)}")
                try:
                    descriptive_stats = data[valid_cols].describe(include='all', datetime_is_numeric=True)
                except TypeError:
                    # pandas >= 2.0 removed `datetime_is_numeric` (datetimes are
                    # always summarized numerically there), so retry without it.
                    descriptive_stats = data[valid_cols].describe(include='all')
                st.session_state.analyses[i]['result'] = descriptive_stats
                st.session_state.analyses[i]['executed_params'] = current_params
                st.rerun()
            else:
                st.warning("Colonnes sélectionnées non trouvées.")
        except Exception as e:
            # Record the failed attempt: result stays None while executed_params is set.
            st.error(f"Erreur Stats Desc {i+1}: {e}")
            st.session_state.analyses[i]['result'] = None
            st.session_state.analyses[i]['executed_params'] = current_params
    else:
        st.warning("Sélectionnez au moins une colonne.")
|
| 531 |
+
|
| 532 |
+
# --- AFFICHAGE RÉSULTAT (Restauré) ---
|
| 533 |
+
result_data = st.session_state.analyses[i].get('result')
|
| 534 |
+
executed_params_display = st.session_state.analyses[i].get('executed_params')
|
| 535 |
+
if result_data is not None:
|
| 536 |
+
st.markdown("---"); st.write(f"**Résultat Analyse {i+1}:**")
|
| 537 |
+
if executed_params_display: params_str = ", ".join([f"{k}={v}" for k,v in executed_params_display.items() if v is not None and v != []]); st.caption(f"Paramètres: {params_str}")
|
| 538 |
+
analysis_type = st.session_state.analyses[i]['type']
|
| 539 |
+
try:
|
| 540 |
+
if analysis_type in ['aggregated_table', 'descriptive_stats'] and isinstance(result_data, pd.DataFrame): st.dataframe(result_data.T if analysis_type == 'descriptive_stats' else result_data, use_container_width=True)
|
| 541 |
+
elif analysis_type == 'graph' and isinstance(result_data, go.Figure): st.plotly_chart(result_data, use_container_width=True)
|
| 542 |
+
else: st.write("Résultat non standard:"); st.write(result_data)
|
| 543 |
+
except Exception as e_display_result: st.error(f"Erreur affichage résultat {i+1}: {e_display_result}")
|
| 544 |
+
elif executed_params_display is not None: st.warning(f"Exécution précédente Analyse {i+1} échouée.", icon="⚠️")
|
| 545 |
+
|
| 546 |
+
# Suppression analyses marquées (Restauré)
|
| 547 |
+
if indices_to_remove:
|
| 548 |
+
for index in sorted(indices_to_remove, reverse=True):
|
| 549 |
+
if 0 <= index < len(st.session_state.analyses): del st.session_state.analyses[index]
|
| 550 |
+
st.rerun()
|
| 551 |
+
|
| 552 |
+
# --- SECTION ANALYSES AVANCÉES (RESTAURÉ - Bloc entier) ---
|
| 553 |
st.markdown("---")
|
| 554 |
st.subheader("🔬 Analyses Statistiques Avancées")
|
| 555 |
+
show_advanced = st.checkbox("Afficher les analyses avancées", key="toggle_advanced_stats", value=st.session_state.show_advanced_analysis)
|
| 556 |
+
st.session_state.show_advanced_analysis = show_advanced
|
| 557 |
+
|
| 558 |
+
if show_advanced:
|
| 559 |
+
adv_numerical_columns = conf_numerical_columns; adv_categorical_columns = conf_categorical_columns; adv_all_columns = conf_all_columns
|
| 560 |
+
if not data_available: st.warning("Chargez des données.")
|
| 561 |
+
elif not (adv_numerical_columns or adv_categorical_columns): st.warning("Nécessite colonnes Num/Cat.")
|
| 562 |
+
else:
|
| 563 |
+
adv_analysis_key_suffix = "adv_data_loaded"
|
| 564 |
+
advanced_analysis_type = st.selectbox("Sélectionnez analyse avancée :", ('Test T', 'ANOVA', 'Chi-Square Test', 'Corrélation', 'Régression Linéaire', 'ACP (PCA)', 'Clustering K-Means', 'Détection d\'Anomalies (Z-score)'), key=f"advanced_type_{adv_analysis_key_suffix}")
|
| 565 |
+
st.markdown("---")
|
| 566 |
+
def get_valid_data(df, col):
    """Return the non-null values of column ``col`` from ``df``.

    When ``df`` is None or does not contain ``col``, an empty float64
    Series is returned instead, so callers always receive a Series.
    """
    # Guard clause: nothing usable to extract from.
    if df is None or col not in df.columns:
        return pd.Series(dtype='float64')
    column_values = df[col]
    return column_values.dropna()
|
| 567 |
+
container_advanced = st.container(border=True)
|
| 568 |
+
with container_advanced:
|
| 569 |
+
# Test T (Restauré)
|
| 570 |
+
if advanced_analysis_type == 'Test T':
|
| 571 |
+
st.markdown("###### Test T (Comparaison 2 moyennes)"); cols_valid_t = [c for c in adv_categorical_columns if data[c].nunique() == 2]
|
| 572 |
+
if not adv_numerical_columns: st.warning("Nécessite Var Numérique.")
|
| 573 |
+
elif not cols_valid_t: st.warning("Nécessite Var Catégorielle à 2 groupes.")
|
| 574 |
+
else: # ... (Widgets et logique Test T) ...
|
| 575 |
+
col_t1, col_t2, col_t3 = st.columns([2, 2, 1]); # ... (selectboxes) ...; if st.button("Effectuer Test T"): # ... (logique ttest_ind) ...
|
| 576 |
+
|
| 577 |
+
# ANOVA (Restauré)
|
| 578 |
+
elif advanced_analysis_type == 'ANOVA':
|
| 579 |
+
st.markdown("###### ANOVA (Comparaison >2 moyennes)"); cols_valid_a = [c for c in adv_categorical_columns if data[c].nunique() > 2]
|
| 580 |
+
if not adv_numerical_columns: st.warning("Nécessite Var Numérique.")
|
| 581 |
+
elif not cols_valid_a: st.warning("Nécessite Var Catégorielle à >2 groupes.")
|
| 582 |
+
else: # ... (Widgets et logique ANOVA) ...
|
| 583 |
+
col_a1, col_a2, col_a3 = st.columns([2, 2, 1]); # ... (selectboxes) ...; if st.button("Effectuer ANOVA"): # ... (logique f_oneway) ...
|
| 584 |
+
|
| 585 |
+
# Chi-Square (Restauré)
|
| 586 |
+
elif advanced_analysis_type == 'Chi-Square Test':
|
| 587 |
+
st.markdown("###### Test Chi-carré (Indépendance 2 Vars Catégorielles)")
|
| 588 |
+
if len(adv_categorical_columns) < 2: st.warning("Nécessite >= 2 Vars Catégorielles.")
|
| 589 |
+
else: # ... (Widgets et logique Chi²) ...
|
| 590 |
+
col_c1, col_c2, col_c3 = st.columns([2, 2, 1]); # ... (selectboxes) ...; if st.button("Effectuer Test Chi²"): # ... (logique crosstab et chi2_contingency) ...
|
| 591 |
+
|
| 592 |
+
# Corrélation (Restauré)
|
| 593 |
+
elif advanced_analysis_type == 'Corrélation':
|
| 594 |
+
st.markdown("###### Matrice de Corrélation (Vars Numériques)")
|
| 595 |
+
if len(adv_numerical_columns) < 2: st.warning("Nécessite >= 2 Vars Numériques.")
|
| 596 |
+
else: # ... (Widget et logique Corrélation) ...
|
| 597 |
+
default_corr_cols = adv_numerical_columns[:min(len(adv_numerical_columns), 5)]; corr_features = st.multiselect("Sélectionnez 2+ vars numériques:", adv_numerical_columns, default=default_corr_cols, key=f"corr_vars_{adv_analysis_key_suffix}"); if st.button("Calculer Matrice Corrélation"): # ... (logique .corr() et px.imshow) ...
|
| 598 |
+
|
| 599 |
+
# Régression Linéaire (Restauré)
|
| 600 |
+
elif advanced_analysis_type == 'Régression Linéaire':
|
| 601 |
+
st.markdown("###### Régression Linéaire Simple (Y ~ X)")
|
| 602 |
+
if len(adv_numerical_columns) < 2: st.warning("Nécessite >= 2 Vars Numériques.")
|
| 603 |
+
else: # ... (Widgets et logique Régression) ...
|
| 604 |
+
col_r1, col_r2, col_r3 = st.columns([2, 2, 1]); # ... (selectboxes Y et X) ...; if st.button("Effectuer Régression"): # ... (logique train_test_split, LinearRegression, metrics, px.scatter trendline) ...
|
| 605 |
+
|
| 606 |
+
# ACP (Restauré)
|
| 607 |
+
elif advanced_analysis_type == 'ACP (PCA)':
|
| 608 |
+
st.markdown("###### ACP (Analyse en Composantes Principales)")
|
| 609 |
+
if len(adv_numerical_columns) < 2: st.warning("Nécessite >= 2 Vars Numériques.")
|
| 610 |
+
else: # ... (Widget et logique ACP) ...
|
| 611 |
+
default_pca_cols = adv_numerical_columns[:min(len(adv_numerical_columns), 5)]; pca_features = st.multiselect("Sélectionnez 2+ vars numériques:", adv_numerical_columns, default=default_pca_cols, key=f"pca_vars_{adv_analysis_key_suffix}"); if st.button("Effectuer ACP"): # ... (logique dropna, StandardScaler, PCA, variance expliquée, px.scatter, loadings, scree plot) ...
|
| 612 |
+
|
| 613 |
+
# K-Means (Restauré)
|
| 614 |
+
elif advanced_analysis_type == 'Clustering K-Means':
|
| 615 |
+
st.markdown("###### Clustering K-Means")
|
| 616 |
+
if len(adv_numerical_columns) < 2: st.warning("Nécessite >= 2 Vars Numériques.") # Ou 1 pour histo
|
| 617 |
+
else: # ... (Widgets et logique K-Means) ...
|
| 618 |
+
col_cl1, col_cl2, col_cl3 = st.columns([2, 1, 1]); # ... (multiselect features, number_input K) ...; if st.button("Effectuer Clustering"): # ... (logique dropna, StandardScaler, KMeans, visualisation 1D/2D/PCA, méthode du coude) ...
|
| 619 |
+
|
| 620 |
+
# Détection Anomalies (Restauré)
|
| 621 |
+
elif advanced_analysis_type == 'Détection d\'Anomalies (Z-score)':
|
| 622 |
+
st.markdown("###### Détection Anomalies (Z-score)")
|
| 623 |
+
if not adv_numerical_columns: st.warning("Nécessite >= 1 Var Numérique.")
|
| 624 |
+
else: # ... (Widgets et logique Z-score) ...
|
| 625 |
+
col_anom1, col_anom2, col_anom3 = st.columns([2, 1, 1]); # ... (multiselect features, number_input seuil Z) ...; if st.button("Détecter Anomalies"): # ... (logique dropna, zscore, affichage anomalies, histo avec seuils si 1 var) ...
|
| 626 |
+
|
| 627 |
+
|
| 628 |
+
else: # data is None (Message d'accueil mis à jour)
|
| 629 |
+
st.info("👋 Bienvenue ! Pour commencer, veuillez **charger un fichier** (CSV/Excel) via le bouton dans la barre latérale gauche.", icon="👈")
|
| 630 |
+
st.warning("Aucune donnée n'est actuellement chargée.", icon="⚠️")
|
| 631 |
|
| 632 |
|
| 633 |
# ==============================================================================
|
| 634 |
+
# USER MANUAL TAB (updated so it no longer mentions a local default file)
|
| 635 |
# ==============================================================================
|
| 636 |
with manual_tab:
|
|
|
|
| 637 |
st.markdown("## 📘 Manuel d'Utilisation - Suite d'Analyse Interactive")
|
| 638 |
st.markdown("""
|
| 639 |
+
Bienvenue ! Ce guide vous aide à utiliser efficacement cette application pour analyser vos données.
|
| 640 |
+
|
| 641 |
+
---
|
| 642 |
+
### 1. Chargement des Données (Barre Latérale ⚙️)
|
| 643 |
+
- **Uploader un Fichier** : Cliquez sur "Déposez votre fichier..." ou glissez votre fichier CSV/Excel dans la zone prévue dans la barre latérale gauche. C'est la seule façon de charger des données.
|
| 644 |
+
- **Utiliser l'en-tête** : Cochez/décochez la case "La première ligne est l'en-tête" pour indiquer si la première ligne contient les noms de colonnes.
|
| 645 |
+
- **Indicateur** : La source des données actives est indiquée dans la barre latérale et l'onglet principal.
|
| 646 |
+
|
| 647 |
+
---
|
| 648 |
+
### 2. Configuration (Barre Latérale ⚙️)
|
| 649 |
+
(Options disponibles uniquement si un fichier est chargé)
|
| 650 |
+
- **Renommer Colonnes** : Modifiez les noms de colonnes.
|
| 651 |
+
- **Exporter** : Téléchargez les données (CSV/Excel) ou un rapport HTML des analyses exécutées.
|
| 652 |
+
|
| 653 |
+
---
|
| 654 |
+
### 3. Analyses (Zone Principale 📊)
|
| 655 |
+
(Nécessite qu'un fichier soit chargé)
|
| 656 |
+
- **Construire** : Utilisez les boutons `➕ Ajouter...`.
|
| 657 |
+
- **Configurer & Exécuter** : Paramétrez chaque bloc et cliquez sur "Exécuter...".
|
| 658 |
+
- **Analyses Avancées** : Cochez la case pour accéder aux tests statistiques et modèles.
|
| 659 |
+
|
| 660 |
+
---
|
| 661 |
+
### 💡 Conseils & Dépannage
|
| 662 |
+
- **Chargement Excel échoue ?** Vérifiez `openpyxl` dans `requirements.txt`.
|
| 663 |
+
- **Pas de données chargées ?** Assurez-vous d'avoir uploadé un fichier valide.
|
| 664 |
+
- **Erreurs ?** Lisez les messages et consultez les logs HF.
|
| 665 |
+
|
| 666 |
+
---
|
| 667 |
+
**👨💻 Concepteur : Sidoine YEBADOKPO**
|
| 668 |
+
... (Infos contact) ...
|
| 669 |
""")
|
| 670 |
|
| 671 |
# ==============================================================================
|
| 672 |
+
# AI CHAT TAB (restored - same as the previous version, with debugging)
|
| 673 |
# ==============================================================================
|
| 674 |
# AI chat tab: shows the stored conversation, sends each new question to
# Gemini and records both sides of the exchange in the session state.
with chat_tab:
    st.markdown("## 💬 Chat IA (Assisté par Google Gemini)")

    if api_key:
        st.info("Posez des questions générales sur l'analyse...", icon="💡")

        # The model stays None when initialisation fails, which routes the
        # script to the "unavailable" message below.
        model_chat = None
        try:
            genai.configure(api_key=api_key)
            model_chat = genai.GenerativeModel('gemini-1.5-flash-latest')
        except Exception as e:
            st.error(f"Erreur init API Gemini: {e}")

        if not model_chat:
            st.error("Modèle Chat IA indisponible.")
        else:
            # Replay the conversation stored so far.
            for entry in st.session_state.gemini_chat_history:
                with st.chat_message(entry["role"]):
                    st.markdown(entry["content"])

            user_question = st.chat_input("Votre question à l'IA...")
            if user_question:
                # Record the question first, then echo it in the chat.
                st.session_state.gemini_chat_history.append(
                    {"role": "user", "content": user_question}
                )
                with st.chat_message("user"):
                    st.markdown(user_question)

                # Context preparation: column lists are only reported when
                # a dataframe is available in the session state.
                data_context_chat = st.session_state.get('dataframe_to_export')
                has_data_context = data_context_chat is not None
                num_cols_context = numerical_columns if has_data_context else []
                cat_cols_context = categorical_columns if has_data_context else []
                date_cols_context = datetime_columns if has_data_context else []
                analyses_context = list({
                    analysis['type'].replace('_', ' ').title()
                    for analysis in st.session_state.get('analyses', [])
                })
                source_info_context = st.session_state.get('data_source_info', 'Inconnue')
                context_prompt = f"""... (Prompt IA inchangé) ..."""

                # Response generation: any failure is shown to the user and
                # also stored in the history as an assistant message.
                try:
                    with st.spinner("L'IA réfléchit..."):
                        response = model_chat.generate_content(context_prompt)
                    reply_text = response.text if response else None
                    if reply_text:
                        with st.chat_message("assistant"):
                            st.markdown(reply_text)
                        st.session_state.gemini_chat_history.append(
                            {"role": "assistant", "content": reply_text}
                        )
                    else:
                        error_msg_ai = "L'IA n'a pas pu générer de réponse."
                        st.error(error_msg_ai)
                        st.session_state.gemini_chat_history.append(
                            {"role": "assistant", "content": f"({error_msg_ai})"}
                        )
                except Exception as e:
                    error_message = f"Erreur communication API Gemini: {e}"
                    st.error(error_message)
                    st.session_state.gemini_chat_history.append(
                        {"role": "assistant", "content": f"(Erreur système: {e})"}
                    )
    else:
        st.info("Chat IA désactivé. Configurez `GOOGLE_API_KEY` dans les Secrets HF.")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 713 |
|
| 714 |
# --- Fin du Script ---
|