Spaces:
Sleeping
Sleeping
Update src/dashboard_app.py
Browse files- src/dashboard_app.py +105 -102
src/dashboard_app.py
CHANGED
|
@@ -66,8 +66,6 @@ def init_analysis_state(analysis_index, param_key, default_value):
|
|
| 66 |
if 'params' not in analyses[analysis_index]:
|
| 67 |
analyses[analysis_index]['params'] = {}
|
| 68 |
analyses[analysis_index]['params'].setdefault(param_key, default_value)
|
| 69 |
-
# else: # Gérer le cas où l'index est hors limites si nécessaire
|
| 70 |
-
# print(f"Warning: Tentative d'initialisation de l'état pour un index d'analyse invalide: {analysis_index}")
|
| 71 |
|
| 72 |
# --- Fonction de chargement de données générique ---
|
| 73 |
def load_data(source_type, source_value, header_param, sep=None):
|
|
@@ -128,8 +126,8 @@ def load_data(source_type, source_value, header_param, sep=None):
|
|
| 128 |
st.rerun() # Indispensable pour mettre à jour toute l'application
|
| 129 |
else:
|
| 130 |
st.session_state.dataframe_to_export = None
|
| 131 |
-
st.session_state.data_source_info = f"Erreur: {error_message}" if error_message else "Erreur de chargement inconnue"
|
| 132 |
st.session_state.data_loaded_id = None
|
|
|
|
| 133 |
st.sidebar.error(st.session_state.data_source_info)
|
| 134 |
# Pas de rerun en cas d'erreur pour que l'erreur reste visible
|
| 135 |
|
|
@@ -179,7 +177,6 @@ with app_tab:
|
|
| 179 |
"Choisissez une méthode de chargement :",
|
| 180 |
options=load_options,
|
| 181 |
key="data_load_method_radio",
|
| 182 |
-
# index=get_safe_index(load_options, st.session_state.load_method) # Garder la sélection
|
| 183 |
)
|
| 184 |
|
| 185 |
# Options communes
|
|
@@ -273,7 +270,7 @@ with app_tab:
|
|
| 273 |
num_non_na_original = col_data.dropna().shape[0]
|
| 274 |
if num_non_na_original > 0 and converted_num.notna().sum() / num_non_na_original > 0.7:
|
| 275 |
# Heuristique simple pour éviter les ID (grands entiers)
|
| 276 |
-
#
|
| 277 |
is_int_like = converted_num.dropna().apply(lambda x: x == int(x)).all()
|
| 278 |
if is_int_like and converted_num.max() > 100000:
|
| 279 |
pass # Probablement un ID, on n'ajoute pas à numérique
|
|
@@ -510,20 +507,20 @@ with app_tab:
|
|
| 510 |
# ===========================
|
| 511 |
if analysis['type'] == 'aggregated_table':
|
| 512 |
st.markdown("##### Configuration Tableau Agrégé")
|
| 513 |
-
if not
|
| 514 |
-
#
|
| 515 |
-
# elif not
|
| 516 |
# st.warning("Nécessite au moins une colonne Numérique (sauf pour 'count').")
|
| 517 |
else:
|
| 518 |
init_analysis_state(i, 'group_by_columns', [])
|
| 519 |
-
init_analysis_state(i, 'agg_column',
|
| 520 |
init_analysis_state(i, 'agg_method', 'count')
|
| 521 |
|
| 522 |
col_agg1, col_agg2, col_agg3 = st.columns(3)
|
| 523 |
with col_agg1:
|
| 524 |
-
default_groupby = [col for col in analysis['params'].get('group_by_columns', []) if col in
|
| 525 |
st.session_state.analyses[i]['params']['group_by_columns'] = st.multiselect(
|
| 526 |
-
f"Regrouper par :",
|
| 527 |
default=default_groupby, key=f"agg_table_groupby_{analysis_id}"
|
| 528 |
)
|
| 529 |
with col_agg3: # Méthode avant colonne
|
|
@@ -536,7 +533,7 @@ with app_tab:
|
|
| 536 |
with col_agg2:
|
| 537 |
agg_method_selected_agg = st.session_state.analyses[i]['params']['agg_method']
|
| 538 |
agg_col_needed_agg = agg_method_selected_agg != 'count'
|
| 539 |
-
agg_col_options_agg =
|
| 540 |
agg_col_index_agg = get_safe_index(agg_col_options_agg, analysis['params'].get('agg_column'))
|
| 541 |
current_agg_col_selection_agg = st.selectbox(
|
| 542 |
f"Calculer sur :", agg_col_options_agg,
|
|
@@ -558,7 +555,7 @@ with app_tab:
|
|
| 558 |
else:
|
| 559 |
try:
|
| 560 |
valid_groupby = all(c in data.columns for c in group_by_cols)
|
| 561 |
-
valid_aggcol = agg_method == 'count' or (agg_col and agg_col in data.columns and agg_col in
|
| 562 |
|
| 563 |
if not valid_groupby: st.error("Colonnes de groupement invalides.")
|
| 564 |
elif not valid_aggcol: st.error(f"Colonne d'agrégation '{agg_col}' invalide ou non numérique (requis pour '{agg_method}').")
|
|
@@ -590,15 +587,15 @@ with app_tab:
|
|
| 590 |
# ===========================
|
| 591 |
elif analysis['type'] == 'graph':
|
| 592 |
st.markdown("##### Configuration Graphique")
|
| 593 |
-
if not
|
| 594 |
else:
|
| 595 |
# --- Initialisation état graphique ---
|
| 596 |
-
init_analysis_state(i, 'chart_type', '
|
| 597 |
init_analysis_state(i, 'group_by_columns_graph', [])
|
| 598 |
-
init_analysis_state(i, 'agg_column_graph',
|
| 599 |
init_analysis_state(i, 'agg_method_graph', 'count')
|
| 600 |
-
init_analysis_state(i, 'x_column',
|
| 601 |
-
init_analysis_state(i, 'y_column',
|
| 602 |
init_analysis_state(i, 'color_column', None)
|
| 603 |
init_analysis_state(i, 'size_column', None)
|
| 604 |
init_analysis_state(i, 'facet_column', None)
|
|
@@ -609,7 +606,11 @@ with app_tab:
|
|
| 609 |
init_analysis_state(i, 'z_column', None)
|
| 610 |
|
| 611 |
# --- Type Graphique ---
|
| 612 |
-
chart_type_options = (
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
chart_type_index = get_safe_index(chart_type_options, st.session_state.analyses[i]['params'].get('chart_type'))
|
| 614 |
st.session_state.analyses[i]['params']['chart_type'] = st.selectbox(f"Type graphique:", chart_type_options, index=chart_type_index, key=f"graph_type_{analysis_id}")
|
| 615 |
graph_analysis_type = st.session_state.analyses[i]['params']['chart_type']
|
|
@@ -630,7 +631,7 @@ with app_tab:
|
|
| 630 |
elif current_agg_method_graph != 'count' and not current_agg_col_graph: agg_warning = f"Sélectionnez 'Calculer' pour '{current_agg_method_graph}'."
|
| 631 |
elif not all(c in data.columns for c in current_group_by_graph): agg_warning = "Colonnes 'Agréger par' invalides."
|
| 632 |
elif current_agg_method_graph != 'count' and current_agg_col_graph not in data.columns: agg_warning = f"Colonne 'Calculer' ('{current_agg_col_graph}') invalide."
|
| 633 |
-
elif current_agg_method_graph != 'count' and current_agg_col_graph not in
|
| 634 |
else:
|
| 635 |
try:
|
| 636 |
if current_agg_method_graph == 'count':
|
|
@@ -665,94 +666,95 @@ with app_tab:
|
|
| 665 |
st.session_state.analyses[i]['params']['x_column'] = selected_x
|
| 666 |
# --- Axe Y ---
|
| 667 |
with col2_axes:
|
| 668 |
-
y_disabled = graph_analysis_type in ['
|
| 669 |
y_label = "Axe Y"
|
| 670 |
options_y = [c for c in chart_columns if c != selected_x]
|
| 671 |
-
if graph_analysis_type == '
|
| 672 |
-
options_y = [c for c in chart_columns if c in
|
| 673 |
default_y = analysis['params'].get('y_column')
|
| 674 |
if y_disabled: default_y = None
|
| 675 |
elif default_y not in options_y:
|
| 676 |
if is_aggregated and agg_col_name_new and agg_col_name_new in options_y: default_y = agg_col_name_new
|
| 677 |
else:
|
| 678 |
-
num_y_opts = [c for c in options_y if c in
|
| 679 |
default_y = num_y_opts[0] if num_y_opts else (options_y[0] if options_y else None)
|
| 680 |
y_col_index = get_safe_index(options_y, default_y)
|
| 681 |
selected_y = st.selectbox(y_label, options_y, index=y_col_index, key=f"graph_y_{analysis_id}", disabled=y_disabled or not options_y, help="Requis pour la plupart des graphiques.")
|
| 682 |
st.session_state.analyses[i]['params']['y_column'] = selected_y if not y_disabled else None
|
| 683 |
# --- Couleur & Taille ---
|
| 684 |
with col3_axes:
|
|
|
|
| 685 |
map_options_all_orig = [None] + original_columns
|
| 686 |
-
map_options_num_orig = [None] + [c for c in original_columns if c in conf_numerical_columns]
|
| 687 |
selected_color = st.selectbox(f"Couleur (Opt.):", map_options_all_orig, index=get_safe_index(map_options_all_orig, analysis['params'].get('color_column')), key=f"graph_color_{analysis_id}", format_func=lambda x: x if x is not None else "Aucune")
|
| 688 |
st.session_state.analyses[i]['params']['color_column'] = selected_color
|
| 689 |
-
size_disabled = graph_analysis_type not in ['
|
| 690 |
selected_size = st.selectbox(f"Taille (Opt., Num.):", map_options_num_orig, index=get_safe_index(map_options_num_orig, analysis['params'].get('size_column')), key=f"graph_size_{analysis_id}", disabled=size_disabled, format_func=lambda x: x if x is not None else "Aucune")
|
| 691 |
st.session_state.analyses[i]['params']['size_column'] = selected_size
|
| 692 |
|
| 693 |
# --- Facet, Hover & Autres ---
|
| 694 |
col1_extra, col2_extra = st.columns(2)
|
| 695 |
with col1_extra:
|
| 696 |
-
map_options_cat_orig = [None] + [c for c in original_columns if c in
|
| 697 |
-
facet_disabled = graph_analysis_type in ['
|
| 698 |
selected_facet = st.selectbox(f"Diviser par (Facet, Opt.):", map_options_cat_orig, index=get_safe_index(map_options_cat_orig, analysis['params'].get('facet_column')), key=f"graph_facet_{analysis_id}", disabled=facet_disabled, format_func=lambda x: x if x is not None else "Aucune")
|
| 699 |
st.session_state.analyses[i]['params']['facet_column'] = selected_facet
|
| 700 |
-
if graph_analysis_type == '
|
| 701 |
-
options_z = [c for c in chart_columns if c in
|
| 702 |
selected_z = st.selectbox("Axe Z (Num.):", options_z, index=get_safe_index(options_z, analysis['params'].get('z_column')), key=f"graph_z_{analysis_id}")
|
| 703 |
st.session_state.analyses[i]['params']['z_column'] = selected_z
|
| 704 |
with col2_extra:
|
| 705 |
selected_hover = st.multiselect("Infos survol (Hover):", original_columns, default=analysis['params'].get('hover_data_cols', []), key=f"graph_hover_{analysis_id}")
|
| 706 |
st.session_state.analyses[i]['params']['hover_data_cols'] = selected_hover
|
| 707 |
-
if graph_analysis_type == '
|
| 708 |
-
options_end = [c for c in chart_columns if c in
|
| 709 |
-
selected_end = st.selectbox("Date Fin (
|
| 710 |
st.session_state.analyses[i]['params']['gantt_end_column'] = selected_end
|
| 711 |
|
| 712 |
# --- Params spécifiques Sunburst/Treemap ---
|
| 713 |
-
if graph_analysis_type in ['
|
| 714 |
col1_hier, col2_hier = st.columns(2)
|
| 715 |
with col1_hier:
|
| 716 |
-
options_path = [c for c in chart_columns if c in
|
| 717 |
selected_path = st.multiselect("Chemin Hiérarchique:", options_path, default=analysis['params'].get('path_columns', []), key=f"graph_path_{analysis_id}")
|
| 718 |
st.session_state.analyses[i]['params']['path_columns'] = selected_path
|
| 719 |
with col2_hier:
|
| 720 |
-
options_values = [c for c in chart_columns if c in
|
| 721 |
selected_value = st.selectbox("Valeurs (Taille):", options_values, index=get_safe_index(options_values, analysis['params'].get('value_column')), key=f"graph_value_{analysis_id}")
|
| 722 |
st.session_state.analyses[i]['params']['value_column'] = selected_value
|
| 723 |
|
| 724 |
# --- Options d'agrégation ---
|
| 725 |
with st.expander("Options d'agrégation (avant graphique)", expanded=aggregation_enabled_graph):
|
| 726 |
-
if not
|
| 727 |
else:
|
| 728 |
col_agg_graph1, col_agg_graph2, col_agg_graph3 = st.columns(3)
|
| 729 |
with col_agg_graph1:
|
| 730 |
-
valid_gb = [c for c in analysis['params'].get('group_by_columns_graph',[]) if c in
|
| 731 |
st.session_state.analyses[i]['params']['group_by_columns_graph'] = st.multiselect(
|
| 732 |
-
f"Agréger par :",
|
| 733 |
)
|
| 734 |
group_by_sel = st.session_state.analyses[i]['params']['group_by_columns_graph']
|
| 735 |
with col_agg_graph3: # Méthode
|
| 736 |
-
|
| 737 |
st.session_state.analyses[i]['params']['agg_method_graph'] = st.selectbox(
|
| 738 |
-
f"Avec fonction :",
|
| 739 |
)
|
| 740 |
with col_agg_graph2: # Colonne
|
| 741 |
-
|
| 742 |
-
agg_col_need =
|
| 743 |
-
agg_col_opts =
|
| 744 |
agg_col_sel = st.selectbox(
|
| 745 |
-
f"Calculer :", agg_col_opts,
|
|
|
|
| 746 |
)
|
| 747 |
st.session_state.analyses[i]['params']['agg_column_graph'] = agg_col_sel if agg_col_need else None
|
| 748 |
-
if aggregation_enabled_graph and agg_warning: st.warning(f"Avert.
|
| 749 |
elif is_aggregated: st.caption(f"Utilisation données agrégées ({plot_data_source_df.shape[0]} l.).")
|
| 750 |
else: st.caption("Utilisation données originales.")
|
| 751 |
|
| 752 |
# --- Bouton Exécuter ---
|
| 753 |
if st.button(f"Exécuter Graphique {i+1}", key=f"run_graph_{analysis_id}"):
|
| 754 |
with st.spinner(f"Génération '{graph_analysis_type}'..."):
|
| 755 |
-
current_params = st.session_state.analyses[i]['params'].copy()
|
| 756 |
# --- Récupérer tous les params finaux ---
|
| 757 |
final_x = current_params.get('x_column'); final_y = current_params.get('y_column')
|
| 758 |
final_color = current_params.get('color_column'); final_size = current_params.get('size_column')
|
|
@@ -763,12 +765,12 @@ with app_tab:
|
|
| 763 |
# --- Validation ---
|
| 764 |
error_msg = None
|
| 765 |
if not final_x: error_msg = "Axe X requis."
|
| 766 |
-
elif graph_analysis_type not in ['
|
| 767 |
elif final_x and final_x not in plot_data_source_df.columns: error_msg = f"Colonne X '{final_x}' non trouvée."
|
| 768 |
elif final_y and final_y not in plot_data_source_df.columns: error_msg = f"Colonne Y '{final_y}' non trouvée."
|
| 769 |
-
elif graph_analysis_type == '
|
| 770 |
-
elif graph_analysis_type == '
|
| 771 |
-
elif graph_analysis_type in ['
|
| 772 |
|
| 773 |
# --- Préparer Args Plotly ---
|
| 774 |
px_args = {}
|
|
@@ -786,14 +788,14 @@ with app_tab:
|
|
| 786 |
if final_color: px_args['color'] = final_color
|
| 787 |
if final_facet: px_args['facet_col'] = final_facet
|
| 788 |
if final_hover_valid: px_args['hover_data'] = final_hover_valid
|
| 789 |
-
if final_size and graph_analysis_type in ['
|
| 790 |
-
if final_z and graph_analysis_type == '
|
| 791 |
-
if final_path and graph_analysis_type in ['
|
| 792 |
-
if final_value and graph_analysis_type in ['
|
| 793 |
-
if final_gantt_end and graph_analysis_type == '
|
| 794 |
|
| 795 |
title_parts = [graph_analysis_type] # Construire titre
|
| 796 |
-
if final_y and graph_analysis_type != '
|
| 797 |
if final_x: title_parts.append(final_x)
|
| 798 |
if final_color: title_parts.append(f"par {final_color}")
|
| 799 |
if is_aggregated: title_parts.append("(Agrégé)")
|
|
@@ -804,19 +806,19 @@ with app_tab:
|
|
| 804 |
fig = None
|
| 805 |
plot_func = getattr(px, graph_analysis_type.lower().replace(' ', '').replace('(','').replace(')',''), None)
|
| 806 |
|
| 807 |
-
if graph_analysis_type == '
|
| 808 |
-
splom_dims = [c for c in data.columns if c in
|
| 809 |
if len(splom_dims)>=2:
|
| 810 |
splom_args={'data_frame':data, 'dimensions':splom_dims}
|
| 811 |
-
color_splom = final_color if (final_color and final_color in data.columns and final_color in
|
| 812 |
if color_splom: splom_args['color'] = color_splom
|
| 813 |
splom_args['title'] = f'Pair Plot' + (f' par {color_splom}' if color_splom else '')
|
| 814 |
fig=px.scatter_matrix(**splom_args)
|
| 815 |
else: st.warning("Pair Plot requiert >= 2 cols numériques.")
|
| 816 |
-
elif graph_analysis_type == '
|
| 817 |
hist_args = {k: v for k, v in px_args.items() if k != 'y'} # Retirer Y pour histogramme
|
| 818 |
fig = px.histogram(**hist_args)
|
| 819 |
-
elif graph_analysis_type == '
|
| 820 |
gantt_args = px_args.copy()
|
| 821 |
if 'y' in gantt_args: gantt_args['task'] = gantt_args.pop('y') # Renommer y en task pour timeline
|
| 822 |
fig = px.timeline(**gantt_args)
|
|
@@ -834,20 +836,20 @@ with app_tab:
|
|
| 834 |
except Exception as e:
|
| 835 |
st.error(f"Erreur génération graphique {i+1}: {e}")
|
| 836 |
st.session_state.analyses[i]['result'] = None
|
| 837 |
-
st.session_state.analyses[i]['executed_params'] = current_params # Sauver params même si
|
| 838 |
|
| 839 |
# ===========================
|
| 840 |
# Bloc Stats Descriptives
|
| 841 |
# ===========================
|
| 842 |
elif analysis['type'] == 'descriptive_stats':
|
| 843 |
st.markdown("##### Configuration Stats Descriptives")
|
| 844 |
-
desc_col_options =
|
| 845 |
if not desc_col_options: st.warning("Aucune colonne disponible.")
|
| 846 |
else:
|
| 847 |
init_analysis_state(i, 'selected_columns_desc', [])
|
| 848 |
default_desc = analysis['params'].get('selected_columns_desc', [])
|
| 849 |
valid_default = [c for c in default_desc if c in desc_col_options] or \
|
| 850 |
-
[c for c in desc_col_options if c in
|
| 851 |
desc_col_options # Fallback
|
| 852 |
|
| 853 |
st.session_state.analyses[i]['params']['selected_columns_desc'] = st.multiselect(
|
|
@@ -914,7 +916,7 @@ with app_tab:
|
|
| 914 |
if show_advanced:
|
| 915 |
if not data_available: st.warning("Chargez des données pour utiliser les analyses avancées.")
|
| 916 |
# Vérifier si les listes de colonnes nécessaires existent et ne sont pas vides
|
| 917 |
-
elif not columns_defined or not (
|
| 918 |
st.warning("Nécessite des colonnes Numériques ou Catégorielles détectées pour les analyses avancées.")
|
| 919 |
else:
|
| 920 |
adv_analysis_key_suffix = st.session_state.data_loaded_id or "adv_data_loaded"
|
|
@@ -927,13 +929,13 @@ with app_tab:
|
|
| 927 |
# Test T
|
| 928 |
if advanced_analysis_type == 'Test T':
|
| 929 |
st.markdown("###### Test T (Comparaison de 2 moyennes)");
|
| 930 |
-
cols_valid_t = [c for c in
|
| 931 |
-
if not
|
| 932 |
elif not cols_valid_t: st.warning("Nécessite Var Catégorielle à 2 groupes.")
|
| 933 |
else:
|
| 934 |
col_t1, col_t2, col_t3 = st.columns([2, 2, 1])
|
| 935 |
with col_t1: group_col_t = st.selectbox("Var Catégorielle (2 groupes):", cols_valid_t, key=f"t_group_{adv_analysis_key_suffix}")
|
| 936 |
-
with col_t2: numeric_var_t = st.selectbox("Var Numérique:",
|
| 937 |
with col_t3:
|
| 938 |
st.write(""); st.write("")
|
| 939 |
if st.button("Effectuer Test T", key=f"run_t_{adv_analysis_key_suffix}", use_container_width=True):
|
|
@@ -956,13 +958,13 @@ with app_tab:
|
|
| 956 |
# ANOVA
|
| 957 |
elif advanced_analysis_type == 'ANOVA':
|
| 958 |
st.markdown("###### ANOVA (Comparaison >2 moyennes)")
|
| 959 |
-
cols_valid_a = [c for c in
|
| 960 |
-
if not
|
| 961 |
elif not cols_valid_a: st.warning("Nécessite Var Catégorielle (>2 et <50 groupes).")
|
| 962 |
else:
|
| 963 |
col_a1, col_a2, col_a3 = st.columns([2, 2, 1])
|
| 964 |
with col_a1: group_col_a = st.selectbox("Var Catégorielle (>2 groupes):", cols_valid_a, key=f"a_group_{adv_analysis_key_suffix}")
|
| 965 |
-
with col_a2: anova_numeric_var = st.selectbox("Var Numérique:",
|
| 966 |
with col_a3:
|
| 967 |
st.write(""); st.write("")
|
| 968 |
if st.button("Effectuer ANOVA", key=f"run_a_{adv_analysis_key_suffix}", use_container_width=True):
|
|
@@ -985,11 +987,11 @@ with app_tab:
|
|
| 985 |
# Chi-Square
|
| 986 |
elif advanced_analysis_type == 'Chi-Square Test':
|
| 987 |
st.markdown("###### Test Chi-carré (Indépendance Vars Catégorielles)")
|
| 988 |
-
if len(
|
| 989 |
else:
|
| 990 |
col_c1, col_c2, col_c3 = st.columns([2, 2, 1])
|
| 991 |
-
with col_c1: chi2_var1 = st.selectbox("Variable Catégorielle 1:",
|
| 992 |
-
options_var2 = [c for c in
|
| 993 |
with col_c2: chi2_var2 = st.selectbox("Variable Catégorielle 2:", options_var2, key=f"c2_var_{adv_analysis_key_suffix}", index=0 if options_var2 else None, disabled=not options_var2)
|
| 994 |
with col_c3:
|
| 995 |
st.write(""); st.write("")
|
|
@@ -1005,17 +1007,17 @@ with app_tab:
|
|
| 1005 |
if p < alpha: st.success(msg)
|
| 1006 |
else: st.info(msg)
|
| 1007 |
with st.expander("Tableau Contingence"): st.dataframe(ct)
|
| 1008 |
-
if np.any(expected < 5): st.warning("
|
| 1009 |
-
with st.expander("
|
| 1010 |
except Exception as e: st.error(f"Erreur Test Chi²: {e}")
|
| 1011 |
else: st.warning("Sélectionnez 2 variables.")
|
| 1012 |
# Corrélation
|
| 1013 |
elif advanced_analysis_type == 'Corrélation':
|
| 1014 |
st.markdown("###### Matrice de Corrélation (Vars Numériques)")
|
| 1015 |
-
if len(
|
| 1016 |
else:
|
| 1017 |
-
default_cols =
|
| 1018 |
-
features = st.multiselect("Sélectionnez 2+ vars numériques:",
|
| 1019 |
if st.button("Calculer Matrice Corrélation", key=f"run_corr_{adv_analysis_key_suffix}", use_container_width=True):
|
| 1020 |
if len(features) >= 2:
|
| 1021 |
try:
|
|
@@ -1028,13 +1030,13 @@ with app_tab:
|
|
| 1028 |
except Exception as e: st.error(f"Erreur Corrélation: {e}")
|
| 1029 |
else: st.warning("Sélectionnez >= 2 variables.")
|
| 1030 |
# Régression Linéaire
|
| 1031 |
-
elif advanced_analysis_type == 'Régression Linéaire':
|
| 1032 |
st.markdown("###### Régression Linéaire Simple (Y ~ X)")
|
| 1033 |
-
if len(
|
| 1034 |
else:
|
| 1035 |
col_r1, col_r2, col_r3 = st.columns([2, 2, 1])
|
| 1036 |
-
with col_r1: target = st.selectbox("Variable Cible (Y):",
|
| 1037 |
-
options_feat = [f for f in
|
| 1038 |
with col_r2: feature = st.selectbox("Variable Explicative (X):", options_feat, key=f"reg_feature_{adv_analysis_key_suffix}", index=0 if options_feat else None, disabled=not options_feat)
|
| 1039 |
with col_r3:
|
| 1040 |
st.write(""); st.write("")
|
|
@@ -1057,10 +1059,10 @@ with app_tab:
|
|
| 1057 |
# ACP (PCA)
|
| 1058 |
elif advanced_analysis_type == 'ACP (PCA)':
|
| 1059 |
st.markdown("###### ACP (Analyse en Composantes Principales)")
|
| 1060 |
-
if len(
|
| 1061 |
else:
|
| 1062 |
-
default_cols =
|
| 1063 |
-
features = st.multiselect("Sélectionnez 2+ vars numériques:",
|
| 1064 |
if st.button("Effectuer ACP", key=f"run_pca_{adv_analysis_key_suffix}", use_container_width=True):
|
| 1065 |
if len(features) >= 2:
|
| 1066 |
try:
|
|
@@ -1090,12 +1092,12 @@ with app_tab:
|
|
| 1090 |
# K-Means
|
| 1091 |
elif advanced_analysis_type == 'Clustering K-Means':
|
| 1092 |
st.markdown("###### Clustering K-Means")
|
| 1093 |
-
if len(
|
| 1094 |
else:
|
| 1095 |
col_cl1, col_cl2, col_cl3 = st.columns([2, 1, 1])
|
| 1096 |
with col_cl1:
|
| 1097 |
-
default_cols =
|
| 1098 |
-
features = st.multiselect("Variables Numériques:",
|
| 1099 |
with col_cl2:
|
| 1100 |
k_suggested = 3
|
| 1101 |
if data is not None and not data.empty and features:
|
|
@@ -1143,10 +1145,10 @@ with app_tab:
|
|
| 1143 |
# Détection Anomalies
|
| 1144 |
elif advanced_analysis_type == 'Détection d\'Anomalies (Z-score)':
|
| 1145 |
st.markdown("###### Détection Anomalies (Z-score)")
|
| 1146 |
-
if not
|
| 1147 |
else:
|
| 1148 |
col_anom1, col_anom2, col_anom3 = st.columns([2, 1, 1])
|
| 1149 |
-
with col_anom1: default_cols =
|
| 1150 |
with col_anom2: threshold = st.number_input("Seuil Z-score:", min_value=1.0, max_value=5.0, value=3.0, step=0.1, key=f"anomaly_z_{adv_analysis_key_suffix}", help="|valeur - moy| / std > seuil.")
|
| 1151 |
with col_anom3:
|
| 1152 |
st.write(""); st.write("")
|
|
@@ -1161,7 +1163,7 @@ with app_tab:
|
|
| 1161 |
z = np.abs(stats.zscore(df_raw)); mask = (z > threshold).any(axis=1)
|
| 1162 |
indices = df_raw.index[mask]; n_anom = len(indices)
|
| 1163 |
st.metric(label="Anomalies Détectées", value=n_anom); st.caption(f"Z > {threshold} pour au moins une var.")
|
| 1164 |
-
if n_anom > 0: st.write(f"**{n_anom} ligne(s)
|
| 1165 |
else: st.success("Aucune anomalie détectée.")
|
| 1166 |
if len(valid_f) == 1:
|
| 1167 |
col = valid_f[0]; moy = data[col].mean(); std = data[col].std()
|
|
@@ -1192,8 +1194,8 @@ with manual_tab:
|
|
| 1192 |
|
| 1193 |
---
|
| 1194 |
### 1. Chargement des Données (Barre Latérale ⚙️)
|
| 1195 |
-
- **Choisir une méthode** : Sélectionnez l'une des options proposées (URL, Coller).
|
| 1196 |
-
- **URL** : Collez l'URL
|
| 1197 |
- **Coller depuis presse-papiers**: Copiez des données depuis un tableur (Excel, Sheets), collez-les dans la zone de texte, vérifiez le séparateur (Tabulation par défaut) et cliquez sur "Charger Données Collées".
|
| 1198 |
- **Utiliser l'en-tête** : Cochez/décochez la case **avant** de cliquer sur le bouton de chargement correspondant à votre méthode pour indiquer si la première ligne contient les noms de colonnes.
|
| 1199 |
|
|
@@ -1215,7 +1217,7 @@ with manual_tab:
|
|
| 1215 |
---
|
| 1216 |
### 💡 Conseils & Dépannage
|
| 1217 |
- **Types de Colonnes** : Vérifiez les types détectés dans la section "Afficher détails colonnes". Corrigez vos données sources si nécessaire (ex: formats de date, nombres avec texte). Une mauvaise détection peut limiter les options d'analyse.
|
| 1218 |
-
- **Chargement
|
| 1219 |
- **Erreurs d'analyse ?** Lisez les messages d'erreur. Vérifiez la sélection des colonnes et leur type. Les analyses statistiques nécessitent souvent des données numériques sans valeurs manquantes.
|
| 1220 |
- **Problèmes sur Hugging Face Spaces ?** Vérifiez `requirements.txt`, la présence de `report_template.html` et `sample_excel.xlsx` à la racine, et la configuration de la clé `GOOGLE_API_KEY` dans les Secrets.
|
| 1221 |
|
|
@@ -1225,6 +1227,7 @@ with manual_tab:
|
|
| 1225 |
📞 Contact : +229 96911346
|
| 1226 |
🔗 [Profil LinkedIn](https://www.linkedin.com/in/sidoineko) | 📂 [Portfolio](https://huggingface.co/spaces/Sidoineko/portfolio)
|
| 1227 |
""")
|
|
|
|
| 1228 |
# ==============================================================================
|
| 1229 |
# ONGLET CHAT IA
|
| 1230 |
# ==============================================================================
|
|
@@ -1259,7 +1262,7 @@ with chat_tab:
|
|
| 1259 |
source_info_context = st.session_state.get('data_source_info', 'Inconnue')
|
| 1260 |
context_prompt = f"""
|
| 1261 |
CONTEXTE:
|
| 1262 |
-
|
| 1263 |
- Source des données: "{source_info_context}"
|
| 1264 |
- Colonnes Numériques: {', '.join(num_cols_context) if num_cols_context else 'Aucune'}
|
| 1265 |
- Colonnes Catégorielles: {', '.join(cat_cols_context) if cat_cols_context else 'Aucune'}
|
|
@@ -1267,13 +1270,12 @@ with chat_tab:
|
|
| 1267 |
- Analyses ajoutées: {', '.join(analyses_context) if analyses_context else 'Aucune'}
|
| 1268 |
- Analyses avancées dispo: Test T, ANOVA, Chi-Square, Corrélation, Régression Linéaire, ACP (PCA), Clustering K-Means, Détection Anomalies (Z-score).
|
| 1269 |
|
| 1270 |
-
|
| 1271 |
|
| 1272 |
QUESTION UTILISATEUR: "{user_question}"
|
| 1273 |
|
| 1274 |
-
|
| 1275 |
"""
|
| 1276 |
-
# Génération de la réponse (identique)
|
| 1277 |
try:
|
| 1278 |
with st.spinner("L'IA réfléchit..."):
|
| 1279 |
response = model_chat.generate_content(context_prompt)
|
|
@@ -1290,5 +1292,6 @@ with chat_tab:
|
|
| 1290 |
st.warning(error_msg_ai); st.session_state.gemini_chat_history.append({"role": "assistant", "content": f"({error_msg_ai})"})
|
| 1291 |
except Exception as e:
|
| 1292 |
error_message = f"Erreur API Gemini: {e}"
|
| 1293 |
-
st.error(error_message); st.session_state.gemini_chat_history.append({"role": "assistant", "content": f"(
|
| 1294 |
-
else:
|
|
|
|
|
|
| 66 |
if 'params' not in analyses[analysis_index]:
|
| 67 |
analyses[analysis_index]['params'] = {}
|
| 68 |
analyses[analysis_index]['params'].setdefault(param_key, default_value)
|
|
|
|
|
|
|
| 69 |
|
| 70 |
# --- Fonction de chargement de données générique ---
|
| 71 |
def load_data(source_type, source_value, header_param, sep=None):
|
|
|
|
| 126 |
st.rerun() # Indispensable pour mettre à jour toute l'application
|
| 127 |
else:
|
| 128 |
st.session_state.dataframe_to_export = None
|
|
|
|
| 129 |
st.session_state.data_loaded_id = None
|
| 130 |
+
st.session_state.data_source_info = f"Erreur: {error_message}" if error_message else "Erreur de chargement inconnue"
|
| 131 |
st.sidebar.error(st.session_state.data_source_info)
|
| 132 |
# Pas de rerun en cas d'erreur pour que l'erreur reste visible
|
| 133 |
|
|
|
|
| 177 |
"Choisissez une méthode de chargement :",
|
| 178 |
options=load_options,
|
| 179 |
key="data_load_method_radio",
|
|
|
|
| 180 |
)
|
| 181 |
|
| 182 |
# Options communes
|
|
|
|
| 270 |
num_non_na_original = col_data.dropna().shape[0]
|
| 271 |
if num_non_na_original > 0 and converted_num.notna().sum() / num_non_na_original > 0.7:
|
| 272 |
# Heuristique simple pour éviter les ID (grands entiers)
|
| 273 |
+
# Vérifier si toutes les valeurs numériques sont des entiers ET si le max est grand
|
| 274 |
is_int_like = converted_num.dropna().apply(lambda x: x == int(x)).all()
|
| 275 |
if is_int_like and converted_num.max() > 100000:
|
| 276 |
pass # Probablement un ID, on n'ajoute pas à numérique
|
|
|
|
| 507 |
# ===========================
|
| 508 |
if analysis['type'] == 'aggregated_table':
|
| 509 |
st.markdown("##### Configuration Tableau Agrégé")
|
| 510 |
+
if not categorical_columns: st.warning("Nécessite au moins une colonne Catégorielle.")
|
| 511 |
+
# Numérique n'est pas requis si méthode = count
|
| 512 |
+
# elif not numerical_columns and analysis.get('params',{}).get('agg_method') != 'count':
|
| 513 |
# st.warning("Nécessite au moins une colonne Numérique (sauf pour 'count').")
|
| 514 |
else:
|
| 515 |
init_analysis_state(i, 'group_by_columns', [])
|
| 516 |
+
init_analysis_state(i, 'agg_column', numerical_columns[0] if numerical_columns else None)
|
| 517 |
init_analysis_state(i, 'agg_method', 'count')
|
| 518 |
|
| 519 |
col_agg1, col_agg2, col_agg3 = st.columns(3)
|
| 520 |
with col_agg1:
|
| 521 |
+
default_groupby = [col for col in analysis['params'].get('group_by_columns', []) if col in categorical_columns]
|
| 522 |
st.session_state.analyses[i]['params']['group_by_columns'] = st.multiselect(
|
| 523 |
+
f"Regrouper par :", categorical_columns,
|
| 524 |
default=default_groupby, key=f"agg_table_groupby_{analysis_id}"
|
| 525 |
)
|
| 526 |
with col_agg3: # Méthode avant colonne
|
|
|
|
| 533 |
with col_agg2:
|
| 534 |
agg_method_selected_agg = st.session_state.analyses[i]['params']['agg_method']
|
| 535 |
agg_col_needed_agg = agg_method_selected_agg != 'count'
|
| 536 |
+
agg_col_options_agg = numerical_columns if agg_col_needed_agg else ["(Non requis pour 'count')"]
|
| 537 |
agg_col_index_agg = get_safe_index(agg_col_options_agg, analysis['params'].get('agg_column'))
|
| 538 |
current_agg_col_selection_agg = st.selectbox(
|
| 539 |
f"Calculer sur :", agg_col_options_agg,
|
|
|
|
| 555 |
else:
|
| 556 |
try:
|
| 557 |
valid_groupby = all(c in data.columns for c in group_by_cols)
|
| 558 |
+
valid_aggcol = agg_method == 'count' or (agg_col and agg_col in data.columns and agg_col in numerical_columns)
|
| 559 |
|
| 560 |
if not valid_groupby: st.error("Colonnes de groupement invalides.")
|
| 561 |
elif not valid_aggcol: st.error(f"Colonne d'agrégation '{agg_col}' invalide ou non numérique (requis pour '{agg_method}').")
|
|
|
|
| 587 |
# ===========================
|
| 588 |
elif analysis['type'] == 'graph':
|
| 589 |
st.markdown("##### Configuration Graphique")
|
| 590 |
+
if not all_columns: st.warning("Aucune colonne disponible.")
|
| 591 |
else:
|
| 592 |
# --- Initialisation état graphique ---
|
| 593 |
+
init_analysis_state(i, 'chart_type', 'bar')
|
| 594 |
init_analysis_state(i, 'group_by_columns_graph', [])
|
| 595 |
+
init_analysis_state(i, 'agg_column_graph', numerical_columns[0] if numerical_columns else None)
|
| 596 |
init_analysis_state(i, 'agg_method_graph', 'count')
|
| 597 |
+
init_analysis_state(i, 'x_column', categorical_columns[0] if categorical_columns else (all_columns[0] if all_columns else None))
|
| 598 |
+
init_analysis_state(i, 'y_column', numerical_columns[0] if numerical_columns else None)
|
| 599 |
init_analysis_state(i, 'color_column', None)
|
| 600 |
init_analysis_state(i, 'size_column', None)
|
| 601 |
init_analysis_state(i, 'facet_column', None)
|
|
|
|
| 606 |
init_analysis_state(i, 'z_column', None)
|
| 607 |
|
| 608 |
# --- Type Graphique ---
|
| 609 |
+
chart_type_options = (
|
| 610 |
+
'bar', 'line', 'scatter', 'histogram', 'box', 'violin',
|
| 611 |
+
'heatmap', 'density_contour', 'area', 'funnel', 'timeline',
|
| 612 |
+
'sunburst', 'treemap', 'scatter_3d', 'scatter_matrix'
|
| 613 |
+
)
|
| 614 |
chart_type_index = get_safe_index(chart_type_options, st.session_state.analyses[i]['params'].get('chart_type'))
|
| 615 |
st.session_state.analyses[i]['params']['chart_type'] = st.selectbox(f"Type graphique:", chart_type_options, index=chart_type_index, key=f"graph_type_{analysis_id}")
|
| 616 |
graph_analysis_type = st.session_state.analyses[i]['params']['chart_type']
|
|
|
|
| 631 |
elif current_agg_method_graph != 'count' and not current_agg_col_graph: agg_warning = f"Sélectionnez 'Calculer' pour '{current_agg_method_graph}'."
|
| 632 |
elif not all(c in data.columns for c in current_group_by_graph): agg_warning = "Colonnes 'Agréger par' invalides."
|
| 633 |
elif current_agg_method_graph != 'count' and current_agg_col_graph not in data.columns: agg_warning = f"Colonne 'Calculer' ('{current_agg_col_graph}') invalide."
|
| 634 |
+
elif current_agg_method_graph != 'count' and current_agg_col_graph not in numerical_columns: agg_warning = f"'{current_agg_col_graph}' doit être numérique pour '{current_agg_method_graph}'."
|
| 635 |
else:
|
| 636 |
try:
|
| 637 |
if current_agg_method_graph == 'count':
|
|
|
|
| 666 |
st.session_state.analyses[i]['params']['x_column'] = selected_x
|
| 667 |
# --- Axe Y ---
|
| 668 |
with col2_axes:
|
| 669 |
+
y_disabled = graph_analysis_type in ['histogram', 'pair_plot', 'sunburst', 'treemap']
|
| 670 |
y_label = "Axe Y"
|
| 671 |
options_y = [c for c in chart_columns if c != selected_x]
|
| 672 |
+
if graph_analysis_type == 'timeline':
|
| 673 |
+
options_y = [c for c in chart_columns if c in categorical_columns and c != selected_x]; y_label = "Tâche/Groupe (Y)"
|
| 674 |
default_y = analysis['params'].get('y_column')
|
| 675 |
if y_disabled: default_y = None
|
| 676 |
elif default_y not in options_y:
|
| 677 |
if is_aggregated and agg_col_name_new and agg_col_name_new in options_y: default_y = agg_col_name_new
|
| 678 |
else:
|
| 679 |
+
num_y_opts = [c for c in options_y if c in numerical_columns]
|
| 680 |
default_y = num_y_opts[0] if num_y_opts else (options_y[0] if options_y else None)
|
| 681 |
y_col_index = get_safe_index(options_y, default_y)
|
| 682 |
selected_y = st.selectbox(y_label, options_y, index=y_col_index, key=f"graph_y_{analysis_id}", disabled=y_disabled or not options_y, help="Requis pour la plupart des graphiques.")
|
| 683 |
st.session_state.analyses[i]['params']['y_column'] = selected_y if not y_disabled else None
|
| 684 |
# --- Couleur & Taille ---
|
| 685 |
with col3_axes:
|
| 686 |
+
map_options_num_orig = [None] + [c for c in original_columns if c in numerical_columns]
|
| 687 |
map_options_all_orig = [None] + original_columns
|
|
|
|
| 688 |
selected_color = st.selectbox(f"Couleur (Opt.):", map_options_all_orig, index=get_safe_index(map_options_all_orig, analysis['params'].get('color_column')), key=f"graph_color_{analysis_id}", format_func=lambda x: x if x is not None else "Aucune")
|
| 689 |
st.session_state.analyses[i]['params']['color_column'] = selected_color
|
| 690 |
+
size_disabled = graph_analysis_type not in ['scatter', 'scatter_3d']
|
| 691 |
selected_size = st.selectbox(f"Taille (Opt., Num.):", map_options_num_orig, index=get_safe_index(map_options_num_orig, analysis['params'].get('size_column')), key=f"graph_size_{analysis_id}", disabled=size_disabled, format_func=lambda x: x if x is not None else "Aucune")
|
| 692 |
st.session_state.analyses[i]['params']['size_column'] = selected_size
|
| 693 |
|
| 694 |
# --- Facet, Hover & Autres ---
|
| 695 |
col1_extra, col2_extra = st.columns(2)
|
| 696 |
with col1_extra:
|
| 697 |
+
map_options_cat_orig = [None] + [c for c in original_columns if c in categorical_columns]
|
| 698 |
+
facet_disabled = graph_analysis_type in ['heatmap', 'density_contour', 'pair_plot', 'sunburst', 'treemap']
|
| 699 |
selected_facet = st.selectbox(f"Diviser par (Facet, Opt.):", map_options_cat_orig, index=get_safe_index(map_options_cat_orig, analysis['params'].get('facet_column')), key=f"graph_facet_{analysis_id}", disabled=facet_disabled, format_func=lambda x: x if x is not None else "Aucune")
|
| 700 |
st.session_state.analyses[i]['params']['facet_column'] = selected_facet
|
| 701 |
+
if graph_analysis_type == 'scatter_3d':
|
| 702 |
+
options_z = [c for c in chart_columns if c in numerical_columns and c not in [selected_x, selected_y]]
|
| 703 |
selected_z = st.selectbox("Axe Z (Num.):", options_z, index=get_safe_index(options_z, analysis['params'].get('z_column')), key=f"graph_z_{analysis_id}")
|
| 704 |
st.session_state.analyses[i]['params']['z_column'] = selected_z
|
| 705 |
with col2_extra:
|
| 706 |
selected_hover = st.multiselect("Infos survol (Hover):", original_columns, default=analysis['params'].get('hover_data_cols', []), key=f"graph_hover_{analysis_id}")
|
| 707 |
st.session_state.analyses[i]['params']['hover_data_cols'] = selected_hover
|
| 708 |
+
if graph_analysis_type == 'timeline':
|
| 709 |
+
options_end = [c for c in chart_columns if c in datetime_columns and c != selected_x]
|
| 710 |
+
selected_end = st.selectbox("Date Fin (Timeline):", options_end, index=get_safe_index(options_end, analysis['params'].get('gantt_end_column')), key=f"graph_gantt_end_{analysis_id}")
|
| 711 |
st.session_state.analyses[i]['params']['gantt_end_column'] = selected_end
|
| 712 |
|
| 713 |
# --- Params spécifiques Sunburst/Treemap ---
|
| 714 |
+
if graph_analysis_type in ['sunburst', 'treemap']:
|
| 715 |
col1_hier, col2_hier = st.columns(2)
|
| 716 |
with col1_hier:
|
| 717 |
+
options_path = [c for c in chart_columns if c in categorical_columns]
|
| 718 |
selected_path = st.multiselect("Chemin Hiérarchique:", options_path, default=analysis['params'].get('path_columns', []), key=f"graph_path_{analysis_id}")
|
| 719 |
st.session_state.analyses[i]['params']['path_columns'] = selected_path
|
| 720 |
with col2_hier:
|
| 721 |
+
options_values = [c for c in chart_columns if c in numerical_columns]
|
| 722 |
selected_value = st.selectbox("Valeurs (Taille):", options_values, index=get_safe_index(options_values, analysis['params'].get('value_column')), key=f"graph_value_{analysis_id}")
|
| 723 |
st.session_state.analyses[i]['params']['value_column'] = selected_value
|
| 724 |
|
| 725 |
# --- Options d'agrégation ---
|
| 726 |
with st.expander("Options d'agrégation (avant graphique)", expanded=aggregation_enabled_graph):
|
| 727 |
+
if not categorical_columns: st.caption("Nécessite cols Catégorielles.")
|
| 728 |
else:
|
| 729 |
col_agg_graph1, col_agg_graph2, col_agg_graph3 = st.columns(3)
|
| 730 |
with col_agg_graph1:
|
| 731 |
+
valid_gb = [c for c in analysis['params'].get('group_by_columns_graph',[]) if c in categorical_columns]
|
| 732 |
st.session_state.analyses[i]['params']['group_by_columns_graph'] = st.multiselect(
|
| 733 |
+
f"Agréger par :", categorical_columns, default=valid_gb, key=f"graph_groupby_{analysis_id}"
|
| 734 |
)
|
| 735 |
group_by_sel = st.session_state.analyses[i]['params']['group_by_columns_graph']
|
| 736 |
with col_agg_graph3: # Méthode
|
| 737 |
+
agg_method_options = ('count', 'mean', 'sum', 'median', 'min', 'max', 'std', 'nunique')
|
| 738 |
st.session_state.analyses[i]['params']['agg_method_graph'] = st.selectbox(
|
| 739 |
+
f"Avec fonction :", agg_method_options, index=get_safe_index(agg_method_options, analysis['params'].get('agg_method_graph','count')), key=f"graph_agg_method_{analysis_id}", disabled=not group_by_sel
|
| 740 |
)
|
| 741 |
with col_agg_graph2: # Colonne
|
| 742 |
+
agg_method_sel = st.session_state.analyses[i]['params']['agg_method_graph']
|
| 743 |
+
agg_col_need = agg_method_sel != 'count'
|
| 744 |
+
agg_col_opts = numerical_columns if agg_col_need else ["(Non requis pour 'count')"]
|
| 745 |
agg_col_sel = st.selectbox(
|
| 746 |
+
f"Calculer :", agg_col_opts,
|
| 747 |
+
index=get_safe_index(agg_col_opts, analysis['params'].get('agg_column_graph')), key=f"graph_agg_col_{analysis_id}", disabled=not group_by_sel or not agg_col_need
|
| 748 |
)
|
| 749 |
st.session_state.analyses[i]['params']['agg_column_graph'] = agg_col_sel if agg_col_need else None
|
| 750 |
+
if aggregation_enabled_graph and agg_warning: st.warning(f"Avert. Agrégation: {agg_warning}", icon="⚠️")
|
| 751 |
elif is_aggregated: st.caption(f"Utilisation données agrégées ({plot_data_source_df.shape[0]} l.).")
|
| 752 |
else: st.caption("Utilisation données originales.")
|
| 753 |
|
| 754 |
# --- Bouton Exécuter ---
|
| 755 |
if st.button(f"Exécuter Graphique {i+1}", key=f"run_graph_{analysis_id}"):
|
| 756 |
with st.spinner(f"Génération '{graph_analysis_type}'..."):
|
| 757 |
+
current_params = st.session_state.analyses[i]['params'].copy()
|
| 758 |
# --- Récupérer tous les params finaux ---
|
| 759 |
final_x = current_params.get('x_column'); final_y = current_params.get('y_column')
|
| 760 |
final_color = current_params.get('color_column'); final_size = current_params.get('size_column')
|
|
|
|
| 765 |
# --- Validation ---
|
| 766 |
error_msg = None
|
| 767 |
if not final_x: error_msg = "Axe X requis."
|
| 768 |
+
elif graph_analysis_type not in ['histogram', 'pair_plot', 'sunburst', 'treemap'] and not final_y: error_msg = f"Axe Y requis pour '{graph_analysis_type}'."
|
| 769 |
elif final_x and final_x not in plot_data_source_df.columns: error_msg = f"Colonne X '{final_x}' non trouvée."
|
| 770 |
elif final_y and final_y not in plot_data_source_df.columns: error_msg = f"Colonne Y '{final_y}' non trouvée."
|
| 771 |
+
elif graph_analysis_type == 'timeline' and not final_gantt_end: error_msg = "'Date Fin' requis pour Timeline."
|
| 772 |
+
elif graph_analysis_type == 'scatter_3d' and not final_z: error_msg = "'Axe Z' requis pour 3D Scatter."
|
| 773 |
+
elif graph_analysis_type in ['sunburst', 'treemap'] and (not final_path or not final_value): error_msg = "'Chemin Hiérarchique' et 'Valeurs' requis."
|
| 774 |
|
| 775 |
# --- Préparer Args Plotly ---
|
| 776 |
px_args = {}
|
|
|
|
| 788 |
if final_color: px_args['color'] = final_color
|
| 789 |
if final_facet: px_args['facet_col'] = final_facet
|
| 790 |
if final_hover_valid: px_args['hover_data'] = final_hover_valid
|
| 791 |
+
if final_size and graph_analysis_type in ['scatter', 'scatter_3d']: px_args['size'] = final_size
|
| 792 |
+
if final_z and graph_analysis_type == 'scatter_3d': px_args['z'] = final_z
|
| 793 |
+
if final_path and graph_analysis_type in ['sunburst', 'treemap']: px_args['path'] = final_path
|
| 794 |
+
if final_value and graph_analysis_type in ['sunburst', 'treemap']: px_args['values'] = final_value
|
| 795 |
+
if final_gantt_end and graph_analysis_type == 'timeline': px_args['x_end'] = final_gantt_end; px_args['x_start'] = final_x
|
| 796 |
|
| 797 |
title_parts = [graph_analysis_type] # Construire titre
|
| 798 |
+
if final_y and graph_analysis_type != 'histogram': title_parts.append(f"{final_y} vs")
|
| 799 |
if final_x: title_parts.append(final_x)
|
| 800 |
if final_color: title_parts.append(f"par {final_color}")
|
| 801 |
if is_aggregated: title_parts.append("(Agrégé)")
|
|
|
|
| 806 |
fig = None
|
| 807 |
plot_func = getattr(px, graph_analysis_type.lower().replace(' ', '').replace('(','').replace(')',''), None)
|
| 808 |
|
| 809 |
+
if graph_analysis_type == 'pair_plot':
|
| 810 |
+
splom_dims = [c for c in data.columns if c in numerical_columns]
|
| 811 |
if len(splom_dims)>=2:
|
| 812 |
splom_args={'data_frame':data, 'dimensions':splom_dims}
|
| 813 |
+
color_splom = final_color if (final_color and final_color in data.columns and final_color in categorical_columns) else None
|
| 814 |
if color_splom: splom_args['color'] = color_splom
|
| 815 |
splom_args['title'] = f'Pair Plot' + (f' par {color_splom}' if color_splom else '')
|
| 816 |
fig=px.scatter_matrix(**splom_args)
|
| 817 |
else: st.warning("Pair Plot requiert >= 2 cols numériques.")
|
| 818 |
+
elif graph_analysis_type == 'histogram':
|
| 819 |
hist_args = {k: v for k, v in px_args.items() if k != 'y'} # Retirer Y pour histogramme
|
| 820 |
fig = px.histogram(**hist_args)
|
| 821 |
+
elif graph_analysis_type == 'timeline':
|
| 822 |
gantt_args = px_args.copy()
|
| 823 |
if 'y' in gantt_args: gantt_args['task'] = gantt_args.pop('y') # Renommer y en task pour timeline
|
| 824 |
fig = px.timeline(**gantt_args)
|
|
|
|
| 836 |
except Exception as e:
|
| 837 |
st.error(f"Erreur génération graphique {i+1}: {e}")
|
| 838 |
st.session_state.analyses[i]['result'] = None
|
| 839 |
+
st.session_state.analyses[i]['executed_params'] = current_params # Sauver params même si échoué
|
| 840 |
|
| 841 |
# ===========================
|
| 842 |
# Bloc Stats Descriptives
|
| 843 |
# ===========================
|
| 844 |
elif analysis['type'] == 'descriptive_stats':
|
| 845 |
st.markdown("##### Configuration Stats Descriptives")
|
| 846 |
+
desc_col_options = all_columns
|
| 847 |
if not desc_col_options: st.warning("Aucune colonne disponible.")
|
| 848 |
else:
|
| 849 |
init_analysis_state(i, 'selected_columns_desc', [])
|
| 850 |
default_desc = analysis['params'].get('selected_columns_desc', [])
|
| 851 |
valid_default = [c for c in default_desc if c in desc_col_options] or \
|
| 852 |
+
[c for c in desc_col_options if c in numerical_columns or c in datetime_columns] or \
|
| 853 |
desc_col_options # Fallback
|
| 854 |
|
| 855 |
st.session_state.analyses[i]['params']['selected_columns_desc'] = st.multiselect(
|
|
|
|
| 916 |
if show_advanced:
|
| 917 |
if not data_available: st.warning("Chargez des données pour utiliser les analyses avancées.")
|
| 918 |
# Vérifier si les listes de colonnes nécessaires existent et ne sont pas vides
|
| 919 |
+
elif not columns_defined or not (numerical_columns or categorical_columns):
|
| 920 |
st.warning("Nécessite des colonnes Numériques ou Catégorielles détectées pour les analyses avancées.")
|
| 921 |
else:
|
| 922 |
adv_analysis_key_suffix = st.session_state.data_loaded_id or "adv_data_loaded"
|
|
|
|
| 929 |
# Test T
|
| 930 |
if advanced_analysis_type == 'Test T':
|
| 931 |
st.markdown("###### Test T (Comparaison de 2 moyennes)");
|
| 932 |
+
cols_valid_t = [c for c in categorical_columns if data[c].nunique() == 2]
|
| 933 |
+
if not numerical_columns: st.warning("Nécessite Var Numérique.")
|
| 934 |
elif not cols_valid_t: st.warning("Nécessite Var Catégorielle à 2 groupes.")
|
| 935 |
else:
|
| 936 |
col_t1, col_t2, col_t3 = st.columns([2, 2, 1])
|
| 937 |
with col_t1: group_col_t = st.selectbox("Var Catégorielle (2 groupes):", cols_valid_t, key=f"t_group_{adv_analysis_key_suffix}")
|
| 938 |
+
with col_t2: numeric_var_t = st.selectbox("Var Numérique:", numerical_columns, key=f"t_numeric_{adv_analysis_key_suffix}")
|
| 939 |
with col_t3:
|
| 940 |
st.write(""); st.write("")
|
| 941 |
if st.button("Effectuer Test T", key=f"run_t_{adv_analysis_key_suffix}", use_container_width=True):
|
|
|
|
| 958 |
# ANOVA
|
| 959 |
elif advanced_analysis_type == 'ANOVA':
|
| 960 |
st.markdown("###### ANOVA (Comparaison >2 moyennes)")
|
| 961 |
+
cols_valid_a = [c for c in categorical_columns if data[c].nunique() > 2 and data[c].nunique() < 50]
|
| 962 |
+
if not numerical_columns: st.warning("Nécessite Var Numérique.")
|
| 963 |
elif not cols_valid_a: st.warning("Nécessite Var Catégorielle (>2 et <50 groupes).")
|
| 964 |
else:
|
| 965 |
col_a1, col_a2, col_a3 = st.columns([2, 2, 1])
|
| 966 |
with col_a1: group_col_a = st.selectbox("Var Catégorielle (>2 groupes):", cols_valid_a, key=f"a_group_{adv_analysis_key_suffix}")
|
| 967 |
+
with col_a2: anova_numeric_var = st.selectbox("Var Numérique:", numerical_columns, key=f"a_numeric_{adv_analysis_key_suffix}")
|
| 968 |
with col_a3:
|
| 969 |
st.write(""); st.write("")
|
| 970 |
if st.button("Effectuer ANOVA", key=f"run_a_{adv_analysis_key_suffix}", use_container_width=True):
|
|
|
|
| 987 |
# Chi-Square
|
| 988 |
elif advanced_analysis_type == 'Chi-Square Test':
|
| 989 |
st.markdown("###### Test Chi-carré (Indépendance Vars Catégorielles)")
|
| 990 |
+
if len(categorical_columns) < 2: st.warning("Nécessite >= 2 Vars Catégorielles.")
|
| 991 |
else:
|
| 992 |
col_c1, col_c2, col_c3 = st.columns([2, 2, 1])
|
| 993 |
+
with col_c1: chi2_var1 = st.selectbox("Variable Catégorielle 1:", categorical_columns, key=f"c1_var_{adv_analysis_key_suffix}", index=0)
|
| 994 |
+
options_var2 = [c for c in categorical_columns if c != chi2_var1]
|
| 995 |
with col_c2: chi2_var2 = st.selectbox("Variable Catégorielle 2:", options_var2, key=f"c2_var_{adv_analysis_key_suffix}", index=0 if options_var2 else None, disabled=not options_var2)
|
| 996 |
with col_c3:
|
| 997 |
st.write(""); st.write("")
|
|
|
|
| 1007 |
if p < alpha: st.success(msg)
|
| 1008 |
else: st.info(msg)
|
| 1009 |
with st.expander("Tableau Contingence"): st.dataframe(ct)
|
| 1010 |
+
if np.any(expected < 5): st.warning("Fréq. attendues < 5, test moins fiable.", icon="⚠️")
|
| 1011 |
+
with st.expander("Fréq. Attendues"): st.dataframe(pd.DataFrame(expected, index=ct.index, columns=ct.columns).style.format("{:.2f}"))
|
| 1012 |
except Exception as e: st.error(f"Erreur Test Chi²: {e}")
|
| 1013 |
else: st.warning("Sélectionnez 2 variables.")
|
| 1014 |
# Corrélation
|
| 1015 |
elif advanced_analysis_type == 'Corrélation':
|
| 1016 |
st.markdown("###### Matrice de Corrélation (Vars Numériques)")
|
| 1017 |
+
if len(numerical_columns) < 2: st.warning("Nécessite >= 2 Vars Numériques.")
|
| 1018 |
else:
|
| 1019 |
+
default_cols = numerical_columns[:min(len(numerical_columns), 5)]
|
| 1020 |
+
features = st.multiselect("Sélectionnez 2+ vars numériques:", numerical_columns, default=default_cols, key=f"corr_vars_{adv_analysis_key_suffix}")
|
| 1021 |
if st.button("Calculer Matrice Corrélation", key=f"run_corr_{adv_analysis_key_suffix}", use_container_width=True):
|
| 1022 |
if len(features) >= 2:
|
| 1023 |
try:
|
|
|
|
| 1030 |
except Exception as e: st.error(f"Erreur Corrélation: {e}")
|
| 1031 |
else: st.warning("Sélectionnez >= 2 variables.")
|
| 1032 |
# Régression Linéaire
|
| 1033 |
+
elif advanced_analysis_type == 'Régression Linéaire Simple (Y ~ X)':
|
| 1034 |
st.markdown("###### Régression Linéaire Simple (Y ~ X)")
|
| 1035 |
+
if len(numerical_columns) < 2: st.warning("Nécessite >= 2 Vars Numériques.")
|
| 1036 |
else:
|
| 1037 |
col_r1, col_r2, col_r3 = st.columns([2, 2, 1])
|
| 1038 |
+
with col_r1: target = st.selectbox("Variable Cible (Y):", numerical_columns, key=f"reg_target_{adv_analysis_key_suffix}", index=0)
|
| 1039 |
+
options_feat = [f for f in numerical_columns if f != target]
|
| 1040 |
with col_r2: feature = st.selectbox("Variable Explicative (X):", options_feat, key=f"reg_feature_{adv_analysis_key_suffix}", index=0 if options_feat else None, disabled=not options_feat)
|
| 1041 |
with col_r3:
|
| 1042 |
st.write(""); st.write("")
|
|
|
|
| 1059 |
# ACP (PCA)
|
| 1060 |
elif advanced_analysis_type == 'ACP (PCA)':
|
| 1061 |
st.markdown("###### ACP (Analyse en Composantes Principales)")
|
| 1062 |
+
if len(numerical_columns) < 2: st.warning("Nécessite >= 2 Vars Numériques.")
|
| 1063 |
else:
|
| 1064 |
+
default_cols = numerical_columns[:min(len(numerical_columns), 5)]
|
| 1065 |
+
features = st.multiselect("Sélectionnez 2+ vars numériques:", numerical_columns, default=default_cols, key=f"pca_vars_{adv_analysis_key_suffix}")
|
| 1066 |
if st.button("Effectuer ACP", key=f"run_pca_{adv_analysis_key_suffix}", use_container_width=True):
|
| 1067 |
if len(features) >= 2:
|
| 1068 |
try:
|
|
|
|
| 1092 |
# K-Means
|
| 1093 |
elif advanced_analysis_type == 'Clustering K-Means':
|
| 1094 |
st.markdown("###### Clustering K-Means")
|
| 1095 |
+
if len(numerical_columns) < 1: st.warning("Nécessite >= 1 Var Numérique.")
|
| 1096 |
else:
|
| 1097 |
col_cl1, col_cl2, col_cl3 = st.columns([2, 1, 1])
|
| 1098 |
with col_cl1:
|
| 1099 |
+
default_cols = numerical_columns[:min(len(numerical_columns), 2)]
|
| 1100 |
+
features = st.multiselect("Variables Numériques:", numerical_columns, default=default_cols, key=f"clust_vars_{adv_analysis_key_suffix}")
|
| 1101 |
with col_cl2:
|
| 1102 |
k_suggested = 3
|
| 1103 |
if data is not None and not data.empty and features:
|
|
|
|
| 1145 |
# Détection Anomalies
|
| 1146 |
elif advanced_analysis_type == 'Détection d\'Anomalies (Z-score)':
|
| 1147 |
st.markdown("###### Détection Anomalies (Z-score)")
|
| 1148 |
+
if not numerical_columns: st.warning("Nécessite >= 1 Var Numérique.")
|
| 1149 |
else:
|
| 1150 |
col_anom1, col_anom2, col_anom3 = st.columns([2, 1, 1])
|
| 1151 |
+
with col_anom1: default_cols = numerical_columns[:1]; features = st.multiselect("Sélectionnez 1+ vars numériques:", numerical_columns, default=default_cols, key=f"anomaly_vars_{adv_analysis_key_suffix}")
|
| 1152 |
with col_anom2: threshold = st.number_input("Seuil Z-score:", min_value=1.0, max_value=5.0, value=3.0, step=0.1, key=f"anomaly_z_{adv_analysis_key_suffix}", help="|valeur - moy| / std > seuil.")
|
| 1153 |
with col_anom3:
|
| 1154 |
st.write(""); st.write("")
|
|
|
|
| 1163 |
z = np.abs(stats.zscore(df_raw)); mask = (z > threshold).any(axis=1)
|
| 1164 |
indices = df_raw.index[mask]; n_anom = len(indices)
|
| 1165 |
st.metric(label="Anomalies Détectées", value=n_anom); st.caption(f"Z > {threshold} pour au moins une var.")
|
| 1166 |
+
if n_anom > 0: st.write(f"**{n_anom} ligne(s) anormale(s):**"); st.dataframe(data.loc[indices])
|
| 1167 |
else: st.success("Aucune anomalie détectée.")
|
| 1168 |
if len(valid_f) == 1:
|
| 1169 |
col = valid_f[0]; moy = data[col].mean(); std = data[col].std()
|
|
|
|
| 1194 |
|
| 1195 |
---
|
| 1196 |
### 1. Chargement des Données (Barre Latérale ⚙️)
|
| 1197 |
+
- **Choisir une méthode** : Sélectionnez l'une des options proposées (URL, Coller depuis presse-papiers).
|
| 1198 |
+
- **URL** : Collez l'URL direct d'un fichier CSV ou Excel public et cliquez sur "Charger depuis URL".
|
| 1199 |
- **Coller depuis presse-papiers**: Copiez des données depuis un tableur (Excel, Sheets), collez-les dans la zone de texte, vérifiez le séparateur (Tabulation par défaut) et cliquez sur "Charger Données Collées".
|
| 1200 |
- **Utiliser l'en-tête** : Cochez/décochez la case **avant** de cliquer sur le bouton de chargement correspondant à votre méthode pour indiquer si la première ligne contient les noms de colonnes.
|
| 1201 |
|
|
|
|
| 1217 |
---
|
| 1218 |
### 💡 Conseils & Dépannage
|
| 1219 |
- **Types de Colonnes** : Vérifiez les types détectés dans la section "Afficher détails colonnes". Corrigez vos données sources si nécessaire (ex: formats de date, nombres avec texte). Une mauvaise détection peut limiter les options d'analyse.
|
| 1220 |
+
- **Chargement échoué ?** Vérifiez le format du fichier/URL/données collées, le séparateur choisi (pour coller), la connexion internet (pour URL) et les dépendances (`openpyxl` pour Excel). Assurez-vous que le fichier `sample_excel.xlsx` est bien présent à la racine si vous utilisez l'exemple. Vérifiez le message d'erreur dans la sidebar.
|
| 1221 |
- **Erreurs d'analyse ?** Lisez les messages d'erreur. Vérifiez la sélection des colonnes et leur type. Les analyses statistiques nécessitent souvent des données numériques sans valeurs manquantes.
|
| 1222 |
- **Problèmes sur Hugging Face Spaces ?** Vérifiez `requirements.txt`, la présence de `report_template.html` et `sample_excel.xlsx` à la racine, et la configuration de la clé `GOOGLE_API_KEY` dans les Secrets.
|
| 1223 |
|
|
|
|
| 1227 |
📞 Contact : +229 96911346
|
| 1228 |
🔗 [Profil LinkedIn](https://www.linkedin.com/in/sidoineko) | 📂 [Portfolio](https://huggingface.co/spaces/Sidoineko/portfolio)
|
| 1229 |
""")
|
| 1230 |
+
|
| 1231 |
# ==============================================================================
|
| 1232 |
# ONGLET CHAT IA
|
| 1233 |
# ==============================================================================
|
|
|
|
| 1262 |
source_info_context = st.session_state.get('data_source_info', 'Inconnue')
|
| 1263 |
context_prompt = f"""
|
| 1264 |
CONTEXTE:
|
| 1265 |
+
Vous êtes un assistant IA dans une application Streamlit d'analyse de données.
|
| 1266 |
- Source des données: "{source_info_context}"
|
| 1267 |
- Colonnes Numériques: {', '.join(num_cols_context) if num_cols_context else 'Aucune'}
|
| 1268 |
- Colonnes Catégorielles: {', '.join(cat_cols_context) if cat_cols_context else 'Aucune'}
|
|
|
|
| 1270 |
- Analyses ajoutées: {', '.join(analyses_context) if analyses_context else 'Aucune'}
|
| 1271 |
- Analyses avancées dispo: Test T, ANOVA, Chi-Square, Corrélation, Régression Linéaire, ACP (PCA), Clustering K-Means, Détection Anomalies (Z-score).
|
| 1272 |
|
| 1273 |
+
TÂCHE: Réponds à la question de l'utilisateur de manière concise et utile, en te basant sur le contexte fourni (types de colonnes, analyses possibles). Ne prétends pas connaître les valeurs des données.
|
| 1274 |
|
| 1275 |
QUESTION UTILISATEUR: "{user_question}"
|
| 1276 |
|
| 1277 |
+
RÉPONSE:
|
| 1278 |
"""
|
|
|
|
| 1279 |
try:
|
| 1280 |
with st.spinner("L'IA réfléchit..."):
|
| 1281 |
response = model_chat.generate_content(context_prompt)
|
|
|
|
| 1292 |
st.warning(error_msg_ai); st.session_state.gemini_chat_history.append({"role": "assistant", "content": f"({error_msg_ai})"})
|
| 1293 |
except Exception as e:
|
| 1294 |
error_message = f"Erreur API Gemini: {e}"
|
| 1295 |
+
st.error(error_message); st.session_state.gemini_chat_history.append({"role": "assistant", "content": f"({error_message})"})
|
| 1296 |
+
else:
|
| 1297 |
+
st.error("Modèle Chat IA indisponible.")
|