klydekushy committed on
Commit
27d9297
·
verified ·
1 Parent(s): f986738

Update src/modules/ml_dashboard.py

Browse files
Files changed (1) hide show
  1. src/modules/ml_dashboard.py +3 -268
src/modules/ml_dashboard.py CHANGED
@@ -613,272 +613,7 @@ def show_ml_features(client, sheet_name):
613
  time.sleep(30)
614
  st.rerun()
615
 
616
- # Chargement des données pour le feature store
617
- try:
618
- # Réutilisation de la fonction helper si 'client' est global, sinon adaptation nécessaire
619
- df_clients = get_data_from_sheet("Clients_KYC")
620
- df_prets = get_data_from_sheet("Prets_Master")
621
- except Exception as e:
622
- st.error(f"Erreur de lecture : {e}")
623
- st.stop()
624
-
625
- if df_clients.empty or df_prets.empty:
626
- st.warning("Données insuffisantes")
627
- st.stop()
628
-
629
- # Fusion des données
630
- df_full = pd.merge(df_prets, df_clients, on="ID_Client", how="left", suffixes=('_Pret', '_Client'))
631
-
632
- # Thème Plotly surveillance discret
633
- plotly_template = {
634
- 'layout': {
635
- 'plot_bgcolor': 'rgba(13, 17, 23, 0.8)',
636
- 'paper_bgcolor': 'rgba(22, 27, 34, 0.3)',
637
- 'font': {'color': '#8b949e', 'family': 'Space Grotesk', 'size': 12},
638
- 'title': {
639
- 'font': {'size': 14, 'color': '#c9d1d9', 'family': 'Space Grotesk'},
640
- 'x': 0,
641
- 'xanchor': 'left'
642
- },
643
- 'xaxis': {
644
- 'gridcolor': 'rgba(48, 54, 61, 0.3)',
645
- 'linecolor': 'rgba(48, 54, 61, 0.5)',
646
- 'zerolinecolor': 'rgba(48, 54, 61, 0.5)',
647
- 'tickfont': {'family': 'Space Grotesk', 'size': 11}
648
- },
649
- 'yaxis': {
650
- 'gridcolor': 'rgba(48, 54, 61, 0.3)',
651
- 'linecolor': 'rgba(48, 54, 61, 0.5)',
652
- 'zerolinecolor': 'rgba(48, 54, 61, 0.5)',
653
- 'tickfont': {'family': 'Space Grotesk', 'size': 11}
654
- },
655
- 'legend': {
656
- 'font': {'family': 'Space Grotesk', 'size': 11}
657
- }
658
- }
659
- }
660
-
661
- # FEATURE ENGINEERING
662
- st.markdown("<h2>Feature Engineering Pipeline</h2>", unsafe_allow_html=True)
663
-
664
- # 1. FEATURES DÉMOGRAPHIQUES
665
- with st.expander("Features Démographiques", expanded=True):
666
- df_full['Date_Naissance'] = pd.to_datetime(df_full['Date_Naissance'], errors='coerce')
667
- df_full['Age'] = ((datetime.now() - df_full['Date_Naissance']).dt.days / 365.25).round(0)
668
- df_full['Pers_Charge'] = pd.to_numeric(df_full['Pers_Charge'], errors='coerce').fillna(0)
669
- df_full['Categorie_Age'] = pd.cut(df_full['Age'],
670
- bins=[0, 25, 35, 45, 55, 100],
671
- labels=['18-25', '26-35', '36-45', '46-55', '55+'])
672
-
673
- col1, col2, col3, col4 = st.columns(4)
674
- col1.metric("Âge moyen", f"{df_full['Age'].mean():.1f} ans",
675
- delta=f"σ {df_full['Age'].std():.1f}")
676
- col2.metric("Pers. charge", f"{df_full['Pers_Charge'].mean():.1f}",
677
- delta=f"Max {df_full['Pers_Charge'].max():.0f}")
678
- col3.metric("Mariés", f"{(df_full['Etat_Civil']=='Marié(e)').sum()}")
679
- col4.metric("Célibataires", f"{(df_full['Etat_Civil']=='Célibataire').sum()}")
680
-
681
- fig_age = px.histogram(df_full, x='Age', nbins=20,
682
- title="Distribution des âges",
683
- color_discrete_sequence=['#58a6ff'],
684
- template=plotly_template)
685
- fig_age.update_traces(marker_line_color='rgba(139, 148, 158, 0.3)', marker_line_width=0.5, opacity=0.8)
686
- st.plotly_chart(fig_age, use_container_width=True)
687
-
688
- # 2. FEATURES FINANCIÈRES
689
- with st.expander("Features Financières", expanded=True):
690
- df_full['Revenus_Mensuels'] = pd.to_numeric(df_full['Revenus_Mensuels'], errors='coerce').fillna(0)
691
- df_full['Autres_Revenus'] = pd.to_numeric(df_full['Autres_Revenus'], errors='coerce').fillna(0)
692
- df_full['Charges_Estimees'] = pd.to_numeric(df_full['Charges_Estimees'], errors='coerce').fillna(0)
693
- df_full['Patrimoine_Declare'] = pd.to_numeric(df_full['Patrimoine_Declare'], errors='coerce').fillna(0)
694
- df_full['Montant_Capital'] = pd.to_numeric(df_full['Montant_Capital'], errors='coerce').fillna(0)
695
- df_full['Montant_Versement'] = pd.to_numeric(df_full['Montant_Versement'], errors='coerce').fillna(0)
696
-
697
- df_full['Revenus_Totaux'] = df_full['Revenus_Mensuels'] + df_full['Autres_Revenus']
698
- df_full['Revenu_Net'] = df_full['Revenus_Totaux'] - df_full['Charges_Estimees']
699
- df_full['Ratio_Charges_Revenus'] = (df_full['Charges_Estimees'] / df_full['Revenus_Totaux']).replace([np.inf, -np.inf], 0).fillna(0)
700
- df_full['Taux_Effort_Mensuel'] = ((df_full['Montant_Versement'] * 4.33) / df_full['Revenus_Totaux']).replace([np.inf, -np.inf], 0).fillna(0) * 100
701
- df_full['Capacite_Endettement'] = df_full['Revenus_Totaux'] * 0.33
702
- df_full['Reste_A_Vivre'] = df_full['Revenus_Totaux'] - df_full['Charges_Estimees'] - (df_full['Montant_Versement'] * 4.33)
703
-
704
- col1, col2, col3, col4 = st.columns(4)
705
- col1.metric("Revenu moyen", f"{df_full['Revenus_Totaux'].mean():,.0f} XOF",
706
- delta=f"Med {df_full['Revenus_Totaux'].median():,.0f}")
707
- col2.metric("Taux effort", f"{df_full['Taux_Effort_Mensuel'].mean():.1f}%",
708
- delta="Risque" if df_full['Taux_Effort_Mensuel'].mean() > 33 else "Sain")
709
- col3.metric("Reste à vivre", f"{df_full['Reste_A_Vivre'].mean():,.0f} XOF",
710
- delta=f"Min {df_full['Reste_A_Vivre'].min():,.0f}")
711
- col4.metric("Patrimoine", f"{df_full['Patrimoine_Declare'].mean():,.0f} XOF")
712
-
713
- fig_effort = px.box(df_full, y='Taux_Effort_Mensuel',
714
- title="Taux d'effort mensuel (%)",
715
- color_discrete_sequence=['#58a6ff'],
716
- template=plotly_template)
717
- fig_effort.add_hline(y=33, line_dash="dash", line_color="rgba(248, 81, 73, 0.6)",
718
- line_width=1.5,
719
- annotation_text="Seuil critique 33%",
720
- annotation_font_color="#8b949e",
721
- annotation_font_size=11)
722
- fig_effort.update_traces(opacity=0.7)
723
- st.plotly_chart(fig_effort, use_container_width=True)
724
-
725
- # 3. FEATURES PROFESSIONNELLES
726
- with st.expander("Features Professionnelles", expanded=True):
727
- df_full['Anciennete_Emploi'] = pd.to_numeric(df_full['Anciennete_Emploi'], errors='coerce').fillna(0)
728
- df_full['Anciennete_Revenu'] = pd.to_numeric(df_full['Anciennete_Revenu'], errors='coerce').fillna(0)
729
- df_full['Stabilite_Pro'] = df_full['Anciennete_Emploi'].apply(
730
- lambda x: 'Élevée' if x >= 36 else ('Moyenne' if x >= 12 else 'Faible')
731
- )
732
-
733
- col1, col2, col3 = st.columns(3)
734
- col1.metric("Ancienneté", f"{df_full['Anciennete_Emploi'].mean():.1f} mois")
735
- col2.metric("CDI / Stable", f"{(df_full['Statut_Pro']=='CDI').sum()}")
736
- col3.metric("Indépendants", f"{(df_full['Statut_Pro']=='Indépendant').sum()}")
737
-
738
- secteur_count = df_full['Secteur_Activite'].value_counts().head(10)
739
- fig_secteur = px.bar(x=secteur_count.values, y=secteur_count.index,
740
- orientation='h',
741
- title="Top 10 secteurs d'activité",
742
- color_discrete_sequence=['#58a6ff'],
743
- template=plotly_template)
744
- fig_secteur.update_traces(marker_line_color='rgba(139, 148, 158, 0.2)', marker_line_width=0.5, opacity=0.8)
745
- st.plotly_chart(fig_secteur, use_container_width=True)
746
-
747
- # 4. FEATURES DE PRÊTS
748
- with st.expander("Features de Prêts & Historique", expanded=True):
749
- # Conversion des colonnes en numérique AVANT le groupby
750
- df_prets['Montant_Capital'] = pd.to_numeric(df_prets['Montant_Capital'], errors='coerce').fillna(0)
751
- df_prets['Duree_Semaines'] = pd.to_numeric(df_prets['Duree_Semaines'], errors='coerce').fillna(0)
752
-
753
- pret_history = df_prets.groupby('ID_Client').agg({
754
- 'ID_Pret': 'count',
755
- 'Montant_Capital': ['sum', 'mean', 'max'],
756
- 'Duree_Semaines': 'mean',
757
- 'Statut': lambda x: (x == 'Actif').sum()
758
- }).reset_index()
759
-
760
- pret_history.columns = ['ID_Client', 'Nb_Prets_Total', 'Montant_Total_Emprunte',
761
- 'Montant_Moyen_Pret', 'Montant_Max_Pret',
762
- 'Duree_Moyenne_Semaines', 'Nb_Prets_Actifs']
763
-
764
- df_full = pd.merge(df_full, pret_history, on='ID_Client', how='left')
765
- df_full['Ratio_Pret_Revenu'] = (df_full['Montant_Capital'] / df_full['Revenus_Totaux']).replace([np.inf, -np.inf], 0).fillna(0)
766
- df_full['Frequence_Emprunt'] = df_full['Nb_Prets_Total']
767
-
768
- col1, col2, col3, col4 = st.columns(4)
769
- col1.metric("Prêts / client", f"{df_full['Nb_Prets_Total'].mean():.1f}",
770
- delta=f"Max {df_full['Nb_Prets_Total'].max():.0f}")
771
- col2.metric("Montant moy", f"{df_full['Montant_Moyen_Pret'].mean():,.0f} XOF")
772
- col3.metric("Durée moy", f"{df_full['Duree_Moyenne_Semaines'].mean():.0f} sem")
773
- col4.metric("Prêts actifs", f"{df_full['Nb_Prets_Actifs'].mean():.1f}")
774
-
775
- fig_montant = px.scatter(df_full, x='Revenus_Totaux', y='Montant_Capital',
776
- size='Nb_Prets_Total', color='Statut_Pro',
777
- title="Montant prêt vs revenus",
778
- hover_data=['Nom_Complet_Client'],
779
- color_discrete_sequence=['#58a6ff', '#79c0ff', '#a5d6ff', '#c9d1d9'],
780
- template=plotly_template)
781
- fig_montant.update_traces(opacity=0.7)
782
- st.plotly_chart(fig_montant, use_container_width=True)
783
-
784
- # 5. FEATURES GÉOGRAPHIQUES
785
- with st.expander("Features Géographiques", expanded=True):
786
- ville_count = df_full['Ville'].value_counts()
787
- quartier_count = df_full['Quartier'].value_counts().head(10)
788
-
789
- col1, col2 = st.columns(2)
790
- with col1:
791
- st.metric("Villes", len(ville_count))
792
- fig_ville = px.pie(values=ville_count.values, names=ville_count.index,
793
- title="Répartition par ville",
794
- color_discrete_sequence=['#58a6ff', '#79c0ff', '#a5d6ff', '#c9d1d9', '#8b949e'],
795
- template=plotly_template)
796
- fig_ville.update_traces(textfont_family='Space Grotesk', opacity=0.85)
797
- st.plotly_chart(fig_ville, use_container_width=True)
798
-
799
- with col2:
800
- st.metric("Quartiers", len(quartier_count))
801
- fig_quartier = px.bar(x=quartier_count.values, y=quartier_count.index,
802
- orientation='h', title="Top 10 quartiers",
803
- color_discrete_sequence=['#58a6ff'],
804
- template=plotly_template)
805
- fig_quartier.update_traces(opacity=0.8)
806
- st.plotly_chart(fig_quartier, use_container_width=True)
807
-
808
- # 6. FEATURES TEMPORELLES
809
- with st.expander("Features Temporelles", expanded=True):
810
- df_full['Date_Creation'] = pd.to_datetime(df_full.get('Date_Creation', pd.Series()), errors='coerce')
811
- df_full['Anciennete_Client_Jours'] = (datetime.now() - df_full['Date_Creation']).dt.days
812
- df_full['Anciennete_Client_Mois'] = (df_full['Anciennete_Client_Jours'] / 30.44).round(1)
813
-
814
- col1, col2 = st.columns(2)
815
- col1.metric("Ancienneté moy", f"{df_full['Anciennete_Client_Mois'].mean():.1f} mois")
816
- col2.metric("Client + ancien", f"{df_full['Anciennete_Client_Mois'].max():.0f} mois")
817
-
818
- df_full['Mois_Creation'] = df_full['Date_Creation'].dt.to_period('M').astype(str)
819
- timeline = df_full.groupby('Mois_Creation').size().reset_index(name='Nb_Prets')
820
- fig_timeline = px.line(timeline, x='Mois_Creation', y='Nb_Prets',
821
- title="Évolution temporelle des prêts",
822
- markers=True,
823
- color_discrete_sequence=['#58a6ff'],
824
- template=plotly_template)
825
- fig_timeline.update_traces(line_width=2, marker_size=6, opacity=0.9)
826
- st.plotly_chart(fig_timeline, use_container_width=True)
827
-
828
- # MÉTRIQUES GLOBALES
829
- st.divider()
830
- st.markdown("<h2>Métriques Globales</h2>", unsafe_allow_html=True)
831
- col1, col2, col3 = st.columns(3)
832
- col1.metric("Taux défaut", "4.2 %", delta="Simulé")
833
- col2.metric("Montant moy", "10,000 XOF")
834
- col3.metric("Durée moy", "12 semaines")
835
 
836
- # TABLE DATASET
837
- st.divider()
838
- st.markdown("<h2>Dataset ML Enrichi</h2>", unsafe_allow_html=True)
839
-
840
- features_cols = [
841
- 'ID_Pret', 'ID_Client', 'Nom_Complet_Client', 'Age', 'Categorie_Age',
842
- 'Revenus_Totaux', 'Revenu_Net', 'Ratio_Charges_Revenus',
843
- 'Taux_Effort_Mensuel', 'Reste_A_Vivre', 'Patrimoine_Declare',
844
- 'Stabilite_Pro', 'Anciennete_Emploi', 'Statut_Pro',
845
- 'Nb_Prets_Total', 'Montant_Moyen_Pret', 'Ratio_Pret_Revenu',
846
- 'Ville', 'Quartier', 'Anciennete_Client_Mois', 'Statut'
847
- ]
848
-
849
- features_cols = [col for col in features_cols if col in df_full.columns]
850
- df_display = df_full[features_cols].copy()
851
- numeric_cols = df_display.select_dtypes(include=[np.number]).columns
852
- df_display[numeric_cols] = df_display[numeric_cols].round(2)
853
-
854
- st.dataframe(df_display, use_container_width=True, height=400)
855
-
856
- # EXPORT
857
- st.divider()
858
- st.markdown("<h2>Export Dataset ML</h2>", unsafe_allow_html=True)
859
-
860
- col1, col2 = st.columns([2, 1])
861
- with col1:
862
- csv = df_full.to_csv(index=False).encode('utf-8')
863
- st.download_button(
864
- label="Télécharger dataset complet (.csv)",
865
- data=csv,
866
- file_name=f'dataset_credit_scoring_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv',
867
- mime='text/csv',
868
- use_container_width=True
869
- )
870
-
871
- with col2:
872
- st.info(f"**{len(df_full)} lignes** \n**{len(df_full.columns)} features**")
873
-
874
- with st.expander("Liste des features créées"):
875
- created_features = [
876
- 'Age', 'Categorie_Age', 'Revenus_Totaux', 'Revenu_Net',
877
- 'Ratio_Charges_Revenus', 'Taux_Effort_Mensuel', 'Capacite_Endettement',
878
- 'Reste_A_Vivre', 'Stabilite_Pro', 'Nb_Prets_Total', 'Montant_Total_Emprunte',
879
- 'Montant_Moyen_Pret', 'Montant_Max_Pret', 'Ratio_Pret_Revenu',
880
- 'Frequence_Emprunt', 'Anciennete_Client_Jours', 'Anciennete_Client_Mois'
881
- ]
882
- st.markdown("**Features engineering:**")
883
- for i, feat in enumerate(created_features, 1):
884
- st.markdown(f"`{i:02d}` {feat}")
 
613
  time.sleep(30)
614
  st.rerun()
615
 
616
+ # === APPEL DU MODULE ML FEATURE STORE ===
617
+ from Analytics.ML_Feature_Store_Analytics import show_ml_feature_store
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
618
 
619
+ show_ml_feature_store(client, sheet_name)