Cyr-CK committed on
Commit
617e380
·
1 Parent(s): 892886a

Enlarged the scope of the analytics section

Browse files
Files changed (2) hide show
  1. app.py +2 -2
  2. sections/{ml.py → analytics.py} +28 -10
app.py CHANGED
@@ -34,10 +34,10 @@ home = st.Page("sections/home.py", title="🏠 Home")
34
  upload = st.Page("sections/upload.py", title="📥 Upload")
35
  statistics = st.Page("sections/statistics.py", title="📈 Statistics")
36
  analyze = st.Page("sections/analyze.py", title="🔍 Analyze")
37
- ml = st.Page("sections/ml.py", title="🤖 Machine Learning")
38
  alerts = st.Page("sections/alerts.py", title="🚨 Alerts")
39
  about = st.Page("sections/about.py", title="📄 About")
40
  test_filtre = st.Page("sections/test_filtre.py", title="test_filtre")
41
 
42
- pg = st.navigation([home, upload, statistics, analyze, ml, alerts, test_filtre])
43
  pg.run()
 
34
  upload = st.Page("sections/upload.py", title="📥 Upload")
35
  statistics = st.Page("sections/statistics.py", title="📈 Statistics")
36
  analyze = st.Page("sections/analyze.py", title="🔍 Analyze")
37
+ analytics = st.Page("sections/analytics.py", title="🤖 Analytics")
38
  alerts = st.Page("sections/alerts.py", title="🚨 Alerts")
39
  about = st.Page("sections/about.py", title="📄 About")
40
  test_filtre = st.Page("sections/test_filtre.py", title="test_filtre")
41
 
42
+ pg = st.navigation([home, upload, statistics, analyze, analytics, alerts, test_filtre])
43
  pg.run()
sections/{ml.py → analytics.py} RENAMED
@@ -4,14 +4,14 @@ import streamlit as st
4
  import polars as pl
5
 
6
  from sklearn.preprocessing import StandardScaler
 
7
  from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
8
- import matplotlib.pyplot as plt
9
 
10
  if "parsed_df" not in st.session_state:
11
  st.session_state.parsed_df = None
12
 
13
  # Page title
14
- st.title("Machine Learning")
15
 
16
  # Loading data
17
  if st.session_state.parsed_df is None:
@@ -19,18 +19,36 @@ if st.session_state.parsed_df is None:
19
  st.stop()
20
 
21
  data = st.session_state.parsed_df
 
 
 
 
 
 
 
22
 
23
  ##############################################
24
  #### Preprocessing ####
25
  ##############################################
26
 
27
- # Normalisation des données (Standardisation : moyenne = 0, écart-type = 1)
 
28
  scaler = StandardScaler()
29
- df_scaled = scaler.fit_transform(data.to_pandas())
 
 
 
 
 
 
 
 
30
 
31
  # Convertir de nouveau en DataFrame Polars
32
- df_scaled = pl.from_pandas(pd.DataFrame(df_scaled, columns=data.columns))
 
33
 
 
34
 
35
  ###############################################
36
  #### Clustering ####
@@ -43,15 +61,15 @@ if st.button("Start clustering"):
43
  # Appliquer K-Means avec k optimal choisi
44
  k_optimal = 2 # Par exemple, supposons que k = 2
45
  kmeans = KMeans(n_clusters=k_optimal, random_state=42)
46
- df_scaled = df_scaled.with_columns(pl.Series(kmeans.fit_predict(df_scaled.to_pandas()), name='cluster_kmeans'))
47
 
48
  # Appliquer DBSCAN (epsilon et min_samples sont des hyperparamètres)
49
  # dbscan = DBSCAN(eps=0.5, min_samples=10)
50
- # df_scaled = df_scaled.with_columns(pl.Series(dbscan.fit_predict(df_scaled.to_pandas()), name='cluster_dbscan'))
51
 
52
  # Appliquer Agglomerative Clustering
53
  # agg_clustering = AgglomerativeClustering(n_clusters=2)
54
- # df_scaled = df_scaled.with_columns(pl.Series(agg_clustering.fit_predict(df_scaled.to_pandas()), name='cluster_agg'))
55
 
56
  ###############################################################
57
  #### Visualisation des clusters ####
@@ -62,12 +80,12 @@ if st.button("Start clustering"):
62
  from sklearn.decomposition import PCA
63
 
64
  pca = PCA(n_components=2)
65
- df_pca = pca.fit_transform(df_scaled.to_pandas())
66
 
67
  fig = px.scatter(
68
  x=df_pca[:, 0],
69
  y=df_pca[:, 1],
70
- color=df_scaled['cluster_kmeans'],
71
  color_continuous_scale='viridis',
72
  title='Clustering coupled with PCA',
73
  labels={'x': 'Component 1', 'y': 'Component 2', 'color': 'Cluster'},
 
4
  import polars as pl
5
 
6
  from sklearn.preprocessing import StandardScaler
7
+ from sklearn.preprocessing import OneHotEncoder
8
  from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
 
9
 
10
  if "parsed_df" not in st.session_state:
11
  st.session_state.parsed_df = None
12
 
13
  # Page title
14
+ st.title("Analytiques")
15
 
16
  # Loading data
17
  if st.session_state.parsed_df is None:
 
19
  st.stop()
20
 
21
  data = st.session_state.parsed_df
22
+ data = data.select(["portdest","protocole","regle1","status"])
23
+
24
+ # Sélectionner toutes les colonnes numériques
25
+ quanti = data.select(pl.col(pl.String))
26
+
27
+ # Sélectionner toutes les colonnes de type chaîne
28
+ quali = data.select(pl.col(pl.Int64))
29
 
30
  ##############################################
31
  #### Preprocessing ####
32
  ##############################################
33
 
34
+ # Normalisation des données quanti (Standardisation : moyenne = 0, écart-type = 1)
35
+
36
  scaler = StandardScaler()
37
+ data_quanti = scaler.fit_transform(quanti.to_pandas())
38
+
39
+ # Convertir de nouveau en DataFrame Polars
40
+ data_quanti = pl.from_pandas(pd.DataFrame(data_quanti, columns=data_quanti.columns))
41
+
42
+ # Encodage one-hot des données quali
43
+
44
+ encoder = OneHotEncoder()
45
+ data_quali = encoder.fit_transform(quali.to_pandas())
46
 
47
  # Convertir de nouveau en DataFrame Polars
48
+ data_quali = pl.from_pandas(pd.DataFrame(data_quali, columns=data_quali.columns))
49
+
50
 
51
+ df = pl.concat([data_quanti, data_quali], how="diagonal")
52
 
53
  ###############################################
54
  #### Clustering ####
 
61
  # Appliquer K-Means avec k optimal choisi
62
  k_optimal = 2 # Par exemple, supposons que k = 2
63
  kmeans = KMeans(n_clusters=k_optimal, random_state=42)
64
+ df = df.with_columns(pl.Series(kmeans.fit_predict(df.to_pandas()), name='cluster_kmeans'))
65
 
66
  # Appliquer DBSCAN (epsilon et min_samples sont des hyperparamètres)
67
  # dbscan = DBSCAN(eps=0.5, min_samples=10)
68
+ # df = df.with_columns(pl.Series(dbscan.fit_predict(df.to_pandas()), name='cluster_dbscan'))
69
 
70
  # Appliquer Agglomerative Clustering
71
  # agg_clustering = AgglomerativeClustering(n_clusters=2)
72
+ # df = df.with_columns(pl.Series(agg_clustering.fit_predict(df.to_pandas()), name='cluster_agg'))
73
 
74
  ###############################################################
75
  #### Visualisation des clusters ####
 
80
  from sklearn.decomposition import PCA
81
 
82
  pca = PCA(n_components=2)
83
+ df_pca = pca.fit_transform(df.to_pandas())
84
 
85
  fig = px.scatter(
86
  x=df_pca[:, 0],
87
  y=df_pca[:, 1],
88
+ color=df['cluster_kmeans'],
89
  color_continuous_scale='viridis',
90
  title='Clustering coupled with PCA',
91
  labels={'x': 'Component 1', 'y': 'Component 2', 'color': 'Cluster'},