Spaces:
Running
Running
Enlarged the scope of the analytics section
Browse files- app.py +2 -2
- sections/{ml.py → analytics.py} +28 -10
app.py
CHANGED
|
@@ -34,10 +34,10 @@ home = st.Page("sections/home.py", title="🏠 Home")
|
|
| 34 |
upload = st.Page("sections/upload.py", title="📥 Upload")
|
| 35 |
statistics = st.Page("sections/statistics.py", title="📈 Statistics")
|
| 36 |
analyze = st.Page("sections/analyze.py", title="🔍 Analyze")
|
| 37 |
-
|
| 38 |
alerts = st.Page("sections/alerts.py", title="🚨 Alerts")
|
| 39 |
about = st.Page("sections/about.py", title="📄 About")
|
| 40 |
test_filtre = st.Page("sections/test_filtre.py", title="test_filtre")
|
| 41 |
|
| 42 |
-
pg = st.navigation([home, upload, statistics, analyze,
|
| 43 |
pg.run()
|
|
|
|
| 34 |
upload = st.Page("sections/upload.py", title="📥 Upload")
|
| 35 |
statistics = st.Page("sections/statistics.py", title="📈 Statistics")
|
| 36 |
analyze = st.Page("sections/analyze.py", title="🔍 Analyze")
|
| 37 |
+
analytics = st.Page("sections/analytics.py", title="🤖 Analytics")
|
| 38 |
alerts = st.Page("sections/alerts.py", title="🚨 Alerts")
|
| 39 |
about = st.Page("sections/about.py", title="📄 About")
|
| 40 |
test_filtre = st.Page("sections/test_filtre.py", title="test_filtre")
|
| 41 |
|
| 42 |
+
pg = st.navigation([home, upload, statistics, analyze, analytics, alerts, test_filtre])
|
| 43 |
pg.run()
|
sections/{ml.py → analytics.py}
RENAMED
|
@@ -4,14 +4,14 @@ import streamlit as st
|
|
| 4 |
import polars as pl
|
| 5 |
|
| 6 |
from sklearn.preprocessing import StandardScaler
|
|
|
|
| 7 |
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
|
| 8 |
-
import matplotlib.pyplot as plt
|
| 9 |
|
| 10 |
if "parsed_df" not in st.session_state:
|
| 11 |
st.session_state.parsed_df = None
|
| 12 |
|
| 13 |
# Page title
|
| 14 |
-
st.title("
|
| 15 |
|
| 16 |
# Loading data
|
| 17 |
if st.session_state.parsed_df is None:
|
|
@@ -19,18 +19,36 @@ if st.session_state.parsed_df is None:
|
|
| 19 |
st.stop()
|
| 20 |
|
| 21 |
data = st.session_state.parsed_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
##############################################
|
| 24 |
#### Preprocessing ####
|
| 25 |
##############################################
|
| 26 |
|
| 27 |
-
# Normalisation des données (Standardisation : moyenne = 0, écart-type = 1)
|
|
|
|
| 28 |
scaler = StandardScaler()
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
# Convertir de nouveau en DataFrame Polars
|
| 32 |
-
|
|
|
|
| 33 |
|
|
|
|
| 34 |
|
| 35 |
###############################################
|
| 36 |
#### Clustering ####
|
|
@@ -43,15 +61,15 @@ if st.button("Start clustering"):
|
|
| 43 |
# Appliquer K-Means avec k optimal choisi
|
| 44 |
k_optimal = 2 # Number of clusters passed to K-Means (hard-coded to 2)
|
| 45 |
kmeans = KMeans(n_clusters=k_optimal, random_state=42)
|
| 46 |
-
|
| 47 |
|
| 48 |
# Appliquer DBSCAN (epsilon et min_samples sont des hyperparamètres)
|
| 49 |
# dbscan = DBSCAN(eps=0.5, min_samples=10)
|
| 50 |
-
#
|
| 51 |
|
| 52 |
# Appliquer Agglomerative Clustering
|
| 53 |
# agg_clustering = AgglomerativeClustering(n_clusters=2)
|
| 54 |
-
#
|
| 55 |
|
| 56 |
###############################################################
|
| 57 |
#### Visualisation des clusters ####
|
|
@@ -62,12 +80,12 @@ if st.button("Start clustering"):
|
|
| 62 |
from sklearn.decomposition import PCA
|
| 63 |
|
| 64 |
pca = PCA(n_components=2)
|
| 65 |
-
df_pca = pca.fit_transform(
|
| 66 |
|
| 67 |
fig = px.scatter(
|
| 68 |
x=df_pca[:, 0],
|
| 69 |
y=df_pca[:, 1],
|
| 70 |
-
color=
|
| 71 |
color_continuous_scale='viridis',
|
| 72 |
title='Clustering coupled with PCA',
|
| 73 |
labels={'x': 'Component 1', 'y': 'Component 2', 'color': 'Cluster'},
|
|
|
|
| 4 |
import polars as pl
|
| 5 |
|
| 6 |
from sklearn.preprocessing import StandardScaler
|
| 7 |
+
from sklearn.preprocessing import OneHotEncoder
|
| 8 |
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
|
|
|
|
| 9 |
|
| 10 |
if "parsed_df" not in st.session_state:
|
| 11 |
st.session_state.parsed_df = None
|
| 12 |
|
| 13 |
# Page title
|
| 14 |
+
st.title("Analytiques")
|
| 15 |
|
| 16 |
# Loading data
|
| 17 |
if st.session_state.parsed_df is None:
|
|
|
|
| 19 |
st.stop()
|
| 20 |
|
| 21 |
data = st.session_state.parsed_df
|
| 22 |
+
data = data.select(["portdest","protocole","regle1","status"])
|
| 23 |
+
|
| 24 |
+
# Sélectionner toutes les colonnes numériques
|
| 25 |
+
# Quantitative features: keep the integer-typed columns so that StandardScaler
# receives numeric input (string columns cannot be standardised).
quanti = data.select(pl.col(pl.Int64))
|
| 26 |
+
|
| 27 |
+
# Sélectionner toutes les colonnes de type chaîne
|
| 28 |
+
# Qualitative features: keep the string-typed columns, which are the ones
# meant to be one-hot encoded.
quali = data.select(pl.col(pl.String))
|
| 29 |
|
| 30 |
##############################################
|
| 31 |
#### Preprocessing ####
|
| 32 |
##############################################
|
| 33 |
|
| 34 |
+
# Normalisation des données quanti (Standardisation : moyenne = 0, écart-type = 1)
|
| 35 |
+
|
| 36 |
scaler = StandardScaler()
|
| 37 |
+
data_quanti = scaler.fit_transform(quanti.to_pandas())
|
| 38 |
+
|
| 39 |
+
# Convertir de nouveau en DataFrame Polars
|
| 40 |
+
# fit_transform returns a plain array without column labels, so the column
# names are taken from the original `quanti` frame.
data_quanti = pl.from_pandas(pd.DataFrame(data_quanti, columns=quanti.columns))
|
| 41 |
+
|
| 42 |
+
# Encodage one-hot des données quali
|
| 43 |
+
|
| 44 |
+
encoder = OneHotEncoder()
|
| 45 |
+
data_quali = encoder.fit_transform(quali.to_pandas())
|
| 46 |
|
| 47 |
# Convertir de nouveau en DataFrame Polars
|
| 48 |
+
# OneHotEncoder returns a SciPy sparse matrix by default: densify it and label
# the columns with the feature names generated by the encoder.
data_quali = pl.from_pandas(
    pd.DataFrame(data_quali.toarray(), columns=encoder.get_feature_names_out())
)
|
| 49 |
+
|
| 50 |
|
| 51 |
+
# Join the scaled and encoded features side by side (one row per record).
# A diagonal concat would stack the two frames vertically and pad with nulls.
df = pl.concat([data_quanti, data_quali], how="horizontal")
|
| 52 |
|
| 53 |
###############################################
|
| 54 |
#### Clustering ####
|
|
|
|
| 61 |
# Appliquer K-Means avec k optimal choisi
|
| 62 |
k_optimal = 2 # Number of clusters passed to K-Means (hard-coded to 2)
|
| 63 |
kmeans = KMeans(n_clusters=k_optimal, random_state=42)
|
| 64 |
+
df = df.with_columns(pl.Series(kmeans.fit_predict(df.to_pandas()), name='cluster_kmeans'))
|
| 65 |
|
| 66 |
# Appliquer DBSCAN (epsilon et min_samples sont des hyperparamètres)
|
| 67 |
# dbscan = DBSCAN(eps=0.5, min_samples=10)
|
| 68 |
+
# df = df.with_columns(pl.Series(dbscan.fit_predict(df.to_pandas()), name='cluster_dbscan'))
|
| 69 |
|
| 70 |
# Appliquer Agglomerative Clustering
|
| 71 |
# agg_clustering = AgglomerativeClustering(n_clusters=2)
|
| 72 |
+
# df = df.with_columns(pl.Series(agg_clustering.fit_predict(df.to_pandas()), name='cluster_agg'))
|
| 73 |
|
| 74 |
###############################################################
|
| 75 |
#### Visualisation des clusters ####
|
|
|
|
| 80 |
from sklearn.decomposition import PCA
|
| 81 |
|
| 82 |
pca = PCA(n_components=2)
|
| 83 |
+
# Project the features only: the K-Means labels stored in `cluster_kmeans`
# are excluded so they do not influence the principal components.
df_pca = pca.fit_transform(df.drop('cluster_kmeans').to_pandas())
|
| 84 |
|
| 85 |
fig = px.scatter(
|
| 86 |
x=df_pca[:, 0],
|
| 87 |
y=df_pca[:, 1],
|
| 88 |
+
color=df['cluster_kmeans'],
|
| 89 |
color_continuous_scale='viridis',
|
| 90 |
title='Clustering coupled with PCA',
|
| 91 |
labels={'x': 'Component 1', 'y': 'Component 2', 'color': 'Cluster'},
|