Spaces:
Running
Running
Corrected "counts" related error
Browse files
- sections/analytics.py +13 -7
- sections/statistics.py +2 -2
sections/analytics.py
CHANGED
|
@@ -22,10 +22,10 @@ data = st.session_state.parsed_df
|
|
| 22 |
data = data.select(["portdest","protocole","regle1","status"])
|
| 23 |
|
| 24 |
# Sélectionner toutes les colonnes numériques
|
| 25 |
-
quanti = data.select(pl.col(pl.
|
| 26 |
|
| 27 |
# Sélectionner toutes les colonnes de type chaîne
|
| 28 |
-
quali = data.select(pl.col(pl.
|
| 29 |
|
| 30 |
##############################################
|
| 31 |
#### Preprocessing ####
|
|
@@ -37,18 +37,23 @@ scaler = StandardScaler()
|
|
| 37 |
data_quanti = scaler.fit_transform(quanti.to_pandas())
|
| 38 |
|
| 39 |
# Convertir de nouveau en DataFrame Polars
|
| 40 |
-
data_quanti = pl.from_pandas(pd.DataFrame(data_quanti, columns=
|
| 41 |
|
| 42 |
# Encodage one-hot des données quali
|
| 43 |
|
| 44 |
-
encoder = OneHotEncoder()
|
| 45 |
data_quali = encoder.fit_transform(quali.to_pandas())
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
| 49 |
|
|
|
|
|
|
|
| 50 |
|
| 51 |
-
df = pl.concat([data_quanti, data_quali], how="
|
| 52 |
|
| 53 |
###############################################
|
| 54 |
#### Clustering ####
|
|
@@ -62,6 +67,7 @@ if st.button("Start clustering"):
|
|
| 62 |
k_optimal = 2 # Par exemple, supposons que k = 2
|
| 63 |
kmeans = KMeans(n_clusters=k_optimal, random_state=42)
|
| 64 |
df = df.with_columns(pl.Series(kmeans.fit_predict(df.to_pandas()), name='cluster_kmeans'))
|
|
|
|
| 65 |
|
| 66 |
# Appliquer DBSCAN (epsilon et min_samples sont des hyperparamètres)
|
| 67 |
# dbscan = DBSCAN(eps=0.5, min_samples=10)
|
|
|
|
| 22 |
data = data.select(["portdest","protocole","regle1","status"])
|
| 23 |
|
| 24 |
# Sélectionner toutes les colonnes numériques
|
| 25 |
+
quanti = data.select(pl.col(pl.Int64))
|
| 26 |
|
| 27 |
# Sélectionner toutes les colonnes de type chaîne
|
| 28 |
+
quali = data.select(pl.col(pl.String))
|
| 29 |
|
| 30 |
##############################################
|
| 31 |
#### Preprocessing ####
|
|
|
|
| 37 |
data_quanti = scaler.fit_transform(quanti.to_pandas())
|
| 38 |
|
| 39 |
# Convertir de nouveau en DataFrame Polars
|
| 40 |
+
data_quanti = pl.from_pandas(pd.DataFrame(data_quanti, columns=quanti.columns))
|
| 41 |
|
| 42 |
# Encodage one-hot des données quali
|
| 43 |
|
| 44 |
+
encoder = OneHotEncoder(sparse_output=False)
|
| 45 |
data_quali = encoder.fit_transform(quali.to_pandas())
|
| 46 |
|
| 47 |
+
col_names = [
|
| 48 |
+
f"{feature}_{category}"
|
| 49 |
+
for feature, categories in zip(quali.columns, encoder.categories_)
|
| 50 |
+
for category in categories
|
| 51 |
+
]
|
| 52 |
|
| 53 |
+
# Convertir de nouveau en DataFrame Polars
|
| 54 |
+
data_quali = pl.from_pandas(pd.DataFrame(data_quali, columns=col_names))
|
| 55 |
|
| 56 |
+
df = pl.concat([data_quanti, data_quali], how="horizontal")
|
| 57 |
|
| 58 |
###############################################
|
| 59 |
#### Clustering ####
|
|
|
|
| 67 |
k_optimal = 2 # Par exemple, supposons que k = 2
|
| 68 |
kmeans = KMeans(n_clusters=k_optimal, random_state=42)
|
| 69 |
df = df.with_columns(pl.Series(kmeans.fit_predict(df.to_pandas()), name='cluster_kmeans'))
|
| 70 |
+
st.write(df[:10])
|
| 71 |
|
| 72 |
# Appliquer DBSCAN (epsilon et min_samples sont des hyperparamètres)
|
| 73 |
# dbscan = DBSCAN(eps=0.5, min_samples=10)
|
sections/statistics.py
CHANGED
|
@@ -191,7 +191,7 @@ with stat_tab3:
|
|
| 191 |
if unique_count <= 20:
|
| 192 |
st.write(
|
| 193 |
df.select(pl.col(col).value_counts().struct.unnest()).sort(
|
| 194 |
-
"
|
| 195 |
)
|
| 196 |
)
|
| 197 |
else:
|
|
@@ -203,7 +203,7 @@ with stat_tab3:
|
|
| 203 |
.value_counts()
|
| 204 |
.struct.unnest() # Déstructure la struct ici
|
| 205 |
)
|
| 206 |
-
.sort("
|
| 207 |
.head(10)
|
| 208 |
)
|
| 209 |
|
|
|
|
| 191 |
if unique_count <= 20:
|
| 192 |
st.write(
|
| 193 |
df.select(pl.col(col).value_counts().struct.unnest()).sort(
|
| 194 |
+
"count", descending=True
|
| 195 |
)
|
| 196 |
)
|
| 197 |
else:
|
|
|
|
| 203 |
.value_counts()
|
| 204 |
.struct.unnest() # Déstructure la struct ici
|
| 205 |
)
|
| 206 |
+
.sort("count", descending=True)
|
| 207 |
.head(10)
|
| 208 |
)
|
| 209 |
|