Spaces:
Sleeping
Sleeping
new update analyze 3
Browse files- sections/analyze.py +33 -19
sections/analyze.py
CHANGED
|
@@ -239,34 +239,49 @@ with tab2:
|
|
| 239 |
st.warning("Columns 'ipsrc' or 'action' not found.")
|
| 240 |
|
| 241 |
# Graphique de série temporelle des connexions par heure
|
| 242 |
-
st.write("### 📊
|
| 243 |
-
|
| 244 |
if "timestamp" in data.columns:
|
| 245 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
activity_data = (
|
| 247 |
-
data
|
| 248 |
-
.
|
| 249 |
-
.
|
| 250 |
-
.
|
| 251 |
-
.
|
| 252 |
-
.sort("hour") # Trier chronologiquement
|
| 253 |
)
|
| 254 |
|
| 255 |
-
# Vérifier
|
| 256 |
if not activity_data.is_empty():
|
| 257 |
-
# Convertir en
|
| 258 |
df_activity = activity_data.to_pandas()
|
| 259 |
-
df_activity["
|
| 260 |
|
| 261 |
# Tracer le graphique
|
| 262 |
fig = px.line(
|
| 263 |
df_activity,
|
| 264 |
-
x="
|
| 265 |
y="connection_count",
|
| 266 |
-
markers=True,
|
| 267 |
-
title="
|
| 268 |
-
labels={"
|
| 269 |
-
line_shape="spline"
|
| 270 |
)
|
| 271 |
|
| 272 |
# Afficher le graphique
|
|
@@ -277,7 +292,6 @@ with tab2:
|
|
| 277 |
st.warning("Column 'timestamp' not found.")
|
| 278 |
|
| 279 |
|
| 280 |
-
|
| 281 |
# Onglet Foreign IP addresses
|
| 282 |
with tab3:
|
| 283 |
st.subheader("🚫 List of access outside the university network")
|
|
@@ -320,7 +334,7 @@ with tab4:
|
|
| 320 |
|
| 321 |
def create_sankey(df, source_col, target_col):
|
| 322 |
""" Crée un diagramme de Sankey entre deux colonnes """
|
| 323 |
-
df_grouped = df.
|
| 324 |
|
| 325 |
# Création des nœuds
|
| 326 |
labels = list(pd.concat([df_grouped[source_col], df_grouped[target_col]]).unique())
|
|
|
|
| 239 |
st.warning("Columns 'ipsrc' or 'action' not found.")
|
| 240 |
|
| 241 |
# Graphique de série temporelle des connexions par heure
|
| 242 |
+
st.write("### 📊 Connection Activity Analysis")
|
|
|
|
| 243 |
if "timestamp" in data.columns:
|
| 244 |
+
# 📌 Ajout d'un sélecteur de fréquence
|
| 245 |
+
frequency = st.selectbox("Select frequency", ["second", "minute", "hour", "day"], index=1)
|
| 246 |
+
|
| 247 |
+
# Définition des formats selon la fréquence choisie
|
| 248 |
+
if frequency == "second":
|
| 249 |
+
time_format = "%Y-%m-%d %H:%M:%S"
|
| 250 |
+
time_label = "Second"
|
| 251 |
+
elif frequency == "minute":
|
| 252 |
+
time_format = "%Y-%m-%d %H:%M:00"
|
| 253 |
+
time_label = "Minute"
|
| 254 |
+
elif frequency == "hour":
|
| 255 |
+
time_format = "%Y-%m-%d %H:00:00"
|
| 256 |
+
time_label = "Hour"
|
| 257 |
+
else:
|
| 258 |
+
time_format = "%Y-%m-%d"
|
| 259 |
+
time_label = "Day"
|
| 260 |
+
|
| 261 |
+
# Filtrage et regroupement
|
| 262 |
activity_data = (
|
| 263 |
+
data.filter(pl.col("action") == "PERMIT")
|
| 264 |
+
.with_columns(pl.col("timestamp").dt.strftime(time_format).alias("time_period"))
|
| 265 |
+
.group_by("time_period")
|
| 266 |
+
.agg(pl.count("time_period").alias("connection_count"))
|
| 267 |
+
.sort("time_period")
|
|
|
|
| 268 |
)
|
| 269 |
|
| 270 |
+
# Vérifier s'il y a des données
|
| 271 |
if not activity_data.is_empty():
|
| 272 |
+
# Convertir en Pandas
|
| 273 |
df_activity = activity_data.to_pandas()
|
| 274 |
+
df_activity["time_period"] = pd.to_datetime(df_activity["time_period"])
|
| 275 |
|
| 276 |
# Tracer le graphique
|
| 277 |
fig = px.line(
|
| 278 |
df_activity,
|
| 279 |
+
x="time_period",
|
| 280 |
y="connection_count",
|
| 281 |
+
markers=True,
|
| 282 |
+
title=f"Connection Activity ({time_label} level)",
|
| 283 |
+
labels={"time_period": time_label, "connection_count": "Number of Connections"},
|
| 284 |
+
line_shape="spline"
|
| 285 |
)
|
| 286 |
|
| 287 |
# Afficher le graphique
|
|
|
|
| 292 |
st.warning("Column 'timestamp' not found.")
|
| 293 |
|
| 294 |
|
|
|
|
| 295 |
# Onglet Foreign IP addresses
|
| 296 |
with tab3:
|
| 297 |
st.subheader("🚫 List of access outside the university network")
|
|
|
|
| 334 |
|
| 335 |
def create_sankey(df, source_col, target_col):
|
| 336 |
""" Crée un diagramme de Sankey entre deux colonnes """
|
| 337 |
+
df_grouped = df.group_by([source_col, target_col]).len().to_pandas()
|
| 338 |
|
| 339 |
# Création des nœuds
|
| 340 |
labels = list(pd.concat([df_grouped[source_col], df_grouped[target_col]]).unique())
|