rtik007 committed on
Commit
0fd2603
·
verified ·
1 Parent(s): cbee6c1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -0
app.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ from sklearn.preprocessing import MinMaxScaler, LabelEncoder
5
+ from sklearn.ensemble import IsolationForest
6
+ from pyod.models.hbos import HBOS
7
+ from pyod.models.ecod import ECOD
8
+ from pyod.models.lof import LOF
9
+ import gradio as gr
10
+
11
# Step 1: Generate a synthetic network-log dataset (1000 rows, 11 columns).
users = [f"user_{i}" for i in range(1, 11)]  # 10 unique users
protocols = ["TCP", "UDP"]
actions = ["allow", "deny"]

# Fixed seed so the generated data is reproducible from run to run.
np.random.seed(42)

# np.random.randint(low, high) excludes `high`, so each upper bound below is
# one past the largest value we actually want to be able to generate.
data = [
    [
        f"2024-12-01T12:{np.random.randint(0, 60):02}:00Z",  # minute 00-59
        f"192.168.1.{np.random.randint(1, 255)}",  # host octet 1-254
        f"10.0.0.{np.random.randint(1, 255)}",  # host octet 1-254
        np.random.randint(100, 10000),  # bytes
        np.random.choice(protocols),
        np.random.randint(1024, 65536),  # src_port: 1024-65535
        np.random.randint(1, 65536),  # dest_port: 1-65535
        np.random.choice(actions),
        round(np.random.uniform(0.1, 10.0), 2),  # duration
        np.random.randint(1, 1000),  # packets
        np.random.choice(users),  # user
    ]
    for _ in range(1000)
]
columns = [
    "timestamp",
    "src_ip",
    "dest_ip",
    "bytes",
    "protocol",
    "src_port",
    "dest_port",
    "action",
    "duration",
    "packets",
    "user",
]

df = pd.DataFrame(data, columns=columns)
36
+
37
# Preprocess the dataset.
# Convert the ISO-8601 timestamps to whole seconds since the Unix epoch.
# Subtracting the epoch and floor-dividing by one second does not depend on
# the datetime resolution pandas picks when parsing, and avoids
# Series.view(), which is deprecated in recent pandas releases.
df["timestamp_unix"] = (
    pd.to_datetime(df["timestamp"], utc=True) - pd.Timestamp("1970-01-01", tz="UTC")
) // pd.Timedelta(seconds=1)

# Encode categorical features as integer codes; the fitted encoders are kept
# so the codes can be mapped back to their original labels.
label_encoders = {}
for column in ["protocol", "action", "user"]:
    le = LabelEncoder()
    df[f"{column}_index"] = le.fit_transform(df[column])
    label_encoders[column] = le

# Scale every model feature (including the encoded categoricals) to [0, 1].
scaler = MinMaxScaler()
feature_columns = [
    "timestamp_unix",
    "bytes",
    "src_port",
    "dest_port",
    "duration",
    "packets",
    "protocol_index",
    "action_index",
    "user_index",
]
df[feature_columns] = scaler.fit_transform(df[feature_columns])

# Map feature column names to the human-readable names used as axis labels.
feature_mapping = {
    "timestamp_unix": "Timestamp (Unix)",
    "bytes": "Bytes Transferred",
    "src_port": "Source Port",
    "dest_port": "Destination Port",
    "duration": "Duration",
    "packets": "Number of Packets",
    "protocol_index": "Protocol (TCP/UDP)",
    "action_index": "Action (Allow/Deny)",
    "user_index": "User",
}
64
+
65
+ # Step 2: Function to visualize anomalies
66
def visualize_anomalies(feature1, feature2, sample_size):
    """Fit four anomaly detectors on a sample and plot their labels side by side.

    Args:
        feature1: Name of the feature column plotted on the x-axis; must be
            one of ``feature_columns``.
        feature2: Name of the feature column plotted on the y-axis; must be
            one of ``feature_columns``.
        sample_size: Number of rows to sample from ``df``. Coerced to ``int``
            and capped at ``len(df)``.

    Returns:
        A matplotlib ``Figure`` with one scatter panel per detector, coloured
        by predicted label (0 = inlier, 1 = outlier).

    Raises:
        ValueError: If either feature name is not in ``feature_columns``.
    """
    # Validate features
    if feature1 not in feature_columns or feature2 not in feature_columns:
        raise ValueError("Selected features are not valid.")

    # Sample the dataset. The caller may pass a float (e.g. from a UI slider),
    # while DataFrame.sample needs an integer row count.
    sample_size = min(int(sample_size), len(df))
    sampled_df = df.sample(sample_size, random_state=42)
    X = sampled_df[feature_columns].values

    # Initialize anomaly detection models
    models = {
        "Isolation Forest": IsolationForest(contamination=0.1, random_state=42),
        "HBOS": HBOS(contamination=0.1),
        "ECOD": ECOD(contamination=0.1),
        "LOF": LOF(contamination=0.1),
    }

    # Train models and collect predictions, normalised to 0 = inlier, 1 = outlier.
    predictions = {}
    for name, model in models.items():
        model.fit(X)
        preds = model.predict(X)
        if isinstance(model, IsolationForest):
            # scikit-learn labels inliers +1 and outliers -1, whereas the PyOD
            # detectors use 0/1; convert so all panels share one colour meaning.
            preds = (preds == -1).astype(int)
        predictions[name] = preds

    # Visualize results
    fig, axes = plt.subplots(1, len(models), figsize=(20, 5), sharey=True)
    x_values = X[:, feature_columns.index(feature1)]
    y_values = X[:, feature_columns.index(feature2)]

    for ax, (name, preds) in zip(axes, predictions.items()):
        ax.scatter(x_values, y_values, c=preds, cmap="coolwarm", s=10)
        ax.set_title(name)
        ax.set_xlabel(feature_mapping[feature1])
        ax.set_ylabel(feature_mapping[feature2])

    fig.suptitle("Comparison of Anomaly Detection Algorithms")
    fig.tight_layout()
    # Unregister the figure from pyplot so repeated calls do not accumulate
    # open figures; the returned Figure object itself remains renderable.
    plt.close(fig)
    return fig
104
+
105
# Create Gradio Interface for Anomaly Detection Algorithm Comparison
# NOTE(review): the nesting below was reconstructed (input controls in a
# column, plot beside it in the same row) — confirm against the intended layout.
demo = gr.Blocks()
with demo:
    gr.Markdown("### Anomaly Detection Algorithm Comparison")
    with gr.Row():
        with gr.Column():
            # Both dropdowns start with a valid selection so that pressing
            # "Visualize" immediately does not pass None to the callback,
            # which would be rejected with ValueError.
            feature1_dropdown = gr.Dropdown(
                choices=list(feature_mapping.keys()),
                value="bytes",
                label="Feature 1",
            )
            feature2_dropdown = gr.Dropdown(
                choices=list(feature_mapping.keys()),
                value="packets",
                label="Feature 2",
            )
            sample_slider = gr.Slider(
                minimum=10,
                maximum=1000,
                step=10,
                value=500,
                label="Number of Samples",
            )
            submit_button = gr.Button("Visualize")

        plot_output = gr.Plot(label="Visualization Results")

    # Re-run the detector comparison whenever the button is clicked.
    submit_button.click(
        fn=visualize_anomalies,
        inputs=[feature1_dropdown, feature2_dropdown, sample_slider],
        outputs=plot_output,
    )

demo.launch()