ATllll committed on
Commit
dd609d0
·
verified ·
1 Parent(s): 11c32cc

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +167 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,169 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ import os
6
+ import ipaddress
7
+ import tensorflow as tf
8
+ from tensorflow.keras.models import load_model
9
+ from tensorflow.keras.optimizers.legacy import SGD
10
+
11
+ # ==== File Paths (update if needed) ====
12
+
13
# Model and preprocessing artefacts. The paths are relative, so they are
# resolved against the working directory the app is started from.
MODEL_FILE = "model1.h5"
WEIGHTS_FILE = "weights.h5"
SCALER_FILE = "standard_scaler1.pkl"
LABEL_ENCODER_FILE = "label_encoder1.pkl"

# One saved encoder per categorical input column, keyed by column name.
ENCODER_PATHS = {
    "proto": "categorical_label_encoder_proto1.pkl",
    "conn_state": "categorical_label_encoder_conn_state1.pkl",
    "history": "categorical_label_encoder_history1.pkl",
}

# Example input offered for download in the UI.
SAMPLE_FILE = "sample_input.csv"
25
+
26
+ # ==== Class for Malware Prediction ====
27
class MalwareClassifier:
    """Classify network-connection rows with a saved Keras model.

    The constructor loads the model, its weights, a scaler, the output label
    encoder and one encoder per categorical column from the module-level file
    paths. ``predict`` validates a DataFrame, encodes and scales it, and
    returns one result dict per input row.
    """

    # Feature columns, in the order they are handed to the scaler and model.
    _MODEL_COLUMNS = (
        'id.orig_h', 'id.orig_p', 'id.resp_h', 'id.resp_p',
        'proto', 'conn_state', 'history',
        'orig_pkts', 'orig_ip_bytes', 'resp_pkts', 'resp_ip_bytes',
    )
    # Columns that must hold non-negative integers.
    _NUMERIC_COLUMNS = frozenset({
        'id.orig_p', 'id.resp_p',
        'orig_pkts', 'orig_ip_bytes', 'resp_pkts', 'resp_ip_bytes',
    })
    # Columns that must hold parseable IP addresses.
    _IP_COLUMNS = ('id.orig_h', 'id.resp_h')
    # Columns transformed with the per-column encoders in ``self.encoders``.
    _CATEGORICAL_COLUMNS = ('proto', 'conn_state', 'history')

    def __init__(self):
        """Load every artefact from disk.

        Raises:
            FileNotFoundError: if any of the required files does not exist.
        """
        required_files = [
            MODEL_FILE, WEIGHTS_FILE, SCALER_FILE, LABEL_ENCODER_FILE,
            *ENCODER_PATHS.values(),
        ]
        for path in required_files:
            if not os.path.exists(path):
                raise FileNotFoundError(f"Missing file: {path}")

        # The model is loaded uncompiled, the weights are applied from their
        # own file, and only then is the model compiled.
        self.model = load_model(MODEL_FILE, compile=False)
        self.model.load_weights(WEIGHTS_FILE)
        self.model.compile(optimizer=SGD(), loss="categorical_crossentropy", metrics=["accuracy"])

        # NOTE(review): joblib.load unpickles the files; they must come from a
        # trusted source.
        self.scaler = joblib.load(SCALER_FILE)
        self.label_encoder = joblib.load(LABEL_ENCODER_FILE)
        self.encoders = {name: joblib.load(path) for name, path in ENCODER_PATHS.items()}

    def _validate_numeric_column(self, col_name, values):
        """Ensure every value in ``values`` is a non-negative integer.

        Raises:
            ValueError: "Non-integer value ..." for missing, non-numeric,
                boolean or fractional values; "Negative value ..." for
                integers below zero.
        """
        series = pd.Series(values)
        if pd.api.types.is_bool_dtype(series):
            raise ValueError(f"Non-integer value found in column {col_name}")

        # Coercion turns unparsable text and missing cells into NaN, so a
        # single NaN test covers both; the modulo test rejects fractions
        # (and infinities, whose remainder is NaN).
        numeric = pd.to_numeric(series, errors="coerce")
        if numeric.isna().any() or (numeric % 1 != 0).any():
            raise ValueError(f"Non-integer value found in column {col_name}")
        # Checked on the parsed numbers so that negative integers get their
        # own message instead of being reported as non-integers.
        if (numeric < 0).any():
            raise ValueError(f"Negative value found in column {col_name}")

    def _validate_ip_address(self, col_name, values):
        """Ensure every value in ``values`` parses as an IPv4 or IPv6 address.

        Raises:
            ValueError: naming the column and the first offending value.
        """
        for value in values:
            try:
                ipaddress.ip_address(value)
            except ValueError as exc:
                raise ValueError(f"Invalid IP address in column {col_name}: {value}") from exc

    def _validate_input_data(self, data):
        """Check that ``data`` is non-empty, has every model column, and that
        the numeric and IP columns hold valid values.

        Raises:
            ValueError: on an empty frame, missing columns, or invalid values.
        """
        if data.empty:
            raise ValueError("CSV is empty.")

        missing = set(self._MODEL_COLUMNS) - set(data.columns)
        if missing:
            raise ValueError(f"Missing required columns: {missing}")

        # Iterate in the frame's own column order so the first reported
        # problem follows the order of the uploaded data.
        for col in data.columns:
            if col in self._NUMERIC_COLUMNS:
                self._validate_numeric_column(col, data[col])
            elif col in self._IP_COLUMNS:
                self._validate_ip_address(col, data[col])

    def _encode_data(self, df):
        """Return a copy of ``df`` with categorical columns passed through
        their encoders and IP address columns converted to integers.

        The caller's DataFrame is left unmodified.

        Raises:
            ValueError: if an encoder rejects a value; the message names the
                column.
        """
        encoded = df.copy()
        for col in self._CATEGORICAL_COLUMNS:
            try:
                encoded[col] = self.encoders[col].transform(encoded[col])
            except ValueError as exc:
                raise ValueError(f"Unsupported value in column {col}: {exc}") from exc
        for col in self._IP_COLUMNS:
            encoded[col] = encoded[col].apply(lambda addr: int(ipaddress.ip_address(addr)))
        return encoded

    def _preprocess_data(self, df):
        """Validate and encode ``df``, then return the scaler's transform of
        the model columns in their fixed order."""
        self._validate_input_data(df)
        encoded = self._encode_data(df)
        return self.scaler.transform(encoded[list(self._MODEL_COLUMNS)])

    def predict(self, df):
        """Predict a label for every row of ``df``.

        Returns:
            A list with one dict per row: ``"result"`` is the decoded label of
            the highest-scoring class and ``"scores"`` maps every class name
            to its score formatted with six decimals.

        Raises:
            ValueError: if ``df`` fails validation or encoding.
        """
        preprocessed = self._preprocess_data(df)
        preds = self.model.predict(preprocessed)

        # Decode all winning class indices in one call rather than per row.
        winners = self.label_encoder.inverse_transform(np.argmax(preds, axis=1))
        class_names = self.label_encoder.classes_

        results = []
        for winner, row_scores in zip(winners, preds):
            scores = {name: f"{score:.6f}" for name, score in zip(class_names, row_scores)}
            results.append({"result": winner, "scores": scores})
        return results
101
+
102
+ # ==== Streamlit UI ====
103
@st.cache_resource
def _load_classifier():
    """Build the classifier once and reuse it across Streamlit reruns.

    Streamlit re-executes the script on every interaction; caching avoids
    reloading the model and encoders each time. A failed load raises and is
    not cached, so the next rerun retries.
    """
    return MalwareClassifier()


def main():
    """Render the page: sample download, CSV upload, per-row predictions.

    Errors while loading the model or while predicting are shown with
    ``st.error`` instead of propagating.
    """
    st.set_page_config(page_title="Malware Detection System", page_icon="🛡️")
    st.title("🛡️ Malware Detection System")
    st.markdown("Upload a CSV file with network traffic logs to detect malware.")

    # Sample CSV download.
    st.markdown("📄 Need a sample file to test? Download below:")
    try:
        with open(SAMPLE_FILE, "rb") as sample:
            sample_bytes = sample.read()
    except FileNotFoundError:
        st.warning("⚠️ Sample CSV not found. Please place 'sample_input.csv' in the app directory.")
    else:
        st.download_button(
            label="📥 Download Sample CSV",
            data=sample_bytes,
            file_name="sample_input.csv",
            mime="text/csv",
        )

    try:
        classifier = _load_classifier()
    except Exception as e:  # UI boundary: report any load failure to the user
        st.error(f"❌ Model loading failed: {e}")
        return

    uploaded_file = st.file_uploader("📂 Drag & drop or select a CSV file", type=["csv"])
    if uploaded_file is None:
        return

    required_prediction_columns = [
        'id.orig_h', 'id.orig_p', 'id.resp_h', 'id.resp_p',
        'proto', 'conn_state', 'history',
        'orig_pkts', 'orig_ip_bytes', 'resp_pkts', 'resp_ip_bytes',
    ]

    try:
        df = pd.read_csv(uploaded_file, delimiter=',')
        # Headers with stray surrounding whitespace would otherwise be
        # reported as missing columns.
        df.columns = df.columns.str.strip()

        missing_columns = set(required_prediction_columns) - set(df.columns)
        if missing_columns:
            st.error(f"❌ Missing required columns for prediction: {missing_columns}")
            st.write("📋 Detected columns:", df.columns.tolist())
            return

        # Hand the classifier its own copy restricted to the model columns.
        prediction_input = df[required_prediction_columns].copy()

        predictions = classifier.predict(prediction_input)
        st.success("✅ Prediction complete!")

        for i, result in enumerate(predictions):
            st.subheader(f"Prediction {i + 1}")
            st.write(f"**Predicted Label:** {result['result']}")
            st.json(result['scores'])

    except Exception as e:  # UI boundary: show the failure instead of a traceback
        st.error(f"❌ Error during prediction: {e}")


if __name__ == "__main__":
    main()