Afeefa123 committed on
Commit
304df69
·
verified ·
1 Parent(s): cc5878b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +249 -0
app.py CHANGED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gradio app for NSL-KDD binary intrusion detection demo (MVP)
3
+ Expecting these files in the same repo/root of the Space:
4
+ - nsl_kdd_tf_model.h5 (optional; if present will be used)
5
+ - scaler.pkl (optional; sklearn StandardScaler, must match model training)
6
+ - columns.json (optional; list of feature column names used by the model)
7
+
8
+ If artifacts are missing, the app will instruct you how to add them and offers a quick fallback
9
+ where you can upload a CSV and the app will train a lightweight sklearn model for demo purposes.
10
+ """
11
+
12
import io
import json
import os
import tempfile
import traceback
from typing import List, Tuple

import numpy as np
import pandas as pd

import gradio as gr
22
+
23
# TensorFlow is a heavy optional dependency: guard the import so the app
# still starts (and can use the sklearn fallback) when TF is unavailable.
TF_AVAILABLE = True
try:
    import tensorflow as tf
except Exception:
    TF_AVAILABLE = False

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import joblib

# Filenames of the pretrained artifacts expected next to app.py.
MODEL_FILE = "nsl_kdd_tf_model.h5"
SCALER_FILE = "scaler.pkl"
COLUMNS_FILE = "columns.json"
38
+
39
# helper: load pretrained artifacts from the working directory, if present
def load_artifacts():
    """Load the model, scaler and column-list artifacts if they exist.

    Returns:
        tuple: ``(model, scaler, columns, model_type)``. Each element is
        ``None`` when the corresponding artifact is missing or failed to
        load; ``model_type`` is ``"tensorflow"`` only when the Keras model
        loaded successfully.
    """
    model = None
    scaler = None
    columns = None
    model_type = None

    # columns.json: JSON array of feature names matching the model input
    if os.path.exists(COLUMNS_FILE):
        with open(COLUMNS_FILE, "r", encoding="utf-8") as f:
            columns = json.load(f)

    # scaler.pkl: fitted sklearn scaler. joblib.load accepts a path
    # directly, so one attempt suffices — the old second attempt via an
    # unclosed open() leaked a file handle while repeating the same load.
    if os.path.exists(SCALER_FILE):
        try:
            scaler = joblib.load(SCALER_FILE)
        except Exception:
            scaler = None

    # Keras model: only attempted when the tensorflow import succeeded
    if os.path.exists(MODEL_FILE) and TF_AVAILABLE:
        try:
            model = tf.keras.models.load_model(MODEL_FILE)
            model_type = "tensorflow"
        except Exception:
            model = None

    return model, scaler, columns, model_type

MODEL, SCALER, COLUMNS, MODEL_TYPE = load_artifacts()
72
+
73
def model_available_message() -> str:
    """Return the status banner describing which artifacts were found."""
    if not (MODEL is None or SCALER is None or COLUMNS is None):
        return "✅ Pretrained TensorFlow model and artifacts loaded. Ready to predict."
    # list only the artifacts that are actually absent
    missing = [
        f"Missing `{fname}`"
        for artifact, fname in (
            (MODEL, MODEL_FILE),
            (SCALER, SCALER_FILE),
            (COLUMNS, COLUMNS_FILE),
        )
        if artifact is None
    ]
    return (
        "⚠️ Artifacts missing: " + ", ".join(missing) + ".\n\n"
        "To run the TF model, add those files to the Space repository (same folder as app.py).\n"
        "Alternatively, upload a CSV of NSL-KDD records (the app will train a quick sklearn model for demo).\n\n"
        "columns.json should be a JSON array of feature names that match the model input (same as X_train.columns).\n"
    )
88
+
89
# utility: align an input frame to the model's feature order and scale it
def prepare_X_from_df(df: pd.DataFrame, expected_columns: List[str], scaler_obj) -> np.ndarray:
    """Return a model-ready 2-D array built from *df*.

    Columns missing from *df* are filled with 0, extra columns are dropped,
    and non-numeric cells coerce to 0.0.  When *scaler_obj* is provided, the
    result of its ``transform`` is returned; otherwise the raw float32 matrix.
    """
    aligned = df.reindex(columns=expected_columns, fill_value=0)
    numeric = aligned.apply(pd.to_numeric, errors="coerce").fillna(0.0)
    if scaler_obj is None:
        # no scaler: hand back the unscaled values
        return numeric.values.astype(np.float32)
    return scaler_obj.transform(numeric)
101
+
102
def predict_batch_from_df(df: pd.DataFrame) -> Tuple[pd.DataFrame, str]:
    """Predict a class for every row of *df*.

    Returns:
        tuple: ``(result_df, status_message)``.  ``result_df`` is a copy of
        *df* with ``_pred_prob`` / ``_pred_class`` columns appended, or an
        empty frame when prediction is impossible; ``status_message``
        explains what happened.
    """
    try:
        # preferred path: the pretrained TensorFlow model with its artifacts
        if MODEL is not None and SCALER is not None and COLUMNS is not None and MODEL_TYPE == "tensorflow":
            Xs = prepare_X_from_df(df, COLUMNS, SCALER)
            probs = MODEL.predict(Xs).ravel()
            preds = (probs >= 0.5).astype(int)
            out = df.copy()
            out["_pred_prob"] = probs
            out["_pred_class"] = preds
            return out, "Predictions from TensorFlow model"

        # fallback: train a quick logistic regression on the upload itself,
        # which is only possible when it carries a usable label column
        if 'label' not in df.columns and 'label_bin' not in df.columns:
            return pd.DataFrame(), "Cannot fallback: artifacts missing and uploaded CSV does not contain 'label' or 'label_bin' to train a temporary model."

        # Mimic the notebook preprocessing: numeric columns plus one-hot
        # encoded categoricals.  Only encode the categoricals actually
        # present so a CSV lacking e.g. 'service' no longer raises KeyError
        # (previously that surfaced as a raw traceback message).
        cats = [c for c in ('protocol_type', 'service', 'flag') if c in df.columns]
        num_cols = [c for c in df.columns if c not in cats + ['label', 'label_bin']]
        X_num = df[num_cols].apply(pd.to_numeric, errors='coerce').fillna(0.0)
        X = pd.concat([X_num, pd.get_dummies(df[cats], drop_first=True)], axis=1) if cats else X_num
        # binary target: 'normal' -> 0, any attack label -> 1
        y = df['label_bin'] if 'label_bin' in df.columns else df['label'].apply(
            lambda s: 0 if str(s).strip().lower() == "normal" else 1)
        # minimal scaler + logistic regression, fit and scored on the upload
        scaler_local = StandardScaler()
        Xs = scaler_local.fit_transform(X)
        clf = LogisticRegression(max_iter=200)
        clf.fit(Xs, y)
        probs = clf.predict_proba(Xs)[:, 1]
        preds = (probs >= 0.5).astype(int)
        out = df.copy()
        out["_pred_prob"] = probs
        out["_pred_class"] = preds
        return out, "Trained temporary LogisticRegression on uploaded CSV (used 'label' or 'label_bin' for training)."
    except Exception as e:
        # never raise into the UI; report the failure as the status message
        tb = traceback.format_exc()
        return pd.DataFrame(), f"Prediction error: {e}\n\n{tb}"
145
+
146
def predict_single(sample_text: str) -> str:
    """Predict one sample pasted as a JSON object (or a headerless CSV row).

    Returns a human-readable result/error string; never raises.
    """
    try:
        if not sample_text:
            return "No input provided."
        # try JSON first
        try:
            d = json.loads(sample_text)
            if isinstance(d, dict):
                df = pd.DataFrame([d])
            else:
                return "JSON must represent an object/dict for single sample."
        except Exception:
            # Not JSON: try a single headerless CSV row.  The original code
            # called pd.compat.StringIO, which was removed from pandas long
            # ago and raised AttributeError on every modern install;
            # io.StringIO is the supported replacement.
            try:
                df = pd.read_csv(io.StringIO(sample_text), header=None)
                # a headerless row can only be mapped positionally, and only
                # when the value count matches the trained column list
                if COLUMNS is not None and df.shape[1] == len(COLUMNS):
                    df.columns = COLUMNS
                else:
                    return "CSV input detected but header/column count mismatch. Prefer JSON object keyed by column names."
            except Exception:
                return "Could not parse input. Paste a JSON object like {\"duration\":0, \"protocol_type\":\"tcp\", ...} or upload a CSV row with header."

        # run the TF model when all artifacts are available
        if MODEL is not None and SCALER is not None and COLUMNS is not None and MODEL_TYPE == "tensorflow":
            Xs = prepare_X_from_df(df, COLUMNS, SCALER)
            prob = float(MODEL.predict(Xs)[0, 0])
            pred = int(prob >= 0.5)
            return f"Pred prob: {prob:.4f} — predicted class: {pred} (0=normal, 1=attack)"
        return "Model artifacts not present in Space. Upload `nsl_kdd_tf_model.h5`, `scaler.pkl`, and `columns.json` to use the TensorFlow model. Alternatively upload a labelled CSV to train a quick demo model."
    except Exception as e:
        tb = traceback.format_exc()
        return f"Error: {e}\n\n{tb}"
184
+
185
# ---- Gradio UI ----
with gr.Blocks(title="NSL-KDD Intrusion Detection — Demo MVP") as demo:
    gr.Markdown(
        "# NSL-KDD Intrusion Detection — Demo (MVP)\n"
        "Upload your artifacts (`nsl_kdd_tf_model.h5`, `scaler.pkl`, `columns.json`) to the Space to use the TensorFlow model.\n"
        "Or upload a labelled CSV (contains `label` or `label_bin`) and the app will train a quick logistic regression for demo.\n\n"
        "Columns expected: the original notebook used 41 numeric features with one-hot for `protocol_type`, `service`, `flag`.\n"
    )
    status = gr.Textbox(label="Status / Artifact check", value=model_available_message(), interactive=False)

    with gr.Row():
        with gr.Column(scale=2):
            file_input = gr.File(label="Upload CSV for batch prediction or for training fallback", file_types=['.csv'])
            sample_input = gr.Textbox(label="Single-sample input (JSON object)", placeholder='{"duration":0, "protocol_type":"tcp", ...}', lines=6)
            predict_button = gr.Button("Predict single sample")
            batch_button = gr.Button("Run batch (on uploaded CSV)")

        with gr.Column(scale=1):
            out_table = gr.Dataframe(headers="auto", label="Batch predictions (if any)")
            single_out = gr.Textbox(label="Single sample result", interactive=False)

    # worked example the user can copy into the single-sample box
    example_payload = {
        "duration": 0,
        "protocol_type": "tcp",
        "service": "http",
        "flag": "SF",
        "src_bytes": 181,
        "dst_bytes": 5450,
    }
    gr.Markdown("**Example single-sample JSON (fill in more NSL-KDD fields if you have them):**")
    gr.Code(json.dumps(example_payload, indent=2), language="json")

    # event handlers
    def on_predict_single(text):
        """Thin adapter between the Textbox and predict_single."""
        return predict_single(text)

    def on_batch_predict(uploaded):
        """Read the uploaded CSV, predict, and surface prediction columns first."""
        if uploaded is None:
            return pd.DataFrame(), "No file uploaded."
        try:
            # gradio file objects usually expose a .name path
            frame = pd.read_csv(uploaded.name)
        except Exception:
            try:
                # fall back to treating the object itself as a readable buffer/path
                frame = pd.read_csv(uploaded)
            except Exception as e:
                return pd.DataFrame(), f"Could not read CSV: {e}"

        result, msg = predict_batch_from_df(frame)
        if result.empty:
            return pd.DataFrame(), msg
        shown = result.copy()
        # bring the prediction columns to the front for readability
        for col in ("_pred_prob", "_pred_class"):
            if col in shown.columns:
                shown = shown[[col] + [c for c in shown.columns if c != col]]
        return shown, msg

    predict_button.click(on_predict_single, inputs=[sample_input], outputs=[single_out])
    batch_button.click(on_batch_predict, inputs=[file_input], outputs=[out_table, status])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))