Deevyankar committed on
Commit
b956a4c
·
verified ·
1 Parent(s): f62a9a9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +503 -0
app.py ADDED
@@ -0,0 +1,503 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.cluster import KMeans
4
+ import torch
5
+ from torch import nn
6
+ from torch.utils.data import TensorDataset, DataLoader
7
+ import matplotlib.pyplot as plt
8
+ import gradio as gr
9
+
10
# -------------------------------------------------------
# 1. Load dataset (Facebook Metrics of Cosmetic Brand)
# -------------------------------------------------------
DATA_PATH = "dataset_Facebook.csv"  # semicolon-separated file

# Blank cells become 0 so the sums and ratios below never see NaN.
df = pd.read_csv(DATA_PATH, sep=";")
df = df.fillna(0)

# Rename important columns if needed
df = df.rename(columns={
    "Page total likes": "page_likes",
    "Lifetime Post Total Impressions": "impressions",
    "Lifetime Engaged Users": "engaged_users",
    "comment": "comments",
    "like": "likes",
    "share": "shares",
})

# Fallback if some names are missing in your copy
for col in ["comments", "likes", "shares"]:
    if col not in df.columns:
        df[col] = 0

# Unlike the three counters above, these columns get no default here, and
# every later score is derived from them. Fail right away with a message that
# names the missing column(s) instead of a bare KeyError further down.
_missing_columns = [
    name for name in ("impressions", "engaged_users") if name not in df.columns
]
if _missing_columns:
    raise KeyError(
        f"{DATA_PATH} is missing required column(s): {', '.join(_missing_columns)}"
    )

# -------------------------------------------------------
# 2. Real behavioural features from dataset
# -------------------------------------------------------

# Per-row total of comments, likes and shares.
engagement = df["comments"] + df["likes"] + df["shares"]

# Rows with 0 impressions use 1 as the denominator to avoid division by zero.
impressions = df["impressions"].replace(0, 1)
interaction_rate = df["engaged_users"] / impressions
42
def minmax(x):
    """Rescale *x* linearly so its smallest value maps to 0 and its largest to ~1.

    Args:
        x: Array-like of numbers; it is converted to a float NumPy array.

    Returns:
        A new float array of the same shape. A constant input maps to all
        zeros, because the 1e-8 added to the range keeps the division finite.
        An empty input yields an empty float array instead of raising.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        # min()/max() are undefined for zero-size arrays; nothing to scale.
        return x.copy()
    lowest = x.min()
    # 1e-8 guards against a zero range (all values equal).
    return (x - lowest) / (x.max() - lowest + 1e-8)
45
+
46
# Engagement and interaction rate, each rescaled to [0, 1].
eng_norm = minmax(engagement)
interaction_norm = minmax(interaction_rate)

# Trust-like base: higher interaction => more trusted
trust_base = np.copy(interaction_norm)

# Suspicious: high impressions but low engagement
imp_norm = minmax(df["impressions"])
suspicious_score = imp_norm * (1.0 - trust_base)
susp_norm = minmax(suspicious_score)

# Activity regularity: posts around midday more "regular"
# (default midday if the "Post Hour" column is missing)
post_hour = df["Post Hour"] if "Post Hour" in df.columns else pd.Series([12] * len(df))

# Distance from 12:00 as a fraction of 12 hours, capped to [0, 1] and inverted
# so that a post at exactly midday scores 1.
activity_reg = 1.0 - (post_hour - 12).abs().div(12.0).clip(0, 1)
act_norm = minmax(activity_reg)
65
+
66
# -------------------------------------------------------
# 3. Synthetic FRR & MFR
# -------------------------------------------------------

# Fixed seed so the synthetic columns are the same on every run. The order of
# the draws below matters: changing it changes every generated value.
rng = np.random.default_rng(42)

# Friend requests sent (more for engaged posts); at least one per row so the
# ratio below is always defined.
base_sent = rng.poisson(lam=3 + 20 * eng_norm)
sent_requests = base_sent.clip(min=1)

# Acceptance probability depends on trust_base (0.2 to 0.9)
accepted_prob = np.clip(0.2 + 0.7 * trust_base, 0.0, 1.0)
accepted_requests = rng.binomial(n=sent_requests, p=accepted_prob)
friend_request_ratio = accepted_requests / (sent_requests + 1e-8)
frr_norm = minmax(friend_request_ratio)

# Synthetic total friends
total_friends = rng.integers(50, 2000, size=len(df))

# Mutual friends probability depends on trust_base
mutual_prob = np.clip(0.1 + 0.6 * trust_base, 0.0, 1.0)
mutual_friends = rng.binomial(n=total_friends, p=mutual_prob)
mutual_friends_ratio = mutual_friends / (total_friends + 1e-8)
mfr_norm = minmax(mutual_friends_ratio)

friends_norm = minmax(total_friends)
94
+
95
# -------------------------------------------------------
# 4. Build S, T, B scores
# -------------------------------------------------------
# Each score is the plain average of three normalised components.

# S: social / structural (FRR, MFR, friends)
S_score = (frr_norm + mfr_norm + friends_norm) / 3.0

# T: trust (trust_base, FRR, inverse suspiciousness)
T_score = (trust_base + frr_norm + (1.0 - susp_norm)) / 3.0

# B: behaviour (engagement, regularity, suspiciousness)
B_score = (eng_norm + act_norm + susp_norm) / 3.0

# -------------------------------------------------------
# 5. Fused features with variance-based weights
# -------------------------------------------------------
# Each score's weight is its share of the total variance; the 1e-8 keeps the
# denominator non-zero if all three scores are constant.

varS, varT, varB = S_score.var(), T_score.var(), B_score.var()
den = varS + varT + varB + 1e-8
wS, wT, wB = (v / den for v in (varS, varT, varB))

# One weighted column per score.
F = np.vstack((wS * S_score, wT * T_score, wB * B_score)).T  # shape (N, 3)
123
+
124
# -------------------------------------------------------
# 6. Unsupervised clustering -> pseudo labels
# -------------------------------------------------------

kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
cluster_raw = kmeans.fit_predict(F)

# KMeans cluster ids are arbitrary, so rank the clusters by their mean trust
# score and assign the status labels from that ranking.
cluster_means = [(c, T_score[cluster_raw == c].mean()) for c in range(3)]
cluster_means_sorted = sorted(cluster_means, key=lambda pair: pair[1])

# Ascending trust order: lowest => Intruder (2), mid => Under Observation (1),
# highest => Trusted (0).
label_map = {
    cluster_id: status_id
    for (cluster_id, _), status_id in zip(cluster_means_sorted, (2, 1, 0))
}

cluster_labels = np.array([label_map[raw_id] for raw_id in cluster_raw], dtype=int)

label_names = dict(enumerate(("Trusted", "Under Observation", "Intruder")))

# Number of rows per status, indexed by status id (always length 3).
status_counts = np.bincount(cluster_labels, minlength=3)
151
+
152
def make_status_bar_plot():
    """Return a bar chart of how many dataset rows fall into each status.

    Reads the module-level ``status_counts`` array; the bars are drawn in the
    order Trusted, Under Observation, Intruder.
    """
    figure, axes = plt.subplots()
    axes.bar(["Trusted", "Under Observation", "Intruder"], status_counts)
    axes.set(
        ylabel="Number of posts",
        title="Global distribution of statuses (on dataset)",
    )
    figure.tight_layout()
    return figure
160
+
161
# -------------------------------------------------------
# 7. Train MLP on fused features
# -------------------------------------------------------

# Inputs are the fused 3-column features; targets are the pseudo-labels
# produced by the clustering step.
X = torch.tensor(F, dtype=torch.float32)
y = torch.tensor(cluster_labels, dtype=torch.long)

dataset = TensorDataset(X, y)
# Mini-batches of 64, reshuffled every epoch.
loader = DataLoader(dataset=dataset, shuffle=True, batch_size=64)
170
+
171
class MLPClassifier(nn.Module):
    """Feed-forward classifier: two ReLU hidden layers followed by a linear head.

    Args:
        in_dim: Number of input features.
        hidden_dim: Width of both hidden layers.
        num_classes: Number of output logits.
    """

    def __init__(self, in_dim, hidden_dim=32, num_classes=3):
        super().__init__()
        # Layers are created in forward order so parameter initialisation and
        # the ``net.<index>`` names in the state dict stay the same.
        stack = [
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes),
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return raw (un-normalised) class logits for the batch *x*."""
        return self.net(x)
183
+
184
# Seed PyTorch before the model is built and the loader is iterated. The NumPy
# generator and KMeans above already use seed 42; without this call the weight
# initialisation and batch shuffling, and therefore the app's predictions,
# would differ on every start.
torch.manual_seed(42)

model = MLPClassifier(in_dim=3)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# 20 passes over the pseudo-labelled data.
for epoch in range(20):
    model.train()
    for xb, yb in loader:
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()

# Measure how well the network reproduces the pseudo-labels it was trained on.
# This is not a held-out evaluation: the same rows are used for both.
model.eval()
with torch.no_grad():
    preds = model(X).argmax(dim=1)
    acc = (preds == y).float().mean().item()
print(f"Training accuracy vs pseudo-labels: {acc:.3f}")
204
+
205
def predict_from_fused(S_val, T_val, B_val):
    """Classify one (S, T, B) score triple with the trained model.

    The scores are multiplied by the module-level weights ``wS``, ``wT`` and
    ``wB``, matching how the training matrix ``F`` was built.

    Returns:
        ``(pred, probs)``: the index of the most probable class as an ``int``
        and a NumPy array with the softmax probability of each class.
    """
    weighted = np.array([wS * S_val, wT * T_val, wB * B_val], dtype=np.float32)
    # The model expects a batch dimension: shape (1, 3).
    batch = torch.tensor(weighted.reshape(1, -1), dtype=torch.float32)

    model.eval()
    with torch.no_grad():
        probs = torch.softmax(model(batch), dim=1)[0].cpu().numpy()

    return int(probs.argmax()), probs
214
+
215
# Observed engagement range; used to normalise UI input and to bound the
# engagement slider.
eng_min, eng_max = engagement.min(), engagement.max()
217
+
218
+ # -------------------------------------------------------
219
+ # 8. Build S, T, B from UI inputs
220
+ # -------------------------------------------------------
221
+
222
def build_scores_from_user_input(
    engagement_input,
    suspicious_input,
    activity_input,
    frr_input,
    mfr_input,
):
    """Turn the five slider values into S, T and B scores.

    ``engagement_input`` is a raw engagement count, rescaled with the
    module-level ``eng_min``/``eng_max``; the other four inputs are taken as
    already being on a 0-1 scale. Every value is clamped to [0, 1].

    Returns:
        ``(S, T, B, engagement, suspiciousness, activity)``, where the last
        three are the clamped, normalised inputs.
    """

    def clamp01(value):
        return float(np.clip(value, 0.0, 1.0))

    eng_norm_ui = clamp01((engagement_input - eng_min) / (eng_max - eng_min + 1e-8))
    susp_norm_ui = clamp01(suspicious_input)
    act_norm_ui = clamp01(activity_input)
    frr_norm_ui = clamp01(frr_input)
    mfr_norm_ui = clamp01(mfr_input)

    # There is no slider for friend count: fixed average friends.
    friends_norm_ui = 0.5

    # Trust stand-in for the UI: mean of engagement and inverse suspiciousness.
    trust_norm_ui = (eng_norm_ui + (1.0 - susp_norm_ui)) / 2.0

    social = (frr_norm_ui + mfr_norm_ui + friends_norm_ui) / 3.0
    trust = (trust_norm_ui + frr_norm_ui + (1.0 - susp_norm_ui)) / 3.0
    behaviour = (eng_norm_ui + act_norm_ui + susp_norm_ui) / 3.0

    return social, trust, behaviour, eng_norm_ui, susp_norm_ui, act_norm_ui
246
+
247
+ # -------------------------------------------------------
248
+ # 9. Timeline helpers (T1–T5)
249
+ # -------------------------------------------------------
250
+
251
def make_timeline_plot(timeline_state):
    """Plot the three class probabilities across the recorded time steps.

    Args:
        timeline_state: List of entries, each a dict whose ``"probs"`` value
            holds the probabilities for Trusted, Under Observation and
            Intruder (in that order). An empty list or ``None`` produces a
            placeholder figure.

    Returns:
        The Matplotlib figure. It is removed from pyplot's figure registry
        before being returned, so repeated calls from the UI do not pile up
        open figures; the returned object can still be rendered.
    """
    fig, ax = plt.subplots()
    try:
        if not timeline_state:
            ax.text(0.5, 0.5, "No timeline yet", ha="center", va="center")
            ax.set_axis_off()
            fig.tight_layout()
            return fig

        steps = list(range(1, len(timeline_state) + 1))
        # One line per class; the position in "probs" selects the class.
        for class_idx, class_name in enumerate(
            ("Trusted", "Under Observation", "Intruder")
        ):
            series = [entry["probs"][class_idx] for entry in timeline_state]
            ax.plot(steps, series, marker="o", label=class_name)

        ax.set_xticks(steps)
        ax.set_xlabel("Time step (T1–T5)")
        ax.set_ylabel("Probability")
        ax.set_ylim(0, 1)
        ax.set_title("User status probabilities over time")
        ax.legend()
        fig.tight_layout()
        return fig
    finally:
        # pyplot keeps a reference to every figure it creates until it is
        # closed; this function runs on every button click.
        plt.close(fig)
276
+
277
def simulate_week(
    engagement_input,
    suspicious_input,
    activity_input,
    frr_input,
    mfr_input,
    timeline_state,
):
    """Score the current slider values and append the result to the timeline.

    Args:
        engagement_input, suspicious_input, activity_input, frr_input,
        mfr_input: Slider values, forwarded to
            ``build_scores_from_user_input``.
        timeline_state: Entries recorded so far, or ``None`` for a fresh
            timeline.

    Returns:
        ``(current_md, timeline_md, tl_fig, timeline_state)``: a Markdown
        summary of this step, a Markdown listing of the timeline, the
        probability plot, and the updated timeline.
    """
    history = [] if timeline_state is None else timeline_state

    S_ui, T_ui, B_ui, eng_n, susp_n, act_n = build_scores_from_user_input(
        engagement_input,
        suspicious_input,
        activity_input,
        frr_input,
        mfr_input,
    )
    pred, probs = predict_from_fused(S_ui, T_ui, B_ui)
    status = label_names[pred]

    # Once five steps are recorded, drop the oldest before adding the new one.
    if len(history) >= 5:
        history = history[1:]
    history.append({
        "status": status,
        "probs": probs.tolist(),
        "S": float(S_ui),
        "T": float(T_ui),
        "B": float(B_ui),
    })

    # Current step summary
    current_md = "\n".join([
        f"### Current Week: T{len(history)}",
        f"**Predicted Status:** **{status}**",
        "",
        "**Probabilities:**",
        f"- Trusted: {probs[0]:.2f}",
        f"- Under Observation: {probs[1]:.2f}",
        f"- Intruder: {probs[2]:.2f}",
        "",
        "**Aggregated scores (0–1):**",
        f"- S (Social / Structural): `{S_ui:.2f}`",
        f"- T (Trust): `{T_ui:.2f}`",
        f"- B (Behaviour): `{B_ui:.2f}`",
        "",
        "**Inputs (normalized):**",
        f"- Engagement: `{eng_n:.2f}`",
        f"- Suspiciousness: `{susp_n:.2f}`",
        f"- Activity regularity: `{act_n:.2f}`",
    ])

    # Timeline text: one bullet per recorded step, numbered from T1.
    bullets = [
        f"- **T{number}**: {entry['status']} | "
        f"Trusted={entry['probs'][0]:.2f}, "
        f"Obs={entry['probs'][1]:.2f}, "
        f"Intruder={entry['probs'][2]:.2f}"
        for number, entry in enumerate(history, start=1)
    ]
    timeline_md = "\n".join(["## Timeline (T1–T5)", *bullets])

    return current_md, timeline_md, make_timeline_plot(history), history
346
+
347
def reset_timeline():
    """Return the initial UI values: hint text, empty listing, placeholder plot, empty state."""
    placeholder_fig = make_timeline_plot([])
    hint_md = "Timeline reset. Adjust sliders and click **Next week (T+1)** to start from T1."
    listing_md = "## Timeline (T1–T5)\n(No entries yet)"
    return hint_md, listing_md, placeholder_fig, []
355
+
356
+ # -------------------------------------------------------
357
+ # 10. Example table: real Trusted / Intruder-like samples
358
+ # -------------------------------------------------------
359
+
360
def build_example_table(n_per_class=5):
    """Sample dataset rows labelled Trusted and Intruder into one table.

    Args:
        n_per_class: Maximum number of rows drawn per status; fewer are
            returned when a status has fewer rows.

    Returns:
        A DataFrame with the status name, the raw comment/like/share counts,
        their sum, and the S/T/B scores of each sampled row. Rows are drawn
        without replacement from the module-level ``rng``, so each call can
        return a different sample. If neither status has any rows, the frame
        is empty but keeps the same columns.
    """
    frames = []
    for lbl in (0, 2):  # 0 = Trusted, 2 = Intruder
        candidates = np.flatnonzero(cluster_labels == lbl)
        if candidates.size == 0:
            continue
        picked = rng.choice(
            candidates,
            size=min(n_per_class, candidates.size),
            replace=False,
        )
        frames.append(pd.DataFrame({
            "Status": [label_names[lbl]] * len(picked),
            "Comments": df["comments"].values[picked],
            "Likes": df["likes"].values[picked],
            "Shares": df["shares"].values[picked],
            "Engagement": engagement.values[picked],
            "S_score": S_score[picked],
            "T_score": T_score[picked],
            "B_score": B_score[picked],
        }))

    if not frames:
        return pd.DataFrame(columns=[
            "Status", "Comments", "Likes", "Shares", "Engagement",
            "S_score", "T_score", "B_score",
        ])
    return pd.concat(frames, ignore_index=True)
385
+
386
# Initial sample shown in the examples table when the page loads.
examples_df = build_example_table(n_per_class=5)


def refresh_examples():
    """Return a newly drawn example table (handler for the refresh button)."""
    return build_example_table(n_per_class=5)


# Precompute global status plot
global_status_fig = make_status_bar_plot()
393
+
394
+ # -------------------------------------------------------
395
+ # 11. Gradio app
396
+ # -------------------------------------------------------
397
+
398
def _unit_slider(label, value):
    """Create a slider ranging from 0.0 to 1.0 in steps of 0.01."""
    return gr.Slider(minimum=0.0, maximum=1.0, value=value, step=0.01, label=label)


# NOTE(review): the Row/Column nesting below is inferred from context; confirm
# that the two buttons belong in the left column.
with gr.Blocks() as demo:
    gr.Markdown("# Trust-Based Intrusion Detection Demo (Facebook Cosmetic Brand Metrics)")
    gr.Markdown(
        "This app is trained on the **Facebook Metrics of a Cosmetic Brand** dataset.\n\n"
        "- Real post metrics (comments, likes, shares, impressions, engaged users) are used to derive\n"
        " engagement, suspiciousness, and trust-like scores.\n"
        "- Two social features – **Friend Request Ratio (FRR)** and **Mutual Friends Ratio (MFR)** –\n"
        " are generated synthetically but consistently with behaviour.\n\n"
        "Use the sliders to change user behaviour. Each click on **Next week (T+1)** simulates\n"
        "the same user at a new time step T1..T5, so you can see how their status changes over time."
    )

    with gr.Row():
        # Left column: behaviour inputs and the action buttons.
        with gr.Column():
            gr.Markdown("### Behaviour Inputs")
            # Engagement is a raw count bounded by the dataset's observed
            # range; it starts at the midpoint of that range.
            engagement_slider = gr.Slider(
                label="Engagement level (comments + likes + shares)",
                minimum=float(eng_min),
                maximum=float(eng_max),
                value=float((eng_min + eng_max) / 2.0),
                step=10.0,
            )
            suspicious_slider = _unit_slider(
                "Suspiciousness (0 = clean, 1 = very suspicious)", 0.2
            )
            activity_slider = _unit_slider(
                "Activity regularity (1 = very regular, 0 = random)", 0.7
            )
            frr_slider = _unit_slider("Friend Request Ratio (accepted / sent)", 0.8)
            mfr_slider = _unit_slider("Mutual Friends Ratio", 0.6)

            next_button = gr.Button("Next week (T+1)")
            reset_button = gr.Button("Reset timeline")

        # Right column: current prediction, timeline listing and plot.
        with gr.Column():
            current_box = gr.Markdown(
                "Current week status will appear here after you click **Next week (T+1)**."
            )
            timeline_box = gr.Markdown("## Timeline (T1–T5)\n(No entries yet)")
            timeline_plot = gr.Plot(
                value=make_timeline_plot([]),
                label="Timeline probabilities (T1–T5)",
            )

    gr.Markdown("### Global Status Distribution on Real Dataset")
    status_plot = gr.Plot(value=global_status_fig)

    gr.Markdown("### Example Posts (Real Trusted vs Intruder-like)")
    examples_table = gr.Dataframe(
        value=examples_df,
        label="Sample posts from dataset",
        interactive=False,
    )
    refresh_button = gr.Button("Refresh examples")

    # Holds the list of recorded time steps between clicks.
    timeline_state = gr.State([])

    # Both timeline actions write to the same four components.
    timeline_outputs = [current_box, timeline_box, timeline_plot, timeline_state]

    next_button.click(
        fn=simulate_week,
        inputs=[
            engagement_slider,
            suspicious_slider,
            activity_slider,
            frr_slider,
            mfr_slider,
            timeline_state,
        ],
        outputs=timeline_outputs,
    )

    reset_button.click(fn=reset_timeline, inputs=None, outputs=timeline_outputs)

    refresh_button.click(fn=refresh_examples, inputs=None, outputs=[examples_table])

if __name__ == "__main__":
    demo.launch()