NourFakih commited on
Commit
5beaf60
·
verified ·
1 Parent(s): 457a6af

Upload LSTM window=16 artifacts

Browse files
Files changed (7) hide show
  1. README.md +65 -0
  2. config.json +31 -0
  3. inference.py +81 -0
  4. metrics.json +39 -0
  5. model.safetensors +3 -0
  6. requirements.txt +7 -0
  7. scaler.joblib +3 -0
README.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ library_name: pytorch
4
+ tags:
5
+ - keystroke-dynamics
6
+ - lstm
7
+ - cybersecurity
8
+ - hid
9
+ ---
10
+
11
+ # LSTM-16win-Keystrokes
12
+
13
+ ## Summary
14
+ This repository contains a PyTorch **LSTM** classifier for **Human vs HID** keystroke control detection using **windowed** timing features.
15
+ The label for each window is the **last keystroke** label in that window.
16
+
17
+ ## Training setup (as implemented)
18
+ - **Window size:** 16
19
+ - **Stride:** 1
20
+ - **Label mapping:** {"human": 0, "hid": 1}
21
+ - **Window label:** last-keystroke in the window
22
+ - **Features (F=10):**
23
+ - `duration`
24
+ - `DD_time`
25
+ - `DU_time`
26
+ - `UD_time`
27
+ - `UU_time`
28
+ - `run_avg_duration`
29
+ - `run_avg_DD`
30
+ - `run_avg_DU`
31
+ - `run_avg_UD`
32
+ - `run_avg_UU`
33
+
34
+ ### Preprocessing
35
+ - Windows built with NumPy `sliding_window_view`.
36
+ - Standardization via `StandardScaler` fitted on **training windows only**, across all timesteps and samples.
37
+
38
+ ### Model
39
+ - `torch.nn.LSTM` (unidirectional, batch_first)
40
+ - Hidden size: 64
41
+ - Num layers: 1
42
+ - Dropout: 0.0
43
+ - Head: Linear(hidden_size → 2)
44
+
45
+ ### Optimization
46
+ - Optimizer: Adam
47
+ - LR: 0.001
48
+ - Batch size: 256
49
+ - Epochs: 30
50
+ - Seed: 42
51
+
52
+ ## Files
53
+ - `model.safetensors`: model weights
54
+ - `config.json`: architecture + feature metadata
55
+ - `scaler.joblib`: fitted StandardScaler
56
+ - `metrics.json`: classification report + confusion matrix
57
+ - `inference.py`: minimal loading + prediction example
58
+
59
+ ## Usage (minimal)
60
+ ```python
61
+ from inference import load_model_and_scaler, predict_df
62
+
63
+ model, scaler, cfg = load_model_and_scaler("NourFakih/LSTM-16win-Keystrokes")
64
+ y_pred = predict_df(df, model, scaler, cfg) # df must contain cfg["feature_cols"]
65
+ ```
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "repo_id": "NourFakih/LSTM-16win-Keystrokes",
3
+ "created_at": "2026-01-13T14:37:25.798901Z",
4
+ "window_size": 16,
5
+ "stride": 1,
6
+ "feature_cols": [
7
+ "duration",
8
+ "DD_time",
9
+ "DU_time",
10
+ "UD_time",
11
+ "UU_time",
12
+ "run_avg_duration",
13
+ "run_avg_DD",
14
+ "run_avg_DU",
15
+ "run_avg_UD",
16
+ "run_avg_UU"
17
+ ],
18
+ "label_mapping": {
19
+ "human": 0,
20
+ "hid": 1
21
+ },
22
+ "window_label": "last_keystroke",
23
+ "input_size": 10,
24
+ "hidden_size": 64,
25
+ "num_layers": 1,
26
+ "dropout": 0.0,
27
+ "weights_file": "model.safetensors",
28
+ "framework": "pytorch",
29
+ "python": "3.11.13",
30
+ "torch": "2.6.0+cu124"
31
+ }
inference.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import numpy as np
3
+ import pandas as pd
4
+ import torch
5
+ import torch.nn as nn
6
+ import joblib
7
+ from typing import Optional, Dict, Any
8
+ from huggingface_hub import hf_hub_download
9
+
10
class LSTMClassifier(nn.Module):
    """Binary sequence classifier: unidirectional LSTM followed by a linear head.

    Input:  float tensor of shape (batch, window, features).
    Output: logits of shape (batch, 2) — class 0 = human, class 1 = hid
            (per the repo's label mapping).
    """

    def __init__(self, input_size: int, hidden_size: int = 64, num_layers: int = 1, dropout: float = 0.0):
        super().__init__()
        # nn.LSTM only applies dropout *between* stacked layers, so it is
        # meaningless (and triggers a warning) when num_layers == 1.
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=effective_dropout,
            bidirectional=False,
        )
        self.head = nn.Linear(hidden_size, 2)

    def forward(self, x):
        """Encode the window with the LSTM and classify from the final hidden state."""
        _, (hidden, _) = self.lstm(x)
        # hidden: (num_layers, batch, hidden_size) — keep the top layer only.
        return self.head(hidden[-1])
27
+
28
def load_model_and_scaler(repo_id: str, revision: Optional[str] = None, device: Optional[str] = None):
    """Download and reconstruct the classifier, scaler, and config from the Hub.

    Args:
        repo_id: Hugging Face repo id, e.g. "NourFakih/LSTM-16win-Keystrokes".
        revision: optional git revision (branch/tag/commit); None = default branch.
        device: torch device string; defaults to "cuda" when available, else "cpu".

    Returns:
        (model, scaler, cfg): the eval-mode LSTMClassifier moved to `device`,
        the fitted scaler loaded via joblib, and the raw config dict.
    """
    cfg_path = hf_hub_download(repo_id, "config.json", revision=revision)
    scaler_path = hf_hub_download(repo_id, "scaler.joblib", revision=revision)

    with open(cfg_path, "r", encoding="utf-8") as f:
        cfg = json.load(f)

    device = device or ("cuda" if torch.cuda.is_available() else "cpu")

    model = LSTMClassifier(
        input_size=int(cfg["input_size"]),
        hidden_size=int(cfg["hidden_size"]),
        num_layers=int(cfg["num_layers"]),
        dropout=float(cfg["dropout"]),
    ).to(device)

    weights_name = cfg.get("weights_file", "model.safetensors")
    weights_path = hf_hub_download(repo_id, weights_name, revision=revision)

    if weights_name.endswith(".safetensors"):
        from safetensors.torch import load_file
        state = load_file(weights_path)
    else:
        # weights_only=True restricts torch.load to plain tensor data, blocking
        # arbitrary-code execution via pickled payloads in a downloaded file
        # (supported since torch 2.0; config pins torch 2.6).
        state = torch.load(weights_path, map_location="cpu", weights_only=True)
    # Single load path — the original copied the safetensors state dict through
    # a no-op dict comprehension for no benefit.
    model.load_state_dict(state, strict=True)

    model.eval()
    scaler = joblib.load(scaler_path)
    return model, scaler, cfg
58
+
59
def predict_df(df: pd.DataFrame, model: nn.Module, scaler, cfg: Dict[str, Any]) -> np.ndarray:
    """Predict a class index for every sliding window over the rows of `df`.

    `df` must contain the columns named in cfg["feature_cols"], in keystroke
    order. One prediction is produced per window of cfg["window_size"] rows,
    advancing by cfg["stride"] (default 1). Returns an int64 array, empty when
    the frame has fewer rows than one window.
    """
    from numpy.lib.stride_tricks import sliding_window_view

    cols = cfg["feature_cols"]
    window = int(cfg["window_size"])
    step = int(cfg.get("stride", 1))

    values = df[cols].to_numpy(np.float32)
    if len(values) < window:
        # Not enough rows to form a single window.
        return np.empty((0,), dtype=np.int64)

    # sliding_window_view yields (n_windows, 1, window, n_features);
    # drop the singleton axis, then apply the stride.
    windows = sliding_window_view(values, window_shape=(window, values.shape[1]))
    windows = windows.squeeze(1)[::step]

    # The scaler was fitted per-feature across all timesteps and samples, so
    # flatten the window/time axes before transforming, then restore the shape.
    n_feat = windows.shape[2]
    flat = scaler.transform(windows.reshape(-1, n_feat))
    scaled = flat.reshape(windows.shape).astype(np.float32)

    device = next(model.parameters()).device
    with torch.no_grad():
        batch = torch.tensor(scaled, device=device)
        predicted = torch.argmax(model(batch), dim=1)
    return predicted.detach().cpu().numpy()
metrics.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "classification_report": {
3
+ "human": {
4
+ "precision": 0.9969518003429224,
5
+ "recall": 0.9989909457837897,
6
+ "f1-score": 0.9979703314217215,
7
+ "support": 36668
8
+ },
9
+ "hid": {
10
+ "precision": 0.986506199854121,
11
+ "recall": 0.9602413915512957,
12
+ "f1-score": 0.9731966180967799,
13
+ "support": 2817
14
+ },
15
+ "accuracy": 0.9962264150943396,
16
+ "macro avg": {
17
+ "precision": 0.9917290000985217,
18
+ "recall": 0.9796161686675426,
19
+ "f1-score": 0.9855834747592507,
20
+ "support": 39485
21
+ },
22
+ "weighted avg": {
23
+ "precision": 0.9962065741411508,
24
+ "recall": 0.9962264150943396,
25
+ "f1-score": 0.9962028868114552,
26
+ "support": 39485
27
+ }
28
+ },
29
+ "confusion_matrix": [
30
+ [
31
+ 36631,
32
+ 37
33
+ ],
34
+ [
35
+ 112,
36
+ 2705
37
+ ]
38
+ ]
39
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f438c288561ec0b864b21918d7019ef6a1ca356595c85df5c4fee36e41d5468d
3
+ size 78800
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch
2
+ numpy
3
+ pandas
4
+ scikit-learn
5
+ huggingface_hub
6
+ joblib
7
+ safetensors
scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65ed1567172d142f745da831a4d0dea9131c33afe1b4352fbcaf164732648113
3
+ size 855