NourFakih commited on
Commit
318847a
·
verified ·
1 Parent(s): 91d4759

Upload LSTM window=64 artifacts

Browse files
Files changed (7) hide show
  1. README.md +65 -0
  2. config.json +31 -0
  3. inference.py +81 -0
  4. metrics.json +39 -0
  5. model.safetensors +3 -0
  6. requirements.txt +7 -0
  7. scaler.joblib +3 -0
README.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ library_name: pytorch
4
+ tags:
5
+ - keystroke-dynamics
6
+ - lstm
7
+ - cybersecurity
8
+ - hid
9
+ ---
10
+
11
+ # LSTM-64win-Keystrokes
12
+
13
+ ## Summary
14
+ This repository contains a PyTorch **LSTM** classifier for **Human vs HID** keystroke control detection using **windowed** timing features.
15
+ Each window is assigned the label of the **last keystroke** in that window.
16
+
17
+ ## Training setup (as implemented)
18
+ - **Window size:** 64
19
+ - **Stride:** 1
20
+ - **Label mapping:** {"human": 0, "hid": 1}
21
+ - **Window label:** last-keystroke in the window
22
+ - **Features (F=10):**
23
+ - `duration`
24
+ - `DD_time`
25
+ - `DU_time`
26
+ - `UD_time`
27
+ - `UU_time`
28
+ - `run_avg_duration`
29
+ - `run_avg_DD`
30
+ - `run_avg_DU`
31
+ - `run_avg_UD`
32
+ - `run_avg_UU`
33
+
34
+ ### Preprocessing
35
+ - Windows built with NumPy `sliding_window_view`.
36
+ - Standardization via `StandardScaler` fitted on **training windows only**, across all timesteps and samples.
37
+
38
+ ### Model
39
+ - `torch.nn.LSTM` (unidirectional, batch_first)
40
+ - Hidden size: 64
41
+ - Num layers: 1
42
+ - Dropout: 0.0
43
+ - Head: Linear(hidden_size → 2)
44
+
45
+ ### Optimization
46
+ - Optimizer: Adam
47
+ - LR: 0.001
48
+ - Batch size: 256
49
+ - Epochs: 30
50
+ - Seed: 42
51
+
52
+ ## Files
53
+ - `model.safetensors`: model weights
54
+ - `config.json`: architecture + feature metadata
55
+ - `scaler.joblib`: fitted StandardScaler
56
+ - `metrics.json`: classification report + confusion matrix
57
+ - `inference.py`: minimal loading + prediction example
58
+
59
+ ## Usage (minimal)
60
+ ```python
61
+ from inference import load_model_and_scaler, predict_df
62
+
63
+ model, scaler, cfg = load_model_and_scaler("NourFakih/LSTM-64win-Keystrokes")
64
+ y_pred = predict_df(df, model, scaler, cfg) # df must contain cfg["feature_cols"]
65
+ ```
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "repo_id": "NourFakih/LSTM-64win-Keystrokes",
3
+ "created_at": "2026-01-13T14:40:45.923493Z",
4
+ "window_size": 64,
5
+ "stride": 1,
6
+ "feature_cols": [
7
+ "duration",
8
+ "DD_time",
9
+ "DU_time",
10
+ "UD_time",
11
+ "UU_time",
12
+ "run_avg_duration",
13
+ "run_avg_DD",
14
+ "run_avg_DU",
15
+ "run_avg_UD",
16
+ "run_avg_UU"
17
+ ],
18
+ "label_mapping": {
19
+ "human": 0,
20
+ "hid": 1
21
+ },
22
+ "window_label": "last_keystroke",
23
+ "input_size": 10,
24
+ "hidden_size": 64,
25
+ "num_layers": 1,
26
+ "dropout": 0.0,
27
+ "weights_file": "model.safetensors",
28
+ "framework": "pytorch",
29
+ "python": "3.11.13",
30
+ "torch": "2.6.0+cu124"
31
+ }
inference.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import numpy as np
3
+ import pandas as pd
4
+ import torch
5
+ import torch.nn as nn
6
+ import joblib
7
+ from typing import Optional, Dict, Any
8
+ from huggingface_hub import hf_hub_download
9
+
10
class LSTMClassifier(nn.Module):
    """Binary (human vs HID) keystroke-window classifier.

    Architecture: a unidirectional, batch-first LSTM encoder followed by a
    linear head projecting the final hidden state to 2 class logits.

    Input:  float tensor of shape (batch, window, input_size).
    Output: raw logits of shape (batch, 2).
    """

    def __init__(self, input_size: int, hidden_size: int = 64, num_layers: int = 1, dropout: float = 0.0):
        super().__init__()
        # Inter-layer dropout is only meaningful for stacked LSTMs; PyTorch
        # warns when dropout > 0 with a single layer, so force 0.0 there.
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=effective_dropout,
            bidirectional=False,
        )
        self.head = nn.Linear(hidden_size, 2)

    def forward(self, x):
        # h_n stacks the final hidden state of each layer; the top layer's
        # state serves as the window embedding fed to the classifier head.
        _, (h_n, _) = self.lstm(x)
        return self.head(h_n[-1])
27
+
28
def load_model_and_scaler(repo_id: str, revision: Optional[str] = None, device: Optional[str] = None):
    """Download and reconstruct the classifier, scaler, and config from the Hub.

    Args:
        repo_id: Hugging Face repo id, e.g. "NourFakih/LSTM-64win-Keystrokes".
        revision: Optional git revision (branch, tag, or commit hash).
        device: Target device string; defaults to "cuda" when available, else "cpu".

    Returns:
        (model, scaler, cfg): the LSTM in eval mode on `device`, the fitted
        StandardScaler, and the parsed config dict.
    """
    cfg_path = hf_hub_download(repo_id, "config.json", revision=revision)
    scaler_path = hf_hub_download(repo_id, "scaler.joblib", revision=revision)

    with open(cfg_path, "r", encoding="utf-8") as f:
        cfg = json.load(f)

    device = device or ("cuda" if torch.cuda.is_available() else "cpu")

    # Rebuild the architecture exactly as recorded in config.json.
    model = LSTMClassifier(
        input_size=int(cfg["input_size"]),
        hidden_size=int(cfg["hidden_size"]),
        num_layers=int(cfg["num_layers"]),
        dropout=float(cfg["dropout"]),
    ).to(device)

    weights_name = cfg.get("weights_file", "model.safetensors")
    weights_path = hf_hub_download(repo_id, weights_name, revision=revision)

    if weights_name.endswith(".safetensors"):
        from safetensors.torch import load_file

        state = load_file(weights_path)
    else:
        # weights_only=True restricts unpickling to tensors and plain
        # containers, preventing arbitrary-code execution from an
        # untrusted .pt/.bin checkpoint.
        state = torch.load(weights_path, map_location="cpu", weights_only=True)
    # Pass the state dict straight through; the former
    # `{k: v for k, v in state.items()}` was a no-op copy.
    model.load_state_dict(state, strict=True)

    model.eval()
    scaler = joblib.load(scaler_path)
    return model, scaler, cfg
58
+
59
def predict_df(df: pd.DataFrame, model: nn.Module, scaler, cfg: Dict[str, Any]) -> np.ndarray:
    """Predict a class id (0=human, 1=hid) for every sliding window of `df`.

    `df` must contain every column in cfg["feature_cols"]. Returns one
    prediction per window; an empty int array when `df` has fewer rows
    than cfg["window_size"].
    """
    from numpy.lib.stride_tricks import sliding_window_view

    cols = cfg["feature_cols"]
    window = int(cfg["window_size"])
    step = int(cfg.get("stride", 1))

    features = df[cols].to_numpy(np.float32)
    if len(features) < window:
        # Too few keystrokes to form even a single window.
        return np.empty((0,), dtype=np.int64)

    # sliding_window_view yields (n_windows, 1, window, F); drop the
    # singleton axis, then subsample windows by the configured stride.
    windows = sliding_window_view(features, window_shape=(window, features.shape[1]))
    windows = windows.squeeze(1)[::step]

    # The scaler was fitted on flattened (timestep, feature) rows, so
    # flatten, standardize, and restore the 3-D windowed shape.
    n_feat = windows.shape[2]
    flat = windows.reshape(-1, n_feat)
    scaled = scaler.transform(flat).reshape(windows.shape).astype(np.float32)

    device = next(model.parameters()).device
    with torch.no_grad():
        batch = torch.tensor(scaled, device=device)
        logits = model(batch)
        return torch.argmax(logits, dim=1).detach().cpu().numpy()
metrics.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "classification_report": {
3
+ "human": {
4
+ "precision": 0.9949655754211228,
5
+ "recall": 0.9984161660294921,
6
+ "f1-score": 0.996687884198618,
7
+ "support": 36620
8
+ },
9
+ "hid": {
10
+ "precision": 0.9784386617100371,
11
+ "recall": 0.934327298544551,
12
+ "f1-score": 0.9558743417468677,
13
+ "support": 2817
14
+ },
15
+ "accuracy": 0.9938382737023608,
16
+ "macro avg": {
17
+ "precision": 0.98670211856558,
18
+ "recall": 0.9663717322870216,
19
+ "f1-score": 0.9762811129727429,
20
+ "support": 39437
21
+ },
22
+ "weighted avg": {
23
+ "precision": 0.9937850516509544,
24
+ "recall": 0.9938382737023608,
25
+ "f1-score": 0.9937725572445754,
26
+ "support": 39437
27
+ }
28
+ },
29
+ "confusion_matrix": [
30
+ [
31
+ 36562,
32
+ 58
33
+ ],
34
+ [
35
+ 185,
36
+ 2632
37
+ ]
38
+ ]
39
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af72b2067442b2d41d7a48fab80fc9dab26413d7231d8f2c0910f63fd846406d
3
+ size 78800
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch
2
+ numpy
3
+ pandas
4
+ scikit-learn
5
+ huggingface_hub
6
+ joblib
7
+ safetensors
scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34855de08538c35d4ff1096ad17c86d4075d131e6975c4be0c9519e95ef4f9ac
3
+ size 855