Upload LSTM window=64 artifacts
Browse files- README.md +65 -0
- config.json +31 -0
- inference.py +81 -0
- metrics.json +39 -0
- model.safetensors +3 -0
- requirements.txt +7 -0
- scaler.joblib +3 -0
README.md
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
library_name: pytorch
|
| 4 |
+
tags:
|
| 5 |
+
- keystroke-dynamics
|
| 6 |
+
- lstm
|
| 7 |
+
- cybersecurity
|
| 8 |
+
- hid
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# LSTM-64win-Keystrokes
|
| 12 |
+
|
| 13 |
+
## Summary
|
| 14 |
+
This repository contains a PyTorch **LSTM** classifier for **Human vs HID** keystroke control detection using **windowed** timing features.
|
| 15 |
+
The label for each window is the **last keystroke** label in that window.
|
| 16 |
+
|
| 17 |
+
## Training setup (as implemented)
|
| 18 |
+
- **Window size:** 64
|
| 19 |
+
- **Stride:** 1
|
| 20 |
+
- **Label mapping:** {"human": 0, "hid": 1}
|
| 21 |
+
- **Window label:** last-keystroke in the window
|
| 22 |
+
- **Features (F=10):**
|
| 23 |
+
- `duration`
|
| 24 |
+
- `DD_time`
|
| 25 |
+
- `DU_time`
|
| 26 |
+
- `UD_time`
|
| 27 |
+
- `UU_time`
|
| 28 |
+
- `run_avg_duration`
|
| 29 |
+
- `run_avg_DD`
|
| 30 |
+
- `run_avg_DU`
|
| 31 |
+
- `run_avg_UD`
|
| 32 |
+
- `run_avg_UU`
|
| 33 |
+
|
| 34 |
+
### Preprocessing
|
| 35 |
+
- Windows built with NumPy `sliding_window_view`.
|
| 36 |
+
- Standardization via `StandardScaler` fitted on **training windows only**, across all timesteps and samples.
|
| 37 |
+
|
| 38 |
+
### Model
|
| 39 |
+
- `torch.nn.LSTM` (unidirectional, batch_first)
|
| 40 |
+
- Hidden size: 64
|
| 41 |
+
- Num layers: 1
|
| 42 |
+
- Dropout: 0.0
|
| 43 |
+
- Head: Linear(hidden_size → 2)
|
| 44 |
+
|
| 45 |
+
### Optimization
|
| 46 |
+
- Optimizer: Adam
|
| 47 |
+
- LR: 0.001
|
| 48 |
+
- Batch size: 256
|
| 49 |
+
- Epochs: 30
|
| 50 |
+
- Seed: 42
|
| 51 |
+
|
| 52 |
+
## Files
|
| 53 |
+
- `model.safetensors`: model weights
|
| 54 |
+
- `config.json`: architecture + feature metadata
|
| 55 |
+
- `scaler.joblib`: fitted StandardScaler
|
| 56 |
+
- `metrics.json`: classification report + confusion matrix
|
| 57 |
+
- `inference.py`: minimal loading + prediction example
|
| 58 |
+
|
| 59 |
+
## Usage (minimal)
|
| 60 |
+
```python
|
| 61 |
+
from inference import load_model_and_scaler, predict_df
|
| 62 |
+
|
| 63 |
+
model, scaler, cfg = load_model_and_scaler("NourFakih/LSTM-64win-Keystrokes")
|
| 64 |
+
y_pred = predict_df(df, model, scaler, cfg) # df must contain cfg["feature_cols"]
|
| 65 |
+
```
|
config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"repo_id": "NourFakih/LSTM-64win-Keystrokes",
|
| 3 |
+
"created_at": "2026-01-13T14:40:45.923493Z",
|
| 4 |
+
"window_size": 64,
|
| 5 |
+
"stride": 1,
|
| 6 |
+
"feature_cols": [
|
| 7 |
+
"duration",
|
| 8 |
+
"DD_time",
|
| 9 |
+
"DU_time",
|
| 10 |
+
"UD_time",
|
| 11 |
+
"UU_time",
|
| 12 |
+
"run_avg_duration",
|
| 13 |
+
"run_avg_DD",
|
| 14 |
+
"run_avg_DU",
|
| 15 |
+
"run_avg_UD",
|
| 16 |
+
"run_avg_UU"
|
| 17 |
+
],
|
| 18 |
+
"label_mapping": {
|
| 19 |
+
"human": 0,
|
| 20 |
+
"hid": 1
|
| 21 |
+
},
|
| 22 |
+
"window_label": "last_keystroke",
|
| 23 |
+
"input_size": 10,
|
| 24 |
+
"hidden_size": 64,
|
| 25 |
+
"num_layers": 1,
|
| 26 |
+
"dropout": 0.0,
|
| 27 |
+
"weights_file": "model.safetensors",
|
| 28 |
+
"framework": "pytorch",
|
| 29 |
+
"python": "3.11.13",
|
| 30 |
+
"torch": "2.6.0+cu124"
|
| 31 |
+
}
|
inference.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
import joblib
|
| 7 |
+
from typing import Optional, Dict, Any
|
| 8 |
+
from huggingface_hub import hf_hub_download
|
| 9 |
+
|
| 10 |
+
class LSTMClassifier(nn.Module):
    """Unidirectional LSTM over windowed keystroke features with a linear
    head producing 2-class logits (human vs HID)."""

    def __init__(self, input_size: int, hidden_size: int = 64, num_layers: int = 1, dropout: float = 0.0):
        super().__init__()
        # nn.LSTM applies dropout only *between* stacked layers, so force it
        # to 0.0 for a single-layer model (also silences the PyTorch warning).
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=effective_dropout,
            bidirectional=False,
        )
        self.head = nn.Linear(hidden_size, 2)

    def forward(self, x):
        """Map a (batch, window, features) tensor to (batch, 2) logits."""
        _, (hidden, _) = self.lstm(x)
        # hidden[-1] is the final hidden state of the top (here: only) layer.
        return self.head(hidden[-1])
|
| 27 |
+
|
| 28 |
+
def load_model_and_scaler(repo_id: str, revision: Optional[str] = None, device: Optional[str] = None):
    """Download artifacts from the Hub and return ``(model, scaler, cfg)``.

    Args:
        repo_id: Hub repository id, e.g. ``"NourFakih/LSTM-64win-Keystrokes"``.
        revision: optional git revision (branch / tag / commit) to pin.
        device: torch device string; defaults to ``"cuda"`` when available,
            else ``"cpu"``.

    Returns:
        model: ``LSTMClassifier`` built from ``config.json``, weights loaded,
            moved to *device* and set to eval mode.
        scaler: the fitted scaler deserialized from ``scaler.joblib``.
        cfg: the parsed ``config.json`` dict.
    """
    cfg_path = hf_hub_download(repo_id, "config.json", revision=revision)
    scaler_path = hf_hub_download(repo_id, "scaler.joblib", revision=revision)

    with open(cfg_path, "r", encoding="utf-8") as f:
        cfg = json.load(f)

    device = device or ("cuda" if torch.cuda.is_available() else "cpu")

    model = LSTMClassifier(
        input_size=int(cfg["input_size"]),
        hidden_size=int(cfg["hidden_size"]),
        num_layers=int(cfg["num_layers"]),
        dropout=float(cfg["dropout"]),
    ).to(device)

    weights_name = cfg.get("weights_file", "model.safetensors")
    weights_path = hf_hub_download(repo_id, weights_name, revision=revision)

    if weights_name.endswith(".safetensors"):
        from safetensors.torch import load_file
        # load_file returns a plain {name: tensor} dict; pass it through
        # directly (the previous identity dict comprehension was a no-op).
        state = load_file(weights_path)
    else:
        # weights_only=True restricts torch.load to tensors/containers,
        # preventing arbitrary-code pickle execution on a downloaded file.
        state = torch.load(weights_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state, strict=True)

    model.eval()
    scaler = joblib.load(scaler_path)
    return model, scaler, cfg
|
| 58 |
+
|
| 59 |
+
def predict_df(df: pd.DataFrame, model: nn.Module, scaler, cfg: Dict[str, Any]) -> np.ndarray:
    """Window the rows of *df*, scale them, and return per-window class
    predictions (0 = human, 1 = hid) as an int64 array.

    *df* must contain every column in ``cfg["feature_cols"]``; rows are
    assumed chronological. Fewer rows than one full window yields an empty
    array.
    """
    from numpy.lib.stride_tricks import sliding_window_view

    cols = cfg["feature_cols"]
    win = int(cfg["window_size"])
    step = int(cfg.get("stride", 1))

    raw = df[cols].to_numpy(np.float32)
    if raw.shape[0] < win:
        # Not enough keystrokes to form even one window.
        return np.empty((0,), dtype=np.int64)

    # (N-win+1, win, F) view over consecutive rows, subsampled by stride.
    windows = sliding_window_view(raw, window_shape=(win, raw.shape[1])).squeeze(1)[::step]

    # The scaler is per-feature: flatten timesteps, transform, restore shape.
    n_features = windows.shape[2]
    flat = windows.reshape(-1, n_features)
    scaled = scaler.transform(flat).reshape(windows.shape).astype(np.float32)

    device = next(model.parameters()).device
    with torch.no_grad():
        batch = torch.tensor(scaled, device=device)
        preds = torch.argmax(model(batch), dim=1)
    return preds.cpu().numpy()
|
metrics.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"classification_report": {
|
| 3 |
+
"human": {
|
| 4 |
+
"precision": 0.9949655754211228,
|
| 5 |
+
"recall": 0.9984161660294921,
|
| 6 |
+
"f1-score": 0.996687884198618,
|
| 7 |
+
"support": 36620
|
| 8 |
+
},
|
| 9 |
+
"hid": {
|
| 10 |
+
"precision": 0.9784386617100371,
|
| 11 |
+
"recall": 0.934327298544551,
|
| 12 |
+
"f1-score": 0.9558743417468677,
|
| 13 |
+
"support": 2817
|
| 14 |
+
},
|
| 15 |
+
"accuracy": 0.9938382737023608,
|
| 16 |
+
"macro avg": {
|
| 17 |
+
"precision": 0.98670211856558,
|
| 18 |
+
"recall": 0.9663717322870216,
|
| 19 |
+
"f1-score": 0.9762811129727429,
|
| 20 |
+
"support": 39437
|
| 21 |
+
},
|
| 22 |
+
"weighted avg": {
|
| 23 |
+
"precision": 0.9937850516509544,
|
| 24 |
+
"recall": 0.9938382737023608,
|
| 25 |
+
"f1-score": 0.9937725572445754,
|
| 26 |
+
"support": 39437
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"confusion_matrix": [
|
| 30 |
+
[
|
| 31 |
+
36562,
|
| 32 |
+
58
|
| 33 |
+
],
|
| 34 |
+
[
|
| 35 |
+
185,
|
| 36 |
+
2632
|
| 37 |
+
]
|
| 38 |
+
]
|
| 39 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af72b2067442b2d41d7a48fab80fc9dab26413d7231d8f2c0910f63fd846406d
|
| 3 |
+
size 78800
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
numpy
|
| 3 |
+
pandas
|
| 4 |
+
scikit-learn
|
| 5 |
+
huggingface_hub
|
| 6 |
+
joblib
|
| 7 |
+
safetensors
|
scaler.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34855de08538c35d4ff1096ad17c86d4075d131e6975c4be0c9519e95ef4f9ac
|
| 3 |
+
size 855
|