NourFakih commited on
Commit
5beaf60
·
verified ·
1 Parent(s): 457a6af

Upload LSTM window=16 artifacts

Browse files
Files changed (7) hide show
  1. README.md +65 -0
  2. config.json +31 -0
  3. inference.py +81 -0
  4. metrics.json +39 -0
  5. model.safetensors +3 -0
  6. requirements.txt +7 -0
  7. scaler.joblib +3 -0
README.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ library_name: pytorch
4
+ tags:
5
+ - keystroke-dynamics
6
+ - lstm
7
+ - cybersecurity
8
+ - hid
9
+ ---
10
+
11
+ # LSTM-16win-Keystrokes
12
+
13
+ ## Summary
14
+ This repository contains a PyTorch **LSTM** classifier for **Human vs HID** keystroke control detection using **windowed** timing features.
15
+ The label for each window is the **last keystroke** label in that window.
16
+
17
+ ## Training setup (as implemented)
18
+ - **Window size:** 16
19
+ - **Stride:** 1
20
+ - **Label mapping:** {"human": 0, "hid": 1}
21
+ - **Window label:** last-keystroke in the window
22
+ - **Features (F=10):**
23
+ - `duration`
24
+ - `DD_time`
25
+ - `DU_time`
26
+ - `UD_time`
27
+ - `UU_time`
28
+ - `run_avg_duration`
29
+ - `run_avg_DD`
30
+ - `run_avg_DU`
31
+ - `run_avg_UD`
32
+ - `run_avg_UU`
33
+
34
+ ### Preprocessing
35
+ - Windows built with NumPy `sliding_window_view`.
36
+ - Standardization via `StandardScaler` fitted on **training windows only**, across all timesteps and samples.
37
+
38
+ ### Model
39
+ - `torch.nn.LSTM` (unidirectional, batch_first)
40
+ - Hidden size: 64
41
+ - Num layers: 1
42
+ - Dropout: 0.0
43
+ - Head: Linear(hidden_size → 2)
44
+
45
+ ### Optimization
46
+ - Optimizer: Adam
47
+ - LR: 0.001
48
+ - Batch size: 256
49
+ - Epochs: 30
50
+ - Seed: 42
51
+
52
+ ## Files
53
+ - `model.safetensors`: model weights
54
+ - `config.json`: architecture + feature metadata
55
+ - `scaler.joblib`: fitted StandardScaler
56
+ - `metrics.json`: classification report + confusion matrix
57
+ - `inference.py`: minimal loading + prediction example
58
+
59
+ ## Usage (minimal)
60
+ ```python
61
+ from inference import load_model_and_scaler, predict_df
62
+
63
+ model, scaler, cfg = load_model_and_scaler("NourFakih/LSTM-16win-Keystrokes")
64
+ y_pred = predict_df(df, model, scaler, cfg) # df must contain cfg["feature_cols"]
65
+ ```
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "repo_id": "NourFakih/LSTM-16win-Keystrokes",
3
+ "created_at": "2026-01-13T14:37:25.798901Z",
4
+ "window_size": 16,
5
+ "stride": 1,
6
+ "feature_cols": [
7
+ "duration",
8
+ "DD_time",
9
+ "DU_time",
10
+ "UD_time",
11
+ "UU_time",
12
+ "run_avg_duration",
13
+ "run_avg_DD",
14
+ "run_avg_DU",
15
+ "run_avg_UD",
16
+ "run_avg_UU"
17
+ ],
18
+ "label_mapping": {
19
+ "human": 0,
20
+ "hid": 1
21
+ },
22
+ "window_label": "last_keystroke",
23
+ "input_size": 10,
24
+ "hidden_size": 64,
25
+ "num_layers": 1,
26
+ "dropout": 0.0,
27
+ "weights_file": "model.safetensors",
28
+ "framework": "pytorch",
29
+ "python": "3.11.13",
30
+ "torch": "2.6.0+cu124"
31
+ }
inference.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import numpy as np
3
+ import pandas as pd
4
+ import torch
5
+ import torch.nn as nn
6
+ import joblib
7
+ from typing import Optional, Dict, Any
8
+ from huggingface_hub import hf_hub_download
9
+
10
class LSTMClassifier(nn.Module):
    """Binary sequence classifier: unidirectional LSTM followed by a linear head.

    Input:  float tensor of shape (batch, window, features).
    Output: logits of shape (batch, 2) — class 0 = human, class 1 = hid
            (per the repo's label mapping).
    """

    def __init__(self, input_size: int, hidden_size: int = 64, num_layers: int = 1, dropout: float = 0.0):
        super().__init__()
        # nn.LSTM only applies dropout *between* stacked layers, so it is
        # meaningless (and triggers a warning) when num_layers == 1.
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=effective_dropout,
            bidirectional=False,
        )
        self.head = nn.Linear(hidden_size, 2)

    def forward(self, x):
        """Encode the window with the LSTM and classify from the final hidden state."""
        _, (hidden, _) = self.lstm(x)
        # hidden: (num_layers, batch, hidden_size) — keep the top layer only.
        return self.head(hidden[-1])
27
+
28
def load_model_and_scaler(repo_id: str, revision: Optional[str] = None, device: Optional[str] = None):
    """Download and reconstruct the classifier, scaler, and config from the Hub.

    Args:
        repo_id: Hugging Face repo id, e.g. "NourFakih/LSTM-16win-Keystrokes".
        revision: optional git revision (branch/tag/commit); None = default branch.
        device: torch device string; defaults to "cuda" when available, else "cpu".

    Returns:
        (model, scaler, cfg): the eval-mode LSTMClassifier moved to `device`,
        the fitted scaler loaded via joblib, and the raw config dict.
    """
    cfg_path = hf_hub_download(repo_id, "config.json", revision=revision)
    scaler_path = hf_hub_download(repo_id, "scaler.joblib", revision=revision)

    with open(cfg_path, "r", encoding="utf-8") as f:
        cfg = json.load(f)

    device = device or ("cuda" if torch.cuda.is_available() else "cpu")

    model = LSTMClassifier(
        input_size=int(cfg["input_size"]),
        hidden_size=int(cfg["hidden_size"]),
        num_layers=int(cfg["num_layers"]),
        dropout=float(cfg["dropout"]),
    ).to(device)

    weights_name = cfg.get("weights_file", "model.safetensors")
    weights_path = hf_hub_download(repo_id, weights_name, revision=revision)

    if weights_name.endswith(".safetensors"):
        from safetensors.torch import load_file
        state = load_file(weights_path)
    else:
        # weights_only=True restricts torch.load to plain tensor data, blocking
        # arbitrary-code execution via pickled payloads in a downloaded file
        # (supported since torch 2.0; config pins torch 2.6).
        state = torch.load(weights_path, map_location="cpu", weights_only=True)
    # Single load path — the original copied the safetensors state dict through
    # a no-op dict comprehension for no benefit.
    model.load_state_dict(state, strict=True)

    model.eval()
    scaler = joblib.load(scaler_path)
    return model, scaler, cfg
58
+
59
def predict_df(df: pd.DataFrame, model: nn.Module, scaler, cfg: Dict[str, Any]) -> np.ndarray:
    """Predict a class index for every sliding window over the rows of `df`.

    `df` must contain the columns named in cfg["feature_cols"], in keystroke
    order. One prediction is produced per window of cfg["window_size"] rows,
    advancing by cfg["stride"] (default 1). Returns an int64 array, empty when
    the frame has fewer rows than one window.
    """
    from numpy.lib.stride_tricks import sliding_window_view

    cols = cfg["feature_cols"]
    window = int(cfg["window_size"])
    step = int(cfg.get("stride", 1))

    values = df[cols].to_numpy(np.float32)
    if len(values) < window:
        # Not enough rows to form a single window.
        return np.empty((0,), dtype=np.int64)

    # sliding_window_view yields (n_windows, 1, window, n_features);
    # drop the singleton axis, then apply the stride.
    windows = sliding_window_view(values, window_shape=(window, values.shape[1]))
    windows = windows.squeeze(1)[::step]

    # The scaler was fitted per-feature across all timesteps and samples, so
    # flatten the window/time axes before transforming, then restore the shape.
    n_feat = windows.shape[2]
    flat = scaler.transform(windows.reshape(-1, n_feat))
    scaled = flat.reshape(windows.shape).astype(np.float32)

    device = next(model.parameters()).device
    with torch.no_grad():
        batch = torch.tensor(scaled, device=device)
        predicted = torch.argmax(model(batch), dim=1)
    return predicted.detach().cpu().numpy()
metrics.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "classification_report": {
3
+ "human": {
4
+ "precision": 0.9969518003429224,
5
+ "recall": 0.9989909457837897,
6
+ "f1-score": 0.9979703314217215,
7
+ "support": 36668
8
+ },
9
+ "hid": {
10
+ "precision": 0.986506199854121,
11
+ "recall": 0.9602413915512957,
12
+ "f1-score": 0.9731966180967799,
13
+ "support": 2817
14
+ },
15
+ "accuracy": 0.9962264150943396,
16
+ "macro avg": {
17
+ "precision": 0.9917290000985217,
18
+ "recall": 0.9796161686675426,
19
+ "f1-score": 0.9855834747592507,
20
+ "support": 39485
21
+ },
22
+ "weighted avg": {
23
+ "precision": 0.9962065741411508,
24
+ "recall": 0.9962264150943396,
25
+ "f1-score": 0.9962028868114552,
26
+ "support": 39485
27
+ }
28
+ },
29
+ "confusion_matrix": [
30
+ [
31
+ 36631,
32
+ 37
33
+ ],
34
+ [
35
+ 112,
36
+ 2705
37
+ ]
38
+ ]
39
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f438c288561ec0b864b21918d7019ef6a1ca356595c85df5c4fee36e41d5468d
3
+ size 78800
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch
2
+ numpy
3
+ pandas
4
+ scikit-learn
5
+ huggingface_hub
6
+ joblib
7
+ safetensors
scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65ed1567172d142f745da831a4d0dea9131c33afe1b4352fbcaf164732648113
3
+ size 855