initial commit

Browse files

Files changed (7) hide show

.gitattributes +1 -0
8.gif +3 -0
DroneStalker.py +69 -0
Figure_1.png +0 -0
README.md +123 -3
dronestalker-1.1.pth +3 -0
requirements.txt +1 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+8.gif filter=lfs diff=lfs merge=lfs -text

8.gif ADDED Viewed

Git LFS Details

SHA256: 001fc22656956841b530381611c449f710d0666f0ec00ad4f66043262740b67b
Pointer size: 132 Bytes
Size of remote file: 2.47 MB

DroneStalker.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import torch
+import torch.nn as nn
class DroneStalker(nn.Module):
    """Base module that turns bounding-box sequences into kinematic features.

    Boxes are indexed as ``[x1, y1, x2, y2]``; x values are divided by
    ``IMAGE_WIDTH`` and y values by ``IMAGE_HEIGHT`` before any feature is
    computed. Velocities are differences of consecutive normalized centers
    divided by ``INTERVAL``.
    """

    INTERVAL = 0.033333  # Seconds
    IMAGE_WIDTH = 1280
    IMAGE_HEIGHT = 720

    def __init__(self, Np: int, Nf: int):
        """Store the two sequence lengths ``Np`` and ``Nf`` on the module."""
        super().__init__()
        self.Np = Np
        self.Nf = Nf

    def _extract_features(self, sample):
        """Return one feature list per box in ``sample``.

        Each box is paired with its predecessor; the first box has no
        predecessor and is paired with itself, which yields zero velocity.
        """
        return [
            self._get_kinematics(current if index == 0 else sample[index - 1], current)
            for index, current in enumerate(sample)
        ]

    def _get_kinematics(self, past_box, box):
        """Return ``[x_center, y_center, x_velocity, y_velocity]`` for ``box``.

        Centers are in normalized image coordinates; velocities are the change
        in center from ``past_box`` to ``box`` divided by ``INTERVAL``.
        """
        img_w, img_h = self.IMAGE_WIDTH, self.IMAGE_HEIGHT

        # Normalize both boxes corner by corner.
        prev_left, prev_top = past_box[0] / img_w, past_box[1] / img_h
        prev_right, prev_bottom = past_box[2] / img_w, past_box[3] / img_h
        left, top = box[0] / img_w, box[1] / img_h
        right, bottom = box[2] / img_w, box[3] / img_h

        center_x = (left + right) / 2
        center_y = (top + bottom) / 2
        prev_center_x = (prev_left + prev_right) / 2
        prev_center_y = (prev_top + prev_bottom) / 2

        velocity_x = (center_x - prev_center_x) / (self.INTERVAL)
        velocity_y = (center_y - prev_center_y) / (self.INTERVAL)
        return [center_x, center_y, velocity_x, velocity_y]
class DroneStalkerBase(DroneStalker):
    """Feature extractor that extends the parent's kinematics with box geometry.

    Each per-box feature list has eight entries:
    ``[x_center, y_center, x_velocity, y_velocity, width, height, x1, y1]``,
    all in normalized image coordinates.
    """

    def __init__(self, Np: int, Nf: int):
        """Forward ``Np`` and ``Nf`` to the parent constructor."""
        super().__init__(Np, Nf)

    def _get_kinematics(self, past_box, box):
        """Return the parent's four kinematic values plus width, height, x1, y1."""
        x_center, y_center, x_velocity, y_velocity = super()._get_kinematics(past_box, box)

        # Normalized corners of the current box only; the past box is not
        # needed for the geometry features.
        left = box[0] / self.IMAGE_WIDTH
        top = box[1] / self.IMAGE_HEIGHT
        right = box[2] / self.IMAGE_WIDTH
        bottom = box[3] / self.IMAGE_HEIGHT

        return [
            x_center,
            y_center,
            x_velocity,
            y_velocity,
            right - left,
            bottom - top,
            left,
            top,
        ]
class Model(DroneStalkerBase):
    """GRU sequence model producing ``Nf`` four-value outputs per input sequence.

    Pipeline: per-box feature extraction (8 features) -> ``Linear(8, 16)`` ->
    ``LeakyReLU`` -> ``GRU`` -> ``Linear(hidden_dim, Nf * 4)`` applied to the
    GRU output at the last time step.

    Args:
        Np: Stored on the module by the parent constructor.
        Nf: Number of 4-value rows in the output for each sample.
        hidden_dim: Hidden size of the GRU.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout passed to ``nn.GRU``.
    """

    def __init__(self, Np: int, Nf: int, hidden_dim: int = 128, num_layers: int = 2, dropout: float = 0.1):
        super().__init__(Np, Nf)
        # Input layer: projects the 8 per-box features to 16 channels.
        self.input = nn.Linear(8, 16)
        self.leaky_relu = nn.LeakyReLU()
        self.hidden = nn.GRU(
            input_size=16,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            dropout=dropout,
            batch_first=True,
        )
        self.output = nn.Linear(hidden_dim, Nf * 4)

    def forward(self, batch):
        """Run the model on a batch of bounding-box sequences.

        Args:
            batch: Indexable batch whose ``shape[0]`` is the batch size; each
                sample is a sequence of boxes indexed as ``[x1, y1, x2, y2]``.

        Returns:
            Tensor of shape ``(batch_size, Nf, 4)``.
        """
        batch_size = batch.shape[0]

        # Extract features
        features = [self._extract_features(sample) for sample in batch]

        # Build the feature tensor on the same device as the model's weights.
        # Without an explicit device it is created on the default device, and
        # the first Linear layer fails once the model has been moved (e.g. to
        # CUDA). Note that torch.tensor copies the values, so no gradient
        # flows back into ``batch``.
        x = torch.tensor(features, dtype=torch.float32, device=self.input.weight.device)

        # Forward pass
        out = self.input(x)
        out = self.leaky_relu(out)
        out, _ = self.hidden(out)
        # Only the GRU output at the final time step feeds the output layer.
        out = self.output(out[:, -1, :])
        return out.view(batch_size, self.Nf, 4)

Figure_1.png ADDED Viewed

README.md CHANGED Viewed

@@ -1,3 +1,123 @@
----
-license: mit
----

+---
+tags:
+- trajectory-prediction
+- lstm
+- drone-tracking
+- computer-vision
+license: apache-2.0
+datasets:
+- Ecoaetix/uFRED-predict-0.4
+---
+# Drone Stalker 1
+![Demo GIF](8.gif)
+GRU model for predicting drone trajectories based on bounding box sequences from video footage.
+## Model Description
+This model predicts future drone positions given past trajectory data. It processes sequences of bounding boxes and outputs predicted future positions, significantly outperforming baseline models on the FRED dataset.
+Drone Stalker 1 is an extremely lightweight model with just 2,592 parameters. Despite this, its performance is on par with other models of up to 300k parameters.
+## Architecture
+- **Model Type**: GRU (Gated Recurrent Unit)
+- **Input Features**: [x_center, y_center, x_velocity, y_velocity, width, height, x1, y1]
+- **Total Parameters**: 2,592
+- **Input Sequence Length**: 12 frames (Np=12)
+- **Output Sequence Length**: 12 frames (Nf=12)
+- **Frame Interval**: 33.3ms (30 FPS)
+- **Image Resolution**: 1280x720
+### Output
+Predicted future bounding boxes (normalized [0, 1])
+## Training Details
+- **Dataset**: uFRED-predict-0.4
+- **Epochs**: 25
+- **Learning Rate**: 1e-3
+- **Optimizer**: Adam
+- **Loss Function**: Smooth L1 Loss
+## Performance
+Evaluation metrics on test set:
+- **Average Displacement Error (ADE)**: 23.91px
+- **Final Displacement Error (FDE)**: 43.83px
+- **Mean Intersection over Union (mIoU)**: 0.5135
+![Performance Comparison Chart](Figure_1.png)
+## Usage
+```python
+import torch
+# Load the pretrained weights (this returns a state dict, not a Model instance)
+model = torch.hub.load_state_dict_from_url(
+    'https://huggingface.co/Ecoaetix/DroneStalker/resolve/main/dronestalker-1.1.pth'
+)
+# Or download and load manually
+from huggingface_hub import hf_hub_download
+model_path = hf_hub_download(
+    repo_id="Ecoaetix/DroneStalker",
+    filename="dronestalker-1.1.pth"
+)
+# You'll need the Model class (included as DroneStalker.py in this repo)
+from DroneStalker import Model
+model = Model(Np=12, Nf=12, hidden_dim=16, num_layers=1, dropout=0)
+model.load_state_dict(torch.load(model_path))
+model.eval()
+# Inference
+with torch.no_grad():
+    # Input: [batch_size, 12, 4] - 12 past bounding boxes [x1, y1, x2, y2]
+    predictions = model(past_bboxes)
+    # Output: [batch_size, 12, 4] - 12 future bounding boxes (min-max normalized)
+```
+## Input Format
+The model expects input bounding boxes in pixel coordinates:
+- Shape: `[batch_size, 12, 4]`
+- Format: `[x1, y1, x2, y2]` where (x1,y1) is top-left, (x2,y2) is bottom-right
+- Image dimensions: 1280x720 pixels
+## Output Format
+The model outputs normalized predictions:
+- Shape: `[batch_size, 12, 4]`
+- Format: `[x1_norm, y1_norm, x2_norm, y2_norm]` where values are in range [0, 1]
+- Multiply x-coordinates by 1280 and y-coordinates by 720 to get pixel values
+## Limitations
+- Trained specifically on drone footage at 1280x720 resolution
+- Assumes consistent frame rate of 30 FPS
+- Best performance on stationary, ground-based tracking scenarios similar to training data
+- Single object tracking only
+## Citation
+```bibtex
+@misc{DroneStalker-LSTM-0.3,
+  author = {Jacob Kenney},
+  title = {DroneStalker-LSTM-0.3},
+  year = {2025},
+  publisher = {HuggingFace},
+  howpublished = {\url{https://huggingface.co/Ecoaetix/DroneStalker}}
+}
+```
+## License
+Apache 2.0

dronestalker-1.1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f514dea60dc7c55b0ee8b7eecc1833329d71338091f1222bf04dec0b3999e55f
+size 13676

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ torch>=2.0.0