Spaces:
Sleeping
Sleeping
cyberai-1 committed on
Commit ·
84a9657
1
Parent(s): d5baa30
Update Readme
Browse files
- README.md +327 -116
- backend/__pycache__/tracker.cpython-311.pyc +0 -0
- backend/run_tracker.py +0 -2
README.md
CHANGED
|
@@ -6,11 +6,13 @@ sdk: docker
|
|
| 6 |
app_port: 7860
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
-
# TrafficSense
|
| 10 |
|
| 11 |
-
> AIMS Senegal
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
| 14 |
|
| 15 |

|
| 16 |

|
|
@@ -18,141 +20,316 @@ A real-time computer vision system for detecting, tracking, and counting road-tr
|
|
| 18 |
|
| 19 |
---
|
| 20 |
|
| 21 |
-
## Features
|
| 22 |
|
| 23 |
-
|
|
| 24 |
|---|---|
|
| 25 |
-
|
|
| 26 |
-
|
|
| 27 |
-
|
|
| 28 |
-
|
|
| 29 |
-
|
|
| 30 |
-
|
|
| 31 |
-
|
|
| 32 |
-
|
|
| 33 |
-
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
---
|
| 36 |
|
| 37 |
-
##
|
| 38 |
|
| 39 |
```
|
| 40 |
traffic-tracker/
|
| 41 |
├── backend/
|
| 42 |
-
│ ├── main.py
|
| 43 |
-
│ ├── tracker.py
|
| 44 |
-
│ ├── run_tracker.py
|
| 45 |
-
│ ├── finetune.py
|
| 46 |
-
│ ├── extract_frames.py
|
| 47 |
-
│
|
|
|
|
|
|
|
| 48 |
├── frontend/
|
| 49 |
-
│ └── index.html
|
| 50 |
-
├──
|
| 51 |
-
├──
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
└── README.md
|
| 54 |
```
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
---
|
| 57 |
|
| 58 |
-
##
|
| 59 |
|
| 60 |
-
###
|
| 61 |
|
| 62 |
```bash
|
| 63 |
cd backend
|
| 64 |
pip install -r requirements.txt
|
| 65 |
```
|
| 66 |
|
| 67 |
-
|
| 68 |
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
```bash
|
| 72 |
-
cd backend
|
| 73 |
uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
| 74 |
```
|
| 75 |
|
| 76 |
-
|
| 77 |
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
| 88 |
|
| 89 |
---
|
| 90 |
|
| 91 |
-
##
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
-
|
| 98 |
-
python run_tracker.py --video traffic.mp4 --scene highway_cam --classes car truck bus --save
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
| 102 |
```
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
---
|
| 105 |
|
| 106 |
-
## API
|
| 107 |
|
| 108 |
| Method | Endpoint | Description |
|
| 109 |
|---|---|---|
|
| 110 |
-
| `
|
| 111 |
-
| `GET` | `/
|
| 112 |
-
| `GET` | `/
|
| 113 |
-
| `
|
| 114 |
-
| `
|
| 115 |
-
| `
|
| 116 |
-
| `
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
---
|
| 119 |
|
| 120 |
-
##
|
| 121 |
-
|
| 122 |
-
All groups must follow this schema for dashboard merging.
|
| 123 |
|
| 124 |
-
|
| 125 |
|
| 126 |
-
```
|
| 127 |
-
{
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
"class": "car",
|
| 133 |
-
"confidence": 0.872,
|
| 134 |
-
"bbox": [120, 340, 280, 450],
|
| 135 |
-
"center": [200, 395],
|
| 136 |
-
"crossed_line": true,
|
| 137 |
-
"direction": "down"
|
| 138 |
-
}
|
| 139 |
```
|
| 140 |
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
```json
|
| 144 |
{
|
| 145 |
-
"scene":
|
| 146 |
-
"
|
| 147 |
-
"
|
| 148 |
-
"
|
| 149 |
-
"
|
| 150 |
-
"
|
| 151 |
-
"
|
| 152 |
-
"
|
|
|
|
|
|
|
| 153 |
"total_unique_objects": 142,
|
| 154 |
-
"count_per_class":
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
"temporal_distribution": [
|
| 157 |
{"bucket_10s": 0, "detections": 34},
|
| 158 |
{"bucket_10s": 1, "detections": 51}
|
|
@@ -160,63 +337,97 @@ All groups must follow this schema for dashboard merging.
|
|
| 160 |
}
|
| 161 |
```
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
---
|
| 164 |
|
| 165 |
-
##
|
|
|
|
|
|
|
| 166 |
|
| 167 |
```bash
|
| 168 |
-
|
| 169 |
-
python extract_frames.py --video traffic1.mp4 --out frames/ --every 10
|
| 170 |
|
| 171 |
-
#
|
| 172 |
-
|
| 173 |
|
| 174 |
-
#
|
| 175 |
-
python
|
| 176 |
|
| 177 |
-
#
|
| 178 |
-
python run_tracker.py --video
|
| 179 |
```
|
| 180 |
|
|
|
|
|
|
|
| 181 |
---
|
| 182 |
|
| 183 |
-
##
|
| 184 |
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
---
|
| 189 |
|
| 190 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
-
|
| 193 |
-
- [x] Fine-tuning script + instructions
|
| 194 |
-
- [x] ByteTrack tracking with persistent IDs
|
| 195 |
-
- [x] Virtual counting line + direction detection
|
| 196 |
-
- [x] Unique object counting (not per-frame)
|
| 197 |
-
- [x] Detailed detection logs (JSONL + JSON + CSV)
|
| 198 |
-
- [x] Shared data schema across groups
|
| 199 |
-
- [x] Web interface (upload, class selection, live display)
|
| 200 |
-
- [x] Bounding boxes, labels, IDs, live counters
|
| 201 |
-
- [x] "No object detected" indicator
|
| 202 |
-
- [x] Multi-scene dashboard with comparisons
|
| 203 |
-
- [x] GitHub-ready structure
|
| 204 |
|
| 205 |
-
|
| 206 |
-
-
|
| 207 |
-
-
|
| 208 |
-
-
|
| 209 |
-
-
|
| 210 |
-
- [x] CSV frame statistics export
|
| 211 |
|
| 212 |
---
|
| 213 |
|
| 214 |
## License
|
| 215 |
|
| 216 |
-
MIT
|
| 217 |
|
| 218 |
---
|
| 219 |
|
| 220 |
## Authors
|
| 221 |
|
| 222 |
-
|
|
|
|
| 6 |
app_port: 7860
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
+
# TrafficSense - Road Traffic Detection, Tracking, and Analytics
|
| 10 |
|
| 11 |
+
> AIMS Senegal - Computer Vision Project 2 - April 2026
|
| 12 |
|
| 13 |
+
TrafficSense is a road-traffic analysis application that detects, tracks, counts, and summarizes moving traffic objects from video files, remote video URLs, or a webcam feed. The system combines YOLO object detection, ByteTrack object tracking, a FastAPI backend, and a browser dashboard for live monitoring and post-processing analytics.
|
| 14 |
+
|
| 15 |
+
The project focuses on six traffic classes: `person`, `bicycle`, `car`, `motorbike`, `bus`, and `truck`.
|
| 16 |
|
| 17 |

|
| 18 |

|
|
|
|
| 20 |
|
| 21 |
---
|
| 22 |
|
| 23 |
+
## Main Features
|
| 24 |
|
| 25 |
+
| Area | Description |
|
| 26 |
|---|---|
|
| 27 |
+
| Object detection | YOLOv8-compatible models through Ultralytics. The default model path is `best.pt`. |
|
| 28 |
+
| Multi-object tracking | ByteTrack assigns persistent IDs to visible objects across frames. |
|
| 29 |
+
| Unique counting | Each tracked object is counted once when its `track_id` appears for the first time. |
|
| 30 |
+
| Supported classes | `person`, `bicycle`, `car`, `motorbike`, `bus`, `truck`. |
|
| 31 |
+
| Live processing | The backend streams annotated frames to the browser with Server-Sent Events. |
|
| 32 |
+
| Video inputs | Local upload, remote video URL, and webcam frame analysis. |
|
| 33 |
+
| Visual output | Bounding boxes, class labels, tracking IDs, object trails, and live counters. |
|
| 34 |
+
| Dashboard | Scene filtering, global statistics, class distribution, timeline chart, scene comparison, and object-position heatmap. |
|
| 35 |
+
| Logs | Detection CSV, raw JSONL detections, summary JSON, frame-level CSV statistics, and annotated MP4 output. |
|
| 36 |
+
| Export | Download logs and annotated videos directly from the interface. |
|
| 37 |
+
| Training support | Frame extraction and fine-tuning scripts are included for custom datasets. |
|
| 38 |
|
| 39 |
---
|
| 40 |
|
| 41 |
+
## Architecture
|
| 42 |
|
| 43 |
```
|
| 44 |
traffic-tracker/
|
| 45 |
├── backend/
|
| 46 |
+
│ ├── main.py # FastAPI application, routes, sessions, streaming, dashboard aggregation
|
| 47 |
+
│ ├── tracker.py # YOLO + ByteTrack processing engine and log generation
|
| 48 |
+
│ ├── run_tracker.py # Command-line processing entry point
|
| 49 |
+
│ ├── finetune.py # YOLO fine-tuning script
|
| 50 |
+
│ ├── extract_frames.py # Utility to extract video frames for labeling
|
| 51 |
+
│ ├── dataset.yaml # Dataset configuration for training
|
| 52 |
+
│ ├── best.pt # Default model weights used by the app
|
| 53 |
+
│ └── requirements.txt # Python dependencies
|
| 54 |
├── frontend/
|
| 55 |
+
│ └── index.html # Single-page dashboard and control interface
|
| 56 |
+
├── data/
|
| 57 |
+
│ ├── Traffic_detection.mp4
|
| 58 |
+
│ └── Group_05_Africa_countries_001_detections.csv
|
| 59 |
+
├── logs/ # Created at runtime: summaries, detections, annotated videos
|
| 60 |
+
├── uploads/ # Created at runtime: uploaded source videos
|
| 61 |
+
├── output/ # Created at runtime when needed
|
| 62 |
+
├── Dockerfile # Docker/Hugging Face Spaces deployment
|
| 63 |
+
├── LICENSE
|
| 64 |
└── README.md
|
| 65 |
```
|
| 66 |
|
| 67 |
+
### Backend flow
|
| 68 |
+
|
| 69 |
+
1. A video file, video URL, or webcam session is submitted to FastAPI.
|
| 70 |
+
2. `TrafficTracker` loads the selected YOLO model and filters detections by selected classes.
|
| 71 |
+
3. YOLO detects objects frame by frame.
|
| 72 |
+
4. ByteTrack assigns stable object IDs.
|
| 73 |
+
5. The tracker writes annotated frames, detection rows, frame statistics, and summary metrics.
|
| 74 |
+
6. The dashboard endpoint aggregates completed sessions and saved log files.
|
| 75 |
+
7. The frontend renders live video feedback and analytics.
|
| 76 |
+
|
| 77 |
+
### Frontend flow
|
| 78 |
+
|
| 79 |
+
The frontend is contained in `frontend/index.html`. It provides:
|
| 80 |
+
|
| 81 |
+
- Source selection: file upload, remote URL, or webcam.
|
| 82 |
+
- Scene and group metadata inputs.
|
| 83 |
+
- Model, confidence, and class controls.
|
| 84 |
+
- Live frame canvas with counters and progress state.
|
| 85 |
+
- Analytics dashboard with charts and heatmap.
|
| 86 |
+
- Log list and download controls.
|
| 87 |
+
|
| 88 |
---
|
| 89 |
|
| 90 |
+
## Installation
|
| 91 |
|
| 92 |
+
### Local Python setup
|
| 93 |
|
| 94 |
```bash
|
| 95 |
cd backend
|
| 96 |
pip install -r requirements.txt
|
| 97 |
```
|
| 98 |
|
| 99 |
+
For CUDA-enabled GPU environments, install the matching PyTorch build before running the app. For example:
|
| 100 |
|
| 101 |
+
```bash
|
| 102 |
+
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118
|
| 103 |
+
```
|
| 104 |
+
|
| 105 |
+
### Start the application
|
| 106 |
+
|
| 107 |
+
From the `backend` directory:
|
| 108 |
|
| 109 |
```bash
|
|
|
|
| 110 |
uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
| 111 |
```
|
| 112 |
|
| 113 |
+
Then open:
|
| 114 |
|
| 115 |
+
```text
|
| 116 |
+
http://localhost:8000
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
The FastAPI server serves the frontend automatically from `frontend/index.html`.
|
| 120 |
+
|
| 121 |
+
### Docker
|
| 122 |
|
| 123 |
+
```bash
|
| 124 |
+
docker build -t trafficsense .
|
| 125 |
+
docker run --rm -p 7860:7860 trafficsense
|
| 126 |
+
```
|
| 127 |
|
| 128 |
+
Then open:
|
| 129 |
+
|
| 130 |
+
```text
|
| 131 |
+
http://localhost:7860
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
The Docker configuration is also compatible with the Hugging Face Spaces metadata at the top of this README.
|
| 135 |
|
| 136 |
---
|
| 137 |
|
| 138 |
+
## Using the Web Interface
|
| 139 |
|
| 140 |
+
### Analyze an uploaded video
|
| 141 |
+
|
| 142 |
+
1. Open the web interface.
|
| 143 |
+
2. Drop a video into the upload area or choose a file manually.
|
| 144 |
+
3. Enter a scene name, such as `intersection_01` or `Africa_countries`.
|
| 145 |
+
4. Keep the default group ID (`Group_05`) or enter another group ID.
|
| 146 |
+
5. Select the traffic classes to track.
|
| 147 |
+
6. Choose the model path and confidence threshold.
|
| 148 |
+
7. Click **START ANALYSIS**.
|
| 149 |
+
8. Watch the annotated video stream and live object counters.
|
| 150 |
+
9. Open the Analytics tab to inspect summary charts and the position heatmap.
|
| 151 |
+
10. Open the Log Files tab to download generated outputs.
|
| 152 |
+
|
| 153 |
+
### Analyze a remote video URL
|
| 154 |
+
|
| 155 |
+
Paste a direct `http://` or `https://` video URL into the URL field. The backend downloads the video into `uploads/` and processes it like a normal uploaded file.
|
| 156 |
+
|
| 157 |
+
### Analyze webcam frames
|
| 158 |
+
|
| 159 |
+
Use the webcam option in the interface. The browser captures frames and sends them to the backend session. When the webcam session is stopped, the backend saves the same summary and detection files used for video processing.
|
| 160 |
+
|
| 161 |
+
---
|
| 162 |
+
|
| 163 |
+
## Dashboard and Metrics
|
| 164 |
+
|
| 165 |
+
The dashboard combines all completed in-memory sessions and saved `*_summary.json` files in `logs/`.
|
| 166 |
+
|
| 167 |
+
### Summary cards
|
| 168 |
+
|
| 169 |
+
| Metric | Meaning |
|
| 170 |
+
|---|---|
|
| 171 |
+
| Scenes | Number of completed scenes included in the current dashboard filter. |
|
| 172 |
+
| Total objects | Sum of unique tracked objects across selected scenes. |
|
| 173 |
+
| Total duration | Total processed video duration in seconds. |
|
| 174 |
+
| Average per scene | `total_objects / number_of_scenes`, rounded to the nearest integer. |
|
| 175 |
+
|
| 176 |
+
### Charts
|
| 177 |
+
|
| 178 |
+
| Component | Description |
|
| 179 |
+
|---|---|
|
| 180 |
+
| Objects by class | Bar chart of unique object counts per class. |
|
| 181 |
+
| Traffic intensity timeline | Number of detections grouped into 10-second buckets. |
|
| 182 |
+
| Scene comparison | Per-scene duration, total object count, cars, pedestrians, and trucks/buses. |
|
| 183 |
+
| Position heatmap | A normalized grid built from object center coordinates (`cx`, `cy`) in the detection CSV files. |
|
| 184 |
+
|
| 185 |
+
### Position heatmap
|
| 186 |
+
|
| 187 |
+
The heatmap uses each detection center and normalizes it by the frame size:
|
| 188 |
+
|
| 189 |
+
- `x = cx / frame_width`
|
| 190 |
+
- `y = cy / frame_height`
|
| 191 |
+
|
| 192 |
+
The normalized positions are assigned to a 24 by 24 grid. Each cell stores:
|
| 193 |
+
|
| 194 |
+
- total detections in that region
|
| 195 |
+
- per-class counts in that region
|
| 196 |
+
- dominant class for color display
|
| 197 |
+
|
| 198 |
+
The map includes percentage coordinates around the plot. Cell colors match the class colors used in the Track Classes controls:
|
| 199 |
+
|
| 200 |
+
| Class | Color role |
|
| 201 |
+
|---|---|
|
| 202 |
+
| `person` | Red |
|
| 203 |
+
| `bicycle` | Green |
|
| 204 |
+
| `car` | Amber |
|
| 205 |
+
| `motorbike` | Pink |
|
| 206 |
+
| `bus` | Blue |
|
| 207 |
+
| `truck` | Purple |
|
| 208 |
+
|
| 209 |
+
---
|
| 210 |
+
|
| 211 |
+
## Tracking and Counting Method
|
| 212 |
+
|
| 213 |
+
The tracker uses YOLO detections followed by ByteTrack tracking. Each detection includes a `track_id` when the tracker can associate it with an object trajectory; otherwise the `track_id` is logged as `-1`.
|
| 214 |
|
| 215 |
+
Counting is based on first appearance:
|
|
|
|
| 216 |
|
| 217 |
+
```text
|
| 218 |
+
if track_id has not been counted before:
|
| 219 |
+
add track_id to counted_ids
|
| 220 |
+
increment count_per_class[class_name]
|
| 221 |
```
|
| 222 |
|
| 223 |
+
This avoids counting the same visible object again on every frame. The CSV schema still includes `crossed_line` and `direction` fields for compatibility with shared traffic-analysis formats, but the current implementation stores `false` and an empty direction by default.
|
| 224 |
+
|
| 225 |
+
The tracker also computes approximate pixel speed:
|
| 226 |
+
|
| 227 |
+
```text
|
| 228 |
+
speed_px_s = distance_between_current_and_previous_center * fps
|
| 229 |
+
```
|
| 230 |
+
|
| 231 |
+
This value is useful for relative movement analysis inside the same video, but it is not a calibrated real-world speed in km/h.
|
| 232 |
+
|
| 233 |
---
|
| 234 |
|
| 235 |
+
## API Reference
|
| 236 |
|
| 237 |
| Method | Endpoint | Description |
|
| 238 |
|---|---|---|
|
| 239 |
+
| `GET` | `/` | Serves the web interface. |
|
| 240 |
+
| `GET` | `/health` | Basic server status and active session count. |
|
| 241 |
+
| `GET` | `/classes` | Returns supported traffic classes. |
|
| 242 |
+
| `POST` | `/upload` | Uploads a file or downloads a video URL and starts processing. |
|
| 243 |
+
| `POST` | `/webcam/start` | Starts a webcam tracking session. |
|
| 244 |
+
| `POST` | `/webcam/frame/{sid}` | Sends one webcam frame for detection and tracking. |
|
| 245 |
+
| `POST` | `/webcam/stop/{sid}` | Stops a webcam session and writes logs. |
|
| 246 |
+
| `GET` | `/stream/{sid}` | Streams annotated frames for an uploaded video session using Server-Sent Events. |
|
| 247 |
+
| `GET` | `/status/{sid}` | Returns processing status, progress, FPS, and latest counters. |
|
| 248 |
+
| `GET` | `/summary/{sid}` | Returns final summary for a completed session. |
|
| 249 |
+
| `GET` | `/dashboard` | Returns aggregated dashboard data and heatmap cells. |
|
| 250 |
+
| `GET` | `/logs` | Lists generated files in `logs/`. |
|
| 251 |
+
| `GET` | `/videos` | Lists annotated MP4 files. |
|
| 252 |
+
| `GET` | `/log/{filename}` | Downloads one log file. |
|
| 253 |
+
| `GET` | `/download/video/{sid}` | Downloads annotated video for a completed session. |
|
| 254 |
+
| `GET` | `/download/video-file/{filename}` | Downloads an annotated video by filename. |
|
| 255 |
+
| `GET` | `/stream/video/{sid}` | Streams an annotated video for browser playback. |
|
| 256 |
+
| `GET` | `/stream/video-file/{filename}` | Streams an annotated video by filename. |
|
| 257 |
+
|
| 258 |
+
### Upload form fields
|
| 259 |
+
|
| 260 |
+
| Field | Type | Default | Description |
|
| 261 |
+
|---|---|---|---|
|
| 262 |
+
| `file` | file | empty | Local video file. |
|
| 263 |
+
| `video_url` | string | empty | Remote video URL. Used only if no file is uploaded. |
|
| 264 |
+
| `scene_name` | string | `scene_01` | Scene label used in logs and dashboard filters. |
|
| 265 |
+
| `group_id` | string | `Group_05` | Group label used in log filenames. |
|
| 266 |
+
| `classes` | comma-separated string | all classes | Example: `car,bus,truck`. |
|
| 267 |
+
| `conf` | float | `0.5` | YOLO confidence threshold. |
|
| 268 |
+
| `model` | string | `best.pt` | Path or name of the model weights. |
|
| 269 |
|
| 270 |
---
|
| 271 |
|
| 272 |
+
## Output Files
|
|
|
|
|
|
|
| 273 |
|
| 274 |
+
Each completed session writes files into `logs/` using this pattern:
|
| 275 |
|
| 276 |
+
```text
|
| 277 |
+
{group_id}_{scene_name}_{order}_detections.csv
|
| 278 |
+
{group_id}_{scene_name}_{order}_detections.jsonl
|
| 279 |
+
{group_id}_{scene_name}_{order}_summary.json
|
| 280 |
+
{group_id}_{scene_name}_{order}_frame_stats.csv
|
| 281 |
+
{group_id}_{scene_name}_{order}_annotated.mp4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
```
|
| 283 |
|
| 284 |
+
The `{order}` number is automatically incremented per group and scene (for example, `001` in `Group_05_Africa_countries_001_detections.csv`).
|
| 285 |
+
|
| 286 |
+
### Detection CSV
|
| 287 |
+
|
| 288 |
+
The main detection table contains one row per detected object per frame:
|
| 289 |
+
|
| 290 |
+
| Column | Description |
|
| 291 |
+
|---|---|
|
| 292 |
+
| `frame` | Frame index starting at 1. |
|
| 293 |
+
| `timestamp_sec` | Timestamp in seconds. |
|
| 294 |
+
| `scene_name` | Scene label. |
|
| 295 |
+
| `group_id` | Group label. |
|
| 296 |
+
| `video_name` | Original video name or `webcam`. |
|
| 297 |
+
| `track_id` | ByteTrack object ID, or `-1` if no ID is assigned. |
|
| 298 |
+
| `class_name` | Detected traffic class. |
|
| 299 |
+
| `confidence` | YOLO detection confidence. |
|
| 300 |
+
| `bbox_x1`, `bbox_y1`, `bbox_x2`, `bbox_y2` | Bounding box coordinates in pixels. |
|
| 301 |
+
| `cx`, `cy` | Bounding box center in pixels. |
|
| 302 |
+
| `frame_width`, `frame_height` | Source frame dimensions. |
|
| 303 |
+
| `crossed_line` | Compatibility field, currently `false` by default. |
|
| 304 |
+
| `direction` | Compatibility field, currently empty by default. |
|
| 305 |
+
| `speed_px_s` | Approximate speed in pixels per second. |
|
| 306 |
+
|
| 307 |
+
### JSONL detections
|
| 308 |
+
|
| 309 |
+
The JSONL file stores the same detection rows in line-delimited JSON format.
|
| 310 |
+
|
| 311 |
+
### Summary JSON
|
| 312 |
|
| 313 |
```json
|
| 314 |
{
|
| 315 |
+
"scene": "Africa_countries",
|
| 316 |
+
"group_id": "Group_05",
|
| 317 |
+
"video_name": "Traffic_detection.mp4",
|
| 318 |
+
"session_id": "abc123",
|
| 319 |
+
"processed_at": "2026-04-29T12:00:00",
|
| 320 |
+
"total_frames": 1800,
|
| 321 |
+
"duration_sec": 60.0,
|
| 322 |
+
"fps": 30.0,
|
| 323 |
+
"resolution": [1080, 1440],
|
| 324 |
+
"selected_classes": ["person", "bicycle", "car", "motorbike", "bus", "truck"],
|
| 325 |
"total_unique_objects": 142,
|
| 326 |
+
"count_per_class": {
|
| 327 |
+
"car": 98,
|
| 328 |
+
"bus": 12,
|
| 329 |
+
"truck": 17,
|
| 330 |
+
"person": 15
|
| 331 |
+
},
|
| 332 |
+
"annotated_video": "logs/Group_05_Africa_countries_001_annotated.mp4",
|
| 333 |
"temporal_distribution": [
|
| 334 |
{"bucket_10s": 0, "detections": 34},
|
| 335 |
{"bucket_10s": 1, "detections": 51}
|
|
|
|
| 337 |
}
|
| 338 |
```
|
| 339 |
|
| 340 |
+
### Frame statistics CSV
|
| 341 |
+
|
| 342 |
+
The frame statistics file summarizes each processed frame, including frame index, timestamp, number of detections in the frame, visibility state, and cumulative counts.
|
| 343 |
+
|
| 344 |
---
|
| 345 |
|
| 346 |
+
## Command-Line Processing
|
| 347 |
+
|
| 348 |
+
The CLI is useful for batch processing videos without the web interface.
|
| 349 |
|
| 350 |
```bash
|
| 351 |
+
cd backend
|
|
|
|
| 352 |
|
| 353 |
+
# Process a video and show the annotated window
|
| 354 |
+
python run_tracker.py --video ../data/Traffic_detection.mp4 --scene Africa_countries --show
|
| 355 |
|
| 356 |
+
# Track only selected classes
|
| 357 |
+
python run_tracker.py --video ../data/Traffic_detection.mp4 --classes car bus truck --conf 0.4
|
| 358 |
|
| 359 |
+
# Use a custom model path
|
| 360 |
+
python run_tracker.py --video ../data/Traffic_detection.mp4 --model best.pt --conf 0.5
|
| 361 |
```
|
| 362 |
|
| 363 |
+
Generated logs are saved to the directory passed with `--logs` or to `logs/` by default.
|
| 364 |
+
|
| 365 |
---
|
| 366 |
|
| 367 |
+
## Fine-Tuning Workflow
|
| 368 |
|
| 369 |
+
The repository includes utilities for preparing and training a custom detector.
|
| 370 |
+
|
| 371 |
+
### 1. Extract frames
|
| 372 |
+
|
| 373 |
+
```bash
|
| 374 |
+
cd backend
|
| 375 |
+
python extract_frames.py --video ../data/Traffic_detection.mp4 --out frames/ --every 10
|
| 376 |
+
```
|
| 377 |
+
|
| 378 |
+
### 2. Label the frames
|
| 379 |
+
|
| 380 |
+
Label extracted frames with a tool that can export YOLO-format annotations. The dataset configuration should follow `backend/dataset.yaml`.
|
| 381 |
+
|
| 382 |
+
### 3. Train or fine-tune
|
| 383 |
+
|
| 384 |
+
```bash
|
| 385 |
+
python finetune.py --data dataset.yaml --model yolov8s.pt --epochs 50 --device 0
|
| 386 |
+
```
|
| 387 |
+
|
| 388 |
+
### 4. Use the trained weights
|
| 389 |
+
|
| 390 |
+
```bash
|
| 391 |
+
python run_tracker.py --video ../data/Traffic_detection.mp4 --model runs/traffic/finetune/weights/best.pt
|
| 392 |
+
```
|
| 393 |
+
|
| 394 |
+
The web interface can also use a custom model by entering the model path in the model field.
|
| 395 |
|
| 396 |
---
|
| 397 |
|
| 398 |
+
## Model and Class Notes
|
| 399 |
+
|
| 400 |
+
The tracker maps the following COCO class IDs:
|
| 401 |
+
|
| 402 |
+
| COCO ID | Class |
|
| 403 |
+
|---:|---|
|
| 404 |
+
| 0 | person |
|
| 405 |
+
| 1 | bicycle |
|
| 406 |
+
| 2 | car |
|
| 407 |
+
| 3 | motorbike (named `motorcycle` in COCO) |
|
| 408 |
+
| 5 | bus |
|
| 409 |
+
| 7 | truck |
|
| 410 |
+
|
| 411 |
+
The default confidence threshold in the web API is `0.5`. Lower values may detect more objects but can increase false positives. Higher values reduce weak detections but may miss smaller or partially occluded objects.
|
| 412 |
+
|
| 413 |
+
---
|
| 414 |
|
| 415 |
+
## Practical Notes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
|
| 417 |
+
- `best.pt` should be available from the backend working directory unless another model path is provided.
|
| 418 |
+
- `logs/`, `uploads/`, and `output/` are created automatically.
|
| 419 |
+
- Annotated MP4 files are written with OpenCV. When `ffmpeg` is available, the backend can produce a browser-compatible H.264 copy for playback.
|
| 420 |
+
- Heatmap data depends on detection CSV files. If a summary exists without its matching detection CSV, the heatmap for that scene will be empty.
|
| 421 |
+
- Unique counts depend on tracking stability. Heavy occlusion, camera cuts, or very crowded scenes can create new IDs for the same physical object.
|
|
|
|
| 422 |
|
| 423 |
---
|
| 424 |
|
| 425 |
## License
|
| 426 |
|
| 427 |
+
MIT - see [LICENSE](LICENSE).
|
| 428 |
|
| 429 |
---
|
| 430 |
|
| 431 |
## Authors
|
| 432 |
|
| 433 |
+
AIMS Senegal - Computer Vision 2026
|
backend/__pycache__/tracker.cpython-311.pyc
CHANGED
|
Binary files a/backend/__pycache__/tracker.cpython-311.pyc and b/backend/__pycache__/tracker.cpython-311.pyc differ
|
|
|
backend/run_tracker.py
CHANGED
|
@@ -21,7 +21,6 @@ def parse_args():
|
|
| 21 |
p.add_argument("--save", action="store_true", help="Save annotated output video")
|
| 22 |
p.add_argument("--logs", default="logs", help="Directory to save logs")
|
| 23 |
p.add_argument("--out", default="output", help="Directory for output video")
|
| 24 |
-
p.add_argument("--line", type=float, default=0.55, help="Counting line position (0-1)")
|
| 25 |
return p.parse_args()
|
| 26 |
|
| 27 |
|
|
@@ -39,7 +38,6 @@ def main():
|
|
| 39 |
conf_threshold=args.conf,
|
| 40 |
scene_name=args.scene,
|
| 41 |
output_dir=args.logs,
|
| 42 |
-
counting_line_ratio=args.line,
|
| 43 |
)
|
| 44 |
tracker.setup_video(cap)
|
| 45 |
|
|
|
|
| 21 |
p.add_argument("--save", action="store_true", help="Save annotated output video")
|
| 22 |
p.add_argument("--logs", default="logs", help="Directory to save logs")
|
| 23 |
p.add_argument("--out", default="output", help="Directory for output video")
|
|
|
|
| 24 |
return p.parse_args()
|
| 25 |
|
| 26 |
|
|
|
|
| 38 |
conf_threshold=args.conf,
|
| 39 |
scene_name=args.scene,
|
| 40 |
output_dir=args.logs,
|
|
|
|
| 41 |
)
|
| 42 |
tracker.setup_video(cap)
|
| 43 |
|