shivamsshhiivvaamm commited on
Commit
28cfaab
·
verified ·
1 Parent(s): 9d5bbfb

Upload 3 files

Browse files
Files changed (3) hide show
  1. bytetrack_yolox.py +174 -0
  2. main.py +222 -0
  3. webcam.html +381 -0
bytetrack_yolox.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ByteTrack wrapper using official YOLOX implementation.
3
+ This provides object tracking capabilities for the detection system.
4
+ """
5
+ import numpy as np
6
+ from collections import namedtuple
7
+
8
+ try:
9
+ from yolox.tracker.byte_tracker import BYTETracker, STrack
10
+ YOLOX_AVAILABLE = True
11
+ except ImportError:
12
+ YOLOX_AVAILABLE = False
13
+ print("Warning: YOLOX not available. Falling back to simple tracking.")
14
+
15
+
16
# Lightweight detection record consumed by the ByteTrack wrapper.
Detection = namedtuple("Detection", "tlwh score class_id class_name")
18
+
19
+
20
class ByteTrackYOLOX:
    """
    Wrapper for the official YOLOX ByteTrack implementation.

    Converts YOLO detection dicts to ByteTrack's input format, runs the
    tracker, and writes the resulting track IDs back onto the detection
    dicts. Falls back to a trivial sequential-ID assigner when YOLOX is
    not installed.
    """

    def __init__(self, fps=30, track_thresh=0.5, track_buffer=30, match_thresh=0.8):
        """
        Initialize ByteTrack tracker.

        Args:
            fps: Frame rate of the video.
            track_thresh: Detection confidence threshold for tracking.
            track_buffer: Number of frames to keep lost tracks.
            match_thresh: Matching threshold for data association.
        """
        self.fps = fps
        self.track_thresh = track_thresh
        self.track_buffer = track_buffer
        self.match_thresh = match_thresh

        if YOLOX_AVAILABLE:
            self.tracker = BYTETracker(self._make_args(), frame_rate=fps)
            print(f"✓ ByteTrack initialized (YOLOX) - FPS: {fps}, Track Thresh: {track_thresh}")
        else:
            # Fallback: no real tracker; sequential IDs are handed out instead.
            self.tracker = None
            self.tracks = []
            self.next_id = 1
            print("✓ Simple tracker initialized (YOLOX not available)")

    def _make_args(self):
        """Build the argparse-style namespace that BYTETracker expects.

        Shared by __init__ and reset() so the two construction sites can
        never drift apart (previously an identical Args class was
        duplicated in both places).
        """
        from types import SimpleNamespace  # local import keeps module deps unchanged
        return SimpleNamespace(
            track_thresh=self.track_thresh,
            track_buffer=self.track_buffer,
            match_thresh=self.match_thresh,
            mot20=False,  # use standard MOT17 settings
        )

    def update(self, detections):
        """
        Update tracker with new detections.

        Args:
            detections: List of detection dicts with keys: 'box', 'confidence',
                'class', 'id'; box format: [x1, y1, x2, y2].

        Returns:
            The same list with 'track_id' added to each detection
            (None when a detection could not be associated with a track).
        """
        if not detections:
            # NOTE(review): skipping the tracker on empty frames means
            # lost-track aging does not advance; acceptable for this app.
            return detections

        if YOLOX_AVAILABLE and self.tracker is not None:
            return self._update_yolox(detections)
        return self._update_simple(detections)

    def _update_yolox(self, detections):
        """Run official YOLOX ByteTrack and map track IDs back via IoU."""
        # ByteTrack expects rows of [x1, y1, x2, y2, score].
        if detections:
            det_array = np.array(
                [list(det['box']) + [det['confidence']] for det in detections]
            )
        else:
            det_array = np.empty((0, 5))

        # img_info == img_size, so BYTETracker applies a scale of 1.0 and
        # the boxes stay in the caller's coordinate space.
        online_targets = self.tracker.update(det_array, [640, 640], [640, 640])

        for det in detections:
            det['track_id'] = None

        # Greedy IoU matching, at most one detection per track.
        # BUGFIX: detections that already received an ID are skipped, so
        # two tracks can no longer claim (and overwrite) the same box.
        for track in online_targets:
            tlwh = track.tlwh
            track_box = [tlwh[0], tlwh[1], tlwh[0] + tlwh[2], tlwh[1] + tlwh[3]]

            best_iou = 0.0
            best_det = None
            for det in detections:
                if det['track_id'] is not None:
                    continue  # already claimed by another track
                iou = self._compute_iou(det['box'], track_box)
                if iou > best_iou:
                    best_iou = iou
                    best_det = det

            # Only accept a reasonably overlapping match.
            if best_det is not None and best_iou > 0.3:
                best_det['track_id'] = track.track_id

        return detections

    def _update_simple(self, detections):
        """Fallback: assign a fresh sequential ID to every detection.

        No frame-to-frame association is attempted, so IDs are NOT
        stable across frames.
        """
        for det in detections:
            det['track_id'] = self.next_id
            self.next_id += 1
        return detections

    def _compute_iou(self, box1, box2):
        """Compute IoU between two boxes [x1, y1, x2, y2]."""
        x1_min, y1_min, x1_max, y1_max = box1
        x2_min, y2_min, x2_max, y2_max = box2

        # Intersection, clamped to zero when the boxes do not overlap.
        inter_width = max(0, min(x1_max, x2_max) - max(x1_min, x2_min))
        inter_height = max(0, min(y1_max, y2_max) - max(y1_min, y2_min))
        inter_area = inter_width * inter_height

        # Union = sum of areas minus the shared intersection.
        box1_area = (x1_max - x1_min) * (y1_max - y1_min)
        box2_area = (x2_max - x2_min) * (y2_max - y2_min)
        union_area = box1_area + box2_area - inter_area

        if union_area == 0:
            return 0
        return inter_area / union_area

    def reset(self):
        """Reset the tracker to its initial state."""
        if YOLOX_AVAILABLE and self.tracker is not None:
            # Rebuild a fresh BYTETracker with the original settings.
            self.tracker = BYTETracker(self._make_args(), frame_rate=self.fps)
        else:
            self.next_id = 1


# Alias for backward compatibility
ByteTrackWrapper = ByteTrackYOLOX
main.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import onnxruntime as ort
4
+ import shutil
5
+ import os
6
+ import uuid
7
+ import base64
8
+ import time
9
+ import json
10
+ from fastapi import FastAPI, UploadFile, File, Request
11
+ from fastapi.responses import HTMLResponse, JSONResponse
12
+ from fastapi.middleware.cors import CORSMiddleware
13
+ from fastapi.staticfiles import StaticFiles
14
+
15
+ # Import the tracker (YOLOX-based)
16
+ from bytetrack_yolox import ByteTrackWrapper
17
+
18
# ---------------- CONFIGURATION ---------------- #
# Path to the exported ONNX detection model loaded once at startup.
YOLO_MODEL_PATH = "best.onnx"

app = FastAPI()

# Wide-open CORS so the webcam page can call the API from any origin.
# NOTE(review): fine for development; tighten allow_origins for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Ensure the output directory exists; annotated images written by /detect
# are served back to the browser under /static.
os.makedirs("static", exist_ok=True)
app.mount("/static", StaticFiles(directory="static"), name="static")
32
+
33
+ # ---------------- YOLO MODEL ---------------- #
34
class YOLO:
    """ONNX-Runtime YOLO detector.

    Handles letterbox preprocessing, decoding + NMS postprocessing, and
    drawing of (optionally tracked) detections.
    """

    def __init__(self, model_path):
        """Load the ONNX model and read its static input size."""
        self.session = ort.InferenceSession(model_path)
        self.input_name = self.session.get_inputs()[0].name
        # Assumes a static NCHW input shape — TODO confirm for this model.
        self.h, self.w = self.session.get_inputs()[0].shape[2:]
        self.conf = 0.50  # score threshold
        self.iou = 0.45   # NMS IoU threshold
        self.classes = [
            "Zebra", "Lion", "Leopard", "Cheetah", "Tiger", "Bear", "Butterfly",
            "Canary", "Crocodile", "Bull", "Camel", "Centipede", "Caterpillar",
            "Duck", "Squirrel", "Spider", "Ladybug", "Elephant", "Horse", "Fox",
            "Tortoise", "Frog", "Kangaroo", "Deer", "Eagle", "Monkey", "Snake",
            "Owl", "Swan", "Goat", "Rabbit", "Giraffe", "Goose", "PolarBear",
            "Raven", "Hippopotamus", "BrownBear", "Rhinoceros", "Woodpecker",
            "Sheep", "Magpie", "Ostrich", "Jaguar", "Hedgehog", "Turkey",
            "Raccoon", "Worm", "Harbor", "Panda", "RedPanda", "Otter", "Lynx",
            "Scorpion", "Koala"
        ]
        np.random.seed(42)
        # Generate a large palette of random colors for Tracks
        self.colors = np.random.randint(0, 255, size=(200, 3)).tolist()

    def preprocess(self, img):
        """Letterbox a BGR image into the model's input tensor.

        Returns:
            (tensor, scale): NCHW float32 tensor in [0, 1] and the resize
            scale needed to map boxes back to the original image.
        """
        h0, w0 = img.shape[:2]
        scale = min(self.w / w0, self.h / h0)
        nw, nh = int(w0 * scale), int(h0 * scale)
        resized = cv2.resize(img, (nw, nh))
        # Pad with neutral gray (114), top-left aligned.
        canvas = np.full((self.h, self.w, 3), 114, dtype=np.uint8)
        canvas[:nh, :nw] = resized
        img = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
        img = img.transpose(2, 0, 1).astype(np.float32) / 255.0
        img = np.expand_dims(img, 0)
        return img, scale

    def postprocess(self, output, scale):
        """Decode raw model output into detection dicts after NMS.

        Expects output[0] with shape (1, 4 + num_classes, N): each column
        is cx, cy, w, h followed by per-class scores.
        """
        preds = output[0][0].transpose()
        boxes, nms_boxes, scores, ids = [], [], [], []
        for p in preds:
            x, y, w, h = p[:4]
            cls_scores = p[4:]
            cid = int(np.argmax(cls_scores))
            score = cls_scores[cid]
            if score >= self.conf:
                # Undo letterbox scaling back to original image coordinates.
                x1 = (x - w / 2) / scale
                y1 = (y - h / 2) / scale
                x2 = (x + w / 2) / scale
                y2 = (y + h / 2) / scale
                boxes.append([float(x1), float(y1), float(x2), float(y2)])
                # BUGFIX: cv2.dnn.NMSBoxes expects boxes as [x, y, w, h];
                # the corner format [x1, y1, x2, y2] previously passed made
                # the overlap test wrong and broke duplicate suppression.
                nms_boxes.append([float(x1), float(y1), float(x2 - x1), float(y2 - y1)])
                scores.append(float(score))
                ids.append(cid)
        results = []
        idxs = cv2.dnn.NMSBoxes(nms_boxes, scores, self.conf, self.iou)
        if len(idxs) > 0:
            # np.asarray copes with both ndarray and tuple returns across
            # OpenCV versions.
            for i in np.asarray(idxs).flatten():
                results.append({
                    "class": self.classes[ids[i]],
                    "confidence": scores[i],
                    "box": boxes[i],
                    "id": ids[i]
                })
        return results

    def draw(self, img, detections):
        """Draw boxes + labels; color is keyed on the track ID if present."""
        for d in detections:
            x1, y1, x2, y2 = map(int, d["box"])

            # Use Track ID for color if available, otherwise Class ID
            track_id = d.get('track_id')
            if track_id is not None:
                # Color based on Track ID (consistent color for same object)
                color_idx = int(track_id) % len(self.colors)
                label = f"{d['class']} #{track_id}"
            else:
                # Fallback to Class ID
                color_idx = int(d["id"]) % len(self.colors)
                label = f"{d['class']} ({d['confidence']:.2f})"

            color = self.colors[color_idx]
            color = (int(color[0]), int(color[1]), int(color[2]))

            cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)

            # Filled label background above the box, then white text on top.
            (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
            cv2.rectangle(img, (x1, y1 - 25), (x1 + w, y1), color, -1)
            cv2.putText(img, label, (x1, y1 - 8), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
        return img
121
+
122
# Initialize YOLO and Tracker once at import time; both objects are
# shared by every request handler below.
yolo = YOLO(YOLO_MODEL_PATH)
tracker = ByteTrackWrapper(fps=30, track_thresh=0.5, match_thresh=0.8)
125
+
126
+ # ---------------- ROUTES ---------------- #
127
+
128
@app.post("/detect", response_class=HTMLResponse)
async def detect_image(file: UploadFile = File(...)):
    """Handle a single image upload: detect, track, draw, return HTML.

    The uploaded file is written to a temp path, run through YOLO and the
    tracker, annotated, and published under /static for the response page.
    """
    start_t = time.time()
    # BUGFIX (security): never build the temp path from the client-supplied
    # filename — 'temp_{file.filename}' allowed path traversal via '../'.
    # A random name also avoids collisions between concurrent uploads.
    temp = f"temp_{uuid.uuid4().hex}.jpg"
    try:
        with open(temp, "wb") as f:
            shutil.copyfileobj(file.file, f)

        img = cv2.imread(temp)
        if img is None:
            return "<h2>Error reading image</h2>"

        # 1. Inference
        tensor, scale = yolo.preprocess(img)
        output = yolo.session.run(None, {yolo.input_name: tensor})
        detections = yolo.postprocess(output, scale)

        # 2. Tracking
        # Even on a static upload, we run the tracker to assign IDs.
        tracker.update(detections)

        # 3. Draw the annotated result and publish it under /static.
        img = yolo.draw(img, detections)
        name = f"output_{uuid.uuid4().hex}.jpg"
        path = f"static/{name}"
        cv2.imwrite(path, img)
    finally:
        # BUGFIX: clean up the temp upload even if inference raises;
        # previously a failure leaked the temp file on disk.
        if os.path.exists(temp):
            os.remove(temp)

    process_ms = (time.time() - start_t) * 1000

    return f"""
    <h2>✅ Detection Result</h2>
    <p>⏱️ Processed in {process_ms:.2f}ms</p>
    <div style="margin-bottom: 20px;">
        <img src="/static/{name}" width="800" style="border-radius: 10px; border: 2px solid #333;"/>
    </div>
    <a href="/">⬅ Upload Another</a>
    """
168
+
169
@app.post("/detect-frame")
async def detect_frame(request: Request):
    """Process one base64 webcam frame: detect, track, draw, return JSON.

    Expects a JSON body {"image": "data:image/jpeg;base64,..."} and returns
    the annotated frame (base64 data URL), the detection list (each dict
    gains a 'track_id'), and the end-to-end latency in milliseconds.
    """
    start_t = time.time()

    data = await request.json()
    img_data = data.get("image")
    if not img_data:
        return JSONResponse({"error": "No image provided"}, status_code=400)

    # Decode Image
    try:
        # Splits 'data:image/jpeg;base64,...'
        img_bytes = base64.b64decode(img_data.split(',')[1])
        nparr = np.frombuffer(img_bytes, np.uint8)
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    except Exception as e:
        return JSONResponse({"error": f"Invalid image data: {str(e)}"}, status_code=400)

    # BUGFIX: cv2.imdecode signals corrupt input by returning None, not by
    # raising — without this check yolo.preprocess crashed with an opaque
    # AttributeError instead of a clean 400 response.
    if img is None:
        return JSONResponse({"error": "Invalid image data: could not decode"}, status_code=400)

    # 1. YOLO Inference
    tensor, scale = yolo.preprocess(img)
    output = yolo.session.run(None, {yolo.input_name: tensor})
    detections = yolo.postprocess(output, scale)

    # 2. Update Tracker
    # The tracker modifies 'detections' in-place, adding 'track_id' to objects
    tracker.update(detections)

    # 3. Draw
    img = yolo.draw(img, detections)

    # Encode back to base64 for the browser.
    _, buffer = cv2.imencode('.jpg', img)
    img_base64 = base64.b64encode(buffer).decode('utf-8')

    latency_ms = (time.time() - start_t) * 1000

    return JSONResponse({
        "image": f"data:image/jpeg;base64,{img_base64}",
        "detections": detections,
        "latency_ms": f"{latency_ms:.1f}"
    })
211
+
212
@app.get("/", response_class=HTMLResponse)
def webcam_page():
    """Serve the live-webcam page, or an error note when it is missing."""
    page = "webcam.html"
    # Guard clause: missing template yields the same inline error message.
    if not os.path.exists(page):
        return "<h1>Error: webcam.html not found. Please create it.</h1>"
    with open(page, "r", encoding="utf-8") as f:
        return f.read()
219
+
220
if __name__ == "__main__":
    # Local import so uvicorn is only required when run as a script.
    import uvicorn
    # Dev server entry point; binds on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)
webcam.html ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Live Wildlife AI</title>
8
+ <style>
9
+ * {
10
+ margin: 0;
11
+ padding: 0;
12
+ box-sizing: border-box;
13
+ }
14
+
15
+ body {
16
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
17
+ background: linear-gradient(135deg, #1e3c72 0%, #2a5298 100%);
18
+ min-height: 100vh;
19
+ display: flex;
20
+ flex-direction: column;
21
+ align-items: center;
22
+ padding: 20px;
23
+ color: #333;
24
+ }
25
+
26
+ .container {
27
+ background: white;
28
+ border-radius: 20px;
29
+ padding: 30px;
30
+ box-shadow: 0 20px 60px rgba(0, 0, 0, 0.5);
31
+ max-width: 1000px;
32
+ width: 100%;
33
+ }
34
+
35
+ h1 {
36
+ text-align: center;
37
+ color: #2c3e50;
38
+ margin-bottom: 20px;
39
+ }
40
+
41
+ .video-container {
42
+ position: relative;
43
+ width: 100%;
44
+ border-radius: 15px;
45
+ overflow: hidden;
46
+ background: #000;
47
+ box-shadow: 0 10px 20px rgba(0, 0, 0, 0.2);
48
+ min-height: 400px;
49
+ }
50
+
51
+ #videoElement,
52
+ #canvasElement {
53
+ width: 100%;
54
+ display: block;
55
+ }
56
+
57
+ #canvasElement {
58
+ display: none;
59
+ }
60
+
61
+ .controls {
62
+ display: flex;
63
+ justify-content: center;
64
+ gap: 15px;
65
+ margin-top: 20px;
66
+ }
67
+
68
+ button {
69
+ padding: 12px 25px;
70
+ font-size: 16px;
71
+ border: none;
72
+ border-radius: 8px;
73
+ cursor: pointer;
74
+ font-weight: 600;
75
+ transition: transform 0.2s;
76
+ }
77
+
78
+ #startBtn {
79
+ background: #28a745;
80
+ color: white;
81
+ }
82
+
83
+ #stopBtn {
84
+ background: #dc3545;
85
+ color: white;
86
+ }
87
+
88
+ button:disabled {
89
+ opacity: 0.5;
90
+ cursor: not-allowed;
91
+ }
92
+
93
+ button:hover:not(:disabled) {
94
+ transform: scale(1.05);
95
+ }
96
+
97
+ /* AI Panel Styling */
98
+ .ai-panel {
99
+ margin-top: 25px;
100
+ background: #f8f9fa;
101
+ border-radius: 12px;
102
+ overflow: hidden;
103
+ display: none;
104
+ /* Hidden by default */
105
+ box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
106
+ animation: slideUp 0.5s ease;
107
+ }
108
+
109
+ .ai-header {
110
+ background: #007bff;
111
+ color: white;
112
+ padding: 10px 20px;
113
+ font-weight: bold;
114
+ display: flex;
115
+ justify-content: space-between;
116
+ align-items: center;
117
+ }
118
+
119
+ .ai-body {
120
+ padding: 20px;
121
+ display: grid;
122
+ grid-template-columns: 1fr 1fr;
123
+ gap: 15px;
124
+ }
125
+
126
+ .info-card {
127
+ background: white;
128
+ padding: 10px;
129
+ border-radius: 8px;
130
+ border: 1px solid #e0e0e0;
131
+ }
132
+
133
+ .info-label {
134
+ font-size: 0.85em;
135
+ text-transform: uppercase;
136
+ color: #666;
137
+ font-weight: bold;
138
+ margin-bottom: 5px;
139
+ display: block;
140
+ }
141
+
142
+ .info-value {
143
+ font-size: 1.1em;
144
+ color: #333;
145
+ }
146
+
147
+ .full-width {
148
+ grid-column: span 2;
149
+ }
150
+
151
+ .fun-fact {
152
+ background: #fff3cd;
153
+ border: 1px solid #ffeeba;
154
+ color: #856404;
155
+ }
156
+
157
+ @keyframes slideUp {
158
+ from {
159
+ opacity: 0;
160
+ transform: translateY(20px);
161
+ }
162
+
163
+ to {
164
+ opacity: 1;
165
+ transform: translateY(0);
166
+ }
167
+ }
168
+
169
+ .stats-bar {
170
+ margin-top: 15px;
171
+ display: flex;
172
+ justify-content: space-between;
173
+ background: #f8f9fa;
174
+ padding: 10px 20px;
175
+ border-radius: 10px;
176
+ font-size: 0.9em;
177
+ color: #666;
178
+ font-family: monospace;
179
+ }
180
+
181
+ .perf-item {
182
+ font-weight: bold;
183
+ color: #555;
184
+ }
185
+
186
+ .back-link {
187
+ text-align: center;
188
+ margin-top: 20px;
189
+ }
190
+
191
+ .back-link a {
192
+ color: white;
193
+ text-decoration: none;
194
+ opacity: 0.8;
195
+ }
196
+ </style>
197
+ </head>
198
+
199
+ <body>
200
+ <div class="container">
201
+ <h1>🦁 Wildlife AI Explorer</h1>
202
+
203
+ <div class="video-container">
204
+ <video id="videoElement" autoplay playsinline></video>
205
+ <canvas id="canvasElement"></canvas>
206
+ </div>
207
+
208
+ <div class="controls">
209
+ <button id="startBtn">Start Camera</button>
210
+ <button id="stopBtn" disabled>Stop</button>
211
+ </div>
212
+
213
+ <div class="stats-bar">
214
+ <span class="perf-item">Status: <span id="status">Idle</span></span>
215
+ <span class="perf-item">Objects: <span id="detCount">0</span></span>
216
+ <span class="perf-item">Latency: <span id="latency">0</span>ms</span>
217
+ <span class="perf-item">Tracked IDs:
218
+ <span id="trackIds">--</span>
219
+ </span>
220
+ </div>
221
+
222
+ <!-- Structured AI Panel -->
223
+ <div id="aiPanel" class="ai-panel">
224
+ <div class="ai-header">
225
+ <span id="aiTitle">Analysis Result</span>
226
+ <span style="font-size: 0.8em;">Via Gemini</span>
227
+ </div>
228
+ <div class="ai-body">
229
+ <div class="info-card">
230
+ <span class="info-label">Common Name</span>
231
+ <span class="info-value" id="field-name">--</span>
232
+ </div>
233
+ <div class="info-card">
234
+ <span class="info-label">Scientific Name</span>
235
+ <span class="info-value" id="field-scientific" style="font-style: italic;">--</span>
236
+ </div>
237
+ <div class="info-card">
238
+ <span class="info-label">Habitat</span>
239
+ <span class="info-value" id="field-habitat">--</span>
240
+ </div>
241
+ <div class="info-card">
242
+ <span class="info-label">Diet</span>
243
+ <span class="info-value" id="field-diet">--</span>
244
+ </div>
245
+ <div class="info-card">
246
+ <span class="info-label">Danger Level</span>
247
+ <span class="info-value" id="field-danger">--</span>
248
+ </div>
249
+ <div class="info-card full-width fun-fact">
250
+ <span class="info-label">Fun Fact</span>
251
+ <span class="info-value" id="field-fact">--</span>
252
+ </div>
253
+ </div>
254
+ </div>
255
+
256
+ </div>
257
+
258
+ <div class="back-link">
259
+ <a href="/">⬅ Back to Upload Mode</a>
260
+ </div>
261
+
262
+ <script>
263
+ const trackIdsSpan = document.getElementById('trackIds');
264
+ const video = document.getElementById('videoElement');
265
+ const canvas = document.getElementById('canvasElement');
266
+ const ctx = canvas.getContext('2d');
267
+ const startBtn = document.getElementById('startBtn');
268
+ const stopBtn = document.getElementById('stopBtn');
269
+
270
+ const aiPanel = document.getElementById('aiPanel');
271
+ const statusSpan = document.getElementById('status');
272
+ const countSpan = document.getElementById('detCount');
273
+ const latencySpan = document.getElementById('latency');
274
+
275
+ // Fields to populate
276
+ const fieldName = document.getElementById('field-name');
277
+ const fieldScientific = document.getElementById('field-scientific');
278
+ const fieldHabitat = document.getElementById('field-habitat');
279
+ const fieldDiet = document.getElementById('field-diet');
280
+ const fieldDanger = document.getElementById('field-danger');
281
+ const fieldFact = document.getElementById('field-fact');
282
+
283
+ let stream = null;
284
+ let isDetecting = false;
285
+ let animationId = null;
286
+
287
+ startBtn.addEventListener('click', async () => {
288
+ try {
289
+ stream = await navigator.mediaDevices.getUserMedia({
290
+ video: { width: { ideal: 1280 }, height: { ideal: 720 }, facingMode: 'environment' }
291
+ });
292
+ video.srcObject = stream;
293
+ video.onloadedmetadata = () => {
294
+ canvas.width = video.videoWidth;
295
+ canvas.height = video.videoHeight;
296
+ isDetecting = true;
297
+
298
+ startBtn.disabled = true;
299
+ stopBtn.disabled = false;
300
+ statusSpan.innerText = "Running";
301
+
302
+ video.style.display = 'none';
303
+ canvas.style.display = 'block';
304
+
305
+ detectFrame();
306
+ };
307
+ } catch (err) {
308
+ alert("Camera Error: " + err.message);
309
+ }
310
+ });
311
+
312
+ stopBtn.addEventListener('click', () => {
313
+ isDetecting = false;
314
+ if (stream) stream.getTracks().forEach(t => t.stop());
315
+ cancelAnimationFrame(animationId);
316
+
317
+ video.style.display = 'block';
318
+ canvas.style.display = 'none';
319
+ startBtn.disabled = false;
320
+ stopBtn.disabled = true;
321
+ statusSpan.innerText = "Stopped";
322
+ aiPanel.style.display = 'none';
323
+ });
324
+
325
+ async function detectFrame() {
326
+ if (!isDetecting) return;
327
+
328
+ ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
329
+ const imageData = canvas.toDataURL('image/jpeg', 0.6);
330
+
331
+ try {
332
+ const res = await fetch('/detect-frame', {
333
+ method: 'POST',
334
+ headers: { 'Content-Type': 'application/json' },
335
+ body: JSON.stringify({ image: imageData })
336
+ });
337
+
338
+ const data = await res.json();
339
+
340
+ // 1. Draw processed image
341
+ const img = new Image();
342
+ img.onload = () => ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
343
+ img.src = data.image;
344
+
345
+ // 2. Stats
346
+ countSpan.innerText = data.detections.length;
347
+ const ids = data.detections
348
+ .filter(d => d.track_id !== undefined)
349
+ .map(d => `${d.class}:${d.track_id}`);
350
+
351
+ trackIdsSpan.innerText = ids.length ? ids.join(", ") : "--";
352
+
353
+ latencySpan.innerText = data.latency_ms || "0";
354
+
355
+ // 3. AI Data (Structured JSON)
356
+ if (data.ai_data && !data.ai_data.error) {
357
+ const info = data.ai_data;
358
+ aiPanel.style.display = 'block';
359
+
360
+ // Populate fields
361
+ fieldName.innerText = info.common_name || "Unknown";
362
+ fieldScientific.innerText = info.scientific_name || "";
363
+ fieldHabitat.innerText = info.habitat || "";
364
+ fieldDiet.innerText = info.diet || "";
365
+ fieldDanger.innerText = info.danger_level || "";
366
+ fieldFact.innerText = info.fun_fact || "";
367
+ }
368
+
369
+ } catch (e) {
370
+ console.error("Frame error:", e);
371
+ }
372
+
373
+ // Loop
374
+ setTimeout(() => {
375
+ animationId = requestAnimationFrame(detectFrame);
376
+ }, 100);
377
+ }
378
+ </script>
379
+ </body>
380
+
381
+ </html>