geetxnsh
/

screen-on-off-classifier

ONNX

ml-intern

Model card Files Files and versions

xet

Community

geetxnsh committed 20 days ago

Commit

aceaec1

verified ·

1 Parent(s): 9aaf5f3

Upload video_inference.py with huggingface_hub

Browse files

Files changed (1) hide show

video_inference.py +251 -0

video_inference.py ADDED Viewed

	@@ -0,0 +1,251 @@

+"""
+video_inference.py
+------------------
+Process an MP4 (or webcam) through the Screen ON/OFF classifier.
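+Requirements:
+    pip install opencv-python-headless numpy onnxruntime
+    (the headless OpenCV build has no GUI support; install opencv-python
+    instead if you want to use --display)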
+Usage:
+    # Annotate a video, write to file (no GUI needed)
+    python video_inference.py --video input.mp4 --roi 200 100 300 400 --out output.mp4
+    # Frame-by-frame (lowest latency, best for real-time preview)
+    python video_inference.py --video input.mp4 --roi 200 100 300 400 --display --batch 1
+    # Batch mode (higher throughput, slight latency trade-off)
+    python video_inference.py --video input.mp4 --roi 200 100 300 400 --out output.mp4 --batch 8
+    # Live webcam
+    python video_inference.py --camera 0 --roi 200 100 300 400 --display
+The --roi values are: x y width height (pixel coords in the original frame).
+If your video is already cropped to the phone screen, omit --roi.
+"""
+import argparse
+import time
+import cv2
+import numpy as np
+import onnxruntime as ort
+class ScreenClassifier:
+    """ONNX wrapper with the exact preprocessing used during training.
+    The session runs on the CPU execution provider. ``predict`` and
+    ``predict_batch`` both decode logits through ``_decode`` so a frame gets
+    the same label and confidence whichever entry point is used.
+    """
+    def __init__(self, onnx_path: str = "screen_classifier.onnx"):
+        """Create a CPU inference session for the model at *onnx_path*."""
+        opts = ort.SessionOptions()
+        opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+        opts.inter_op_num_threads = 1
+        opts.intra_op_num_threads = 2
+        self.session = ort.InferenceSession(
+            onnx_path,
+            sess_options=opts,
+            providers=["CPUExecutionProvider"],
+        )
+        self.input_name = self.session.get_inputs()[0].name
+    def _preprocess(self, bgr: np.ndarray) -> np.ndarray:
+        """BGR/HWC -> normalised greyscale NCHW (1,1,64,64).
+        Frames that are already single-channel (HxW or HxWx1) skip the colour
+        conversion and 4-channel frames are treated as BGRA; anything else is
+        handed to OpenCV as BGR exactly as before.
+        """
+        if bgr.ndim == 2:
+            gray = bgr
+        elif bgr.ndim == 3 and bgr.shape[2] == 1:
+            gray = bgr[:, :, 0]
+        elif bgr.ndim == 3 and bgr.shape[2] == 4:
+            gray = cv2.cvtColor(bgr, cv2.COLOR_BGRA2GRAY)
+        else:
+            gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
+        gray = cv2.resize(gray, (64, 64), interpolation=cv2.INTER_LINEAR)
+        # Scale 0..255 to -1..1: (v / 255 - 0.5) / 0.5
+        gray = (gray.astype(np.float32) / 255.0 - 0.5) / 0.5
+        return gray[np.newaxis, np.newaxis, :, :]   # (1, 1, 64, 64)
+    @staticmethod
+    def _decode(logits: np.ndarray) -> list[tuple[str, float]]:
+        """Convert raw logits into one (label, confidence) pair per logit.
+        The sigmoid is evaluated in float64 using exp(-|z|), which never
+        overflows, so extreme logits do not raise RuntimeWarning. A
+        probability above 0.5 is "ON"; confidence is the probability of the
+        label that was chosen.
+        """
+        z = np.asarray(logits, dtype=np.float64).reshape(-1)
+        e = np.exp(-np.abs(z))
+        probs = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
+        return [
+            ("ON", float(p)) if p > 0.5 else ("OFF", float(1.0 - p))
+            for p in probs
+        ]
+    def predict(self, frame: np.ndarray) -> tuple[str, float]:
+        """Classify one frame and return (label, confidence).
+        Raises ValueError if the model output for the frame is not exactly
+        one value.
+        """
+        x = self._preprocess(frame)
+        logit = self.session.run(None, {self.input_name: x})[0]
+        # .item() insists on a single logit for the single input frame.
+        return self._decode(np.array([logit.item()]))[0]
+    def predict_batch(self, frames: list[np.ndarray]) -> list[tuple[str, float]]:
+        """Classify several frames in one session run; [] for no frames."""
+        if not frames:
+            return []
+        batch = np.concatenate([self._preprocess(f) for f in frames], axis=0)
+        logits = self.session.run(None, {self.input_name: batch})[0]
+        return self._decode(logits)
+def draw_label(frame: np.ndarray, label: str, conf: float,
+               x: int = 10, y: int = 30) -> np.ndarray:
+    """Stamp "<label>  <conf as percent>" onto a BGR frame and return it.
+    The text is green when the label is "ON" and red otherwise. The frame is
+    modified in place; (x, y) is the text origin passed to OpenCV.
+    """
+    if label == "ON":
+        bgr = (0, 255, 0)
+    else:
+        bgr = (0, 0, 255)
+    caption = f"{label}  {conf:.2%}"
+    origin = (x, y)
+    cv2.putText(frame, caption, origin, cv2.FONT_HERSHEY_SIMPLEX, 0.8, bgr, 2)
+    return frame
+def _safe_display(win_name: str, frame: np.ndarray, display_enabled: bool) -> bool:
+    """Show *frame* in window *win_name* when the preview is enabled.
+    Returns False only when the user pressed "q"; returns True when the
+    preview is disabled or when OpenCV raises (e.g. a headless environment).
+    """
+    if display_enabled:
+        try:
+            cv2.imshow(win_name, frame)
+            pressed = cv2.waitKey(1) & 0xFF
+        except cv2.error:
+            return True  # headless
+        return pressed != ord("q")
+    return True
+def _clamp_roi(roi: list[int], frame_w: int, frame_h: int) -> tuple[int, int, int, int]:
+    """Clip an (x, y, w, h) ROI to the frame; return it as (x0, y0, x1, y1).
+    A non-positive frame dimension is treated as "not reported by the
+    backend" and leaves that axis without an upper bound. Raises ValueError
+    when no pixels remain after clipping.
+    """
+    x, y, w, h = roi
+    # Negative starts would wrap around under NumPy slicing, so pin them to 0.
+    x0, y0 = max(x, 0), max(y, 0)
+    x1, y1 = x + w, y + h
+    if frame_w > 0:
+        x1 = min(x1, frame_w)
+    if frame_h > 0:
+        y1 = min(y1, frame_h)
+    if x1 <= x0 or y1 <= y0:
+        raise ValueError(
+            f"--roi x={x} y={y} w={w} h={h} does not overlap the "
+            f"{frame_w}x{frame_h} frame"
+        )
+    return x0, y0, x1, y1
+def _flush_batch(clf, frames: list[np.ndarray], region: tuple,
+                 writer, display: bool) -> tuple[int, bool]:
+    """Classify, annotate, show and write *frames*; return (done, quit).
+    *region* is (x0, y0, x1, y1); x1/y1 may be None to slice to the frame
+    edge. *done* counts the frames fully handled, and *quit* is True when
+    the user pressed "q" in the preview (that frame is not written).
+    """
+    x0, y0, x1, y1 = region
+    crops = [f[y0:y1, x0:x1] for f in frames]
+    results = clf.predict_batch(crops)
+    done = 0
+    for frame, (label, conf) in zip(frames, results):
+        annotated = draw_label(frame, label, conf, x=x0 + 10, y=y0 + 30)
+        if not _safe_display("Screen ON/OFF", annotated, display):
+            return done, True
+        if writer is not None:
+            writer.write(annotated)
+        done += 1
+    return done, False
+def main():
+    """Command-line entry point: classify every frame of a video or webcam.
+    Frames are collected into groups of ``--batch`` (a group of 1 is
+    processed immediately), annotated with the predicted label, optionally
+    previewed and written to ``--out``. The capture, writer and preview
+    windows are released even when processing fails.
+    """
+    parser = argparse.ArgumentParser(description="Screen ON/OFF classifier for video")
+    parser.add_argument("--video", type=str, default=None,
+                        help="Path to input MP4/video file")
+    parser.add_argument("--camera", type=int, default=None,
+                        help="Webcam index (e.g. 0). Mutually exclusive with --video")
+    parser.add_argument("--roi", type=int, nargs=4, metavar=("X", "Y", "W", "H"),
+                        default=None,
+                        help="Crop region: x y width height")
+    parser.add_argument("--out", type=str, default=None,
+                        help="Path to write annotated output video (MP4)")
+    parser.add_argument("--display", action="store_true",
+                        help="Show live preview window (needs GUI)")
+    parser.add_argument("--model", type=str, default="screen_classifier.onnx",
+                        help="Path to ONNX model")
+    parser.add_argument("--batch", type=int, default=1,
+                        help="Inference batch size (1 = lowest latency, >1 = higher throughput)")
+    args = parser.parse_args()
+    if args.video is None and args.camera is None:
+        parser.error("Provide either --video <path> or --camera <index>")
+    if args.video is not None and args.camera is not None:
+        parser.error("Use --video OR --camera, not both")
+    if args.batch < 1:
+        # A batch size below 1 would buffer frames forever without running.
+        parser.error("--batch must be >= 1")
+    # ------------------------------------------------------------------ #
+    # Open source
+    # ------------------------------------------------------------------ #
+    source = args.video if args.video is not None else args.camera
+    cap = cv2.VideoCapture(source)
+    writer = None
+    frame_idx = 0
+    try:
+        if not cap.isOpened():
+            raise RuntimeError(f"Cannot open video source: {source}")
+        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
+        frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) if args.video is not None else -1
+        print(f"Source       : {source}")
+        print(f"Resolution   : {frame_w}x{frame_h} @ {fps:.1f} FPS")
+        print(f"Total frames : {total_frames if total_frames > 0 else 'N/A (live)'}")
+        print(f"Model        : {args.model}")
+        print(f"Batch size   : {args.batch}")
+        # -------------------------------------------------------------- #
+        # Optional output writer
+        # -------------------------------------------------------------- #
+        if args.out:
+            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+            writer = cv2.VideoWriter(args.out, fourcc, fps, (frame_w, frame_h))
+            if not writer.isOpened():
+                raise RuntimeError(f"Cannot open VideoWriter for {args.out}")
+        # -------------------------------------------------------------- #
+        # Classifier and crop region
+        # -------------------------------------------------------------- #
+        clf = ScreenClassifier(args.model)
+        if args.roi is None:
+            # Open-ended slice bounds select the whole frame.
+            region = (0, 0, None, None)
+            print("No --roi specified; using full frame.")
+        else:
+            rx, ry, rw, rh = args.roi
+            print(f"Crop ROI     : x={rx}, y={ry}, w={rw}, h={rh}")
+            try:
+                region = _clamp_roi(args.roi, frame_w, frame_h)
+            except ValueError as exc:
+                parser.error(str(exc))
+            if region != (rx, ry, rx + rw, ry + rh):
+                print(f"ROI clipped  : x={region[0]}, y={region[1]}, "
+                      f"w={region[2] - region[0]}, h={region[3] - region[1]}")
+        # -------------------------------------------------------------- #
+        # Main loop
+        # -------------------------------------------------------------- #
+        report_every = 60
+        next_report = report_every
+        pending: list[np.ndarray] = []
+        t0 = time.perf_counter()
+        while True:
+            ok, frame = cap.read()
+            if ok:
+                pending.append(frame)
+                if len(pending) < args.batch:
+                    continue
+            # Reached with a full batch, or at end of stream with leftovers.
+            if pending:
+                done, quit_requested = _flush_batch(
+                    clf, pending, region, writer, args.display)
+                frame_idx += done
+                pending.clear()
+                if quit_requested:
+                    break
+                # Report once per threshold crossed, whatever the batch size.
+                if frame_idx >= next_report:
+                    elapsed = time.perf_counter() - t0
+                    print(f"Processed {frame_idx} frames  |  "
+                          f"{frame_idx / elapsed:.1f} FPS  |  "
+                          f"{elapsed:.1f} s elapsed")
+                    next_report = (frame_idx // report_every + 1) * report_every
+            if not ok:
+                break
+    finally:
+        cap.release()
+        if writer is not None:
+            writer.release()
+        try:
+            cv2.destroyAllWindows()
+        except cv2.error:
+            pass  # OpenCV builds without a window backend raise here
+    total_time = time.perf_counter() - t0
+    avg_fps = frame_idx / total_time if total_time > 0 else 0.0
+    print(f"\nDone. {frame_idx} frames in {total_time:.2f} s ({avg_fps:.1f} FPS average)")
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()