Spaces:
Sleeping
Sleeping
Faizan Azizahmed Shaikh
committed on
Commit
·
59e57cd
1
Parent(s):
67c544d
Upload 6 files
Browse files- app.py +66 -0
- requirements.txt +0 -2
- src/Webcam_Object_Detection.ipynb +101 -0
- src/items.py +12 -0
- src/realtime.py +42 -0
- src/yolov8n.pt +3 -0
app.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# coding: utf-8
|
| 3 |
+
|
| 4 |
+
# In[ ]:
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# import required libraries
|
| 8 |
+
from ultralytics import YOLO
|
| 9 |
+
import gradio as gr
|
| 10 |
+
import cv2
|
| 11 |
+
import math
|
| 12 |
+
from items import classNames
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# In[ ]:
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# detection function
|
| 19 |
+
def yolo_detect(feed, vid):
    """Run YOLOv8n object detection on a video and write an annotated copy.

    Args:
        feed: Webcam feed from the Gradio interface (currently unused; kept
            so the two-input interface signature stays intact).
        vid: Path to the input video file to run detection on.

    Returns:
        str: Path of the annotated output video ("output.avi").

    Raises:
        IndexError: If the source video yields no frames.
    """
    # Load a pretrained YOLOv8n model
    model = YOLO('yolov8n.pt')

    # Run inference on the source; stream=True yields results lazily
    # instead of buffering every frame's result in memory at once.
    results = model(vid, stream=True, verbose=False)

    # Collect annotated frames for the output writer.
    frames = []
    for frame in results:
        annotated = frame.orig_img
        for box in frame.boxes:
            # Bounding-box corners come back as tensors; cast to int pixels.
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

            # Draw the box and the predicted class label on the frame.
            cv2.rectangle(annotated, (x1, y1), (x2, y2), (255, 0, 255), 3)
            cv2.putText(annotated, classNames[int(box.cls[0])], (x1, y1),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 1)

        frames.append(annotated)

    if not frames:
        # Explicit error instead of a bare index crash below.
        raise IndexError("no frames decoded from input video")

    # Was frames[1].shape: crashed on single-frame videos; the first frame
    # is just as representative of the output dimensions.
    h, w, _ = frames[0].shape

    out_file = "output.avi"
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    writer = cv2.VideoWriter(out_file, fourcc, 25.0, (w, h))
    try:
        for annotated in frames:
            writer.write(annotated)
    finally:
        # Release the writer even if a write fails, so the file is closed.
        writer.release()
    return out_file
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# In[ ]:
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# Build the Gradio UI: webcam feed + uploaded video in, annotated video out.
detector_inputs = [gr.PlayableVideo(source='webcam'), gr.Video(autoplay=True)]
detector_outputs = [gr.PlayableVideo(autoplay=True, format='avi')]
demo = gr.Interface(
    fn=yolo_detect,
    inputs=detector_inputs,
    outputs=detector_outputs,
    cache_examples=True,
    allow_flagging='never',
)
# Queue requests so long-running detections don't time out, then serve.
demo.queue()
demo.launch(inline=False, debug=True, show_api=False, quiet=True)
|
| 66 |
+
|
requirements.txt
CHANGED
|
@@ -1,2 +0,0 @@
|
|
| 1 |
-
opencv_python==4.8.0.74
|
| 2 |
-
ultralytics==8.0.150
|
|
|
|
|
|
|
|
|
src/Webcam_Object_Detection.ipynb
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": null,
|
| 6 |
+
"id": "d9904ec5-391d-4967-9357-c8779d677142",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"# import required libraries\n",
|
| 11 |
+
"from ultralytics import YOLO\n",
|
| 12 |
+
"import gradio as gr\n",
|
| 13 |
+
"import cv2\n",
|
| 14 |
+
"import math\n",
|
| 15 |
+
"from items import classNames"
|
| 16 |
+
]
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"cell_type": "code",
|
| 20 |
+
"execution_count": null,
|
| 21 |
+
"id": "1dbb6ae7-c844-4933-9a5c-f778bb1dfa83",
|
| 22 |
+
"metadata": {},
|
| 23 |
+
"outputs": [],
|
| 24 |
+
"source": [
|
| 25 |
+
"# detection function\n",
|
| 26 |
+
"def yolo_detect(feed, vid):\n",
|
| 27 |
+
" video = vid\n",
|
| 28 |
+
" # Load a pretrained YOLOv8n model\n",
|
| 29 |
+
" model = YOLO('yolov8n.pt')\n",
|
| 30 |
+
" \n",
|
| 31 |
+
" # Run inference on the source\n",
|
| 32 |
+
" results = model(video, stream=True, verbose=False) \n",
|
| 33 |
+
" frames = list()\n",
|
| 34 |
+
" \n",
|
| 35 |
+
" # plot annotations\n",
|
| 36 |
+
" for frame in results:\n",
|
| 37 |
+
" boxes = frame.boxes\n",
|
| 38 |
+
" single = frame.orig_img\n",
|
| 39 |
+
" for box in boxes:\n",
|
| 40 |
+
" # bounding box\n",
|
| 41 |
+
" x1, y1, x2, y2 = box.xyxy[0]\n",
|
| 42 |
+
" x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values\n",
|
| 43 |
+
"\n",
|
| 44 |
+
" # put box in cam\n",
|
| 45 |
+
" cv2.rectangle(single, (x1, y1), (x2, y2), (255, 0, 255), 3)\n",
|
| 46 |
+
"\n",
|
| 47 |
+
" # object details\n",
|
| 48 |
+
" cv2.putText(single, classNames[int(box.cls[0])], (x1,y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 1)\n",
|
| 49 |
+
" \n",
|
| 50 |
+
" frames.append(single)\n",
|
| 51 |
+
" cv2.destroyAllWindows()\n",
|
| 52 |
+
" \n",
|
| 53 |
+
" h, w, c = frames[1].shape\n",
|
| 54 |
+
" \n",
|
| 55 |
+
" out_file = \"output.avi\"\n",
|
| 56 |
+
" fourcc=cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')\n",
|
| 57 |
+
" writer = out = cv2.VideoWriter(out_file, fourcc, 25.0, (w, h))\n",
|
| 58 |
+
" for i in range(len(frames)):\n",
|
| 59 |
+
" writer.write(frames[i])\n",
|
| 60 |
+
" writer.release()\n",
|
| 61 |
+
" return out_file"
|
| 62 |
+
]
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"cell_type": "code",
|
| 66 |
+
"execution_count": null,
|
| 67 |
+
"id": "692f5c49-67cd-4c11-8ee9-03dc7cb98809",
|
| 68 |
+
"metadata": {},
|
| 69 |
+
"outputs": [],
|
| 70 |
+
"source": [
|
| 71 |
+
"demo = gr.Interface(fn=yolo_detect, \n",
|
| 72 |
+
" inputs=[gr.PlayableVideo(source='webcam'), gr.Video(autoplay=True)],\n",
|
| 73 |
+
" outputs=[gr.PlayableVideo(autoplay=True, format='avi')],\n",
|
| 74 |
+
" cache_examples=True, allow_flagging='never')\n",
|
| 75 |
+
"demo.queue()\n",
|
| 76 |
+
"demo.launch(inline=False, debug=True, show_api=False, quiet=True)"
|
| 77 |
+
]
|
| 78 |
+
}
|
| 79 |
+
],
|
| 80 |
+
"metadata": {
|
| 81 |
+
"kernelspec": {
|
| 82 |
+
"display_name": "Python 3 (ipykernel)",
|
| 83 |
+
"language": "python",
|
| 84 |
+
"name": "python3"
|
| 85 |
+
},
|
| 86 |
+
"language_info": {
|
| 87 |
+
"codemirror_mode": {
|
| 88 |
+
"name": "ipython",
|
| 89 |
+
"version": 3
|
| 90 |
+
},
|
| 91 |
+
"file_extension": ".py",
|
| 92 |
+
"mimetype": "text/x-python",
|
| 93 |
+
"name": "python",
|
| 94 |
+
"nbconvert_exporter": "python",
|
| 95 |
+
"pygments_lexer": "ipython3",
|
| 96 |
+
"version": "3.11.4"
|
| 97 |
+
}
|
| 98 |
+
},
|
| 99 |
+
"nbformat": 4,
|
| 100 |
+
"nbformat_minor": 5
|
| 101 |
+
}
|
src/items.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# COCO class labels, indexed by the integer class id that YOLOv8 predicts.
classNames = [
    # people & vehicles
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
    "truck", "boat",
    # street objects & animals
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
    "zebra", "giraffe",
    # accessories & sports gear
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
    "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket",
    # kitchen & food
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
    "hot dog", "pizza", "donut", "cake",
    # furniture & electronics
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
    "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # household objects
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
    "toothbrush",
]
|
src/realtime.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# import libraries
|
| 2 |
+
from ultralytics import YOLO
|
| 3 |
+
import cv2
|
| 4 |
+
import sys
|
| 5 |
+
|
| 6 |
+
def realtime(video):
    """Play a video source with live YOLOv8 detection overlays.

    Opens the source with OpenCV, runs YOLOv8n inference on every frame,
    and shows the annotated frames in a window until the stream ends or
    the user presses 'q'.

    Args:
        video: Path to a video file (passed straight to cv2.VideoCapture).
    """
    # Load the YOLOv8 model
    model = YOLO('yolov8n.pt')

    # Open the video file and request a capture size.
    cap = cv2.VideoCapture(video)
    # NOTE(review): property 3 is FRAME_WIDTH and 4 is FRAME_HEIGHT, so this
    # requests 720x1280 (portrait) — confirm it isn't a swapped 1280x720.
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 720)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1280)

    try:
        # Loop through the video frames
        while cap.isOpened():
            # Read a frame from the video
            success, frame = cap.read()
            if not success:
                # End of the video (or a read failure): stop the loop.
                break

            # Run YOLOv8 inference and draw the results on the frame.
            annotated_frame = model(frame, verbose=False)[0].plot()

            # Display the annotated frame
            cv2.imshow("YOLOv8 Inference", annotated_frame)

            # Break the loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release the capture and close the window even if inference raised.
        cap.release()
        cv2.destroyAllWindows()
|
| 40 |
+
|
| 41 |
+
if __name__ == '__main__':
    # CLI entry point: first argument is the video path.
    if len(sys.argv) < 2:
        # Fail with a usage message instead of a bare IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <video-path>")
    realtime(sys.argv[1])
|
src/yolov8n.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31e20dde3def09e2cf938c7be6fe23d9150bbbe503982af13345706515f2ef95
|
| 3 |
+
size 6534387
|