Spaces:

datbkpro
/

voicebot

Sleeping

App Files Files Community

datbkpro committed on Nov 7, 2025

Commit

df27322

verified ·

1 Parent(s): 1067054

Create stream_object_detection_service.py

Browse files

Files changed (1) hide show

services/stream_object_detection_service.py +72 -0

services/stream_object_detection_service.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from PIL import ImageDraw, ImageFont
+import spaces
+import cv2
+from PIL import Image
+import numpy as np
+import torch
+import uuid
+SUBSAMPLE = 2  # keep one frame out of every SUBSAMPLE; lowers the frame rate to speed up processing
+class StreamObjectDetection :
+    def draw_bounding_boxes(image, boxes, model, conf_threshold):
+        draw = ImageDraw.Draw(image)
+        font = ImageFont.load_default()
+        for score, label, box in zip(boxes["scores"], boxes["labels"], boxes["boxes"]):
+            if score < conf_threshold:
+                continue
+            x0, y0, x1, y1 = box
+            label_text = f"{model.config.id2label[label.item()]}: {score:.2f}"
+            draw.rectangle([x0, y0, x1, y1], outline="red", width=3)
+            draw.text((x0 + 3, y0 + 3), label_text, fill="white", font=font)
+        return image
+    @spaces.CPU  # dùng GPU nếu chạy trên Hugging Face
+    def stream_object_detection(video, conf_threshold):
+        cap = cv2.VideoCapture(video)
+        video_codec = cv2.VideoWriter_fourcc(*"mp4v")
+        fps = int(cap.get(cv2.CAP_PROP_FPS))
+        desired_fps = fps // SUBSAMPLE
+        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2
+        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2
+        iterating, frame = cap.read()
+        n_frames = 0
+        output_video_name = f"output_{uuid.uuid4()}.mp4"
+        output_video = cv2.VideoWriter(output_video_name, video_codec, desired_fps, (width, height))
+        batch = []
+        while iterating:
+            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
+            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            if n_frames % SUBSAMPLE == 0:
+                batch.append(frame)
+            if len(batch) == 2 * desired_fps:  # mỗi 2s xử lý 1 lần
+                inputs = image_processor(images=batch, return_tensors="pt").to(model.device)
+                with torch.no_grad():
+                    outputs = model(**inputs)
+                boxes = image_processor.post_process_object_detection(
+                    outputs,
+                    target_sizes=torch.tensor([(height, width)] * len(batch)).to(model.device),
+                    threshold=conf_threshold,
+                )
+                for img, box in zip(batch, boxes):
+                    pil_image = draw_bounding_boxes(Image.fromarray(img), box, model, conf_threshold)
+                    frame = np.array(pil_image)[:, :, ::-1]
+                    output_video.write(frame)
+                batch = []
+                output_video.release()
+                yield output_video_name  # stream ra video đã xử lý
+                output_video_name = f"output_{uuid.uuid4()}.mp4"
+                output_video = cv2.VideoWriter(output_video_name, video_codec, desired_fps, (width, height))
+            iterating, frame = cap.read()
+            n_frames += 1