riversnow
/

so101-segmentation-model

@@ -1,54 +1,37 @@
 import cv2
 import numpy as np
-import torch
 from ultralytics import YOLO
-from safetensors.torch import load_file
# Side-by-side comparison video using weights stored as safetensors.
#
# Builds a YOLO11 "small" segmentation network from its YAML definition,
# loads a safetensors state dict into it, runs it on every frame of
# test_video.mp4 and writes comparison_output.mp4, where each output frame is
# the original frame (left) next to the predicted segments drawn on a black
# canvas (right).

# 1. BUILD THE NETWORK AND ATTACH THE 8 CLASS NAMES
custom_names = {
    0: "so101_base", 1: "so101_shoulder", 2: "so101_upper_arm",
    3: "so101_lower_arm", 4: "so101_wrist", 5: "so101_gripper",
    6: "so101_camera_mount", 7: "so101_moving_jaw"
}
model = YOLO("yolo11s-seg.yaml", task="segment")
# The class count and names are assigned *after* the network has been built;
# the dictionary is not passed to the YOLO constructor.
# NOTE(review): confirm that assigning `nc` here really gives the segmentation
# head 8-class shapes -- load_state_dict() below is strict by default and
# raises on any key or shape mismatch.
model.model.nc = 8
model.names = custom_names

# 2. LOAD THE WEIGHTS
safetensors_path = "weights/best.safetensors"
weights = load_file(safetensors_path)
model.model.load_state_dict(weights)

# 3. VIDEO SETUP
cap = cv2.VideoCapture("test_video.mp4")
if not cap.isOpened():
    raise RuntimeError("Could not open input video: test_video.mp4")

out = None
try:
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # 'avc1' fourcc, chosen by the original author for VS Code player
    # compatibility. Output is twice as wide: two frames side by side.
    fourcc = cv2.VideoWriter_fourcc(*"avc1")
    out = cv2.VideoWriter("comparison_output.mp4", fourcc, fps, (w * 2, h))
    if not out.isOpened():
        # Without this check every out.write() below would be a silent no-op.
        raise RuntimeError(
            "Could not open comparison_output.mp4 for writing "
            "(is the 'avc1' encoder available in this OpenCV build?)"
        )

    print(f"Verified! Using {safetensors_path} with 8 custom classes.")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream or read failure.
            break

        # Inference (verbose=False keeps the console clean)
        results = model(frame, verbose=False)

        # Left: original frame / Right: segments drawn on a black canvas
        black_bg = np.zeros_like(frame)
        right_side = results[0].plot(img=black_bg, boxes=False, labels=True)

        combined_frame = np.hstack((frame, right_side))
        out.write(combined_frame)
finally:
    # Release both handles even on error; releasing the writer is what
    # finalizes the output file.
    cap.release()
    if out is not None:
        out.release()

 import cv2
 import numpy as np
 from ultralytics import YOLO
# Side-by-side comparison video.
#
# Runs the segmentation checkpoint weights/best.pt on every frame of
# test_video.mp4 and writes comparison_output.mp4, where each output frame is
# the original frame (left) next to the predicted segments drawn on a black
# canvas (right).
model = YOLO("weights/best.pt")

cap = cv2.VideoCapture("test_video.mp4")
if not cap.isOpened():
    raise RuntimeError("Could not open input video: test_video.mp4")

out = None
try:
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Output is twice as wide as the input: two frames side by side.
    fourcc = cv2.VideoWriter_fourcc(*"avc1")
    out = cv2.VideoWriter("comparison_output.mp4", fourcc, fps, (w * 2, h))
    if not out.isOpened():
        # Without this check every out.write() below would be a silent no-op.
        raise RuntimeError(
            "Could not open comparison_output.mp4 for writing "
            "(is the 'avc1' encoder available in this OpenCV build?)"
        )

    print("Generating side-by-side video...")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream or read failure.
            break

        results = model(frame)

        # RIGHT SIDE: masks on a black background.
        # img=black_bg makes plot() draw the segments onto the black canvas
        # instead of onto the source frame.
        black_bg = np.zeros_like(frame)
        right_side = results[0].plot(img=black_bg, boxes=False, labels=True)

        # LEFT SIDE is the untouched original frame; join them horizontally.
        combined_frame = np.hstack((frame, right_side))
        out.write(combined_frame)
finally:
    # Release both handles even on error; releasing the writer is what
    # finalizes the output file.
    cap.release()
    if out is not None:
        out.release()