# NOTE: header residue from the hosting site's file viewer (Space status: Sleeping; file size: 4,670 bytes; hashes c3cafb2, 8023e2e; line-number gutter 1-126).
import os
from moviepy import ImageSequenceClip
from natsort import natsorted
from transformers import pipeline
from transformers.image_utils import load_image
from PIL import ImageDraw, Image, ImageFont
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
import torch
import cv2
import os
def create_video_from_images(folder_path, output_video_file, fps):
    """
    Assemble the images found in a folder into a single video file.

    Files are taken in natural (human-friendly) name order, so that
    'image-2.png' precedes 'image-10.png'. Every problem is reported on
    stdout; nothing is raised to the caller and nothing is returned.

    Args:
        folder_path (str): Directory that holds the source images.
        output_video_file (str): Path of the video to write (e.g., 'my_video.mp4').
        fps (int): Frame rate used both for the clip and for the written file.
    """
    # Guard: the source directory has to exist before anything is listed.
    if not os.path.isdir(folder_path):
        print(f"Error: The folder '{folder_path}' does not exist.")
        return

    # Walk the directory in natural order and keep only recognised image types
    # (the extension match is case-insensitive).
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
    frame_paths = []
    for entry in natsorted(os.listdir(folder_path)):
        if entry.lower().endswith(image_suffixes):
            frame_paths.append(os.path.join(folder_path, entry))

    frame_total = len(frame_paths)
    if frame_total == 0:
        print(f"Error: No supported image files found in '{folder_path}'.")
        return
    if frame_total < 2:
        print("Error: At least two images are required to create a video.")
        return

    print(f"Found {frame_total} images. Creating video...")

    try:
        # Build the clip from the ordered paths, then encode it to disk.
        sequence = ImageSequenceClip(frame_paths, fps=fps)
        sequence.write_videofile(output_video_file, fps=fps)
        print(f"Successfully created video: '{output_video_file}'")
    except Exception as exc:
        # Best-effort: report the failure instead of propagating it.
        print(f"An error occurred while creating the video: {exc}")
def object_detection(path_video, output_folder, config):
    """
    Run zero-shot object detection on a video and save the annotated frames.

    Frames are read one by one with OpenCV, passed through the
    'iSEE-Laboratory/llmdet_tiny' zero-shot detector and written to
    ``output_folder`` as '<frame_index>.png'. Every detection at or above the
    score threshold gets a rectangle and a '<label>: <score>' caption.

    Args:
        path_video (str): Path of the video file to read.
        output_folder (str): Directory for the annotated PNG frames; it is
            created if it does not exist yet.
        config (dict): Settings for the run. Keys read here:
            'labels': text prompts handed to the processor (default []).
            'frame_colour': outline colour of the boxes (default None).
            'threshold': minimum detection score (default 0.50).
            'max_frames': maximum number of frames to process, or None for
                no limit (default 90).
    """
    text_labels = config.get('labels', [])
    frame_color = config.get('frame_colour')
    threshold = config.get('threshold', 0.50)
    max_frames = config.get('max_frames', 90)

    # Open the video first so an unreadable input is reported before the
    # (expensive) model load.
    vidcap = cv2.VideoCapture(path_video)
    if not vidcap.isOpened():
        print(f"Error: Could not open video '{path_video}'.")
        vidcap.release()
        return

    # image.save() does not create missing directories.
    os.makedirs(output_folder, exist_ok=True)

    try:
        # Alternative checkpoint tried by the original author:
        # "openmmlab-community/mm_grounding_dino_tiny_o365v1_goldg_v3det"
        checkpoint = "iSEE-Laboratory/llmdet_tiny"
        model = AutoModelForZeroShotObjectDetection.from_pretrained(checkpoint, device_map="auto")
        processor = AutoProcessor.from_pretrained(checkpoint)

        font = None  # loaded once, on the first detection that needs a caption
        frame_count = 0

        while vidcap.isOpened():
            if max_frames is not None and frame_count >= max_frames:
                break

            ret, frame = vidcap.read()
            if not ret:
                break
            print(f"Processing frame {frame_count}")

            # OpenCV delivers BGR; PIL and the processor expect RGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(rgb_frame).convert("RGB")

            # Follow the model's actual device: device_map="auto" decides the
            # placement, so a hand-picked "cuda"/"cpu" string may not match it.
            inputs = processor(text=text_labels, images=image, return_tensors="pt").to(model.device)
            with torch.no_grad():
                outputs = model(**inputs)

            results = processor.post_process_grounded_object_detection(
                outputs, threshold=threshold, target_sizes=[(image.height, image.width)])[0]

            draw = ImageDraw.Draw(image)
            scores = results.get("scores", [])
            text_labels_res = results.get("text_labels", [])
            boxes = results.get("boxes", [])

            for box, score, text_label in zip(boxes, scores, text_labels_res):
                # Plain floats keep PIL independent of the tensor's device/dtype.
                xmin, ymin, xmax, ymax = (float(v) for v in box)
                draw.rectangle((xmin, ymin, xmax, ymax), outline=frame_color, width=10)

                try:
                    score_val = float(score)
                except (TypeError, ValueError):
                    score_val = float(score.item())

                if font is None:
                    font = ImageFont.truetype("fonts/Perfect DOS VGA 437.ttf", size=60)
                draw.text((xmin, ymin), f"{text_label}: {round(score_val,2)}", fill="black", stroke_width=1, stroke_fill="black", font=font)

            # The PIL image was annotated in place; persist it under the frame index.
            image.save(os.path.join(output_folder, f"{frame_count}.png"))

            # Stop early on 'q'. NOTE(review): this function opens no OpenCV
            # window itself, so the key can presumably only be seen if a
            # HighGUI window exists elsewhere in the process - confirm.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            frame_count += 1
    finally:
        # Always free the capture handle, even if detection or saving failed.
        vidcap.release()
        cv2.destroyAllWindows()