# Residue from the web file viewer this source was copied from:
#   Spaces: Running
#   File size: 5,167 Bytes
#   1787f55
# (The viewer's line-number gutter, 1-153, carried no content and was dropped.)
import yaml
import cv2
import numpy as np
from pathlib import Path
class ImageProcessor:
    """Build a slideshow video from still images described in a YAML file.

    The YAML document must contain a top-level ``images`` list.  Each entry
    needs a ``path`` and may carry:

    * ``duration``    - seconds the image stays on screen (default ``1.0``)
    * ``translation`` - ``[x, y]`` shift in percent of the image width and
      height (default ``[0, 0]``)
    * ``scale``       - zoom factor; values above 1 zoom in (default ``1.0``)

    The size of the first image becomes the frame size of the video.
    """

    # Tolerance added before truncating ``duration * fps`` so that products
    # such as 0.29 * 100 (== 28.999999999999996) still count as 29 frames.
    _FRAME_EPSILON = 1e-6

    def __init__(self, yaml_path):
        """Load the configuration at *yaml_path* and read every image.

        Raises:
            ValueError: if one of the configured images cannot be read.
        """
        # YAML streams are Unicode; do not depend on the locale's encoding.
        with open(yaml_path, 'r', encoding='utf-8') as f:
            self.config = yaml.safe_load(f)
        self.images_info = []
        self.reference_size = None  # (width, height) of the first image
        self._load_images()

    def _load_images(self):
        """Read each configured image and store it with its parameters."""
        for img_config in self.config['images']:
            img = cv2.imread(img_config['path'])
            if img is None:
                raise ValueError(f"Cannot load image: {img_config['path']}")
            info = {
                'image': img,
                'duration': float(img_config.get('duration', 1.0)),
                'translation': img_config.get('translation', [0, 0]),
                'scale': float(img_config.get('scale', 1.0)),
            }
            self.images_info.append(info)
            if self.reference_size is None:
                # OpenCV sizes are (width, height); ndarray shape is (rows, cols).
                self.reference_size = (img.shape[1], img.shape[0])

    def _translate_image(self, img, translation):
        """Shift *img* by ``translation`` percent, keeping its dimensions.

        ``translation[0]`` is a percentage of the width and
        ``translation[1]`` a percentage of the height.  Areas uncovered by
        the shift are filled by ``cv2.warpAffine``'s default border.
        """
        height, width = img.shape[:2]
        # Convert the percentages into whole pixels.
        tx = int(width * translation[0] / 100)
        ty = int(height * translation[1] / 100)
        M = np.float32([[1, 0, tx], [0, 1, ty]])
        return cv2.warpAffine(img, M, (width, height))

    def _crop_black_borders(self, img):
        """Crop *img* to the bounding box of its non-black pixels.

        A pixel counts as black when its grayscale value is 0 or 1.  If the
        whole image is black it is returned unchanged.

        NOTE: the test is content based, so rows or columns at the edge of
        the picture that are genuinely black are removed as well.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
        coords = cv2.findNonZero(thresh)
        if coords is None:
            return img
        x, y, w, h = cv2.boundingRect(coords)
        return img[y:y + h, x:x + w]

    @staticmethod
    def _center_crop(img, target_size):
        """Return the centered ``(width, height)`` window of *img*.

        When *img* is smaller than the target along an axis, the full extent
        of that axis is returned.  The start offset is clamped at 0 because a
        negative start would be read by NumPy as an index from the end and
        select the wrong pixels.
        """
        target_w, target_h = target_size
        height, width = img.shape[:2]
        start_x = max((width - target_w) // 2, 0)
        start_y = max((height - target_h) // 2, 0)
        return img[start_y:start_y + target_h, start_x:start_x + target_w]

    def _scale_image(self, img, scale, target_size):
        """Zoom *img* by *scale* and return an image of *target_size*.

        Args:
            img: image array.
            scale: zoom factor.  Values of 1 or less do not zoom; the image
                is simply resized to *target_size*.
            target_size: ``(width, height)`` of the result.

        Returns:
            An image whose size is exactly *target_size*.
        """
        if scale <= 1:
            return cv2.resize(img, target_size)

        height, width = img.shape[:2]
        scaled_width = int(width * scale)
        scaled_height = int(height * scale)
        scaled = cv2.resize(img, (scaled_width, scaled_height))

        cropped = self._center_crop(scaled, target_size)
        # The input may have lost area to border cropping, so even after
        # zooming it can be smaller than the target; stretch it in that case.
        if (cropped.shape[1], cropped.shape[0]) != tuple(target_size):
            cropped = cv2.resize(cropped, target_size)
        return cropped

    def _transform_image(self, img, translation, scale):
        """Apply translation, black-border cropping and scaling, in order.

        The result has the same dimensions as *img*.
        """
        original_size = (img.shape[1], img.shape[0])
        translated = self._translate_image(img, translation)
        cropped = self._crop_black_borders(translated)
        return self._scale_image(cropped, scale, original_size)

    @classmethod
    def _frame_count(cls, duration, fps):
        """Return the number of whole frames covering *duration* seconds."""
        return int(duration * fps + cls._FRAME_EPSILON)

    def create_video(self, output_path, fps=25):
        """Write all images to *output_path* as an MP4 and re-encode it.

        Each image is transformed, resized to the reference size if needed
        and repeated for ``duration * fps`` frames.  The file is then
        re-encoded in place with ffmpeg.

        Args:
            output_path: destination file; ``str`` or path-like.
            fps: frames per second of the video.

        Raises:
            ValueError: if no images were loaded.
            IOError: if the video writer cannot be opened.
            subprocess.CalledProcessError: if ffmpeg exits with an error.
        """
        if not self.images_info:
            raise ValueError("No images loaded; cannot create a video")

        # OpenCV and the temp-file naming below both need a plain string.
        output_path = str(output_path)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, self.reference_size)
        try:
            if not out.isOpened():
                raise IOError(f"Cannot open video writer for: {output_path}")

            expected_shape = (self.reference_size[1], self.reference_size[0])
            for info in self.images_info:
                transformed = self._transform_image(
                    info['image'],
                    info['translation'],
                    info['scale'],
                )
                # Every frame must match the writer's frame size.
                if transformed.shape[:2] != expected_shape:
                    transformed = cv2.resize(transformed, self.reference_size)

                for _ in range(self._frame_count(info['duration'], fps)):
                    out.write(transformed)
        finally:
            out.release()

        self._improve_video_quality(output_path)

    def _improve_video_quality(self, video_path):
        """Re-encode *video_path* in place with ffmpeg (libx264, CRF 18).

        The original file is replaced only after ffmpeg succeeds; a partial
        temporary file is removed on failure.

        Raises:
            FileNotFoundError: if the ``ffmpeg`` executable is not found.
            subprocess.CalledProcessError: if ffmpeg exits with an error.
        """
        import os
        import subprocess

        temp_path = video_path + '.temp.mp4'
        cmd = [
            'ffmpeg', '-i', video_path,
            '-c:v', 'libx264',
            '-preset', 'slow',
            '-crf', '18',
            '-y',
            temp_path,
        ]
        try:
            subprocess.run(cmd, check=True)
            os.replace(temp_path, video_path)
        finally:
            # After a successful replace the temp file no longer exists; on
            # any failure make sure a half-written file is not left behind.
            if os.path.exists(temp_path):
                os.remove(temp_path)
def main():
    """Render the slideshow described by ``tools/i2v_config.yaml``.

    The resulting video is written to ``convertd_video.mp4`` at 25 fps.
    """
    ImageProcessor('tools/i2v_config.yaml').create_video(
        'convertd_video.mp4',
        fps=25,
    )
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|