kyboface committed on
Commit
a588b30
·
verified ·
1 Parent(s): 7af065d

Upload 2 files

Browse files
tools/convert_img_to_video.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import cv2
3
+ import numpy as np
4
+ from pathlib import Path
5
+
6
class ImageProcessor:
    """Build a slideshow-style video from still images listed in a YAML config.

    Each config entry names an image path plus display parameters: duration
    (seconds), a translation given as a percentage of the image's own
    width/height, and a zoom scale. The first image's size defines the
    output frame size.
    """

    def __init__(self, yaml_path):
        """Load the YAML config and every referenced image.

        Args:
            yaml_path: Path to a YAML file with a top-level ``images`` list.

        Raises:
            ValueError: If any configured image cannot be read.
        """
        with open(yaml_path, 'r') as f:
            self.config = yaml.safe_load(f)

        # Per-image dicts: {'image', 'duration', 'translation', 'scale'}.
        self.images_info = []
        # (width, height) of the first image; used as the video frame size.
        self.reference_size = None
        self._load_images()

    def _load_images(self):
        """Read every configured image and record its display parameters."""
        for img_config in self.config['images']:
            img = cv2.imread(img_config['path'])
            if img is None:
                raise ValueError(f"Cannot load image: {img_config['path']}")

            info = {
                'image': img,
                'duration': float(img_config.get('duration', 1.0)),
                'translation': img_config.get('translation', [0, 0]),
                'scale': float(img_config.get('scale', 1.0)),
            }
            self.images_info.append(info)

            # The first image defines the output frame size (w, h).
            if self.reference_size is None:
                self.reference_size = (img.shape[1], img.shape[0])

    def _translate_image(self, img, translation):
        """Shift the image by a percentage of its own width/height.

        Positive x moves right, positive y moves down. The canvas size is
        kept, so vacated pixels become black; they are removed afterwards by
        :meth:`_crop_black_borders`.
        """
        height, width = img.shape[:2]

        # Percent -> pixel offsets.
        tx = int(width * translation[0] / 100)
        ty = int(height * translation[1] / 100)

        # 2x3 affine matrix encoding a pure translation.
        M = np.float32([[1, 0, tx], [0, 1, ty]])

        # warpAffine keeps the original dimensions; out-of-frame area is black.
        return cv2.warpAffine(img, M, (width, height))

    def _crop_black_borders(self, img):
        """Crop the image to the bounding box of its non-black pixels."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Any pixel with intensity > 1 counts as content.
        _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)

        coords = cv2.findNonZero(thresh)
        if coords is None:
            # Entirely black frame: nothing to crop.
            return img

        x, y, w, h = cv2.boundingRect(coords)
        return img[y:y + h, x:x + w]

    def _scale_image(self, img, scale, target_size):
        """Zoom the image by ``scale`` and return it at ``target_size``.

        ``scale <= 1`` plainly resizes to the target. ``scale > 1`` enlarges
        first and then center-crops back to the target size, giving a
        zoom-in effect.
        """
        if scale <= 1:
            return cv2.resize(img, target_size)

        height, width = img.shape[:2]
        scaled_width = int(width * scale)
        scaled_height = int(height * scale)
        scaled = cv2.resize(img, (scaled_width, scaled_height))

        # BUG FIX: when the (border-cropped) source is still smaller than the
        # target, the original center-crop produced NEGATIVE offsets, which
        # Python interprets as end-relative indices and yields an empty or
        # wrong slice (crashing the later resize). Clamp offsets to 0 and
        # resize if the crop could not cover the full target area.
        start_x = max((scaled_width - target_size[0]) // 2, 0)
        start_y = max((scaled_height - target_size[1]) // 2, 0)
        cropped = scaled[start_y:start_y + target_size[1],
                         start_x:start_x + target_size[0]]
        if cropped.shape[0] != target_size[1] or cropped.shape[1] != target_size[0]:
            cropped = cv2.resize(cropped, target_size)

        return cropped

    def _transform_image(self, img, translation, scale):
        """Apply transformations in sequence: translation → cropping → scaling."""
        original_size = (img.shape[1], img.shape[0])

        # 1. Translation (fills the vacated area with black).
        translated = self._translate_image(img, translation)

        # 2. Remove the black border the translation introduced.
        cropped = self._crop_black_borders(translated)

        # 3. Scale back to the original dimensions.
        # NOTE(review): resizing the crop back distorts the aspect ratio
        # slightly when the x and y translation percentages differ.
        return self._scale_image(cropped, scale, original_size)

    def create_video(self, output_path, fps=25):
        """Encode every configured image into an mp4 at ``output_path``.

        Each image is shown for its configured duration, then the file is
        re-encoded by :meth:`_improve_video_quality`.

        Raises:
            ValueError: If no images were loaded.
            RuntimeError: If the video writer cannot be opened.
        """
        if not self.images_info:
            raise ValueError("No images loaded; nothing to encode.")

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, self.reference_size)
        # BUG FIX: the original never checked the writer, silently producing
        # an empty/corrupt file when the codec or path is unusable.
        if not out.isOpened():
            raise RuntimeError(f"Cannot open video writer for: {output_path}")

        try:
            for info in self.images_info:
                transformed = self._transform_image(
                    info['image'],
                    info['translation'],
                    info['scale'],
                )

                # Resize to reference dimensions if needed.
                if transformed.shape[:2] != (self.reference_size[1], self.reference_size[0]):
                    transformed = cv2.resize(transformed, self.reference_size)

                # BUG FIX: guarantee at least one frame even when
                # duration * fps rounds down to zero.
                n_frames = max(int(info['duration'] * fps), 1)
                for _ in range(n_frames):
                    out.write(transformed)
        finally:
            out.release()

        # Enhance video quality (best-effort re-encode).
        self._improve_video_quality(output_path)

    def _improve_video_quality(self, video_path):
        """Re-encode the file with libx264 (CRF 18) for better quality.

        Best-effort: if ffmpeg is missing or fails, the original file is
        left untouched and any partial temp output is removed.
        """
        import os
        import subprocess

        temp_path = video_path + '.temp.mp4'

        cmd = [
            'ffmpeg', '-i', video_path,
            '-c:v', 'libx264',
            '-preset', 'slow',
            '-crf', '18',
            '-y',
            temp_path,
        ]

        # BUG FIX: the original ignored ffmpeg's exit status and then always
        # called os.replace — crashing with FileNotFoundError when ffmpeg was
        # absent, or overwriting a good video with a broken temp file when the
        # encode failed.
        try:
            subprocess.run(cmd, check=True)
        except (FileNotFoundError, subprocess.CalledProcessError):
            if os.path.exists(temp_path):
                os.remove(temp_path)
            return

        os.replace(temp_path, video_path)
145
+
146
def main():
    """Build the demo video from the default tools config."""
    image_processor = ImageProcessor('tools/i2v_config.yaml')
    image_processor.create_video('convertd_video.mp4', fps=25)


if __name__ == '__main__':
    main()
152
+
tools/i2v_config.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ images:
2
+ # - path: "xxx.jpg" # Image path
3
+ # duration: 4.0 # Display duration (in seconds)
4
+ # # Translation: [x, y] percentage. Positive x is right, positive y is down.
5
+ # # This pans the image a fixed 5% right and 2% up; the offset is static, not animated over time.
6
+ # translation: [5, -2]
7
+ # # Scale: The final zoom factor. 1.0 is no zoom.
8
+ # # This applies a fixed 1.2x zoom (center crop); the zoom is static, not animated over time.
9
+ # scale: 1.2
10
+ - path: "examples/single/ref_image.png"
11
+ duration: 2 # seconds
12
+ translation: [0, 0] # [dx, dy] as a percentage of image width/height (positive x right, positive y down)
13
+ scale: 1.0 # Scale factor (1.0 = no change, >1.0 zoom in, <1.0 zoom out)
14
+ - path: "examples/single/ref_image.png"
15
+ duration: 3.0
16
+ translation: [-7, -7]
17
+ scale: 1.0