kyboface committed on
Commit
a588b30
·
verified ·
1 Parent(s): 7af065d

Upload 2 files

Browse files
tools/convert_img_to_video.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import cv2
3
+ import numpy as np
4
+ from pathlib import Path
5
+
6
class ImageProcessor:
    """Build a slideshow-style video from still images listed in a YAML config.

    Each config entry names an image path plus display parameters: duration
    (seconds), a translation given as a percentage of the image's own
    width/height, and a zoom scale. The first image's size defines the
    output frame size.
    """

    def __init__(self, yaml_path):
        """Load the YAML config and every referenced image.

        Args:
            yaml_path: Path to a YAML file with a top-level ``images`` list.

        Raises:
            ValueError: If any configured image cannot be read.
        """
        with open(yaml_path, 'r') as f:
            self.config = yaml.safe_load(f)

        # Per-image dicts: {'image', 'duration', 'translation', 'scale'}.
        self.images_info = []
        # (width, height) of the first image; used as the video frame size.
        self.reference_size = None
        self._load_images()

    def _load_images(self):
        """Read every configured image and record its display parameters."""
        for img_config in self.config['images']:
            img = cv2.imread(img_config['path'])
            if img is None:
                raise ValueError(f"Cannot load image: {img_config['path']}")

            info = {
                'image': img,
                'duration': float(img_config.get('duration', 1.0)),
                'translation': img_config.get('translation', [0, 0]),
                'scale': float(img_config.get('scale', 1.0)),
            }
            self.images_info.append(info)

            # The first image defines the output frame size (w, h).
            if self.reference_size is None:
                self.reference_size = (img.shape[1], img.shape[0])

    def _translate_image(self, img, translation):
        """Shift the image by a percentage of its own width/height.

        Positive x moves right, positive y moves down. The canvas size is
        kept, so vacated pixels become black; they are removed afterwards by
        :meth:`_crop_black_borders`.
        """
        height, width = img.shape[:2]

        # Percent -> pixel offsets.
        tx = int(width * translation[0] / 100)
        ty = int(height * translation[1] / 100)

        # 2x3 affine matrix encoding a pure translation.
        M = np.float32([[1, 0, tx], [0, 1, ty]])

        # warpAffine keeps the original dimensions; out-of-frame area is black.
        return cv2.warpAffine(img, M, (width, height))

    def _crop_black_borders(self, img):
        """Crop the image to the bounding box of its non-black pixels."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Any pixel with intensity > 1 counts as content.
        _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)

        coords = cv2.findNonZero(thresh)
        if coords is None:
            # Entirely black frame: nothing to crop.
            return img

        x, y, w, h = cv2.boundingRect(coords)
        return img[y:y + h, x:x + w]

    def _scale_image(self, img, scale, target_size):
        """Zoom the image by ``scale`` and return it at ``target_size``.

        ``scale <= 1`` plainly resizes to the target. ``scale > 1`` enlarges
        first and then center-crops back to the target size, giving a
        zoom-in effect.
        """
        if scale <= 1:
            return cv2.resize(img, target_size)

        height, width = img.shape[:2]
        scaled_width = int(width * scale)
        scaled_height = int(height * scale)
        scaled = cv2.resize(img, (scaled_width, scaled_height))

        # BUG FIX: when the (border-cropped) source is still smaller than the
        # target, the original center-crop produced NEGATIVE offsets, which
        # Python interprets as end-relative indices and yields an empty or
        # wrong slice (crashing the later resize). Clamp offsets to 0 and
        # resize if the crop could not cover the full target area.
        start_x = max((scaled_width - target_size[0]) // 2, 0)
        start_y = max((scaled_height - target_size[1]) // 2, 0)
        cropped = scaled[start_y:start_y + target_size[1],
                         start_x:start_x + target_size[0]]
        if cropped.shape[0] != target_size[1] or cropped.shape[1] != target_size[0]:
            cropped = cv2.resize(cropped, target_size)

        return cropped

    def _transform_image(self, img, translation, scale):
        """Apply transformations in sequence: translation → cropping → scaling."""
        original_size = (img.shape[1], img.shape[0])

        # 1. Translation (fills the vacated area with black).
        translated = self._translate_image(img, translation)

        # 2. Remove the black border the translation introduced.
        cropped = self._crop_black_borders(translated)

        # 3. Scale back to the original dimensions.
        # NOTE(review): resizing the crop back distorts the aspect ratio
        # slightly when the x and y translation percentages differ.
        return self._scale_image(cropped, scale, original_size)

    def create_video(self, output_path, fps=25):
        """Encode every configured image into an mp4 at ``output_path``.

        Each image is shown for its configured duration, then the file is
        re-encoded by :meth:`_improve_video_quality`.

        Raises:
            ValueError: If no images were loaded.
            RuntimeError: If the video writer cannot be opened.
        """
        if not self.images_info:
            raise ValueError("No images loaded; nothing to encode.")

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, self.reference_size)
        # BUG FIX: the original never checked the writer, silently producing
        # an empty/corrupt file when the codec or path is unusable.
        if not out.isOpened():
            raise RuntimeError(f"Cannot open video writer for: {output_path}")

        try:
            for info in self.images_info:
                transformed = self._transform_image(
                    info['image'],
                    info['translation'],
                    info['scale'],
                )

                # Resize to reference dimensions if needed.
                if transformed.shape[:2] != (self.reference_size[1], self.reference_size[0]):
                    transformed = cv2.resize(transformed, self.reference_size)

                # BUG FIX: guarantee at least one frame even when
                # duration * fps rounds down to zero.
                n_frames = max(int(info['duration'] * fps), 1)
                for _ in range(n_frames):
                    out.write(transformed)
        finally:
            out.release()

        # Enhance video quality (best-effort re-encode).
        self._improve_video_quality(output_path)

    def _improve_video_quality(self, video_path):
        """Re-encode the file with libx264 (CRF 18) for better quality.

        Best-effort: if ffmpeg is missing or fails, the original file is
        left untouched and any partial temp output is removed.
        """
        import os
        import subprocess

        temp_path = video_path + '.temp.mp4'

        cmd = [
            'ffmpeg', '-i', video_path,
            '-c:v', 'libx264',
            '-preset', 'slow',
            '-crf', '18',
            '-y',
            temp_path,
        ]

        # BUG FIX: the original ignored ffmpeg's exit status and then always
        # called os.replace — crashing with FileNotFoundError when ffmpeg was
        # absent, or overwriting a good video with a broken temp file when the
        # encode failed.
        try:
            subprocess.run(cmd, check=True)
        except (FileNotFoundError, subprocess.CalledProcessError):
            if os.path.exists(temp_path):
                os.remove(temp_path)
            return

        os.replace(temp_path, video_path)
145
+
146
def main():
    """Build the demo video from the default tools config."""
    image_processor = ImageProcessor('tools/i2v_config.yaml')
    image_processor.create_video('convertd_video.mp4', fps=25)


if __name__ == '__main__':
    main()
152
+
tools/i2v_config.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ images:
2
+ # - path: "xxx.jpg" # Image path
3
+ # duration: 4.0 # Display duration (in seconds)
4
+ # # Translation: [x, y] percentage. Positive x is right, positive y is down.
5
+ # # This pans the image a fixed 5% right and 2% up; the offset is static, not animated over time.
6
+ # translation: [5, -2]
7
+ # # Scale: The final zoom factor. 1.0 is no zoom.
8
+ # # This applies a fixed 1.2x zoom (center crop); the zoom is static, not animated over time.
9
+ # scale: 1.2
10
+ - path: "examples/single/ref_image.png"
11
+ duration: 2 # seconds
12
+ translation: [0, 0] # [dx, dy] as a percentage of image width/height (positive x right, positive y down)
13
+ scale: 1.0 # Scale factor (1.0 = no change, >1.0 zoom in, <1.0 zoom out)
14
+ - path: "examples/single/ref_image.png"
15
+ duration: 3.0
16
+ translation: [-7, -7]
17
+ scale: 1.0