Aitrepreneur committed on
Commit
8cfd5b3
·
verified ·
1 Parent(s): f038d36

Upload io_utils.py

Browse files
Files changed (1) hide show
  1. io_utils.py +86 -0
io_utils.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from typing import Optional
4
+
5
+ import numpy as np
6
+ from moviepy.editor import ImageSequenceClip, AudioFileClip
7
+ from scipy.io import wavfile
8
+
9
+ # Fix permission problem: redirect the tempfile module to a custom temporary directory ===
10
+ # Create a "temp" folder under the current working directory (os.getcwd()) at import time
11
+ custom_temp = os.path.join(os.getcwd(), "temp")
12
+ os.makedirs(custom_temp, exist_ok=True)
13
+ tempfile.tempdir = custom_temp
14
+ # ==============================================================================
15
+
16
def save_video(
    output_path: str,
    video_numpy: np.ndarray,
    audio_numpy: Optional[np.ndarray] = None,
    sample_rate: int = 16000,
    fps: int = 24,
) -> str:
    """
    Combine a sequence of video frames with an optional audio track and save as an MP4.

    The audio (if any) is first written to a temporary 16-bit PCM WAV file, muxed
    into the video, and the temporary file is removed again before returning,
    whether or not encoding succeeded.

    Args:
        output_path (str): Path to the output MP4 file.
        video_numpy (np.ndarray): Numpy array of frames. Shape (C, F, H, W) with
            C equal to 1 or 3. Values can be in range [-1, 1] or [0, 255]; an array
            whose maximum is <= 1.0 is treated as [-1, 1], anything else as [0, 255].
        audio_numpy (Optional[np.ndarray]): 1D or 2D numpy array of audio samples, range [-1, 1].
        sample_rate (int): Sample rate of the audio in Hz. Defaults to 16000.
        fps (int): Frames per second for the video. Defaults to 24.

    Returns:
        str: Path to the saved MP4 file.

    Raises:
        AssertionError: If ``video_numpy`` / ``audio_numpy`` fail the type, shape
            or range checks below.
    """

    # Validate inputs (kept as assertions so callers see the same exception type)
    assert isinstance(video_numpy, np.ndarray), "video_numpy must be a numpy array"
    assert video_numpy.ndim == 4, "video_numpy must have shape (C, F, H, W)"
    assert video_numpy.shape[0] in {1, 3}, "video_numpy must have 1 or 3 channels"

    if audio_numpy is not None:
        assert isinstance(audio_numpy, np.ndarray), "audio_numpy must be a numpy array"
        assert np.abs(audio_numpy).max() <= 1.0, "audio_numpy values must be in range [-1, 1]"

    # Reorder dimensions: (C, F, H, W) → (F, H, W, C)
    video_numpy = video_numpy.transpose(1, 2, 3, 0)

    if video_numpy.max() <= 1.0:
        # Values look like [-1, 1]: rescale to [0, 255]
        video_numpy = np.clip(video_numpy, -1, 1)
        video_numpy = ((video_numpy + 1) / 2 * 255).astype(np.uint8)
    else:
        # Values look like [0, 255]: clip before the cast so slightly out-of-range
        # values saturate instead of wrapping around in uint8
        video_numpy = np.clip(video_numpy, 0, 255).astype(np.uint8)

    # Convert numpy array to a list of frames and build the video clip
    frames = list(video_numpy)
    clip = ImageSequenceClip(frames, fps=fps)

    audio_clip = None
    temp_audio_path = None
    final_clip = clip
    try:
        if audio_numpy is not None:
            # Reserve a unique file name, then close the handle before anything
            # else opens the path (an open handle can block re-opening on Windows).
            with tempfile.NamedTemporaryFile(suffix=".wav", mode="wb", delete=False) as temp_audio_file:
                temp_audio_path = temp_audio_file.name

            wavfile.write(
                temp_audio_path,
                sample_rate,
                (audio_numpy * 32767).astype(np.int16),
            )

            audio_clip = AudioFileClip(temp_audio_path)
            final_clip = clip.set_audio(audio_clip)

        # Write final video to disk
        final_clip.write_videofile(
            output_path, codec="libx264", audio_codec="aac", fps=fps, verbose=False, logger=None
        )
    finally:
        final_clip.close()
        # Close the audio reader explicitly; closing the video clip is not relied
        # upon to release it, and an open reader would keep the WAV file locked.
        if audio_clip is not None:
            audio_clip.close()
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the
                # real outcome (success or the original encoding error).
                pass

    return output_path