root committed on
Commit
2797e34
·
1 Parent(s): fee163c

adding rife

Browse files
Practical-RIFE ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit f3e48ceb02e4c21bc8868b03994b98f3402ffb3d
__pycache__/handler.cpython-310.pyc CHANGED
Binary files a/__pycache__/handler.cpython-310.pyc and b/__pycache__/handler.cpython-310.pyc differ
 
download_weights.py CHANGED
@@ -3,7 +3,6 @@ from pathlib import Path, PurePosixPath
3
 
4
  from huggingface_hub import hf_hub_download
5
 
6
-
7
  def prepare_base_model():
8
  print(f'Preparing base stable-diffusion-v1-5 weights...')
9
  local_dir = "./pretrained_weights/stable-diffusion-v1-5"
 
3
 
4
  from huggingface_hub import hf_hub_download
5
 
 
6
  def prepare_base_model():
7
  print(f'Preparing base stable-diffusion-v1-5 weights...')
8
  local_dir = "./pretrained_weights/stable-diffusion-v1-5"
handler.py CHANGED
@@ -10,6 +10,8 @@ from omegaconf import OmegaConf
10
  from transformers import CLIPVisionModelWithProjection
11
  import cv2
12
  import os
 
 
13
  from src.models.pose_guider import PoseGuider
14
  from src.models.unet_2d_condition import UNet2DConditionModel
15
  from src.models.unet_3d import UNet3DConditionModel
@@ -20,6 +22,10 @@ from roop.core import start, decode_execution_providers, suggest_max_memory, sug
20
  from roop.utilities import normalize_output_path
21
  from roop.processors.frame.core import get_frame_processors_modules
22
 
 
 
 
 
23
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
24
 
25
  if device.type != 'cuda':
@@ -35,6 +41,7 @@ class EndpointHandler():
35
 
36
  self.config = OmegaConf.load(config_path)
37
  self.weight_dtype = torch.float16
 
38
  self.pipeline = None
39
  self._initialize_pipeline()
40
 
@@ -45,13 +52,13 @@ class EndpointHandler():
45
  if not os.path.exists(config_path):
46
  raise FileNotFoundError(f"The sd-vae-ft-mse folder was not found at: {config_path}")
47
 
48
- vae = AutoencoderKL.from_pretrained(config_path).to(device, dtype=self.weight_dtype)
49
 
50
  pretrained_base_model_path_unet = os.path.join(base_dir, 'pretrained_weights', 'stable-diffusion-v1-5', 'unet')
51
  print("model path is " + pretrained_base_model_path_unet)
52
  reference_unet = UNet2DConditionModel.from_pretrained(
53
  pretrained_base_model_path_unet
54
- ).to(dtype=self.weight_dtype, device="cuda")
55
 
56
  inference_config_path = os.path.join(base_dir, 'configs', 'inference', 'inference_v2.yaml')
57
  motion_module_path = os.path.join(base_dir, 'pretrained_weights', 'motion_module.pth')
@@ -65,10 +72,10 @@ class EndpointHandler():
65
  pretrained_base_model_path_unet,
66
  motion_module_path,
67
  unet_additional_kwargs=infer_config.unet_additional_kwargs,
68
- ).to(device, dtype=self.weight_dtype)
69
 
70
- pose_guider = PoseGuider(320, block_out_channels=(16, 32, 96, 256)).to(device, dtype=self.weight_dtype)
71
- image_enc = CLIPVisionModelWithProjection.from_pretrained(image_encoder_path).to(device, dtype=self.weight_dtype)
72
  sched_kwargs = OmegaConf.to_container(infer_config.noise_scheduler_kwargs)
73
  scheduler = DDIMScheduler(**sched_kwargs)
74
 
@@ -83,7 +90,7 @@ class EndpointHandler():
83
  denoising_unet=denoising_unet,
84
  pose_guider=pose_guider,
85
  scheduler=scheduler
86
- ).to(device, dtype=self.weight_dtype)
87
 
88
  def _crop_face(self, image, save_path="cropped_face.jpg", margin=0.5):
89
  # Convert image to OpenCV format
@@ -137,17 +144,112 @@ class EndpointHandler():
137
  roop.globals.video_encoder = "libx264"
138
  roop.globals.video_quality = 50
139
  roop.globals.max_memory = suggest_max_memory()
140
- roop.globals.execution_providers = decode_execution_providers(["cpu"])
 
 
141
  roop.globals.execution_threads = suggest_execution_threads()
142
 
 
 
 
 
 
 
143
  for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
144
- if not frame_processor.pre_check():
145
- raise ValueError("Frame processor pre-check failed.")
 
 
 
146
 
147
  start()
148
 
 
 
 
 
 
149
  return os.path.join(os.getcwd(), output_path)
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  def __call__(self, data: Any) -> Dict[str, str]:
152
  inputs = data.get("inputs", {})
153
  ref_image_base64 = inputs.get("ref_image", "")
@@ -169,11 +271,15 @@ class EndpointHandler():
169
 
170
  if not os.path.exists(pose_video_path):
171
  raise FileNotFoundError(f"The pose video was not found at: {pose_video_path}")
172
-
173
- torch.manual_seed(seed)
174
- pose_images = read_frames(pose_video_path)
175
- src_fps = get_fps(pose_video_path)
176
 
 
 
 
 
 
 
 
 
177
  pose_list = []
178
  total_length = min(length, len(pose_images))
179
  for pose_image_pil in pose_images[:total_length]:
@@ -199,11 +305,26 @@ class EndpointHandler():
199
  cropped_face_path = os.path.join(save_dir, "cropped_face.jpg")
200
  cropped_face = self._crop_face(ref_image, save_path=cropped_face_path)
201
 
 
 
 
 
202
  # Perform face swapping
203
- final_video_path = self._swap_face(cropped_face, animation_path)
 
 
 
 
 
 
 
 
 
 
 
204
 
205
  # Encode the final video in base64
206
- with open(final_video_path, "rb") as video_file:
207
  video_base64 = base64.b64encode(video_file.read()).decode("utf-8")
208
 
209
  torch.cuda.empty_cache()
 
10
  from transformers import CLIPVisionModelWithProjection
11
  import cv2
12
  import os
13
+ import sys
14
+ import skvideo.io
15
  from src.models.pose_guider import PoseGuider
16
  from src.models.unet_2d_condition import UNet2DConditionModel
17
  from src.models.unet_3d import UNet3DConditionModel
 
22
  from roop.utilities import normalize_output_path
23
  from roop.processors.frame.core import get_frame_processors_modules
24
 
25
+ import onnxruntime as ort
26
+ import gc
27
+ import subprocess
28
+
29
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
30
 
31
  if device.type != 'cuda':
 
41
 
42
  self.config = OmegaConf.load(config_path)
43
  self.weight_dtype = torch.float16
44
+ self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
45
  self.pipeline = None
46
  self._initialize_pipeline()
47
 
 
52
  if not os.path.exists(config_path):
53
  raise FileNotFoundError(f"The sd-vae-ft-mse folder was not found at: {config_path}")
54
 
55
+ vae = AutoencoderKL.from_pretrained(config_path).to(self.device, dtype=self.weight_dtype)
56
 
57
  pretrained_base_model_path_unet = os.path.join(base_dir, 'pretrained_weights', 'stable-diffusion-v1-5', 'unet')
58
  print("model path is " + pretrained_base_model_path_unet)
59
  reference_unet = UNet2DConditionModel.from_pretrained(
60
  pretrained_base_model_path_unet
61
+ ).to(dtype=self.weight_dtype, device=self.device)
62
 
63
  inference_config_path = os.path.join(base_dir, 'configs', 'inference', 'inference_v2.yaml')
64
  motion_module_path = os.path.join(base_dir, 'pretrained_weights', 'motion_module.pth')
 
72
  pretrained_base_model_path_unet,
73
  motion_module_path,
74
  unet_additional_kwargs=infer_config.unet_additional_kwargs,
75
+ ).to(self.device, dtype=self.weight_dtype)
76
 
77
+ pose_guider = PoseGuider(320, block_out_channels=(16, 32, 96, 256)).to(self.device, dtype=self.weight_dtype)
78
+ image_enc = CLIPVisionModelWithProjection.from_pretrained(image_encoder_path).to(self.device, dtype=self.weight_dtype)
79
  sched_kwargs = OmegaConf.to_container(infer_config.noise_scheduler_kwargs)
80
  scheduler = DDIMScheduler(**sched_kwargs)
81
 
 
90
  denoising_unet=denoising_unet,
91
  pose_guider=pose_guider,
92
  scheduler=scheduler
93
+ ).to(self.device, dtype=self.weight_dtype)
94
 
95
  def _crop_face(self, image, save_path="cropped_face.jpg", margin=0.5):
96
  # Convert image to OpenCV format
 
144
  roop.globals.video_encoder = "libx264"
145
  roop.globals.video_quality = 50
146
  roop.globals.max_memory = suggest_max_memory()
147
+
148
+ # Set GPU execution provider
149
+ roop.globals.execution_providers = decode_execution_providers(["CUDAExecutionProvider"])
150
  roop.globals.execution_threads = suggest_execution_threads()
151
 
152
+ # Ensure onnxruntime is using the GPU
153
+ ort.set_default_logger_severity(3) # Suppress verbose logging
154
+ providers = ['CUDAExecutionProvider']
155
+ options = ort.SessionOptions()
156
+ options.intra_op_num_threads = 1
157
+
158
  for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
159
+ if hasattr(frame_processor, 'onnx_session'):
160
+ frame_processor.onnx_session.set_providers(providers, options)
161
+
162
+ # Clear CUDA cache before starting the face swapping process
163
+ torch.cuda.empty_cache()
164
 
165
  start()
166
 
167
+ # Clear CUDA cache after the face swapping process
168
+ for frame_processor in roop.globals.frame_processors:
169
+ del frame_processor
170
+ torch.cuda.empty_cache()
171
+
172
  return os.path.join(os.getcwd(), output_path)
173
 
174
def print_memory_stat_for_stuff(self, phase, log_file="memory_stats.log"):
    """Append a snapshot of PyTorch's CUDA memory counters to a log file.

    Each call adds one section: a header naming ``phase``, four counters
    reported in MB (bytes divided by 1024**2), and a separator line.

    Args:
        phase: Label written into the section header.
        log_file: Path of the text file to append to.
    """
    bytes_per_mb = 1024 ** 2
    counters = (
        ("Allocated memory", torch.cuda.memory_allocated),
        ("Reserved memory", torch.cuda.memory_reserved),
        ("Max allocated memory", torch.cuda.max_memory_allocated),
        ("Max reserved memory", torch.cuda.max_memory_reserved),
    )

    # Read every counter before touching the file so that a failing query
    # cannot leave a half-written section behind.
    section = [f"Memory Stats - {phase}:\n"]
    section.extend(
        f"{label}: {read_bytes() / bytes_per_mb:.2f} MB\n"
        for label, read_bytes in counters
    )
    section.append("=" * 30 + "\n")

    with open(log_file, "a", encoding="utf-8") as f:
        f.writelines(section)
182
+
183
def convert_to_playable_format(self, input_path, output_path):
    """Re-encode a video with ffmpeg's libx264 encoder.

    Args:
        input_path: Video handed to ffmpeg via ``-i``.
        output_path: Destination file; overwritten if it already exists.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    command = [
        "ffmpeg",
        "-nostdin",  # Never read the handler's stdin (no interactive mode)
        "-i", input_path,
        "-c:v", "libx264",
        "-preset", "fast",
        "-crf", "18",
        "-y",  # Overwrite output file if it exists
        output_path,
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    print("Conversion STDOUT:", result.stdout)
    print("Conversion STDERR:", result.stderr)

    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg conversion failed with exit code {result.returncode}")
199
+
200
def run_rife_interpolation(self, video_path, output_path, multi=2, scale=1.0):
    """Run the Practical-RIFE ``inference_video.py`` script on a video.

    The script is launched as a subprocess from the ``Practical-RIFE``
    directory located next to this file, with ``train_log`` inside it passed
    as ``--model``. On success the result is additionally re-encoded to
    ``completed_playable.mp4`` via ``convert_to_playable_format``.

    Args:
        video_path: Passed to the script as ``--video``.
        output_path: Passed to the script as ``--output``.
        multi: Passed to the script as ``--multi``.
        scale: Passed to the script as ``--scale``.

    Raises:
        FileNotFoundError: If ``inference_video.py`` is missing (for example
            when the Practical-RIFE submodule has not been checked out).
        RuntimeError: If the script exits with a non-zero status.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    script_path = os.path.join(base_dir, "Practical-RIFE", "inference_video.py")
    model_directory = os.path.join(base_dir, "Practical-RIFE", "train_log")

    # Fail with a clear message instead of an opaque interpreter exit code.
    if not os.path.isfile(script_path):
        raise FileNotFoundError(f"The RIFE inference script was not found at: {script_path}")

    command = [
        # Use the interpreter running this handler so the subprocess sees the
        # same installed packages; a bare "python" may resolve elsewhere.
        sys.executable or "python",
        script_path,
        f"--video={video_path}",
        f"--output={output_path}",
        f"--multi={multi}",
        f"--scale={scale}",
        f"--model={model_directory}",
    ]

    result = subprocess.run(command, capture_output=True, text=True)
    print(result)
    print(result.stdout)
    print(result.stderr)

    if result.returncode != 0:
        raise RuntimeError(f"RIFE interpolation failed with exit code {result.returncode}")

    # NOTE(review): the playable copy goes to a fixed name in the current
    # working directory, not next to output_path — confirm this is intended.
    self.convert_to_playable_format(output_path, "completed_playable.mp4")
222
+
223
def speed_up_video(self, input_path, output_path, factor=4):
    """Write a copy of a video whose timestamps are divided by ``factor``.

    Runs ffmpeg with the ``setpts=PTS/{factor}`` video filter and drops the
    audio stream.

    Args:
        input_path: Video handed to ffmpeg via ``-i``.
        output_path: Destination file; overwritten if it already exists.
        factor: Positive speed-up factor.

    Raises:
        ValueError: If ``factor`` is not a positive number.
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    # "not > 0" also rejects NaN; factor 0 would produce "PTS/0".
    if not float(factor) > 0:
        raise ValueError(f"factor must be a positive number, got {factor!r}")

    command = [
        "ffmpeg",
        "-nostdin",  # Never wait on the handler's stdin
        "-y",  # Overwrite output left over from a previous request
        "-i", input_path,
        "-filter:v", f"setpts=PTS/{factor}",
        "-an",  # Remove audio
        output_path,
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    print("Speed Up Video STDOUT:", result.stdout)
    print("Speed Up Video STDERR:", result.stderr)

    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg speed up failed with exit code {result.returncode}")
237
+
238
def slow_down_video(self, input_path, output_path, factor=4):
    """Write a copy of a video whose timestamps are multiplied by ``factor``.

    Runs ffmpeg with the ``setpts={factor}*PTS`` video filter and drops the
    audio stream.

    Args:
        input_path: Video handed to ffmpeg via ``-i``.
        output_path: Destination file; overwritten if it already exists.
        factor: Positive slow-down factor.

    Raises:
        ValueError: If ``factor`` is not a positive number.
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    # "not > 0" also rejects NaN; a non-positive factor would collapse or
    # reverse every timestamp.
    if not float(factor) > 0:
        raise ValueError(f"factor must be a positive number, got {factor!r}")

    command = [
        "ffmpeg",
        "-nostdin",  # Never wait on the handler's stdin
        "-y",  # Overwrite output left over from a previous request
        "-i", input_path,
        "-filter:v", f"setpts={factor}*PTS",
        "-an",  # Remove audio
        output_path,
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    print("Slow Down Video STDOUT:", result.stdout)
    print("Slow Down Video STDERR:", result.stderr)

    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg slow down failed with exit code {result.returncode}")
252
+
253
  def __call__(self, data: Any) -> Dict[str, str]:
254
  inputs = data.get("inputs", {})
255
  ref_image_base64 = inputs.get("ref_image", "")
 
271
 
272
  if not os.path.exists(pose_video_path):
273
  raise FileNotFoundError(f"The pose video was not found at: {pose_video_path}")
 
 
 
 
274
 
275
+ # Speed up the pose video by 4x
276
+ sped_up_pose_video_path = os.path.join(base_dir, "sped_up_pose_video.mp4")
277
+ self.speed_up_video(pose_video_path, sped_up_pose_video_path, factor=4)
278
+
279
+ torch.manual_seed(seed)
280
+ pose_images = read_frames(sped_up_pose_video_path)
281
+ src_fps = get_fps(sped_up_pose_video_path)
282
+
283
  pose_list = []
284
  total_length = min(length, len(pose_images))
285
  for pose_image_pil in pose_images[:total_length]:
 
305
  cropped_face_path = os.path.join(save_dir, "cropped_face.jpg")
306
  cropped_face = self._crop_face(ref_image, save_path=cropped_face_path)
307
 
308
+ # Delete the pipeline and clear CUDA cache to free up memory
309
+ del self.pipeline
310
+ torch.cuda.empty_cache()
311
+
312
  # Perform face swapping
313
+ swapped_face_video_path = self._swap_face(cropped_face, animation_path)
314
+
315
+ # Slow down the produced video by 4x
316
+ slowed_down_animation_path = os.path.join(save_dir, "slowed_down_animation_output.mp4")
317
+ self.slow_down_video(swapped_face_video_path, slowed_down_animation_path, factor=4)
318
+
319
+ # Clear CUDA cache before RIFE interpolation
320
+ torch.cuda.empty_cache()
321
+
322
+ # Perform RIFE interpolation
323
+ rife_output_path = os.path.join(save_dir, "completed_result.mp4")
324
+ self.run_rife_interpolation(slowed_down_animation_path, rife_output_path, multi=2, scale=0.5)
325
 
326
  # Encode the final video in base64
327
+ with open(rife_output_path, "rb") as video_file:
328
  video_base64 = base64.b64encode(video_file.read()).decode("utf-8")
329
 
330
  torch.cuda.empty_cache()
input.jpg CHANGED
memory_stats.log ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Memory Stats - Preloading model:
2
+ Allocated memory: 20.48 MB
3
+ Reserved memory: 32.00 MB
4
+ Max allocated memory: 20.48 MB
5
+ Max reserved memory: 32.00 MB
6
+ ==============================
7
+ Memory Stats - post loading model model:
8
+ Allocated memory: 20.48 MB
9
+ Reserved memory: 62.00 MB
10
+ Max allocated memory: 40.96 MB
11
+ Max reserved memory: 62.00 MB
12
+ ==============================
13
+ Memory Stats - Before video release:
14
+ Allocated memory: 20.48 MB
15
+ Reserved memory: 62.00 MB
16
+ Max allocated memory: 40.96 MB
17
+ Max reserved memory: 62.00 MB
18
+ ==============================
19
+ Memory Stats - After video release:
20
+ Allocated memory: 20.48 MB
21
+ Reserved memory: 62.00 MB
22
+ Max allocated memory: 40.96 MB
23
+ Max reserved memory: 62.00 MB
24
+ ==============================
25
+ Memory Stats - Before videowriter vid_out:
26
+ Allocated memory: 20.48 MB
27
+ Reserved memory: 62.00 MB
28
+ Max allocated memory: 40.96 MB
29
+ Max reserved memory: 62.00 MB
30
+ ==============================
31
+ Memory Stats - After videowriter vid_out:
32
+ Allocated memory: 20.48 MB
33
+ Reserved memory: 62.00 MB
34
+ Max allocated memory: 40.96 MB
35
+ Max reserved memory: 62.00 MB
36
+ ==============================
37
+ Memory Stats - Preloading model:
38
+ Allocated memory: 20.48 MB
39
+ Reserved memory: 32.00 MB
40
+ Max allocated memory: 20.48 MB
41
+ Max reserved memory: 32.00 MB
42
+ ==============================
43
+ Memory Stats - post loading model model:
44
+ Allocated memory: 20.48 MB
45
+ Reserved memory: 62.00 MB
46
+ Max allocated memory: 40.96 MB
47
+ Max reserved memory: 62.00 MB
48
+ ==============================
49
+ Memory Stats - Before video release:
50
+ Allocated memory: 20.48 MB
51
+ Reserved memory: 62.00 MB
52
+ Max allocated memory: 40.96 MB
53
+ Max reserved memory: 62.00 MB
54
+ ==============================
55
+ Memory Stats - After video release:
56
+ Allocated memory: 20.48 MB
57
+ Reserved memory: 62.00 MB
58
+ Max allocated memory: 40.96 MB
59
+ Max reserved memory: 62.00 MB
60
+ ==============================
61
+ Memory Stats - Before videowriter vid_out:
62
+ Allocated memory: 20.48 MB
63
+ Reserved memory: 62.00 MB
64
+ Max allocated memory: 40.96 MB
65
+ Max reserved memory: 62.00 MB
66
+ ==============================
67
+ Memory Stats - After videowriter vid_out:
68
+ Allocated memory: 20.48 MB
69
+ Reserved memory: 62.00 MB
70
+ Max allocated memory: 40.96 MB
71
+ Max reserved memory: 62.00 MB
72
+ ==============================
output.mp4 CHANGED
Binary files a/output.mp4 and b/output.mp4 differ
 
output/gradio/animation_output.mp4 CHANGED
Binary files a/output/gradio/animation_output.mp4 and b/output/gradio/animation_output.mp4 differ
 
output/gradio/completed_result.mp4 ADDED
Binary file (44 Bytes). View file
 
output/gradio/cropped_face.jpg CHANGED
output/gradio/output_video.mp4 DELETED
Binary file (840 kB)
 
requirements.txt CHANGED
@@ -49,3 +49,8 @@ scipy==1.11.4
49
  torchdiffeq==0.2.3
50
  torchmetrics==1.2.1
51
  torchsde==0.2.5
 
 
 
 
 
 
49
  torchdiffeq==0.2.3
50
  torchmetrics==1.2.1
51
  torchsde==0.2.5
52
+
53
+
54
+ # Additional dependencies for RIFE
55
+ sk-video==1.1.10
56
+ moviepy==1.0.3
sampler.py CHANGED
@@ -18,7 +18,7 @@ inputs = {
18
  "pose_video_path": "pose_video.mp4",
19
  "width": 512,
20
  "height": 768,
21
- "length": 12,
22
  "num_inference_steps": 25,
23
  "cfg": 3.5,
24
  "seed": 123
@@ -28,12 +28,12 @@ inputs = {
28
  # Simulate an inference call
29
  output = handler(inputs)
30
 
31
- # Decode the base64 video output
32
- video_base64 = output.get("video", "")
33
- video_bytes = base64.b64decode(video_base64)
34
 
35
- # Save the video to a file
36
- with open("output_video.mp4", "wb") as video_file:
37
- video_file.write(video_bytes)
38
 
39
  print("Inference completed. Output video saved as output_video.mp4")
 
18
  "pose_video_path": "pose_video.mp4",
19
  "width": 512,
20
  "height": 768,
21
+ "length": 24,
22
  "num_inference_steps": 25,
23
  "cfg": 3.5,
24
  "seed": 123
 
28
  # Simulate an inference call
29
  output = handler(inputs)
30
 
31
+ # # Decode the base64 video output
32
+ # video_base64 = output.get("video", "")
33
+ # video_bytes = base64.b64decode(video_base64)
34
 
35
+ # # Save the video to a file
36
+ # with open("output_video.mp4", "wb") as video_file:
37
+ # video_file.write(video_bytes)
38
 
39
  print("Inference completed. Output video saved as output_video.mp4")
sped_up_pose_video.mp4 ADDED
Binary file (131 kB). View file