Ggh596 committed on
Commit
1aa6728
Β·
verified Β·
1 Parent(s): 151d334

Upload 18 files

Browse files
roop/ProcessMgr.py CHANGED
@@ -3,11 +3,10 @@ import cv2
3
  import numpy as np
4
  import psutil
5
 
6
- from enum import Enum
7
  from roop.ProcessOptions import ProcessOptions
8
 
9
- from roop.face_util import get_first_face, get_all_faces, rotate_image_180, rotate_anticlockwise, rotate_clockwise, clamp_cut_values
10
- from roop.utilities import compute_cosine_distance, get_device, str_to_class
11
  import roop.vr_util as vr
12
 
13
  from typing import Any, List, Callable
@@ -17,15 +16,18 @@ from threading import Thread, Lock
17
  from queue import Queue
18
  from tqdm import tqdm
19
  from roop.ffmpeg_writer import FFMPEG_VideoWriter
 
20
  import roop.globals
21
 
22
 
 
23
  # Poor man's enum to be able to compare to int
24
  class eNoFaceAction():
25
  USE_ORIGINAL_FRAME = 0
26
  RETRY_ROTATED = 1
27
  SKIP_FRAME = 2
28
- SKIP_FRAME_IF_DISSIMILAR = 3
 
29
 
30
 
31
 
@@ -44,6 +46,7 @@ def pick_queue(queue: Queue[str], queue_per_future: int) -> List[str]:
44
  return queues
45
 
46
 
 
47
  class ProcessMgr():
48
  input_face_datas = []
49
  target_face_datas = []
@@ -64,11 +67,16 @@ class ProcessMgr():
64
  processed_queue = None
65
 
66
  videowriter= None
 
67
 
68
  progress_gradio = None
69
  total_frames = 0
70
 
71
-
 
 
 
 
72
 
73
 
74
  plugins = {
@@ -101,12 +109,19 @@ class ProcessMgr():
101
  def initialize(self, input_faces, target_faces, options):
102
  self.input_face_datas = input_faces
103
  self.target_face_datas = target_faces
 
 
104
  self.options = options
105
  devicename = get_device()
106
 
107
  roop.globals.g_desired_face_analysis=["landmark_3d_68", "landmark_2d_106","detection","recognition"]
108
  if options.swap_mode == "all_female" or options.swap_mode == "all_male":
109
  roop.globals.g_desired_face_analysis.append("genderage")
 
 
 
 
 
110
 
111
  for p in self.processors:
112
  newp = next((x for x in options.processors.keys() if x == p.processorname), None)
@@ -123,6 +138,14 @@ class ProcessMgr():
123
  p = str_to_class(module, classname)
124
  if p is not None:
125
  extoption.update({"devicename": devicename})
 
 
 
 
 
 
 
 
126
  p.Initialize(extoption)
127
  newprocessors.append(p)
128
  else:
@@ -185,7 +208,8 @@ class ProcessMgr():
185
  resimg = self.process_frame(temp_frame)
186
  if resimg is not None:
187
  i = source_files.index(f)
188
- cv2.imwrite(target_files[i], resimg)
 
189
  if update:
190
  update()
191
 
@@ -239,7 +263,10 @@ class ProcessMgr():
239
  process, frame = self.processed_queue[nextindex % self.num_threads].get()
240
  nextindex += 1
241
  if frame is not None:
242
- self.videowriter.write_frame(frame)
 
 
 
243
  del frame
244
  elif process == False:
245
  num_producers -= 1
@@ -248,7 +275,11 @@ class ProcessMgr():
248
 
249
 
250
 
251
- def run_batch_inmem(self, source_video, target_video, frame_start, frame_end, fps, threads:int = 1, skip_audio=False):
 
 
 
 
252
  cap = cv2.VideoCapture(source_video)
253
  # frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
254
  frame_count = (frame_end - frame_start) + 1
@@ -275,7 +306,13 @@ class ProcessMgr():
275
  self.frames_queue.append(Queue(1))
276
  self.processed_queue.append(Queue(1))
277
 
278
- self.videowriter = FFMPEG_VideoWriter(target_video, (width, height), fps, codec=roop.globals.video_encoder, crf=roop.globals.video_quality, audiofile=None)
 
 
 
 
 
 
279
 
280
  readthread = Thread(target=self.read_frames_thread, args=(cap, frame_start, frame_end, threads))
281
  readthread.start()
@@ -298,7 +335,11 @@ class ProcessMgr():
298
  readthread.join()
299
  writethread.join()
300
  cap.release()
301
- self.videowriter.close()
 
 
 
 
302
  self.frames_queue.clear()
303
  self.processed_queue.clear()
304
 
@@ -317,11 +358,6 @@ class ProcessMgr():
317
  self.progress_gradio((progress.n, self.total_frames), desc='Processing', total=self.total_frames, unit='frames')
318
 
319
 
320
- # https://github.com/deepinsight/insightface#third-party-re-implementation-of-arcface
321
- # https://github.com/deepinsight/insightface/blob/master/alignment/coordinate_reg/image_infer.py
322
- # https://github.com/deepinsight/insightface/issues/1350
323
- # https://github.com/linghu8812/tensorrt_inference
324
-
325
 
326
  def process_frame(self, frame:Frame):
327
  if len(self.input_face_datas) < 1 and not self.options.show_face_masking:
@@ -332,8 +368,16 @@ class ProcessMgr():
332
  if roop.globals.no_face_action == eNoFaceAction.SKIP_FRAME_IF_DISSIMILAR:
333
  if len(self.input_face_datas) > num_swapped:
334
  return None
 
 
335
  return temp_frame
336
- if roop.globals.no_face_action == eNoFaceAction.USE_ORIGINAL_FRAME:
 
 
 
 
 
 
337
  return frame
338
  if roop.globals.no_face_action == eNoFaceAction.SKIP_FRAME:
339
  #This only works with in-mem processing, as it simply skips the frame.
@@ -374,6 +418,8 @@ class ProcessMgr():
374
 
375
  num_faces_found += 1
376
  temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
 
 
377
  else:
378
  faces = get_all_faces(frame)
379
  if faces is None:
@@ -383,7 +429,14 @@ class ProcessMgr():
383
  for face in faces:
384
  num_faces_found += 1
385
  temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
386
- del face
 
 
 
 
 
 
 
387
 
388
  elif self.options.swap_mode == "selected":
389
  num_targetfaces = len(self.target_face_datas)
@@ -397,7 +450,6 @@ class ProcessMgr():
397
  else:
398
  temp_frame = self.process_face(i, face, temp_frame)
399
  num_faces_found += 1
400
- del face
401
  if not roop.globals.vr_mode and num_faces_found == num_targetfaces:
402
  break
403
  elif self.options.swap_mode == "all_female" or self.options.swap_mode == "all_male":
@@ -406,7 +458,13 @@ class ProcessMgr():
406
  if face.sex == gender:
407
  num_faces_found += 1
408
  temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
409
- del face
 
 
 
 
 
 
410
 
411
  if roop.globals.vr_mode and num_faces_found % 2 > 0:
412
  # stereo image, there has to be an even number of faces
@@ -541,17 +599,31 @@ class ProcessMgr():
541
 
542
  # img = vr.GetPerspective(frame, 90, theta, phi, 1280, 1280) # Generate perspective image
543
 
544
- fake_frame = None
545
- aligned_img, M = align_crop(frame, target_face.kps, 128)
 
 
 
 
 
 
 
 
546
  fake_frame = aligned_img
547
- swap_frame = aligned_img
548
  target_face.matrix = M
 
549
  for p in self.processors:
550
  if p.type == 'swap':
551
- if inputface is not None:
 
 
552
  for _ in range(0,self.options.num_swap_steps):
553
- swap_frame = p.Run(inputface, target_face, swap_frame)
554
- fake_frame = swap_frame
 
 
 
 
555
  scale_factor = 0.0
556
  elif p.type == 'mask':
557
  fake_frame = self.process_mask(p, aligned_img, fake_frame)
@@ -560,8 +632,8 @@ class ProcessMgr():
560
 
561
  upscale = 512
562
  orig_width = fake_frame.shape[1]
563
-
564
- fake_frame = cv2.resize(fake_frame, (upscale, upscale), cv2.INTER_CUBIC)
565
  mask_offsets = (0,0,0,0,1,20) if inputface is None else inputface.mask_offsets
566
 
567
 
@@ -571,9 +643,14 @@ class ProcessMgr():
571
  else:
572
  result = self.paste_upscale(fake_frame, enhanced_frame, target_face.matrix, frame, scale_factor, mask_offsets)
573
 
 
 
 
 
 
574
  if rotation_action is not None:
575
  fake_frame = self.auto_unrotate_frame(result, rotation_action)
576
- return self.paste_simple(fake_frame, saved_frame, startX, startY)
577
 
578
  return result
579
 
@@ -673,6 +750,43 @@ class ProcessMgr():
673
  return cv2.GaussianBlur(img_matte, blur_size, 0)
674
 
675
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
676
  def process_mask(self, processor, frame:Frame, target:Frame):
677
  img_mask = processor.Run(frame, self.options.masking_text)
678
  img_mask = cv2.resize(img_mask, (target.shape[1], target.shape[0]))
@@ -688,7 +802,98 @@ class ProcessMgr():
688
  result += img_mask * frame.astype(np.float32)
689
  return np.uint8(result)
690
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
691
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
692
 
693
 
694
  def unload_models():
@@ -699,4 +904,8 @@ class ProcessMgr():
699
  for p in self.processors:
700
  p.Release()
701
  self.processors.clear()
 
 
 
 
702
 
 
3
  import numpy as np
4
  import psutil
5
 
 
6
  from roop.ProcessOptions import ProcessOptions
7
 
8
+ from roop.face_util import get_first_face, get_all_faces, rotate_anticlockwise, rotate_clockwise, clamp_cut_values
9
+ from roop.utilities import compute_cosine_distance, get_device, str_to_class, shuffle_array
10
  import roop.vr_util as vr
11
 
12
  from typing import Any, List, Callable
 
16
  from queue import Queue
17
  from tqdm import tqdm
18
  from roop.ffmpeg_writer import FFMPEG_VideoWriter
19
+ from roop.StreamWriter import StreamWriter
20
  import roop.globals
21
 
22
 
23
+
24
  # Poor man's enum to be able to compare to int
25
# Poor man's enum to be able to compare to int
class eNoFaceAction():
    USE_ORIGINAL_FRAME = 0
    RETRY_ROTATED = 1
    SKIP_FRAME = 2
    # BUGFIX: the original line ended in a trailing comma, which made this
    # member the tuple (3,) instead of the int 3, so comparisons like
    # `no_face_action == eNoFaceAction.SKIP_FRAME_IF_DISSIMILAR` never matched.
    SKIP_FRAME_IF_DISSIMILAR = 3
    USE_LAST_SWAPPED = 4
31
 
32
 
33
 
 
46
  return queues
47
 
48
 
49
+
50
  class ProcessMgr():
51
  input_face_datas = []
52
  target_face_datas = []
 
67
  processed_queue = None
68
 
69
  videowriter= None
70
+ streamwriter = None
71
 
72
  progress_gradio = None
73
  total_frames = 0
74
 
75
+ num_frames_no_face = 0
76
+ last_swapped_frame = None
77
+
78
+ output_to_file = None
79
+ output_to_cam = None
80
 
81
 
82
  plugins = {
 
109
  def initialize(self, input_faces, target_faces, options):
110
  self.input_face_datas = input_faces
111
  self.target_face_datas = target_faces
112
+ self.num_frames_no_face = 0
113
+ self.last_swapped_frame = None
114
  self.options = options
115
  devicename = get_device()
116
 
117
  roop.globals.g_desired_face_analysis=["landmark_3d_68", "landmark_2d_106","detection","recognition"]
118
  if options.swap_mode == "all_female" or options.swap_mode == "all_male":
119
  roop.globals.g_desired_face_analysis.append("genderage")
120
+ elif options.swap_mode == "all_random":
121
+ # don't modify original list
122
+ self.input_face_datas = input_faces.copy()
123
+ shuffle_array(self.input_face_datas)
124
+
125
 
126
  for p in self.processors:
127
  newp = next((x for x in options.processors.keys() if x == p.processorname), None)
 
138
  p = str_to_class(module, classname)
139
  if p is not None:
140
  extoption.update({"devicename": devicename})
141
+ if p.type == "swap":
142
+ if self.options.swap_modelname == "InSwapper 128":
143
+ extoption.update({"modelname": "inswapper_128.onnx"})
144
+ elif self.options.swap_modelname == "ReSwapper 128":
145
+ extoption.update({"modelname": "reswapper_128.onnx"})
146
+ elif self.options.swap_modelname == "ReSwapper 256":
147
+ extoption.update({"modelname": "reswapper_256.onnx"})
148
+
149
  p.Initialize(extoption)
150
  newprocessors.append(p)
151
  else:
 
208
  resimg = self.process_frame(temp_frame)
209
  if resimg is not None:
210
  i = source_files.index(f)
211
+ # Also let numpy write the file to support utf-8/16 filenames
212
+ cv2.imencode(f'.{roop.globals.CFG.output_image_format}',resimg)[1].tofile(target_files[i])
213
  if update:
214
  update()
215
 
 
263
  process, frame = self.processed_queue[nextindex % self.num_threads].get()
264
  nextindex += 1
265
  if frame is not None:
266
+ if self.output_to_file:
267
+ self.videowriter.write_frame(frame)
268
+ if self.output_to_cam:
269
+ self.streamwriter.WriteToStream(frame)
270
  del frame
271
  elif process == False:
272
  num_producers -= 1
 
275
 
276
 
277
 
278
+ def run_batch_inmem(self, output_method, source_video, target_video, frame_start, frame_end, fps, threads:int = 1):
279
+ if len(self.processors) < 1:
280
+ print("No processor defined!")
281
+ return
282
+
283
  cap = cv2.VideoCapture(source_video)
284
  # frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
285
  frame_count = (frame_end - frame_start) + 1
 
306
  self.frames_queue.append(Queue(1))
307
  self.processed_queue.append(Queue(1))
308
 
309
+ self.output_to_file = output_method != "Virtual Camera"
310
+ self.output_to_cam = output_method == "Virtual Camera" or output_method == "Both"
311
+
312
+ if self.output_to_file:
313
+ self.videowriter = FFMPEG_VideoWriter(target_video, (width, height), fps, codec=roop.globals.video_encoder, crf=roop.globals.video_quality, audiofile=None)
314
+ if self.output_to_cam:
315
+ self.streamwriter = StreamWriter((width, height), int(fps))
316
 
317
  readthread = Thread(target=self.read_frames_thread, args=(cap, frame_start, frame_end, threads))
318
  readthread.start()
 
335
  readthread.join()
336
  writethread.join()
337
  cap.release()
338
+ if self.output_to_file:
339
+ self.videowriter.close()
340
+ if self.output_to_cam:
341
+ self.streamwriter.Close()
342
+
343
  self.frames_queue.clear()
344
  self.processed_queue.clear()
345
 
 
358
  self.progress_gradio((progress.n, self.total_frames), desc='Processing', total=self.total_frames, unit='frames')
359
 
360
 
 
 
 
 
 
361
 
362
  def process_frame(self, frame:Frame):
363
  if len(self.input_face_datas) < 1 and not self.options.show_face_masking:
 
368
  if roop.globals.no_face_action == eNoFaceAction.SKIP_FRAME_IF_DISSIMILAR:
369
  if len(self.input_face_datas) > num_swapped:
370
  return None
371
+ self.num_frames_no_face = 0
372
+ self.last_swapped_frame = temp_frame.copy()
373
  return temp_frame
374
+ if roop.globals.no_face_action == eNoFaceAction.USE_LAST_SWAPPED:
375
+ if self.last_swapped_frame is not None and self.num_frames_no_face < self.options.max_num_reuse_frame:
376
+ self.num_frames_no_face += 1
377
+ return self.last_swapped_frame.copy()
378
+ return frame
379
+
380
+ elif roop.globals.no_face_action == eNoFaceAction.USE_ORIGINAL_FRAME:
381
  return frame
382
  if roop.globals.no_face_action == eNoFaceAction.SKIP_FRAME:
383
  #This only works with in-mem processing, as it simply skips the frame.
 
418
 
419
  num_faces_found += 1
420
  temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
421
+ del face
422
+
423
  else:
424
  faces = get_all_faces(frame)
425
  if faces is None:
 
429
  for face in faces:
430
  num_faces_found += 1
431
  temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
432
+
433
+ elif self.options.swap_mode == "all_input" or self.options.swap_mode == "all_random":
434
+ for i,face in enumerate(faces):
435
+ num_faces_found += 1
436
+ if i < len(self.input_face_datas):
437
+ temp_frame = self.process_face(i, face, temp_frame)
438
+ else:
439
+ break
440
 
441
  elif self.options.swap_mode == "selected":
442
  num_targetfaces = len(self.target_face_datas)
 
450
  else:
451
  temp_frame = self.process_face(i, face, temp_frame)
452
  num_faces_found += 1
 
453
  if not roop.globals.vr_mode and num_faces_found == num_targetfaces:
454
  break
455
  elif self.options.swap_mode == "all_female" or self.options.swap_mode == "all_male":
 
458
  if face.sex == gender:
459
  num_faces_found += 1
460
  temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
461
+
462
+ # might be slower but way more clean to release everything here
463
+ for face in faces:
464
+ del face
465
+ faces.clear()
466
+
467
+
468
 
469
  if roop.globals.vr_mode and num_faces_found % 2 > 0:
470
  # stereo image, there has to be an even number of faces
 
599
 
600
  # img = vr.GetPerspective(frame, 90, theta, phi, 1280, 1280) # Generate perspective image
601
 
602
+
603
+ """ Code ported/adapted from Facefusion which borrowed the idea from Rope:
604
+ Kind of subsampling the cutout and aligned face image and faceswapping slices of it up to
605
+ the desired output resolution. This works around the current resolution limitations without using enhancers.
606
+ """
607
+ model_output_size = self.options.swap_output_size
608
+ subsample_size = max(self.options.subsample_size, model_output_size)
609
+ subsample_total = subsample_size // model_output_size
610
+ aligned_img, M = align_crop(frame, target_face.kps, subsample_size)
611
+
612
  fake_frame = aligned_img
 
613
  target_face.matrix = M
614
+
615
  for p in self.processors:
616
  if p.type == 'swap':
617
+ swap_result_frames = []
618
+ subsample_frames = self.implode_pixel_boost(aligned_img, model_output_size, subsample_total)
619
+ for sliced_frame in subsample_frames:
620
  for _ in range(0,self.options.num_swap_steps):
621
+ sliced_frame = self.prepare_crop_frame(sliced_frame)
622
+ sliced_frame = p.Run(inputface, target_face, sliced_frame)
623
+ sliced_frame = self.normalize_swap_frame(sliced_frame)
624
+ swap_result_frames.append(sliced_frame)
625
+ fake_frame = self.explode_pixel_boost(swap_result_frames, model_output_size, subsample_total, subsample_size)
626
+ fake_frame = fake_frame.astype(np.uint8)
627
  scale_factor = 0.0
628
  elif p.type == 'mask':
629
  fake_frame = self.process_mask(p, aligned_img, fake_frame)
 
632
 
633
  upscale = 512
634
  orig_width = fake_frame.shape[1]
635
+ if orig_width != upscale:
636
+ fake_frame = cv2.resize(fake_frame, (upscale, upscale), cv2.INTER_CUBIC)
637
  mask_offsets = (0,0,0,0,1,20) if inputface is None else inputface.mask_offsets
638
 
639
 
 
643
  else:
644
  result = self.paste_upscale(fake_frame, enhanced_frame, target_face.matrix, frame, scale_factor, mask_offsets)
645
 
646
+ # Restore mouth before unrotating
647
+ if self.options.restore_original_mouth:
648
+ mouth_cutout, mouth_bb = self.create_mouth_mask(target_face, frame)
649
+ result = self.apply_mouth_area(result, mouth_cutout, mouth_bb)
650
+
651
  if rotation_action is not None:
652
  fake_frame = self.auto_unrotate_frame(result, rotation_action)
653
+ result = self.paste_simple(fake_frame, saved_frame, startX, startY)
654
 
655
  return result
656
 
 
750
  return cv2.GaussianBlur(img_matte, blur_size, 0)
751
 
752
 
753
def prepare_crop_frame(self, swap_frame):
    """Convert a BGR uint8 crop (H, W, 3) into the model's NCHW float32 input.

    Channels are reversed to RGB, values scaled to [0, 1] (inswapper path),
    then the array is transposed to CHW and a batch axis is prepended.
    """
    model_type = 'inswapper'
    mean = [0.0, 0.0, 0.0]
    std = [1.0, 1.0, 1.0]

    rgb = swap_frame[:, :, ::-1]  # BGR -> RGB
    if model_type == 'ghost':
        normalized = rgb / 127.5 - 1
    else:
        normalized = (rgb / 255.0 - mean) / std

    chw = normalized.transpose(2, 0, 1)  # HWC -> CHW
    return np.expand_dims(chw, axis=0).astype(np.float32)
766
+
767
+
768
def normalize_swap_frame(self, swap_frame):
    """Inverse of prepare_crop_frame for one CHW slice: transpose back to
    HWC, rescale to 0..255 (inswapper path) and flip RGB back to BGR."""
    model_type = 'inswapper'
    hwc = swap_frame.transpose(1, 2, 0)  # CHW -> HWC

    if model_type == 'ghost':
        hwc = (hwc * 127.5 + 127.5).round()
    else:
        hwc = (hwc * 255.0).round()
    return hwc[:, :, ::-1]  # RGB -> BGR
778
+
779
def implode_pixel_boost(self, aligned_face_frame, model_size, pixel_boost_total : int):
    """Slice an aligned face of size (model_size*total)^2 into total^2
    interleaved tiles of (model_size, model_size, 3) for per-tile swapping."""
    t = pixel_boost_total
    tiles = aligned_face_frame.reshape(model_size, t, model_size, t, 3)
    tiles = tiles.transpose(1, 3, 0, 2, 4)
    return tiles.reshape(t * t, model_size, model_size, 3)
783
+
784
+
785
def explode_pixel_boost(self, subsample_frame, model_size, pixel_boost_total, pixel_boost_size):
    """Reassemble the total^2 swapped tiles produced by implode_pixel_boost
    back into one (pixel_boost_size, pixel_boost_size, 3) face image."""
    t = pixel_boost_total
    grid = np.stack(subsample_frame, axis=0).reshape(t, t, model_size, model_size, 3)
    grid = grid.transpose(2, 0, 3, 1, 4)
    return grid.reshape(pixel_boost_size, pixel_boost_size, 3)
789
+
790
  def process_mask(self, processor, frame:Frame, target:Frame):
791
  img_mask = processor.Run(frame, self.options.masking_text)
792
  img_mask = cv2.resize(img_mask, (target.shape[1], target.shape[0]))
 
802
  result += img_mask * frame.astype(np.float32)
803
  return np.uint8(result)
804
 
805
+
806
+ # Code for mouth restoration adapted from https://github.com/iVideoGameBoss/iRoopDeepFaceCam
807
+
808
# Code for mouth restoration adapted from https://github.com/iVideoGameBoss/iRoopDeepFaceCam

def create_mouth_mask(self, face: "Face", frame: "Frame"):
    """Cut out a generously padded mouth region from *frame*.

    Returns (mouth_cutout, (min_x, min_y, max_x, max_y)). mouth_cutout is
    None (with an empty box) when the face has no 2D landmarks.
    Annotations are quoted so the def does not require Face/Frame at import time.
    """
    landmarks = face.landmark_2d_106
    if landmarks is None:
        # BUGFIX: the original fell through to the final return and referenced
        # min_x/min_y/max_x/max_y which were never assigned, raising
        # UnboundLocalError. Return an explicit empty result instead.
        return None, (0, 0, 0, 0)

    # Get mouth landmarks (indices 52 to 71 typically represent the outer mouth)
    mouth_points = landmarks[52:71].astype(np.int32)

    # Add (asymmetric, kept from the original) padding to the mouth area,
    # clamped to the frame bounds
    min_x, min_y = np.min(mouth_points, axis=0)
    max_x, max_y = np.max(mouth_points, axis=0)
    min_x = max(0, min_x - (15*6))
    min_y = max(0, min_y - 22)
    max_x = min(frame.shape[1], max_x + (15*6))
    max_y = min(frame.shape[0], max_y + (90*6))

    # Extract the mouth area from the frame using the calculated bounding box
    mouth_cutout = frame[min_y:max_y, min_x:max_x].copy()

    return mouth_cutout, (min_x, min_y, max_x, max_y)
828
+
829
+
830
+
831
def create_feathered_mask(self, shape, feather_amount=30):
    """Build a soft elliptical float mask for *shape* (H, W, ...), blurred by
    *feather_amount* and normalized so its maximum value is 1.0."""
    h, w = shape[:2]
    mask = np.zeros((h, w), dtype=np.float32)
    # Filled ellipse centered in the image, shrunk by the feather margin
    axes = (w // 2 - feather_amount, h // 2 - feather_amount)
    cv2.ellipse(mask, (w // 2, h // 2), axes, 0, 0, 360, 1, -1)
    kernel = feather_amount * 2 + 1
    mask = cv2.GaussianBlur(mask, (kernel, kernel), 0)
    return mask / np.max(mask)
838
+
839
def apply_mouth_area(self, frame: np.ndarray, mouth_cutout: np.ndarray, mouth_box: tuple) -> np.ndarray:
    """Blend the original mouth cutout back into a swapped *frame*.

    Best-effort by design: any failure leaves *frame* unmodified and is
    reported, never raised.
    """
    # BUGFIX: the original unpacked mouth_box BEFORE any None check (TypeError
    # on a None box) and compared the computed int box sizes against None,
    # which is always false. Guard first, then unpack.
    if mouth_cutout is None or mouth_box is None:
        return frame
    min_x, min_y, max_x, max_y = mouth_box
    box_width = max_x - min_x
    box_height = max_y - min_y
    if box_width <= 0 or box_height <= 0 or mouth_cutout.size == 0:
        return frame
    try:
        # Resize the mouth cutout to match the mouth box size
        resized_mouth_cutout = cv2.resize(mouth_cutout, (box_width, box_height))

        # Extract the region of interest (ROI) from the target frame
        roi = frame[min_y:max_y, min_x:max_x]

        # Ensure the ROI and resized_mouth_cutout have the same shape
        if roi.shape != resized_mouth_cutout.shape:
            resized_mouth_cutout = cv2.resize(resized_mouth_cutout, (roi.shape[1], roi.shape[0]))

        # Apply color transfer from ROI to mouth cutout
        color_corrected_mouth = self.apply_color_transfer(resized_mouth_cutout, roi)

        # Create a feathered mask with increased feather amount
        feather_amount = min(30, box_width // 15, box_height // 15)
        mask = self.create_feathered_mask(resized_mouth_cutout.shape, feather_amount)

        # Blend the color-corrected mouth cutout with the ROI using the feathered mask
        mask = mask[:,:,np.newaxis]  # Add channel dimension to mask
        blended = (color_corrected_mouth * mask + roi * (1 - mask)).astype(np.uint8)

        # Place the blended result back into the frame
        frame[min_y:max_y, min_x:max_x] = blended
    except Exception as e:
        # Deliberate best-effort: mouth restoration must never abort the swap
        print(f'Error {e}')

    return frame
876
+
877
def apply_color_transfer(self, source, target):
    """
    Apply color transfer from target to source image
    """
    # Work in LAB space, where per-channel mean/std matching looks natural
    src = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype("float32")
    tgt = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype("float32")

    src_mean, src_std = cv2.meanStdDev(src)
    tgt_mean, tgt_std = cv2.meanStdDev(tgt)

    # meanStdDev returns (3, 1) arrays; reshape so they broadcast over H x W x 3
    src_mean, src_std, tgt_mean, tgt_std = (
        a.reshape(1, 1, 3) for a in (src_mean, src_std, tgt_mean, tgt_std)
    )

    # Shift/scale source statistics onto the target's
    transferred = (src - src_mean) * (tgt_std / src_std) + tgt_mean
    return cv2.cvtColor(np.clip(transferred, 0, 255).astype("uint8"), cv2.COLOR_LAB2BGR)
896
+
897
 
898
 
899
  def unload_models():
 
904
  for p in self.processors:
905
  p.Release()
906
  self.processors.clear()
907
+ if self.videowriter is not None:
908
+ self.videowriter.close()
909
+ if self.streamwriter is not None:
910
+ self.streamwriter.Close()
911
 
roop/ProcessOptions.py CHANGED
@@ -1,6 +1,8 @@
1
  class ProcessOptions:
2
 
3
- def __init__(self, processordefines:dict, face_distance, blend_ratio, swap_mode, selected_index, masking_text, imagemask, num_steps, show_face_area, show_mask=False):
 
 
4
  self.processors = processordefines
5
  self.face_distance_threshold = face_distance
6
  self.blend_ratio = blend_ratio
@@ -10,4 +12,7 @@ class ProcessOptions:
10
  self.imagemask = imagemask
11
  self.num_swap_steps = num_steps
12
  self.show_face_area_overlay = show_face_area
13
- self.show_face_masking = show_mask
 
 
 
 
1
  class ProcessOptions:
2
 
3
+ def __init__(self, swap_model, processordefines:dict, face_distance, blend_ratio, swap_mode, selected_index, masking_text, imagemask, num_steps, subsample_size, show_face_area, restore_original_mouth, show_mask=False):
4
+ self.swap_modelname = swap_model
5
+ self.swap_output_size = int(swap_model.split()[-1])
6
  self.processors = processordefines
7
  self.face_distance_threshold = face_distance
8
  self.blend_ratio = blend_ratio
 
12
  self.imagemask = imagemask
13
  self.num_swap_steps = num_steps
14
  self.show_face_area_overlay = show_face_area
15
+ self.show_face_masking = show_mask
16
+ self.subsample_size = subsample_size
17
+ self.restore_original_mouth = restore_original_mouth
18
+ self.max_num_reuse_frame = 15
roop/StreamWriter.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading
2
+ import time
3
+ import pyvirtualcam
4
+
5
+
6
class StreamWriter():
    """Pushes frames to a virtual camera device (pyvirtualcam), rate-limiting
    sends to the requested FPS. Usable as a context manager."""
    FPS = 30
    VCam = None
    Active = False
    THREAD_LOCK_STREAM = threading.Lock()
    time_last_process = None
    timespan_min = 0.0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.Close()

    def __init__(self, size, fps):
        """size: (width, height) of the stream; fps: target frame rate."""
        self.time_last_process = time.perf_counter()
        self.FPS = fps
        self.timespan_min = 1.0 / fps
        print('Detecting virtual cam devices')
        self.VCam = pyvirtualcam.Camera(width=size[0], height=size[1], fps=fps, fmt=pyvirtualcam.PixelFormat.BGR, print_fps=False)
        if self.VCam is None:
            print("No virtual camera found!")
            return
        print(f'Using virtual camera: {self.VCam.device}')
        print(f'Using {self.VCam.native_fmt}')
        self.Active = True


    def LimitFrames(self):
        # Busy-wait until at least 1/FPS seconds have passed since the last send
        while True:
            current_time = time.perf_counter()
            time_passed = current_time - self.time_last_process
            if time_passed >= self.timespan_min:
                break

    # First version used a queue and threading. Surprisingly this
    # totally simple, blocking version is 10 times faster!
    def WriteToStream(self, frame):
        if self.VCam is None:
            return
        with self.THREAD_LOCK_STREAM:
            self.LimitFrames()
            self.VCam.send(frame)
            self.time_last_process = time.perf_counter()


    def Close(self):
        self.Active = False
        # BUGFIX: the original condition was inverted ("if self.VCam is None:
        # self.VCam.close()"), so a live camera was never released and a
        # missing one raised AttributeError on None.
        if self.VCam is not None:
            self.VCam.close()
            self.VCam = None
57
+
58
+
59
+
60
+
roop/capturer.py CHANGED
@@ -4,6 +4,10 @@ import numpy as np
4
 
5
  from roop.typing import Frame
6
 
 
 
 
 
7
  def get_image_frame(filename: str):
8
  try:
9
  return cv2.imdecode(np.fromfile(filename, dtype=np.uint8), cv2.IMREAD_COLOR)
@@ -13,15 +17,27 @@ def get_image_frame(filename: str):
13
 
14
 
15
  def get_video_frame(video_path: str, frame_number: int = 0) -> Optional[Frame]:
16
- capture = cv2.VideoCapture(video_path)
17
- frame_total = capture.get(cv2.CAP_PROP_FRAME_COUNT)
18
- capture.set(cv2.CAP_PROP_POS_FRAMES, min(frame_total, frame_number - 1))
19
- has_frame, frame = capture.read()
20
- capture.release()
 
 
 
 
 
21
  if has_frame:
22
  return frame
23
  return None
24
 
 
 
 
 
 
 
 
25
 
26
  def get_video_frame_total(video_path: str) -> int:
27
  capture = cv2.VideoCapture(video_path)
 
4
 
5
  from roop.typing import Frame
6
 
7
+ current_video_path = None
8
+ current_frame_total = 0
9
+ current_capture = None
10
+
11
  def get_image_frame(filename: str):
12
  try:
13
  return cv2.imdecode(np.fromfile(filename, dtype=np.uint8), cv2.IMREAD_COLOR)
 
17
 
18
 
19
  def get_video_frame(video_path: str, frame_number: int = 0) -> Optional[Frame]:
20
+ global current_video_path, current_capture, current_frame_total
21
+
22
+ if video_path != current_video_path:
23
+ release_video()
24
+ current_capture = cv2.VideoCapture(video_path)
25
+ current_video_path = video_path
26
+ current_frame_total = current_capture.get(cv2.CAP_PROP_FRAME_COUNT)
27
+
28
+ current_capture.set(cv2.CAP_PROP_POS_FRAMES, min(current_frame_total, frame_number - 1))
29
+ has_frame, frame = current_capture.read()
30
  if has_frame:
31
  return frame
32
  return None
33
 
34
def release_video():
    """Release the cached VideoCapture (module global), if one is open."""
    global current_capture

    if current_capture is None:
        return
    current_capture.release()
    current_capture = None
40
+
41
 
42
  def get_video_frame_total(video_path: str) -> int:
43
  capture = cv2.VideoCapture(video_path)
roop/core.py CHANGED
@@ -14,6 +14,7 @@ import signal
14
  import torch
15
  import onnxruntime
16
  import pathlib
 
17
 
18
  from time import time
19
 
@@ -27,7 +28,7 @@ from roop.face_util import extract_face_images
27
  from roop.ProcessEntry import ProcessEntry
28
  from roop.ProcessMgr import ProcessMgr
29
  from roop.ProcessOptions import ProcessOptions
30
- from roop.capturer import get_video_frame_total
31
 
32
 
33
  clip_text = None
@@ -47,9 +48,12 @@ warnings.filterwarnings('ignore', category=UserWarning, module='torchvision')
47
  def parse_args() -> None:
48
  signal.signal(signal.SIGINT, lambda signal_number, frame: destroy())
49
  roop.globals.headless = False
 
 
 
 
 
50
  # Always enable all processors when using GUI
51
- if len(sys.argv) > 1:
52
- print('No CLI args supported - use Settings Tab instead')
53
  roop.globals.frame_processors = ['face_swapper', 'face_enhancer']
54
 
55
 
@@ -58,8 +62,20 @@ def encode_execution_providers(execution_providers: List[str]) -> List[str]:
58
 
59
 
60
  def decode_execution_providers(execution_providers: List[str]) -> List[str]:
61
- return [provider for provider, encoded_execution_provider in zip(onnxruntime.get_available_providers(), encode_execution_providers(onnxruntime.get_available_providers()))
62
  if any(execution_provider in encoded_execution_provider for execution_provider in execution_providers)]
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
 
65
  def suggest_max_memory() -> int:
@@ -118,6 +134,8 @@ def pre_check() -> bool:
118
 
119
  download_directory_path = util.resolve_relative_path('../models')
120
  util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/inswapper_128.onnx'])
 
 
121
  util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/GFPGANv1.4.onnx'])
122
  util.conditional_download(download_directory_path, ['https://github.com/csxmli2016/DMDNet/releases/download/v1/DMDNet.pth'])
123
  util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/GPEN-BFR-512.onnx'])
@@ -204,7 +222,7 @@ def live_swap(frame, options):
204
  return newframe
205
 
206
 
207
- def batch_process_regular(files:list[ProcessEntry], masking_engine:str, new_clip_text:str, use_new_method, imagemask, num_swap_steps, progress, selected_index = 0) -> None:
208
  global clip_text, process_mgr
209
 
210
  release_resources()
@@ -214,9 +232,11 @@ def batch_process_regular(files:list[ProcessEntry], masking_engine:str, new_clip
214
  mask = imagemask["layers"][0] if imagemask is not None else None
215
  if len(roop.globals.INPUT_FACESETS) <= selected_index:
216
  selected_index = 0
217
- options = ProcessOptions(get_processing_plugins(masking_engine), roop.globals.distance_threshold, roop.globals.blend_ratio, roop.globals.face_swap_mode, selected_index, new_clip_text, mask, num_swap_steps, False)
 
 
218
  process_mgr.initialize(roop.globals.INPUT_FACESETS, roop.globals.TARGET_FACES, options)
219
- batch_process(files, use_new_method)
220
  return
221
 
222
  def batch_process_with_options(files:list[ProcessEntry], options, progress):
@@ -230,11 +250,11 @@ def batch_process_with_options(files:list[ProcessEntry], options, progress):
230
  roop.globals.keep_frames = False
231
  roop.globals.wait_after_extraction = False
232
  roop.globals.skip_audio = False
233
- batch_process(files, True)
234
 
235
 
236
 
237
- def batch_process(files:list[ProcessEntry], use_new_method) -> None:
238
  global clip_text, process_mgr
239
 
240
  roop.globals.processing = True
@@ -287,9 +307,12 @@ def batch_process(files:list[ProcessEntry], use_new_method) -> None:
287
  if v.endframe == 0:
288
  v.endframe = get_video_frame_total(v.filename)
289
 
290
- update_status(f'Creating {os.path.basename(v.finalname)} with {fps} FPS...')
 
 
 
291
  start_processing = time()
292
- if roop.globals.keep_frames or not use_new_method:
293
  util.create_temp(v.filename)
294
  update_status('Extracting frames...')
295
  ffmpeg.extract_frames(v.filename,v.startframe,v.endframe, fps)
@@ -317,7 +340,7 @@ def batch_process(files:list[ProcessEntry], use_new_method) -> None:
317
  skip_audio = True
318
  else:
319
  skip_audio = roop.globals.skip_audio
320
- process_mgr.run_batch_inmem(v.filename, v.finalname, v.startframe, v.endframe, fps,roop.globals.execution_threads, skip_audio)
321
 
322
  if not roop.globals.processing:
323
  end_processing('Processing stopped!')
@@ -346,10 +369,12 @@ def batch_process(files:list[ProcessEntry], use_new_method) -> None:
346
  os.remove(video_file_name)
347
  else:
348
  shutil.move(video_file_name, destination)
349
- update_status(f'\nProcessing {os.path.basename(destination)} took {time() - start_processing} secs')
350
 
351
- else:
352
  update_status(f'Failed processing {os.path.basename(v.finalname)}!')
 
 
 
353
  end_processing('Finished')
354
 
355
 
@@ -371,8 +396,11 @@ def run() -> None:
371
  if not pre_check():
372
  return
373
  roop.globals.CFG = Settings('config.yaml')
 
374
  roop.globals.execution_threads = roop.globals.CFG.max_threads
375
  roop.globals.video_encoder = roop.globals.CFG.output_video_codec
376
  roop.globals.video_quality = roop.globals.CFG.video_quality
377
  roop.globals.max_memory = roop.globals.CFG.memory_limit if roop.globals.CFG.memory_limit > 0 else None
 
 
378
  main.run()
 
14
  import torch
15
  import onnxruntime
16
  import pathlib
17
+ import argparse
18
 
19
  from time import time
20
 
 
28
  from roop.ProcessEntry import ProcessEntry
29
  from roop.ProcessMgr import ProcessMgr
30
  from roop.ProcessOptions import ProcessOptions
31
+ from roop.capturer import get_video_frame_total, release_video
32
 
33
 
34
  clip_text = None
 
48
  def parse_args() -> None:
49
  signal.signal(signal.SIGINT, lambda signal_number, frame: destroy())
50
  roop.globals.headless = False
51
+
52
+ program = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=100))
53
+ program.add_argument('--server_share', help='Public server', dest='server_share', action='store_true', default=False)
54
+ program.add_argument('--cuda_device_id', help='Index of the cuda gpu to use', dest='cuda_device_id', type=int, default=0)
55
+ roop.globals.startup_args = program.parse_args()
56
  # Always enable all processors when using GUI
 
 
57
  roop.globals.frame_processors = ['face_swapper', 'face_enhancer']
58
 
59
 
 
62
 
63
 
64
  def decode_execution_providers(execution_providers: List[str]) -> List[str]:
65
+ list_providers = [provider for provider, encoded_execution_provider in zip(onnxruntime.get_available_providers(), encode_execution_providers(onnxruntime.get_available_providers()))
66
  if any(execution_provider in encoded_execution_provider for execution_provider in execution_providers)]
67
+
68
+ try:
69
+ for i in range(len(list_providers)):
70
+ if list_providers[i] == 'CUDAExecutionProvider':
71
+ list_providers[i] = ('CUDAExecutionProvider', {'device_id': roop.globals.cuda_device_id})
72
+ torch.cuda.set_device(roop.globals.cuda_device_id)
73
+ break
74
+ except:
75
+ pass
76
+
77
+ return list_providers
78
+
79
 
80
 
81
  def suggest_max_memory() -> int:
 
134
 
135
  download_directory_path = util.resolve_relative_path('../models')
136
  util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/inswapper_128.onnx'])
137
+ util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/reswapper_128.onnx'])
138
+ util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/reswapper_256.onnx'])
139
  util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/GFPGANv1.4.onnx'])
140
  util.conditional_download(download_directory_path, ['https://github.com/csxmli2016/DMDNet/releases/download/v1/DMDNet.pth'])
141
  util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/GPEN-BFR-512.onnx'])
 
222
  return newframe
223
 
224
 
225
+ def batch_process_regular(swap_model, output_method, files:list[ProcessEntry], masking_engine:str, new_clip_text:str, use_new_method, imagemask, restore_original_mouth, num_swap_steps, progress, selected_index = 0) -> None:
226
  global clip_text, process_mgr
227
 
228
  release_resources()
 
232
  mask = imagemask["layers"][0] if imagemask is not None else None
233
  if len(roop.globals.INPUT_FACESETS) <= selected_index:
234
  selected_index = 0
235
+ options = ProcessOptions(swap_model, get_processing_plugins(masking_engine), roop.globals.distance_threshold, roop.globals.blend_ratio,
236
+ roop.globals.face_swap_mode, selected_index, new_clip_text, mask, num_swap_steps,
237
+ roop.globals.subsample_size, False, restore_original_mouth)
238
  process_mgr.initialize(roop.globals.INPUT_FACESETS, roop.globals.TARGET_FACES, options)
239
+ batch_process(output_method, files, use_new_method)
240
  return
241
 
242
  def batch_process_with_options(files:list[ProcessEntry], options, progress):
 
250
  roop.globals.keep_frames = False
251
  roop.globals.wait_after_extraction = False
252
  roop.globals.skip_audio = False
253
+ batch_process("Files", files, True)
254
 
255
 
256
 
257
+ def batch_process(output_method, files:list[ProcessEntry], use_new_method) -> None:
258
  global clip_text, process_mgr
259
 
260
  roop.globals.processing = True
 
307
  if v.endframe == 0:
308
  v.endframe = get_video_frame_total(v.filename)
309
 
310
+ is_streaming_only = output_method == "Virtual Camera"
311
+ if is_streaming_only == False:
312
+ update_status(f'Creating {os.path.basename(v.finalname)} with {fps} FPS...')
313
+
314
  start_processing = time()
315
+ if is_streaming_only == False and roop.globals.keep_frames or not use_new_method:
316
  util.create_temp(v.filename)
317
  update_status('Extracting frames...')
318
  ffmpeg.extract_frames(v.filename,v.startframe,v.endframe, fps)
 
340
  skip_audio = True
341
  else:
342
  skip_audio = roop.globals.skip_audio
343
+ process_mgr.run_batch_inmem(output_method, v.filename, v.finalname, v.startframe, v.endframe, fps,roop.globals.execution_threads)
344
 
345
  if not roop.globals.processing:
346
  end_processing('Processing stopped!')
 
369
  os.remove(video_file_name)
370
  else:
371
  shutil.move(video_file_name, destination)
 
372
 
373
+ elif is_streaming_only == False:
374
  update_status(f'Failed processing {os.path.basename(v.finalname)}!')
375
+ elapsed_time = time() - start_processing
376
+ average_fps = (v.endframe - v.startframe) / elapsed_time
377
+ update_status(f'\nProcessing {os.path.basename(destination)} took {elapsed_time:.2f} secs, {average_fps:.2f} frames/s')
378
  end_processing('Finished')
379
 
380
 
 
396
  if not pre_check():
397
  return
398
  roop.globals.CFG = Settings('config.yaml')
399
+ roop.globals.cuda_device_id = roop.globals.startup_args.cuda_device_id
400
  roop.globals.execution_threads = roop.globals.CFG.max_threads
401
  roop.globals.video_encoder = roop.globals.CFG.output_video_codec
402
  roop.globals.video_quality = roop.globals.CFG.video_quality
403
  roop.globals.max_memory = roop.globals.CFG.memory_limit if roop.globals.CFG.memory_limit > 0 else None
404
+ if roop.globals.startup_args.server_share:
405
+ roop.globals.CFG.server_share = True
406
  main.run()
roop/face_util.py CHANGED
@@ -9,18 +9,18 @@ import cv2
9
  import numpy as np
10
  from skimage import transform as trans
11
  from roop.capturer import get_video_frame
12
- from roop.utilities import resolve_relative_path, conditional_download
13
 
14
  FACE_ANALYSER = None
15
- THREAD_LOCK_ANALYSER = threading.Lock()
16
- THREAD_LOCK_SWAPPER = threading.Lock()
17
  FACE_SWAPPER = None
18
 
19
 
20
  def get_face_analyser() -> Any:
21
  global FACE_ANALYSER
22
 
23
- with THREAD_LOCK_ANALYSER:
24
  if FACE_ANALYSER is None or roop.globals.g_current_face_analysis != roop.globals.g_desired_face_analysis:
25
  model_path = resolve_relative_path('..')
26
  # removed genderage
@@ -210,27 +210,59 @@ arcface_dst = np.array(
210
  )
211
 
212
 
213
- def estimate_norm(lmk, image_size=112, mode="arcface"):
214
  assert lmk.shape == (5, 2)
215
- assert image_size % 112 == 0 or image_size % 128 == 0
216
  if image_size % 112 == 0:
217
  ratio = float(image_size) / 112.0
218
  diff_x = 0
219
- else:
220
  ratio = float(image_size) / 128.0
221
  diff_x = 8.0 * ratio
 
 
 
 
222
  dst = arcface_dst * ratio
223
  dst[:, 0] += diff_x
224
  tform = trans.SimilarityTransform()
225
  tform.estimate(lmk, dst)
226
  M = tform.params[0:2, :]
227
  return M
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
 
230
 
231
  # aligned, M = norm_crop2(f[1], face.kps, 512)
232
  def align_crop(img, landmark, image_size=112, mode="arcface"):
233
- M = estimate_norm(landmark, image_size, mode)
234
  warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
235
  return warped, M
236
 
 
9
  import numpy as np
10
  from skimage import transform as trans
11
  from roop.capturer import get_video_frame
12
+ from roop.utilities import resolve_relative_path, conditional_thread_semaphore
13
 
14
  FACE_ANALYSER = None
15
+ #THREAD_LOCK_ANALYSER = threading.Lock()
16
+ #THREAD_LOCK_SWAPPER = threading.Lock()
17
  FACE_SWAPPER = None
18
 
19
 
20
  def get_face_analyser() -> Any:
21
  global FACE_ANALYSER
22
 
23
+ with conditional_thread_semaphore():
24
  if FACE_ANALYSER is None or roop.globals.g_current_face_analysis != roop.globals.g_desired_face_analysis:
25
  model_path = resolve_relative_path('..')
26
  # removed genderage
 
210
  )
211
 
212
 
213
+ """ def estimate_norm(lmk, image_size=112):
214
  assert lmk.shape == (5, 2)
 
215
  if image_size % 112 == 0:
216
  ratio = float(image_size) / 112.0
217
  diff_x = 0
218
+ elif image_size % 128 == 0:
219
  ratio = float(image_size) / 128.0
220
  diff_x = 8.0 * ratio
221
+ elif image_size % 512 == 0:
222
+ ratio = float(image_size) / 512.0
223
+ diff_x = 32.0 * ratio
224
+
225
  dst = arcface_dst * ratio
226
  dst[:, 0] += diff_x
227
  tform = trans.SimilarityTransform()
228
  tform.estimate(lmk, dst)
229
  M = tform.params[0:2, :]
230
  return M
231
+ """
232
+
233
+ def estimate_norm(lmk, image_size=112):
234
+ if image_size%112==0:
235
+ ratio = float(image_size)/112.0
236
+ diff_x = 0
237
+ else:
238
+ ratio = float(image_size)/128.0
239
+ diff_x = 8.0*ratio
240
+ dst = arcface_dst * ratio
241
+ dst[:,0] += diff_x
242
+
243
+ if image_size == 160:
244
+ dst[:,0] += 0.1
245
+ dst[:,1] += 0.1
246
+ elif image_size == 256:
247
+ dst[:,0] += 0.5
248
+ dst[:,1] += 0.5
249
+ elif image_size == 320:
250
+ dst[:,0] += 0.75
251
+ dst[:,1] += 0.75
252
+ elif image_size == 512:
253
+ dst[:,0] += 1.5
254
+ dst[:,1] += 1.5
255
+
256
+ tform = trans.SimilarityTransform()
257
+ tform.estimate(lmk, dst)
258
+ M = tform.params[0:2, :]
259
+ return M
260
 
261
 
262
 
263
  # aligned, M = norm_crop2(f[1], face.kps, 512)
264
  def align_crop(img, landmark, image_size=112, mode="arcface"):
265
+ M = estimate_norm(landmark, image_size)
266
  warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
267
  return warped, M
268
 
roop/globals.py CHANGED
@@ -5,7 +5,9 @@ source_path = None
5
  target_path = None
6
  output_path = None
7
  target_folder_path = None
 
8
 
 
9
  frame_processors: List[str] = []
10
  keep_fps = None
11
  keep_frames = None
@@ -26,6 +28,7 @@ execution_threads = None
26
  headless = None
27
  log_level = 'error'
28
  selected_enhancer = None
 
29
  face_swap_mode = None
30
  blend_ratio = 0.5
31
  distance_threshold = 0.65
 
5
  target_path = None
6
  output_path = None
7
  target_folder_path = None
8
+ startup_args = None
9
 
10
+ cuda_device_id = 0
11
  frame_processors: List[str] = []
12
  keep_fps = None
13
  keep_frames = None
 
28
  headless = None
29
  log_level = 'error'
30
  selected_enhancer = None
31
+ subsample_size = 128
32
  face_swap_mode = None
33
  blend_ratio = 0.5
34
  distance_threshold = 0.65
roop/metadata.py CHANGED
@@ -1,2 +1,2 @@
1
  name = 'roop unleashed'
2
- version = '4.0.0'
 
1
  name = 'roop unleashed'
2
+ version = '4.4.0'
roop/util_ffmpeg.py CHANGED
@@ -73,12 +73,32 @@ def create_video(target_path: str, dest_filename: str, fps: float = 24.0, temp_d
73
 
74
 
75
  def create_gif_from_video(video_path: str, gif_path):
76
- from roop.capturer import get_video_frame
77
 
78
  fps = util.detect_fps(video_path)
79
  frame = get_video_frame(video_path)
 
80
 
81
- run_ffmpeg(['-i', video_path, '-vf', f'fps={fps},scale={frame.shape[0]}:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse', '-loop', '0', gif_path])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
 
84
  def restore_audio(intermediate_video: str, original_video: str, trim_frame_start, trim_frame_end, final_video : str) -> None:
 
73
 
74
 
75
  def create_gif_from_video(video_path: str, gif_path):
76
+ from roop.capturer import get_video_frame, release_video
77
 
78
  fps = util.detect_fps(video_path)
79
  frame = get_video_frame(video_path)
80
+ release_video()
81
 
82
+ scalex = frame.shape[0]
83
+ scaley = frame.shape[1]
84
+
85
+ if scalex >= scaley:
86
+ scaley = -1
87
+ else:
88
+ scalex = -1
89
+
90
+ run_ffmpeg(['-i', video_path, '-vf', f'fps={fps},scale={int(scalex)}:{int(scaley)}:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse', '-loop', '0', gif_path])
91
+
92
+
93
+
94
+ def create_video_from_gif(gif_path: str, output_path):
95
+ fps = util.detect_fps(gif_path)
96
+ filter = """scale='trunc(in_w/2)*2':'trunc(in_h/2)*2',format=yuv420p,fps=10"""
97
+ run_ffmpeg(['-i', gif_path, '-vf', f'"{filter}"', '-movflags', '+faststart', '-shortest', output_path])
98
+
99
+
100
+ def repair_video(original_video: str, final_video : str):
101
+ run_ffmpeg(['-i', original_video, '-movflags', 'faststart', '-acodec', 'copy', '-vcodec', 'copy', final_video])
102
 
103
 
104
  def restore_audio(intermediate_video: str, original_video: str, trim_frame_start, trim_frame_end, final_video : str) -> None:
roop/utilities.py CHANGED
@@ -13,6 +13,12 @@ import tempfile
13
  import cv2
14
  import zipfile
15
  import traceback
 
 
 
 
 
 
16
 
17
  from pathlib import Path
18
  from typing import List, Any
@@ -26,6 +32,10 @@ import roop.globals
26
  TEMP_FILE = "temp.mp4"
27
  TEMP_DIRECTORY = "temp"
28
 
 
 
 
 
29
  # monkey patch ssl for mac
30
  if platform.system().lower() == "darwin":
31
  ssl._create_default_https_context = ssl._create_unverified_context
@@ -173,6 +183,8 @@ def has_extension(filepath: str, extensions: List[str]) -> bool:
173
 
174
  def is_image(image_path: str) -> bool:
175
  if image_path and os.path.isfile(image_path):
 
 
176
  mimetype, _ = mimetypes.guess_type(image_path)
177
  return bool(mimetype and mimetype.startswith("image/"))
178
  return False
@@ -337,3 +349,45 @@ gradio: {gradio.__version__}
337
 
338
  def compute_cosine_distance(emb1, emb2) -> float:
339
  return distance.cosine(emb1, emb2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  import cv2
14
  import zipfile
15
  import traceback
16
+ import threading
17
+ import threading
18
+ import random
19
+
20
+ from typing import Union, Any
21
+ from contextlib import nullcontext
22
 
23
  from pathlib import Path
24
  from typing import List, Any
 
32
  TEMP_FILE = "temp.mp4"
33
  TEMP_DIRECTORY = "temp"
34
 
35
+ THREAD_SEMAPHORE = threading.Semaphore()
36
+ NULL_CONTEXT = nullcontext()
37
+
38
+
39
  # monkey patch ssl for mac
40
  if platform.system().lower() == "darwin":
41
  ssl._create_default_https_context = ssl._create_unverified_context
 
183
 
184
  def is_image(image_path: str) -> bool:
185
  if image_path and os.path.isfile(image_path):
186
+ if image_path.endswith(".webp"):
187
+ return True
188
  mimetype, _ = mimetypes.guess_type(image_path)
189
  return bool(mimetype and mimetype.startswith("image/"))
190
  return False
 
349
 
350
  def compute_cosine_distance(emb1, emb2) -> float:
351
  return distance.cosine(emb1, emb2)
352
+
353
+ def has_cuda_device():
354
+ return torch.cuda is not None and torch.cuda.is_available()
355
+
356
+
357
+ def print_cuda_info():
358
+ try:
359
+ print(f'Number of CUDA devices: {torch.cuda.device_count()} Currently used Id: {torch.cuda.current_device()} Device Name: {torch.cuda.get_device_name(torch.cuda.current_device())}')
360
+ except:
361
+ print('No CUDA device found!')
362
+
363
+ def clean_dir(path: str):
364
+ contents = os.listdir(path)
365
+ for item in contents:
366
+ item_path = os.path.join(path, item)
367
+ try:
368
+ if os.path.isfile(item_path):
369
+ os.remove(item_path)
370
+ elif os.path.isdir(item_path):
371
+ shutil.rmtree(item_path)
372
+ except Exception as e:
373
+ print(e)
374
+
375
+
376
+ def conditional_thread_semaphore() -> Union[Any, Any]:
377
+ if 'DmlExecutionProvider' in roop.globals.execution_providers or 'ROCMExecutionProvider' in roop.globals.execution_providers:
378
+ return THREAD_SEMAPHORE
379
+ return NULL_CONTEXT
380
+
381
+ def shuffle_array(arr):
382
+ """
383
+ Shuffles the given array in place using the Fisher-Yates shuffle algorithm.
384
+
385
+ Args:
386
+ arr: The array to be shuffled.
387
+
388
+ Returns:
389
+ None. The array is shuffled in place.
390
+ """
391
+ for i in range(len(arr) - 1, 0, -1):
392
+ j = random.randint(0, i)
393
+ arr[i], arr[j] = arr[j], arr[i]
roop/virtualcam.py CHANGED
@@ -10,7 +10,7 @@ cam_active = False
10
  cam_thread = None
11
  vcam = None
12
 
13
- def virtualcamera(streamobs, cam_num,width,height):
14
  from roop.ProcessOptions import ProcessOptions
15
  from roop.core import live_swap, get_processing_plugins
16
 
@@ -44,10 +44,11 @@ def virtualcamera(streamobs, cam_num,width,height):
44
  print(f'Using {cam.native_fmt}')
45
  else:
46
  print(f'Not streaming to virtual camera!')
 
47
 
48
- # always use xseg masking
49
- options = ProcessOptions(get_processing_plugins("mask_xseg"), roop.globals.distance_threshold, roop.globals.blend_ratio,
50
- "all", 0, None, None, 1, False)
51
  while cam_active:
52
  ret, frame = cap.read()
53
  if not ret:
@@ -67,12 +68,12 @@ def virtualcamera(streamobs, cam_num,width,height):
67
 
68
 
69
 
70
- def start_virtual_cam(streamobs, cam_number, resolution):
71
  global cam_thread, cam_active
72
 
73
  if not cam_active:
74
  width, height = map(int, resolution.split('x'))
75
- cam_thread = threading.Thread(target=virtualcamera, args=[streamobs, cam_number, width, height])
76
  cam_thread.start()
77
 
78
 
@@ -83,5 +84,5 @@ def stop_virtual_cam():
83
  if cam_active:
84
  cam_active = False
85
  cam_thread.join()
86
-
87
 
 
10
  cam_thread = None
11
  vcam = None
12
 
13
+ def virtualcamera(swap_model, streamobs, use_xseg, use_mouthrestore, cam_num,width,height):
14
  from roop.ProcessOptions import ProcessOptions
15
  from roop.core import live_swap, get_processing_plugins
16
 
 
44
  print(f'Using {cam.native_fmt}')
45
  else:
46
  print(f'Not streaming to virtual camera!')
47
+ subsample_size = roop.globals.subsample_size
48
 
49
+
50
+ options = ProcessOptions(swap_model, get_processing_plugins("mask_xseg" if use_xseg else None), roop.globals.distance_threshold, roop.globals.blend_ratio,
51
+ "all", 0, None, None, 1, subsample_size, False, use_mouthrestore)
52
  while cam_active:
53
  ret, frame = cap.read()
54
  if not ret:
 
68
 
69
 
70
 
71
+ def start_virtual_cam(swap_model, streamobs, use_xseg, use_mouthrestore, cam_number, resolution):
72
  global cam_thread, cam_active
73
 
74
  if not cam_active:
75
  width, height = map(int, resolution.split('x'))
76
+ cam_thread = threading.Thread(target=virtualcamera, args=[swap_model, streamobs, use_xseg, use_mouthrestore, cam_number, width, height])
77
  cam_thread.start()
78
 
79
 
 
84
  if cam_active:
85
  cam_active = False
86
  cam_thread.join()
87
+
88