Upload 18 files
Browse files
- roop/ProcessMgr.py +237 -28
- roop/ProcessOptions.py +7 -2
- roop/StreamWriter.py +60 -0
- roop/capturer.py +21 -5
- roop/core.py +42 -14
- roop/face_util.py +40 -8
- roop/globals.py +3 -0
- roop/metadata.py +1 -1
- roop/util_ffmpeg.py +22 -2
- roop/utilities.py +54 -0
- roop/virtualcam.py +8 -7
roop/ProcessMgr.py
CHANGED
@@ -3,11 +3,10 @@ import cv2
 import numpy as np
 import psutil
 
-from enum import Enum
 from roop.ProcessOptions import ProcessOptions
 
-from roop.face_util import get_first_face, get_all_faces,
-from roop.utilities import compute_cosine_distance, get_device, str_to_class
+from roop.face_util import get_first_face, get_all_faces, rotate_anticlockwise, rotate_clockwise, clamp_cut_values
+from roop.utilities import compute_cosine_distance, get_device, str_to_class, shuffle_array
 import roop.vr_util as vr
 
 from typing import Any, List, Callable
@@ -17,15 +16,18 @@ from threading import Thread, Lock
 from queue import Queue
 from tqdm import tqdm
 from roop.ffmpeg_writer import FFMPEG_VideoWriter
+from roop.StreamWriter import StreamWriter
 import roop.globals
 
 
+
 # Poor man's enum to be able to compare to int
 class eNoFaceAction():
     USE_ORIGINAL_FRAME = 0
     RETRY_ROTATED = 1
     SKIP_FRAME = 2
-    SKIP_FRAME_IF_DISSIMILAR = 3
+    SKIP_FRAME_IF_DISSIMILAR = 3,
+    USE_LAST_SWAPPED = 4
 
 
 
@@ -44,6 +46,7 @@ def pick_queue(queue: Queue[str], queue_per_future: int) -> List[str]:
     return queues
 
 
+
 class ProcessMgr():
     input_face_datas = []
     target_face_datas = []
@@ -64,11 +67,16 @@ class ProcessMgr():
     processed_queue = None
 
     videowriter= None
+    streamwriter = None
 
     progress_gradio = None
     total_frames = 0
 
-
+    num_frames_no_face = 0
+    last_swapped_frame = None
+
+    output_to_file = None
+    output_to_cam = None
 
 
     plugins = {
@@ -101,12 +109,19 @@ class ProcessMgr():
    def initialize(self, input_faces, target_faces, options):
        self.input_face_datas = input_faces
        self.target_face_datas = target_faces
+       self.num_frames_no_face = 0
+       self.last_swapped_frame = None
        self.options = options
        devicename = get_device()

        roop.globals.g_desired_face_analysis=["landmark_3d_68", "landmark_2d_106","detection","recognition"]
        if options.swap_mode == "all_female" or options.swap_mode == "all_male":
            roop.globals.g_desired_face_analysis.append("genderage")
+       elif options.swap_mode == "all_random":
+           # don't modify original list
+           self.input_face_datas = input_faces.copy()
+           shuffle_array(self.input_face_datas)
+

        for p in self.processors:
            newp = next((x for x in options.processors.keys() if x == p.processorname), None)
@@ -123,6 +138,14 @@ class ProcessMgr():
                p = str_to_class(module, classname)
                if p is not None:
                    extoption.update({"devicename": devicename})
+                   if p.type == "swap":
+                       if self.options.swap_modelname == "InSwapper 128":
+                           extoption.update({"modelname": "inswapper_128.onnx"})
+                       elif self.options.swap_modelname == "ReSwapper 128":
+                           extoption.update({"modelname": "reswapper_128.onnx"})
+                       elif self.options.swap_modelname == "ReSwapper 256":
+                           extoption.update({"modelname": "reswapper_256.onnx"})
+
                    p.Initialize(extoption)
                    newprocessors.append(p)
                else:
@@ -185,7 +208,8 @@ class ProcessMgr():
            resimg = self.process_frame(temp_frame)
            if resimg is not None:
                i = source_files.index(f)
-               cv2.imwrite(target_files[i], resimg)
+               # Also let numpy write the file to support utf-8/16 filenames
+               cv2.imencode(f'.{roop.globals.CFG.output_image_format}',resimg)[1].tofile(target_files[i])
            if update:
                update()

@@ -239,7 +263,10 @@ class ProcessMgr():
            process, frame = self.processed_queue[nextindex % self.num_threads].get()
            nextindex += 1
            if frame is not None:
-               self.videowriter.write_frame(frame)
+               if self.output_to_file:
+                   self.videowriter.write_frame(frame)
+               if self.output_to_cam:
+                   self.streamwriter.WriteToStream(frame)
                del frame
            elif process == False:
                num_producers -= 1
@@ -248,7 +275,11 @@ class ProcessMgr():



-   def run_batch_inmem(self, source_video, target_video, frame_start, frame_end, fps, threads:int = 1):
+   def run_batch_inmem(self, output_method, source_video, target_video, frame_start, frame_end, fps, threads:int = 1):
+       if len(self.processors) < 1:
+           print("No processor defined!")
+           return
+
        cap = cv2.VideoCapture(source_video)
        # frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_count = (frame_end - frame_start) + 1
@@ -275,7 +306,13 @@ class ProcessMgr():
            self.frames_queue.append(Queue(1))
            self.processed_queue.append(Queue(1))

-       self.videowriter = FFMPEG_VideoWriter(target_video, (width, height), fps, codec=roop.globals.video_encoder, crf=roop.globals.video_quality, audiofile=None)
+       self.output_to_file = output_method != "Virtual Camera"
+       self.output_to_cam = output_method == "Virtual Camera" or output_method == "Both"
+
+       if self.output_to_file:
+           self.videowriter = FFMPEG_VideoWriter(target_video, (width, height), fps, codec=roop.globals.video_encoder, crf=roop.globals.video_quality, audiofile=None)
+       if self.output_to_cam:
+           self.streamwriter = StreamWriter((width, height), int(fps))

        readthread = Thread(target=self.read_frames_thread, args=(cap, frame_start, frame_end, threads))
        readthread.start()
@@ -298,7 +335,11 @@ class ProcessMgr():
        readthread.join()
        writethread.join()
        cap.release()
-       self.videowriter.close()
+       if self.output_to_file:
+           self.videowriter.close()
+       if self.output_to_cam:
+           self.streamwriter.Close()
+
        self.frames_queue.clear()
        self.processed_queue.clear()

@@ -317,11 +358,6 @@ class ProcessMgr():
        self.progress_gradio((progress.n, self.total_frames), desc='Processing', total=self.total_frames, unit='frames')


-   # https://github.com/deepinsight/insightface#third-party-re-implementation-of-arcface
-   # https://github.com/deepinsight/insightface/blob/master/alignment/coordinate_reg/image_infer.py
-   # https://github.com/deepinsight/insightface/issues/1350
-   # https://github.com/linghu8812/tensorrt_inference
-

    def process_frame(self, frame:Frame):
        if len(self.input_face_datas) < 1 and not self.options.show_face_masking:
@@ -332,8 +368,16 @@ class ProcessMgr():
        if roop.globals.no_face_action == eNoFaceAction.SKIP_FRAME_IF_DISSIMILAR:
            if len(self.input_face_datas) > num_swapped:
                return None
+           self.num_frames_no_face = 0
+           self.last_swapped_frame = temp_frame.copy()
            return temp_frame
-       if roop.globals.no_face_action == eNoFaceAction.USE_ORIGINAL_FRAME:
+       if roop.globals.no_face_action == eNoFaceAction.USE_LAST_SWAPPED:
+           if self.last_swapped_frame is not None and self.num_frames_no_face < self.options.max_num_reuse_frame:
+               self.num_frames_no_face += 1
+               return self.last_swapped_frame.copy()
+           return frame
+
+       elif roop.globals.no_face_action == eNoFaceAction.USE_ORIGINAL_FRAME:
            return frame
        if roop.globals.no_face_action == eNoFaceAction.SKIP_FRAME:
            #This only works with in-mem processing, as it simply skips the frame.
@@ -374,6 +418,8 @@ class ProcessMgr():

                num_faces_found += 1
                temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
+               del face
+
        else:
            faces = get_all_faces(frame)
            if faces is None:
@@ -383,7 +429,14 @@ class ProcessMgr():
                for face in faces:
                    num_faces_found += 1
                    temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
+
+           elif self.options.swap_mode == "all_input" or self.options.swap_mode == "all_random":
+               for i,face in enumerate(faces):
+                   num_faces_found += 1
+                   if i < len(self.input_face_datas):
+                       temp_frame = self.process_face(i, face, temp_frame)
+                   else:
+                       break

            elif self.options.swap_mode == "selected":
                num_targetfaces = len(self.target_face_datas)
@@ -397,7 +450,6 @@ class ProcessMgr():
                    else:
                        temp_frame = self.process_face(i, face, temp_frame)
                        num_faces_found += 1
-                   del face
                    if not roop.globals.vr_mode and num_faces_found == num_targetfaces:
                        break
            elif self.options.swap_mode == "all_female" or self.options.swap_mode == "all_male":
@@ -406,7 +458,13 @@ class ProcessMgr():
                    if face.sex == gender:
                        num_faces_found += 1
                        temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
+
+           # might be slower but way more clean to release everything here
+           for face in faces:
+               del face
+           faces.clear()
+
+

        if roop.globals.vr_mode and num_faces_found % 2 > 0:
            # stereo image, there has to be an even number of faces
@@ -541,17 +599,31 @@ class ProcessMgr():

        # img = vr.GetPerspective(frame, 90, theta, phi, 1280, 1280) # Generate perspective image

+
+       """ Code ported/adapted from Facefusion which borrowed the idea from Rope:
+           Kind of subsampling the cutout and aligned face image and faceswapping slices of it up to
+           the desired output resolution. This works around the current resolution limitations without using enhancers.
+       """
+       model_output_size = self.options.swap_output_size
+       subsample_size = max(self.options.subsample_size, model_output_size)
+       subsample_total = subsample_size // model_output_size
+       aligned_img, M = align_crop(frame, target_face.kps, subsample_size)
+
        fake_frame = aligned_img
-       swap_frame = aligned_img
        target_face.matrix = M
+
        for p in self.processors:
            if p.type == 'swap':
+               swap_result_frames = []
+               subsample_frames = self.implode_pixel_boost(aligned_img, model_output_size, subsample_total)
+               for sliced_frame in subsample_frames:
                    for _ in range(0,self.options.num_swap_steps):
+                       sliced_frame = self.prepare_crop_frame(sliced_frame)
+                       sliced_frame = p.Run(inputface, target_face, sliced_frame)
+                       sliced_frame = self.normalize_swap_frame(sliced_frame)
+                   swap_result_frames.append(sliced_frame)
+               fake_frame = self.explode_pixel_boost(swap_result_frames, model_output_size, subsample_total, subsample_size)
+               fake_frame = fake_frame.astype(np.uint8)
                scale_factor = 0.0
            elif p.type == 'mask':
                fake_frame = self.process_mask(p, aligned_img, fake_frame)
@@ -560,8 +632,8 @@ class ProcessMgr():

        upscale = 512
        orig_width = fake_frame.shape[1]
+       if orig_width != upscale:
+           fake_frame = cv2.resize(fake_frame, (upscale, upscale), cv2.INTER_CUBIC)
        mask_offsets = (0,0,0,0,1,20) if inputface is None else inputface.mask_offsets

@@ -571,9 +643,14 @@ class ProcessMgr():
        else:
            result = self.paste_upscale(fake_frame, enhanced_frame, target_face.matrix, frame, scale_factor, mask_offsets)

+       # Restore mouth before unrotating
+       if self.options.restore_original_mouth:
+           mouth_cutout, mouth_bb = self.create_mouth_mask(target_face, frame)
+           result = self.apply_mouth_area(result, mouth_cutout, mouth_bb)
+
        if rotation_action is not None:
            fake_frame = self.auto_unrotate_frame(result, rotation_action)
+           result = self.paste_simple(fake_frame, saved_frame, startX, startY)

        return result

@@ -673,6 +750,43 @@ class ProcessMgr():
        return cv2.GaussianBlur(img_matte, blur_size, 0)


+   def prepare_crop_frame(self, swap_frame):
+       model_type = 'inswapper'
+       model_mean = [0.0, 0.0, 0.0]
+       model_standard_deviation = [1.0, 1.0, 1.0]
+
+       if model_type == 'ghost':
+           swap_frame = swap_frame[:, :, ::-1] / 127.5 - 1
+       else:
+           swap_frame = swap_frame[:, :, ::-1] / 255.0
+       swap_frame = (swap_frame - model_mean) / model_standard_deviation
+       swap_frame = swap_frame.transpose(2, 0, 1)
+       swap_frame = np.expand_dims(swap_frame, axis = 0).astype(np.float32)
+       return swap_frame
+
+
+   def normalize_swap_frame(self, swap_frame):
+       model_type = 'inswapper'
+       swap_frame = swap_frame.transpose(1, 2, 0)
+
+       if model_type == 'ghost':
+           swap_frame = (swap_frame * 127.5 + 127.5).round()
+       else:
+           swap_frame = (swap_frame * 255.0).round()
+       swap_frame = swap_frame[:, :, ::-1]
+       return swap_frame
+
+   def implode_pixel_boost(self, aligned_face_frame, model_size, pixel_boost_total : int):
+       subsample_frame = aligned_face_frame.reshape(model_size, pixel_boost_total, model_size, pixel_boost_total, 3)
+       subsample_frame = subsample_frame.transpose(1, 3, 0, 2, 4).reshape(pixel_boost_total ** 2, model_size, model_size, 3)
+       return subsample_frame
+
+
+   def explode_pixel_boost(self, subsample_frame, model_size, pixel_boost_total, pixel_boost_size):
+       final_frame = np.stack(subsample_frame, axis = 0).reshape(pixel_boost_total, pixel_boost_total, model_size, model_size, 3)
+       final_frame = final_frame.transpose(2, 0, 3, 1, 4).reshape(pixel_boost_size, pixel_boost_size, 3)
+       return final_frame
+
    def process_mask(self, processor, frame:Frame, target:Frame):
        img_mask = processor.Run(frame, self.options.masking_text)
        img_mask = cv2.resize(img_mask, (target.shape[1], target.shape[0]))
@@ -688,7 +802,98 @@ class ProcessMgr():
        result += img_mask * frame.astype(np.float32)
        return np.uint8(result)

+
+   # Code for mouth restoration adapted from https://github.com/iVideoGameBoss/iRoopDeepFaceCam
+
+   def create_mouth_mask(self, face: Face, frame: Frame):
+       mouth_cutout = None
+
+       landmarks = face.landmark_2d_106
+       if landmarks is not None:
+           # Get mouth landmarks (indices 52 to 71 typically represent the outer mouth)
+           mouth_points = landmarks[52:71].astype(np.int32)
+
+           # Add padding to mouth area
+           min_x, min_y = np.min(mouth_points, axis=0)
+           max_x, max_y = np.max(mouth_points, axis=0)
+           min_x = max(0, min_x - (15*6))
+           min_y = max(0, min_y - 22)
+           max_x = min(frame.shape[1], max_x + (15*6))
+           max_y = min(frame.shape[0], max_y + (90*6))
+
+           # Extract the mouth area from the frame using the calculated bounding box
+           mouth_cutout = frame[min_y:max_y, min_x:max_x].copy()
+
+       return mouth_cutout, (min_x, min_y, max_x, max_y)
+
+
+
+   def create_feathered_mask(self, shape, feather_amount=30):
+       mask = np.zeros(shape[:2], dtype=np.float32)
+       center = (shape[1] // 2, shape[0] // 2)
+       cv2.ellipse(mask, center, (shape[1] // 2 - feather_amount, shape[0] // 2 - feather_amount),
+                   0, 0, 360, 1, -1)
+       mask = cv2.GaussianBlur(mask, (feather_amount*2+1, feather_amount*2+1), 0)
+       return mask / np.max(mask)
+
+   def apply_mouth_area(self, frame: np.ndarray, mouth_cutout: np.ndarray, mouth_box: tuple) -> np.ndarray:
+       min_x, min_y, max_x, max_y = mouth_box
+       box_width = max_x - min_x
+       box_height = max_y - min_y
+
+
+       # Resize the mouth cutout to match the mouth box size
+       if mouth_cutout is None or box_width is None or box_height is None:
+           return frame
+       try:
+           resized_mouth_cutout = cv2.resize(mouth_cutout, (box_width, box_height))
+
+           # Extract the region of interest (ROI) from the target frame
+           roi = frame[min_y:max_y, min_x:max_x]
+
+           # Ensure the ROI and resized_mouth_cutout have the same shape
+           if roi.shape != resized_mouth_cutout.shape:
+               resized_mouth_cutout = cv2.resize(resized_mouth_cutout, (roi.shape[1], roi.shape[0]))
+
+           # Apply color transfer from ROI to mouth cutout
+           color_corrected_mouth = self.apply_color_transfer(resized_mouth_cutout, roi)
+
+           # Create a feathered mask with increased feather amount
+           feather_amount = min(30, box_width // 15, box_height // 15)
+           mask = self.create_feathered_mask(resized_mouth_cutout.shape, feather_amount)
+
+           # Blend the color-corrected mouth cutout with the ROI using the feathered mask
+           mask = mask[:,:,np.newaxis]  # Add channel dimension to mask
+           blended = (color_corrected_mouth * mask + roi * (1 - mask)).astype(np.uint8)
+
+           # Place the blended result back into the frame
+           frame[min_y:max_y, min_x:max_x] = blended
+       except Exception as e:
+           print(f'Error {e}')
+           pass
+
+       return frame
+
+   def apply_color_transfer(self, source, target):
+       """
+       Apply color transfer from target to source image
+       """
+       source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype("float32")
+       target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype("float32")
+
+       source_mean, source_std = cv2.meanStdDev(source)
+       target_mean, target_std = cv2.meanStdDev(target)
+
+       # Reshape mean and std to be broadcastable
+       source_mean = source_mean.reshape(1, 1, 3)
+       source_std = source_std.reshape(1, 1, 3)
+       target_mean = target_mean.reshape(1, 1, 3)
+       target_std = target_std.reshape(1, 1, 3)
+
+       # Perform the color transfer
+       source = (source - source_mean) * (target_std / source_std) + target_mean
+       return cv2.cvtColor(np.clip(source, 0, 255).astype("uint8"), cv2.COLOR_LAB2BGR)
+

   def unload_models():
@@ -699,4 +904,8 @@ class ProcessMgr():
        for p in self.processors:
            p.Release()
        self.processors.clear()
+       if self.videowriter is not None:
+           self.videowriter.close()
+       if self.streamwriter is not None:
+           self.streamwriter.Close()
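Note: the new implode_pixel_boost/explode_pixel_boost pair is the core of the "pixel boost" trick the docstring credits to Facefusion/Rope. The aligned crop of size subsample_size is split into subsample_total**2 strided subsamples at model resolution, each slice is swapped independently, and the slices are interleaved back into the full-resolution crop. A minimal standalone NumPy sketch of that reshape/transpose round trip (function names mirror the diff; the 512/128 sizes are just an example):

```python
import numpy as np

def implode_pixel_boost(aligned, model_size, total):
    # Split an (S, S, 3) crop, S = model_size * total, into total**2
    # strided subsamples of shape (model_size, model_size, 3).
    sub = aligned.reshape(model_size, total, model_size, total, 3)
    return sub.transpose(1, 3, 0, 2, 4).reshape(total ** 2, model_size, model_size, 3)

def explode_pixel_boost(slices, model_size, total, boost_size):
    # Interleave the swapped slices back into the full-resolution crop.
    out = np.stack(slices, axis=0).reshape(total, total, model_size, model_size, 3)
    return out.transpose(2, 0, 3, 1, 4).reshape(boost_size, boost_size, 3)

# Round trip: explode(implode(x)) reproduces x exactly.
x = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)  # 512 = 128 * 4
slices = implode_pixel_boost(x, 128, 4)
assert np.array_equal(explode_pixel_boost(list(slices), 128, 4, 512), x)
```

Each slice is effectively the crop downsampled with stride `total` at a different pixel offset, so the 128x128 swapper sees the whole face in every slice and the interleave reassembles a sharp 512x512 result without an enhancer pass.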
roop/ProcessOptions.py
CHANGED
@@ -1,6 +1,8 @@
 class ProcessOptions:
 
-    def __init__(self, processordefines:dict, face_distance, blend_ratio, swap_mode, selected_index, masking_text, imagemask, num_steps, show_face_area, show_mask=False):
+    def __init__(self, swap_model, processordefines:dict, face_distance, blend_ratio, swap_mode, selected_index, masking_text, imagemask, num_steps, subsample_size, show_face_area, restore_original_mouth, show_mask=False):
+        self.swap_modelname = swap_model
+        self.swap_output_size = int(swap_model.split()[-1])
         self.processors = processordefines
         self.face_distance_threshold = face_distance
         self.blend_ratio = blend_ratio
@@ -10,4 +12,7 @@ class ProcessOptions:
         self.imagemask = imagemask
         self.num_swap_steps = num_steps
         self.show_face_area_overlay = show_face_area
-        self.show_face_masking = show_mask
+        self.show_face_masking = show_mask
+        self.subsample_size = subsample_size
+        self.restore_original_mouth = restore_original_mouth
+        self.max_num_reuse_frame = 15
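Note: `swap_output_size` is derived by parsing the trailing token of the model label, so swap model names are assumed to follow the "<Name> <resolution>" convention matched against in ProcessMgr.initialize. A quick illustration of that assumption:

```python
# The trailing token of the UI label is assumed to be the model resolution.
for label in ("InSwapper 128", "ReSwapper 128", "ReSwapper 256"):
    print(label, "->", int(label.split()[-1]))  # -> 128, 128, 256
```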
roop/StreamWriter.py
ADDED
@@ -0,0 +1,60 @@
+import threading
+import time
+import pyvirtualcam
+
+
+class StreamWriter():
+    FPS = 30
+    VCam = None
+    Active = False
+    THREAD_LOCK_STREAM = threading.Lock()
+    time_last_process = None
+    timespan_min = 0.0
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.Close()
+
+    def __init__(self, size, fps):
+        self.time_last_process = time.perf_counter()
+        self.FPS = fps
+        self.timespan_min = 1.0 / fps
+        print('Detecting virtual cam devices')
+        self.VCam = pyvirtualcam.Camera(width=size[0], height=size[1], fps=fps, fmt=pyvirtualcam.PixelFormat.BGR, print_fps=False)
+        if self.VCam is None:
+            print("No virtual camera found!")
+            return
+        print(f'Using virtual camera: {self.VCam.device}')
+        print(f'Using {self.VCam.native_fmt}')
+        self.Active = True
+
+
+    def LimitFrames(self):
+        while True:
+            current_time = time.perf_counter()
+            time_passed = current_time - self.time_last_process
+            if time_passed >= self.timespan_min:
+                break
+
+    # First version used a queue and threading. Surprisingly this
+    # totally simple, blocking version is 10 times faster!
+    def WriteToStream(self, frame):
+        if self.VCam is None:
+            return
+        with self.THREAD_LOCK_STREAM:
+            self.LimitFrames()
+            self.VCam.send(frame)
+            self.time_last_process = time.perf_counter()
+
+
+    def Close(self):
+        self.Active = False
+        if self.VCam is not None:
+            self.VCam.close()
+            self.VCam = None
+
+
+
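Note: `LimitFrames` paces `WriteToStream` by spinning on `time.perf_counter()` until at least 1/fps has elapsed; per the comment in the file, this blocking version outperformed an earlier queue-plus-thread design. A minimal standalone sketch of the same pacing idea (the class name and the `time.sleep` remark are mine, not from the diff):

```python
import time

class FramePacer:
    """Sketch of StreamWriter's frame limiter: block until 1/fps has passed."""
    def __init__(self, fps: int):
        self.timespan_min = 1.0 / fps
        self.time_last = time.perf_counter()

    def wait(self):
        # Busy-wait exactly like LimitFrames does; time.sleep(remaining) would
        # use less CPU but gives coarser timing on some platforms.
        while time.perf_counter() - self.time_last < self.timespan_min:
            pass
        self.time_last = time.perf_counter()

pacer = FramePacer(30)
for _ in range(3):
    pacer.wait()   # caps the loop at roughly 30 iterations per second
```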
roop/capturer.py
CHANGED
@@ -4,6 +4,10 @@ import numpy as np
 
 from roop.typing import Frame
 
+current_video_path = None
+current_frame_total = 0
+current_capture = None
+
 def get_image_frame(filename: str):
     try:
         return cv2.imdecode(np.fromfile(filename, dtype=np.uint8), cv2.IMREAD_COLOR)
@@ -13,15 +17,27 @@ def get_image_frame(filename: str):
 
 
 def get_video_frame(video_path: str, frame_number: int = 0) -> Optional[Frame]:
-    capture = cv2.VideoCapture(video_path)
-    frame_total = capture.get(cv2.CAP_PROP_FRAME_COUNT)
-    capture.set(cv2.CAP_PROP_POS_FRAMES, min(frame_total, frame_number - 1))
-    has_frame, frame = capture.read()
-    capture.release()
+    global current_video_path, current_capture, current_frame_total
+
+    if video_path != current_video_path:
+        release_video()
+        current_capture = cv2.VideoCapture(video_path)
+        current_video_path = video_path
+        current_frame_total = current_capture.get(cv2.CAP_PROP_FRAME_COUNT)
+
+    current_capture.set(cv2.CAP_PROP_POS_FRAMES, min(current_frame_total, frame_number - 1))
+    has_frame, frame = current_capture.read()
     if has_frame:
         return frame
     return None
 
+def release_video():
+    global current_capture
+
+    if current_capture is not None:
+        current_capture.release()
+        current_capture = None
+
 
 def get_video_frame_total(video_path: str) -> int:
     capture = cv2.VideoCapture(video_path)
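Note: the capturer now caches one `cv2.VideoCapture` per video path instead of opening and releasing a capture on every call, which makes repeated `get_video_frame` calls (for example, preview scrubbing) much cheaper. The trade-off is that callers must now release the cached capture themselves, as `create_gif_from_video` in util_ffmpeg.py does. A usage sketch ("video.mp4" is a placeholder path):

```python
from roop.capturer import get_video_frame, release_video

# The capture for "video.mp4" is opened on the first call and reused after.
for n in range(1, 101):
    frame = get_video_frame("video.mp4", n)
    if frame is None:
        break
release_video()  # the caller now owns releasing the cached capture
```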
roop/core.py
CHANGED
@@ -14,6 +14,7 @@ import signal
 import torch
 import onnxruntime
 import pathlib
+import argparse
 
 from time import time
 
@@ -27,7 +28,7 @@ from roop.face_util import extract_face_images
 from roop.ProcessEntry import ProcessEntry
 from roop.ProcessMgr import ProcessMgr
 from roop.ProcessOptions import ProcessOptions
-from roop.capturer import get_video_frame_total
+from roop.capturer import get_video_frame_total, release_video
 
 
 clip_text = None
@@ -47,9 +48,12 @@ warnings.filterwarnings('ignore', category=UserWarning, module='torchvision')
 def parse_args() -> None:
     signal.signal(signal.SIGINT, lambda signal_number, frame: destroy())
     roop.globals.headless = False
+
+    program = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=100))
+    program.add_argument('--server_share', help='Public server', dest='server_share', action='store_true', default=False)
+    program.add_argument('--cuda_device_id', help='Index of the cuda gpu to use', dest='cuda_device_id', type=int, default=0)
+    roop.globals.startup_args = program.parse_args()
     # Always enable all processors when using GUI
-    if len(sys.argv) > 1:
-        print('No CLI args supported - use Settings Tab instead')
     roop.globals.frame_processors = ['face_swapper', 'face_enhancer']
 
 
@@ -58,8 +62,20 @@ def encode_execution_providers(execution_providers: List[str]) -> List[str]:
 
 
 def decode_execution_providers(execution_providers: List[str]) -> List[str]:
-    return [provider for provider, encoded_execution_provider in zip(onnxruntime.get_available_providers(), encode_execution_providers(onnxruntime.get_available_providers()))
+    list_providers = [provider for provider, encoded_execution_provider in zip(onnxruntime.get_available_providers(), encode_execution_providers(onnxruntime.get_available_providers()))
             if any(execution_provider in encoded_execution_provider for execution_provider in execution_providers)]
+
+    try:
+        for i in range(len(list_providers)):
+            if list_providers[i] == 'CUDAExecutionProvider':
+                list_providers[i] = ('CUDAExecutionProvider', {'device_id': roop.globals.cuda_device_id})
+                torch.cuda.set_device(roop.globals.cuda_device_id)
+                break
+    except:
+        pass
+
+    return list_providers
+
 
 def suggest_max_memory() -> int:
@@ -118,6 +134,8 @@ def pre_check() -> bool:
 
     download_directory_path = util.resolve_relative_path('../models')
     util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/inswapper_128.onnx'])
+    util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/reswapper_128.onnx'])
+    util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/reswapper_256.onnx'])
     util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/GFPGANv1.4.onnx'])
     util.conditional_download(download_directory_path, ['https://github.com/csxmli2016/DMDNet/releases/download/v1/DMDNet.pth'])
     util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/GPEN-BFR-512.onnx'])
@@ -204,7 +222,7 @@ def live_swap(frame, options):
     return newframe
 
 
-def batch_process_regular(files:list[ProcessEntry], masking_engine:str, new_clip_text:str, use_new_method, imagemask, num_swap_steps, progress, selected_index = 0) -> None:
+def batch_process_regular(swap_model, output_method, files:list[ProcessEntry], masking_engine:str, new_clip_text:str, use_new_method, imagemask, restore_original_mouth, num_swap_steps, progress, selected_index = 0) -> None:
     global clip_text, process_mgr
 
     release_resources()
@@ -214,9 +232,11 @@ def batch_process_regular(files:list[ProcessEntry], masking_engine:str, new_clip
     mask = imagemask["layers"][0] if imagemask is not None else None
     if len(roop.globals.INPUT_FACESETS) <= selected_index:
         selected_index = 0
-    options = ProcessOptions(get_processing_plugins(masking_engine), roop.globals.distance_threshold, roop.globals.blend_ratio,
+    options = ProcessOptions(swap_model, get_processing_plugins(masking_engine), roop.globals.distance_threshold, roop.globals.blend_ratio,
+                              roop.globals.face_swap_mode, selected_index, new_clip_text, mask, num_swap_steps,
+                              roop.globals.subsample_size, False, restore_original_mouth)
     process_mgr.initialize(roop.globals.INPUT_FACESETS, roop.globals.TARGET_FACES, options)
-    batch_process(files, use_new_method)
+    batch_process(output_method, files, use_new_method)
     return
 
 def batch_process_with_options(files:list[ProcessEntry], options, progress):
@@ -230,11 +250,11 @@ def batch_process_with_options(files:list[ProcessEntry], options, progress):
     roop.globals.keep_frames = False
     roop.globals.wait_after_extraction = False
     roop.globals.skip_audio = False
-    batch_process(files, True)
+    batch_process("Files", files, True)
 
 
 
-def batch_process(files:list[ProcessEntry], use_new_method) -> None:
+def batch_process(output_method, files:list[ProcessEntry], use_new_method) -> None:
     global clip_text, process_mgr
 
     roop.globals.processing = True
@@ -287,9 +307,12 @@ def batch_process(files:list[ProcessEntry], use_new_method) -> None:
         if v.endframe == 0:
             v.endframe = get_video_frame_total(v.filename)
 
-        update_status(f'Creating {os.path.basename(v.finalname)} with {fps} FPS...')
+        is_streaming_only = output_method == "Virtual Camera"
+        if is_streaming_only == False:
+            update_status(f'Creating {os.path.basename(v.finalname)} with {fps} FPS...')
+
         start_processing = time()
-        if roop.globals.keep_frames or not use_new_method:
+        if is_streaming_only == False and roop.globals.keep_frames or not use_new_method:
             util.create_temp(v.filename)
             update_status('Extracting frames...')
             ffmpeg.extract_frames(v.filename,v.startframe,v.endframe, fps)
@@ -317,7 +340,7 @@ def batch_process(files:list[ProcessEntry], use_new_method) -> None:
                 skip_audio = True
             else:
                 skip_audio = roop.globals.skip_audio
-            process_mgr.run_batch_inmem(v.filename, v.finalname, v.startframe, v.endframe, fps,roop.globals.execution_threads)
+            process_mgr.run_batch_inmem(output_method, v.filename, v.finalname, v.startframe, v.endframe, fps,roop.globals.execution_threads)
 
         if not roop.globals.processing:
             end_processing('Processing stopped!')
@@ -346,10 +369,12 @@ def batch_process(files:list[ProcessEntry], use_new_method) -> None:
                     os.remove(video_file_name)
                 else:
                     shutil.move(video_file_name, destination)
-            update_status(f'\nProcessing {os.path.basename(destination)} took {time() - start_processing} secs')
 
-        else:
+        elif is_streaming_only == False:
             update_status(f'Failed processing {os.path.basename(v.finalname)}!')
+        elapsed_time = time() - start_processing
+        average_fps = (v.endframe - v.startframe) / elapsed_time
+        update_status(f'\nProcessing {os.path.basename(destination)} took {elapsed_time:.2f} secs, {average_fps:.2f} frames/s')
     end_processing('Finished')
 
 
@@ -371,8 +396,11 @@ def run() -> None:
     if not pre_check():
         return
     roop.globals.CFG = Settings('config.yaml')
+    roop.globals.cuda_device_id = roop.globals.startup_args.cuda_device_id
     roop.globals.execution_threads = roop.globals.CFG.max_threads
     roop.globals.video_encoder = roop.globals.CFG.output_video_codec
     roop.globals.video_quality = roop.globals.CFG.video_quality
     roop.globals.max_memory = roop.globals.CFG.memory_limit if roop.globals.CFG.memory_limit > 0 else None
+    if roop.globals.startup_args.server_share:
+        roop.globals.CFG.server_share = True
    main.run()
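Note: `decode_execution_providers` now swaps the plain `'CUDAExecutionProvider'` string for a `(name, options)` tuple so that onnxruntime sessions are pinned to `--cuda_device_id`; the tuple form is standard onnxruntime API. A minimal sketch of what the resulting provider list enables ("model.onnx" and `device_id=1` are placeholder values):

```python
import onnxruntime

# onnxruntime accepts (provider_name, options_dict) tuples in the providers
# list, which is how this commit routes inference to a specific GPU.
providers = [
    ('CUDAExecutionProvider', {'device_id': 1}),
    'CPUExecutionProvider',
]
session = onnxruntime.InferenceSession("model.onnx", providers=providers)
```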
roop/face_util.py
CHANGED
@@ -9,18 +9,18 @@ import cv2
 import numpy as np
 from skimage import transform as trans
 from roop.capturer import get_video_frame
-from roop.utilities import resolve_relative_path,
+from roop.utilities import resolve_relative_path, conditional_thread_semaphore
 
 FACE_ANALYSER = None
-THREAD_LOCK_ANALYSER = threading.Lock()
-THREAD_LOCK_SWAPPER = threading.Lock()
+#THREAD_LOCK_ANALYSER = threading.Lock()
+#THREAD_LOCK_SWAPPER = threading.Lock()
 FACE_SWAPPER = None
 
 
 def get_face_analyser() -> Any:
     global FACE_ANALYSER
 
-    with THREAD_LOCK_ANALYSER:
+    with conditional_thread_semaphore():
         if FACE_ANALYSER is None or roop.globals.g_current_face_analysis != roop.globals.g_desired_face_analysis:
             model_path = resolve_relative_path('..')
             # removed genderage
@@ -210,27 +210,59 @@ arcface_dst = np.array(
 )
 
 
-def estimate_norm(lmk, image_size=112):
+""" def estimate_norm(lmk, image_size=112):
     assert lmk.shape == (5, 2)
-    assert image_size % 112 == 0 or image_size % 128 == 0
     if image_size % 112 == 0:
         ratio = float(image_size) / 112.0
         diff_x = 0
-    else:
+    elif image_size % 128 == 0:
         ratio = float(image_size) / 128.0
         diff_x = 8.0 * ratio
+    elif image_size % 512 == 0:
+        ratio = float(image_size) / 512.0
+        diff_x = 32.0 * ratio
+
     dst = arcface_dst * ratio
     dst[:, 0] += diff_x
     tform = trans.SimilarityTransform()
     tform.estimate(lmk, dst)
     M = tform.params[0:2, :]
     return M
+"""
+
+def estimate_norm(lmk, image_size=112):
+    if image_size%112==0:
+        ratio = float(image_size)/112.0
+        diff_x = 0
+    else:
+        ratio = float(image_size)/128.0
+        diff_x = 8.0*ratio
+    dst = arcface_dst * ratio
+    dst[:,0] += diff_x
+
+    if image_size == 160:
+        dst[:,0] += 0.1
+        dst[:,1] += 0.1
+    elif image_size == 256:
+        dst[:,0] += 0.5
+        dst[:,1] += 0.5
+    elif image_size == 320:
+        dst[:,0] += 0.75
+        dst[:,1] += 0.75
+    elif image_size == 512:
+        dst[:,0] += 1.5
+        dst[:,1] += 1.5
+
+    tform = trans.SimilarityTransform()
+    tform.estimate(lmk, dst)
+    M = tform.params[0:2, :]
+    return M
 
 
 # aligned, M = norm_crop2(f[1], face.kps, 512)
 def align_crop(img, landmark, image_size=112, mode="arcface"):
-    M = estimate_norm(landmark, image_size, mode)
+    M = estimate_norm(landmark, image_size)
     warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
     return warped, M
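Note: the rewritten `estimate_norm` drops the old `%112`/`%128` assertion, scales the arcface five-point template to `image_size`, and nudges it by a small per-resolution offset (0.1 at 160 up to 1.5 at 512) before fitting a similarity transform. The result is a 2x3 affine matrix that `align_crop` hands to `cv2.warpAffine`. A small sketch (the keypoint values below are made up for illustration):

```python
import numpy as np
from roop.face_util import estimate_norm

# Five arcface keypoints: eyes, nose tip, mouth corners (illustrative values).
kps = np.array([[38.0, 52.0], [74.0, 51.0], [56.0, 72.0],
                [42.0, 92.0], [71.0, 92.0]], dtype=np.float32)
M = estimate_norm(kps, image_size=512)
print(M.shape)  # (2, 3) -- the matrix align_crop passes to cv2.warpAffine
```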
roop/globals.py
CHANGED
@@ -5,7 +5,9 @@ source_path = None
 target_path = None
 output_path = None
 target_folder_path = None
+startup_args = None
 
+cuda_device_id = 0
 frame_processors: List[str] = []
 keep_fps = None
 keep_frames = None
@@ -26,6 +28,7 @@ execution_threads = None
 headless = None
 log_level = 'error'
 selected_enhancer = None
+subsample_size = 128
 face_swap_mode = None
 blend_ratio = 0.5
 distance_threshold = 0.65
roop/metadata.py
CHANGED
@@ -1,2 +1,2 @@
 name = 'roop unleashed'
-version = '4.
+version = '4.4.0'
roop/util_ffmpeg.py
CHANGED
@@ -73,12 +73,32 @@ def create_video(target_path: str, dest_filename: str, fps: float = 24.0, temp_d
 
 
 def create_gif_from_video(video_path: str, gif_path):
-    from roop.capturer import get_video_frame
+    from roop.capturer import get_video_frame, release_video
 
     fps = util.detect_fps(video_path)
     frame = get_video_frame(video_path)
+    release_video()
 
+    scalex = frame.shape[0]
+    scaley = frame.shape[1]
+
+    if scalex >= scaley:
+        scaley = -1
+    else:
+        scalex = -1
+
+    run_ffmpeg(['-i', video_path, '-vf', f'fps={fps},scale={int(scalex)}:{int(scaley)}:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse', '-loop', '0', gif_path])
+
+
+
+def create_video_from_gif(gif_path: str, output_path):
+    fps = util.detect_fps(gif_path)
+    filter = """scale='trunc(in_w/2)*2':'trunc(in_h/2)*2',format=yuv420p,fps=10"""
+    run_ffmpeg(['-i', gif_path, '-vf', f'"{filter}"', '-movflags', '+faststart', '-shortest', output_path])
+
+
+def repair_video(original_video: str, final_video : str):
+    run_ffmpeg(['-i', original_video, '-movflags', 'faststart', '-acodec', 'copy', '-vcodec', 'copy', final_video])
 
 
 def restore_audio(intermediate_video: str, original_video: str, trim_frame_start, trim_frame_end, final_video : str) -> None:
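Note: `create_gif_from_video` now keeps the aspect ratio by fixing one dimension from the first frame and passing `-1` for the other (ffmpeg's "keep aspect" convention), and routes through `palettegen`/`paletteuse` for better GIF quality. Roughly the argument list it hands to `run_ffmpeg` (paths and fps below are placeholder values):

```python
# Sketch of the generated ffmpeg arguments; palettegen/paletteuse is the
# standard two-pass palette trick for high-quality GIF output.
fps, scalex, scaley = 25, -1, 480
args = ['-i', 'input.mp4', '-vf',
        f'fps={fps},scale={int(scalex)}:{int(scaley)}:flags=lanczos,'
        'split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse',
        '-loop', '0', 'output.gif']
```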
roop/utilities.py
CHANGED
@@ -13,6 +13,12 @@ import tempfile
 import cv2
 import zipfile
 import traceback
+import threading
+import threading
+import random
+
+from typing import Union, Any
+from contextlib import nullcontext
 
 from pathlib import Path
 from typing import List, Any
@@ -26,6 +32,10 @@ import roop.globals
 TEMP_FILE = "temp.mp4"
 TEMP_DIRECTORY = "temp"
 
+THREAD_SEMAPHORE = threading.Semaphore()
+NULL_CONTEXT = nullcontext()
+
+
 # monkey patch ssl for mac
 if platform.system().lower() == "darwin":
     ssl._create_default_https_context = ssl._create_unverified_context
@@ -173,6 +183,8 @@ def has_extension(filepath: str, extensions: List[str]) -> bool:
 
 def is_image(image_path: str) -> bool:
     if image_path and os.path.isfile(image_path):
+        if image_path.endswith(".webp"):
+            return True
         mimetype, _ = mimetypes.guess_type(image_path)
         return bool(mimetype and mimetype.startswith("image/"))
     return False
@@ -337,3 +349,45 @@ gradio: {gradio.__version__}
 
 def compute_cosine_distance(emb1, emb2) -> float:
     return distance.cosine(emb1, emb2)
+
+def has_cuda_device():
+    return torch.cuda is not None and torch.cuda.is_available()
+
+
+def print_cuda_info():
+    try:
+        print(f'Number of CUDA devices: {torch.cuda.device_count()} Currently used Id: {torch.cuda.current_device()} Device Name: {torch.cuda.get_device_name(torch.cuda.current_device())}')
+    except:
+        print('No CUDA device found!')
+
+def clean_dir(path: str):
+    contents = os.listdir(path)
+    for item in contents:
+        item_path = os.path.join(path, item)
+        try:
+            if os.path.isfile(item_path):
+                os.remove(item_path)
+            elif os.path.isdir(item_path):
+                shutil.rmtree(item_path)
+        except Exception as e:
+            print(e)
+
+
+def conditional_thread_semaphore() -> Union[Any, Any]:
+    if 'DmlExecutionProvider' in roop.globals.execution_providers or 'ROCMExecutionProvider' in roop.globals.execution_providers:
+        return THREAD_SEMAPHORE
+    return NULL_CONTEXT
+
+def shuffle_array(arr):
+    """
+    Shuffles the given array in place using the Fisher-Yates shuffle algorithm.
+
+    Args:
+        arr: The array to be shuffled.
+
+    Returns:
+        None. The array is shuffled in place.
+    """
+    for i in range(len(arr) - 1, 0, -1):
+        j = random.randint(0, i)
+        arr[i], arr[j] = arr[j], arr[i]
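Note: `conditional_thread_semaphore` lets call sites serialize model access only where it matters. It returns a real `threading.Semaphore` under the DirectML/ROCm providers and a no-op `nullcontext` everywhere else, so a single `with` statement works unconditionally; this is what replaces the dedicated locks in face_util.py. Usage sketch:

```python
from roop.utilities import conditional_thread_semaphore

# Same code path for all providers: the returned object is either a
# threading.Semaphore (DML/ROCm) or contextlib.nullcontext (everything else).
with conditional_thread_semaphore():
    pass  # model creation / inference would go here
```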
roop/virtualcam.py
CHANGED
@@ -10,7 +10,7 @@ cam_active = False
 cam_thread = None
 vcam = None
 
-def virtualcamera(streamobs, cam_num,width,height):
+def virtualcamera(swap_model, streamobs, use_xseg, use_mouthrestore, cam_num,width,height):
     from roop.ProcessOptions import ProcessOptions
     from roop.core import live_swap, get_processing_plugins
 
@@ -44,10 +44,11 @@ def virtualcamera(streamobs, cam_num,width,height):
         print(f'Using {cam.native_fmt}')
     else:
         print(f'Not streaming to virtual camera!')
+    subsample_size = roop.globals.subsample_size
 
-    options = ProcessOptions(get_processing_plugins("mask_xseg"), roop.globals.distance_threshold, roop.globals.blend_ratio,
-                             "all", 0, None, None, 1, False)
+
+    options = ProcessOptions(swap_model, get_processing_plugins("mask_xseg" if use_xseg else None), roop.globals.distance_threshold, roop.globals.blend_ratio,
+                             "all", 0, None, None, 1, subsample_size, False, use_mouthrestore)
     while cam_active:
         ret, frame = cap.read()
         if not ret:
@@ -67,12 +68,12 @@ def virtualcamera(streamobs, cam_num,width,height):
 
 
 
-def start_virtual_cam(streamobs, cam_number, resolution):
+def start_virtual_cam(swap_model, streamobs, use_xseg, use_mouthrestore, cam_number, resolution):
     global cam_thread, cam_active
 
     if not cam_active:
         width, height = map(int, resolution.split('x'))
-        cam_thread = threading.Thread(target=virtualcamera, args=[streamobs, cam_number, width, height])
+        cam_thread = threading.Thread(target=virtualcamera, args=[swap_model, streamobs, use_xseg, use_mouthrestore, cam_number, width, height])
         cam_thread.start()
 
@@ -83,5 +84,5 @@ def stop_virtual_cam():
     if cam_active:
         cam_active = False
         cam_thread.join()
-
+
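Note: the virtual camera entry points now thread the swap model label and the xseg/mouth-restore toggles through to `ProcessOptions`, so the live path honors the same settings as batch processing. A hypothetical call matching the new signature (all argument values are illustrative):

```python
from roop.virtualcam import start_virtual_cam, stop_virtual_cam

# swap model label, stream-to-OBS flag, use xseg masking, restore mouth,
# camera index, capture resolution
start_virtual_cam("InSwapper 128", True, False, False, 0, "1280x720")
# ... stream runs on its own thread until stopped
stop_virtual_cam()
```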