Spaces:

nroggendorff
/

vfs

Paused

App Files Community

nroggendorff committed on Oct 22, 2025

Commit

4251451

1 Parent(s): 77b40bf

add cuda support

Browse files

i hope

Update app.py

Update requirements.txt

Update requirements.txt

Files changed (2) hide show

app.py +92 -32
requirements.txt +5 -17

app.py CHANGED Viewed

@@ -23,15 +23,15 @@ def load_pipeline():
             safety_checker=None,
         )
         pipe = pipe.to(device)
     return pipe
 @spaces.GPU
 def refine_with_img2img(image_path, strength=0.3, steps=30, seed=42):
     pipeline = load_pipeline()
     img = Image.open(image_path).convert("RGB")
     generator = torch.Generator(device=device).manual_seed(seed)
     result = pipeline(
@@ -48,7 +48,9 @@ def refine_with_img2img(image_path, strength=0.3, steps=30, seed=42):
 @spaces.GPU
-def refine_video_with_img2img(video_path, strength=0.3, steps=30, seed=42):
     pipeline = load_pipeline()
     generator = torch.Generator(device=device).manual_seed(seed)
@@ -61,42 +63,72 @@ def refine_video_with_img2img(video_path, strength=0.3, steps=30, seed=42):
     temp_output = "temp_refined.mp4"
     out = cv2.VideoWriter(temp_output, fourcc, fps, (width, height))
     frame_count = 0
     while True:
         ret, frame = cap.read()
         if not ret:
             break
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         pil_frame = Image.fromarray(frame_rgb)
-        refined = pipeline(
-            prompt="high quality, detailed face, photorealistic, natural skin texture",
-            image=pil_frame,
-            strength=strength,
-            num_inference_steps=steps,
-            guidance_scale=7.5,
-            generator=generator,
-        ).images[0]
-        refined_cv = cv2.cvtColor(np.array(refined), cv2.COLOR_RGB2BGR)
-        out.write(refined_cv)
-        frame_count += 1
-        print(f"Processed frame {frame_count}")
     cap.release()
     out.release()
     os.replace(temp_output, video_path)
-def denoise_image(image_path, strength=10):
     img = cv2.imread(image_path)
-    denoised = cv2.fastNlMeansDenoisingColored(img, None, strength, strength, 7, 21)
     cv2.imwrite(image_path, denoised)
 def denoise_video(video_path, strength=10):
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS)
@@ -107,18 +139,39 @@ def denoise_video(video_path, strength=10):
     temp_output = "temp_denoised.mp4"
     out = cv2.VideoWriter(temp_output, fourcc, fps, (width, height))
     while True:
         ret, frame = cap.read()
         if not ret:
             break
-        denoised_frame = cv2.fastNlMeansDenoisingColored(
-            frame, None, strength, strength, 7, 21
-        )
         out.write(denoised_frame)
     cap.release()
     out.release()
     os.replace(temp_output, video_path)
@@ -133,6 +186,7 @@ def enhance_image(image_path):
     cv2.imwrite(image_path, enhanced)
 def process_media(
     image,
     image_or_video,
@@ -142,6 +196,7 @@ def process_media(
     img2img_strength,
     img2img_steps,
     seed,
 ):
     if os.path.exists("output_video.mp4"):
         os.remove("output_video.mp4")
@@ -157,7 +212,7 @@ def process_media(
         (".mp4", ".avi", ".mov")
     ):
         image.save("source.png")
-        cmd = f"python3 roop/run.py -s source.png -t '{image_or_video}' -o output_video.mp4"
         subprocess.run(cmd, shell=True)
         if os.path.exists("output_video.mp4"):
@@ -165,7 +220,6 @@ def process_media(
                 refine_video_with_img2img(
                     "output_video.mp4", img2img_strength, img2img_steps, seed
                 )
             denoise_video("output_video.mp4", denoise_strength)
             video_output = gr.Video(value="output_video.mp4", visible=True)
@@ -173,7 +227,7 @@ def process_media(
         (".png", ".jpg", ".jpeg")
     ):
         image.save("source.png")
-        cmd = f"python3 roop/run.py -s source.png -t '{image_or_video}' -o output_image.png"
         subprocess.run(cmd, shell=True)
         if os.path.exists("output_image.png"):
@@ -181,12 +235,9 @@ def process_media(
                 refine_with_img2img(
                     "output_image.png", img2img_strength, img2img_steps, seed
                 )
             denoise_image("output_image.png", denoise_strength)
             if enhance:
                 enhance_image("output_image.png")
             image_output = gr.Image(value="output_image.png", visible=True)
     return image_output, video_output
@@ -215,6 +266,11 @@ with gr.Blocks() as demo:
         )
         seed = gr.Number(label="Seed", value=42, precision=0)
     process_btn = gr.Button("Process")
     image_output = gr.Image(label="Output Image", visible=False)
@@ -231,6 +287,7 @@ with gr.Blocks() as demo:
             img2img_strength,
             img2img_steps,
             seed,
         ],
         outputs=[image_output, video_output],
     )
@@ -239,9 +296,12 @@ demo.queue()
 if __name__ == "__main__":
     if not os.path.exists("roop"):
-        Repo.clone_from("https://github.com/s0md3v/roop.git", "roop")
         subprocess.run("pip install -r roop/requirements.txt", shell=True)
     os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
     os.environ["TF_USE_LEGACY_KERAS"] = "1"
     demo.launch()

             safety_checker=None,
         )
         pipe = pipe.to(device)
+        if hasattr(pipe, "enable_attention_slicing"):
+            pipe.enable_attention_slicing()
     return pipe
 @spaces.GPU
 def refine_with_img2img(image_path, strength=0.3, steps=30, seed=42):
     pipeline = load_pipeline()
     img = Image.open(image_path).convert("RGB")
     generator = torch.Generator(device=device).manual_seed(seed)
     result = pipeline(
 @spaces.GPU
+def refine_video_with_img2img(
+    video_path, strength=0.3, steps=30, seed=42, batch_size=4
+):
     pipeline = load_pipeline()
     generator = torch.Generator(device=device).manual_seed(seed)
     temp_output = "temp_refined.mp4"
     out = cv2.VideoWriter(temp_output, fourcc, fps, (width, height))
+    frames_batch = []
     frame_count = 0
     while True:
         ret, frame = cap.read()
         if not ret:
+            if frames_batch:
+                for pil_frame in frames_batch:
+                    refined = pipeline(
+                        prompt="high quality, detailed face, photorealistic, natural skin texture",
+                        image=pil_frame,
+                        strength=strength,
+                        num_inference_steps=steps,
+                        guidance_scale=7.5,
+                        generator=generator,
+                    ).images[0]
+                    refined_cv = cv2.cvtColor(np.array(refined), cv2.COLOR_RGB2BGR)
+                    out.write(refined_cv)
             break
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         pil_frame = Image.fromarray(frame_rgb)
+        frames_batch.append(pil_frame)
+        if len(frames_batch) >= batch_size:
+            for pil_frame in frames_batch:
+                refined = pipeline(
+                    prompt="high quality, detailed face, photorealistic, natural skin texture",
+                    image=pil_frame,
+                    strength=strength,
+                    num_inference_steps=steps,
+                    guidance_scale=7.5,
+                    generator=generator,
+                ).images[0]
+                refined_cv = cv2.cvtColor(np.array(refined), cv2.COLOR_RGB2BGR)
+                out.write(refined_cv)
+                frame_count += 1
+                print(f"Processed frame {frame_count}")
+            frames_batch = []
     cap.release()
     out.release()
     os.replace(temp_output, video_path)
+def denoise_image_gpu(image_path, strength=10):
     img = cv2.imread(image_path)
+    img_gpu = cv2.cuda_GpuMat()
+    img_gpu.upload(img)
+    denoised_gpu = cv2.cuda.fastNlMeansDenoisingColored(
+        img_gpu, strength, strength, 7, 21
+    )
+    denoised = denoised_gpu.download()
     cv2.imwrite(image_path, denoised)
+def denoise_image(image_path, strength=10):
+    try:
+        denoise_image_gpu(image_path, strength)
+    except:
+        img = cv2.imread(image_path)
+        denoised = cv2.fastNlMeansDenoisingColored(img, None, strength, strength, 7, 21)
+        cv2.imwrite(image_path, denoised)
 def denoise_video(video_path, strength=10):
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS)
     temp_output = "temp_denoised.mp4"
     out = cv2.VideoWriter(temp_output, fourcc, fps, (width, height))
+    use_gpu = False
+    try:
+        test_gpu = cv2.cuda_GpuMat()
+        use_gpu = True
+    except:
+        pass
     while True:
         ret, frame = cap.read()
         if not ret:
             break
+        if use_gpu:
+            try:
+                frame_gpu = cv2.cuda_GpuMat()
+                frame_gpu.upload(frame)
+                denoised_gpu = cv2.cuda.fastNlMeansDenoisingColored(
+                    frame_gpu, strength, strength, 7, 21
+                )
+                denoised_frame = denoised_gpu.download()
+            except:
+                denoised_frame = cv2.fastNlMeansDenoisingColored(
+                    frame, None, strength, strength, 7, 21
+                )
+        else:
+            denoised_frame = cv2.fastNlMeansDenoisingColored(
+                frame, None, strength, strength, 7, 21
+            )
         out.write(denoised_frame)
     cap.release()
     out.release()
     os.replace(temp_output, video_path)
     cv2.imwrite(image_path, enhanced)
+@spaces.GPU
 def process_media(
     image,
     image_or_video,
     img2img_strength,
     img2img_steps,
     seed,
+    execution_provider,
 ):
     if os.path.exists("output_video.mp4"):
         os.remove("output_video.mp4")
         (".mp4", ".avi", ".mov")
     ):
         image.save("source.png")
+        cmd = f"python3 roop/run.py -s source.png -t '{image_or_video}' -o output_video.mp4 --execution-provider {execution_provider}"
         subprocess.run(cmd, shell=True)
         if os.path.exists("output_video.mp4"):
                 refine_video_with_img2img(
                     "output_video.mp4", img2img_strength, img2img_steps, seed
                 )
             denoise_video("output_video.mp4", denoise_strength)
             video_output = gr.Video(value="output_video.mp4", visible=True)
         (".png", ".jpg", ".jpeg")
     ):
         image.save("source.png")
+        cmd = f"python3 roop/run.py -s source.png -t '{image_or_video}' -o output_image.png --execution-provider {execution_provider}"
         subprocess.run(cmd, shell=True)
         if os.path.exists("output_image.png"):
                 refine_with_img2img(
                     "output_image.png", img2img_strength, img2img_steps, seed
                 )
             denoise_image("output_image.png", denoise_strength)
             if enhance:
                 enhance_image("output_image.png")
             image_output = gr.Image(value="output_image.png", visible=True)
     return image_output, video_output
         )
         seed = gr.Number(label="Seed", value=42, precision=0)
+    with gr.Row():
+        execution_provider = gr.Radio(
+            choices=["cuda", "tensorrt"], value="cuda", label="Roop Execution Provider"
+        )
     process_btn = gr.Button("Process")
     image_output = gr.Image(label="Output Image", visible=False)
             img2img_strength,
             img2img_steps,
             seed,
+            execution_provider,
         ],
         outputs=[image_output, video_output],
     )
 if __name__ == "__main__":
     if not os.path.exists("roop"):
+        Repo.clone_from("https://github.com/nroggendorff/roop.git", "roop")
         subprocess.run("pip install -r roop/requirements.txt", shell=True)
     os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
     os.environ["TF_USE_LEGACY_KERAS"] = "1"
+    os.environ["OMP_NUM_THREADS"] = "8"
+    os.environ["MKL_NUM_THREADS"] = "8"
     demo.launch()

requirements.txt CHANGED Viewed

@@ -1,19 +1,7 @@
-numpy==1.24.3
-opencv-python==4.7.0.72
-onnx==1.14.0
-gitpython==3.1.30
-pillow==9.5.0
-insightface==0.7.3
-psutil==5.9.5
-tk==0.1.0
-customtkinter==5.2.0
-typing-extensions>=4.8.0
-tkinterdnd2==0.3.0
-onnxruntime
-tensorflow>=2.14.0
-opennsfw2==0.10.2
-tqdm==4.65.0
 diffusers
 accelerate
-transformers
-torch==2.2

+# torch==2.2.0
+transformers
 diffusers
 accelerate
+gitpython
+opencv-python
+pillow