File size: 9,096 Bytes
31fe446
 
 
 
 
 
 
 
 
0c6554e
 
 
31fe446
 
 
 
 
 
 
 
 
5f6d3d5
31fe446
 
 
 
5f6d3d5
 
 
31fe446
0c6554e
 
31fe446
0c6554e
31fe446
0c6554e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31fe446
0c6554e
 
31fe446
0c6554e
 
 
 
 
 
 
 
 
 
 
 
31fe446
0c6554e
 
 
 
 
 
31fe446
0c6554e
31fe446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c6554e
31fe446
 
 
 
 
 
 
 
0c6554e
 
 
 
 
31fe446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64b6a2a
 
 
 
 
 
 
 
0c6554e
 
 
 
 
64b6a2a
0c6554e
64b6a2a
0c6554e
64b6a2a
 
 
 
 
 
 
 
0c6554e
64b6a2a
0c6554e
64b6a2a
 
 
 
 
 
 
 
0c6554e
64b6a2a
 
 
 
 
 
 
0c6554e
64b6a2a
 
 
 
 
 
 
 
 
0c6554e
64b6a2a
 
 
 
 
 
 
 
e2d89f9
 
 
 
 
 
31fe446
 
0c6554e
 
 
 
31fe446
 
 
0c6554e
31fe446
 
0c6554e
31fe446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c6554e
 
 
ec63cbd
 
31fe446
 
 
 
ec63cbd
31fe446
 
0c6554e
 
31fe446
 
ec63cbd
31fe446
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
import os
import gradio as gr
import numpy as np
import random
import spaces
import torch
from diffusers import Flux2KleinPipeline
from PIL import Image

# NEW: for reading first frame
import cv2

# Runtime placement: bfloat16 weights, on the GPU when one is visible.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16

# Largest seed the UI slider and the random seed picker may produce.
MAX_SEED = np.iinfo(np.int32).max

# Model / LoRA identifiers.
REPO_ID_DISTILLED = "black-forest-labs/FLUX.2-klein-9B"
LORA_REPO_ID = "Alissonerdx/BFS-Best-Face-Swap"
LORA_FILENAME = "bfs_head_v1_flux-klein_9b_step3750_rank64.safetensors"

# Fixed instruction sent with every request. Adjacent literals are joined by
# the parser, so this is one single-line prompt at runtime.
FACE_SWAP_PROMPT = (
    "start with Picture 2 as the base image, keeping its lighting, environment, "
    "background, and setting. remove the person from Picture 2 completely and "
    "replace them with the person from Picture 1, strictly preserving the face, "
    "hair, skin tone, and physical features of Picture 1. match the body position, "
    "posture, pose, and head rotation of the original person in Picture 2. blend "
    "the subject naturally into the scene so that the lighting and shadows on the "
    "body reflect the environment of Picture 2, high quality, sharp details, 4k."
)

# The pipeline is built once at import time and shared by every request.
print("Loading FLUX.2 Klein 9B Distilled model...")
pipe = Flux2KleinPipeline.from_pretrained(
    REPO_ID_DISTILLED,
    torch_dtype=dtype,
)
pipe.to(device)

# LoRA loading is currently disabled; LORA_REPO_ID / LORA_FILENAME are unused
# until these lines are re-enabled.
# print(f"Loading LoRA from {LORA_REPO_ID}...")
# pipe.load_lora_weights(LORA_REPO_ID, weight_name=LORA_FILENAME)
# print("LoRA loaded successfully!")


def first_frame_from_video(video_value) -> "Image.Image | None":
    """
    Convert a Gradio Video input into the first frame as a PIL Image.

    Accepted shapes of ``video_value``:
      - a filepath string
      - a dict with a "path" key
      - a non-empty list/tuple whose first item is the path
      - any other object exposing a ``path`` attribute

    Returns:
        The first frame as an RGB PIL image, or None when ``video_value``
        is None (nothing uploaded).

    Raises:
        gr.Error: if no usable path can be resolved, the file does not
            exist, or the first frame cannot be decoded.
    """
    if video_value is None:
        return None

    # Resolve a candidate path from whichever shape we were handed.
    if isinstance(video_value, str):
        video_path = video_value
    elif isinstance(video_value, dict):
        video_path = video_value.get("path")
    elif isinstance(video_value, (list, tuple)):
        # sometimes it's (path, metadata) or [path, ...]
        video_path = video_value[0] if video_value else None
    else:
        # last attempt: attribute access
        video_path = getattr(video_value, "path", None)

    # Only real filesystem paths are acceptable: anything else (nested dict,
    # number, ...) would make os.path.exists raise TypeError or be taken for
    # a file descriptor, so reject it with the same user-facing error.
    if not isinstance(video_path, (str, os.PathLike)) or not os.path.exists(video_path):
        raise gr.Error("Could not read the uploaded video file.")

    cap = cv2.VideoCapture(os.fspath(video_path))
    try:
        ok, frame_bgr = cap.read()
    finally:
        # Always hand the decoder back, even if read() blows up.
        cap.release()

    if not ok or frame_bgr is None:
        raise gr.Error("Could not extract the first frame from the video.")

    # OpenCV decodes to BGR; PIL expects RGB.
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    return Image.fromarray(frame_rgb)


def update_dimensions_from_video(target_video):
    """
    Derive output width/height from the first frame of the uploaded video.

    The longer side is pinned to 1024 and the shorter side is scaled by the
    frame's aspect ratio; both values are then snapped to a multiple of 8 and
    clamped to the 256..1024 range. With no video, (1024, 1024) is returned.

    Returns:
        A (width, height) tuple.
    """
    frame = first_frame_from_video(target_video)
    if frame is None:
        return 1024, 1024

    src_w, src_h = frame.size
    ratio = src_w / src_h

    if ratio < 1:
        # Portrait: height is the long side.
        dims = (int(1024 * ratio), 1024)
    else:
        # Landscape or square: width is the long side.
        dims = (1024, int(1024 / ratio))

    # Snap to the nearest multiple of 8, then clamp into the allowed range.
    snapped = [round(side / 8) * 8 for side in dims]
    out_w, out_h = (max(256, min(1024, side)) for side in snapped)

    return out_w, out_h


@spaces.GPU(duration=85)
def face_swap(
    reference_face: Image.Image,
    target_video,  # video input; only its first frame is used
    seed: int = 42,
    randomize_seed: bool = False,
    width: int = 1024,
    height: int = 1024,
    num_inference_steps: int = 4,
    guidance_scale: float = 1.0,
    progress=gr.Progress(track_tqdm=True)
):
    """
    Run the face-swap pipeline on the first frame of ``target_video``.

    Args:
        reference_face: image supplying the face/identity.
        target_video: Gradio video value; its first frame is the scene.
        seed: generator seed, ignored when ``randomize_seed`` is true.
        randomize_seed: draw a fresh seed in [0, MAX_SEED] for this call.
        width, height: size passed to the pipeline.
        num_inference_steps: denoising steps passed to the pipeline.
        guidance_scale: guidance value passed to the pipeline.
        progress: Gradio progress tracker.

    Returns:
        ``((first_frame, generated_image), seed)`` - the image pair for the
        before/after slider and the seed actually used.

    Raises:
        gr.Error: if either input is missing, or the frame cannot be read.
    """
    inputs_missing = reference_face is None or target_video is None
    if inputs_missing:
        raise gr.Error("Please provide both a reference face and a target video!")

    # The scene image is the video's first frame.
    base_frame = first_frame_from_video(target_video)

    seed = random.randint(0, MAX_SEED) if randomize_seed else seed
    rng = torch.Generator(device=device).manual_seed(seed)

    progress(0.2, desc="Swapping face...")

    # NOTE(review): FACE_SWAP_PROMPT calls the base scene "Picture 2" and the
    # identity source "Picture 1", while the list below is [scene, face].
    # Confirm the pipeline's picture numbering matches this order.
    result = pipe(
        prompt=FACE_SWAP_PROMPT,
        image=[base_frame, reference_face],
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=rng,
    )
    swapped = result.images[0]

    return (base_frame, swapped), seed


# Page-level CSS passed to gr.Blocks below: centres the "col-container"
# column with a 1200px maximum width, and sets object-fit: contain on images
# inside elements carrying the "image-container" class.
css = """
#col-container {
    margin: 0 auto;
    max-width: 1200px;
}
.image-container img {
    object-fit: contain;
}
"""

# UI layout and event wiring.
#
# Left column: reference face image, target video, the (initially hidden)
# "Swap Face" button and the advanced settings. Right column: before/after
# slider. A swap is triggered automatically whenever either input changes,
# and manually through the button once it has been revealed.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""# Face Swap with FLUX.2 Klein 9B

Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggingface.co/Alissonerdx/BFS-Best-Face-Swap) LoRA
        """)

        with gr.Row():
            with gr.Column():
                with gr.Row():
                    reference_face = gr.Image(
                        label="Reference Face",
                        type="pil",
                        sources=["upload"],
                        elem_classes="image-container"
                    )

                    # Target is a video; only its first frame is used.
                    target_video = gr.Video(
                        label="Target Video (Body/Scene) - first frame will be used",
                        sources=["upload"]
                    )

                # Hidden until the first automatic swap has run.
                run_button = gr.Button("Swap Face", visible=False)

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )

                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

                    with gr.Row():
                        width = gr.Slider(
                            label="Width",
                            minimum=256,
                            maximum=1024,
                            step=8,
                            value=1024,
                        )

                        height = gr.Slider(
                            label="Height",
                            minimum=256,
                            maximum=1024,
                            step=8,
                            value=1024,
                        )

                    with gr.Row():
                        num_inference_steps = gr.Slider(
                            label="Inference Steps",
                            minimum=1,
                            maximum=20,
                            step=1,
                            value=4,
                            info="Number of denoising steps (4 is optimal for distilled model)"
                        )

                        guidance_scale = gr.Slider(
                            label="Guidance Scale",
                            minimum=0.0,
                            maximum=5.0,
                            step=0.1,
                            value=1.0,
                            info="How closely to follow the prompt (1.0 is optimal for distilled model)"
                        )

            comparison_slider = gr.ImageSlider(
                label="Before / After",
                type="pil"
            )

        seed_output = gr.Number(label="Seed Used", visible=False)

    # Inputs/outputs shared by the manual and automatic swap paths.
    swap_inputs = [
        reference_face,
        target_video,
        seed,
        randomize_seed,
        width,
        height,
        num_inference_steps,
        guidance_scale
    ]
    swap_outputs = [comparison_slider, seed_output]
    auto_swap_outputs = [comparison_slider, seed_output, run_button]

    def auto_swap_wrapper(ref_face, targ_vid, s, rand_s, w, h, steps, cfg):
        """
        Run face_swap once both inputs are present and reveal the button.

        Returns (slider_value, seed, button_update). When an input is
        missing, the slider is cleared, the incoming seed is echoed back and
        the button is hidden.
        """
        if ref_face is not None and targ_vid is not None:
            result = face_swap(ref_face, targ_vid, s, rand_s, w, h, steps, cfg)
            return result[0], result[1], gr.update(visible=True)
        return None, s, gr.update(visible=False)

    run_button.click(
        fn=face_swap,
        inputs=swap_inputs,
        outputs=swap_outputs,
    )

    reference_face.change(
        fn=auto_swap_wrapper,
        inputs=swap_inputs,
        outputs=auto_swap_outputs,
    )

    # On a new target video: first refresh width/height from its first frame,
    # THEN run the automatic swap. Registering these as two independent
    # change listeners let the swap read the width/height sliders before the
    # resize had been applied. .success() also skips the swap when the video
    # could not be read, so the same error is not reported twice.
    target_video.change(
        fn=update_dimensions_from_video,
        inputs=[target_video],
        outputs=[width, height]
    ).success(
        fn=auto_swap_wrapper,
        inputs=swap_inputs,
        outputs=auto_swap_outputs,
    )

# Start the app only when this file is executed directly (not on import).
if __name__ == "__main__":
    launch_options = {
        "share": True,
        "theme": gr.themes.Citrus(),
    }
    demo.launch(**launch_options)