# Face-Swap-Frame / app.py
# Hugging Face Space by linoyts (HF Staff) — commit e651a6d ("Update app.py", verified)
import os
import gradio as gr
import numpy as np
import random
import spaces
import torch
from diffusers import Flux2KleinPipeline
from PIL import Image
# NEW: for reading first frame
import cv2
# Inference dtype/device: bf16 on GPU when available, CPU fallback otherwise.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Upper bound for seed values (matches the seed slider's maximum).
MAX_SEED = np.iinfo(np.int32).max

# Base model plus the face-swap LoRA stacked on top of it.
REPO_ID_DISTILLED = "black-forest-labs/FLUX.2-klein-9B"
LORA_REPO_ID = "Alissonerdx/BFS-Best-Face-Swap"
LORA_FILENAME = "bfs_head_v1_flux-klein_9b_step3750_rank64.safetensors"

# Instruction prompt the LoRA was trained on: Picture 1 is the scene/body
# frame, Picture 2 is the reference head to transplant.
FACE_SWAP_PROMPT = "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, nose structure of Picture 2. copy the direction of the eye, head rotation, micro expressions from Picture 1, high quality, sharp details, 4k."

# Module-level side effects: download/load the pipeline and apply the LoRA
# once at startup so every request reuses the same weights.
print("Loading FLUX.2 Klein 9B Distilled model...")
pipe = Flux2KleinPipeline.from_pretrained(REPO_ID_DISTILLED, torch_dtype=dtype)
pipe.to(device)
print(f"Loading LoRA from {LORA_REPO_ID}...")
pipe.load_lora_weights(LORA_REPO_ID, weight_name=LORA_FILENAME)
print("LoRA loaded successfully!")
def first_frame_from_video(video_value) -> Image.Image:
    """Decode the first frame of a Gradio Video value into a PIL image.

    Gradio has shipped several value shapes for the Video component over
    time — a plain filepath string, a dict with a "path" key, a
    (path, metadata) tuple/list, or an object exposing a ``.path``
    attribute — so each form is tried in turn.

    Returns None when no video was provided. Raises gr.Error when the
    file cannot be located on disk or its first frame cannot be decoded.
    """
    if video_value is None:
        return None

    # Resolve the on-disk path regardless of which value shape arrived.
    if isinstance(video_value, str):
        path = video_value
    elif isinstance(video_value, dict) and "path" in video_value:
        path = video_value["path"]
    elif isinstance(video_value, (list, tuple)) and video_value:
        path = video_value[0]  # e.g. (path, metadata) in older Gradio
    else:
        path = getattr(video_value, "path", None)  # last resort

    if not path or not os.path.exists(path):
        raise gr.Error("Could not read the uploaded video file.")

    capture = cv2.VideoCapture(path)
    success, bgr_frame = capture.read()
    capture.release()

    if not success or bgr_frame is None:
        raise gr.Error("Could not extract the first frame from the video.")

    # OpenCV decodes to BGR; PIL expects RGB.
    return Image.fromarray(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB))
def update_dimensions_from_video(target_video):
    """Derive (width, height) slider values from the video's first frame.

    The longer side is pinned to 1024 and the shorter side scaled to keep
    the frame's aspect ratio; both sides are then snapped to multiples of 8
    and clamped to the sliders' [256, 1024] range. Falls back to a
    1024x1024 square when no video is available.
    """
    frame = first_frame_from_video(target_video)
    if frame is None:
        return 1024, 1024

    frame_w, frame_h = frame.size
    ratio = frame_w / frame_h

    if ratio >= 1:
        # Landscape (or exactly square): pin the width.
        out_w, out_h = 1024, int(1024 / ratio)
    else:
        # Portrait: pin the height.
        out_w, out_h = int(1024 * ratio), 1024

    # Snap to the sliders' step of 8, then clamp into their range.
    out_w = min(1024, max(256, round(out_w / 8) * 8))
    out_h = min(1024, max(256, round(out_h / 8) * 8))
    return out_w, out_h
@spaces.GPU(duration=85)
def face_swap(
    reference_face: Image.Image,
    target_video,  # Gradio Video value; only its first frame is used
    seed: int = 42,
    randomize_seed: bool = False,
    width: int = 1024,
    height: int = 1024,
    num_inference_steps: int = 4,
    guidance_scale: float = 1.0,
    progress=gr.Progress(track_tqdm=True)
):
    """Swap the head in the video's first frame with the reference face.

    Returns ((before_frame, after_image), seed_used) — a pair suitable for
    a gr.ImageSlider plus the seed that was actually used (randomized when
    requested). Raises gr.Error when either input is missing.
    """
    if reference_face is None or target_video is None:
        raise gr.Error("Please provide both a reference face and a target video!")

    # The pipeline operates on still images, so take the video's first frame.
    base_frame = first_frame_from_video(target_video)

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    rng = torch.Generator(device=device).manual_seed(seed)

    progress(0.2, desc="Swapping face...")

    # Picture 1 = scene/body frame, Picture 2 = reference head (prompt order).
    swapped = pipe(
        prompt=FACE_SWAP_PROMPT,
        image=[base_frame, reference_face],
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=rng,
    ).images[0]

    return (base_frame, swapped), seed
css = """
#col-container {
margin: 0 auto;
max-width: 1200px;
}
.image-container img {
object-fit: contain;
}
"""
# Build the UI. The theme is set here on the Blocks constructor — Gradio's
# documented place for it (launch() takes no theme argument).
with gr.Blocks(css=css, theme=gr.themes.Citrus()) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""# Face Swap with FLUX.2 Klein 9B
        Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggingface.co/Alissonerdx/BFS-Best-Face-Swap) LoRA
        """)
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    reference_face = gr.Image(
                        label="Reference Face",
                        type="pil",
                        sources=["upload"],
                        elem_classes="image-container"
                    )
                    # Target is a video; only its first frame is processed.
                    target_video = gr.Video(
                        label="Target Video (Body/Scene) - first frame will be used",
                        sources=["upload"]
                    )
                # Hidden manual re-run trigger; revealed once an auto swap ran.
                run_button = gr.Button("Swap Face", visible=False)
                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                    with gr.Row():
                        width = gr.Slider(
                            label="Width",
                            minimum=256,
                            maximum=1024,
                            step=8,
                            value=1024,
                        )
                        height = gr.Slider(
                            label="Height",
                            minimum=256,
                            maximum=1024,
                            step=8,
                            value=1024,
                        )
                    with gr.Row():
                        num_inference_steps = gr.Slider(
                            label="Inference Steps",
                            minimum=1,
                            maximum=20,
                            step=1,
                            value=4,
                            info="Number of denoising steps (4 is optimal for distilled model)"
                        )
                        guidance_scale = gr.Slider(
                            label="Guidance Scale",
                            minimum=0.0,
                            maximum=5.0,
                            step=0.1,
                            value=1.0,
                            info="How closely to follow the prompt (1.0 is optimal for distilled model)"
                        )
            # NOTE(review): source indentation was lost; the slider is placed
            # as the second child of the row (beside the inputs) — confirm
            # against the deployed layout.
            comparison_slider = gr.ImageSlider(
                label="Before / After",
                type="pil"
            )
        seed_output = gr.Number(label="Seed Used", visible=False)

        swap_inputs = [
            reference_face,
            target_video,
            seed,
            randomize_seed,
            width,
            height,
            num_inference_steps,
            guidance_scale
        ]
        swap_outputs = [comparison_slider, seed_output]

        def auto_swap_wrapper(ref_face, targ_vid, s, rand_s, w, h, steps, cfg):
            # Auto-run the swap once both inputs exist; otherwise clear the
            # slider and keep the manual button hidden.
            if ref_face is not None and targ_vid is not None:
                result = face_swap(ref_face, targ_vid, s, rand_s, w, h, steps, cfg)
                return result[0], result[1], gr.update(visible=True)
            return None, s, gr.update(visible=False)

        run_button.click(
            fn=face_swap,
            inputs=swap_inputs,
            outputs=swap_outputs,
        )

        reference_face.change(
            fn=auto_swap_wrapper,
            inputs=swap_inputs,
            outputs=[comparison_slider, seed_output, run_button],
        )

        # FIX: the original registered two independent .change listeners on
        # target_video, so the auto swap could run with stale width/height
        # values. Chain them with .then() so the dimension update completes
        # before the swap reads the sliders.
        target_video.change(
            fn=update_dimensions_from_video,
            inputs=[target_video],
            outputs=[width, height],
        ).then(
            fn=auto_swap_wrapper,
            inputs=swap_inputs,
            outputs=[comparison_slider, seed_output, run_button],
        )
if __name__ == "__main__":
    # FIX: Blocks.launch() has no `theme` parameter — theme is a gr.Blocks
    # constructor argument — so passing gr.themes.Citrus() here raises a
    # TypeError before the server starts.
    demo.launch(share=True)