| import subprocess |
| from PIL import Image |
|
|
def download_file(url, output_filename):
    """Download *url* and save it to *output_filename*.

    Uses the standard library (``urllib.request``) instead of shelling out
    to the external ``wget`` binary, so the script also works on systems
    where wget is not installed.  On failure this raises
    ``urllib.error.URLError``/``HTTPError`` (stdlib analogue of the
    original ``subprocess.CalledProcessError``).
    """
    import urllib.request

    urllib.request.urlretrieve(url, output_filename)
|
|
# MediaPipe selfie-segmentation model checkpoints (hosted by Google).
url1 = 'https://storage.googleapis.com/mediapipe-models/image_segmenter/selfie_multiclass_256x256/float32/latest/selfie_multiclass_256x256.tflite'
url2 = 'https://storage.googleapis.com/mediapipe-models/image_segmenter/selfie_segmenter/float16/latest/selfie_segmenter.tflite'

# Local filenames the segmentation helpers below expect to find on disk.
filename1 = 'selfie_multiclass_256x256.tflite'
filename2 = 'selfie_segmenter.tflite'

# Fetch both checkpoints up front, before anything tries to load them.
for model_url, model_file in ((url1, filename1), (url2, filename2)):
    download_file(model_url, model_file)
|
|
| import cv2 |
| import mediapipe as mp |
| import numpy as np |
| from mediapipe.tasks import python |
| from mediapipe.tasks.python import vision |
| import random |
| import gradio as gr |
| import spaces |
| import torch |
| from diffusers import FluxInpaintPipeline |
| from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL |
| from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel |
| from transformers import CLIPTextModel, CLIPTokenizer,T5EncoderModel, T5TokenizerFast |
|
|
# Run the diffusion pipeline on GPU when available, otherwise fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


# Hugging Face repository id of the FLUX.1-dev base model.
bfl_repo="black-forest-labs/FLUX.1-dev"


# Colors used when rasterizing segmentation masks: the selected (masked)
# region is painted black (MASK_COLOR) on a white background (BG_COLOR).
BG_COLOR = (255, 255, 255)
MASK_COLOR = (0, 0 , 0)
|
|
def maskPerson(input):
    """Segment the image at path *input* and rasterize a person mask.

    Runs the MediaPipe multiclass selfie segmenter and paints pixels whose
    first confidence mask exceeds 0.2 in MASK_COLOR (black), everything
    else in BG_COLOR (white).  Returns the rasterized mask as a numpy
    array with the same shape as the source image.
    """
    base_options = python.BaseOptions(model_asset_path='selfie_multiclass_256x256.tflite')
    segmenter_options = vision.ImageSegmenterOptions(
        base_options=base_options,
        output_category_mask=True,
    )

    with vision.ImageSegmenter.create_from_options(segmenter_options) as segmenter:
        mp_image = mp.Image.create_from_file(input)
        result = segmenter.segment(mp_image)
        # NOTE(review): index 0 of confidence_masks — presumably the
        # background channel of the multiclass model; confirm against the
        # model card before relying on the semantics.
        confidence = result.confidence_masks[0].numpy_view()

        frame = mp_image.numpy_view()
        masked_fill = np.full(frame.shape, MASK_COLOR, dtype=np.uint8)
        background_fill = np.full(frame.shape, BG_COLOR, dtype=np.uint8)

        selected = np.stack((confidence,) * 3, axis=-1) > 0.2
        return np.where(selected, masked_fill, background_fill)
|
|
def maskHead(input):
    """Segment the image at path *input* and rasterize a head mask.

    Combines two confidence masks from the multiclass selfie segmenter —
    index 1 and index 3, named hair and face in the original code — via a
    per-pixel maximum, then paints pixels above 0.2 confidence in
    MASK_COLOR (black) on a BG_COLOR (white) background.
    """
    base_options = python.BaseOptions(model_asset_path='selfie_multiclass_256x256.tflite')
    segmenter_options = vision.ImageSegmenterOptions(
        base_options=base_options,
        output_category_mask=True,
    )

    with vision.ImageSegmenter.create_from_options(segmenter_options) as segmenter:
        mp_image = mp.Image.create_from_file(input)
        result = segmenter.segment(mp_image)

        # NOTE(review): indices 1 and 3 presumably correspond to the
        # hair and face-skin categories of the multiclass model — confirm.
        hair = result.confidence_masks[1].numpy_view()
        face = result.confidence_masks[3].numpy_view()
        combined = np.maximum(hair, face)

        frame = mp_image.numpy_view()
        masked_fill = np.full(frame.shape, MASK_COLOR, dtype=np.uint8)
        background_fill = np.full(frame.shape, BG_COLOR, dtype=np.uint8)

        selected = np.stack((combined,) * 3, axis=-1) > 0.2
        return np.where(selected, masked_fill, background_fill)
|
|
def random_positioning(input, output_size=(1024, 1024)):
    """Paste *input* at a random scale and position onto the default background.

    Parameters
    ----------
    input : numpy.ndarray
        Image (H, W, C) to place on the canvas.
    output_size : tuple[int, int]
        (width, height) of the output canvas; the background image
        ``default.jpeg`` is resized to this size.

    Returns
    -------
    numpy.ndarray
        The canvas with *input* pasted at a random offset.

    Raises
    ------
    ValueError
        If the background cannot be loaded, *input* is None, or the
        channel counts differ.
    """
    background = cv2.imread("default.jpeg")
    if background is None:
        raise ValueError("Unable to load background image")

    background = cv2.resize(background, output_size, interpolation=cv2.INTER_AREA)

    if input is None:
        raise ValueError("Unable to load input image")

    # Bug fix: clamp the scale so the pasted image always fits inside the
    # canvas.  Previously, inputs larger than output_size could yield
    # new_size > output_size, making random.randint(0, negative) raise
    # ValueError.  For inputs that already fit, fit_scale is 1.0 and the
    # behavior is unchanged.
    fit_scale = min(1.0,
                    output_size[0] / input.shape[1],
                    output_size[1] / input.shape[0])
    scale_factor = random.uniform(0.5, 1.0) * fit_scale
    new_size = (max(1, int(input.shape[1] * scale_factor)),
                max(1, int(input.shape[0] * scale_factor)))

    resized_image = cv2.resize(input, new_size, interpolation=cv2.INTER_AREA)

    if background.shape[2] != resized_image.shape[2]:
        raise ValueError("Input image and background image must have the same number of channels")

    # Random top-left corner such that the resized image stays in bounds.
    x_offset = random.randint(0, output_size[0] - new_size[0])
    y_offset = random.randint(0, output_size[1] - new_size[1])
    background[y_offset:y_offset+new_size[1], x_offset:x_offset+new_size[0]] = resized_image

    return background
|
|
|
|
def remove_background(image_path, mask):
    """Black out the region of the image at *image_path* selected by *mask*.

    The mask is inverted and binarized at 127, so source pixels where the
    original mask value is below 128 are kept; all other pixels of the
    result stay black.
    """
    source = cv2.imread(image_path)

    flipped = cv2.bitwise_not(mask)
    _, keep_region = cv2.threshold(flipped, 127, 255, cv2.THRESH_BINARY)

    output = np.zeros_like(source, dtype=np.uint8)
    selected = keep_region == 255
    output[selected] = source[selected]

    return output
| |
# FLUX.1-dev inpainting pipeline, loaded once at module import and moved
# to the selected device.
pipe = FluxInpaintPipeline.from_pretrained(bfl_repo, torch_dtype=torch.bfloat16).to(DEVICE)
# Upper bound for random seed selection (2**31 - 1).
MAX_SEED = np.iinfo(np.int32).max
# Phrase appended to every prompt — presumably the LoRA trigger token; confirm.
TRIGGER = "a photo of TOK"
|
|
|
|
@spaces.GPU(duration=75)
def execute(image, prompt, debug=False):
    """Run the fake "IP face adapter": place the uploaded photo on the
    default background, then inpaint twice with FLUX.

    Parameters
    ----------
    image : str
        Filepath of the uploaded image (Gradio ``type="filepath"``).
    prompt : str
        Text prompt; the TRIGGER phrase is appended automatically.
    debug : bool
        When True, the intermediate images and masks are included in the
        returned gallery.

    Returns
    -------
    list | None
        Gallery of result images, or None when an input is missing.
    """
    if not prompt:
        gr.Info("Please enter a text prompt.")
        return None

    if not image:
        # Fix: original message read "a image".
        gr.Info("Please upload an image.")
        return None

    img = cv2.imread(image)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Single randomized placement; kept as a list so more variants per
    # call can be added later.
    imgs = [random_positioning(img)]

    pipe.load_lora_weights("XLabs-AI/flux-RealismLora", weight_name='lora.safetensors')
    response = []

    seed_slicer = random.randint(0, MAX_SEED)
    generator = torch.Generator().manual_seed(seed_slicer)

    # Fix: iterate the images directly. The original
    # `for image in range(len(imgs))` used the index anti-pattern and
    # shadowed the `image` parameter.
    for current_img in imgs:
        cv2.imwrite('base_image.jpg', current_img)
        cv2.imwrite("mask_person.jpg", maskPerson('base_image.jpg'))
        cv2.imwrite("mask_face.jpg", maskHead('base_image.jpg'))

        im = Image.open('base_image.jpg')
        np_arr = np.array(im)
        # NOTE(review): PIL already yields RGB, so this swap produces BGR;
        # kept as-is to preserve the existing (color-swapped) behavior.
        rgb_image = cv2.cvtColor(np_arr, cv2.COLOR_BGR2RGB)
        im = Image.fromarray(rgb_image)

        person = np.array(Image.open('mask_person.jpg'))
        face = np.array(Image.open('mask_face.jpg'))

        person_gray = cv2.cvtColor(person, cv2.COLOR_BGR2GRAY)
        face_gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)

        # Merge the two masks: keep the person mask outside the face area
        # and the face mask inside it.
        _, mask = cv2.threshold(face_gray, 1, 255, cv2.THRESH_BINARY_INV)
        mask_inv = cv2.bitwise_not(mask)
        person_masked = cv2.bitwise_and(person_gray, person_gray, mask=mask_inv)
        face_masked = cv2.bitwise_and(face_gray, face_gray, mask=mask)
        result = cv2.add(person_masked, face_masked)
        cv2.imwrite('join.jpg', result)

        fund_mask = Image.open('join.jpg')

        # First pass: inpaint the scene using the combined person/face mask.
        result0 = pipe(
            prompt=f"{prompt} {TRIGGER}",
            image=im,
            mask_image=fund_mask,
            width=1024,
            height=1024,
            strength=0.85,
            generator=generator,
            num_inference_steps=28,
            max_sequence_length=256,
            joint_attention_kwargs={"scale": 0.9},
        ).images[0]

        arr = np.array(result0)
        rgb_image = cv2.cvtColor(arr, cv2.COLOR_BGR2RGB)
        cv2.imwrite('person.jpg', rgb_image)
        cv2.imwrite("mask.jpg", maskHead('person.jpg'))
        mask = Image.open('mask.jpg')

        # Second pass: refine only the head region of the first result.
        result = pipe(
            prompt=f"{prompt} {TRIGGER}",
            image=result0,
            mask_image=mask,
            width=1024,
            height=1024,
            strength=0.85,
            generator=generator,
            num_inference_steps=28,
            max_sequence_length=256,
            joint_attention_kwargs={"scale": 0.9},
        ).images[0]

        if debug:
            response.extend([im, person, face, fund_mask, result0, mask])

        response.append(result)

    return response
|
|
| |
| |
description = "This is an unofficial implementation of the ip face adapter for FLUX DEV and does not explicitly follow the ip face model, I created a wrapper with inpaint and mediapipe, I like to call Fake IP Adapter"
title = "Flux IP Face Adapter"

# Gradio UI: image upload (delivered as a file path), free-text prompt,
# and a debug toggle that adds the intermediate masks to the gallery.
ui_inputs = [
    gr.Image(type="filepath"),
    gr.Textbox(label="Prompt"),
    gr.Checkbox(label="Debug Mode"),
]

iface = gr.Interface(
    fn=execute,
    title=title,
    description=description,
    inputs=ui_inputs,
    outputs="gallery",
)

iface.launch(share=True, debug=True)