Ffftdtd5dtft committed on
Commit
f4d1193
verified
1 Parent(s): d98c343

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -47
app.py CHANGED
@@ -1,69 +1,177 @@
1
- import torch
2
- from diffusers import StableDiffusionImg2ImgPipeline
3
  import gradio as gr
4
- import cv2
5
  import numpy as np
 
 
 
 
6
  import face_recognition
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- # Cargar el pipeline con el modelo FLUX.1-schnell
9
- pipe = StableDiffusionImg2ImgPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell")
10
- pipe.to("cuda") # Aseg煤rate de que est茅 en GPU para un mejor rendimiento
 
 
 
 
 
 
 
 
 
11
 
12
- # Funci贸n para realizar el faceswap
13
  def swap_faces(source_image, target_image):
14
- # Detectar las caras en ambas im谩genes
15
  source_face = face_recognition.face_locations(source_image)[0]
16
- source_encoding = face_recognition.face_encodings(source_image, [source_face])[0]
17
-
18
  target_face = face_recognition.face_locations(target_image)[0]
19
- target_encoding = face_recognition.face_encodings(target_image, [target_face])[0]
20
-
21
- # Obtener los puntos de la cara en la imagen original
22
- source_points = face_recognition.face_landmarks(source_image, [source_face])[0]
23
- target_points = face_recognition.face_landmarks(target_image, [target_face])[0]
24
-
25
- # Transformar la cara objetivo en la cara fuente
26
  target_face_image = target_image[target_face[0]:target_face[2], target_face[3]:target_face[1]]
27
  source_face_image = source_image[source_face[0]:source_face[2], source_face[3]:source_face[1]]
28
-
29
- # Redimensionar la cara fuente para que coincida con la cara objetivo
30
  target_face_resized = cv2.resize(target_face_image, (source_face_image.shape[1], source_face_image.shape[0]))
31
 
32
- # Crear una m谩scara para la cara
33
  mask = np.zeros_like(source_face_image)
34
- cv2.fillConvexPoly(mask, np.array(list(target_points.values())), (255, 255, 255))
35
 
36
- # Intercambiar las caras
37
  swapped_face = cv2.seamlessClone(target_face_resized, source_image, mask, (source_face[3] + source_face_image.shape[1]//2, source_face[0] + source_face_image.shape[0]//2), cv2.NORMAL_CLONE)
38
 
39
  return swapped_face
40
 
41
- # Funci贸n de generaci贸n de imagen
42
- def generate_image(init_image, strength, prompt, reference_image=None):
43
- with torch.cuda.amp.autocast():
44
- generated_image = pipe(prompt=prompt, init_image=init_image, strength=strength).images[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- # Si se proporciona una imagen de referencia, realizar el face swap
47
- if reference_image is not None:
 
48
  generated_image = np.array(generated_image)
49
- generated_image = swap_faces(generated_image, np.array(reference_image))
50
  generated_image = Image.fromarray(generated_image)
51
 
52
- return generated_image
53
-
54
- # Configurar la interfaz de Gradio
55
- interface = gr.Interface(
56
- fn=generate_image,
57
- inputs=[
58
- gr.Image(source="upload", type="pil", label="Imagen Inicial"),
59
- gr.Slider(0.0, 1.0, value=0.75, label="Strength"),
60
- gr.Textbox(label="Prompt"),
61
- gr.Image(source="upload", type="pil", label="Imagen de Referencia (opcional)"),
62
- ],
63
- outputs=gr.Image(label="Imagen Generada"),
64
- title="Generador de Im谩genes Estilo Img2Img con FaceSwap",
65
- description="Genera im谩genes utilizando el modelo FLUX.1-schnell a partir de una imagen inicial y un prompt, con la opci贸n de intercambiar caras.",
66
- )
67
-
68
- # Ejecutar la aplicaci贸n
69
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import random

import cv2
import face_recognition
import gradio as gr
import numpy as np
import spaces
import torch
from diffusers import DiffusionPipeline, StableDiffusionImg2ImgPipeline
from PIL import Image
from transformers import CLIPTokenizer

# Device / dtype configuration: bfloat16 keeps FLUX's memory footprint low on GPU.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Two pipelines: FLUX.1-schnell for text-to-image, SD 1.5 for the img2img path.
pipe_diffusion = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
).to(device)
pipe_img2img = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5"
).to(device)

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 2048

# CLIP tokenizer used only to truncate over-long prompts to the 77-token limit.
# NOTE(review): the original also loaded a full CLIPTextModel here that was
# never referenced anywhere in the file — dropped to avoid wasting memory.
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
def truncate_prompt(prompt, max_length=77):
    """Truncate *prompt* to at most *max_length* CLIP tokens.

    CLIP-based text encoders cannot see past 77 tokens, so over-long prompts
    are tokenized with truncation enabled and decoded back to plain text.

    Args:
        prompt: Raw user prompt string.
        max_length: Maximum token count, including special tokens.

    Returns:
        The prompt re-decoded from its first ``max_length`` tokens.
    """
    # truncation=True lets the tokenizer handle BOS/EOS bookkeeping itself;
    # the original sliced input_ids[:max_length] by hand, which could cut
    # off the EOS token.
    inputs = tokenizer(
        prompt, truncation=True, max_length=max_length, return_tensors="pt"
    )
    return tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)
 
 
33
  def swap_faces(source_image, target_image):
 
34
  source_face = face_recognition.face_locations(source_image)[0]
 
 
35
  target_face = face_recognition.face_locations(target_image)[0]
36
+
 
 
 
 
 
 
37
  target_face_image = target_image[target_face[0]:target_face[2], target_face[3]:target_face[1]]
38
  source_face_image = source_image[source_face[0]:source_face[2], source_face[3]:source_face[1]]
39
+
 
40
  target_face_resized = cv2.resize(target_face_image, (source_face_image.shape[1], source_face_image.shape[0]))
41
 
 
42
  mask = np.zeros_like(source_face_image)
43
+ cv2.fillConvexPoly(mask, np.array(list(face_recognition.face_landmarks(target_image, [target_face])[0].values())), (255, 255, 255))
44
 
 
45
  swapped_face = cv2.seamlessClone(target_face_resized, source_image, mask, (source_face[3] + source_face_image.shape[1]//2, source_face[0] + source_face_image.shape[0]//2), cv2.NORMAL_CLONE)
46
 
47
  return swapped_face
48
 
@spaces.GPU()
def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, init_image=None, reference_image=None, img2img_strength=0.75, progress=gr.Progress(track_tqdm=True)):
    """Generate an image from *prompt*, optionally via img2img and face swap.

    Args:
        prompt: Text prompt (truncated to CLIP's 77-token limit).
        seed: RNG seed for reproducible generation.
        randomize_seed: When True, replaces *seed* with a random one.
        width: Output width for the text-to-image path.
        height: Output height for the text-to-image path.
        num_inference_steps: Denoising steps (FLUX schnell targets ~4).
        init_image: Optional starting image; switches to the img2img pipeline.
        reference_image: Optional face donor; triggers a face swap on the result.
        img2img_strength: How far img2img departs from *init_image* (0.0-1.0).
        progress: Gradio progress tracker linked to tqdm.

    Returns:
        Tuple of (PIL.Image result, seed actually used).
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator().manual_seed(seed)

    # Keep the prompt within CLIP's context window.
    prompt = truncate_prompt(prompt)

    # Explicit `is not None`: a numpy array here would make a bare
    # `if init_image:` raise an ambiguous-truth-value error.
    if init_image is not None:
        if not isinstance(init_image, Image.Image):
            init_image = Image.fromarray(np.array(init_image))
        init_image = init_image.convert("RGB")
        # Current diffusers API takes `image=`; the `init_image=` keyword the
        # original used was removed from StableDiffusionImg2ImgPipeline.
        generated_image = pipe_img2img(
            prompt=prompt,
            image=init_image,
            strength=img2img_strength,
            num_inference_steps=num_inference_steps,
            generator=generator,
        ).images[0]
    else:
        generated_image = pipe_diffusion(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=num_inference_steps,
            generator=generator,
            guidance_scale=0.0,  # FLUX schnell is distilled for CFG-free sampling
        ).images[0]

    # Optional face swap against the reference image.
    if reference_image is not None:
        swapped = swap_faces(np.array(generated_image), np.array(reference_image))
        generated_image = Image.fromarray(swapped)

    return generated_image, seed
# --- Gradio UI --------------------------------------------------------------

examples = [
    "a tiny astronaut hatching from an egg on the moon",
    "a cat holding a sign that says hello world",
    "an anime illustration of a wiener schnitzel",
]

css = """
#col-container {
    margin: 0 auto;
    max-width: 520px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Plain string (the original used an f-string with no placeholders);
        # mojibake in the Spanish description repaired.
        gr.Markdown("""# FLUX.1 [schnell] + Stable Diffusion img2img + Face Swap
        Combinación de generación de imágenes, transformación de imágenes con img2img, y Face Swap.
        """)

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            width = gr.Slider(
                label="Width",
                minimum=256,
                maximum=MAX_IMAGE_SIZE,
                step=32,
                value=1024,
            )
            height = gr.Slider(
                label="Height",
                minimum=256,
                maximum=MAX_IMAGE_SIZE,
                step=32,
                value=1024,
            )
            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=50,
                step=1,
                value=4,
            )
            init_image = gr.Image(type="pil", label="Imagen Inicial (opcional)")
            img2img_strength = gr.Slider(
                label="Img2Img Strength",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.75,
            )
            reference_image = gr.Image(type="pil", label="Imagen de Referencia (opcional)")

        gr.Examples(
            examples=examples,
            fn=infer,
            inputs=[prompt],
            outputs=[result, seed],
            cache_examples="lazy",
        )

    # Run inference from both the button click and Enter in the prompt box.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[prompt, seed, randomize_seed, width, height, num_inference_steps, init_image, reference_image, img2img_strength],
        outputs=[result, seed],
    )

demo.launch()