Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,10 +14,11 @@ from kolors.models.controlnet import ControlNetModel
|
|
| 14 |
from diffusers import AutoencoderKL
|
| 15 |
from kolors.models.unet_2d_condition import UNet2DConditionModel
|
| 16 |
from diffusers import EulerDiscreteScheduler
|
| 17 |
-
from PIL import Image
|
| 18 |
from annotator.midas import MidasDetector
|
| 19 |
from annotator.dwpose import DWposeDetector
|
| 20 |
from annotator.util import resize_image, HWC3
|
|
|
|
| 21 |
|
| 22 |
device = "cuda"
|
| 23 |
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
|
|
@@ -253,6 +254,59 @@ def load_description(fp):
|
|
| 253 |
content = f.read()
|
| 254 |
return content
|
| 255 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
with gr.Blocks(css=css) as Kolors:
|
| 257 |
with gr.Row():
|
| 258 |
with gr.Column(elem_id="col-left"):
|
|
@@ -263,7 +317,39 @@ with gr.Blocks(css=css) as Kolors:
|
|
| 263 |
lines=2
|
| 264 |
)
|
| 265 |
with gr.Row():
|
| 266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
with gr.Accordion("Advanced Settings", open=False):
|
| 268 |
negative_prompt = gr.Textbox(
|
| 269 |
label="Negative prompt",
|
|
|
|
| 14 |
from diffusers import AutoencoderKL
|
| 15 |
from kolors.models.unet_2d_condition import UNet2DConditionModel
|
| 16 |
from diffusers import EulerDiscreteScheduler
|
| 17 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 18 |
from annotator.midas import MidasDetector
|
| 19 |
from annotator.dwpose import DWposeDetector
|
| 20 |
from annotator.util import resize_image, HWC3
|
| 21 |
+
import os
|
| 22 |
|
| 23 |
device = "cuda"
|
| 24 |
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
|
|
|
|
| 254 |
content = f.read()
|
| 255 |
return content
|
| 256 |
|
| 257 |
+
# Add the text_to_image function
|
| 258 |
+
def _load_text_font(size, font_files=("Arial_Unicode.ttf",)):
    """Return the first loadable bundled font at ``size``, else Pillow's default.

    Font files are looked up next to this source file. A missing file is
    skipped silently; a file that exists but fails to load is reported and
    skipped.
    """
    base_dir = os.path.dirname(__file__)
    for font_file in font_files:
        font_path = os.path.join(base_dir, font_file)
        if not os.path.exists(font_path):
            continue
        try:
            font = ImageFont.truetype(font_path, size=size)
        except OSError:
            print(f"Error loading font: {font_file}")
            continue
        print(f"Using font: {font_file}")
        return font

    print("No suitable font found. Using default font.")
    try:
        # Newer Pillow releases accept a size for the built-in font, so the
        # fallback can honour the requested font size instead of a tiny bitmap.
        return ImageFont.load_default(size=size)
    except (TypeError, ImportError, OSError):
        # Older Pillow (no ``size`` parameter) or a build without FreeType.
        return ImageFont.load_default()


def text_to_image(text, size, position):
    """Render ``text`` in black on a white 1024x576 RGB image.

    Args:
        text: Text to draw; ``"\\n"`` separates lines. ``None`` is treated as
            an empty string.
        size: Font size passed to the font loader.
        position: One of ``"top-left"``, ``"top-center"``, ``"top-right"``,
            ``"middle-left"``, ``"middle-center"``, ``"middle-right"``,
            ``"bottom-left"``, ``"bottom-center"``, ``"bottom-right"``.
            Any other value falls back to ``"bottom-center"``.

    Returns:
        The ``PIL.Image.Image`` with the text block placed at ``position``.
        Lines are left-aligned within the block; the block as a whole is
        positioned using its widest line.
    """
    width, height = 1024, 576
    margin = 10
    image = Image.new("RGB", (width, height), "white")
    draw = ImageDraw.Draw(image)
    font = _load_text_font(size)

    lines = (text or "").split('\n')

    # Use one line advance for every line, measured from the drawing origin to
    # the lowest ink. Per-line ink heights (bottom - top) are 0 for blank lines
    # and too small for lines without tall glyphs, which made lines collapse
    # or overlap and let bottom-anchored text spill past the margin.
    line_height = draw.textbbox((0, 0), "Ag", font=font)[3]
    max_line_width = 0
    for line in lines:
        left, _top, right, bottom = draw.textbbox((0, 0), line, font=font)
        max_line_width = max(max_line_width, right - left)
        line_height = max(line_height, bottom)
    total_height = line_height * len(lines)

    x_left = margin
    x_center = (width - max_line_width) / 2
    x_right = width - max_line_width - margin
    y_top = margin
    y_middle = (height - total_height) / 2
    y_bottom = height - total_height - margin

    position_mapping = {
        "top-left": (x_left, y_top),
        "top-center": (x_center, y_top),
        "top-right": (x_right, y_top),
        "middle-left": (x_left, y_middle),
        "middle-center": (x_center, y_middle),
        "middle-right": (x_right, y_middle),
        "bottom-left": (x_left, y_bottom),
        "bottom-center": (x_center, y_bottom),
        "bottom-right": (x_right, y_bottom),
    }

    # Unknown positions keep the original fallback: bottom-center.
    x, y = position_mapping.get(position, position_mapping["bottom-center"])
    for line in lines:
        draw.text((x, y), line, fill="black", font=font)
        y += line_height

    return image
|
| 308 |
+
|
| 309 |
+
# Modify the main Gradio interface
|
| 310 |
with gr.Blocks(css=css) as Kolors:
|
| 311 |
with gr.Row():
|
| 312 |
with gr.Column(elem_id="col-left"):
|
|
|
|
| 317 |
lines=2
|
| 318 |
)
|
| 319 |
with gr.Row():
|
| 320 |
+
image_input_type = gr.Radio(["Upload Image", "Generate Text Image"], label="Input Type", value="Upload Image")
|
| 321 |
+
|
| 322 |
+
with gr.Row():
|
| 323 |
+
image = gr.Image(label="Image", type="pil", visible=True)
|
| 324 |
+
with gr.Column(visible=False) as text_image_inputs:
|
| 325 |
+
text_input = gr.Textbox(label="Enter Text", lines=5, placeholder="Type your text here...")
|
| 326 |
+
font_size = gr.Radio([48, 72, 96, 144], label="Font Size", value=72)
|
| 327 |
+
text_position = gr.Dropdown(
|
| 328 |
+
["top-left", "top-center", "top-right", "middle-left", "middle-center", "middle-right", "bottom-left", "bottom-center", "bottom-right"],
|
| 329 |
+
label="Text Position",
|
| 330 |
+
value="middle-center"
|
| 331 |
+
)
|
| 332 |
+
generate_text_image = gr.Button("Generate Text Image")
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
def toggle_image_input(choice):
    """Build visibility updates for the upload widget and the text-image inputs.

    The ``image`` component is visible only when ``choice`` is
    "Upload Image"; the ``text_image_inputs`` column is visible only when
    ``choice`` is "Generate Text Image".
    """
    show_upload = choice == "Upload Image"
    show_text_inputs = choice == "Generate Text Image"

    updates = {}
    updates[image] = gr.update(visible=show_upload)
    updates[text_image_inputs] = gr.update(visible=show_text_inputs)
    return updates
|
| 340 |
+
|
| 341 |
+
image_input_type.change(toggle_image_input, image_input_type, [image, text_image_inputs])
|
| 342 |
+
|
| 343 |
+
def generate_and_use_text_image(text, size, position):
    """Render the text via ``text_to_image`` and return the resulting image."""
    return text_to_image(text, size, position)
|
| 346 |
+
|
| 347 |
+
generate_text_image.click(
|
| 348 |
+
generate_and_use_text_image,
|
| 349 |
+
inputs=[text_input, font_size, text_position],
|
| 350 |
+
outputs=image
|
| 351 |
+
)
|
| 352 |
+
|
| 353 |
with gr.Accordion("Advanced Settings", open=False):
|
| 354 |
negative_prompt = gr.Textbox(
|
| 355 |
label="Negative prompt",
|