Spaces:
Sleeping
Sleeping
Commit
·
78551e3
1
Parent(s):
a6470eb
update
Browse files
app.py
CHANGED
|
@@ -23,7 +23,7 @@ if not os.path.exists('images2'):
|
|
| 23 |
with zipfile.ZipFile('images2.zip', 'r') as zip_ref:
|
| 24 |
zip_ref.extractall('.')
|
| 25 |
|
| 26 |
-
os.system('nvidia-smi')
|
| 27 |
os.system('ls')
|
| 28 |
|
| 29 |
#### import m1
|
|
@@ -186,6 +186,27 @@ def get_pixels(i, t, evt: gr.SelectData):
|
|
| 186 |
return image
|
| 187 |
|
| 188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
|
| 191 |
def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guidance,slider_batch,slider_temperature,slider_natural):
|
|
@@ -204,6 +225,7 @@ def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guida
|
|
| 204 |
user_prompt = f'{user_prompt}'
|
| 205 |
composed_prompt = user_prompt
|
| 206 |
prompt = tokenizer.encode(user_prompt)
|
|
|
|
| 207 |
else:
|
| 208 |
if len(stack) == 0:
|
| 209 |
|
|
@@ -245,6 +267,8 @@ def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guida
|
|
| 245 |
# user_prompt = prompt
|
| 246 |
current_ocr = ocrs
|
| 247 |
|
|
|
|
|
|
|
| 248 |
ocr_ids = []
|
| 249 |
print('user_prompt', user_prompt)
|
| 250 |
print('current_ocr', current_ocr)
|
|
@@ -284,7 +308,7 @@ def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guida
|
|
| 284 |
|
| 285 |
else:
|
| 286 |
user_prompt += ' <|endoftext|>'
|
| 287 |
-
|
| 288 |
|
| 289 |
for items in stack:
|
| 290 |
position, text = items
|
|
@@ -358,10 +382,10 @@ def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guida
|
|
| 358 |
row = index // 2
|
| 359 |
col = index % 2
|
| 360 |
new_image.paste(image, (col*width, row*height))
|
| 361 |
-
os.system('nvidia-smi')
|
| 362 |
torch.cuda.empty_cache()
|
| 363 |
-
os.system('nvidia-smi')
|
| 364 |
-
return tuple(results), composed_prompt
|
| 365 |
|
| 366 |
elif radio == 'TextDiffuser-2-LCM':
|
| 367 |
generator = torch.Generator(device=pipe.device).manual_seed(random.randint(0,1000))
|
|
@@ -373,10 +397,10 @@ def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guida
|
|
| 373 |
guidance_scale=1,
|
| 374 |
# num_images_per_prompt=slider_batch,
|
| 375 |
).images
|
| 376 |
-
os.system('nvidia-smi')
|
| 377 |
torch.cuda.empty_cache()
|
| 378 |
-
os.system('nvidia-smi')
|
| 379 |
-
return tuple(image), composed_prompt
|
| 380 |
|
| 381 |
with gr.Blocks() as demo:
|
| 382 |
|
|
@@ -428,7 +452,7 @@ with gr.Blocks() as demo:
|
|
| 428 |
t = gr.Textbox(label="Keyword", value='input_keyword')
|
| 429 |
redo = gr.Button(value='Redo - Cancel the last keyword')
|
| 430 |
undo = gr.Button(value='Undo - Clear the canvas')
|
| 431 |
-
skip_button = gr.Button(value='Skip - Operate next keyword')
|
| 432 |
|
| 433 |
i.select(get_pixels,[i,t],[i])
|
| 434 |
redo.click(exe_redo, [i,t],[i])
|
|
@@ -439,8 +463,8 @@ with gr.Blocks() as demo:
|
|
| 439 |
slider_natural = gr.Checkbox(label="Natural image generation", value=False, info="The text position and content info will not be incorporated.")
|
| 440 |
slider_step = gr.Slider(minimum=1, maximum=50, value=20, step=1, label="Sampling step", info="The sampling step for TextDiffuser-2. You may decease the step to 4 when using LCM.")
|
| 441 |
slider_guidance = gr.Slider(minimum=1, maximum=13, value=7.5, step=0.5, label="Scale of classifier-free guidance", info="The scale of cfg and is set to 7.5 in default. When using LCM, cfg is set to 1.")
|
| 442 |
-
slider_batch = gr.Slider(minimum=1, maximum=
|
| 443 |
-
slider_temperature = gr.Slider(minimum=0.1, maximum=2, value=
|
| 444 |
# slider_seed = gr.Slider(minimum=1, maximum=10000, label="Seed", randomize=True)
|
| 445 |
button = gr.Button("Generate")
|
| 446 |
|
|
@@ -450,8 +474,10 @@ with gr.Blocks() as demo:
|
|
| 450 |
with gr.Accordion("Intermediate results", open=False):
|
| 451 |
gr.Markdown("Composed prompt")
|
| 452 |
composed_prompt = gr.Textbox(label='')
|
|
|
|
|
|
|
| 453 |
|
| 454 |
-
button.click(text_to_image, inputs=[prompt,keywords,positive_prompt, radio,slider_step,slider_guidance,slider_batch,slider_temperature,slider_natural], outputs=[output, composed_prompt])
|
| 455 |
|
| 456 |
gr.Markdown("## Prompt Examples")
|
| 457 |
gr.Examples(
|
|
|
|
| 23 |
with zipfile.ZipFile('images2.zip', 'r') as zip_ref:
|
| 24 |
zip_ref.extractall('.')
|
| 25 |
|
| 26 |
+
# os.system('nvidia-smi')
|
| 27 |
os.system('ls')
|
| 28 |
|
| 29 |
#### import m1
|
|
|
|
| 186 |
return image
|
| 187 |
|
| 188 |
|
| 189 |
+
# Font used to label predicted text inside the layout preview boxes.
font_layout = ImageFont.truetype('./Arial.ttf', 16)


def get_layout_image(ocrs):
    """Render the layout planner's OCR listing as a 256x256 preview image.

    Parameters
    ----------
    ocrs : str
        Newline-separated entries of the form ``"<word ...> <l,t,r,b>"``,
        where the trailing token is a comma-separated bounding box given on
        a 128x128 grid (hence the *2 scaling below) — assumed from the
        scaling comment in the original; TODO confirm against the planner.

    Returns
    -------
    PIL.Image.Image
        A black 256x256 canvas with a red rectangle and the predicted text
        drawn for each OCR entry.
    """
    canvas = Image.new('RGB', (256, 256), (0, 0, 0))
    # Use the documented factory instead of instantiating ImageDraw.ImageDraw
    # directly (same behavior, idiomatic Pillow API).
    draw = ImageDraw.Draw(canvas)

    for raw_line in ocrs.split('\n'):
        raw_line = raw_line.strip()
        if not raw_line:
            # An empty line terminates the listing.
            # NOTE(review): an interior blank line would truncate the preview —
            # confirm the planner only emits a single trailing terminator.
            break
        tokens = raw_line.split()  # split once (original split the line twice)
        pred = ' '.join(tokens[:-1])
        box = tokens[-1]
        # Coordinates arrive on a 128x128 grid; scale up to the 256x256 canvas.
        l, t, r, b = [int(coord) * 2 for coord in box.split(',')]
        draw.rectangle([(l, t), (r, b)], outline="red")
        draw.text((l, t), pred, font=font_layout)

    return canvas
|
| 209 |
+
|
| 210 |
|
| 211 |
|
| 212 |
def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guidance,slider_batch,slider_temperature,slider_natural):
|
|
|
|
| 225 |
user_prompt = f'{user_prompt}'
|
| 226 |
composed_prompt = user_prompt
|
| 227 |
prompt = tokenizer.encode(user_prompt)
|
| 228 |
+
layout_image = None
|
| 229 |
else:
|
| 230 |
if len(stack) == 0:
|
| 231 |
|
|
|
|
| 267 |
# user_prompt = prompt
|
| 268 |
current_ocr = ocrs
|
| 269 |
|
| 270 |
+
layout_image = get_layout_image(ocrs)
|
| 271 |
+
|
| 272 |
ocr_ids = []
|
| 273 |
print('user_prompt', user_prompt)
|
| 274 |
print('current_ocr', current_ocr)
|
|
|
|
| 308 |
|
| 309 |
else:
|
| 310 |
user_prompt += ' <|endoftext|>'
|
| 311 |
+
layout_image = None
|
| 312 |
|
| 313 |
for items in stack:
|
| 314 |
position, text = items
|
|
|
|
| 382 |
row = index // 2
|
| 383 |
col = index % 2
|
| 384 |
new_image.paste(image, (col*width, row*height))
|
| 385 |
+
# os.system('nvidia-smi')
|
| 386 |
torch.cuda.empty_cache()
|
| 387 |
+
# os.system('nvidia-smi')
|
| 388 |
+
return tuple(results), composed_prompt, layout_image
|
| 389 |
|
| 390 |
elif radio == 'TextDiffuser-2-LCM':
|
| 391 |
generator = torch.Generator(device=pipe.device).manual_seed(random.randint(0,1000))
|
|
|
|
| 397 |
guidance_scale=1,
|
| 398 |
# num_images_per_prompt=slider_batch,
|
| 399 |
).images
|
| 400 |
+
# os.system('nvidia-smi')
|
| 401 |
torch.cuda.empty_cache()
|
| 402 |
+
# os.system('nvidia-smi')
|
| 403 |
+
return tuple(image), composed_prompt, layout_image
|
| 404 |
|
| 405 |
with gr.Blocks() as demo:
|
| 406 |
|
|
|
|
| 452 |
t = gr.Textbox(label="Keyword", value='input_keyword')
|
| 453 |
redo = gr.Button(value='Redo - Cancel the last keyword')
|
| 454 |
undo = gr.Button(value='Undo - Clear the canvas')
|
| 455 |
+
skip_button = gr.Button(value='Skip - Operate the next keyword')
|
| 456 |
|
| 457 |
i.select(get_pixels,[i,t],[i])
|
| 458 |
redo.click(exe_redo, [i,t],[i])
|
|
|
|
| 463 |
slider_natural = gr.Checkbox(label="Natural image generation", value=False, info="The text position and content info will not be incorporated.")
|
| 464 |
slider_step = gr.Slider(minimum=1, maximum=50, value=20, step=1, label="Sampling step", info="The sampling step for TextDiffuser-2. You may decease the step to 4 when using LCM.")
|
| 465 |
slider_guidance = gr.Slider(minimum=1, maximum=13, value=7.5, step=0.5, label="Scale of classifier-free guidance", info="The scale of cfg and is set to 7.5 in default. When using LCM, cfg is set to 1.")
|
| 466 |
+
slider_batch = gr.Slider(minimum=1, maximum=4, value=4, step=1, label="Batch size", info="The number of images to be sampled.")
|
| 467 |
+
slider_temperature = gr.Slider(minimum=0.1, maximum=2, value=1.4, step=0.1, label="Temperature", info="Control the diversity of layout planner. Higher value indicates more diversity.")
|
| 468 |
# slider_seed = gr.Slider(minimum=1, maximum=10000, label="Seed", randomize=True)
|
| 469 |
button = gr.Button("Generate")
|
| 470 |
|
|
|
|
| 474 |
with gr.Accordion("Intermediate results", open=False):
|
| 475 |
gr.Markdown("Composed prompt")
|
| 476 |
composed_prompt = gr.Textbox(label='')
|
| 477 |
+
layout = gr.Image()
|
| 478 |
+
|
| 479 |
|
| 480 |
+
button.click(text_to_image, inputs=[prompt,keywords,positive_prompt, radio,slider_step,slider_guidance,slider_batch,slider_temperature,slider_natural], outputs=[output, composed_prompt, layout])
|
| 481 |
|
| 482 |
gr.Markdown("## Prompt Examples")
|
| 483 |
gr.Examples(
|