Commit
·
0098e32
1
Parent(s):
8078b9d
Attempt to add vqgan and diffusion
Browse files
app.py
CHANGED
|
@@ -8,10 +8,10 @@ import shortuuid
|
|
| 8 |
|
| 9 |
latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
|
| 10 |
rudalle = gr.Interface.load("spaces/multimodalart/rudalle")
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
-
#print(rudalle)
|
| 13 |
-
#guided = gr.Interface.load("spaces/EleutherAI/clip-guided-diffusion")
|
| 14 |
-
#print(guided)
|
| 15 |
def text2image_latent(text,steps,width,height,images,diversity):
|
| 16 |
results = latent(text, steps, width, height, images, diversity)
|
| 17 |
image_paths = []
|
|
@@ -35,11 +35,14 @@ def text2image_rudalle(text,aspect,model):
|
|
| 35 |
image = rudalle(text,aspect,model)[0]
|
| 36 |
return(image)
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
css_mt = {"margin-top": "1em"}
|
| 45 |
|
|
@@ -53,21 +56,33 @@ with gr.Blocks() as mindseye:
|
|
| 53 |
with gr.Row():
|
| 54 |
with gr.Tabs():
|
| 55 |
with gr.TabItem("Latent Diffusion"):
|
|
|
|
| 56 |
steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=45,maximum=50,minimum=1,step=1)
|
| 57 |
width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
|
| 58 |
height = gr.inputs.Slider(label="Height", default=256, step=32, maximum = 256, minimum=32)
|
| 59 |
images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=2, step=1, minimum=1, maximum=4)
|
| 60 |
diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=5.0, minimum=1.0, maximum=15.0)
|
| 61 |
get_image_latent = gr.Button("Generate Image",css=css_mt)
|
| 62 |
-
|
| 63 |
with gr.TabItem("ruDALLE"):
|
|
|
|
| 64 |
aspect = gr.inputs.Radio(label="Aspect Ratio", choices=["Square", "Horizontal", "Vertical"],default="Square")
|
| 65 |
model = gr.inputs.Dropdown(label="Model", choices=["Surrealism","Realism", "Emoji"], default="Surrealism")
|
| 66 |
get_image_rudalle = gr.Button("Generate Image",css=css_mt)
|
| 67 |
with gr.TabItem("VQGAN+CLIP"):
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
with gr.Row():
|
| 72 |
with gr.Tabs():
|
| 73 |
with gr.TabItem("Image output"):
|
|
@@ -77,4 +92,6 @@ with gr.Blocks() as mindseye:
|
|
| 77 |
|
| 78 |
get_image_latent.click(text2image_latent, inputs=[text,steps,width,height,images,diversity], outputs=[image,gallery])
|
| 79 |
get_image_rudalle.click(text2image_rudalle, inputs=[text,aspect,model], outputs=image)
|
|
|
|
|
|
|
| 80 |
mindseye.launch()
|
|
|
|
| 8 |
|
| 9 |
latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
|
| 10 |
rudalle = gr.Interface.load("spaces/multimodalart/rudalle")
|
| 11 |
+
# Load the remote Spaces as callable Gradio interfaces (Spaces "mixing").
diffusion = gr.Interface.load("spaces/multimodalart/diffusion")
vqgan = gr.Interface.load("spaces/multimodalart/vqgan")
# Fix: removed leftover debug `print(diffusion)` — the pre-existing loaders
# (latent, rudalle) keep such prints commented out, and printing an Interface
# object at import time only adds startup noise.
|
| 14 |
|
|
|
|
|
|
|
|
|
|
| 15 |
def text2image_latent(text,steps,width,height,images,diversity):
|
| 16 |
results = latent(text, steps, width, height, images, diversity)
|
| 17 |
image_paths = []
|
|
|
|
| 35 |
image = rudalle(text,aspect,model)[0]
|
| 36 |
return(image)
|
| 37 |
|
| 38 |
+
def text2image_vqgan(text,width,height,style,steps,flavor):
    """Forward the prompt and generation settings to the remote
    VQGAN+CLIP Space and hand its output straight back to the caller."""
    return vqgan(text, width, height, style, steps, flavor)
|
| 41 |
+
|
| 42 |
+
def text2image_diffusion(steps_diff, images_diff, weight, clip):
    """Run the remote guided-diffusion Space and return its result.

    Parameters mirror the Gradio inputs wired to this handler: step count,
    number of parallel images, prompt weight, and the CLIP-guidance flag.
    """
    # NOTE(review): the prompt text is not passed to this handler — confirm
    # the remote Space's expected signature against the click() wiring.
    # Fix: removed leftover debug `print(results)`.
    return diffusion(steps_diff, images_diff, weight, clip)
|
| 46 |
|
| 47 |
css_mt = {"margin-top": "1em"}
|
| 48 |
|
|
|
|
| 56 |
with gr.Row():
|
| 57 |
with gr.Tabs():
|
| 58 |
with gr.TabItem("Latent Diffusion"):
|
| 59 |
+
gr.Markdown("Latent Diffusion is the state of the art of open source text-to-image models, superb in text synthesis. Sometimes struggles with complex prompts")
|
| 60 |
steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=45,maximum=50,minimum=1,step=1)
|
| 61 |
width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
|
| 62 |
height = gr.inputs.Slider(label="Height", default=256, step=32, maximum = 256, minimum=32)
|
| 63 |
images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=2, step=1, minimum=1, maximum=4)
|
| 64 |
diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=5.0, minimum=1.0, maximum=15.0)
|
| 65 |
get_image_latent = gr.Button("Generate Image",css=css_mt)
|
|
|
|
| 66 |
with gr.TabItem("ruDALLE"):
|
| 67 |
+
gr.Markdown("ruDALLE is a replication of DALL-E 1 in the russian language. No worries, your prompts will be translated automatically to russian. In case you see an error, try again a few times")
|
| 68 |
aspect = gr.inputs.Radio(label="Aspect Ratio", choices=["Square", "Horizontal", "Vertical"],default="Square")
|
| 69 |
model = gr.inputs.Dropdown(label="Model", choices=["Surrealism","Realism", "Emoji"], default="Surrealism")
|
| 70 |
get_image_rudalle = gr.Button("Generate Image",css=css_mt)
|
| 71 |
with gr.TabItem("VQGAN+CLIP"):
|
| 72 |
+
gr.Markdown("VQGAN+CLIP is the most famous text-to-image generator. Can produce good artistic results")
|
| 73 |
+
width_vq = gr.inputs.Slider(label="Width", default=256, minimum=32, step=32, maximum=512)
|
| 74 |
+
height_vq= gr.inputs.Slider(label="Height", default=256, minimum=32, step=32, maximum=512)
|
| 75 |
+
style = gr.inputs.Dropdown(label="Style - Hyper Fast Results is fast but compromises a bit of the quality",choices=["Default","Balanced","Detailed","Consistent Creativity","Realistic","Smooth","Subtle MSE","Hyper Fast Results"],default="Hyper Fast Results")
|
| 76 |
+
# NOTE(review): this rebinds the name `steps` already used by the Latent
# Diffusion tab's slider, so the latent click handler below (inputs include
# `steps`) will receive THIS slider instead — consider renaming to `steps_vq`
# and updating the VQGAN click wiring accordingly.
steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate. All styles that are not Hyper Fast need at least 200 steps",default=50,maximum=300,minimum=1,step=1)
|
| 77 |
+
flavor = gr.inputs.Dropdown(label="Flavor - pick a flavor for the style of the images, based on the images below",choices=["ginger", "cumin", "holywater", "zynth", "wyvern", "aaron", "moth", "juu"])
|
| 78 |
+
# Fix: gr.Button (the class) — `gr.button` does not exist and would raise
# AttributeError at startup; matches the buttons in the other tabs.
get_image_vqgan = gr.Button("Generate Image",css=css_mt)
|
| 79 |
+
with gr.TabItem("Guided Diffusion"):
|
| 80 |
+
gr.Markdown("Guided Diffusion models produce superb quality results. V-Diffusion is its latest implementation")
|
| 81 |
+
steps_diff = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=40,maximum=80,minimum=1,step=1)
|
| 82 |
+
images_diff = gr.inputs.Slider(label="Number of images in parallel", default=2, maximum=4, minimum=1, step=1)
|
| 83 |
+
weight = gr.inputs.Slider(label="Weight - how closely the image should resemble the prompt", default=5, maximum=15, minimum=0, step=1)
|
| 84 |
+
clip = gr.inputs.Checkbox(label="CLIP Guided - improves coherence with complex prompts, makes it slower")
|
| 85 |
+
# Fix: gr.Button (the class) — `gr.button` does not exist and would raise
# AttributeError at startup; matches the buttons in the other tabs.
get_image_diffusion = gr.Button("Generate Image",css=css_mt)
|
| 86 |
with gr.Row():
|
| 87 |
with gr.Tabs():
|
| 88 |
with gr.TabItem("Image output"):
|
|
|
|
| 92 |
|
| 93 |
get_image_latent.click(text2image_latent, inputs=[text,steps,width,height,images,diversity], outputs=[image,gallery])
|
| 94 |
get_image_rudalle.click(text2image_rudalle, inputs=[text,aspect,model], outputs=image)
|
| 95 |
+
get_image_vqgan.click(text2image_vqgan, inputs=[text,width_vq,height_vq,style,steps,flavor],outputs=image)
|
| 96 |
+
get_image_diffusion.click(text2image_diffusion, inputs=[steps_diff, images_diff, weight, clip],outputs=gallery)
|
| 97 |
mindseye.launch()
|