Falln87 committed on
Commit
e2d7812
·
verified ·
1 Parent(s): e9d58b1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -0
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from diffusers import DiffusionPipeline, QwenImageEditPipeline
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
5
+ from qwen_vl import QwenVLTokenizer
6
+ from PIL import Image
7
+
8
# Global variables for each model pipeline.
# Lazily populated by setup_generation_model() / setup_editing_model();
# None means the corresponding pipeline has not been loaded yet.
gen_pipe = None
edit_pipe = None
11
+
12
+ # --- Model Loading Functions ---
13
+
14
def setup_generation_model():
    """
    Load the Qwen/Qwen-Image text-to-image pipeline into the global ``gen_pipe``.

    Returns:
        A human-readable status string for display in the Gradio status textbox.

    Idempotent: returns immediately if the pipeline is already loaded. On
    failure, ``gen_pipe`` is reset to None so a later retry starts clean.
    """
    global gen_pipe
    if gen_pipe is not None:
        return "Generation Model already loaded. ✨"

    model_id = "Qwen/Qwen-Image"
    device = "cuda" if torch.cuda.is_available() else "cpu"

    print(f"Loading Qwen-Image Generation Model on {device}...")

    try:
        # Qwen/Qwen-Image is a diffusers checkpoint, so it must be loaded via
        # DiffusionPipeline directly. The previous approach — loading the repo
        # with AutoModelForCausalLM plus a separate QwenVLTokenizer and passing
        # them as `model=`/`tokenizer=` kwargs — cannot work: the checkpoint is
        # not a causal LM, and DiffusionPipeline assembles its own components
        # from the repo's model_index.json.
        #
        # bfloat16 halves memory on GPU; fall back to float32 on CPU where
        # bf16 kernel support is uneven. If 4-bit quantization is desired, it
        # should be applied per-component via diffusers' quantization support
        # rather than through a transformers BitsAndBytesConfig on the whole
        # repo.
        dtype = torch.bfloat16 if device == "cuda" else torch.float32
        gen_pipe = DiffusionPipeline.from_pretrained(
            model_id,
            torch_dtype=dtype,
            use_safetensors=True,
            trust_remote_code=True,
        )
        gen_pipe.to(device)
        print("Qwen-Image Generation Model loaded successfully.")
        return "Generation Model loaded! 🚀"
    except Exception as e:
        # Surface the failure as a status string instead of raising inside a
        # Gradio callback; keep the global unset so the user can retry.
        gen_pipe = None
        return f"Generation Model setup failed. Error: {e}"
59
+
60
def setup_editing_model():
    """
    Load the Qwen/Qwen-Image-Edit pipeline into the global ``edit_pipe``.

    Returns:
        A status string for display in the UI. Loading is skipped when the
        pipeline already exists; on failure the global is cleared for retry.
    """
    global edit_pipe
    if edit_pipe is not None:
        return "Editing Model already loaded. ✨"

    model_id = "Qwen/Qwen-Image-Edit"
    device = "cuda" if torch.cuda.is_available() else "cpu"

    print(f"Loading Qwen-Image-Edit Model on {device} with bitsandbytes quantization...")

    try:
        # QwenImageEditPipeline is a custom pipeline, so direct bnb
        # quantization might not work as seamlessly as with a CausalLM model;
        # we rely on the pipeline's own internal handling.
        pipeline = QwenImageEditPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            use_safetensors=True,
            trust_remote_code=True,
        )
        pipeline.to(device)
        edit_pipe = pipeline
        print("Qwen-Image-Edit model loaded successfully.")
        return "Editing Model loaded! ✂️"
    except Exception as e:
        edit_pipe = None
        return f"Editing Model setup failed. Error: {e}"
88
+
89
+ # --- Generation and Editing Functions (remain the same as before) ---
90
+
91
def generate_image(prompt, negative_prompt, num_inference_steps, guidance_scale, seed):
    """Run text-to-image generation; return (image, status, error_detail)."""
    global gen_pipe
    # Guard clause: the model must be loaded before anything else.
    if gen_pipe is None:
        return None, "Model not loaded.", ""

    # A seed of -1 means "random": let the pipeline pick its own generator.
    if seed != -1:
        generator = torch.Generator(device=gen_pipe.device).manual_seed(seed)
    else:
        generator = None

    try:
        result = gen_pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            generator=generator,
        )
        return result.images[0], "Image generated successfully!", ""
    except Exception as e:
        # Report failure through the UI status/error fields, never raise.
        return None, "An error occurred during image generation.", f"Error: {e}"
100
+
101
def edit_image(input_image_pil, prompt, negative_prompt, num_inference_steps, guidance_scale, true_cfg_scale, denoising_strength, seed):
    """Run image-to-image editing; return (image, status, error_detail)."""
    global edit_pipe
    # Guard clauses: both a loaded model and an input image are required.
    if edit_pipe is None:
        return None, "Model not loaded.", ""
    if input_image_pil is None:
        return None, "Please upload an image.", ""

    # Seed -1 means "random": omit the generator entirely.
    generator = None
    if seed != -1:
        generator = torch.Generator(device=edit_pipe.device).manual_seed(seed)

    try:
        output = edit_pipe(
            image=input_image_pil.convert("RGB"),
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            true_cfg_scale=true_cfg_scale,
            denoising_strength=denoising_strength,
            generator=generator,
        )
        return output.images[0], "Image edited successfully!", ""
    except Exception as e:
        # Report failure through the UI status/error fields, never raise.
        return None, "An error occurred during image editing.", f"Error: {e}"
111
+
112
# --- Gradio UI ---
# Declarative layout: component creation order inside the `with` contexts
# defines the on-screen layout, and the .click() wiring connects buttons to
# the callbacks above. Kept byte-identical; comments only.

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎨 Qwen Image Studio: Generation & Editing")
    gr.Markdown("Explore the power of Qwen models for advanced image generation and detailed editing.")

    # Tab 1: text-to-image generation, backed by generate_image().
    with gr.Tab("Image Generation (Qwen/Qwen-Image)"):
        gr.Markdown("### Text-to-Image Generation")
        gr.Markdown("Create new images from text prompts. ")

        # Model loading is deferred to an explicit button click so the app
        # starts fast and the user controls when GPU memory is consumed.
        with gr.Row():
            gen_model_status = gr.Textbox(value="Generation Model not loaded. Click 'Load' to begin.", interactive=False, label="Model Status")
            load_gen_button = gr.Button("Load Generation Model", variant="primary")
            load_gen_button.click(fn=setup_generation_model, outputs=gen_model_status)

        with gr.Column():
            gen_prompt = gr.Textbox(label="Prompt", placeholder="A majestic dragon flying over a futuristic city at sunset, highly detailed, photorealistic", lines=2)
            gen_negative_prompt = gr.Textbox(label="Negative Prompt (Optional)", placeholder="blurry, low quality, distorted, bad anatomy", lines=1)

            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    gen_num_steps = gr.Slider(minimum=10, maximum=150, step=1, value=50, label="Inference Steps")
                    gen_guidance_scale = gr.Slider(minimum=1.0, maximum=20.0, step=0.5, value=7.5, label="Guidance Scale")
                with gr.Row():
                    gen_seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)

            generate_button = gr.Button("Generate Image", variant="secondary")

        gen_output_image = gr.Image(label="Generated Image")
        gen_status_text = gr.Textbox(label="Status", interactive=False)
        # NOTE(review): this error box is created hidden and nothing ever sets
        # visible=True, so error details returned by generate_image() are never
        # shown to the user — confirm whether that is intentional.
        gen_error_text = gr.Textbox(label="Error Details", interactive=False, visible=False)

        # Wire the button to generate_image(); outputs map 1:1 to its
        # (image, status, error_detail) return tuple.
        generate_button.click(
            fn=generate_image,
            inputs=[gen_prompt, gen_negative_prompt, gen_num_steps, gen_guidance_scale, gen_seed],
            outputs=[gen_output_image, gen_status_text, gen_error_text]
        )

    # Tab 2: image-to-image editing, backed by edit_image().
    with gr.Tab("Image Editing (Qwen/Qwen-Image-Edit)"):
        gr.Markdown("### Image-to-Image Editing")
        gr.Markdown("Upload an image and provide a text prompt to transform it. This model excels at semantic and appearance editing.")

        # Same deferred-loading pattern as the generation tab.
        with gr.Row():
            edit_model_status = gr.Textbox(value="Editing Model not loaded. Click 'Load' to begin.", interactive=False, label="Model Status")
            load_edit_button = gr.Button("Load Editing Model", variant="primary")
            load_edit_button.click(fn=setup_editing_model, outputs=edit_model_status)

        with gr.Column():
            # type="pil" so the callback receives a PIL.Image it can .convert("RGB").
            edit_input_image = gr.Image(label="Upload Image to Edit", type="pil")
            edit_prompt = gr.Textbox(label="Edit Prompt", placeholder="Change the dog's fur to a vibrant blue and add a red collar", lines=2)
            edit_negative_prompt = gr.Textbox(label="Negative Prompt (Optional)", placeholder="blurry, low quality, distorted, messy", lines=1)

            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    edit_num_steps = gr.Slider(minimum=10, maximum=150, step=1, value=50, label="Inference Steps")
                    edit_guidance_scale = gr.Slider(minimum=1.0, maximum=20.0, step=0.5, value=7.5, label="Guidance Scale")
                with gr.Row():
                    edit_true_cfg_scale = gr.Slider(minimum=1.0, maximum=10.0, step=0.1, value=4.0, label="True CFG Scale (for more precise control)")
                    edit_denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.8, label="Denoising Strength (how much to change original)")
                with gr.Row():
                    edit_seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)

            edit_button = gr.Button("Edit Image", variant="secondary")

        edit_output_image = gr.Image(label="Edited Image")
        edit_status_text = gr.Textbox(label="Status", interactive=False)
        # NOTE(review): hidden error box, same as the generation tab — the
        # error detail string is wired in but never made visible.
        edit_error_text = gr.Textbox(label="Error Details", interactive=False, visible=False)

        # Wire the button to edit_image(); outputs map 1:1 to its return tuple.
        edit_button.click(
            fn=edit_image,
            inputs=[edit_input_image, edit_prompt, edit_negative_prompt, edit_num_steps, edit_guidance_scale, edit_true_cfg_scale, edit_denoising_strength, edit_seed],
            outputs=[edit_output_image, edit_status_text, edit_error_text]
        )

# Launch the app
demo.launch(inbrowser=True, share=False)