linoyts HF Staff committed on
Commit
af4203c
·
verified ·
1 Parent(s): 5a7485b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -224
app.py CHANGED
@@ -9,171 +9,61 @@ import numpy as np
9
  from diffusers import BriaFiboPipeline
10
  from diffusers.modular_pipelines import ModularPipeline
11
 
12
- from optimization import optimize_pipeline_
13
-
14
- # resolutions=[
15
- # "832 1248",
16
- # "896 1152",
17
- # "960 1088",
18
- # "1024 1024",
19
- # "1088 960",
20
- # "1152 896",
21
- # "1216 832",
22
- # "1280 800",
23
- # "1344 768",
24
- # ]
25
  MAX_SEED = np.iinfo(np.int32).max
26
  dtype = torch.bfloat16
27
  device = "cuda" if torch.cuda.is_available() else "cpu"
28
 
29
  torch.set_grad_enabled(False)
30
  vlm_pipe = ModularPipeline.from_pretrained("briaai/FIBO-VLM-prompt-to-JSON", trust_remote_code=True).to(device)
 
31
 
32
- pipe = BriaFiboPipeline.from_pretrained(
33
- "briaai/FIBO",
34
- trust_remote_code=True,
35
- dtype=dtype).to(device)
36
- test_prompt_json = """
37
- {
38
- "short_description": "A surreal and whimsical scene featuring a man, a woman, and a dog posed against a tri-colored backdrop. The woman stands in front of the red section, wearing a t-shirt with a Yoda motif and a skirt with birds. The dog, dressed as a superdog, sits on a checkerboard chair in front of the white section, with a blue tennis ball in its mouth. The man, in a purple suit, stands in front of the gold section, holding a tree branch with a blue jay. The backdrop is divided into red, white, and gold sections, with a small metal grating in the top left and a tear in the gold section. A rustic framed oil painting of the pyramids hangs above the dog.",
39
- "objects": [
40
- {
41
- "description": "A woman standing in front of the red backdrop. She is wearing a beige t-shirt with a Yoda motif and a long skirt with birds on it. Her right hand is holding an axe.",
42
- "location": "Center-left",
43
- "relationship": "She is positioned in front of the red backdrop and to the left of the dog and man.",
44
- "relative_size": "Medium",
45
- "shape_and_color": "Humanoid shape, beige and multicolored clothing.",
46
- "appearance_details": "She has a long skirt with birds on it and is holding an axe.",
47
- "pose": "Standing upright with a slight tilt to the right.",
48
- "expression": "Neutral",
49
- "clothing": "She is wearing a beige t-shirt with a Yoda motif and a long skirt with birds on it.",
50
- "action": "Standing",
51
- "gender": "Female",
52
- "skin_tone_and_texture": "Fair, smooth."
53
- },
54
- {
55
- "description": "A dog dressed as a superdog, sitting on a checkerboard chair in front of the white backdrop. It has a blue tennis ball in its mouth.",
56
- "location": "Center",
57
- "relationship": "It is positioned in front of the white backdrop and between the woman and the man.",
58
- "relative_size": "Medium",
59
- "shape_and_color": "Canine shape, brown and white fur, blue tennis ball.",
60
- "appearance_details": "It is dressed as a superdog and has a blue tennis ball in its mouth.",
61
- "pose": "Sitting upright.",
62
- "expression": "Neutral",
63
- "clothing": "Superdog costume.",
64
- "action": "Sitting",
65
- "gender": "Male",
66
- "skin_tone_and_texture": "Brown and white fur, soft."
67
- },
68
- {
69
- "description": "A man standing in front of the gold backdrop. He is wearing a three piece purple suit and has spiky blue hair. His left hand is holding a tree branch with a blue jay on it.",
70
- "location": "Center-right",
71
- "relationship": "He is positioned in front of the gold backdrop and to the right of the woman and dog.",
72
- "relative_size": "Medium",
73
- "shape_and_color": "Humanoid shape, purple suit, blue hair.",
74
- "appearance_details": "He has spiky blue hair and is holding a tree branch with a blue jay on it.",
75
- "pose": "Standing upright with a slight tilt to the left.",
76
- "expression": "Neutral",
77
- "clothing": "He is wearing a three piece purple suit.",
78
- "action": "Standing",
79
- "gender": "Male",
80
- "skin_tone_and_texture": "Fair, smooth."
81
- },
82
- {
83
- "description": "A checkerboard armchair in yellow and brown.",
84
- "location": "Bottom-center",
85
- "relationship": "The dog is sitting on the chair.",
86
- "relative_size": "Small",
87
- "shape_and_color": "Chair shape, yellow and brown.",
88
- "texture": "Smooth. End of texture answer.",
89
- "appearance_details": "The chair is a checkerboard armchair in yellow and brown."
90
- },
91
- {
92
- "description": "A rustic framed oil painting of the pyramids.",
93
- "location": "Top-center",
94
- "relationship": "The painting is hanging above the dog.",
95
- "relative_size": "Small",
96
- "shape_and_color": "Rectangular shape, brown frame, yellow and brown pyramids.",
97
- "texture": "Rough. End of texture answer.",
98
- "appearance_details": "The painting is a rustic framed oil painting of the pyramids."
99
- }
100
- ],
101
- "background_setting": "The background is a tri-colored backdrop divided equally into red, white, and gold sections. There is a small rectangular metal grating in the top left corner and a subtle tear in the gold backdrop in the bottom right corner.",
102
- "lighting": {
103
- "conditions": "Studio lighting",
104
- "direction": "Front-lit",
105
- "shadows": "Soft shadows are present, indicating diffused lighting."
106
- },
107
- "aesthetics": {
108
- "composition": "The composition is centered, with the three figures arranged in a row. The backdrop is divided into thirds, creating a symmetrical balance.",
109
- "color_scheme": "The color scheme is triadic, with red, white, and gold dominating the backdrop, complemented by the various colors of the figures' clothing and accessories.",
110
- "mood_atmosphere": "The mood is whimsical and surreal, with a touch of humor due to the unusual costumes and props.",
111
- "preference_score": "high",
112
- "aesthetic_score": "high"
113
- },
114
- "photographic_characteristics": {
115
- "depth_of_field": "Deep",
116
- "focus": "Sharp focus on all subjects",
117
- "camera_angle": "Eye-level",
118
- "lens_focal_length": "Standard"
119
- },
120
- "style_medium": "Photograph",
121
- "text_render": [
122
- {
123
- "text": "Yoda",
124
- "location": "Center of the woman's t-shirt",
125
- "size": "Small",
126
- "color": "Beige",
127
- "font": "Cartoonish",
128
- "appearance_details": "The text is part of a graphic design on the t-shirt."
129
- }
130
- ],
131
- "context": "This is a surreal and whimsical portrait of a man, a woman, and a dog posed against a tri-colored backdrop. It could be an art piece or a promotional image for a quirky event or product.",
132
- "artistic_style": "Surreal Pop"
133
- }
134
- """
135
- optimize_pipeline_(pipe, test_prompt_json)
136
-
137
- def handle_json(text):
138
- try:
139
- json.loads(text)
140
- return text
141
- except:
142
- return "Error"
143
-
144
 
145
  @spaces.GPU(duration=100)
146
- def infer(prompt,
147
- negative_prompt="",
148
- seed=42,
149
- randomize_seed=False,
150
- width=1024,
151
- height=1024,
152
- guidance_scale=5,
153
- num_inference_steps=50,
154
- ):
155
-
 
 
 
156
  if randomize_seed:
157
  seed = random.randint(0, MAX_SEED)
158
-
159
- t=time.time()
160
 
161
-
162
  with torch.inference_mode():
163
- # 1. Create a prompt to generate an initial image
164
- output = vlm_pipe(prompt=prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  json_prompt = output.values["json_prompt"]
166
 
 
 
 
 
 
 
 
 
167
 
168
- image = pipe(prompt=json_prompt,
169
- num_inference_steps=num_inference_steps,
170
- negative_prompt=negative_prompt,
171
- width=width,height=height,
172
- guidance_scale=guidance_scale).images[0]
173
 
174
-
175
-
176
- return image, json_prompt
177
 
178
  css = """
179
  #col-container{
@@ -181,85 +71,61 @@ css = """
181
  max-width: 768px;
182
  }
183
  """
184
- with gr.Blocks(css=css) as demo:
 
185
  with gr.Column(elem_id="col-container"):
186
- gr.Markdown("## FOBI")
187
-
188
- with gr.Group():
189
- with gr.Column():
 
 
 
 
190
  with gr.Row():
191
- prompt_in = gr.Textbox(label="Prompt")
192
- prompt_in_json = gr.JSON(label="Json")
193
-
194
- submit_btn = gr.Button("Generate")
195
  result = gr.Image(label="output")
 
 
196
  with gr.Accordion("Advanced Settings", open=False):
197
- with gr.Row():
198
- seed = gr.Slider(
199
- label="Seed",
200
- minimum=0,
201
- maximum=MAX_SEED,
202
- step=1,
203
- value=0,
204
- )
205
-
206
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
207
-
208
- with gr.Row():
209
- guidance_scale = gr.Slider(
210
- label="guidance scale",
211
- minimum=1.0,
212
- maximum=10.0,
213
- step=0.1,
214
- value=5.0
215
- )
216
- num_inference_steps = gr.Slider(
217
- label="number of inference steps",
218
- minimum=1,
219
- maximum=60,
220
- step=1,
221
- value=50,
222
- )
223
- height = gr.Slider(
224
- label="Height",
225
- minimum=768,
226
- maximum=1248,
227
- step=32,
228
- value=1024,
229
- )
230
-
231
- width = gr.Slider(
232
- label="Width",
233
- minimum=832,
234
- maximum=1344,
235
- step=64,
236
- value=1024,
237
- )
238
- with gr.Row():
239
- negative_prompt = gr.Textbox(label="negative prompt", value=json.dumps(''))
240
- negative_prompt_json = gr.JSON(label="json negative prompt", value=json.dumps(''))
241
 
242
- # prompt_in.change(
243
- # handle_json,
244
- # inputs=prompt_in,
245
- # outputs=prompt_in_json)
 
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
- # negative_prompt.change(handle_json, inputs=negative_prompt, outputs=negative_prompt_json)
248
-
249
- submit_btn.click(
250
- fn = infer,
251
- inputs = [
252
- prompt_in,
253
- negative_prompt,
254
- seed,
255
- randomize_seed,
256
- width,
257
- height,
258
- guidance_scale,
259
- num_inference_steps,
260
- ],
261
- outputs = [
262
- result, prompt_in_json
263
- ]
264
- )
265
- demo.queue().launch()
 
9
  from diffusers import BriaFiboPipeline
10
  from diffusers.modular_pipelines import ModularPipeline
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  MAX_SEED = np.iinfo(np.int32).max
13
  dtype = torch.bfloat16
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
  torch.set_grad_enabled(False)
17
  vlm_pipe = ModularPipeline.from_pretrained("briaai/FIBO-VLM-prompt-to-JSON", trust_remote_code=True).to(device)
18
+ pipe = BriaFiboPipeline.from_pretrained("briaai/FIBO", trust_remote_code=True, dtype=dtype).to(device)
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  @spaces.GPU(duration=100)
22
def infer(
    prompt,
    prompt_refine,
    prompt_in_json,
    negative_prompt="",
    seed=42,
    randomize_seed=False,
    width=1024,
    height=1024,
    guidance_scale=5,
    num_inference_steps=50,
    mode="generate",
):
    """Generate an image from a text prompt, or refine an existing structured prompt.

    The text input is first converted to a JSON structured prompt by
    ``vlm_pipe``; that JSON is then rendered to an image by ``pipe``.

    Args:
        prompt: Free-text prompt, used when ``mode`` is not ``"refine"``.
        prompt_refine: Free-text instruction passed to ``vlm_pipe`` together
            with the existing structured prompt when ``mode == "refine"``.
        prompt_in_json: Existing structured prompt to refine. A ``dict`` or
            ``list`` is serialised with ``json.dumps``; anything else is
            passed through ``str()``.
        negative_prompt: Optional free-text negative prompt. When non-empty it
            is converted to JSON by ``vlm_pipe``; otherwise an empty string is
            forwarded to ``pipe``.
        seed: Seed for the image generator; ignored when ``randomize_seed``
            is true.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Output width forwarded to ``pipe``.
        height: Output height forwarded to ``pipe``.
        guidance_scale: Guidance scale forwarded to ``pipe``.
        num_inference_steps: Number of denoising steps forwarded to ``pipe``.
        mode: ``"refine"`` selects the refine path; any other value generates
            from ``prompt``.

    Returns:
        A tuple ``(image, seed, json_prompt, neg_json_prompt)`` where ``seed``
        is the integer actually used to seed the generator.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI widgets may hand the seed over as a float; manual_seed needs an int.
    seed = int(seed)
    # Previously the seed was returned to the UI but never given to the
    # pipeline, so it had no influence on the image. Seeding an explicit
    # generator makes the reported seed reproduce the result.
    generator = torch.Generator().manual_seed(seed)

    with torch.inference_mode():
        # Negative prompts go through the same text -> JSON conversion.
        if negative_prompt:
            neg_output = vlm_pipe(prompt=negative_prompt)
            neg_json_prompt = neg_output.values["json_prompt"]
        else:
            neg_json_prompt = ""

        if mode == "refine":
            # NOTE(review): if no structured prompt exists yet, prompt_in_json
            # is presumably None and the VLM receives the literal "None" —
            # confirm whether refine should fall back to plain generation.
            json_prompt_str = (
                json.dumps(prompt_in_json)
                if isinstance(prompt_in_json, (dict, list))
                else str(prompt_in_json)
            )
            output = vlm_pipe(json_prompt=json_prompt_str, prompt=prompt_refine)
        else:
            output = vlm_pipe(prompt=prompt)
        json_prompt = output.values["json_prompt"]

        image = pipe(
            prompt=json_prompt,
            num_inference_steps=num_inference_steps,
            negative_prompt=neg_json_prompt,
            width=width,
            height=height,
            guidance_scale=guidance_scale,
            generator=generator,
        ).images[0]

    return image, seed, json_prompt, neg_json_prompt
 
 
 
 
66
 
 
 
 
67
 
68
  css = """
69
  #col-container{
 
71
  max-width: 768px;
72
  }
73
  """
74
+
75
+ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
76
  with gr.Column(elem_id="col-container"):
77
+ gr.Markdown("## FIBO")
78
+
79
+ with gr.Row():
80
+ with gr.Tab("generate") as tab_generate:
81
+ with gr.Row():
82
+ prompt_generate = gr.Textbox(label="Prompt")
83
+
84
+ with gr.Tab("refine") as tab_refine:
85
  with gr.Row():
86
+ prompt_refine = gr.Textbox(label="Prompt")
87
+
88
+ submit_btn = gr.Button("Generate")
 
89
  result = gr.Image(label="output")
90
+ with gr.Accordion("Structured Prompt", open=False):
91
+ prompt_in_json = gr.JSON(label="json structured prompt")
92
  with gr.Accordion("Advanced Settings", open=False):
93
+ with gr.Row():
94
+ seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
95
+ randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
96
+ with gr.Row():
97
+ guidance_scale = gr.Slider(label="guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=5.0)
98
+ num_inference_steps = gr.Slider(
99
+ label="number of inference steps", minimum=1, maximum=60, step=1, value=50
100
+ )
101
+ height = gr.Slider(label="Height", minimum=768, maximum=1248, step=32, value=1024)
102
+ width = gr.Slider(label="Width", minimum=832, maximum=1344, step=64, value=1024)
103
+ with gr.Row():
104
+ negative_prompt = gr.Textbox(label="negative prompt")
105
+ negative_prompt_json = gr.JSON(label="json negative prompt")
106
+
107
+ # Track active tab
108
+ current_mode = gr.State("generate")
109
+
110
+ tab_generate.select(lambda: "generate", outputs=current_mode)
111
+ tab_refine.select(lambda: "refine", outputs=current_mode)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
+ submit_btn.click(
114
+ fn=infer,
115
+ inputs=[
116
+ prompt_generate,
117
+ prompt_refine,
118
+ prompt_in_json,
119
+ negative_prompt,
120
+ seed,
121
+ randomize_seed,
122
+ width,
123
+ height,
124
+ guidance_scale,
125
+ num_inference_steps,
126
+ current_mode,
127
+ ],
128
+ outputs=[result, seed, prompt_in_json, negative_prompt_json],
129
+ )
130
 
131
+ demo.queue().launch()