HuiZhang0812 committed on
Commit
4baba69
·
verified ·
1 Parent(s): 09d10ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -100
app.py CHANGED
@@ -6,7 +6,6 @@ from src.pipeline.pipeline_CreatiLayout import CreatiLayoutSD3Pipeline
6
  from utils.bbox_visualization import bbox_visualization,scale_boxes
7
  from PIL import Image
8
  import os
9
- import pandas as pd
10
  from huggingface_hub import login
11
 
12
  hf_token = os.getenv("HF_TOKEN")
@@ -30,7 +29,7 @@ pipe = pipe.to("cuda")
30
  print("pipeline is loaded.")
31
 
32
  @spaces.GPU
33
- def process_image_and_text(global_caption, box_detail_phrases_list:pd.DataFrame, boxes:pd.DataFrame,seed: int=42, randomize_seed: bool=False, guidance_scale: float=7.5, num_inference_steps: int=50):
34
 
35
  if randomize_seed:
36
  seed = torch.randint(0, 100, (1,)).item()
@@ -38,20 +37,37 @@ def process_image_and_text(global_caption, box_detail_phrases_list:pd.DataFrame,
38
  height = 1024
39
  width = 1024
40
 
41
- box_detail_phrases_list_tmp = box_detail_phrases_list.values.tolist()
42
- box_detail_phrases_list_tmp = [c[0] for c in box_detail_phrases_list_tmp]
43
- boxes = boxes.astype(float).values.tolist()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- white_image = Image.new('RGB', (width, height), color='rgb(256,256,256)')
46
- show_input = {"boxes":scale_boxes(boxes,width,height),"labels":box_detail_phrases_list_tmp}
47
- bbox_visualization_img = bbox_visualization(white_image,show_input)
48
 
49
  result_img = pipe(
50
  prompt=global_caption,
51
  generator=torch.Generator(device="cuda").manual_seed(seed),
52
  guidance_scale=guidance_scale,
53
  num_inference_steps=num_inference_steps,
54
- bbox_phrases=box_detail_phrases_list_tmp,
55
  bbox_raw=boxes,
56
  height=height,
57
  width=width
@@ -59,109 +75,67 @@ def process_image_and_text(global_caption, box_detail_phrases_list:pd.DataFrame,
59
 
60
  return bbox_visualization_img, result_img
61
 
62
- def get_samples():
63
- sample_list = [
64
- {
65
- "global_caption": "A picturesque scene features Iron Man standing confidently on a rugged rock by the sea, holding a drawing board with his hands. The board displays the words 'Creative Layout' in a playful, hand-drawn font. The serene sea shimmers under the setting sun. The sky is painted with a gradient of warm colors, from deep oranges to soft purples.",
66
- "region_caption_list": [
67
- "Iron Man standing confidently on a rugged rock.",
68
- "A rugged rock by the sea.",
69
- "A drawing board with the words \"Creative Layout\" in a playful, hand-drawn font.",
70
- "The serene sea shimmers under the setting sun.",
71
- "The sky is a shade of deep orange to soft purple."
72
- ],
73
- "region_bboxes_list": [
74
- [0.40, 0.35, 0.55, 0.80],
75
- [0.35, 0.75, 0.60, 0.95],
76
- [0.40, 0.45, 0.55, 0.65],
77
- [0.00, 0.30, 1.00, 0.90],
78
- [0.00, 0.00, 1.00, 0.30]
79
- ]
80
- },
81
- {
82
- "global_caption": "This is a photo showcasing two wooden benches in a park. The bench on the left is painted in a vibrant blue, while the one on the right is painted in a green. Both are placed on a path paved with stones, surrounded by lush trees and shrubs. The sunlight filters through the leaves, casting dappled shadows on the ground, creating a tranquil and comfortable atmosphere.",
83
- "region_caption_list": [
84
- "A weathered, blue wooden bench with green elements in a natural setting.",
85
- "Old, weathered wooden benches with green paint.",
86
- "A dirt path in a park with green grass on the sides and two colorful wooden benches.",
87
- "Thick, verdant foliage of mature trees in a dense forest."
88
- ],
89
- "region_bboxes_list": [
90
- [0.30, 0.44, 0.62, 0.78],
91
- [0.54, 0.41, 0.75, 0.65],
92
- [0.00, 0.39, 1.00, 1.00],
93
- [0.00, 0.00, 1.00, 0.43]
94
- ]
95
- },
96
- {
97
- "global_caption": "This is a wedding photo taken in a photography studio, showing a newlywed couple sitting on a brown leather sofa in a modern indoor setting. The groom is dressed in a pink suit, paired with a pink tie and white shirt, while the bride is wearing a white wedding dress with a long veil. They are sitting on a brown leather sofa, with a wooden table in front of them, on which a bouquet of flowers is placed. The background is a bar with a staircase and a wall decorated with lights, creating a warm and romantic atmosphere.",
98
- "region_caption_list": [
99
- "A floral arrangement consisting of roses, carnations, and eucalyptus leaves on a wooden surface.",
100
- "A white wedding dress with off-the-shoulder ruffles and a long, sheer veil.",
101
- "A polished wooden table with visible grain and knots.",
102
- "A close-up of a dark brown leather sofa with tufted upholstery and button details.",
103
- "A man in a pink suit with a white shirt and red tie, sitting on a leather armchair.",
104
- "A person in a suit seated on a leather armchair near a wooden staircase with books and bottles.",
105
- "Bride in white gown with veil, groom in maroon suit and pink tie, seated on leather armchairs."
106
- ],
107
- "region_bboxes_list": [
108
- [0.09, 0.65, 0.31, 0.93],
109
- [0.62, 0.25, 0.89, 0.90],
110
- [0.01, 0.70, 0.78, 0.99],
111
- [0.76, 0.65, 1.00, 0.99],
112
- [0.27, 0.32, 0.72, 0.75],
113
- [0.00, 0.01, 0.52, 0.72],
114
- [0.27, 0.09, 0.94, 0.89]
115
- ]
116
- }
117
-
118
- ]
119
- return [[sample["global_caption"], [[caption] for caption in sample["region_caption_list"]], sample["region_bboxes_list"]] for sample in sample_list]
120
-
121
-
122
-
123
- with gr.Blocks() as demo:
124
  gr.Markdown("# CreatiLayout: Layout-to-Image generation")
125
- gr.Markdown("""CreatiLayout is a layout-to-image framework for Diffusion Transformer models, offering high-quality and fine-grained controllable generation based on the global description and entity annotations. Users need to provide a global description and the position and description of each entity, as shown in the examples. Please feel free to modify the position and attributes of the entities in the examples (such as size, color, shape, text, portrait, etc.). Here are some inspirations: Iron Man -> Spider Man/Harry Potter/Buzz Lightyear; CreatiLayout -> Hello Friends/Let's Control; drawing board -> round drawing board; Modify the position of the drawing board to (0.4, 0.15, 0.55, 0.35)""")
 
 
 
 
 
 
 
126
  with gr.Row():
127
-
128
- with gr.Column():
129
- global_caption = gr.Textbox(lines=2, label="Global Caption")
130
- box_detail_phrases_list = gr.Dataframe(headers=["Region Captions"], label="Region Captions")
131
- boxes = gr.Dataframe(headers=["x1", "y1", "x2", "y2"], label="Region Bounding Boxes (x_min,y_min,x_max,y_max)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  with gr.Accordion("Advanced Settings", open=False):
133
  seed = gr.Slider(0, 100, step=1, label="Seed", value=42)
134
  randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
135
  guidance_scale = gr.Slider(1, 30, step=0.5, label="Guidance Scale", value=7.5)
136
  num_inference_steps = gr.Slider(1, 50, step=1, label="Number of inference steps", value=28)
137
- with gr.Column():
138
- bbox_visualization_img = gr.Image(type="pil", label="Bounding Box Visualization")
139
 
140
- with gr.Column():
 
141
  output_image = gr.Image(type="pil", label="Generated Image")
142
 
143
-
144
-
145
- gr.Button("Generate").click(
146
  fn=process_image_and_text,
147
- inputs=[global_caption, box_detail_phrases_list, boxes, seed, randomize_seed, guidance_scale, num_inference_steps],
148
  outputs=[bbox_visualization_img, output_image]
149
  )
150
-
151
-
152
- gr.Examples(
153
- examples=get_samples(),
154
- inputs=[global_caption, box_detail_phrases_list, boxes],
155
- outputs=[bbox_visualization_img, output_image],
156
- fn=process_image_and_text,
157
- cache_examples=True
158
- )
159
-
160
-
161
-
162
-
163
 
164
  if __name__ == "__main__":
165
- demo.launch()
166
-
167
-
 
6
  from utils.bbox_visualization import bbox_visualization,scale_boxes
7
  from PIL import Image
8
  import os
 
9
  from huggingface_hub import login
10
 
11
  hf_token = os.getenv("HF_TOKEN")
 
29
  print("pipeline is loaded.")
30
 
31
  @spaces.GPU
32
+ def process_image_and_text(global_caption, region_captions_text, boxes_text, seed: int=42, randomize_seed: bool=False, guidance_scale: float=7.5, num_inference_steps: int=50):
33
 
34
  if randomize_seed:
35
  seed = torch.randint(0, 100, (1,)).item()
 
37
  height = 1024
38
  width = 1024
39
 
40
+ # Parse region captions (one per line)
41
+ region_captions = [line.strip() for line in region_captions_text.split('\n') if line.strip()]
42
+
43
+ # Parse bounding boxes (x1,y1,x2,y2 per line)
44
+ boxes = []
45
+ for line in boxes_text.split('\n'):
46
+ if line.strip():
47
+ try:
48
+ coords = line.split(',')
49
+ if len(coords) == 4:
50
+ box = [float(coord.strip()) for coord in coords]
51
+ boxes.append(box)
52
+ except ValueError:
53
+ continue
54
+
55
+ if not region_captions or not boxes:
56
+ raise gr.Error("Please provide both region captions and bounding boxes.")
57
+
58
+ if len(region_captions) != len(boxes):
59
+ raise gr.Error("Number of region captions must match number of bounding boxes.")
60
 
61
+ white_image = Image.new('RGB', (width, height), color='white')
62
+ show_input = {"boxes": scale_boxes(boxes, width, height), "labels": region_captions}
63
+ bbox_visualization_img = bbox_visualization(white_image, show_input)
64
 
65
  result_img = pipe(
66
  prompt=global_caption,
67
  generator=torch.Generator(device="cuda").manual_seed(seed),
68
  guidance_scale=guidance_scale,
69
  num_inference_steps=num_inference_steps,
70
+ bbox_phrases=region_captions,
71
  bbox_raw=boxes,
72
  height=height,
73
  width=width
 
75
 
76
  return bbox_visualization_img, result_img
77
 
78
+ # Example data
79
+ example1_caption = "A picturesque scene features Iron Man standing confidently on a rugged rock by the sea, holding a drawing board with his hands. The board displays the words 'Creative Layout' in a playful, hand-drawn font. The serene sea shimmers under the setting sun. The sky is painted with a gradient of warm colors, from deep oranges to soft purples."
80
+ example1_regions = """Iron Man standing confidently on a rugged rock.
81
+ A rugged rock by the sea.
82
+ A drawing board with the words "Creative Layout" in a playful, hand-drawn font.
83
+ The serene sea shimmers under the setting sun.
84
+ The sky is a shade of deep orange to soft purple."""
85
+ example1_boxes = """0.40, 0.35, 0.55, 0.80
86
+ 0.35, 0.75, 0.60, 0.95
87
+ 0.40, 0.45, 0.55, 0.65
88
+ 0.00, 0.30, 1.00, 0.90
89
+ 0.00, 0.00, 1.00, 0.30"""
90
+
91
+ with gr.Blocks(title="CreatiLayout") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  gr.Markdown("# CreatiLayout: Layout-to-Image generation")
93
+ gr.Markdown("""CreatiLayout is a layout-to-image framework for Diffusion Transformer models, offering high-quality and fine-grained controllable generation based on the global description and entity annotations. Users need to provide a global description and the position and description of each entity, as shown in the examples. Please feel free to modify the position and attributes of the entities in the examples (such as size, color, shape, text, portrait, etc.).
94
+
95
+ **Instructions:**
96
+ - Enter a global caption describing the overall scene
97
+ - List region captions (one per line)
98
+ - Enter bounding boxes as x1,y1,x2,y2 (one per line, values between 0 and 1)
99
+ - Number of region captions must match number of bounding boxes""")
100
+
101
  with gr.Row():
102
+ with gr.Column(scale=1):
103
+ global_caption = gr.Textbox(
104
+ lines=3,
105
+ label="Global Caption",
106
+ placeholder="Describe the overall scene...",
107
+ value=example1_caption
108
+ )
109
+ region_captions_text = gr.Textbox(
110
+ lines=6,
111
+ label="Region Captions (one per line)",
112
+ placeholder="Enter region descriptions, one per line...",
113
+ value=example1_regions
114
+ )
115
+ boxes_text = gr.Textbox(
116
+ lines=6,
117
+ label="Region Bounding Boxes (x1,y1,x2,y2 per line)",
118
+ placeholder="0.1,0.1,0.5,0.5\n0.6,0.2,0.9,0.8",
119
+ value=example1_boxes
120
+ )
121
+
122
  with gr.Accordion("Advanced Settings", open=False):
123
  seed = gr.Slider(0, 100, step=1, label="Seed", value=42)
124
  randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
125
  guidance_scale = gr.Slider(1, 30, step=0.5, label="Guidance Scale", value=7.5)
126
  num_inference_steps = gr.Slider(1, 50, step=1, label="Number of inference steps", value=28)
127
+
128
+ generate_btn = gr.Button("Generate", variant="primary", size="lg")
129
 
130
+ with gr.Column(scale=1):
131
+ bbox_visualization_img = gr.Image(type="pil", label="Bounding Box Visualization")
132
  output_image = gr.Image(type="pil", label="Generated Image")
133
 
134
+ generate_btn.click(
 
 
135
  fn=process_image_and_text,
136
+ inputs=[global_caption, region_captions_text, boxes_text, seed, randomize_seed, guidance_scale, num_inference_steps],
137
  outputs=[bbox_visualization_img, output_image]
138
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  if __name__ == "__main__":
141
+ demo.launch()