yamildiego committed on
Commit
5255f7f
·
1 Parent(s): 2391a94

added depth

Browse files
Files changed (1)
  1. handler.py +36 -38
handler.py CHANGED
@@ -45,19 +45,19 @@ class EndpointHandler():
45
  face_adapter = f"./checkpoints/ip-adapter.bin"
46
  controlnet_path = f"./checkpoints/ControlNetModel"
47
 
48
- # transform = Compose([
49
- # Resize(
50
- # width=518,
51
- # height=518,
52
- # resize_target=False,
53
- # keep_aspect_ratio=True,
54
- # ensure_multiple_of=14,
55
- # resize_method='lower_bound',
56
- # image_interpolation_method=cv2.INTER_CUBIC,
57
- # ),
58
- # NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
59
- # PrepareForNet(),
60
- # ])
61
 
62
  self.controlnet_identitynet = ControlNetModel.from_pretrained(
63
  controlnet_path, torch_dtype=dtype
@@ -92,7 +92,7 @@ class EndpointHandler():
92
 
93
  controlnet_pose_model = "thibaud/controlnet-openpose-sdxl-1.0"
94
  controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
95
- # controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
96
 
97
  controlnet_pose = ControlNetModel.from_pretrained(
98
  controlnet_pose_model, torch_dtype=dtype
@@ -100,49 +100,49 @@ class EndpointHandler():
100
  controlnet_canny = ControlNetModel.from_pretrained(
101
  controlnet_canny_model, torch_dtype=dtype
102
  ).to(device)
103
- # controlnet_depth = ControlNetModel.from_pretrained(
104
- # controlnet_depth_model, torch_dtype=dtype
105
- # ).to(device)
106
 
107
  openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
108
- # depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(device).eval()
109
 
110
  def get_canny_image(image, t1=100, t2=200):
111
  image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
112
  edges = cv2.Canny(image, t1, t2)
113
  return Image.fromarray(edges, "L")
114
 
115
- # def get_depth_map(image):
116
 
117
- # image = np.array(image) / 255.0
118
 
119
- # h, w = image.shape[:2]
120
 
121
- # image = transform({'image': image})['image']
122
- # image = torch.from_numpy(image).unsqueeze(0).to("cuda")
123
 
124
- # with torch.no_grad():
125
- # depth = depth_anything(image)
126
 
127
- # depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
128
- # depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
129
 
130
- # depth = depth.cpu().numpy().astype(np.uint8)
131
 
132
- # depth_image = Image.fromarray(depth)
133
 
134
- # return depth_image
135
 
136
  self.controlnet_map = {
137
  "pose": controlnet_pose,
138
  "canny": controlnet_canny,
139
- # "depth": controlnet_depth,
140
  }
141
 
142
  self.controlnet_map_fn = {
143
  "pose": openpose,
144
  "canny": get_canny_image,
145
- # "depth": get_depth_map,
146
  }
147
 
148
  self.app = FaceAnalysis(name="buffalo_l", root="./", providers=["CPUExecutionProvider"])
@@ -153,14 +153,15 @@ class EndpointHandler():
153
  identitynet_strength_ratio = 0.8
154
  pose_strength = 0.4
155
  canny_strength = 0.3
156
- self.my_controlnet_selection = ["pose", "canny"]
157
-
158
 
159
  controlnet_scales = {
160
  "pose": pose_strength,
161
  "canny": canny_strength,
162
- # "depth": depth_strength,
163
  }
 
164
  self.pipe.controlnet = MultiControlNetModel(
165
  [self.controlnet_identitynet]
166
  + [self.controlnet_map[s] for s in self.my_controlnet_selection]
@@ -171,7 +172,6 @@ class EndpointHandler():
171
 
172
  def __call__(self, data):
173
 
174
-
175
  default_prompt = "watercolor painting, {prompt}. vibrant, beautiful, painterly, detailed, textural, artistic"
176
  default_negative_prompt = "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, anime, photorealistic, 35mm film, deformed, glitch, low contrast, noisy"
177
 
@@ -185,8 +185,6 @@ class EndpointHandler():
185
  pose_image_path = data.pop("pose_image_path", "https://i.ibb.co/9bP9tMb/pose-2-1.jpg")
186
 
187
  adapter_strength_ratio = 0.8
188
- # depth_strength = 0.5
189
- # controlnet_selection = ["pose", "canny", "depth"]
190
 
191
  def convert_from_cv2_to_image(img: np.ndarray) -> Image:
192
  return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
 
45
  face_adapter = f"./checkpoints/ip-adapter.bin"
46
  controlnet_path = f"./checkpoints/ControlNetModel"
47
 
48
+ transform = Compose([
49
+ Resize(
50
+ width=518,
51
+ height=518,
52
+ resize_target=False,
53
+ keep_aspect_ratio=True,
54
+ ensure_multiple_of=14,
55
+ resize_method='lower_bound',
56
+ image_interpolation_method=cv2.INTER_CUBIC,
57
+ ),
58
+ NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
59
+ PrepareForNet(),
60
+ ])
61
 
62
  self.controlnet_identitynet = ControlNetModel.from_pretrained(
63
  controlnet_path, torch_dtype=dtype
 
92
 
93
  controlnet_pose_model = "thibaud/controlnet-openpose-sdxl-1.0"
94
  controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
95
+ controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
96
 
97
  controlnet_pose = ControlNetModel.from_pretrained(
98
  controlnet_pose_model, torch_dtype=dtype
 
100
  controlnet_canny = ControlNetModel.from_pretrained(
101
  controlnet_canny_model, torch_dtype=dtype
102
  ).to(device)
103
+ controlnet_depth = ControlNetModel.from_pretrained(
104
+ controlnet_depth_model, torch_dtype=dtype
105
+ ).to(device)
106
 
107
  openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
108
+ depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(device).eval()
109
 
110
  def get_canny_image(image, t1=100, t2=200):
111
  image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
112
  edges = cv2.Canny(image, t1, t2)
113
  return Image.fromarray(edges, "L")
114
 
115
+ def get_depth_map(image):
116
 
117
+ image = np.array(image) / 255.0
118
 
119
+ h, w = image.shape[:2]
120
 
121
+ image = transform({'image': image})['image']
122
+ image = torch.from_numpy(image).unsqueeze(0).to("cuda")
123
 
124
+ with torch.no_grad():
125
+ depth = depth_anything(image)
126
 
127
+ depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
128
+ depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
129
 
130
+ depth = depth.cpu().numpy().astype(np.uint8)
131
 
132
+ depth_image = Image.fromarray(depth)
133
 
134
+ return depth_image
135
 
136
  self.controlnet_map = {
137
  "pose": controlnet_pose,
138
  "canny": controlnet_canny,
139
+ "depth": controlnet_depth,
140
  }
141
 
142
  self.controlnet_map_fn = {
143
  "pose": openpose,
144
  "canny": get_canny_image,
145
+ "depth": get_depth_map,
146
  }
147
 
148
  self.app = FaceAnalysis(name="buffalo_l", root="./", providers=["CPUExecutionProvider"])
 
153
  identitynet_strength_ratio = 0.8
154
  pose_strength = 0.4
155
  canny_strength = 0.3
156
+ depth_strength = 0.5
157
+ self.my_controlnet_selection = ["pose", "canny", "depth"]
158
 
159
  controlnet_scales = {
160
  "pose": pose_strength,
161
  "canny": canny_strength,
162
+ "depth": depth_strength,
163
  }
164
+
165
  self.pipe.controlnet = MultiControlNetModel(
166
  [self.controlnet_identitynet]
167
  + [self.controlnet_map[s] for s in self.my_controlnet_selection]
 
172
 
173
  def __call__(self, data):
174
 
 
175
  default_prompt = "watercolor painting, {prompt}. vibrant, beautiful, painterly, detailed, textural, artistic"
176
  default_negative_prompt = "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, anime, photorealistic, 35mm film, deformed, glitch, low contrast, noisy"
177
 
 
185
  pose_image_path = data.pop("pose_image_path", "https://i.ibb.co/9bP9tMb/pose-2-1.jpg")
186
 
187
  adapter_strength_ratio = 0.8
 
 
188
 
189
  def convert_from_cv2_to_image(img: np.ndarray) -> Image:
190
  return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))