Update pipeline.py
Browse files - pipeline.py (+5 -14)
pipeline.py
CHANGED
|
@@ -108,8 +108,6 @@ class E2EMarigoldDepthPipeline(DiffusionPipeline):
|
|
| 108 |
Text-encoder, for empty text embedding.
|
| 109 |
tokenizer (`CLIPTokenizer`):
|
| 110 |
CLIP tokenizer.
|
| 111 |
-
prediction_type (`str`, *optional*):
|
| 112 |
-
Type of predictions made by the model.
|
| 113 |
default_processing_resolution (`int`, *optional*):
|
| 114 |
The recommended value of the `processing_resolution` parameter of the pipeline. This value must be set in
|
| 115 |
the model config. When the pipeline is called without explicitly setting `processing_resolution`, the
|
|
@@ -118,7 +116,6 @@ class E2EMarigoldDepthPipeline(DiffusionPipeline):
|
|
| 118 |
"""
|
| 119 |
|
| 120 |
model_cpu_offload_seq = "text_encoder->unet->vae"
|
| 121 |
-
supported_prediction_types = ("depth", "disparity")
|
| 122 |
|
| 123 |
def __init__(
|
| 124 |
self,
|
|
@@ -127,17 +124,10 @@ class E2EMarigoldDepthPipeline(DiffusionPipeline):
|
|
| 127 |
scheduler: Union[DDIMScheduler],
|
| 128 |
text_encoder: CLIPTextModel,
|
| 129 |
tokenizer: CLIPTokenizer,
|
| 130 |
-
|
| 131 |
-
default_processing_resolution: Optional[int] = None,
|
| 132 |
):
|
| 133 |
super().__init__()
|
| 134 |
|
| 135 |
-
if prediction_type not in self.supported_prediction_types:
|
| 136 |
-
logger.warning(
|
| 137 |
-
f"Potentially unsupported `prediction_type='{prediction_type}'`; values supported by the pipeline: "
|
| 138 |
-
f"{self.supported_prediction_types}."
|
| 139 |
-
)
|
| 140 |
-
|
| 141 |
self.register_modules(
|
| 142 |
unet=unet,
|
| 143 |
vae=vae,
|
|
@@ -146,7 +136,6 @@ class E2EMarigoldDepthPipeline(DiffusionPipeline):
|
|
| 146 |
tokenizer=tokenizer,
|
| 147 |
)
|
| 148 |
self.register_to_config(
|
| 149 |
-
prediction_type=prediction_type,
|
| 150 |
default_processing_resolution=default_processing_resolution,
|
| 151 |
)
|
| 152 |
|
|
@@ -473,6 +462,8 @@ class E2EMarigoldDepthPipeline(DiffusionPipeline):
|
|
| 473 |
|
| 474 |
prediction = prediction.mean(dim=1, keepdim=True) # [B,1,H,W]
|
| 475 |
prediction = torch.clip(prediction, -1.0, 1.0) # [B,1,H,W]
|
| 476 |
-
prediction = (prediction + 1.0) / 2.0
|
| 477 |
|
| 478 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
Text-encoder, for empty text embedding.
|
| 109 |
tokenizer (`CLIPTokenizer`):
|
| 110 |
CLIP tokenizer.
|
|
|
|
|
|
|
| 111 |
default_processing_resolution (`int`, *optional*):
|
| 112 |
The recommended value of the `processing_resolution` parameter of the pipeline. This value must be set in
|
| 113 |
the model config. When the pipeline is called without explicitly setting `processing_resolution`, the
|
|
|
|
| 116 |
"""
|
| 117 |
|
| 118 |
model_cpu_offload_seq = "text_encoder->unet->vae"
|
|
|
|
| 119 |
|
| 120 |
def __init__(
|
| 121 |
self,
|
|
|
|
| 124 |
scheduler: Union[DDIMScheduler],
|
| 125 |
text_encoder: CLIPTextModel,
|
| 126 |
tokenizer: CLIPTokenizer,
|
| 127 |
+
default_processing_resolution: Optional[int] = 768,
|
|
|
|
| 128 |
):
|
| 129 |
super().__init__()
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
self.register_modules(
|
| 132 |
unet=unet,
|
| 133 |
vae=vae,
|
|
|
|
| 136 |
tokenizer=tokenizer,
|
| 137 |
)
|
| 138 |
self.register_to_config(
|
|
|
|
| 139 |
default_processing_resolution=default_processing_resolution,
|
| 140 |
)
|
| 141 |
|
|
|
|
| 462 |
|
| 463 |
prediction = prediction.mean(dim=1, keepdim=True) # [B,1,H,W]
|
| 464 |
prediction = torch.clip(prediction, -1.0, 1.0) # [B,1,H,W]
|
|
|
|
| 465 |
|
| 466 |
+
# Min-max rescale the prediction to [0, 1] over the whole tensor (undefined when max == min).
|
| 467 |
+
prediction = (prediction - prediction.min()) / (prediction.max() - prediction.min())
|
| 468 |
+
|
| 469 |
+
return prediction # [B,1,H,W]
|