Spaces:

pengHTYX
/

Era3D_MV_demo

Runtime error

App Files Community

pengHTYX committed on May 29, 2024

Commit

a72a0f3

1 Parent(s): 2736f7e

'update_layout'

Browse files

Files changed (4) hide show

app.py +19 -16
mvdiffusion/data/single_image_dataset.py +4 -4
mvdiffusion/pipelines/pipeline_mvdiffusion_unclip.py +1 -1
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -141,23 +141,20 @@ def preprocess(predictor, input_image, chk_group=None, segment=True, rescale=Fal
         input_image = Image.fromarray((rgb * 255).astype(np.uint8))
     else:
         input_image = expand2square(input_image, (127, 127, 127, 0))
-    return input_image, input_image.resize((768, 768), Image.Resampling.LANCZOS)
 def load_era3d_pipeline(cfg):
     # Load scheduler, tokenizer and models.
     pipeline = StableUnCLIPImg2ImgPipeline.from_pretrained(
-    cfg.pretrained_model_name_or_path,
-    torch_dtype=weight_dtype
     )
-    # pipeline.to('cuda:0')
-    pipeline.unet.enable_xformers_memory_efficient_attention()
     if torch.cuda.is_available():
         pipeline.to('cuda:0')
     # sys.main_lock = threading.Lock()
     return pipeline
@@ -165,8 +162,9 @@ def load_era3d_pipeline(cfg):
 from mvdiffusion.data.single_image_dataset import SingleImageDataset
-def prepare_data(single_image, crop_size):
-    dataset = SingleImageDataset(root_dir='', num_views=6, img_wh=[512, 512], bg_color='white', crop_size=crop_size, single_image=single_image)
     return dataset[0]
 scene = 'scene'
@@ -179,7 +177,7 @@ def run_pipeline(pipeline, cfg, single_image, guidance_scale, steps, seed, crop_
     if chk_group is not None:
         write_image = "Write Results" in chk_group
-    batch = prepare_data(single_image, crop_size)
     pipeline.set_progress_bar_config(disable=True)
     seed = int(seed)
@@ -203,7 +201,7 @@ def run_pipeline(pipeline, cfg, single_image, guidance_scale, steps, seed, crop_
         guidance_scale=guidance_scale,
         output_type='pt',
         num_images_per_prompt=1,
-        return_elevation_focal=cfg.log_elevation_focal_length,
         **cfg.pipe_validation_kwargs
     ).images
@@ -314,6 +312,7 @@ def run_demo():
     custom_css = '''#disp_image {
         text-align: center; /* Horizontally center the content */
     }'''
     with gr.Blocks(title=_TITLE, theme=custom_theme, css=custom_css) as demo:
         with gr.Row():
@@ -322,14 +321,16 @@ def run_demo():
         gr.Markdown(_DESCRIPTION)
         with gr.Row(variant='panel'):
             with gr.Column(scale=1):
-                input_image = gr.Image(type='pil', image_mode='RGBA', height=768, label='Input image')
             with gr.Column(scale=1):
                 processed_image = gr.Image(
                     type='pil',
                     label="Processed Image",
                     interactive=False,
-                    height=768,
                     image_mode='RGBA',
                     elem_id="disp_image",
                     visible=True,
@@ -341,7 +342,7 @@ def run_demo():
             #                         label="3D Model", height=320,
             #                         # camera_position=[0,0,2.0]
             #                         )
-                processed_image_highres = gr.Image(type='pil', image_mode='RGBA', visible=False)
         with gr.Row(variant='panel'):
             with gr.Column(scale=1):
                 example_folder = os.path.join(os.path.dirname(__file__), "./examples")
@@ -391,6 +392,7 @@ def run_demo():
             view_1 = gr.Image(interactive=False, height=512, show_label=False)
             view_2 = gr.Image(interactive=False, height=512, show_label=False)
             view_3 = gr.Image(interactive=False, height=512, show_label=False)
             view_4 = gr.Image(interactive=False, height=512, show_label=False)
             view_5 = gr.Image(interactive=False, height=512, show_label=False)
             view_6 = gr.Image(interactive=False, height=512, show_label=False)
@@ -398,10 +400,11 @@ def run_demo():
             normal_1 = gr.Image(interactive=False, height=512, show_label=False)
             normal_2 = gr.Image(interactive=False, height=512, show_label=False)
             normal_3 = gr.Image(interactive=False, height=512, show_label=False)
             normal_4 = gr.Image(interactive=False, height=512, show_label=False)
             normal_5 = gr.Image(interactive=False, height=512, show_label=False)
             normal_6 = gr.Image(interactive=False, height=512, show_label=False)
         run_btn.click(
             fn=partial(preprocess, predictor), inputs=[input_image, input_processing], outputs=[processed_image_highres, processed_image], queue=True
         ).success(
@@ -414,7 +417,7 @@ def run_demo():
         # )
         demo.queue().launch(share=True, max_threads=80)
 if __name__ == '__main__':
     fire.Fire(run_demo)

         input_image = Image.fromarray((rgb * 255).astype(np.uint8))
     else:
         input_image = expand2square(input_image, (127, 127, 127, 0))
+    return input_image, input_image.resize((320, 320), Image.Resampling.LANCZOS)
 def load_era3d_pipeline(cfg):
     # Load scheduler, tokenizer and models.
     pipeline = StableUnCLIPImg2ImgPipeline.from_pretrained(
+        cfg.pretrained_model_name_or_path,
+        torch_dtype=weight_dtype
     )
     if torch.cuda.is_available():
         pipeline.to('cuda:0')
+        pipeline.unet.enable_xformers_memory_efficient_attention()
     # sys.main_lock = threading.Lock()
     return pipeline
 from mvdiffusion.data.single_image_dataset import SingleImageDataset
+def prepare_data(single_image, crop_size, cfg):
+    dataset = SingleImageDataset(root_dir='', num_views=6, img_wh=[512, 512], bg_color='white',
+        crop_size=crop_size, single_image=single_image, prompt_embeds_path=cfg.validation_dataset.prompt_embeds_path)
     return dataset[0]
 scene = 'scene'
     if chk_group is not None:
         write_image = "Write Results" in chk_group
+    batch = prepare_data(single_image, crop_size, cfg)
     pipeline.set_progress_bar_config(disable=True)
     seed = int(seed)
         guidance_scale=guidance_scale,
         output_type='pt',
         num_images_per_prompt=1,
+        # return_elevation_focal=cfg.log_elevation_focal_length,
         **cfg.pipe_validation_kwargs
     ).images
     custom_css = '''#disp_image {
         text-align: center; /* Horizontally center the content */
     }'''
     with gr.Blocks(title=_TITLE, theme=custom_theme, css=custom_css) as demo:
         with gr.Row():
         gr.Markdown(_DESCRIPTION)
         with gr.Row(variant='panel'):
             with gr.Column(scale=1):
+                input_image = gr.Image(type='pil', image_mode='RGBA', height=320, label='Input image')
             with gr.Column(scale=1):
+                processed_image_highres = gr.Image(type='pil', image_mode='RGBA', visible=False)
                 processed_image = gr.Image(
                     type='pil',
                     label="Processed Image",
                     interactive=False,
+                    # height=320,
                     image_mode='RGBA',
                     elem_id="disp_image",
                     visible=True,
             #                         label="3D Model", height=320,
             #                         # camera_position=[0,0,2.0]
             #                         )
         with gr.Row(variant='panel'):
             with gr.Column(scale=1):
                 example_folder = os.path.join(os.path.dirname(__file__), "./examples")
             view_1 = gr.Image(interactive=False, height=512, show_label=False)
             view_2 = gr.Image(interactive=False, height=512, show_label=False)
             view_3 = gr.Image(interactive=False, height=512, show_label=False)
+        with gr.Row():
             view_4 = gr.Image(interactive=False, height=512, show_label=False)
             view_5 = gr.Image(interactive=False, height=512, show_label=False)
             view_6 = gr.Image(interactive=False, height=512, show_label=False)
             normal_1 = gr.Image(interactive=False, height=512, show_label=False)
             normal_2 = gr.Image(interactive=False, height=512, show_label=False)
             normal_3 = gr.Image(interactive=False, height=512, show_label=False)
+        with gr.Row():
             normal_4 = gr.Image(interactive=False, height=512, show_label=False)
             normal_5 = gr.Image(interactive=False, height=512, show_label=False)
             normal_6 = gr.Image(interactive=False, height=512, show_label=False)
+        print('Launching...')
         run_btn.click(
             fn=partial(preprocess, predictor), inputs=[input_image, input_processing], outputs=[processed_image_highres, processed_image], queue=True
         ).success(
         # )
         demo.queue().launch(share=True, max_threads=80)
 if __name__ == '__main__':
     fire.Fire(run_demo)

mvdiffusion/data/single_image_dataset.py CHANGED Viewed

@@ -236,10 +236,10 @@ class SingleImageDataset(Dataset):
         color_prompt_embeddings = self.color_text_embeds if hasattr(self, 'color_text_embeds') else None
         out =  {
-            'imgs_in': img_tensors_in,
-            'alphas': alpha_tensors_in,
-            'normal_prompt_embeddings': normal_prompt_embeddings,
-            'color_prompt_embeddings': color_prompt_embeddings,
             'filename': filename,
             }

         color_prompt_embeddings = self.color_text_embeds if hasattr(self, 'color_text_embeds') else None
         out =  {
+            'imgs_in': img_tensors_in.unsqueeze(0),
+            'alphas': alpha_tensors_in.unsqueeze(0),
+            'normal_prompt_embeddings': normal_prompt_embeddings.unsqueeze(0),
+            'color_prompt_embeddings': color_prompt_embeddings.unsqueeze(0),
             'filename': filename,
             }

mvdiffusion/pipelines/pipeline_mvdiffusion_unclip.py CHANGED Viewed

@@ -239,7 +239,7 @@ class StableUnCLIPImg2ImgPipeline(DiffusionPipeline):
             image_embeds = torch.cat([negative_prompt_embeds, normal_image_embeds, negative_prompt_embeds, color_image_embeds], 0)
         # _____________________________vae input latents__________________________________________________
-        image_pt = torch.stack([TF.to_tensor(img) for img in image_pil], dim=0).to(device)
         image_pt = image_pt * 2.0 - 1.0
         image_latents = self.vae.encode(image_pt).latent_dist.mode() * self.vae.config.scaling_factor
         # Note: repeat differently from official pipelines

             image_embeds = torch.cat([negative_prompt_embeds, normal_image_embeds, negative_prompt_embeds, color_image_embeds], 0)
         # _____________________________vae input latents__________________________________________________
+        image_pt = torch.stack([TF.to_tensor(img) for img in image_pil], dim=0).to(dtype=self.vae.dtype, device=device)
         image_pt = image_pt * 2.0 - 1.0
         image_latents = self.vae.encode(image_pt).latent_dist.mode() * self.vae.config.scaling_factor
         # Note: repeat differently from official pipelines

requirements.txt CHANGED Viewed

@@ -30,7 +30,7 @@ torch_efficient_distloss
 tensorboard
 rembg
 segment_anything
-gradio==3.50.2
 moviepy
 kornia
 fire

 tensorboard
 rembg
 segment_anything
+gradio==4.29.0
 moviepy
 kornia
 fire