Update inferencer.py
inferencer.py (CHANGED, +42 -37)
@@ -51,7 +51,8 @@ class InterleaveInferencer:
             new_token_ids=self.new_token_ids,
         )

-        past_key_values = self.model.forward_cache_update_text(past_key_values, **generation_input)
+        with torch.amp.autocast("cuda", enabled=True, dtype=torch.bfloat16):
+            past_key_values = self.model.forward_cache_update_text(past_key_values, **generation_input)
         gen_context['kv_lens'] = kv_lens
         gen_context['ropes'] = ropes
         gen_context['past_key_values'] = past_key_values
@@ -76,7 +77,8 @@ class InterleaveInferencer:
                 transforms=self.vae_transform,
                 new_token_ids=self.new_token_ids,
             )
-            past_key_values = self.model.forward_cache_update_vae(self.vae_model, past_key_values, **generation_input)
+            with torch.amp.autocast("cuda", enabled=True, dtype=torch.bfloat16):
+                past_key_values = self.model.forward_cache_update_vae(self.vae_model, past_key_values, **generation_input)

         if vit:
             ## update vit
@@ -87,7 +89,8 @@ class InterleaveInferencer:
                 transforms=self.vit_transform,
                 new_token_ids=self.new_token_ids,
             )
-            past_key_values = self.model.forward_cache_update_vit(past_key_values, **generation_input)
+            with torch.amp.autocast("cuda", enabled=True, dtype=torch.bfloat16):
+                past_key_values = self.model.forward_cache_update_vit(past_key_values, **generation_input)

         gen_context['kv_lens'] = kv_lens
         gen_context['ropes'] = ropes
@@ -143,27 +146,28 @@ class InterleaveInferencer:
             image_sizes=[image_shape],
         )

-        unpacked_latent = self.model.generate_image(
-            past_key_values=past_key_values,
-            cfg_text_past_key_values=cfg_text_past_key_values,
-            cfg_img_past_key_values=cfg_img_past_key_values,
-            num_timesteps=num_timesteps,
-            cfg_text_scale=cfg_text_scale,
-            cfg_img_scale=cfg_img_scale,
-            cfg_interval=cfg_interval,
-            cfg_renorm_min=cfg_renorm_min,
-            cfg_renorm_type=cfg_renorm_type,
-            timestep_shift=timestep_shift,
-            **generation_input,
-            cfg_text_packed_position_ids=generation_input_cfg_text['cfg_packed_position_ids'],
-            cfg_text_packed_query_indexes=generation_input_cfg_text['cfg_packed_query_indexes'],
-            cfg_text_key_values_lens=generation_input_cfg_text['cfg_key_values_lens'],
-            cfg_text_packed_key_value_indexes=generation_input_cfg_text['cfg_packed_key_value_indexes'],
-            cfg_img_packed_position_ids=generation_input_cfg_img['cfg_packed_position_ids'],
-            cfg_img_packed_query_indexes=generation_input_cfg_img['cfg_packed_query_indexes'],
-            cfg_img_key_values_lens=generation_input_cfg_img['cfg_key_values_lens'],
-            cfg_img_packed_key_value_indexes=generation_input_cfg_img['cfg_packed_key_value_indexes'],
-        )
+        with torch.amp.autocast("cuda", enabled=True, dtype=torch.bfloat16):
+            unpacked_latent = self.model.generate_image(
+                past_key_values=past_key_values,
+                cfg_text_past_key_values=cfg_text_past_key_values,
+                cfg_img_past_key_values=cfg_img_past_key_values,
+                num_timesteps=num_timesteps,
+                cfg_text_scale=cfg_text_scale,
+                cfg_img_scale=cfg_img_scale,
+                cfg_interval=cfg_interval,
+                cfg_renorm_min=cfg_renorm_min,
+                cfg_renorm_type=cfg_renorm_type,
+                timestep_shift=timestep_shift,
+                **generation_input,
+                cfg_text_packed_position_ids=generation_input_cfg_text['cfg_packed_position_ids'],
+                cfg_text_packed_query_indexes=generation_input_cfg_text['cfg_packed_query_indexes'],
+                cfg_text_key_values_lens=generation_input_cfg_text['cfg_key_values_lens'],
+                cfg_text_packed_key_value_indexes=generation_input_cfg_text['cfg_packed_key_value_indexes'],
+                cfg_img_packed_position_ids=generation_input_cfg_img['cfg_packed_position_ids'],
+                cfg_img_packed_query_indexes=generation_input_cfg_img['cfg_packed_query_indexes'],
+                cfg_img_key_values_lens=generation_input_cfg_img['cfg_key_values_lens'],
+                cfg_img_packed_key_value_indexes=generation_input_cfg_img['cfg_packed_key_value_indexes'],
+            )

         image = self.decode_image(unpacked_latent[0], image_shape)
         return image
@@ -189,19 +193,20 @@ class InterleaveInferencer:
         kv_lens = gen_context['kv_lens']
         ropes = gen_context['ropes']

-        generation_input = self.model.prepare_start_tokens(kv_lens, ropes, self.new_token_ids)
-        for unpacked_latent in self.model.generate_text(
-            past_key_values=past_key_values,
-            max_length=max_length,
-            do_sample=do_sample,
-            temperature=temperature,
-            end_token_id=self.new_token_ids['eos_token_id'],
-            **generation_input,
-        ):
-            output = self.tokenizer.decode(unpacked_latent)
-            if output != "<|im_end|>":
-                yield output
-
+        with torch.amp.autocast("cuda", enabled=True, dtype=torch.bfloat16):
+            generation_input = self.model.prepare_start_tokens(kv_lens, ropes, self.new_token_ids)
+            for unpacked_latent in self.model.generate_text(
+                past_key_values=past_key_values,
+                max_length=max_length,
+                do_sample=do_sample,
+                temperature=temperature,
+                end_token_id=self.new_token_ids['eos_token_id'],
+                **generation_input,
+            ):
+                output = self.tokenizer.decode(unpacked_latent)
+                if output != "<|im_end|>":
+                    yield output
+
     @torch.no_grad()
     def interleave_inference(
         self,
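
Every hunk in this commit applies the same change: the calls into self.model (forward_cache_update_text / _vae / _vit, generate_image, and generate_text) are wrapped in a torch.amp.autocast context so they execute in bfloat16 on CUDA, presumably to resolve the dtype or memory problem behind the Space's runtime error. Below is a minimal sketch of the pattern; the nn.Linear and the names model and x are illustrative stand-ins, not part of the commit.

import torch
import torch.nn as nn

model = nn.Linear(16, 16).cuda()        # stand-in for self.model
x = torch.randn(1, 16, device="cuda")   # stand-in for a prepared generation input

with torch.no_grad():
    # Inside autocast, eligible ops (matmul, linear, conv) run in bfloat16
    # while the stored weights keep their original dtype, so only the
    # activations produced here come out as bf16.
    with torch.amp.autocast("cuda", enabled=True, dtype=torch.bfloat16):
        y = model(x)

print(y.dtype)  # torch.bfloat16

Unlike float16, bfloat16 keeps float32's exponent range, so this wrapping generally needs no gradient scaler or loss-scale tuning, which is why it is a common one-line fix for half-precision inference failures.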