Spaces:

JMalott
/

ai_architecture

Runtime error

App Files Files

JMalott committed on Sep 6, 2022

Commit

0f96e76

1 Parent(s): dfa8d4a

Update min_dalle/min_dalle.py

Browse files

Files changed (1) hide show

min_dalle/min_dalle.py +17 -22

min_dalle/min_dalle.py CHANGED Viewed

@@ -10,12 +10,11 @@ from typing import Iterator
 from .text_tokenizer import TextTokenizer
 from .models import DalleBartEncoder, DalleBartDecoder, VQGanDetokenizer
 import streamlit as st
-import gc
 torch.set_grad_enabled(False)
 torch.set_num_threads(os.cpu_count())
-torch.backends.cudnn.enabled = False
-torch.backends.cudnn.allow_tf16 = False
 MIN_DALLE_REPO = 'https://huggingface.co/kuprel/min-dalle/resolve/main/'
 IMAGE_TOKEN_COUNT = 256
@@ -25,7 +24,7 @@ class MinDalle:
     def __init__(
         self,
         models_root: str = 'pretrained',
-        dtype: torch.dtype = torch.float16,
         device: str = None,
         is_mega: bool = True,
         is_reusable: bool = True,
@@ -188,7 +187,7 @@ class MinDalle:
         if len(tokens) > self.text_token_count:
             tokens = tokens[:self.text_token_count]
         if is_verbose: print("{} text tokens".format(len(tokens)), tokens)
-        text_tokens = numpy.ones((2, 64), dtype=numpy.int16)
         text_tokens[0, :2] = [tokens[0], tokens[-1]]
         text_tokens[1, :len(tokens)] = tokens
         text_tokens = torch.tensor(
@@ -232,37 +231,33 @@ class MinDalle:
         token_indices = torch.arange(IMAGE_TOKEN_COUNT, device=self.device)
         settings = torch.tensor(
             [temperature, top_k, supercondition_factor],
-            dtype=torch.float16,
             device=self.device
         )
         for i in range(IMAGE_TOKEN_COUNT):
             if(st.session_state.page != 0):
                 break
             st.session_state.bar.progress(i/IMAGE_TOKEN_COUNT)
-            #torch.cuda.empty_cache()
             #torch.cpu.empty_cache()
-            #gc.collect()
-            image_tokens[i + 1], attention_state = self.decoder.forward(
-                settings=settings,
-                attention_mask=attention_mask,
-                encoder_state=encoder_state,
-                attention_state=attention_state,
-                prev_tokens=image_tokens[i],
-                token_index=token_indices[[i]]
-            )
-            if ((i + 1) % 16 == 0 and progressive_outputs) or i + 1 == 256:
                 yield self.image_grid_from_tokens(
                     image_tokens=image_tokens[1:].T,
                     is_seamless=is_seamless,
                     is_verbose=is_verbose
                 )
     def generate_image_stream(self, *args, **kwargs) -> Iterator[Image.Image]:
         image_stream = self.generate_raw_image_stream(*args, **kwargs)

 from .text_tokenizer import TextTokenizer
 from .models import DalleBartEncoder, DalleBartDecoder, VQGanDetokenizer
 import streamlit as st
 torch.set_grad_enabled(False)
 torch.set_num_threads(os.cpu_count())
+torch.backends.cudnn.enabled = True
+torch.backends.cudnn.allow_tf32 = True
 MIN_DALLE_REPO = 'https://huggingface.co/kuprel/min-dalle/resolve/main/'
 IMAGE_TOKEN_COUNT = 256
     def __init__(
         self,
         models_root: str = 'pretrained',
+        dtype: torch.dtype = torch.float32,
         device: str = None,
         is_mega: bool = True,
         is_reusable: bool = True,
         if len(tokens) > self.text_token_count:
             tokens = tokens[:self.text_token_count]
         if is_verbose: print("{} text tokens".format(len(tokens)), tokens)
+        text_tokens = numpy.ones((2, 64), dtype=numpy.int32)
         text_tokens[0, :2] = [tokens[0], tokens[-1]]
         text_tokens[1, :len(tokens)] = tokens
         text_tokens = torch.tensor(
         token_indices = torch.arange(IMAGE_TOKEN_COUNT, device=self.device)
         settings = torch.tensor(
             [temperature, top_k, supercondition_factor],
+            dtype=torch.float32,
             device=self.device
         )
         for i in range(IMAGE_TOKEN_COUNT):
             if(st.session_state.page != 0):
                 break
             st.session_state.bar.progress(i/IMAGE_TOKEN_COUNT)
+            torch.cuda.empty_cache()
             #torch.cpu.empty_cache()
+            with torch.cuda.amp.autocast(dtype=self.dtype):
+                image_tokens[i + 1], attention_state = self.decoder.forward(
+                    settings=settings,
+                    attention_mask=attention_mask,
+                    encoder_state=encoder_state,
+                    attention_state=attention_state,
+                    prev_tokens=image_tokens[i],
+                    token_index=token_indices[[i]]
+                )
+           # with torch.cuda.amp.autocast(dtype=torch.float32):
+            if ((i + 1) % 32 == 0 and progressive_outputs) or i + 1 == 256:
                 yield self.image_grid_from_tokens(
                     image_tokens=image_tokens[1:].T,
                     is_seamless=is_seamless,
                     is_verbose=is_verbose
                 )
     def generate_image_stream(self, *args, **kwargs) -> Iterator[Image.Image]:
         image_stream = self.generate_raw_image_stream(*args, **kwargs)