Spaces:

MohamedRashad
/

Infinity

Paused

MohamedRashad committed on Jan 6, 2025

Commit

9914b63

1 Parent(s): 54f9225

Optimize Infinity model loading by clearing CUDA cache and adjusting device assignment; remove redundant calls

Files changed (1) hide show

app.py CHANGED Viewed

@@ -197,7 +197,8 @@ def load_infinity(
 ):
     print(f'[Loading Infinity]')
     text_maxlen = 512
-    with torch.cuda.amp.autocast(enabled=True, dtype=torch.bfloat16, cache_enabled=True), torch.no_grad():
         infinity_test: Infinity = Infinity(
             vae_local=vae, text_channels=text_channels, text_maxlen=text_maxlen,
             shared_aln=True, raw_scale_schedule=scale_schedule,
@@ -215,7 +216,7 @@ def load_infinity(
             inference_mode=True,
             train_h_div_w_list=[1.0],
             **model_kwargs,
-        ).to(device=device)
         print(f'[you selected Infinity with {model_kwargs=}] model size: {sum(p.numel() for p in infinity_test.parameters())/1e9:.2f}B, bf16={bf16}')
         if bf16:
@@ -225,9 +226,6 @@ def load_infinity(
         infinity_test.eval()
         infinity_test.requires_grad_(False)
-        infinity_test.cuda()
-        torch.cuda.empty_cache()
         print(f'[Load Infinity weights]')
         state_dict = torch.load(model_path, map_location=device)
         print(infinity_test.load_state_dict(state_dict))
@@ -529,7 +527,6 @@ with gr.Blocks() as demo:
             # Output Section
             gr.Markdown("### Generated Image")
             output_image = gr.Image(label="Generated Image", type="pil")
-            gr.Markdown("**Tip:** Right-click the image to save it.")
     # Error Handling
     error_message = gr.Textbox(label="Error Message", visible=False)

 ):
     print(f'[Loading Infinity]')
     text_maxlen = 512
+    torch.cuda.empty_cache()
+    with torch.amp.autocast(enabled=True, dtype=torch.bfloat16, cache_enabled=True), torch.no_grad():
         infinity_test: Infinity = Infinity(
             vae_local=vae, text_channels=text_channels, text_maxlen=text_maxlen,
             shared_aln=True, raw_scale_schedule=scale_schedule,
             inference_mode=True,
             train_h_div_w_list=[1.0],
             **model_kwargs,
+        ).to(device)
         print(f'[you selected Infinity with {model_kwargs=}] model size: {sum(p.numel() for p in infinity_test.parameters())/1e9:.2f}B, bf16={bf16}')
         if bf16:
         infinity_test.eval()
         infinity_test.requires_grad_(False)
         print(f'[Load Infinity weights]')
         state_dict = torch.load(model_path, map_location=device)
         print(infinity_test.load_state_dict(state_dict))
             # Output Section
             gr.Markdown("### Generated Image")
             output_image = gr.Image(label="Generated Image", type="pil")
     # Error Handling
     error_message = gr.Textbox(label="Error Message", visible=False)