Optimization

app.py
CHANGED
@@ -357,31 +357,36 @@ def worker(input_image, prompts, n_prompt, seed, resolution, total_second_length
 
     H, W, C = input_image.shape
     height, width = find_nearest_bucket(H, W, resolution=resolution)
-    input_image_np = resize_and_center_crop(input_image, target_width=width, target_height=height)
-
-    #Image.fromarray(input_image_np).save(os.path.join(outputs_folder, f'{job_id}.png'))
-
-    input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1
-    input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None]
-
-    # VAE encoding
-
-    stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding ...'))))
-
-    if not high_vram:
-        load_model_as_complete(vae, target_device=gpu)
-
-    start_latent = vae_encode(input_image_pt, vae)
-
-    # CLIP Vision
-
-    stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
-
-    if not high_vram:
-        load_model_as_complete(image_encoder, target_device=gpu)
+
+    def get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram):
+        input_image_np = resize_and_center_crop(input_image, target_width=width, target_height=height)
+
+        #Image.fromarray(input_image_np).save(os.path.join(outputs_folder, f'{job_id}.png'))
+
+        input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1
+        input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None]
+
+        # VAE encoding
+
+        stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding ...'))))
+
+        if not high_vram:
+            load_model_as_complete(vae, target_device=gpu)
+
+        start_latent = vae_encode(input_image_pt, vae)
+
+        # CLIP Vision
+
+        stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
+
+        if not high_vram:
+            load_model_as_complete(image_encoder, target_device=gpu)
+
+        return start_latent
+
+    start_latent = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
 
-    image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
-    image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
+    image_encoder_last_hidden_state = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder).last_hidden_state
 
     # Dtype
 
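Both hunks make the same change: the image preprocessing and the VAE/CLIP-Vision model staging are pulled out into a nested get_start_latent helper, so worker and worker_last_frame no longer carry the block inline. The densest lines are the pixel conversion: dividing a uint8 image by 127.5 and subtracting 1 rescales [0, 255] to [-1, 1], and permute(2, 0, 1)[None, :, None] turns an (H, W, C) frame into the (batch, channels, frames, height, width) layout the video VAE expects. A minimal standalone sketch of just that conversion (plain numpy/torch, no FramePack imports; the 480x832 size is only an illustrative bucket):

    import numpy as np
    import torch

    # A stand-in RGB frame: uint8, shape (H, W, C) = (480, 832, 3).
    input_image_np = np.zeros((480, 832, 3), dtype=np.uint8)

    # uint8 [0, 255] -> float [-1, 1], as in the `/ 127.5 - 1` line above.
    input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1

    # (H, W, C) -> (C, H, W), then insert batch and frame axes -> (1, C, 1, H, W).
    input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None]

    print(input_image_pt.shape)         # torch.Size([1, 3, 1, 480, 832])
    print(input_image_pt.min().item())  # -1.0 for an all-black frame
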
@@ -573,31 +578,36 @@ def worker_last_frame(input_image, prompts, n_prompt, seed, resolution, total_second_length
 
     H, W, C = input_image.shape
     height, width = find_nearest_bucket(H, W, resolution=resolution)
-    input_image_np = resize_and_center_crop(input_image, target_width=width, target_height=height)
-
-    #Image.fromarray(input_image_np).save(os.path.join(outputs_folder, f'{job_id}.png'))
-
-    input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1
-    input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None]
-
-    # VAE encoding
-
-    stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding ...'))))
-
-    if not high_vram:
-        load_model_as_complete(vae, target_device=gpu)
-
-    start_latent = vae_encode(input_image_pt, vae)
-
-    # CLIP Vision
-
-    stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
-
-    if not high_vram:
-        load_model_as_complete(image_encoder, target_device=gpu)
+
+    def get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram):
+        input_image_np = resize_and_center_crop(input_image, target_width=width, target_height=height)
+
+        #Image.fromarray(input_image_np).save(os.path.join(outputs_folder, f'{job_id}.png'))
+
+        input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1
+        input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None]
+
+        # VAE encoding
+
+        stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding ...'))))
+
+        if not high_vram:
+            load_model_as_complete(vae, target_device=gpu)
+
+        start_latent = vae_encode(input_image_pt, vae)
+
+        # CLIP Vision
+
+        stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
+
+        if not high_vram:
+            load_model_as_complete(image_encoder, target_device=gpu)
+
+        return start_latent
+
+    start_latent = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
 
-    image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
-    image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
+    image_encoder_last_hidden_state = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder).last_hidden_state
 
     # Dtype
 
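One caveat in the refactor as committed: input_image_np is now assigned only inside the nested get_start_latent, so it is local to that helper, yet the added line

    image_encoder_last_hidden_state = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder).last_hidden_state

still reads input_image_np from the enclosing scope. Unless the name is bound elsewhere in worker / worker_last_frame, that line raises NameError at runtime. A sketch of one possible follow-up patch (not part of this commit) that returns the preprocessed frame alongside the latent:

    -        return start_latent
    +        return start_latent, input_image_np
     
    -    start_latent = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
    +    start_latent, input_image_np = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)

The same fix would need to be applied in both worker and worker_last_frame, since each defines its own copy of the helper.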