Spaces: Running on Zero

Commit: added file with changes

app.py (CHANGED)
@@ -2,6 +2,7 @@ import os
 import gc
 from typing import List, Tuple, Dict
 import json
+import spaces
 
 import torch
 import gradio as gr
@@ -26,15 +27,16 @@ if HF_TOKEN:
 # Avoid meta-tensor init from environment leftovers
 os.environ.pop("ACCELERATE_INIT_EMPTY_WEIGHTS", None)
 
-DEVICE = "cuda"
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print("Using device:", DEVICE)
-
 torch.backends.cudnn.benchmark = True
 
 # -----------------------------
 # Model / pipeline loading
 # -----------------------------
-def load_pipeline_single_gpu(device_str: str) -> FluxKontextSliderPipeline:
+@torch.no_grad()
+@spaces.GPU
+def load_pipeline_single_gpu() -> FluxKontextSliderPipeline:
     pretrained = "black-forest-labs/FLUX.1-Kontext-dev"
 
     n_slider_layers = 4
@@ -50,7 +52,6 @@ def load_pipeline_single_gpu(device_str: str) -> FluxKontextSliderPipeline:
         low_cpu_mem_usage=False,
         token=HF_TOKEN,
     )
-    transformer.eval()
     weight_dtype = transformer.dtype  # keep checkpoint dtype
 
     # Slider projector
@@ -69,7 +70,7 @@ def load_pipeline_single_gpu(device_str: str) -> FluxKontextSliderPipeline:
 
     # Load projector weights on CPU
     slider_projector_path = os.path.join(trained_models_path, "slider_projector.pth")
-    state_dict = torch.load(slider_projector_path)
+    state_dict = torch.load(slider_projector_path, map_location='cpu')
     print("state_dict keys: {}".format(state_dict.keys()))
 
     slider_projector.load_state_dict(state_dict)
@@ -90,13 +91,11 @@ def load_pipeline_single_gpu(device_str: str) -> FluxKontextSliderPipeline:
 
     pipeline.load_lora_weights(trained_models_path)
     print("loaded the pipeline with lora weights from: {}".format(trained_models_path))
-
-    # Move everything to the single device
-    pipeline.to(device_str)
     return pipeline
 
 
-PIPELINE = load_pipeline_single_gpu(DEVICE)
+PIPELINE = load_pipeline_single_gpu()
+PIPELINE.to(DEVICE)
 print(f"[init] Pipeline loaded on {DEVICE}")
 
 
@@ -285,7 +284,7 @@ def resize_image(img: Image.Image, target: int = 512) -> Image.Image:
     img = img.resize((new_w, new_h), resample)
     return img
 
-
+@spaces.GPU
 def _encode_prompt(prompt: str):
     with torch.no_grad():
         pe, ppe, _ = PIPELINE.encode_prompt(prompt, prompt_2=prompt)
@@ -295,6 +294,7 @@ def _encode_prompt(prompt: str):
 # -----------------------------
 # Inference functions
 # -----------------------------
+@spaces.GPU
 def generate_image_stack_edits(text_prompt, n_edits, input_image):
     """
     Compute n_edits images on a single GPU for slider values in (0,1],
@@ -346,7 +346,7 @@ def generate_image_stack_edits(text_prompt, n_edits, input_image):
     first = results[0] if results else None
     return results, first
 
-
+@spaces.GPU
 def generate_single_image(text_prompt, slider_value, input_image):
     if not input_image or not text_prompt or text_prompt.startswith("Please select"):
         return None
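For reference, this commit follows the standard ZeroGPU pattern: the model is built once at startup, and every function that needs CUDA is wrapped in @spaces.GPU so a GPU is attached only for the duration of that call. A minimal self-contained sketch of the pattern (the pipeline, prompt arguments, and duration value below are illustrative assumptions, not taken from app.py):

import spaces                      # ZeroGPU helper, available on Hugging Face Spaces
import torch
import gradio as gr
from diffusers import DiffusionPipeline

# Build the pipeline at import time; ZeroGPU intercepts the .to("cuda") call
# and replays it when a GPU is actually attached inside a @spaces.GPU function.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/sdxl-turbo", torch_dtype=torch.float16  # illustrative model choice
)
pipe.to("cuda")

@spaces.GPU(duration=60)  # hold the GPU for at most 60 s per call
def generate(prompt: str):
    # CUDA is available only inside this function; elsewhere the Space has no GPU.
    return pipe(prompt, num_inference_steps=2, guidance_scale=0.0).images[0]

gr.Interface(generate, gr.Textbox(label="Prompt"), gr.Image()).launch()

This presumably also motivates the commit's move of pipeline.to(...) out of the loader and up to module level as PIPELINE.to(DEVICE), where ZeroGPU can record the device transfer once at startup.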