Spaces:

TIGER-Lab
/

RationalRewards-Demo

Running on Zero

App Files Community

JasperHaozhe committed 21 days ago

Commit

5ffc27e

verified ·

1 Parent(s): 8a89c69

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -12

app.py CHANGED Viewed

@@ -25,7 +25,7 @@ VLM_QUANTIZATION_4BIT = False # Load VLM in 4-bit to save memory
 VLM_QUANTIZATION_8BIT = False # Load VLM in 8-bit to save memory (mutually exclusive with 4-bit)
 MODEL_ID = "JasperHaozhe/RationalRewards-Both-Demo"
-FLUX_MODEL_ID = "AlekseyCalvin/Flux_Kontext_Dev_fp8_scaled_diffusers" # "black-forest-labs/FLUX.1-Kontext-dev"
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
@@ -52,10 +52,10 @@ model_kwargs = {
 }
 # If VLM_MAX_MEMORY is set or using quantization, use device_map
-if VLM_MAX_MEMORY or VLM_QUANTIZATION_4BIT or VLM_QUANTIZATION_8BIT:
-    model_kwargs["device_map"] = "auto"
-    if VLM_MAX_MEMORY:
-        model_kwargs["max_memory"] = VLM_MAX_MEMORY
 model = AutoModelForImageTextToText.from_pretrained(
     MODEL_ID,
@@ -63,10 +63,10 @@ model = AutoModelForImageTextToText.from_pretrained(
 )
 # Only manually move to CPU/eval if NOT using device_map/quantization (which handles placement)
-if not (VLM_MAX_MEMORY or VLM_QUANTIZATION_4BIT or VLM_QUANTIZATION_8BIT):
-    model.to("cpu").eval()
-else:
-    model.eval()
 # Load Flux Pipeline
 flux_pipeline = FluxKontextPipeline.from_pretrained(
@@ -74,7 +74,7 @@ flux_pipeline = FluxKontextPipeline.from_pretrained(
     torch_dtype=torch.bfloat16
 )
 # Fix VAE precision for Flux to avoid artifacts
-# flux_pipeline.vae.to(dtype=torch.float32)
 # flux_pipeline.enable_attention_slicing() # Enable attention slicing to save memory during inference
 # Assume we can load both models simultaneously (User request)
 # No CPU offloading logic here.
@@ -474,6 +474,7 @@ def model_inference(task_type, instruction_text, image1, image2, image3, progres
             os.makedirs("generated_images", exist_ok=True)
             generated_image_path = f"generated_images/flux_edit_{timestamp}.png"
             generated_image.save(generated_image_path)
         except Exception as e:
             yield f"Error generating image: {str(e)}", None
@@ -502,8 +503,8 @@ def model_inference(task_type, instruction_text, image1, image2, image3, progres
     messages = [{"role": "user", "content": content}]
     # Ensure model is on CUDA/device for evaluation (VLM handles its own placement via device_map if set)
-    if not (VLM_MAX_MEMORY or VLM_QUANTIZATION_4BIT or VLM_QUANTIZATION_8BIT):
-        model.to(device_vlm)
     # Generate and stream text
     prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

 VLM_QUANTIZATION_8BIT = False # Load VLM in 8-bit to save memory (mutually exclusive with 4-bit)
 MODEL_ID = "JasperHaozhe/RationalRewards-Both-Demo"
+FLUX_MODEL_ID = "yuvraj108c/FLUX.1-Kontext-dev" # "black-forest-labs/FLUX.1-Kontext-dev"
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 }
 # If VLM_MAX_MEMORY is set or using quantization, use device_map
+# if VLM_MAX_MEMORY or VLM_QUANTIZATION_4BIT or VLM_QUANTIZATION_8BIT:
+#     model_kwargs["device_map"] = "auto"
+#     if VLM_MAX_MEMORY:
+#         model_kwargs["max_memory"] = VLM_MAX_MEMORY
 model = AutoModelForImageTextToText.from_pretrained(
     MODEL_ID,
 )
 # Only manually move to CPU/eval if NOT using device_map/quantization (which handles placement)
+# if not (VLM_MAX_MEMORY or VLM_QUANTIZATION_4BIT or VLM_QUANTIZATION_8BIT):
+#     model.to("cpu").eval()
+# else:
+model.eval()
 # Load Flux Pipeline
 flux_pipeline = FluxKontextPipeline.from_pretrained(
     torch_dtype=torch.bfloat16
 )
 # Fix VAE precision for Flux to avoid artifacts
+flux_pipeline.vae.to(dtype=torch.float32)
 # flux_pipeline.enable_attention_slicing() # Enable attention slicing to save memory during inference
 # Assume we can load both models simultaneously (User request)
 # No CPU offloading logic here.
             os.makedirs("generated_images", exist_ok=True)
             generated_image_path = f"generated_images/flux_edit_{timestamp}.png"
             generated_image.save(generated_image_path)
+            print(f">>>> generated: {generated_image_path}")
         except Exception as e:
             yield f"Error generating image: {str(e)}", None
     messages = [{"role": "user", "content": content}]
     # Ensure model is on CUDA/device for evaluation (VLM handles its own placement via device_map if set)
+    # if not (VLM_MAX_MEMORY or VLM_QUANTIZATION_4BIT or VLM_QUANTIZATION_8BIT):
+    model.to(device_vlm)
     # Generate and stream text
     prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)