Spaces · Running on L40S
Commit 575f433 · Parent(s): e70b261
remove clear gpu memory

app.py CHANGED
@@ -22,31 +22,6 @@ DEFAULT_MAX_SEQUENCE_LENGTH = 512
 GENERATION_SEED = 0 # could use a random number generator to set this, for more variety
 HF_TOKEN = os.environ.get("HF_ACCESS_TOKEN")
 
-def clear_gpu_memory(*args):
-    allocated_before = torch.cuda.memory_allocated(0) / 1024**3 if DEVICE == "cuda" else 0
-    reserved_before = torch.cuda.memory_reserved(0) / 1024**3 if DEVICE == "cuda" else 0
-    print(f"Before clearing: Allocated={allocated_before:.2f} GB, Reserved={reserved_before:.2f} GB")
-
-    deleted_types = []
-    for arg in args:
-        if arg is not None:
-            deleted_types.append(str(type(arg)))
-            del arg
-
-    if deleted_types:
-        print(f"Deleted objects of types: {', '.join(deleted_types)}")
-    else:
-        print("No objects passed to clear_gpu_memory.")
-
-    gc.collect()
-    if DEVICE == "cuda":
-        torch.cuda.empty_cache()
-
-    allocated_after = torch.cuda.memory_allocated(0) / 1024**3 if DEVICE == "cuda" else 0
-    reserved_after = torch.cuda.memory_reserved(0) / 1024**3 if DEVICE == "cuda" else 0
-    print(f"After clearing: Allocated={allocated_after:.2f} GB, Reserved={reserved_after:.2f} GB")
-    print("-" * 20)
-
 CACHED_PIPES = {}
 def load_bf16_pipeline():
     """Loads the original FLUX.1-dev pipeline in BF16 precision."""
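For reference, the deleted helper follows a common PyTorch cleanup recipe: drop Python references to the large objects, force a garbage-collection pass, and then release the blocks held by CUDA's caching allocator. A minimal, self-contained sketch of that recipe (free_cuda_memory and this DEVICE definition are illustrative stand-ins, not the app's exact code):

import gc

import torch

# Stand-in for the DEVICE constant that app.py defines outside this diff.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def free_cuda_memory(*objs):
    """Drop references to objs, then release cached CUDA memory."""
    for obj in objs:
        del obj  # drops only this local name; callers must clear their own references
    gc.collect()  # collect unreachable objects so their tensors can be deallocated
    if DEVICE == "cuda":
        torch.cuda.empty_cache()  # return cached allocator blocks to the driver

Note that torch.cuda.empty_cache() cannot free tensors that are still referenced somewhere, which is why the del and gc.collect() steps come first, and why the commented-out call sites below also set current_pipe = None.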
@@ -120,7 +95,7 @@ def load_bnb_4bit_pipeline():
 
 @spaces.GPU(duration=240)
 def generate_images(prompt, quantization_choice, progress=gr.Progress(track_tqdm=True)):
-    """Loads original and selected quantized model, generates one image each,
+    """Loads original and selected quantized model, generates one image each, shuffles results."""
     if not prompt:
         return None, {}, gr.update(value="Please enter a prompt.", interactive=False), gr.update(choices=[], value=None)
 
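The context lines above show the generation entry point wrapped in @spaces.GPU(duration=240). On ZeroGPU hardware, this decorator from the Hugging Face spaces package attaches a GPU to the process for at most the given number of seconds per call. A minimal sketch of the shape (generate and its body are placeholders, not the app's code):

import spaces  # Hugging Face spaces package

@spaces.GPU(duration=240)  # request a GPU for up to 240 seconds per call
def generate(prompt: str):
    # Placeholder body; the real generate_images loads pipelines and renders images.
    ...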
@@ -161,12 +136,6 @@ def generate_images(prompt, quantization_choice, progress=gr.Progress(track_tqdm=True)):
         print(f"\n--- Loading {label} Model ---")
         load_start_time = time.time()
         try:
-            # Ensure previous pipe is cleared *before* loading the next
-            # if current_pipe:
-            #     print(f"--- Clearing memory before loading {label} Model ---")
-            #     clear_gpu_memory(current_pipe)
-            #     current_pipe = None
-
             current_pipe = load_func()
             load_end_time = time.time()
             print(f"{label} model loaded in {load_end_time - load_start_time:.2f} seconds.")
@@ -184,22 +153,11 @@ def generate_images(prompt, quantization_choice, progress=gr.Progress(track_tqdm=True)):
 
         except Exception as e:
             print(f"Error during {label} model processing: {e}")
-            # Attempt cleanup
-            if current_pipe:
-                print(f"--- Clearing memory after error with {label} Model ---")
-                clear_gpu_memory(current_pipe)
-                current_pipe = None
             # Return error state to Gradio - update all outputs
             return None, {}, gr.update(value=f"Error processing {label} model: {e}", interactive=False), gr.update(choices=[], value=None)
 
     # No finally block needed here, cleanup happens before next load or after loop
 
-    # Final cleanup after the loop finishes successfully
-    # if current_pipe:
-    #     print(f"--- Clearing memory after last model ({label}) ---")
-    #     clear_gpu_memory(current_pipe)
-    #     current_pipe = None
-
     if len(results) != len(model_configs):
         print("Generation did not complete for all models.")
         # Update all outputs
@@ -275,7 +233,7 @@ with gr.Blocks(title="FLUX Quantization Challenge", theme=gr.themes.Soft()) as demo:
     generate_button = gr.Button("Generate & Compare", variant="primary", scale=1)
 
     output_gallery = gr.Gallery(
-        label="Generated Images
+        label="Generated Images",
         columns=2,
         height=512,
         object_fit="contain",
@@ -324,5 +282,5 @@ with gr.Blocks(title="FLUX Quantization Challenge", theme=gr.themes.Soft()) as demo:
 
 if __name__ == "__main__":
     # queue()
-    # demo.queue().launch()
-    demo.launch()
+    # demo.queue().launch()
+    demo.launch(share=True)
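The surviving context around the first hunk (CACHED_PIPES = {} directly ahead of load_bf16_pipeline) indicates the Space keeps loaded pipelines cached between requests rather than freeing them. A sketch of what such a memoizing loader might look like, assuming the diffusers FluxPipeline API; the loader's real arguments (token handling, device placement) are not visible in this diff:

import torch
from diffusers import FluxPipeline  # assumption: the standard diffusers FLUX pipeline

CACHED_PIPES = {}

def load_bf16_pipeline():
    """Load FLUX.1-dev in BF16 once, then reuse the cached pipeline."""
    if "bf16" not in CACHED_PIPES:
        CACHED_PIPES["bf16"] = FluxPipeline.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            torch_dtype=torch.bfloat16,
        )
    return CACHED_PIPES["bf16"]

Caching this way trades resident GPU memory for much faster repeat runs, which fits this commit's removal of the explicit clear_gpu_memory calls between loads.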