Spaces:

LiuZichen
/

MagicQuillHelper

Running on Zero

App Files Files Community

[Admin maintenance] Migrate grant to ZeroGPU

by multimodalart HF Staff - opened May 24

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+79

-162

Files changed (2) hide show

app.py +72 -156
requirements.txt +7 -6

app.py CHANGED Viewed

@@ -1,111 +1,105 @@
 import gradio as gr
-import random
 import torch
 import numpy as np
-from PIL import Image, ImageOps
 import os
 import json
-import sys
-import multiprocessing
-from concurrent.futures import ProcessPoolExecutor
-import time
-# Assume MagicQuill and other dependencies are present as per user instruction
 from MagicQuill import folder_paths
 from MagicQuill.llava_new import LLaVAModel
 from huggingface_hub import snapshot_download
-# Imports for SAM (Only needed in worker process, but imported here for checking)
 from segment_anything import sam_model_registry, SamPredictor
-# Download models (Main process does this once)
 hf_token = os.environ.get("HF_TOKEN")
 snapshot_download(repo_id="LiuZichen/MagicQuill-models", repo_type="model", local_dir="models")
 snapshot_download(repo_id="LiuZichen/MagicQuillV2-models", repo_type="model", local_dir="models_v2", token=hf_token)
-# --- Global Models for Main Process ---
-print("Initializing LLaVAModel (Main Process)...")
-# LLaVA is stateless/thread-safe enough or too big to duplicate, so we keep it in main process (or use threads)
 llavaModel = LLaVAModel()
 print("LLaVAModel initialized.")
-# --- Worker Process Logic for SAM ---
-# Global variable for the worker process to hold its own SAM instance
-worker_sam = None
-def init_worker_sam(device='cuda'):
-    """
-    This function is called when a new worker process starts.
-    It initializes a standalone SAM model for that process.
-    """
-    global worker_sam
-    print(f"Process {os.getpid()}: Initializing SAM model...")
-    # Define SAM class locally or import it. Since it was defined in the script,
-    # we can redefine a helper or import the logic.
-    # Ideally, the SAM logic should be in a separate module to be picklable easily.
-    # But for this script, we can define the loading logic here.
-    checkpoint_path = 'models_v2/sam/sam_vit_b_01ec64.pth'
-    # Load Model
-    try:
-        sam = sam_model_registry['vit_b'](checkpoint=checkpoint_path)
-        sam.to(device=device)
-        predictor = SamPredictor(sam)
-        worker_sam = {
-            "predictor": predictor
-        }
-        print(f"Process {os.getpid()}: SAM initialized.")
-    except Exception as e:
-        print(f"Process {os.getpid()}: Failed to init SAM: {e}")
-def run_sam_inference(image_np, coordinates_positive, coordinates_negative, bboxes):
-    """
-    The actual inference function running inside the worker process.
-    """
-    global worker_sam
-    if worker_sam is None:
-        # Fallback if init didn't run or failed (though ProcessPool initializer should handle it)
-        init_worker_sam()
-    predictor = worker_sam["predictor"]
-    # Set Image
-    predictor.set_image(image_np)
     input_point = []
     input_label = []
-    # Process points
     if coordinates_positive:
         coords = json.loads(coordinates_positive) if isinstance(coordinates_positive, str) else coordinates_positive
         for p in coords:
             input_point.append([p['x'], p['y']])
             input_label.append(1)
     if coordinates_negative:
         coords = json.loads(coordinates_negative) if isinstance(coordinates_negative, str) else coordinates_negative
         for p in coords:
             input_point.append([p['x'], p['y']])
             input_label.append(0)
-    # Process bbox
     input_box = None
     if bboxes:
         if isinstance(bboxes, str):
             try:
                 bboxes = json.loads(bboxes)
-            except:
                 pass
         box_list = []
         if isinstance(bboxes, list):
             for box in bboxes:
                 box_list.append(list(box))
         if len(box_list) > 0:
             input_box = np.array(box_list)
@@ -116,104 +110,35 @@ def run_sam_inference(image_np, coordinates_positive, coordinates_negative, bbox
         input_point = None
         input_label = None
-    # Predict
-    masks, scores, logits = predictor.predict(
         point_coords=input_point,
         point_labels=input_label,
         box=input_box,
         multimask_output=False,
     )
     mask_np = masks[0]
-    # Post-processing
-    # Simply convert mask to uint8 [0, 255] for transport
     if mask_np.dtype == bool:
         mask_np = mask_np.astype(np.uint8) * 255
     else:
         mask_np = (mask_np > 0).astype(np.uint8) * 255
-    # Return mask as image for client to use
-    # We return mask_np twice to satisfy the function signature or unpacker in segment()
-    # segment() expects (image_with_alpha_np, mask_np)
-    return mask_np, mask_np
-# --- Main Process Helpers ---
-# We need a pool. Since we are in a script, we initialize it in main block.
-sam_pool = None
-def numpy_to_tensor(numpy_array):
-    tensor = torch.from_numpy(numpy_array).float().unsqueeze(0) / 255.
-    return tensor
-def guess(original_image, add_color_image, add_edge_mask):
-    # LLaVA inference runs in the main process (threaded)
-    original_image_tensor = numpy_to_tensor(original_image)
-    add_color_image_tensor = numpy_to_tensor(add_color_image)
-    add_edge_mask_tensor = numpy_to_tensor(add_edge_mask)
-    description, ans1, ans2 = llavaModel.process(original_image_tensor, add_color_image_tensor, add_edge_mask_tensor)
-    ans_list = []
-    if ans1 and ans1 != "":
-        ans_list.append(ans1)
-    if ans2 and ans2 != "":
-        ans_list.append(ans2)
-    return ", ".join(ans_list)
-def get_mask_bbox(mask_np):
-    # mask_np: [1, H, W] or [H, W]
-    if mask_np.ndim == 3:
-        mask_np = mask_np[0]
-    rows = np.any(mask_np, axis=1)
-    cols = np.any(mask_np, axis=0)
-    if not np.any(rows) or not np.any(cols):
-        return None
-    y_min, y_max = np.where(rows)[0][[0, -1]]
-    x_min, x_max = np.where(cols)[0][[0, -1]]
-    return int(x_min), int(y_min), int(x_max), int(y_max)
-def segment(image, coordinates_positive, coordinates_negative, bboxes):
-    # image: numpy array (uint8)
-    # Submit task to process pool
-    print("image.shape:", image.shape)
-    print("coordinates_positive:", coordinates_positive)
-    print("coordinates_negative:", coordinates_negative)
-    print("bboxes:", bboxes)
-    if sam_pool is None:
-        return None, json.dumps({'error': 'SAM pool not initialized'})
-    # Future result
-    future = sam_pool.submit(run_sam_inference, image, coordinates_positive, coordinates_negative, bboxes)
-    # Wait for result
-    image_with_alpha_np, mask_np = future.result(timeout=60) # 60s timeout
-    # Convert back to PIL for Gradio
-    res_pil = Image.fromarray(image_with_alpha_np)
-    # Calculate bbox
     mask_bbox = get_mask_bbox(mask_np)
     if mask_bbox:
         x_min, y_min, x_max, y_max = mask_bbox
         seg_bbox = {'startX': x_min, 'startY': y_min, 'endX': x_max, 'endY': y_max}
     else:
         seg_bbox = {'startX': 0, 'startY': 0, 'endX': 0, 'endY': 0}
     return res_pil, json.dumps(seg_bbox)
-# --- Gradio UI ---
 with gr.Blocks() as app:
     with gr.Row():
         gr.Markdown("## MagicQuill Worker Server (Draw&Guess + SAM)")
     with gr.Tab("Draw & Guess"):
         with gr.Row():
             dg_input_img = gr.Image(label="Original Image")
@@ -221,7 +146,7 @@ with gr.Blocks() as app:
             dg_edge_img = gr.Image(image_mode="L", label="Edge Mask")
         dg_output = gr.Textbox(label="Prediction Output")
         dg_btn = gr.Button("Guess")
         dg_btn.click(
             fn=guess,
             inputs=[dg_input_img, dg_color_img, dg_edge_img],
@@ -229,20 +154,20 @@ with gr.Blocks() as app:
             api_name="guess_prompt",
             concurrency_limit=1
         )
     with gr.Tab("SAM Segmentation"):
         with gr.Row():
             sam_input_img = gr.Image(label="Input Image", type="numpy")
             sam_pos_coords = gr.Textbox(label="Pos Coords JSON")
             sam_neg_coords = gr.Textbox(label="Neg Coords JSON")
             sam_bboxes = gr.Textbox(label="BBoxes JSON")
         with gr.Row():
             sam_output_img = gr.Image(label="Segmented Image", format="png")
             sam_output_bbox = gr.Textbox(label="Mask BBox JSON")
         sam_btn = gr.Button("Segment")
         sam_btn.click(
             fn=segment,
             inputs=[sam_input_img, sam_pos_coords, sam_neg_coords, sam_bboxes],
@@ -251,15 +176,6 @@ with gr.Blocks() as app:
             concurrency_limit=5
         )
 if __name__ == "__main__":
-    # Set start method to spawn for CUDA compatibility
-    multiprocessing.set_start_method('spawn', force=True)
-    # Initialize SAM Pool
-    # Adjust max_workers based on GPU memory (e.g., 2-4 workers for SAM-B)
-    NUM_SAM_WORKERS = 5
-    print(f"Starting {NUM_SAM_WORKERS} SAM worker processes...")
-    sam_pool = ProcessPoolExecutor(max_workers=NUM_SAM_WORKERS, initializer=init_worker_sam)
-    # Launch Gradio
     app.queue(max_size=40).launch(max_threads=5)

+import spaces
 import gradio as gr
 import torch
 import numpy as np
+from PIL import Image
 import os
 import json
 from MagicQuill import folder_paths
 from MagicQuill.llava_new import LLaVAModel
 from huggingface_hub import snapshot_download
 from segment_anything import sam_model_registry, SamPredictor
+# Model weights are fetched when the module is imported. The V2 repository is
+# downloaded with the token read from HF_TOKEN (None when it is unset); the
+# first repository is downloaded without a token.
 hf_token = os.environ.get("HF_TOKEN")
 snapshot_download(repo_id="LiuZichen/MagicQuill-models", repo_type="model", local_dir="models")
 snapshot_download(repo_id="LiuZichen/MagicQuillV2-models", repo_type="model", local_dir="models_v2", token=hf_token)
+print("Initializing LLaVAModel...")
 llavaModel = LLaVAModel()
 print("LLaVAModel initialized.")
+# Build the SAM 'vit_b' model from the checkpoint under models_v2/, move it to
+# the 'cuda' device, and wrap it in one module-level SamPredictor that
+# segment() uses for every request.
+print("Initializing SAM...")
+sam = sam_model_registry['vit_b'](checkpoint='models_v2/sam/sam_vit_b_01ec64.pth')
+sam.to(device='cuda')
+sam_predictor = SamPredictor(sam)
+print("SAM initialized.")
+# Convert a NumPy array into a float32 torch tensor divided by 255, with a new
+# leading dimension of size 1 added by unsqueeze(0).
+# NOTE(review): presumably the input is a uint8 image in [0, 255], giving
+# values in [0, 1] - confirm against the Gradio image inputs.
+def numpy_to_tensor(numpy_array):
+    tensor = torch.from_numpy(numpy_array).float().unsqueeze(0) / 255.
+    return tensor
+# Run llavaModel.process() on the three images and return its answers as one
+# string. Each input array is converted with numpy_to_tensor() first. The
+# second and third results of process() are kept when non-empty and joined
+# with ", "; an empty string is returned when both are empty.
+# Decorated with spaces.GPU (ZeroGPU) so the call runs with a GPU attached.
+@spaces.GPU
+def guess(original_image, add_color_image, add_edge_mask):
+    original_image_tensor = numpy_to_tensor(original_image)
+    add_color_image_tensor = numpy_to_tensor(add_color_image)
+    add_edge_mask_tensor = numpy_to_tensor(add_edge_mask)
+    # `description` (the first result) is not used below.
+    description, ans1, ans2 = llavaModel.process(original_image_tensor, add_color_image_tensor, add_edge_mask_tensor)
+    ans_list = []
+    if ans1 and ans1 != "":
+        ans_list.append(ans1)
+    if ans2 and ans2 != "":
+        ans_list.append(ans2)
+    return ", ".join(ans_list)
+# Return the bounding box of the non-zero region of a mask as
+# (x_min, y_min, x_max, y_max) Python ints, or None when the mask has no
+# non-zero element. Both max values are the index of the last non-zero
+# column/row, i.e. they are inclusive.
+def get_mask_bbox(mask_np):
+    # A 3-D input is reduced to its first slice along axis 0.
+    if mask_np.ndim == 3:
+        mask_np = mask_np[0]
+    # rows[i] / cols[j] are True when row i / column j holds any non-zero value.
+    rows = np.any(mask_np, axis=1)
+    cols = np.any(mask_np, axis=0)
+    if not np.any(rows) or not np.any(cols):
+        return None
+    # First and last True index along each axis.
+    y_min, y_max = np.where(rows)[0][[0, -1]]
+    x_min, x_max = np.where(cols)[0][[0, -1]]
+    return int(x_min), int(y_min), int(x_max), int(y_max)
+# Segment `image` with the module-level SAM predictor and return
+# (mask as a PIL image, JSON string with startX/startY/endX/endY).
+# coordinates_positive / coordinates_negative may be JSON strings or already
+# parsed lists of {'x': ..., 'y': ...} dicts; bboxes may be a JSON string or
+# a list of boxes. Decorated with spaces.GPU (ZeroGPU).
+@spaces.GPU
+def segment(image, coordinates_positive, coordinates_negative, bboxes):
+    print("image.shape:", image.shape)
+    print("coordinates_positive:", coordinates_positive)
+    print("coordinates_negative:", coordinates_negative)
+    print("bboxes:", bboxes)
+    sam_predictor.set_image(image)
     input_point = []
     input_label = []
+    # Positive points are labelled 1, negative points 0.
     if coordinates_positive:
         coords = json.loads(coordinates_positive) if isinstance(coordinates_positive, str) else coordinates_positive
         for p in coords:
             input_point.append([p['x'], p['y']])
             input_label.append(1)
     if coordinates_negative:
         coords = json.loads(coordinates_negative) if isinstance(coordinates_negative, str) else coordinates_negative
         for p in coords:
             input_point.append([p['x'], p['y']])
             input_label.append(0)
     input_box = None
     if bboxes:
         if isinstance(bboxes, str):
+            # A string that fails to parse is left as-is; it then fails the
+            # isinstance(list) check below, so no box is used.
             try:
                 bboxes = json.loads(bboxes)
+            except Exception:
                 pass
         box_list = []
         if isinstance(bboxes, list):
             for box in bboxes:
                 box_list.append(list(box))
         if len(box_list) > 0:
             input_box = np.array(box_list)
+    # NOTE(review): this diff view appears to omit unchanged lines between
+    # two hunks at this point; the two assignments below presumably belong
+    # to a branch that is not shown - confirm against the full file.
         input_point = None
         input_label = None
+    masks, scores, logits = sam_predictor.predict(
         point_coords=input_point,
         point_labels=input_label,
         box=input_box,
         multimask_output=False,
     )
     mask_np = masks[0]
+    # Normalise the mask to uint8 with values 0 or 255.
     if mask_np.dtype == bool:
         mask_np = mask_np.astype(np.uint8) * 255
     else:
         mask_np = (mask_np > 0).astype(np.uint8) * 255
+    res_pil = Image.fromarray(mask_np)
     mask_bbox = get_mask_bbox(mask_np)
+    # An empty mask (get_mask_bbox returned None) is reported as an all-zero box.
     if mask_bbox:
         x_min, y_min, x_max, y_max = mask_bbox
         seg_bbox = {'startX': x_min, 'startY': y_min, 'endX': x_max, 'endY': y_max}
     else:
         seg_bbox = {'startX': 0, 'startY': 0, 'endX': 0, 'endY': 0}
     return res_pil, json.dumps(seg_bbox)
 with gr.Blocks() as app:
     with gr.Row():
         gr.Markdown("## MagicQuill Worker Server (Draw&Guess + SAM)")
     with gr.Tab("Draw & Guess"):
         with gr.Row():
             dg_input_img = gr.Image(label="Original Image")
             dg_edge_img = gr.Image(image_mode="L", label="Edge Mask")
         dg_output = gr.Textbox(label="Prediction Output")
         dg_btn = gr.Button("Guess")
         dg_btn.click(
             fn=guess,
             inputs=[dg_input_img, dg_color_img, dg_edge_img],
             api_name="guess_prompt",
             concurrency_limit=1
         )
     with gr.Tab("SAM Segmentation"):
         with gr.Row():
             sam_input_img = gr.Image(label="Input Image", type="numpy")
             sam_pos_coords = gr.Textbox(label="Pos Coords JSON")
             sam_neg_coords = gr.Textbox(label="Neg Coords JSON")
             sam_bboxes = gr.Textbox(label="BBoxes JSON")
         with gr.Row():
             sam_output_img = gr.Image(label="Segmented Image", format="png")
             sam_output_bbox = gr.Textbox(label="Mask BBox JSON")
         sam_btn = gr.Button("Segment")
         sam_btn.click(
             fn=segment,
             inputs=[sam_input_img, sam_pos_coords, sam_neg_coords, sam_bboxes],
             concurrency_limit=5
         )
 if __name__ == "__main__":
+    # Enable the Gradio request queue with max_size=40, then launch the app
+    # with max_threads=5.
     app.queue(max_size=40).launch(max_threads=5)

requirements.txt CHANGED Viewed

@@ -14,7 +14,7 @@ anyio==4.4.0
 async-timeout==4.0.3
 attrs==23.2.0
 beautifulsoup4==4.12.3
-bitsandbytes==0.43.3
 certifi==2024.7.4
 cffi==1.16.0
 chardet==5.2.0
@@ -33,7 +33,6 @@ einops-exts==0.0.4
 embreex==2.17.7.post5
 eval-type-backport==0.2.0
 exceptiongroup==1.2.2
-fastapi
 ffmpy==0.4.0
 filelock==3.15.4
 flatbuffers==24.3.25
@@ -132,11 +131,10 @@ sounddevice==0.4.7
 soupsieve==2.5
 spandrel==0.3.4
 stanza==1.1.1
-starlette
 svg-path==6.3
 svglib==1.5.1
 svgwrite==1.4.3
-sympy==1.13.1
 tabulate==0.9.0
 termcolor==2.4.0
 threadpoolctl==3.5.0
@@ -151,7 +149,6 @@ tqdm==4.66.5
 trampoline==0.1.2
 transformers==4.37.2
 trimesh==4.4.3
-triton==2.1.0
 torchsde==0.2.6
 typer==0.12.5
 typing-extensions==4.12.2
@@ -169,4 +166,8 @@ yacs==0.1.8
 yapf==0.40.2
 yarl==1.9.4
 zipp==3.19.2
-git+https://github.com/facebookresearch/segment-anything.git

 async-timeout==4.0.3
 attrs==23.2.0
 beautifulsoup4==4.12.3
+bitsandbytes
 certifi==2024.7.4
 cffi==1.16.0
 chardet==5.2.0
 embreex==2.17.7.post5
 eval-type-backport==0.2.0
 exceptiongroup==1.2.2
 ffmpy==0.4.0
 filelock==3.15.4
 flatbuffers==24.3.25
 soupsieve==2.5
 spandrel==0.3.4
 stanza==1.1.1
 svg-path==6.3
 svglib==1.5.1
 svgwrite==1.4.3
+sympy==1.13.3
 tabulate==0.9.0
 termcolor==2.4.0
 threadpoolctl==3.5.0
 trampoline==0.1.2
 transformers==4.37.2
 trimesh==4.4.3
 torchsde==0.2.6
 typer==0.12.5
 typing-extensions==4.12.2
 yapf==0.40.2
 yarl==1.9.4
 zipp==3.19.2
+git+https://github.com/facebookresearch/segment-anything.git
+starlette<0.38
+fastapi<0.112
+torch==2.8.0
+torchvision==0.23.0