mjohanes commited on
Commit
b147284
·
1 Parent(s): d400066
Files changed (2) hide show
  1. app.py +91 -94
  2. requirements.txt +7 -21
app.py CHANGED
@@ -1,110 +1,107 @@
 
1
  import streamlit as st
 
2
  import numpy as np
3
- from PIL import Image
4
- import torch
 
 
 
5
  from diffusers import StableDiffusionInpaintPipeline
6
- from fastsam import FastSAM, FastSAMPrompt
7
- from huggingface_hub import hf_hub_download
8
 
9
- # Initialize session state
10
- if "points" not in st.session_state:
11
- st.session_state.points = []
 
 
 
 
 
 
12
 
13
- # Load models with caching
 
 
 
 
 
 
 
 
 
 
14
  @st.cache_resource
15
  def load_models():
16
- fastsam_path = hf_hub_download(
17
- repo_id="An-619/FastSAM",
18
- filename="FastSAM.pt",
19
- repo_type="model"
20
- )
21
- fastsam_model = FastSAM(fastsam_path)
22
 
23
- pipe = StableDiffusionInpaintPipeline.from_pretrained(
24
- "stabilityai/stable-diffusion-2-inpainting",
25
- torch_dtype=torch.float16,
 
26
  )
27
- if torch.cuda.is_available():
28
- pipe = pipe.to("cuda")
29
- return fastsam_model, pipe
30
-
31
- fastsam, pipe = load_models()
32
-
33
- st.title("Mobile Inpainting with Point Selection")
34
- st.write("1. Take photo 2. Select points 3. Enter prompt")
35
 
36
- # Camera input
37
- img_file = st.camera_input("Take a picture:")
38
 
39
- def add_point(x, y):
40
- st.session_state.points.append((x, y))
 
 
 
 
 
41
 
42
- def clear_points():
43
- st.session_state.points = []
44
 
45
- if img_file:
46
- img = Image.open(img_file).convert("RGB")
47
- w, h = img.size
 
48
 
49
- # Display image with click coordinates
50
- st.image(img, caption="Original Image")
51
- st.write(f"Image dimensions: {w}x{h} pixels")
52
-
53
- # Point input controls
54
- col1, col2 = st.columns(2)
55
- with col1:
56
- x = st.number_input("X coordinate", 0, w-1, w//2)
57
- with col2:
58
- y = st.number_input("Y coordinate", 0, h-1, h//2)
59
-
60
- st.button("Add Point", on_click=add_point, args=(x, y))
61
- st.button("Clear Points", on_click=clear_points)
 
 
62
 
63
- # Show selected points
64
- if st.session_state.points:
65
- st.write("Selected points (x,y):")
66
- st.write(st.session_state.points)
 
 
 
 
 
 
 
 
67
 
68
- # Generate mask
69
- if st.button("Generate Mask"):
70
- img_np = np.array(img)
71
-
72
- with st.spinner("Segmenting..."):
73
- # Normalize points
74
- norm_points = [[x/w, y/h] for x, y in st.session_state.points]
75
-
76
- # FastSAM processing
77
- results = fastsam(
78
- img_np,
79
- device="cuda" if torch.cuda.is_available() else "cpu",
80
- imgsz=1024,
81
- conf=0.4,
82
- )
83
- prompt_process = FastSAMPrompt(img_np, results, device="cpu")
84
- mask = prompt_process.point_prompt(
85
- points=norm_points,
86
- pointlabel=[1]*len(norm_points)
87
- )
88
- mask = mask[0].astype(np.uint8) * 255
89
-
90
- st.image(mask, caption="Generated Mask")
91
-
92
- # Inpainting
93
- prompt = st.text_input("What should replace the selected area?")
94
- if prompt:
95
- with st.spinner("Generating result..."):
96
- img_512 = img.resize((512, 512))
97
- mask_512 = Image.fromarray(mask).resize((512, 512))
98
-
99
- result = pipe(
100
- prompt=prompt,
101
- image=img_512,
102
- mask_image=mask_512,
103
- num_inference_steps=30,
104
- ).images[0]
105
-
106
- st.image(result, caption="Final Result")
107
- else:
108
- st.warning("Add points to create a mask")
109
- else:
110
- st.info("Take a photo to begin")
 
1
+ import os
2
  import streamlit as st
3
+ from PIL import Image, ImageDraw
4
  import numpy as np
5
+
6
+ # Import the custom component for image coordinates
7
+ from streamlit_image_coordinates import streamlit_image_coordinates
8
+
9
+ # Import diffusers pipeline for Stable Diffusion inpainting
10
  from diffusers import StableDiffusionInpaintPipeline
 
 
11
 
12
+ # Ultralytics provides the FastSAM model class
13
+ from ultralytics import FastSAM
14
+
15
# Set page config for a better mobile experience
st.set_page_config(page_title="Inpainting Demo", layout="wide")

# Define model paths or IDs for easy switching in the future
FASTSAM_CHECKPOINT = "FastSAM-x.pt"  # file name of the FastSAM model weights
SD_MODEL_ID = "runwayml/stable-diffusion-inpainting"  # HF Hub model for SD Inpainting v1.5

# Ensure FastSAM model weights are available (download if not present).
if not os.path.exists(FASTSAM_CHECKPOINT):
    # Download FastSAM weights from the official Ultralytics release (~68 MB).
    import requests

    fastsam_url = "https://github.com/ultralytics/assets/releases/download/v8.2.0/FastSAM-x.pt"
    st.write("Downloading FastSAM model weights...")
    # Stream the download so the whole file is never buffered in memory,
    # use a timeout so a dead connection cannot hang the app forever, and
    # check the HTTP status so an error page is never written to disk as
    # the checkpoint file. The context manager guarantees the file handle
    # is closed even if a chunk write fails.
    resp = requests.get(fastsam_url, stream=True, timeout=60)
    resp.raise_for_status()
    with open(FASTSAM_CHECKPOINT, "wb") as f:
        for chunk in resp.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
            f.write(chunk)
31
+
32
# Load models with caching to avoid reloading on each interaction
@st.cache_resource
def load_models():
    """Load FastSAM and the Stable Diffusion inpainting pipeline exactly once.

    Returns:
        tuple: (fastsam_model, sd_pipe) where `fastsam_model` is the
        Ultralytics FastSAM segmentation model and `sd_pipe` is a
        StableDiffusionInpaintPipeline already moved to the best device.
    """
    # BUGFIX: `torch` was never imported anywhere in this file, so the
    # torch.cuda.is_available() call below raised NameError at startup.
    # A function-local import keeps the fix self-contained; the cached
    # resource means this runs only once per process.
    import torch

    # Load FastSAM model (Ultralytics handles per-call device assignment).
    fastsam_model = FastSAM(FASTSAM_CHECKPOINT)

    use_cuda = torch.cuda.is_available()
    # Choose the dtype explicitly: diffusers does NOT automatically pick
    # float16 on GPU — `torch_dtype=None` always loads float32 weights.
    # float16 halves VRAM usage on CUDA; CPU inference needs float32.
    sd_pipe = StableDiffusionInpaintPipeline.from_pretrained(
        SD_MODEL_ID,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
    )
    # Move the pipeline to GPU for faster inference when one is available.
    sd_pipe = sd_pipe.to("cuda" if use_cuda else "cpu")
    # Memory optimization: compute attention in slices to lower peak usage.
    sd_pipe.enable_attention_slicing()
    return fastsam_model, sd_pipe
 
 
 
50
 
51
# Initialize the models — cached via st.cache_resource, so this line only
# does real work on the very first run of the process.
fastsam_model, sd_pipe = load_models()

# Page header and step-by-step usage instructions.
st.title("📱 Mobile Inpainting with FastSAM and Stable Diffusion")
_INSTRUCTIONS = (
    "1. **Capture** an image using the camera.\n"
    "2. **Tap** on an object in the image to select it.\n"
    "3. **Describe** what should replace it, and let the app do the rest!"
)
st.markdown(_INSTRUCTIONS)

# Camera input widget (opens device camera on mobile/desktop)
picture = st.camera_input("Take a picture")
65
# Main interaction flow: runs only once the user has captured a photo.
if picture is not None:
    # When an image is captured, display it and allow point selection.
    img = Image.open(picture)  # read image as PIL
    st.image(img, caption="Captured Image", use_column_width=True)

    # Let user click a point on the image. Returns a dict with 'x' and 'y'.
    # NOTE(review): the component reports coordinates in the pixel space of
    # the image it renders — confirm this matches the captured image's
    # natural resolution used for segmentation below.
    coords = streamlit_image_coordinates(img, key="click_img")
    if coords:
        # If a point was clicked, mark it on the image for user feedback.
        cx, cy = int(coords['x']), int(coords['y'])
        # Draw a small red circle on an image copy to show the selected point.
        img_with_dot = img.copy()
        draw = ImageDraw.Draw(img_with_dot)
        draw.ellipse((cx - 5, cy - 5, cx + 5, cy + 5), fill='red')
        st.image(img_with_dot, caption=f"Selected Point: ({cx}, {cy})", use_column_width=True)
    else:
        cx = cy = None

    # Prompt input for inpainting
    prompt = st.text_input("Prompt for inpainting (describe what should replace the selected area):")

    # Only proceed when a point is selected and prompt is provided
    if coords and prompt:
        cx, cy = int(coords['x']), int(coords['y'])
        st.write("Generating mask with FastSAM...")
        # Run FastSAM segmentation with the selected point as a positive
        # point prompt (points=[[x, y]], labels=[1]).
        results = fastsam_model(img, points=[[cx, cy]], labels=[1])
        # Guard: FastSAM may find no segment at the clicked point, in which
        # case `results[0].masks` is None and the original code crashed with
        # AttributeError before reaching the pipeline.
        if not results or results[0].masks is None or len(results[0].masks.data) == 0:
            st.error("No object was found at the selected point — try tapping a different spot.")
        else:
            # Extract the first mask (the object closest to the point prompt).
            mask_data = results[0].masks.data[0]  # mask tensor (H x W)
            mask_array = mask_data.cpu().numpy()  # convert to numpy array
            # Build a PIL mask image: 1.0 -> 255 (inpaint), 0.0 -> 0 (keep).
            mask_image = Image.fromarray((mask_array * 255).astype(np.uint8))

            # For debugging, the mask can be displayed — uncomment if needed
            # st.image(mask_image, caption="Segmentation Mask", use_column_width=True)

            st.write("Running Stable Diffusion Inpainting...")
            # Run the Stable Diffusion inpainting pipeline
            result = sd_pipe(prompt=prompt, image=img, mask_image=mask_image).images[0]

            # Display the final inpainted image
            st.image(result, caption="Inpainted Image", use_column_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,21 +1,7 @@
1
- # Core dependencies
2
- streamlit>=1.28.0
3
- torch
4
- torchvision
5
- ultralytics==8.0.121 # Critical for FastSAM
6
-
7
- # Hugging Face ecosystem
8
- diffusers>=0.19.0
9
- transformers>=4.34.0
10
- huggingface-hub>=0.17.0
11
-
12
- # Image processing
13
- opencv-python>=4.7.0.72
14
- matplotlib>=3.7.2
15
- pillow>=9.5.0
16
-
17
- # Additional
18
- accelerate>=0.24.0
19
-
20
- # FastSAM from specific commit
21
- git+https://github.com/CASIA-IVA-Lab/FastSAM.git
 
1
+ streamlit>=1.28.0  # '1.x' is not a valid pip version specifier and breaks installation
2
+ streamlit-image-coordinates==0.2.0 # component for getting click coordinates on images
3
+ ultralytics==8.0.134 # includes FastSAM integration
4
+ diffusers==0.17.0 # for Stable Diffusion pipeline
5
+ transformers==4.30.2 # for Stable Diffusion text encoder
6
+ accelerate==0.20.3 # helps with model acceleration
7
+ torch # PyTorch (will auto-select a CUDA version on GPU)