Spaces:

CS5330
/

3D_Image_Composer

Sleeping

App Files Files Community

gexu13 committed on Mar 27, 2025

Commit

f74ae4b

verified ·

1 Parent(s): 680da51

Upload 16 files

Browse files

Files changed (17) hide show

.gitattributes +7 -0
Side_By_Side_3D_Images/sbs_backyard.png +3 -0
Side_By_Side_3D_Images/sbs_campus.png +3 -0
Side_By_Side_3D_Images/sbs_downtown.png +3 -0
Side_By_Side_3D_Images/sbs_neu.png +3 -0
Side_By_Side_3D_Images/sbs_steam_clock.png +3 -0
Side_By_Side_3D_Images/sbs_trail.png +3 -0
app.py +98 -0
create_anaglyph.py +42 -0
image_segmentation_mask_rcnn.py +58 -0
insert_person_into_stereo.py +136 -0
person/person1.jpg +0 -0
person/person2.png +0 -0
person/person3.png +0 -0
person/person4.png +0 -0
person/person5.png +3 -0
requirements.txt +207 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+person/person5.png filter=lfs diff=lfs merge=lfs -text
+Side_By_Side_3D_Images/sbs_backyard.png filter=lfs diff=lfs merge=lfs -text
+Side_By_Side_3D_Images/sbs_campus.png filter=lfs diff=lfs merge=lfs -text
+Side_By_Side_3D_Images/sbs_downtown.png filter=lfs diff=lfs merge=lfs -text
+Side_By_Side_3D_Images/sbs_neu.png filter=lfs diff=lfs merge=lfs -text
+Side_By_Side_3D_Images/sbs_steam_clock.png filter=lfs diff=lfs merge=lfs -text
+Side_By_Side_3D_Images/sbs_trail.png filter=lfs diff=lfs merge=lfs -text

Side_By_Side_3D_Images/sbs_backyard.png ADDED Viewed

Git LFS Details

SHA256: 8df7e1094cd44df271fd3cad9a362df5f853aa886c7f4bbfc4ef3a88c537dd80
Pointer size: 132 Bytes
Size of remote file: 2.4 MB

Side_By_Side_3D_Images/sbs_campus.png ADDED Viewed

Git LFS Details

SHA256: aaa90b773df0eb80161d0ccc2129a5f8babf93f31c6791b60d9bc3f8bd1c9041
Pointer size: 132 Bytes
Size of remote file: 1.51 MB

Side_By_Side_3D_Images/sbs_downtown.png ADDED Viewed

Git LFS Details

SHA256: d13d947b3b2a462bd79c145958bb5a44b00c3d0463c4c55871041fc25aabc792
Pointer size: 132 Bytes
Size of remote file: 2.15 MB

Side_By_Side_3D_Images/sbs_neu.png ADDED Viewed

Git LFS Details

SHA256: fa090e1b20ce0b683b5d0f9cdc5b5c1c0f93127cec0a06fc408341860d8c57e9
Pointer size: 131 Bytes
Size of remote file: 825 kB

Side_By_Side_3D_Images/sbs_steam_clock.png ADDED Viewed

Git LFS Details

SHA256: 7a5192340b654f5184ba669d65c46a2736e28b2017fcc5b19c10877762c690ec
Pointer size: 132 Bytes
Size of remote file: 1.97 MB

Side_By_Side_3D_Images/sbs_trail.png ADDED Viewed

Git LFS Details

SHA256: 04da0cb1972d716f7f4f90698944b2a8b51ae75775414ec8b01cd8dfab36a6c4
Pointer size: 132 Bytes
Size of remote file: 2.96 MB

app.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import gradio as gr
+from PIL import Image
+import os
+from image_segmentation_mask_rcnn import segment_person
+from insert_person_into_stereo import insert_person_from_combined_stereo
+from create_anaglyph import create_anaglyph
+# Bundled sample assets offered in the UI (the files must exist relative to
+# the working directory the app is started from).
+# Background file names follow "sbs_<label in lower case>.png".
+DEFAULT_BACKGROUNDS = {
+    label: f"Side_By_Side_3D_Images/sbs_{label.lower()}.png"
+    for label in ("Backyard", "Campus", "Downtown", "NEU", "STEAM_CLOCK", "Trail")
+}
+# Sample person images; only the first one is a JPEG, the rest are PNGs.
+DEFAULT_PEOPLE = {
+    f"PERSON{index}": f"person/person{index}.{extension}"
+    for index, extension in enumerate(("jpg", "png", "png", "png", "png"), start=1)
+}
+def pipeline(person_image, stereo_image, depth, x, y):
+    """Segment the person, insert them into the stereo pair and build the anaglyph.
+
+    Args:
+        person_image: PIL image containing the person to cut out.
+        stereo_image: PIL side-by-side stereo background.
+        depth: depth label forwarded to ``insert_person_from_combined_stereo``.
+        x: horizontal placement taken from the X slider.
+        y: vertical placement taken from the Y slider.
+
+    Returns:
+        The anaglyph image produced by ``create_anaglyph``.
+
+    Raises:
+        gr.Error: if an input image is missing or no person was segmented, so
+            the problem is reported as a readable message rather than an
+            AttributeError raised further down the pipeline.
+    """
+    if person_image is None or stereo_image is None:
+        raise gr.Error("Please provide both a person image and a stereo background.")
+    segmented = segment_person(person_image)
+    # segment_person returns None when no confident person detection exists.
+    if segmented is None:
+        raise gr.Error("No person detected with high enough confidence.")
+    left_image, right_image, _ = insert_person_from_combined_stereo(
+        stereo_image=stereo_image,
+        segmented_person=segmented,
+        depth=depth,
+        # Slider values may arrive as floats; pixel coordinates must be ints.
+        position=(int(x), int(y))
+    )
+    return create_anaglyph(left_image, right_image)
+def get_image_dimensions(stereo_image, person_image):
+    """Recompute the X/Y slider ranges from the two currently loaded images.
+
+    Returns a pair of ``gr.update`` objects (X slider first, Y slider second).
+    While either image is still missing, both updates are empty.
+    """
+    both_present = stereo_image is not None and person_image is not None
+    if not both_present:
+        return gr.update(), gr.update()
+    bg_width, bg_height = stereo_image.size
+    person_width, _ = person_image.size
+    # Slider maxima are never allowed to drop below 10.
+    x_limit = max(10, bg_width // 2 - person_width // 2)
+    y_limit = max(10, bg_height)
+    # X starts mid-range; Y starts at 90% of the background height.
+    x_update = gr.update(minimum=0, maximum=x_limit, value=x_limit // 2)
+    y_update = gr.update(minimum=0, maximum=y_limit, value=int(bg_height * 0.9))
+    return x_update, y_update
+def main():
+    """Build the Gradio Blocks UI for the anaglyph composer and launch it."""
+    with gr.Blocks() as demo:
+        gr.Markdown("# 🧍‍➡️ 3D Anaglyph Composer")
+        # Primary inputs; type="pil" means callbacks receive PIL images.
+        with gr.Row():
+            person_input = gr.Image(type="pil", label="Person Image")
+            stereo_input = gr.Image(type="pil", label="Stereo Background")
+        # Sample selectors
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("### Sample People")
+                for label, path in DEFAULT_PEOPLE.items():
+                    with gr.Row():
+                        preview = gr.Image(value=path, label=label, interactive=False, show_label=False, width=128, height=128)
+                        use_btn = gr.Button(f"Use {label}")
+                        # `p=path` binds the current path as a default argument so
+                        # each button loads its own file (avoids late binding).
+                        use_btn.click(lambda p=path: Image.open(p), outputs=person_input)
+            with gr.Column():
+                gr.Markdown("### Sample Backgrounds")
+                for label, path in DEFAULT_BACKGROUNDS.items():
+                    with gr.Row():
+                        preview = gr.Image(value=path, label=label, interactive=False, show_label=False)
+                        use_btn = gr.Button(f"Use {label}")
+                        # Same default-argument binding as for the people buttons.
+                        use_btn.click(lambda p=path: Image.open(p), outputs=stereo_input)
+        depth_input = gr.Dropdown(["close", "medium", "far"], value="medium", label="Depth")
+        # Initial 0-2000 ranges are placeholders; the change handlers below
+        # replace them once both images are present.
+        x_slider = gr.Slider(0, 2000, value=1000, label="Person X Position")
+        y_slider = gr.Slider(0, 2000, value=500, label="Person Y Position")
+        generate_btn = gr.Button("Generate 3D Anaglyph")
+        output_img = gr.Image(type="pil", label="Anaglyph 3D Image")
+        # Dynamically update position sliders when images are uploaded
+        person_input.change(get_image_dimensions, inputs=[stereo_input, person_input], outputs=[x_slider, y_slider])
+        stereo_input.change(get_image_dimensions, inputs=[stereo_input, person_input], outputs=[x_slider, y_slider])
+        # Run the full segmentation -> insertion -> anaglyph pipeline on click.
+        generate_btn.click(
+            fn=pipeline,
+            inputs=[person_input, stereo_input, depth_input, x_slider, y_slider],
+            outputs=output_img
+        )
+    demo.launch()
+# Start the app only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

create_anaglyph.py ADDED Viewed

	@@ -0,0 +1,42 @@

+from PIL import Image
+import numpy as np
+def create_anaglyph(left_img, right_img, output_path=""):
+    """Combine a left/right image pair into one anaglyph image.
+
+    The red channel is taken from the left image; green and blue come from
+    the right image. The left image is resized to the right image's size
+    first. When ``output_path`` is non-empty the result is also saved there.
+
+    Raises:
+        FileNotFoundError: if either input image is None.
+    """
+    if left_img is None or right_img is None:
+        raise FileNotFoundError("Left or right image not found.")
+    # Bring the left view to the right view's size, then work on RGB arrays.
+    left_rgb = np.array(left_img.resize(right_img.size).convert("RGB"))
+    right_rgb = np.array(right_img.convert("RGB"))
+    # Start from the right view (keeps its green/blue) and swap in left red.
+    composite = right_rgb.copy()
+    composite[:, :, 0] = left_rgb[:, :, 0]
+    anaglyph_img = Image.fromarray(composite.astype(np.uint8))
+    # Saving is optional and only happens for a non-empty path.
+    if output_path:
+        anaglyph_img.save(output_path)
+        print(f"Anaglyph image saved to: {output_path}")
+    return anaglyph_img
+# Script usage: build an anaglyph from the previously saved left/right views.
+if __name__ == "__main__":
+    left = Image.open("stereo_close_left_with_person.png").convert("RGB")
+    right = Image.open("stereo_close_right_with_person.png").convert("RGB")
+    create_anaglyph(left, right, output_path="anaglyph_with_person.png")

image_segmentation_mask_rcnn.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import torch
+from torchvision.models.detection import maskrcnn_resnet50_fpn
+from torchvision.transforms import functional as F
+import numpy as np
+from PIL import Image
+# Load the pre-trained Mask R-CNN model
+_MODEL_CACHE = {}
+def load_model():
+    """Return the pre-trained Mask R-CNN model in eval mode.
+
+    The model is built on the first call and reused afterwards, so repeated
+    segmentation requests do not rebuild the network and reload its weights.
+    ``weights="COCO_V1"`` selects the same weights the deprecated
+    ``pretrained=True`` flag mapped to.
+    """
+    if "model" not in _MODEL_CACHE:
+        model = maskrcnn_resnet50_fpn(weights="COCO_V1")
+        model.eval()
+        _MODEL_CACHE["model"] = model
+    return _MODEL_CACHE["model"]
+# Get the mask for the person class
+def extract_person_mask(model, image_pil, score_threshold=0.8):
+    """Return a binary mask for the first confident person detection.
+
+    Args:
+        model: detection model; called with a list holding one image tensor.
+        image_pil: input image. PIL images that are not RGB (for example RGBA
+            PNGs) are converted to RGB so the model receives three channels.
+        score_threshold: a detection must score strictly above this value.
+
+    Returns:
+        A uint8 array holding 255 where the mask probability exceeds 0.5 and
+        0 elsewhere, or None when no detection with label 1 passes the
+        threshold.
+    """
+    # Inputs without a `mode` attribute (e.g. arrays) are passed through as-is.
+    if getattr(image_pil, "mode", "RGB") != "RGB":
+        image_pil = image_pil.convert("RGB")
+    image_tensor = F.to_tensor(image_pil)
+    with torch.no_grad():
+        predictions = model([image_tensor])[0]
+    detections = zip(predictions['labels'], predictions['scores'], predictions['masks'])
+    for label, score, mask in detections:
+        # Label 1 is the class this function treats as "person".
+        if label.item() == 1 and score.item() > score_threshold:
+            soft_mask = mask[0].cpu().numpy()
+            return (soft_mask > 0.5).astype(np.uint8) * 255
+    return None
+# Apply the mask to the image and convert to transparent PNG
+def apply_mask_to_image(image_pil, mask):
+    """Return an RGBA copy of ``image_pil`` whose alpha channel is ``mask``."""
+    pixels = np.array(image_pil.convert("RGBA"))
+    # Channel index 3 is alpha; it is overwritten with the mask values.
+    pixels[..., 3] = mask
+    return Image.fromarray(pixels)
+# Save the image
+def save_segmented_person(output_image, output_path):
+    """Write ``output_image`` to ``output_path`` and print a confirmation."""
+    output_image.save(output_path)
+    confirmation = f"Segmented person saved to: {output_path}"
+    print(confirmation)
+# Main function to run everything
+def segment_person(image_pil, output_path=""):
+    """Cut the person out of ``image_pil`` as a transparent RGBA image.
+
+    Returns the segmented image, or None (after printing a notice) when no
+    person mask was found. A non-empty ``output_path`` also saves the result.
+    """
+    person_mask = extract_person_mask(load_model(), image_pil)
+    # Guard clause: nothing to cut out without a mask.
+    if person_mask is None:
+        print("No person detected with high enough confidence.")
+        return None
+    cutout = apply_mask_to_image(image_pil, person_mask)
+    if output_path:
+        save_segmented_person(cutout, output_path)
+    return cutout
+# Example usage
+if __name__ == "__main__":
+    # Segment the bundled sample and store the cut-out next to the script.
+    source_image = Image.open("./person/person1.jpg").convert("RGB")
+    segment_person(source_image, "segmented_person.png")

insert_person_into_stereo.py ADDED Viewed

	@@ -0,0 +1,136 @@

+from PIL import Image
+import numpy as np
+# Depth label → horizontal disparity between the two views (in pixels)
+disparity_map = dict(close=60, medium=30, far=5)
+# Depth label → scaling factor applied to the person image
+scale_map = dict(close=1.2, medium=0.8, far=0.4)
+def clamp_large_person_image(image, max_dim=800):
+    """Shrink ``image`` so its longer side is at most ``max_dim`` pixels.
+
+    Images already within the limit are returned unchanged (same object).
+    Larger ones are resized with LANCZOS resampling, keeping the aspect
+    ratio, and a notice is printed.
+    """
+    w, h = image.size
+    longest_side = max(w, h)
+    if longest_side <= max_dim:
+        return image
+    ratio = max_dim / longest_side
+    new_size = (int(w * ratio), int(h * ratio))
+    shrunk = image.resize(new_size, Image.Resampling.LANCZOS)
+    print(f"⚠️ Person image auto-resized from ({w}, {h}) to {new_size} before scaling.")
+    return shrunk
+def resize_person(person_img, scale_factor):
+    """Return ``person_img`` scaled by ``scale_factor`` using LANCZOS resampling."""
+    width, height = person_img.size
+    # Each side is scaled independently and truncated to whole pixels.
+    target = tuple(int(side * scale_factor) for side in (width, height))
+    return person_img.resize(target, Image.Resampling.LANCZOS)
+def overlay_image_auto_scale(background, overlay_rgba, x, y):
+    """Paste ``overlay_rgba`` onto a copy of ``background`` at (x, y), clipped.
+
+    Args:
+        background: image to paste onto; it is never modified in place.
+        overlay_rgba: image to paste; it is also used as its own paste mask.
+        x: left edge of the overlay in background coordinates (may be negative).
+        y: top edge of the overlay in background coordinates (may be negative).
+
+    Returns:
+        A new image with the visible part of the overlay pasted in, or the
+        original ``background`` object when the visible part is narrower or
+        shorter than 20 pixels (including when it lies fully outside).
+    """
+    x, y = int(x), int(y)
+    bg_w, bg_h = background.size
+    ov_w, ov_h = overlay_rgba.size
+    # Intersect the overlay rectangle with the background rectangle first.
+    # Deciding visibility before cropping avoids handing Pillow an inverted
+    # crop box when the overlay is entirely off-canvas.
+    left, top = max(x, 0), max(y, 0)
+    right, bottom = min(x + ov_w, bg_w), min(y + ov_h, bg_h)
+    vis_w, vis_h = right - left, bottom - top
+    if vis_w < 20 or vis_h < 20:
+        print("⚠️ Person fully clipped or too small, skipped.")
+        return background
+    # Crop only when some part of the overlay actually falls outside.
+    if (vis_w, vis_h) != (ov_w, ov_h):
+        overlay_rgba = overlay_rgba.crop((left - x, top - y, right - x, bottom - y))
+    # Paste with transparency
+    background = background.copy()
+    background.paste(overlay_rgba, (left, top), overlay_rgba)
+    return background
+def insert_person_from_combined_stereo(
+    stereo_image,
+    segmented_person,
+    depth="medium",
+    position=(100, 100),
+    scale=None,
+    save_output=False
+):
+    """Insert a segmented person into both halves of a side-by-side stereo image.
+
+    Args:
+        stereo_image: side-by-side stereo image; it is split at ``width // 2``.
+        segmented_person: RGBA cut-out of the person.
+        depth: key into ``disparity_map`` / ``scale_map``. Unknown keys fall
+            back to a disparity of 10 and a scale of 0.8.
+        position: ``(x, y)`` within one view; ``x`` is the horizontal centre
+            of the person and ``y`` is the person's bottom edge.
+        scale: explicit scale factor overriding the depth-based one.
+        save_output: when true, write the left, right and combined images to
+            ``stereo_<depth>_*.png`` in the working directory.
+
+    Returns:
+        ``(left_with_person, right_with_person, combined_output)``.
+
+    Raises:
+        FileNotFoundError: if ``stereo_image`` is None.
+        ValueError: if ``segmented_person`` is None or not in RGBA mode.
+    """
+    # Validate before touching any attribute, so missing inputs produce the
+    # errors below rather than an AttributeError from the size printouts.
+    if stereo_image is None:
+        raise FileNotFoundError("Stereo image not found.")
+    if segmented_person is None or segmented_person.mode != "RGBA":
+        raise ValueError("Segmented person image must be RGBA with an alpha channel.")
+    print(f"Stereo image size: {stereo_image.size}")
+    print(f"Segmented person size: {segmented_person.size}")
+    # Clamp large image
+    segmented_person = clamp_large_person_image(segmented_person)
+    # Get stereo L/R images
+    w, h = stereo_image.size
+    half_w = w // 2
+    left_image = stereo_image.crop((0, 0, half_w, h))
+    right_image = stereo_image.crop((half_w, 0, w, h))
+    # Use depth to get scale and disparity
+    scale_factor = scale if scale is not None else scale_map.get(depth, 0.8)
+    disparity = disparity_map.get(depth, 10)
+    # Use user-specified position; pixel coordinates must be integers.
+    x_base, y_base = (int(coord) for coord in position)
+    # Resize person
+    person_resized = resize_person(segmented_person, scale_factor)
+    print(f"Resized person size: {person_resized.size}")
+    # Shift the person in opposite directions in the two views by half the
+    # disparity each.
+    x_left = x_base - disparity // 2
+    x_right = x_base + disparity // 2
+    ov_w, ov_h = person_resized.size
+    # Convert centre/bottom anchoring into the top-left corner used for pasting.
+    x_left_adj = x_left - ov_w // 2
+    x_right_adj = x_right - ov_w // 2
+    y_adj = y_base - ov_h  # bottom-aligned
+    # Overlay onto L/R views
+    left_with_person = overlay_image_auto_scale(left_image, person_resized, x_left_adj, y_adj)
+    right_with_person = overlay_image_auto_scale(right_image, person_resized, x_right_adj, y_adj)
+    # Merge back into one side-by-side image
+    combined_output = Image.new("RGB", (w, h))
+    combined_output.paste(left_with_person, (0, 0))
+    combined_output.paste(right_with_person, (half_w, 0))
+    # Optionally save
+    if save_output:
+        left_with_person.save(f"stereo_{depth}_left_with_person.png")
+        right_with_person.save(f"stereo_{depth}_right_with_person.png")
+        combined_output.save(f"stereo_{depth}_combined_with_person.png")
+    print(f"✅ Step 2 complete: Person inserted into stereo image (depth: {depth})")
+    return left_with_person, right_with_person, combined_output
+# Script usage: insert the previously segmented person into a sample scene.
+if __name__ == "__main__":
+    background = Image.open("./Side_By_Side_3D_Images/sbs_downtown.png").convert("RGB")
+    cutout = Image.open("segmented_person.png").convert("RGBA")
+    insert_person_from_combined_stereo(
+        background,
+        cutout,
+        depth="close",
+        position=(500, 1000),
+        save_output=True
+    )

person/person1.jpg ADDED Viewed

person/person2.png ADDED Viewed

person/person3.png ADDED Viewed

person/person4.png ADDED Viewed

person/person5.png ADDED Viewed

Git LFS Details

SHA256: a244cffcb063488054e8cf282b6d3d6161ca1a92d480ca28438ddc1babb58bac
Pointer size: 131 Bytes
Size of remote file: 114 kB

requirements.txt ADDED Viewed

	@@ -0,0 +1,207 @@

+absl-py==2.1.0
+aiofiles==23.2.1
+aiohappyeyeballs==2.5.0
+aiohttp==3.11.13
+aiosignal==1.3.2
+annotated-types==0.7.0
+anyio==4.8.0
+appnope==0.1.4
+asttokens==3.0.0
+astunparse==1.6.3
+attrs==24.2.0
+Automat==22.10.0
+autopep8==2.3.1
+av==14.1.0
+backcall==0.2.0
+beautifulsoup4==4.12.3
+bleach==6.2.0
+bs4==0.0.2
+certifi==2025.1.31
+cffi==1.17.0
+charset-normalizer==3.3.2
+click==8.1.8
+comm==0.2.2
+constantly==23.10.4
+contourpy==1.3.1
+coverage==7.6.4
+cryptography==43.0.0
+cssselect==1.2.0
+cycler==0.12.1
+datasets==3.3.2
+debugpy==1.8.9
+decorator==5.1.1
+defusedxml==0.7.1
+dill==0.3.8
+distro==1.9.0
+docopt==0.6.2
+dotenv==0.9.9
+executing==2.1.0
+faiss-cpu==1.10.0
+fastapi==0.115.8
+fastjsonschema==2.21.1
+ffmpy==0.5.0
+filelock==3.15.4
+flatbuffers==25.2.10
+fonttools==4.55.2
+frozenlist==1.5.0
+fsspec==2024.12.0
+fuzzywuzzy==0.18.0
+gast==0.6.0
+git-filter-repo==2.47.0
+google-pasta==0.2.0
+gradio==5.15.0
+gradio_client==1.7.0
+grpcio==1.71.0
+h11==0.14.0
+h5py==3.13.0
+httpcore==1.0.7
+httpx==0.28.1
+huggingface-hub==0.28.1
+hyperlink==21.0.0
+idna==3.7
+imageio==2.37.0
+incremental==24.7.2
+iniconfig==2.0.0
+ipykernel==6.29.5
+ipython==8.12.3
+itemadapter==0.9.0
+itemloaders==1.3.1
+jedi==0.19.2
+Jinja2==3.1.5
+jiter==0.8.2
+jmespath==1.0.1
+joblib==1.4.2
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+jupyterlab_pygments==0.3.0
+keras==3.9.0
+kiwisolver==1.4.7
+lazy_loader==0.4
+libclang==18.1.1
+lxml==5.3.0
+Markdown==3.7
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.3
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mistune==3.1.2
+ml_dtypes==0.5.1
+mpmath==1.3.0
+multidict==6.1.0
+multiprocess==0.70.16
+namex==0.0.8
+nbclient==0.10.2
+nbconvert==7.16.6
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.4.2
+numpy==2.1.3
+openai==1.61.1
+opencv-python==4.11.0.86
+opt_einsum==3.4.0
+optree==0.14.1
+orjson==3.10.15
+packaging==24.1
+pandas==2.2.3
+pandocfilters==1.5.1
+parsel==1.9.1
+parso==0.8.4
+pedal==2.6.4
+pexpect==4.9.0
+pickleshare==0.7.5
+pillow==11.0.0
+pipreqs==0.5.0
+platformdirs==4.3.6
+pluggy==1.5.0
+prompt_toolkit==3.0.48
+propcache==0.3.0
+Protego==0.3.1
+protobuf==5.29.3
+psutil==6.1.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==19.0.1
+pyasn1==0.6.0
+pyasn1_modules==0.4.0
+pycodestyle==2.12.1
+pycparser==2.22
+pydantic==2.10.6
+pydantic_core==2.27.2
+PyDispatcher==2.0.7
+pydub==0.25.1
+Pygments==2.18.0
+pyOpenSSL==24.2.1
+pyparsing==3.2.0
+pytest==8.3.3
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.20
+pytz==2024.2
+PyYAML==6.0.2
+pyzmq==26.2.0
+queuelib==1.7.0
+RapidFuzz==3.10.1
+referencing==0.36.2
+regex==2024.11.6
+requests==2.32.3
+requests-file==2.1.0
+rich==13.9.4
+rpds-py==0.23.0
+ruff==0.9.4
+safehttpx==0.1.6
+safetensors==0.5.2
+scikit-image==0.25.1
+scikit-learn==1.6.1
+scipy==1.15.1
+Scrapy==2.11.2
+semantic-version==2.10.0
+sentence-transformers==3.4.1
+service-identity==24.1.0
+setuptools==72.2.0
+shapely==2.0.6
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+soupsieve==2.6
+stack-data==0.6.3
+starlette==0.45.3
+sympy==1.13.1
+tabulate==0.9.0
+tensorboard==2.19.0
+tensorboard-data-server==0.7.2
+tensorflow==2.19.0
+termcolor==2.4.0
+tf_keras==2.19.0
+threadpoolctl==3.5.0
+tifffile==2025.1.10
+tinycss2==1.4.0
+tldextract==5.1.2
+tokenizers==0.21.0
+tomlkit==0.13.2
+torch==2.6.0
+torchaudio==2.6.0
+torchvision==0.21.0
+tornado==6.4.2
+tqdm==4.67.1
+traitlets==5.14.3
+transformers==4.49.0
+Twisted==24.7.0
+typer==0.15.1
+typing_extensions==4.12.2
+tzdata==2024.2
+urllib3==2.2.2
+uvicorn==0.34.0
+w3lib==2.2.1
+wcwidth==0.2.13
+webencodings==0.5.1
+websockets==14.2
+Werkzeug==3.1.3
+wheel==0.45.1
+wrapt==1.17.2
+xxhash==3.5.0
+yarg==0.1.9
+yarl==1.18.3
+zope.interface==7.0.1