# elevow — Update app.py (commit 2746d5c, verified)
"""SHARP Gradio demo (PLY export + 3D viewer).
This Space:
- Runs Apple's SHARP model to predict a 3D Gaussian scene from a single image.
- Exports a canonical `.ply` file for download.
- Serves unique PLY and settings files per generation via the SuperSplat Viewer.
Uses Gradio 6's static file serving (no FastAPI/uvicorn needed).
"""
from __future__ import annotations
import json
import math
import time
import uuid
from pathlib import Path
from typing import Final
import gradio as gr
from model_utils import predict_to_ply_gpu
# -----------------------------------------------------------------------------
# Paths & constants
# -----------------------------------------------------------------------------
# Directory containing this file; the paths below are derived from it.
APP_DIR: Final[Path] = Path(__file__).resolve().parent
# Generated per-run artifacts (.ply scenes and .settings.json files) are written here.
OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"
# Directory of the viewer assets; viewer URLs point at viewer/index.html.
VIEWER_DIR: Final[Path] = APP_DIR / "viewer"
# Optional base settings that create_settings_file() merges into each generated settings file.
DEFAULT_SETTINGS_JSON: Final[Path] = VIEWER_DIR / "settings.default.json"
# Passed as max_size to demo.queue().
DEFAULT_QUEUE_MAX_SIZE: Final[int] = 32
# Initial slider value, and the fallback used for non-positive focal lengths.
DEFAULT_FOCAL_LENGTH_MM: Final[float] = 35.0
SENSOR_HEIGHT_MM: Final[float] = 24.0  # Full-frame 35mm equivalent
SENSOR_WIDTH_MM: Final[float] = 36.0  # Full-frame 35mm width
# Register static paths for Gradio 6 file serving
gr.set_static_paths(paths=[str(VIEWER_DIR), str(OUTPUTS_DIR)])
# Theme object handed to demo.launch().
THEME: Final = gr.themes.Origin()
# Stylesheet handed to demo.launch(css=...). Selectors target the elem_id values
# and placeholder class names used by the layout and HTML generators in this file.
CSS: Final[str] = """
/* Keep layout stable when scrollbars appear/disappear */
html { scrollbar-gutter: stable; }
/* Use normal document flow */
html, body { height: auto; }
body { overflow: auto; }
/* Full-width layout */
.gradio-container {
max-width: none;
width: 100%;
margin: 0;
padding: 0.5rem 1rem 1rem;
box-sizing: border-box;
}
/* Header styling */
#app-header {
margin-bottom: 0.5rem;
}
#app-header h2 {
margin: 0 0 0.25rem 0;
font-size: 1.5rem;
}
#app-header p {
margin: 0;
opacity: 0.85;
}
/* Main layout: controls left, viewer right (larger) */
#main-row {
gap: 1rem;
align-items: stretch;
}
/* Left panel: controls */
#controls-panel {
display: flex;
flex-direction: column;
gap: 0.75rem;
}
#controls-panel .input-image-container {
flex: 1;
min-height: 200px;
}
#input-image {
width: 100%;
}
#input-image img {
width: 100%;
height: auto;
max-height: 280px;
object-fit: contain;
}
/* Options row */
#options-row {
gap: 0.5rem;
}
#options-row > div {
flex: 1;
}
/* Action buttons */
#actions-row {
gap: 0.5rem;
}
#actions-row button {
flex: 1;
min-height: 42px;
}
/* Downloads row */
#downloads-row {
gap: 0.5rem;
align-items: center;
}
#downloads-row > div {
flex: 1;
}
/* Right panel: 3D viewer (dominant) */
#viewer-panel {
display: flex;
flex-direction: column;
min-height: 500px;
}
#viewer-container {
flex: 1;
display: flex;
flex-direction: column;
min-height: 0;
}
/* Viewer iframe/placeholder */
#viewer-html {
flex: 1;
min-height: 500px;
}
#viewer-html iframe {
width: 100%;
height: 100%;
min-height: 500px;
border: 0;
border-radius: 12px;
overflow: hidden;
background: #000;
}
/* Placeholder styling */
.viewer-placeholder {
width: 100%;
height: 100%;
min-height: 500px;
display: flex;
align-items: center;
justify-content: center;
border: 2px dashed var(--border-color-primary, rgba(127, 127, 127, 0.35));
border-radius: 12px;
background: var(--block-background-fill, rgba(127, 127, 127, 0.05));
color: var(--body-text-color, rgba(255, 255, 255, 0.92));
transition: all 0.3s ease;
}
.viewer-placeholder-inner {
max-width: 400px;
padding: 32px;
text-align: center;
}
.viewer-placeholder-icon {
font-size: 48px;
margin-bottom: 16px;
opacity: 0.6;
}
.viewer-placeholder-title {
font-size: 18px;
font-weight: 600;
margin-bottom: 8px;
}
.viewer-placeholder-desc {
font-size: 14px;
line-height: 1.5;
opacity: 0.75;
}
/* Loading state */
.viewer-loading {
border-color: var(--primary-500);
background: linear-gradient(
135deg,
rgba(255, 102, 0, 0.05) 0%,
rgba(255, 102, 0, 0.1) 100%
);
}
.viewer-loading .viewer-placeholder-icon {
animation: pulse 1.5s ease-in-out infinite;
}
@keyframes pulse {
0%, 100% { opacity: 0.4; transform: scale(1); }
50% { opacity: 0.8; transform: scale(1.05); }
}
/* Status text */
#status-text {
font-size: 13px;
opacity: 0.85;
margin-top: 0.5rem;
}
/* Responsive: stack on small screens */
@media (max-width: 900px) {
#main-row {
flex-direction: column;
}
#controls-panel, #viewer-panel {
min-width: 100% !important;
}
#viewer-html, #viewer-html iframe, .viewer-placeholder {
min-height: 400px;
}
#input-image img {
max-height: 200px;
}
}
"""
def _ensure_dir(path: Path) -> Path:
path.mkdir(parents=True, exist_ok=True)
return path
# Executed at import time: make sure the outputs and viewer directories exist.
_ensure_dir(OUTPUTS_DIR)
_ensure_dir(VIEWER_DIR)
# -----------------------------------------------------------------------------
# FOV / Focal Length utilities
# -----------------------------------------------------------------------------
def focal_length_to_fov(
    focal_length_mm: float,
    sensor_height_mm: float = SENSOR_HEIGHT_MM,
    sensor_width_mm: float = SENSOR_WIDTH_MM,
) -> float:
    """Return the diagonal field of view in degrees for a focal length in mm.

    The sensor diagonal is ``sqrt(width^2 + height^2)`` and the result is
    ``2 * atan(diagonal / (2 * focal_length))`` converted to degrees.
    A non-positive focal length is replaced by ``DEFAULT_FOCAL_LENGTH_MM``.
    """
    effective_focal_mm = (
        DEFAULT_FOCAL_LENGTH_MM if focal_length_mm <= 0 else focal_length_mm
    )
    sensor_diagonal_mm = math.sqrt(sensor_width_mm**2 + sensor_height_mm**2)
    return math.degrees(2 * math.atan(sensor_diagonal_mm / (2 * effective_focal_mm)))
def create_settings_file(focal_length_mm: float, output_stem: str) -> Path:
    """Write a per-generation viewer settings file and return its path.

    The settings start from built-in defaults. When ``DEFAULT_SETTINGS_JSON``
    can be read and holds a JSON object, its ``background``, ``camera`` and
    ``animTracks`` entries are overlaid on those defaults. The camera FOV is
    always the value derived from ``focal_length_mm``, even if the defaults
    file specifies one.

    Args:
        focal_length_mm: Focal length used to compute the camera FOV.
        output_stem: File stem; the file is written to
            ``OUTPUTS_DIR / f"{output_stem}.settings.json"``.

    Returns:
        Path of the settings file that was written.
    """
    fov = focal_length_to_fov(focal_length_mm)
    # Built-in baseline, used as-is when no usable defaults file is available.
    settings = {
        "camera": {
            "fov": fov,
            "position": [0, 0, 0],
            "target": [0, 0, 0],
            "startAnim": "none",
            "animTrack": "",
        },
        "background": {"color": [0, 0, 0, 0]},
        "animTracks": [],
    }
    try:
        existing = json.loads(DEFAULT_SETTINGS_JSON.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Best effort: a missing, unreadable or malformed defaults file (JSON and
        # decoding errors are ValueError subclasses) falls back to the baseline.
        # Anything else is a programming error and is allowed to propagate.
        existing = None
    if isinstance(existing, dict):
        if "background" in existing:
            settings["background"] = existing["background"]
        camera_overrides = existing.get("camera")
        # Only merge a real mapping, so one malformed entry cannot abort the
        # remaining overlays halfway through.
        if isinstance(camera_overrides, dict):
            settings["camera"].update(camera_overrides)
        if "animTracks" in existing:
            settings["animTracks"] = existing["animTracks"]
    # The computed FOV always wins over any value from the defaults file.
    settings["camera"]["fov"] = fov
    settings_path = OUTPUTS_DIR / f"{output_stem}.settings.json"
    settings_path.write_text(json.dumps(settings, indent=2), encoding="utf-8")
    return settings_path
# -----------------------------------------------------------------------------
# Validation & file operations
# -----------------------------------------------------------------------------
def _validate_image(image_path: str | None) -> None:
if not image_path:
raise gr.Error("Please upload an image first.")
def _generate_output_stem() -> str:
"""Generate unique output file stem."""
ts = int(time.time() * 1000)
uid = uuid.uuid4().hex[:8]
return f"scene_{ts}_{uid}"
# -----------------------------------------------------------------------------
# HTML generators
# -----------------------------------------------------------------------------
def viewer_url_for_output(ply_filename: str, settings_filename: str) -> str:
    """Build the viewer page URL for one generated scene.

    The ``content`` and ``settings`` query parameters carry absolute
    ``/gradio_api/file=outputs/...`` paths, and ``noanim`` is appended as a flag.
    """
    query_parts = (
        f"content=/gradio_api/file=outputs/{ply_filename}",
        f"settings=/gradio_api/file=outputs/{settings_filename}",
        "noanim",
    )
    return "/gradio_api/file=viewer/index.html?" + "&".join(query_parts)
def viewer_placeholder_html() -> str:
    """Return the static markup shown in the viewer area before any scene exists."""
    idle_markup = """
<div class="viewer-placeholder">
<div class="viewer-placeholder-inner">
<div class="viewer-placeholder-icon">🎨</div>
<div class="viewer-placeholder-title">3D Viewer</div>
<div class="viewer-placeholder-desc">
Upload an image and click <strong>Generate</strong> to create a 3D Gaussian scene.
The interactive viewer will appear here.
</div>
</div>
</div>
"""
    return idle_markup
def viewer_loading_html() -> str:
    """Return the loading placeholder, including the ``#generation-timer`` element."""
    busy_markup = """
<div class="viewer-placeholder viewer-loading">
<div class="viewer-placeholder-inner">
<div class="viewer-placeholder-icon">⚡</div>
<div class="viewer-placeholder-title">Generating 3D Scene...</div>
<div class="viewer-placeholder-desc">
Running SHARP model inference. This may take a moment.
</div>
<div id="generation-timer" style="font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-size: 28px; font-weight: 600; color: var(--primary-500); margin-top: 16px;">0s</div>
</div>
</div>
"""
    return busy_markup
def viewer_iframe_html(ply_filename: str, settings_filename: str) -> str:
    """Return an ``<iframe>`` whose ``src`` is the viewer URL for the given files."""
    return f"""
<iframe
src="{viewer_url_for_output(ply_filename, settings_filename)}"
allow="xr-spatial-tracking; fullscreen"
referrerpolicy="no-referrer"
loading="eager"
></iframe>
"""
# -----------------------------------------------------------------------------
# Main inference function
# -----------------------------------------------------------------------------
def run_sharp(
    image_path: str | None,
    focal_length_mm: float,
) -> tuple[object, object, str, object, object, object, str]:
    """Run SHARP inference and prepare the UI updates for the result.

    Args:
        image_path: Path of the uploaded image; must be non-empty.
        focal_length_mm: Focal length used to derive the viewer FOV.

    Returns:
        (ply_download, viewer_html, status, generate_btn, clear_btn,
        open_viewer_btn, current_viewer_url)

    Raises:
        gr.Error: If no image was supplied, or if any step of the generation
            fails (the original exception is chained as the cause).
    """
    _validate_image(image_path)
    try:
        # Generate unique output stem shared by the PLY and its settings file
        output_stem = _generate_output_stem()
        # Run inference first: if it fails, nothing has been written to the
        # outputs directory yet, so no orphaned settings file is left behind.
        ply_path = predict_to_ply_gpu(image_path)
        # Rename PLY to unique name in outputs
        # NOTE(review): rename() assumes the predicted file is on the same
        # filesystem as OUTPUTS_DIR — confirm against predict_to_ply_gpu.
        unique_ply_path = OUTPUTS_DIR / f"{output_stem}.ply"
        ply_path.rename(unique_ply_path)
        # Create settings file with FOV once the scene file is in place
        settings_path = create_settings_file(focal_length_mm, output_stem)
        fov = focal_length_to_fov(focal_length_mm)
        status = f"✓ Generated **{unique_ply_path.name}** | FOV: {fov:.1f}°"
        viewer_url = viewer_url_for_output(unique_ply_path.name, settings_path.name)
        return (
            gr.update(value=str(unique_ply_path), visible=True, interactive=True),
            viewer_iframe_html(unique_ply_path.name, settings_path.name),
            status,
            gr.update(interactive=True, value="Generate"),
            gr.update(interactive=True),
            gr.update(visible=True, interactive=True),
            viewer_url,
        )
    except gr.Error:
        # Already user-facing; re-raise untouched instead of wrapping it again.
        raise
    except Exception as e:
        raise gr.Error(f"Generation failed: {type(e).__name__}: {e}") from e
def start_generation() -> tuple[str, object, object]:
    """Switch the UI into its busy state before inference starts.

    Returns: (viewer_html, generate_btn, clear_btn)
    """
    loading_markup = viewer_loading_html()
    busy_generate_btn = gr.update(interactive=False, value="Generating...")
    locked_clear_btn = gr.update(interactive=False)
    return loading_markup, busy_generate_btn, locked_clear_btn
def clear_all() -> tuple:
    """Reset every input and output to its initial state.

    Returns: (image, ply_download, viewer_html, status, generate_btn, clear_btn, open_viewer_btn, current_viewer_url)
    """
    cleared_image = None
    hidden_download = gr.update(value=None, visible=False)
    idle_markup = viewer_placeholder_html()
    empty_status = ""
    ready_generate_btn = gr.update(interactive=True, value="Generate")
    disabled_clear_btn = gr.update(interactive=False)
    hidden_open_viewer_btn = gr.update(visible=False)
    empty_viewer_url = ""
    return (
        cleared_image,
        hidden_download,
        idle_markup,
        empty_status,
        ready_generate_btn,
        disabled_clear_btn,
        hidden_open_viewer_btn,
        empty_viewer_url,
    )
def on_image_change(image_path: str | None) -> tuple[object, object]:
    """Enable the action buttons only while an image is present.

    Returns: (generate_btn, clear_btn)
    """
    enabled = True if image_path else False
    generate_update = gr.update(interactive=enabled, value="Generate")
    clear_update = gr.update(interactive=enabled)
    return generate_update, clear_update
# -----------------------------------------------------------------------------
# UI
# -----------------------------------------------------------------------------
# Global JS for timer control and viewer URL (injected via head parameter).
# - window.sharpTimer.start() restarts an interval that rewrites the text of the
#   #generation-timer element every 500 ms ("12s", then "m:ss" from one minute on);
#   window.sharpTimer.stop() clears that interval.
# - window.openSharpViewer() opens the src of the iframe inside #viewer-html in a
#   new tab, and does nothing when no such iframe exists.
HEAD_JS: Final[str] = """
<script>
window.sharpTimer = {
interval: null,
start: function() {
this.stop();
var startTime = Date.now();
this.interval = setInterval(function() {
var el = document.getElementById('generation-timer');
if (!el) return;
var secs = Math.floor((Date.now() - startTime) / 1000);
var mins = Math.floor(secs / 60);
secs = secs % 60;
el.textContent = mins > 0 ? mins + ':' + (secs < 10 ? '0' : '') + secs : secs + 's';
}, 500);
},
stop: function() {
if (this.interval) {
clearInterval(this.interval);
this.interval = null;
}
}
};
window.openSharpViewer = function() {
var iframe = document.querySelector('#viewer-html iframe');
if (iframe && iframe.src) {
window.open(iframe.src, '_blank');
}
};
</script>
"""
# def build_demo() -> gr.Blocks:
# with gr.Blocks(
# title="SHARP • Single-Image 3D Gaussian Prediction",
# elem_id="sharp-root",
# ) as demo:
# # Hidden textbox to store viewer URL (State doesn't work well with js param)
# current_viewer_url = gr.Textbox(value="", visible=False, elem_id="viewer-url-store")
# # Header
# with gr.Column(elem_id="app-header"):
# gr.Markdown("## SHARP")
# gr.Markdown("Single-image **3D Gaussian scene** prediction")
# # Main layout: controls (left, narrow) + viewer (right, wide)
# with gr.Row(elem_id="main-row", equal_height=True):
# # Left column: Controls
# with gr.Column(scale=3, min_width=280, elem_id="controls-panel"):
# # Image upload
# image_in = gr.Image(
# label="Input Image",
# type="filepath",
# sources=["upload"],
# elem_id="input-image",
# show_label=True,
# )
# # Options
# with gr.Row(elem_id="options-row"):
# focal_length = gr.Slider(
# label="Focal Length (mm)",
# minimum=12,
# maximum=200,
# step=1,
# value=DEFAULT_FOCAL_LENGTH_MM,
# info="Affects viewer FOV",
# )
# # Action buttons
# with gr.Row(elem_id="actions-row"):
# generate_btn = gr.Button(
# "Generate",
# variant="primary",
# interactive=False,
# elem_id="generate-btn",
# )
# clear_btn = gr.Button(
# "Clear",
# variant="secondary",
# interactive=False,
# elem_id="clear-btn",
# )
# # Downloads
# with gr.Row(elem_id="downloads-row"):
# ply_download = gr.DownloadButton(
# label="Download PLY",
# value=None,
# visible=False,
# elem_id="ply-download",
# )
# open_viewer_btn = gr.Button(
# "Open Viewer in New Tab ↗",
# size="sm",
# visible=False,
# elem_id="open-viewer-btn",
# )
# # Status
# status_md = gr.Markdown("", elem_id="status-text")
# # Right column: 3D Viewer (dominant)
# with gr.Column(scale=7, min_width=400, elem_id="viewer-panel"):
# viewer_html = gr.HTML(
# value=viewer_placeholder_html(),
# elem_id="viewer-html",
# label="3D Viewer",
# )
# # About section (collapsible)
# with gr.Accordion("About", open=False):
# gr.Markdown("""
# ### SHARP Model
# **Sharp Monocular View Synthesis in Less Than a Second** (Apple, 2025)
# SHARP predicts a 3D Gaussian splatting scene from a single image, enabling novel view synthesis.
# ```bibtex
# @inproceedings{Sharp2025:arxiv,
# title = {Sharp Monocular View Synthesis in Less Than a Second},
# author = {Lars Mescheder and Wei Dong and Shiwei Li and Xuyang Bai and Marcel Santos and Peiyun Hu and Bruno Lecouat and Mingmin Zhen and Amaël Delaunoy and Tian Fang and Yanghai Tsin and Stephan R. Richter and Vladlen Koltun},
# journal = {arXiv preprint arXiv:2512.10685},
# year = {2025},
# }
# ```
# ### 3D Viewer
# Powered by [SuperSplat Viewer](https://github.com/playcanvas/supersplat-viewer) by PlayCanvas.
# """.strip())
# # --- Event handlers ---
# # Image change: enable/disable buttons
# image_in.change(
# fn=on_image_change,
# inputs=[image_in],
# outputs=[generate_btn, clear_btn],
# queue=False,
# show_progress="hidden",
# )
# # Generate: start loading, run inference
# generate_btn.click(
# fn=start_generation,
# outputs=[viewer_html, generate_btn, clear_btn],
# queue=False,
# show_progress="hidden",
# js="() => { window.sharpTimer && window.sharpTimer.start(); }",
# ).then(
# fn=run_sharp,
# inputs=[image_in, focal_length],
# outputs=[ply_download, viewer_html, status_md, generate_btn, clear_btn, open_viewer_btn, current_viewer_url],
# show_progress="hidden",
# ).then(
# fn=lambda: None,
# js="() => { window.sharpTimer && window.sharpTimer.stop(); }",
# )
# # Clear
# clear_btn.click(
# fn=clear_all,
# outputs=[image_in, ply_download, viewer_html, status_md, generate_btn, clear_btn, open_viewer_btn, current_viewer_url],
# queue=False,
# show_progress="hidden",
# )
# # Open viewer in new tab using global URL
# open_viewer_btn.click(
# fn=None,
# js="() => { window.openSharpViewer(); }",
# )
# demo.queue(max_size=DEFAULT_QUEUE_MAX_SIZE, default_concurrency_limit=1)
# return demo
# Active UI builder: password-gated variant of the commented-out layout above.
def build_demo() -> gr.Blocks:
    """Build the password-gated SHARP Blocks app.

    Layout: header, a password row, the main row (hidden until the password is
    verified) with the controls on the left and the viewer on the right, and an
    "About" accordion.

    The access password is read from the ``SHARP_APP_PASSWORD`` environment
    variable. When the variable is unset, the previous hard-coded value is used
    so existing deployments keep working.

    Returns:
        The configured ``gr.Blocks`` instance with its queue enabled.
    """
    # Function-scope imports: only the password check needs these modules.
    import hmac
    import os

    # SECURITY NOTE(review): the fallback only preserves the legacy behaviour;
    # set SHARP_APP_PASSWORD in the deployment instead of relying on it.
    expected_password = os.environ.get("SHARP_APP_PASSWORD", "yes")

    def check_password(password):
        """Return updates for (password_row, main_content_row, auth_status, is_authenticated)."""
        supplied = (password or "").encode("utf-8")
        # Constant-time comparison on bytes (compare_digest rejects non-ASCII str).
        if hmac.compare_digest(supplied, expected_password.encode("utf-8")):
            return (
                gr.update(visible=False),  # Hide password row
                gr.update(visible=True),  # Show main content row
                "",  # Clear error messages
                True,  # Set authenticated state to True
            )
        return (
            gr.update(visible=True),
            gr.update(visible=False),
            "❌ Incorrect password. Please try again.",
            False,
        )

    def run_sharp_if_authenticated(image_path, focal_length_mm, authenticated):
        """Run inference only for sessions that passed the password check.

        Hiding the main row is purely visual; this check enforces the gate on
        the server side as well.
        """
        if not authenticated:
            raise gr.Error("Please enter the password first.")
        return run_sharp(image_path, focal_length_mm)

    def restore_after_generation(image_path, current_html):
        """Return updates for (viewer_html, generate_btn, clear_btn) after a run.

        When the inference step raised, its outputs were never applied, so the
        viewer still shows the loading placeholder and both buttons are still
        disabled. This step undoes that; after a successful run it changes
        nothing visible.
        """
        generate_update, clear_update = on_image_change(image_path)
        if "viewer-loading" in (current_html or ""):
            viewer_update = viewer_placeholder_html()
        else:
            viewer_update = gr.update()
        return viewer_update, generate_update, clear_update

    with gr.Blocks(
        title="SHARP • Single-Image 3D Gaussian Prediction",
        elem_id="sharp-root",
    ) as demo:
        # --- State Tracking & Hidden Stores ---
        is_authenticated = gr.State(value=False)
        current_viewer_url = gr.Textbox(value="", visible=False, elem_id="viewer-url-store")
        # Header
        with gr.Column(elem_id="app-header"):
            gr.Markdown("## 3D Gaussian Prediction")
            gr.Markdown("Single-image **3D Gaussian scene** prediction using Apple's 2025 Sharp Model.")
        # --- Password Protection Layer ---
        with gr.Row(elem_id="password-row") as password_row:
            with gr.Column(scale=1):
                password_in = gr.Textbox(
                    label="Enter Password to Access",
                    type="password",
                    placeholder="Password...",
                )
                auth_btn = gr.Button("Verify Password", variant="primary")
                auth_status = gr.Markdown("")
        # --- Main App Layout (Initially Hidden) ---
        with gr.Row(elem_id="main-row", equal_height=True, visible=False) as main_content_row:
            # Left column: Controls (Image upload & Settings)
            with gr.Column(scale=3, min_width=280, elem_id="controls-panel"):
                # Image upload (file upload or webcam capture)
                image_in = gr.Image(
                    label="Input Image",
                    type="filepath",
                    sources=["upload", "webcam"],
                    elem_id="input-image",
                    show_label=True,
                )
                # Options
                with gr.Row(elem_id="options-row"):
                    focal_length = gr.Slider(
                        label="Focal Length (mm)",
                        minimum=12,
                        maximum=200,
                        step=1,
                        value=DEFAULT_FOCAL_LENGTH_MM,
                        info="Affects viewer FOV",
                    )
                # Action buttons
                with gr.Row(elem_id="actions-row"):
                    generate_btn = gr.Button(
                        "Generate",
                        variant="primary",
                        interactive=False,
                        elem_id="generate-btn",
                    )
                    clear_btn = gr.Button(
                        "Clear",
                        variant="secondary",
                        interactive=False,
                        elem_id="clear-btn",
                    )
                # Downloads
                with gr.Row(elem_id="downloads-row"):
                    ply_download = gr.DownloadButton(
                        label="Download PLY",
                        value=None,
                        visible=False,
                        elem_id="ply-download",
                    )
                    open_viewer_btn = gr.Button(
                        "Open Viewer in New Tab ↗",
                        size="sm",
                        visible=False,
                        elem_id="open-viewer-btn",
                    )
                # Status
                status_md = gr.Markdown("", elem_id="status-text")
            # Right column: 3D Viewer (dominant)
            with gr.Column(scale=7, min_width=400, elem_id="viewer-panel"):
                viewer_html = gr.HTML(
                    value=viewer_placeholder_html(),
                    elem_id="viewer-html",
                    label="3D Viewer",
                )
        # About section (collapsible)
        with gr.Accordion("About", open=False):
            gr.Markdown(
                """
### SHARP Model
**Sharp Monocular View Synthesis in Less Than a Second** (Apple, 2025)
"""
            )
        # --- Event Handlers ---
        # Password verification
        auth_btn.click(
            fn=check_password,
            inputs=[password_in],
            outputs=[password_row, main_content_row, auth_status, is_authenticated],
        )
        # Image change: enable/disable buttons
        image_in.change(
            fn=on_image_change,
            inputs=[image_in],
            outputs=[generate_btn, clear_btn],
            queue=False,
            show_progress="hidden",
        )
        # Generate: show loading state, run inference, restore controls, stop timer.
        # The restore step is chained with .then() so it can run after a failed
        # inference as well as after a successful one.
        generate_btn.click(
            fn=start_generation,
            outputs=[viewer_html, generate_btn, clear_btn],
            queue=False,
            show_progress="hidden",
            js="() => { window.sharpTimer && window.sharpTimer.start(); }",
        ).then(
            fn=run_sharp_if_authenticated,
            inputs=[image_in, focal_length, is_authenticated],
            outputs=[ply_download, viewer_html, status_md, generate_btn, clear_btn, open_viewer_btn, current_viewer_url],
            show_progress="hidden",
        ).then(
            fn=restore_after_generation,
            inputs=[image_in, viewer_html],
            outputs=[viewer_html, generate_btn, clear_btn],
            queue=False,
            show_progress="hidden",
        ).then(
            fn=lambda: None,
            js="() => { window.sharpTimer && window.sharpTimer.stop(); }",
        )
        # Clear
        clear_btn.click(
            fn=clear_all,
            outputs=[image_in, ply_download, viewer_html, status_md, generate_btn, clear_btn, open_viewer_btn, current_viewer_url],
            queue=False,
            show_progress="hidden",
        )
        # Open viewer in new tab using the iframe's current URL
        open_viewer_btn.click(
            fn=None,
            js="() => { window.openSharpViewer(); }",
        )
    demo.queue(max_size=DEFAULT_QUEUE_MAX_SIZE, default_concurrency_limit=1)
    return demo
# Built at import time, so `demo` exists as a module-level name even when this
# file is imported rather than run as a script.
demo = build_demo()
if __name__ == "__main__":
    # Theme, CSS and the head script are supplied here at launch.
    demo.launch(theme=THEME, css=CSS, head=HEAD_JS)
# demo = build_demo()
# if __name__ == "__main__":
# demo.launch(theme=THEME, css=CSS, head=HEAD_JS)