import os import sys import spaces import base64 import tempfile from omegaconf import OmegaConf from typing import Optional, Union, Tuple import gradio as gr GUIDEFLOW_YELLOW = "#ccad57" GUIDEFLOW_BLUE = "#2459c2" GUIDEFLOW_GREEN = "#8edf9f" os.environ["CUMM_DISABLE_JIT"] = "1" os.environ["SPCONV_DISABLE_JIT"] = "1" os.environ["TOKENIZERS_PARALLELISM"] = "false" # Add project root to Python path project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) if project_root not in sys.path: sys.path.insert(0, project_root) # --- START XVFB GLOBALLY --- # Check if we are in a headless environment and DISPLAY is not set if os.environ.get("DISPLAY") is None: print("[INFO] Starting Xvfb for headless rendering...") from pyvirtualdisplay import Display # Start Xvfb. visible=0 means headless. # size=(1920, 1080) matches your previous xvfb-run settings. display = Display(visible=0, size=(1920, 1080)) display.start() # Ensure DISPLAY env var is set for subprocesses if os.environ.get("DISPLAY") is None: # PyVirtualDisplay usually sets this, but fallback if needed os.environ["DISPLAY"] = f":{display.display}" print(f"[INFO] Xvfb started on {os.environ['DISPLAY']}") # --- LOGO SETUP (BASE64) --- def image_to_base64(image_path): """Encodes an image to a base64 string for direct HTML embedding.""" if not os.path.exists(image_path): return "" with open(image_path, "rb") as img_file: encoded_string = base64.b64encode(img_file.read()).decode('utf-8') return f"data:image/png;base64,{encoded_string}" logo_rel_path = os.path.join("demos", "assets", "logo.png") logo_abs_path = os.path.join(project_root, logo_rel_path) logo_src = image_to_base64(logo_abs_path) BLENDER_LINK = 'https://download.blender.org/release/Blender3.0/blender-3.0.1-linux-x64.tar.xz' BLENDER_INSTALLATION_PATH = '/tmp' BLENDER_PATH = f'{BLENDER_INSTALLATION_PATH}/blender-3.0.1-linux-x64/blender' def _install_blender(): if not os.path.exists(BLENDER_PATH): os.system('sudo apt-get update') os.system('sudo apt-get install -y libxrender1 libxi6 libxkbcommon-x11-0 libsm6') os.system(f'wget {BLENDER_LINK} -P {BLENDER_INSTALLATION_PATH}') os.system(f'tar -xvf {BLENDER_INSTALLATION_PATH}/blender-3.0.1-linux-x64.tar.xz -C {BLENDER_INSTALLATION_PATH}') _install_blender() # Attempt import, handle failure gracefully for the demo shell try: from demos.pipeline_fn import GuideFlow3dPipeline except ImportError: GuideFlow3dPipeline = None pipe = None cfg = None # Initialize Pipeline try: cfg_path = os.path.join(project_root, 'config', 'default.yaml') if os.path.exists(cfg_path): cfg = OmegaConf.load(cfg_path) if GuideFlow3dPipeline: pipe = GuideFlow3dPipeline().from_pretrained(cfg) except Exception as e: print(f"Error initializing pipeline: {e}") pass output_dir = os.path.join(os.getcwd(), "all_outputs") os.makedirs(output_dir, exist_ok=True) # --- MAPPING HELPERS --- # Dictionary mapping static thumbnail images to actual GLB files THUMB_TO_GLB = { # Structure Mesh Examples "example_data/thumbs/example1_thumb.png": "example_data/example1.glb", # Reference Appearance Mesh Examples "example_data/thumbs/B07QC84LP1_thumb.png": "example_data/B07QC84LP1.glb" } # Create a lookup based on basename to be robust against Gradio temp paths THUMB_BASENAME_TO_GLB = {os.path.basename(k): v for k, v in THUMB_TO_GLB.items()} def load_mesh_from_thumb(thumb_path: str) -> Optional[str]: """Callback to return the GLB path associated with a thumbnail.""" if not thumb_path: return None basename = os.path.basename(thumb_path) return THUMB_BASENAME_TO_GLB.get(basename, None) def _ensure_glb_path(result: Union[str, bytes, os.PathLike]) -> str: """Normalize various return types from fn() to a .glb file path.""" if isinstance(result, (str, os.PathLike)): path = os.fspath(result) if not os.path.exists(path): raise gr.Error("Returned mesh path does not exist.") return path if isinstance(result, (bytes, bytearray)): tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".glb") tmp.write(result) tmp.flush() tmp.close() return tmp.name # @spaces.GPU(duration=360) def on_run( guidance_mode_state: str, app_struct_mesh: Optional[str], app_ref_mesh: Optional[str], app_ref_image: Optional[str], sim_struct_mesh: Optional[str], sim_ref_text: Optional[str], sim_ref_image: Optional[str], target_up_label: str, reference_up_label: str, cfg_strength: float, num_steps: int, learning_rate: float, ) -> Tuple[str, Optional[str]]: current_mode = guidance_mode_state.lower() if current_mode == "appearance": target_mesh_path = app_struct_mesh reference_mesh_path = app_ref_mesh reference_image_path = app_ref_image reference_text = None else: target_mesh_path = sim_struct_mesh reference_text = sim_ref_text reference_image_path = sim_ref_image reference_mesh_path = None if not target_mesh_path: raise gr.Error(f"Target Structure mesh is required for {current_mode} mode.") if pipe is None: raise gr.Error("Pipeline not initialized. Check logs.") args = { "structure_mesh": target_mesh_path, "output_dir": output_dir, "convert_target_yup_to_zup": target_up_label == "Z-up", "convert_appearance_yup_to_zup": reference_up_label == "Z-up", "appearance_mesh": reference_mesh_path, "appearance_image": reference_image_path, "appearance_text": (reference_text or "").strip(), } fn = None if current_mode == "appearance": if not reference_mesh_path: raise gr.Error("Appearance mode requires a reference mesh.") fn = pipe.run_appearance args.pop("appearance_text", None) else: # similarity if not reference_text: raise gr.Error("Similarity mode requires a text prompt.") fn = pipe.run_self_similarity args.pop("appearance_mesh", None) args.pop("appearance_image", None) args.pop("convert_appearance_yup_to_zup", None) if cfg: updated_cfg = cfg # OmegaConf.load(cfg) updated_cfg.cfg_strength = cfg_strength updated_cfg.steps = num_steps updated_cfg.learning_rate = learning_rate pipe.cfg = updated_cfg try: result_mesh, result_video = fn(**args) mesh_path = _ensure_glb_path(result_mesh) video_path = _ensure_glb_path(result_video) return mesh_path, video_path except Exception as e: raise gr.Error(f"Generation failed: {str(e)}") # --- UI Styling & Header --- css = f""" body, .gradio-container {{ background-color: #ffffff !important; color: #1f2937 !important; }} .dark body, .dark .gradio-container {{ background-color: #ffffff !important; color: #1f2937 !important; }} h1, h2, h3, span, p {{ font-family: 'Inter', 'Roboto', sans-serif; }} .guideflow-header {{ display: flex; flex-direction: column; align-items: center; margin-bottom: 1rem; }} .logo-row {{ display: flex; align-items: baseline; gap: 0.2rem; }} .logo-img {{ height: 4rem; width: auto; transform: translateY(0.5rem); }} .gradient-title {{ font-size: 3.5rem; font-weight: 800; background: linear-gradient(90deg, {GUIDEFLOW_GREEN}, {GUIDEFLOW_BLUE}, {GUIDEFLOW_YELLOW}); -webkit-background-clip: text; background-clip: text; color: transparent; line-height: 1.2; }} .subtitle {{ font-size: 1.5rem; font-weight: 600; color: {GUIDEFLOW_YELLOW}; margin-top: 0.5rem; text-align: center; }} .authors {{ font-size: 1rem; color: #334155; margin-top: 0.5rem; }} .affiliations {{ font-size: 0.9rem; color: #6b7280; margin-top: 0.2rem; }} .venue {{ font-size: 1.1rem; font-weight: 700; color: #111827; margin-top: 0.5rem; }} .links a {{ color: {GUIDEFLOW_BLUE}; text-decoration: none; margin: 0 0.5rem; font-weight: 500; }} .links a:hover {{ text-decoration: underline; }} .demo-credit {{ font-size: 0.9rem; color: #64748b; margin-top: 0.5rem; }} .instructions-container {{ max-width: 800px; margin: 0 auto 2rem auto; text-align: left; padding: 0 1rem; }} .input-row {{ align-items: flex-start; margin-bottom: 1rem; }} """ HEADER_HTML = f"""
GuideFlow3D Logo uideFlow3D
Optimization-Guided Rectified Flow For Appearance Transfer
Sayan Deb Sarkar1    Sinisa Stekovic2    Vincent Lepetit2    Iro Armeni1
1Stanford University    2ENPC, IP Paris
NeurIPS 2025
Demo made by Suvaditya Mukherjee
""" INSTRUCTIONS_MD = """

Instructions

  1. Upload a Structure Mesh (.glb): This defines the shape of your 3D object.
  2. Choose Guidance Mode: Select "Self-Similarity" (Text) or "Appearance" (Mesh/Image) using the tabs.
  3. Provide Reference: Enter a text prompt or upload a reference image/mesh.
  4. Run: Click "Generate 3D Asset" to create the result.
""" # Example Data EX_STRUCT_THUMBS = [["example_data/thumbs/example1_thumb.png"]] EX_MESH_THUMBS = [["example_data/thumbs/B07QC84LP1_thumb.png"]] EX_IMG = ["example_data/B07QC84LP1_orig.png"] EX_TEXT = ["a wooden chair", "a marble statue", "a golden trophy"] with gr.Blocks( title="GuideFlow3D", ) as demo: gr.HTML(HEADER_HTML) gr.HTML(INSTRUCTIONS_MD) guidance_mode_state = gr.State(value="Similarity") with gr.Tabs() as guidance_tabs: # --- TAB 1: SELF-SIMILARITY (LEFT) --- with gr.TabItem("Self-Similarity", id="tab_similarity") as tab_sim: gr.Markdown("### Similarity Editing Inputs") with gr.Row(elem_classes="input-row"): with gr.Column(scale=3): sim_struct_mesh = gr.Model3D(label="Structure Mesh (.glb)", interactive=True, height=300) with gr.Column(scale=2): sim_struct_hidden = gr.Image(type="filepath", visible=False) # sim_struct_mesh_examples = gr.Examples(examples=EX_STRUCT_THUMBS, inputs=sim_struct_hidden, label="Structure Examples") sim_struct_mesh_examples = gr.Examples( examples=EX_STRUCT_THUMBS, inputs=sim_struct_hidden, outputs=sim_struct_mesh, # Target the 3D viewer directly fn=load_mesh_from_thumb, # Run the conversion function run_on_click=True, # Force execution on click label="Structure Examples" ) with gr.Row(elem_classes="input-row"): with gr.Column(scale=3): sim_ref_text = gr.Textbox(label="Reference Text Prompt", placeholder="Describe the appearance...", lines=2) with gr.Column(scale=2): gr.Examples(examples=EX_TEXT, inputs=sim_ref_text, label="Prompt Examples") with gr.Row(elem_classes="input-row"): with gr.Column(scale=3): sim_ref_image = gr.Image(label="Reference Appearance Image (Optional)", type="filepath", height=250) with gr.Column(scale=2): gr.Examples(examples=EX_IMG, inputs=sim_ref_image, label="Image Examples") # --- TAB 2: APPEARANCE (RIGHT) --- with gr.TabItem("Appearance", id="tab_appearance") as tab_app: gr.Markdown("### Appearance Transfer Inputs") with gr.Row(elem_classes="input-row"): with gr.Column(scale=3): app_struct_mesh = gr.Model3D(label="Structure Mesh (.glb)", interactive=True, height=300) with gr.Column(scale=2): app_struct_hidden = gr.Image(type="filepath", visible=False) # app_struct_mesh_examples = gr.Examples(examples=EX_STRUCT_THUMBS, inputs=app_struct_hidden, label="Structure Examples") app_struct_mesh_examples = gr.Examples( examples=EX_STRUCT_THUMBS, inputs=app_struct_hidden, outputs=app_struct_mesh, # Target the 3D viewer directly fn=load_mesh_from_thumb, # Run the conversion function run_on_click=True, # Force execution on click label="Structure Examples" ) with gr.Row(elem_classes="input-row"): with gr.Column(scale=3): app_ref_image = gr.Image(label="Reference Appearance Image", type="filepath", height=250) with gr.Column(scale=2): gr.Examples(examples=EX_IMG, inputs=app_ref_image, label="Image Examples") with gr.Row(elem_classes="input-row"): with gr.Column(scale=3): app_ref_mesh = gr.Model3D(label="Reference Appearance Mesh (.glb)", interactive=True, height=300) with gr.Column(scale=2): app_ref_mesh_hidden = gr.Image(type="filepath", visible=False) # app_ref_mesh_examples = gr.Examples(examples=EX_MESH_THUMBS, inputs=app_ref_mesh_hidden, label="Mesh Examples") app_ref_mesh_examples = gr.Examples( examples=EX_MESH_THUMBS, inputs=app_ref_mesh_hidden, outputs=app_ref_mesh, # Target the 3D viewer directly fn=load_mesh_from_thumb, # Run the conversion function run_on_click=True, # Force execution on click label="Mesh Examples" ) # --- ADVANCED SETTINGS --- with gr.Accordion("Advanced Settings", open=False): with gr.Row(): target_up = gr.Radio(["Y-up", "Z-up"], value="Y-up", label="Target Mesh Up-Axis") reference_up = gr.Radio(["Y-up", "Z-up"], value="Y-up", label="Ref Mesh Up-Axis") with gr.Row(): cfg_strength = gr.Slider(0.1, 10.0, value=5.0, step=0.1, label="CFG Strength") num_steps = gr.Slider(50, 1000, value=300, step=50, label="Diffusion Steps") learning_rate = gr.Number(value=5e-4, label="Learning Rate") # --- RUN BUTTON --- with gr.Row(): run_btn = gr.Button("Generate 3D Asset", variant="primary", size="lg") # --- OUTPUTS --- gr.Markdown("### Results") with gr.Row(): with gr.Column(): output_model = gr.Model3D(label="Output Mesh", interactive=False, clear_color=[1.0, 1.0, 1.0, 0.0]) with gr.Column(): output_video = gr.Video(label="Output Video", autoplay=True, loop=True, interactive=False) # --- EVENT BINDING --- # sim_struct_hidden.change(fn=load_mesh_from_thumb, inputs=sim_struct_hidden, outputs=sim_struct_mesh) # app_struct_hidden.change(fn=load_mesh_from_thumb, inputs=app_struct_hidden, outputs=app_struct_mesh) # app_ref_mesh_hidden.change(fn=load_mesh_from_thumb, inputs=app_ref_mesh_hidden, outputs=app_ref_mesh) tab_sim.select(lambda: "Similarity", outputs=guidance_mode_state) tab_app.select(lambda: "Appearance", outputs=guidance_mode_state) run_btn.click( fn=on_run, inputs=[ guidance_mode_state, app_struct_mesh, app_ref_mesh, app_ref_image, sim_struct_mesh, sim_ref_text, sim_ref_image, target_up, reference_up, cfg_strength, num_steps, learning_rate ], outputs=[output_model, output_video] ) demo.load(None, None, None, js="() => { document.body.classList.remove('dark'); }") if __name__ == "__main__": # demo.queue().launch(share=True, allowed_paths=[project_root], mcp_server=True) # Useful for Colab runs demo.queue().launch( allowed_paths=[project_root], mcp_server=True, css=css, theme=gr.themes.Default( primary_hue="sky", secondary_hue="lime" ).set( body_background_fill="white", background_fill_primary="white", block_background_fill="white", input_background_fill="#f9fafb" ) )