""" Architexture 3D FULL - Complete AI Architectural Design Platform Stage 1: Philosophy + 2D Design (Pollinations.ai) Stage 2: Multi-View Generation (MV-Adapter SDXL) Stage 3: 3D Gaussian Splatting (VGGT) Updated: 2025-10-28 """ import os import sys import gc import random import shutil import time from datetime import datetime import glob import gradio as gr import numpy as np import torch import cv2 import requests import urllib.parse from io import BytesIO from PIL import Image, ImageDraw from torchvision import transforms from transformers import AutoModelForImageSegmentation import spaces # Import MV-Adapter modules sys.path.append(".") try: from inference_i2mv_sdxl import prepare_pipeline, remove_bg, run_pipeline except ImportError: print("⚠️ MV-Adapter modules not found - Stage 2 will be disabled") prepare_pipeline = None # Import VGGT modules sys.path.append("vggt/") try: from visual_util import predictions_to_glb from vggt.models.vggt import VGGT from vggt.utils.load_fn import load_and_preprocess_images from vggt.utils.pose_enc import pose_encoding_to_extri_intri from vggt.utils.geometry import unproject_depth_map_to_point_map except ImportError: print("⚠️ VGGT modules not found - Stage 3 will be disabled") VGGT = None print("="*80) print(" ARCHITEXTURE 3D FULL - Complete Pipeline") print("="*80) print("🏛️ Stage 1: Architectural Design (Pollinations.ai)") print("🔄 Stage 2: Multi-View Generation (MV-Adapter SDXL)") print("🎭 Stage 3: 3D Gaussian Splatting (VGGT)") print("="*80) # ============================================================================ # GLOBAL SETUP # ============================================================================ device = "cuda" if torch.cuda.is_available() else "cpu" dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32 print(f"🖥️ Device: {device}") print(f"💾 Dtype: {dtype}") # ============================================================================ # STAGE 1: ARCHITEXTURE - Design Philosophy & 2D Generation # ============================================================================ STYLES = { "Brutalist": { "prompt_suffix": "brutalist architecture with raw concrete, bold geometric forms, monumental scale, professional photography" }, "Art Deco": { "prompt_suffix": "Art Deco architecture with geometric patterns, luxurious materials, golden accents, elegant design, professional photography" }, "Modern": { "prompt_suffix": "modern architecture with clean lines, glass facades, minimalist design, professional photography" }, "Gothic": { "prompt_suffix": "Gothic architecture with pointed arches, ribbed vaults, flying buttresses, ornate details, professional photography" } } def generate_design_philosophy(style, building_type): """Generate architectural design philosophy""" philosophies = { "Brutalist": f"A {style} {building_type} emphasizing bold geometries, raw materials, and human-centered design principles that celebrate structural honesty and functional beauty through exposed concrete and monumental forms.", "Art Deco": f"A {style} {building_type} featuring geometric patterns, luxurious materials, and decorative elements that embody elegance and modernity through symmetrical designs and rich ornamentation.", "Modern": f"A {style} {building_type} showcasing clean lines, open spaces, and functional minimalism that prioritizes simplicity and efficiency through innovative materials and sustainable design.", "Gothic": f"A {style} {building_type} displaying vertical emphasis, pointed arches, and intricate details that inspire awe and spirituality through dramatic height and ornate craftsmanship." } return philosophies.get(style, f"A {style} {building_type} design.") def validate_design(style, philosophy): """Text-based validation""" style_keywords = { "Brutalist": ["concrete", "geometric", "bold", "raw", "monumental", "structural"], "Art Deco": ["geometric", "luxurious", "golden", "elegant", "pattern", "decorative"], "Modern": ["clean", "minimal", "glass", "simple", "functional", "sustainable"], "Gothic": ["arch", "vault", "ornate", "vertical", "dramatic", "spiritual"] } text_lower = philosophy.lower() keywords = style_keywords.get(style, []) matches = sum(1 for keyword in keywords if keyword in text_lower) return f"✅ Validation: {matches}/{len(keywords)} style keywords matched" def generate_2d_image(philosophy, style): """Generate 2D architectural image using Pollinations.ai""" try: print(f"🎨 Generating image for style: {style}") style_suffix = STYLES[style]["prompt_suffix"] full_prompt = f"{philosophy}, {style_suffix}" encoded_prompt = urllib.parse.quote(full_prompt) image_url = f"https://image.pollinations.ai/prompt/{encoded_prompt}?width=1024&height=768&model=flux&nologo=true" print(f"📡 Requesting from Pollinations.ai...") response = requests.get(image_url, timeout=90) print(f"📥 Response status: {response.status_code}") if response.status_code == 200: print(f"✅ Image generated successfully!") return Image.open(BytesIO(response.content)) else: return create_placeholder_image(f"API Error: Status {response.status_code}") except Exception as e: print(f"❌ Error: {str(e)}") return create_placeholder_image(f"Error: {str(e)}") def create_placeholder_image(text): """Create placeholder image with error message""" img = Image.new('RGB', (1024, 768), color=(240, 240, 245)) draw = ImageDraw.Draw(img) draw.rectangle([(10, 10), (1014, 758)], outline=(200, 200, 210), width=3) draw.text((50, 350), text, fill=(60, 60, 80)) return img def architexture_generate(style, building_type): """Main function for Architexture tab""" print(f"\n{'='*60}") print(f"🚀 Stage 1: Generating {style} {building_type}") print(f"{'='*60}") if not building_type or building_type.strip() == "": return "❌ Please enter a building type", "❌ Validation skipped", create_placeholder_image("No building type provided") philosophy = generate_design_philosophy(style, building_type) validation = validate_design(style, philosophy) image_2d = generate_2d_image(philosophy, style) print(f"✅ Stage 1 Complete!\n") return philosophy, validation, image_2d # ============================================================================ # STAGE 2: MV-ADAPTER - Multi-View Generation # ============================================================================ # Lazy loading for MV-Adapter mv_pipe = None birefnet = None transform_image = None NUM_VIEWS = 6 HEIGHT = 768 WIDTH = 768 MAX_SEED = np.iinfo(np.int32).max @spaces.GPU def load_and_run_mvadapter(input_image_np, prompt, do_rembg, seed, randomize_seed, guidance_scale, num_inference_steps, reference_conditioning_scale): """Load MV-Adapter and generate multi-view (uses ZeroGPU)""" global mv_pipe, birefnet, transform_image print(f"🔄 Starting MV-Adapter generation...") print(f" Input image type: {type(input_image_np)}") print(f" Prompt: {prompt}") print(f" Do rembg: {do_rembg}") device = "cuda" dtype = torch.bfloat16 # Load pipeline if needed if mv_pipe is None: print("🔄 Loading MV-Adapter SDXL pipeline...") try: mv_pipe = prepare_pipeline( base_model="stabilityai/stable-diffusion-xl-base-1.0", vae_model="madebyollin/sdxl-vae-fp16-fix", unet_model=None, lora_model=None, adapter_path="huanngzh/mv-adapter", scheduler=None, num_views=NUM_VIEWS, device=device, dtype=dtype, ) print("✅ MV-Adapter loaded!") except Exception as e: print(f"❌ Failed to load MV-Adapter: {e}") raise # Load BiRefNet if needed if birefnet is None and do_rembg: print("🔄 Loading BiRefNet for background removal...") birefnet = AutoModelForImageSegmentation.from_pretrained( "ZhengPeng7/BiRefNet", trust_remote_code=True ) birefnet.to(device) transform_image = transforms.Compose([ transforms.Resize((1024, 1024)), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), ]) print("✅ BiRefNet loaded!") # Handle input image - could be PIL Image or numpy array if isinstance(input_image_np, Image.Image): input_image = input_image_np else: input_image = Image.fromarray(input_image_np) # Setup background removal if do_rembg and birefnet is not None: remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, device) else: remove_bg_fn = None # Handle seed if randomize_seed: seed = random.randint(0, MAX_SEED) print(f" Using seed: {seed}") print(f" Guidance scale: {guidance_scale}, Steps: {num_inference_steps}") negative_prompt = "watermark, ugly, deformed, noisy, blurry, low contrast" # Run pipeline print("🔄 Running MV-Adapter pipeline...") try: images, preprocessed_image = run_pipeline( mv_pipe, num_views=NUM_VIEWS, text=prompt, image=input_image, height=HEIGHT, width=WIDTH, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale, seed=seed, remove_bg_fn=remove_bg_fn, reference_conditioning_scale=reference_conditioning_scale, negative_prompt=negative_prompt, device=device, ) print(f"✅ Generated {len(images)} views!") except Exception as e: print(f"❌ Pipeline execution failed: {e}") raise return images, preprocessed_image, seed def generate_multiview(input_image, prompt, do_rembg=True, seed=42, randomize_seed=False, guidance_scale=3.0, num_inference_steps=30, reference_conditioning_scale=1.0): """Generate multiple views from single image""" if input_image is None: return [create_placeholder_image("Please upload an image first")], None, 42, "❌ No input image" # Check if MV-Adapter is available if prepare_pipeline is None: error_msg = "❌ MV-Adapter not available - inference_i2mv_sdxl module failed to import" return [create_placeholder_image(error_msg)], input_image, seed, error_msg try: print("=" * 60) print("🚀 Starting Multi-View Generation") print(f" GPU Available: {torch.cuda.is_available()}") print(f" Input type: {type(input_image)}") print(f" Prompt: {prompt}") print("=" * 60) images, preprocessed, seed = load_and_run_mvadapter( input_image, prompt, do_rembg, seed, randomize_seed, guidance_scale, num_inference_steps, reference_conditioning_scale ) print(f"✅ Success! Generated {len(images)} images") print(f" Images type: {type(images)}") print(f" First image type: {type(images[0]) if images else 'None'}") # Ensure images is a list of PIL Images if not isinstance(images, list): images = [images] return images, preprocessed, seed, f"✅ Generated {len(images)} multi-view images" except Exception as e: error_msg = f"❌ Error: {str(e)}" print(f"❌ MV-Adapter Error: {e}") import traceback traceback.print_exc() return [create_placeholder_image(error_msg)], input_image, seed, error_msg # ============================================================================ # STAGE 3: VGGT - 3D Gaussian Splatting (ZeroGPU) # ============================================================================ vggt_model = None @spaces.GPU(duration=120) def run_vggt_reconstruction(target_dir, conf_thres, show_cam): """Run VGGT 3D reconstruction (uses ZeroGPU for 120s)""" global vggt_model device = "cuda" dtype = torch.bfloat16 # Load model if needed if vggt_model is None: print("🎭 Loading VGGT-1B model...") vggt_model = VGGT() _URL = "https://huggingface.co/facebook/VGGT-1B/resolve/main/model.pt" vggt_model.load_state_dict(torch.hub.load_state_dict_from_url(_URL)) vggt_model.eval() print("✅ VGGT loaded!") vggt_model.to(device) # Load images image_names = glob.glob(os.path.join(target_dir, "images", "*")) image_names = sorted(image_names) if len(image_names) == 0: raise ValueError("No images found") images = load_and_preprocess_images(image_names).to(device) # Run inference with torch.no_grad(): with torch.cuda.amp.autocast(dtype=dtype): predictions = vggt_model(images) # Process predictions extrinsic, intrinsic = pose_encoding_to_extri_intri(predictions["pose_enc"], images.shape[-2:]) predictions["extrinsic"] = extrinsic predictions["intrinsic"] = intrinsic for key in predictions.keys(): if isinstance(predictions[key], torch.Tensor): predictions[key] = predictions[key].cpu().numpy().squeeze(0) depth_map = predictions["depth"] world_points = unproject_depth_map_to_point_map(depth_map, predictions["extrinsic"], predictions["intrinsic"]) predictions["world_points_from_depth"] = world_points # Save predictions prediction_save_path = os.path.join(target_dir, "predictions.npz") np.savez(prediction_save_path, **predictions) # Generate GLB glbfile = os.path.join(target_dir, f"scene_{conf_thres}_cam{show_cam}.glb") glbscene = predictions_to_glb( predictions, conf_thres=conf_thres, filter_by_frames="All", mask_black_bg=False, mask_white_bg=False, show_cam=show_cam, mask_sky=False, target_dir=target_dir, prediction_mode="Depthmap and Camera Branch", ) glbscene.export(file_obj=glbfile) del predictions torch.cuda.empty_cache() return glbfile def handle_3d_uploads(input_images): """Handle uploaded images for 3D reconstruction""" if input_images is None or len(input_images) == 0: return None, [] timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") target_dir = f"input_images_{timestamp}" target_dir_images = os.path.join(target_dir, "images") if os.path.exists(target_dir): shutil.rmtree(target_dir) os.makedirs(target_dir) os.makedirs(target_dir_images) image_paths = [] for i, file_data in enumerate(input_images): if hasattr(file_data, 'name'): file_path = file_data.name elif isinstance(file_data, dict) and "name" in file_data: file_path = file_data["name"] else: file_path = str(file_data) dst_path = os.path.join(target_dir_images, f"{i:06d}.png") shutil.copy(file_path, dst_path) image_paths.append(dst_path) return target_dir, sorted(image_paths) def generate_3d_gaussian(input_images, conf_thres=50.0, show_cam=True): """Generate 3D Gaussian representation""" if input_images is None or len(input_images) == 0: return None, "❌ Please provide images for 3D reconstruction" try: gc.collect() torch.cuda.empty_cache() target_dir, image_paths = handle_3d_uploads(input_images) glbfile = run_vggt_reconstruction(target_dir, conf_thres, show_cam) return glbfile, f"✅ 3D reconstruction complete! {len(image_paths)} images processed" except Exception as e: return None, f"❌ Error: {str(e)}" return glbfile, f"✅ 3D reconstruction complete! {len(image_paths)} images processed" except Exception as e: print(f"❌ Error in 3D generation: {str(e)}") return None, f"❌ Error: {str(e)}" # ============================================================================ # GRADIO INTERFACE # ============================================================================ with gr.Blocks(title="Architexture 3D FULL", theme=gr.themes.Soft()) as demo: gr.Markdown(""" # 🏛️ Architexture 3D - Complete AI Architectural Design Platform ### Full 3-Stage Pipeline: Philosophy → 2D Design → Multi-View → 3D Model **Three integrated AI systems:** 1. **Architexture**: Generate architectural philosophy and 2D designs (CPU-friendly) 2. **MV-Adapter**: Create 6 multi-view images from single image (GPU required) 3. **VGGT**: Build 3D Gaussian splatting models (GPU required) """) with gr.Tabs(): # ===== TAB 1: ARCHITEXTURE ===== with gr.Tab("🎨 Stage 1: Design Generation"): gr.Markdown("### Generate Architectural Design Philosophy and 2D Image") with gr.Row(): with gr.Column(): style_input = gr.Dropdown( choices=list(STYLES.keys()), value="Brutalist", label="Architectural Style" ) building_input = gr.Textbox( label="Building Type", placeholder="e.g., university library, concert hall, museum", value="university library" ) generate_btn = gr.Button("🚀 Generate Design", variant="primary", size="lg") with gr.Column(): philosophy_output = gr.Textbox( label="Design Philosophy", lines=6 ) validation_output = gr.Textbox( label="Validation Result" ) image_2d_output = gr.Image(label="Generated 2D Design", type="pil", height=512) generate_btn.click( fn=architexture_generate, inputs=[style_input, building_input], outputs=[philosophy_output, validation_output, image_2d_output] ) gr.Markdown(f""" **Usage:** 1. Select architectural style 2. Enter building type 3. Click Generate → Get philosophy + 2D image 4. Use image in Stage 2 for multi-view generation **✅ Status**: Fully functional (CPU-only, <200MB memory) """) # ===== TAB 2: MULTI-VIEW ===== with gr.Tab("🔄 Stage 2: Multi-View Generation"): gr.Markdown("### Generate 6 Multi-View Images from Single Image (GPU Required)") with gr.Row(): with gr.Column(): mv_input_image = gr.Image( label="Input Image (from Stage 1 or upload)", type="pil" ) mv_prompt = gr.Textbox( label="Prompt", placeholder="high quality, detailed", value="high quality" ) mv_do_rembg = gr.Checkbox(label="Remove Background", value=False) mv_generate_btn = gr.Button("🔄 Generate Multi-View", variant="primary", size="lg") with gr.Accordion("⚙️ Advanced Settings (Reduce for lower GPU quota usage)", open=False): mv_seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed") mv_randomize = gr.Checkbox(label="Randomize Seed", value=True) mv_guidance = gr.Slider(0.0, 10.0, value=3.0, step=0.1, label="CFG Scale") mv_steps = gr.Slider(1, 50, value=20, step=1, label="Inference Steps (⬇️ Lower = Less GPU)") mv_img_scale = gr.Slider(0.0, 2.0, value=1.0, step=0.1, label="Image Conditioning Scale") with gr.Column(): mv_preprocessed = gr.Image(label="Preprocessed Image", type="pil") mv_output_gallery = gr.Gallery( label="Generated Multi-View Images", columns=3, rows=2, height=600 ) mv_status = gr.Textbox(label="Status") mv_generate_btn.click( fn=generate_multiview, inputs=[mv_input_image, mv_prompt, mv_do_rembg, mv_seed, mv_randomize, mv_guidance, mv_steps, mv_img_scale], outputs=[mv_output_gallery, mv_preprocessed, mv_seed, mv_status] ) gr.Markdown(f""" **💡 Running on ZeroGPU (Serverless):** - ✅ **No local GPU required** - Uses Hugging Face's free GPU - ✅ **Login required** - Sign in to get your GPU quota - ✅ Models: SDXL, BiRefNet, MV-Adapter (loaded on first use) - ⏱️ Processing time: ~20-60 seconds per generation **⚠️ GPU Quota Tips:** - Lower **Inference Steps** (20 instead of 30) to save quota - Disable **Remove Background** if not needed - Daily quota resets every 24 hours **🔧 Troubleshooting:** - If quota error persists, try refreshing the page - Check the Space logs (Settings → Logs) for detailed errors - Verify you're logged in to Hugging Face """) # ===== TAB 3: 3D GENERATION ===== with gr.Tab("🎭 Stage 3: 3D Gaussian Splatting"): gr.Markdown("### Generate 3D Model from Multiple Views (GPU Required)") with gr.Row(): with gr.Column(): gs_input_images = gr.File( file_count="multiple", label="Upload Images (from Stage 2 or multiple views)", file_types=["image"] ) gs_conf_thres = gr.Slider(0, 100, value=50, step=0.1, label="Confidence Threshold (%)") gs_show_cam = gr.Checkbox(label="Show Camera Poses", value=True) gs_generate_btn = gr.Button("🎭 Generate 3D Model", variant="primary", size="lg") with gr.Column(): gs_output_3d = gr.Model3D(label="3D Gaussian Splatting Model", height=600) gs_status = gr.Textbox(label="Status") gs_generate_btn.click( fn=generate_3d_gaussian, inputs=[gs_input_images, gs_conf_thres, gs_show_cam], outputs=[gs_output_3d, gs_status] ) gr.Markdown(f""" **💡 Running on ZeroGPU (Serverless):** - ✅ **No local GPU required** - Uses Hugging Face's free GPU - ✅ **Login required** - Sign in to get your GPU quota - ✅ Model: VGGT-1B (~1GB, loaded on first use) - ⏱️ Processing time: ~60-120 seconds (higher timeout) - 📦 **Output**: GLB 3D model file (viewable in Blender, Three.js, etc) """) gr.Markdown(f""" --- ### 🚀 System Status: - **Platform**: Hugging Face Spaces with **ZeroGPU** (Serverless) - **Stage 1**: ✅ Always Available (CPU-only, no GPU needed) - **Stage 2**: ✅ Available (ZeroGPU - requires login) - **Stage 3**: ✅ Available (ZeroGPU - requires login) ### 💡 Tips: - **Login required** for Stage 2 & 3 to access GPU quota - Run Stage 1 first to generate architectural designs - Use Stage 1 output as input for Stage 2 - Use Stage 2 multi-view outputs for Stage 3 - GPU features use lazy loading (models load on first use) """) if __name__ == "__main__": demo.launch( server_name="0.0.0.0", server_port=7860, share=False )