# architexture-3d / app.py
# Last commit: "Add detailed logging and troubleshooting tips for GPU quota issues"
# (2a9f8de, by Britto-j2004)
"""
Architexture 3D FULL - Complete AI Architectural Design Platform
Stage 1: Philosophy + 2D Design (Pollinations.ai)
Stage 2: Multi-View Generation (MV-Adapter SDXL)
Stage 3: 3D Gaussian Splatting (VGGT)
Updated: 2025-10-28
"""
import os
import sys
import gc
import random
import shutil
import time
from datetime import datetime
import glob
import gradio as gr
import numpy as np
import torch
import cv2
import requests
import urllib.parse
from io import BytesIO
from PIL import Image, ImageDraw
from torchvision import transforms
from transformers import AutoModelForImageSegmentation
import spaces
# Import MV-Adapter modules
sys.path.append(".")
try:
from inference_i2mv_sdxl import prepare_pipeline, remove_bg, run_pipeline
except ImportError:
print("⚠️ MV-Adapter modules not found - Stage 2 will be disabled")
prepare_pipeline = None
# Import VGGT modules
sys.path.append("vggt/")
try:
from visual_util import predictions_to_glb
from vggt.models.vggt import VGGT
from vggt.utils.load_fn import load_and_preprocess_images
from vggt.utils.pose_enc import pose_encoding_to_extri_intri
from vggt.utils.geometry import unproject_depth_map_to_point_map
except ImportError:
print("⚠️ VGGT modules not found - Stage 3 will be disabled")
VGGT = None
# ----------------------------------------------------------------------------
# Startup banner and global compute configuration.
# ----------------------------------------------------------------------------
_BANNER = "=" * 80
print(_BANNER)
print(" ARCHITEXTURE 3D FULL - Complete Pipeline")
print(_BANNER)
print("πŸ›οΈ Stage 1: Architectural Design (Pollinations.ai)")
print("πŸ”„ Stage 2: Multi-View Generation (MV-Adapter SDXL)")
print("🎭 Stage 3: 3D Gaussian Splatting (VGGT)")
print(_BANNER)
# GLOBAL SETUP: GPU + bfloat16 when CUDA is available, otherwise CPU + float32.
_cuda_available = torch.cuda.is_available()
device = "cuda" if _cuda_available else "cpu"
dtype = torch.bfloat16 if _cuda_available else torch.float32
print(f"πŸ–₯️ Device: {device}")
print(f"πŸ’Ύ Dtype: {dtype}")
# ============================================================================
# STAGE 1: ARCHITEXTURE - Design Philosophy & 2D Generation
# ============================================================================
# Supported architectural styles. Each entry's "prompt_suffix" is appended to
# the generated design philosophy to build the final text-to-image prompt.
STYLES = {
    "Brutalist": {
        "prompt_suffix": "brutalist architecture with raw concrete, bold geometric forms, monumental scale, professional photography"
    },
    "Art Deco": {
        "prompt_suffix": "Art Deco architecture with geometric patterns, luxurious materials, golden accents, elegant design, professional photography"
    },
    "Modern": {
        "prompt_suffix": "modern architecture with clean lines, glass facades, minimalist design, professional photography"
    },
    "Gothic": {
        "prompt_suffix": "Gothic architecture with pointed arches, ribbed vaults, flying buttresses, ornate details, professional photography"
    }
}
def generate_design_philosophy(style, building_type):
    """Return a one-paragraph design philosophy for the style/building pair.

    Unknown styles fall back to a minimal "A {style} {building_type} design."
    sentence.
    """
    # Style-specific tails; each completes "A {style} {building_type} ...".
    tails = {
        "Brutalist": ("emphasizing bold geometries, raw materials, and human-centered "
                      "design principles that celebrate structural honesty and functional "
                      "beauty through exposed concrete and monumental forms."),
        "Art Deco": ("featuring geometric patterns, luxurious materials, and decorative "
                     "elements that embody elegance and modernity through symmetrical "
                     "designs and rich ornamentation."),
        "Modern": ("showcasing clean lines, open spaces, and functional minimalism that "
                   "prioritizes simplicity and efficiency through innovative materials "
                   "and sustainable design."),
        "Gothic": ("displaying vertical emphasis, pointed arches, and intricate details "
                   "that inspire awe and spirituality through dramatic height and ornate "
                   "craftsmanship."),
    }
    if style in tails:
        return f"A {style} {building_type} {tails[style]}"
    return f"A {style} {building_type} design."
def validate_design(style, philosophy):
    """Score *philosophy* against the keyword list for *style*.

    Returns a human-readable "matched/total" summary string; unknown styles
    yield a 0/0 result.
    """
    keyword_map = {
        "Brutalist": ["concrete", "geometric", "bold", "raw", "monumental", "structural"],
        "Art Deco": ["geometric", "luxurious", "golden", "elegant", "pattern", "decorative"],
        "Modern": ["clean", "minimal", "glass", "simple", "functional", "sustainable"],
        "Gothic": ["arch", "vault", "ornate", "vertical", "dramatic", "spiritual"],
    }
    expected = keyword_map.get(style, [])
    lowered = philosophy.lower()
    # Case-insensitive substring matching, one point per keyword present.
    hit_count = sum(word in lowered for word in expected)
    return f"βœ… Validation: {hit_count}/{len(expected)} style keywords matched"
def generate_2d_image(philosophy, style):
    """Render a 2D concept image via the Pollinations.ai text-to-image API.

    Builds a prompt from the philosophy plus the style's prompt suffix and
    fetches a 1024x768 image. On any failure (HTTP error, timeout, etc.) a
    placeholder image carrying the error message is returned instead.
    """
    try:
        print(f"🎨 Generating image for style: {style}")
        suffix = STYLES[style]["prompt_suffix"]
        prompt_text = f"{philosophy}, {suffix}"
        url = (
            "https://image.pollinations.ai/prompt/"
            + urllib.parse.quote(prompt_text)
            + "?width=1024&height=768&model=flux&nologo=true"
        )
        print(f"πŸ“‘ Requesting from Pollinations.ai...")
        response = requests.get(url, timeout=90)
        print(f"πŸ“₯ Response status: {response.status_code}")
        if response.status_code != 200:
            return create_placeholder_image(f"API Error: Status {response.status_code}")
        print(f"βœ… Image generated successfully!")
        return Image.open(BytesIO(response.content))
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return create_placeholder_image(f"Error: {str(e)}")
def create_placeholder_image(text):
    """Return a 1024x768 light-grey image with *text* drawn on it.

    Used as a visual fallback whenever a generation step fails.
    """
    canvas = Image.new('RGB', (1024, 768), color=(240, 240, 245))
    painter = ImageDraw.Draw(canvas)
    # Thin border so the placeholder is visibly distinct from a real render.
    painter.rectangle([(10, 10), (1014, 758)], outline=(200, 200, 210), width=3)
    painter.text((50, 350), text, fill=(60, 60, 80))
    return canvas
def architexture_generate(style, building_type):
    """Stage 1 entry point wired to the Gradio button.

    Returns (philosophy_text, validation_summary, 2d_image). A blank building
    type short-circuits to error strings plus a placeholder image.
    """
    header = "=" * 60
    print(f"\n{header}")
    print(f"πŸš€ Stage 1: Generating {style} {building_type}")
    print(f"{header}")
    if not building_type or not building_type.strip():
        return ("❌ Please enter a building type",
                "❌ Validation skipped",
                create_placeholder_image("No building type provided"))
    philosophy = generate_design_philosophy(style, building_type)
    validation = validate_design(style, philosophy)
    rendered = generate_2d_image(philosophy, style)
    print(f"βœ… Stage 1 Complete!\n")
    return philosophy, validation, rendered
# ============================================================================
# STAGE 2: MV-ADAPTER - Multi-View Generation
# ============================================================================
# Lazy loading for MV-Adapter: these handles stay None until the first
# @spaces.GPU call so the Space can start without downloading model weights.
mv_pipe = None          # MV-Adapter SDXL pipeline
birefnet = None         # BiRefNet background-removal model
transform_image = None  # torchvision preprocessing pipeline for BiRefNet
NUM_VIEWS = 6   # number of views MV-Adapter generates per input image
HEIGHT = 768    # generated view height in pixels
WIDTH = 768     # generated view width in pixels
MAX_SEED = np.iinfo(np.int32).max  # upper bound for the seed slider
@spaces.GPU
def load_and_run_mvadapter(input_image_np, prompt, do_rembg, seed, randomize_seed,
                           guidance_scale, num_inference_steps, reference_conditioning_scale):
    """Load MV-Adapter lazily and generate multi-view images (runs on ZeroGPU).

    Args:
        input_image_np: Input image (PIL Image or numpy array).
        prompt: Text prompt guiding the generation.
        do_rembg: Whether to strip the background with BiRefNet first.
        seed: RNG seed; replaced by a random one when randomize_seed is True.
        randomize_seed: If True, pick a fresh random seed in [0, MAX_SEED].
        guidance_scale: Classifier-free guidance scale.
        num_inference_steps: Number of diffusion steps.
        reference_conditioning_scale: Strength of reference-image conditioning.

    Returns:
        (images, preprocessed_image, seed) — list of generated views, the
        (possibly background-removed) input, and the seed actually used.
    """
    global mv_pipe, birefnet, transform_image
    print(f"πŸ”„ Starting MV-Adapter generation...")
    print(f" Input image type: {type(input_image_np)}")
    print(f" Prompt: {prompt}")
    print(f" Do rembg: {do_rembg}")
    # Inside a @spaces.GPU call CUDA is always available.
    device = "cuda"
    dtype = torch.bfloat16
    # Load pipeline if needed (first call only; cached in the module global).
    if mv_pipe is None:
        print("πŸ”„ Loading MV-Adapter SDXL pipeline...")
        try:
            mv_pipe = prepare_pipeline(
                base_model="stabilityai/stable-diffusion-xl-base-1.0",
                vae_model="madebyollin/sdxl-vae-fp16-fix",
                unet_model=None,
                lora_model=None,
                adapter_path="huanngzh/mv-adapter",
                scheduler=None,
                num_views=NUM_VIEWS,
                device=device,
                dtype=dtype,
            )
            print("βœ… MV-Adapter loaded!")
        except Exception as e:
            print(f"❌ Failed to load MV-Adapter: {e}")
            raise
    # Load BiRefNet if needed — only when background removal was requested.
    if birefnet is None and do_rembg:
        print("πŸ”„ Loading BiRefNet for background removal...")
        birefnet = AutoModelForImageSegmentation.from_pretrained(
            "ZhengPeng7/BiRefNet", trust_remote_code=True
        )
        birefnet.to(device)
        # ImageNet mean/std normalization at BiRefNet's expected 1024x1024 input.
        transform_image = transforms.Compose([
            transforms.Resize((1024, 1024)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
        print("βœ… BiRefNet loaded!")
    # Handle input image - could be PIL Image or numpy array (Gradio sends either).
    if isinstance(input_image_np, Image.Image):
        input_image = input_image_np
    else:
        input_image = Image.fromarray(input_image_np)
    # Setup background removal callback (None disables it inside run_pipeline).
    if do_rembg and birefnet is not None:
        remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, device)
    else:
        remove_bg_fn = None
    # Handle seed
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    print(f" Using seed: {seed}")
    print(f" Guidance scale: {guidance_scale}, Steps: {num_inference_steps}")
    negative_prompt = "watermark, ugly, deformed, noisy, blurry, low contrast"
    # Run pipeline
    print("πŸ”„ Running MV-Adapter pipeline...")
    try:
        images, preprocessed_image = run_pipeline(
            mv_pipe,
            num_views=NUM_VIEWS,
            text=prompt,
            image=input_image,
            height=HEIGHT,
            width=WIDTH,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            seed=seed,
            remove_bg_fn=remove_bg_fn,
            reference_conditioning_scale=reference_conditioning_scale,
            negative_prompt=negative_prompt,
            device=device,
        )
        print(f"βœ… Generated {len(images)} views!")
    except Exception as e:
        print(f"❌ Pipeline execution failed: {e}")
        raise
    return images, preprocessed_image, seed
def generate_multiview(input_image, prompt, do_rembg=True, seed=42, randomize_seed=False,
                       guidance_scale=3.0, num_inference_steps=30, reference_conditioning_scale=1.0):
    """Stage 2 entry point: generate multiple views from a single image.

    Returns (images, preprocessed_image, seed, status_message). Failures never
    raise; they come back as a placeholder image plus an error status string.
    """
    # Guard: nothing to work with.
    if input_image is None:
        return [create_placeholder_image("Please upload an image first")], None, 42, "❌ No input image"
    # Guard: the MV-Adapter inference module failed to import at startup.
    if prepare_pipeline is None:
        error_msg = "❌ MV-Adapter not available - inference_i2mv_sdxl module failed to import"
        return [create_placeholder_image(error_msg)], input_image, seed, error_msg
    try:
        divider = "=" * 60
        print(divider)
        print("πŸš€ Starting Multi-View Generation")
        print(f" GPU Available: {torch.cuda.is_available()}")
        print(f" Input type: {type(input_image)}")
        print(f" Prompt: {prompt}")
        print(divider)
        views, preprocessed, seed = load_and_run_mvadapter(
            input_image, prompt, do_rembg, seed, randomize_seed,
            guidance_scale, num_inference_steps, reference_conditioning_scale
        )
        print(f"βœ… Success! Generated {len(views)} images")
        print(f" Images type: {type(views)}")
        print(f" First image type: {type(views[0]) if views else 'None'}")
        # Normalize a single-image result so the gallery always gets a list.
        if not isinstance(views, list):
            views = [views]
        return views, preprocessed, seed, f"βœ… Generated {len(views)} multi-view images"
    except Exception as e:
        error_msg = f"❌ Error: {str(e)}"
        print(f"❌ MV-Adapter Error: {e}")
        import traceback
        traceback.print_exc()
        return [create_placeholder_image(error_msg)], input_image, seed, error_msg
# ============================================================================
# STAGE 3: VGGT - 3D Gaussian Splatting (ZeroGPU)
# ============================================================================
# Lazily loaded VGGT-1B model; populated on the first Stage 3 request.
vggt_model = None
@spaces.GPU(duration=120)
def run_vggt_reconstruction(target_dir, conf_thres, show_cam):
    """Run VGGT 3D reconstruction (uses ZeroGPU for up to 120s).

    Args:
        target_dir: Directory containing an "images/" subfolder of input views.
        conf_thres: Confidence threshold (percent) for point filtering.
        show_cam: Whether to include camera poses in the exported GLB scene.

    Returns:
        Path to the exported .glb file written inside target_dir.

    Raises:
        ValueError: If no images are found under target_dir/images.
    """
    global vggt_model
    device = "cuda"
    dtype = torch.bfloat16
    # Load model if needed; weights are fetched from the HF hub on first use.
    if vggt_model is None:
        print("🎭 Loading VGGT-1B model...")
        vggt_model = VGGT()
        _URL = "https://huggingface.co/facebook/VGGT-1B/resolve/main/model.pt"
        vggt_model.load_state_dict(torch.hub.load_state_dict_from_url(_URL))
        vggt_model.eval()
        print("βœ… VGGT loaded!")
    vggt_model.to(device)
    # Load images
    image_names = glob.glob(os.path.join(target_dir, "images", "*"))
    image_names = sorted(image_names)
    if len(image_names) == 0:
        raise ValueError("No images found")
    images = load_and_preprocess_images(image_names).to(device)
    # Run inference under autocast to keep memory within the ZeroGPU budget.
    with torch.no_grad():
        with torch.cuda.amp.autocast(dtype=dtype):
            predictions = vggt_model(images)
    # Recover camera extrinsics/intrinsics from the predicted pose encoding.
    extrinsic, intrinsic = pose_encoding_to_extri_intri(predictions["pose_enc"], images.shape[-2:])
    predictions["extrinsic"] = extrinsic
    predictions["intrinsic"] = intrinsic
    # Move every tensor to CPU numpy and drop the leading batch dimension.
    for key in predictions.keys():
        if isinstance(predictions[key], torch.Tensor):
            predictions[key] = predictions[key].cpu().numpy().squeeze(0)
    depth_map = predictions["depth"]
    # Back-project per-view depth maps into a world-space point map.
    world_points = unproject_depth_map_to_point_map(depth_map, predictions["extrinsic"], predictions["intrinsic"])
    predictions["world_points_from_depth"] = world_points
    # Save predictions so the scene can be re-filtered without re-running inference.
    prediction_save_path = os.path.join(target_dir, "predictions.npz")
    np.savez(prediction_save_path, **predictions)
    # Generate GLB (filename encodes the filter settings used).
    glbfile = os.path.join(target_dir, f"scene_{conf_thres}_cam{show_cam}.glb")
    glbscene = predictions_to_glb(
        predictions,
        conf_thres=conf_thres,
        filter_by_frames="All",
        mask_black_bg=False,
        mask_white_bg=False,
        show_cam=show_cam,
        mask_sky=False,
        target_dir=target_dir,
        prediction_mode="Depthmap and Camera Branch",
    )
    glbscene.export(file_obj=glbfile)
    # Free GPU memory before the ZeroGPU slot is released.
    del predictions
    torch.cuda.empty_cache()
    return glbfile
def handle_3d_uploads(input_images):
    """Copy uploaded files into a fresh timestamped directory for VGGT.

    Accepts Gradio file objects, dicts carrying a "name" key, or plain path
    strings. Returns (target_dir, sorted_copied_paths), or (None, []) when
    nothing was uploaded.
    """
    if not input_images:
        return None, []
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target_dir = f"input_images_{stamp}"
    images_dir = os.path.join(target_dir, "images")
    # Start from a clean directory in the unlikely event of a name collision.
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    os.makedirs(target_dir)
    os.makedirs(images_dir)
    copied = []
    for index, item in enumerate(input_images):
        # Gradio may hand us file objects, dicts, or raw path strings.
        if hasattr(item, 'name'):
            src = item.name
        elif isinstance(item, dict) and "name" in item:
            src = item["name"]
        else:
            src = str(item)
        # Zero-padded names keep the views in upload order when sorted.
        dest = os.path.join(images_dir, f"{index:06d}.png")
        shutil.copy(src, dest)
        copied.append(dest)
    return target_dir, sorted(copied)
def generate_3d_gaussian(input_images, conf_thres=50.0, show_cam=True):
    """Stage 3 entry point: build a GLB 3D model from a set of view images.

    Args:
        input_images: Uploaded files (Gradio file objects, dicts, or paths);
            may be None or empty.
        conf_thres: Confidence threshold percentage forwarded to VGGT filtering.
        show_cam: Whether to render camera poses in the exported GLB scene.

    Returns:
        (glb_file_path, status_message); glb_file_path is None on failure.
    """
    # Bug fix: the original body had a duplicated return statement plus an
    # orphaned second `except` clause after the try/except (merge artifact);
    # the duplicate is removed and the logging except branch kept.
    if input_images is None or len(input_images) == 0:
        return None, "❌ Please provide images for 3D reconstruction"
    try:
        # Free host and GPU memory before a heavy reconstruction run.
        gc.collect()
        torch.cuda.empty_cache()
        target_dir, image_paths = handle_3d_uploads(input_images)
        glbfile = run_vggt_reconstruction(target_dir, conf_thres, show_cam)
        return glbfile, f"βœ… 3D reconstruction complete! {len(image_paths)} images processed"
    except Exception as e:
        print(f"❌ Error in 3D generation: {str(e)}")
        return None, f"❌ Error: {str(e)}"
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
# Three-tab UI, one tab per pipeline stage; each tab's button is wired to the
# corresponding stage function defined above.
with gr.Blocks(title="Architexture 3D FULL", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# πŸ›οΈ Architexture 3D - Complete AI Architectural Design Platform
### Full 3-Stage Pipeline: Philosophy β†’ 2D Design β†’ Multi-View β†’ 3D Model
**Three integrated AI systems:**
1. **Architexture**: Generate architectural philosophy and 2D designs (CPU-friendly)
2. **MV-Adapter**: Create 6 multi-view images from single image (GPU required)
3. **VGGT**: Build 3D Gaussian splatting models (GPU required)
""")
    with gr.Tabs():
        # ===== TAB 1: ARCHITEXTURE =====
        with gr.Tab("🎨 Stage 1: Design Generation"):
            gr.Markdown("### Generate Architectural Design Philosophy and 2D Image")
            with gr.Row():
                with gr.Column():
                    style_input = gr.Dropdown(
                        choices=list(STYLES.keys()),
                        value="Brutalist",
                        label="Architectural Style"
                    )
                    building_input = gr.Textbox(
                        label="Building Type",
                        placeholder="e.g., university library, concert hall, museum",
                        value="university library"
                    )
                    generate_btn = gr.Button("πŸš€ Generate Design", variant="primary", size="lg")
                with gr.Column():
                    philosophy_output = gr.Textbox(
                        label="Design Philosophy",
                        lines=6
                    )
                    validation_output = gr.Textbox(
                        label="Validation Result"
                    )
                    image_2d_output = gr.Image(label="Generated 2D Design", type="pil", height=512)
            # Wire Stage 1 button -> architexture_generate.
            generate_btn.click(
                fn=architexture_generate,
                inputs=[style_input, building_input],
                outputs=[philosophy_output, validation_output, image_2d_output]
            )
            gr.Markdown(f"""
**Usage:**
1. Select architectural style
2. Enter building type
3. Click Generate β†’ Get philosophy + 2D image
4. Use image in Stage 2 for multi-view generation
**βœ… Status**: Fully functional (CPU-only, <200MB memory)
""")
        # ===== TAB 2: MULTI-VIEW =====
        with gr.Tab("πŸ”„ Stage 2: Multi-View Generation"):
            gr.Markdown("### Generate 6 Multi-View Images from Single Image (GPU Required)")
            with gr.Row():
                with gr.Column():
                    mv_input_image = gr.Image(
                        label="Input Image (from Stage 1 or upload)",
                        type="pil"
                    )
                    mv_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="high quality, detailed",
                        value="high quality"
                    )
                    mv_do_rembg = gr.Checkbox(label="Remove Background", value=False)
                    mv_generate_btn = gr.Button("πŸ”„ Generate Multi-View", variant="primary", size="lg")
                    with gr.Accordion("βš™οΈ Advanced Settings (Reduce for lower GPU quota usage)", open=False):
                        mv_seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
                        mv_randomize = gr.Checkbox(label="Randomize Seed", value=True)
                        mv_guidance = gr.Slider(0.0, 10.0, value=3.0, step=0.1, label="CFG Scale")
                        mv_steps = gr.Slider(1, 50, value=20, step=1, label="Inference Steps (⬇️ Lower = Less GPU)")
                        mv_img_scale = gr.Slider(0.0, 2.0, value=1.0, step=0.1, label="Image Conditioning Scale")
                with gr.Column():
                    mv_preprocessed = gr.Image(label="Preprocessed Image", type="pil")
                    mv_output_gallery = gr.Gallery(
                        label="Generated Multi-View Images",
                        columns=3,
                        rows=2,
                        height=600
                    )
                    mv_status = gr.Textbox(label="Status")
            # Wire Stage 2 button -> generate_multiview.
            mv_generate_btn.click(
                fn=generate_multiview,
                inputs=[mv_input_image, mv_prompt, mv_do_rembg, mv_seed, mv_randomize,
                        mv_guidance, mv_steps, mv_img_scale],
                outputs=[mv_output_gallery, mv_preprocessed, mv_seed, mv_status]
            )
            gr.Markdown(f"""
**πŸ’‘ Running on ZeroGPU (Serverless):**
- βœ… **No local GPU required** - Uses Hugging Face's free GPU
- βœ… **Login required** - Sign in to get your GPU quota
- βœ… Models: SDXL, BiRefNet, MV-Adapter (loaded on first use)
- ⏱️ Processing time: ~20-60 seconds per generation
**⚠️ GPU Quota Tips:**
- Lower **Inference Steps** (20 instead of 30) to save quota
- Disable **Remove Background** if not needed
- Daily quota resets every 24 hours
**πŸ”§ Troubleshooting:**
- If quota error persists, try refreshing the page
- Check the Space logs (Settings β†’ Logs) for detailed errors
- Verify you're logged in to Hugging Face
""")
        # ===== TAB 3: 3D GENERATION =====
        with gr.Tab("🎭 Stage 3: 3D Gaussian Splatting"):
            gr.Markdown("### Generate 3D Model from Multiple Views (GPU Required)")
            with gr.Row():
                with gr.Column():
                    gs_input_images = gr.File(
                        file_count="multiple",
                        label="Upload Images (from Stage 2 or multiple views)",
                        file_types=["image"]
                    )
                    gs_conf_thres = gr.Slider(0, 100, value=50, step=0.1, label="Confidence Threshold (%)")
                    gs_show_cam = gr.Checkbox(label="Show Camera Poses", value=True)
                    gs_generate_btn = gr.Button("🎭 Generate 3D Model", variant="primary", size="lg")
                with gr.Column():
                    gs_output_3d = gr.Model3D(label="3D Gaussian Splatting Model", height=600)
                    gs_status = gr.Textbox(label="Status")
            # Wire Stage 3 button -> generate_3d_gaussian.
            gs_generate_btn.click(
                fn=generate_3d_gaussian,
                inputs=[gs_input_images, gs_conf_thres, gs_show_cam],
                outputs=[gs_output_3d, gs_status]
            )
            gr.Markdown(f"""
**πŸ’‘ Running on ZeroGPU (Serverless):**
- βœ… **No local GPU required** - Uses Hugging Face's free GPU
- βœ… **Login required** - Sign in to get your GPU quota
- βœ… Model: VGGT-1B (~1GB, loaded on first use)
- ⏱️ Processing time: ~60-120 seconds (higher timeout)
- πŸ“¦ **Output**: GLB 3D model file (viewable in Blender, Three.js, etc)
""")
    gr.Markdown(f"""
---
### πŸš€ System Status:
- **Platform**: Hugging Face Spaces with **ZeroGPU** (Serverless)
- **Stage 1**: βœ… Always Available (CPU-only, no GPU needed)
- **Stage 2**: βœ… Available (ZeroGPU - requires login)
- **Stage 3**: βœ… Available (ZeroGPU - requires login)
### πŸ’‘ Tips:
- **Login required** for Stage 2 & 3 to access GPU quota
- Run Stage 1 first to generate architectural designs
- Use Stage 1 output as input for Stage 2
- Use Stage 2 multi-view outputs for Stage 3
- GPU features use lazy loading (models load on first use)
""")
if __name__ == "__main__":
    # Listen on all interfaces at the Hugging Face Spaces default port.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)