# architexture-3d / app.py
# Last commit: "Add detailed logging and troubleshooting tips for GPU quota issues"
# (2a9f8de, by Britto-j2004)
"""
Architexture 3D FULL - Complete AI Architectural Design Platform
Stage 1: Philosophy + 2D Design (Pollinations.ai)
Stage 2: Multi-View Generation (MV-Adapter SDXL)
Stage 3: 3D Gaussian Splatting (VGGT)
Updated: 2025-10-28
"""
import os
import sys
import gc
import random
import shutil
import time
from datetime import datetime
import glob
import gradio as gr
import numpy as np
import torch
import cv2
import requests
import urllib.parse
from io import BytesIO
from PIL import Image, ImageDraw
from torchvision import transforms
from transformers import AutoModelForImageSegmentation
import spaces
# Import MV-Adapter modules
sys.path.append(".")
try:
from inference_i2mv_sdxl import prepare_pipeline, remove_bg, run_pipeline
except ImportError:
print("⚠️ MV-Adapter modules not found - Stage 2 will be disabled")
prepare_pipeline = None
# Import VGGT modules
sys.path.append("vggt/")
try:
from visual_util import predictions_to_glb
from vggt.models.vggt import VGGT
from vggt.utils.load_fn import load_and_preprocess_images
from vggt.utils.pose_enc import pose_encoding_to_extri_intri
from vggt.utils.geometry import unproject_depth_map_to_point_map
except ImportError:
print("⚠️ VGGT modules not found - Stage 3 will be disabled")
VGGT = None
# ----------------------------------------------------------------------------
# Startup banner and global compute configuration.
# ----------------------------------------------------------------------------
_BANNER = "=" * 80
print(_BANNER)
print(" ARCHITEXTURE 3D FULL - Complete Pipeline")
print(_BANNER)
print("πŸ›οΈ Stage 1: Architectural Design (Pollinations.ai)")
print("πŸ”„ Stage 2: Multi-View Generation (MV-Adapter SDXL)")
print("🎭 Stage 3: 3D Gaussian Splatting (VGGT)")
print(_BANNER)
# GLOBAL SETUP: GPU + bfloat16 when CUDA is available, otherwise CPU + float32.
_cuda_available = torch.cuda.is_available()
device = "cuda" if _cuda_available else "cpu"
dtype = torch.bfloat16 if _cuda_available else torch.float32
print(f"πŸ–₯️ Device: {device}")
print(f"πŸ’Ύ Dtype: {dtype}")
# ============================================================================
# STAGE 1: ARCHITEXTURE - Design Philosophy & 2D Generation
# ============================================================================
# Supported architectural styles. Each entry's "prompt_suffix" is appended to
# the generated design philosophy to build the final text-to-image prompt.
STYLES = {
    "Brutalist": {
        "prompt_suffix": "brutalist architecture with raw concrete, bold geometric forms, monumental scale, professional photography"
    },
    "Art Deco": {
        "prompt_suffix": "Art Deco architecture with geometric patterns, luxurious materials, golden accents, elegant design, professional photography"
    },
    "Modern": {
        "prompt_suffix": "modern architecture with clean lines, glass facades, minimalist design, professional photography"
    },
    "Gothic": {
        "prompt_suffix": "Gothic architecture with pointed arches, ribbed vaults, flying buttresses, ornate details, professional photography"
    }
}
def generate_design_philosophy(style, building_type):
    """Return a one-paragraph design philosophy for the style/building pair.

    Unknown styles fall back to a minimal "A {style} {building_type} design."
    sentence.
    """
    # Style-specific tails; each completes "A {style} {building_type} ...".
    tails = {
        "Brutalist": ("emphasizing bold geometries, raw materials, and human-centered "
                      "design principles that celebrate structural honesty and functional "
                      "beauty through exposed concrete and monumental forms."),
        "Art Deco": ("featuring geometric patterns, luxurious materials, and decorative "
                     "elements that embody elegance and modernity through symmetrical "
                     "designs and rich ornamentation."),
        "Modern": ("showcasing clean lines, open spaces, and functional minimalism that "
                   "prioritizes simplicity and efficiency through innovative materials "
                   "and sustainable design."),
        "Gothic": ("displaying vertical emphasis, pointed arches, and intricate details "
                   "that inspire awe and spirituality through dramatic height and ornate "
                   "craftsmanship."),
    }
    if style in tails:
        return f"A {style} {building_type} {tails[style]}"
    return f"A {style} {building_type} design."
def validate_design(style, philosophy):
    """Score *philosophy* against the keyword list for *style*.

    Returns a human-readable "matched/total" summary string; unknown styles
    yield a 0/0 result.
    """
    keyword_map = {
        "Brutalist": ["concrete", "geometric", "bold", "raw", "monumental", "structural"],
        "Art Deco": ["geometric", "luxurious", "golden", "elegant", "pattern", "decorative"],
        "Modern": ["clean", "minimal", "glass", "simple", "functional", "sustainable"],
        "Gothic": ["arch", "vault", "ornate", "vertical", "dramatic", "spiritual"],
    }
    expected = keyword_map.get(style, [])
    lowered = philosophy.lower()
    # Case-insensitive substring matching, one point per keyword present.
    hit_count = sum(word in lowered for word in expected)
    return f"βœ… Validation: {hit_count}/{len(expected)} style keywords matched"
def generate_2d_image(philosophy, style):
    """Render a 2D concept image via the Pollinations.ai text-to-image API.

    Builds a prompt from the philosophy plus the style's prompt suffix and
    fetches a 1024x768 image. On any failure (HTTP error, timeout, etc.) a
    placeholder image carrying the error message is returned instead.
    """
    try:
        print(f"🎨 Generating image for style: {style}")
        suffix = STYLES[style]["prompt_suffix"]
        prompt_text = f"{philosophy}, {suffix}"
        url = (
            "https://image.pollinations.ai/prompt/"
            + urllib.parse.quote(prompt_text)
            + "?width=1024&height=768&model=flux&nologo=true"
        )
        print(f"πŸ“‘ Requesting from Pollinations.ai...")
        response = requests.get(url, timeout=90)
        print(f"πŸ“₯ Response status: {response.status_code}")
        if response.status_code != 200:
            return create_placeholder_image(f"API Error: Status {response.status_code}")
        print(f"βœ… Image generated successfully!")
        return Image.open(BytesIO(response.content))
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return create_placeholder_image(f"Error: {str(e)}")
def create_placeholder_image(text):
    """Return a 1024x768 light-grey image with *text* drawn on it.

    Used as a visual fallback whenever a generation step fails.
    """
    canvas = Image.new('RGB', (1024, 768), color=(240, 240, 245))
    painter = ImageDraw.Draw(canvas)
    # Thin border so the placeholder is visibly distinct from a real render.
    painter.rectangle([(10, 10), (1014, 758)], outline=(200, 200, 210), width=3)
    painter.text((50, 350), text, fill=(60, 60, 80))
    return canvas
def architexture_generate(style, building_type):
    """Stage 1 entry point wired to the Gradio button.

    Returns (philosophy_text, validation_summary, 2d_image). A blank building
    type short-circuits to error strings plus a placeholder image.
    """
    header = "=" * 60
    print(f"\n{header}")
    print(f"πŸš€ Stage 1: Generating {style} {building_type}")
    print(f"{header}")
    if not building_type or not building_type.strip():
        return ("❌ Please enter a building type",
                "❌ Validation skipped",
                create_placeholder_image("No building type provided"))
    philosophy = generate_design_philosophy(style, building_type)
    validation = validate_design(style, philosophy)
    rendered = generate_2d_image(philosophy, style)
    print(f"βœ… Stage 1 Complete!\n")
    return philosophy, validation, rendered
# ============================================================================
# STAGE 2: MV-ADAPTER - Multi-View Generation
# ============================================================================
# Lazy loading for MV-Adapter: these handles stay None until the first
# @spaces.GPU call so the Space can start without downloading model weights.
mv_pipe = None          # MV-Adapter SDXL pipeline
birefnet = None         # BiRefNet background-removal model
transform_image = None  # torchvision preprocessing pipeline for BiRefNet
NUM_VIEWS = 6   # number of views MV-Adapter generates per input image
HEIGHT = 768    # generated view height in pixels
WIDTH = 768     # generated view width in pixels
MAX_SEED = np.iinfo(np.int32).max  # upper bound for the seed slider
@spaces.GPU
def load_and_run_mvadapter(input_image_np, prompt, do_rembg, seed, randomize_seed,
                           guidance_scale, num_inference_steps, reference_conditioning_scale):
    """Load MV-Adapter lazily and generate multi-view images (runs on ZeroGPU).

    Args:
        input_image_np: Input image (PIL Image or numpy array).
        prompt: Text prompt guiding the generation.
        do_rembg: Whether to strip the background with BiRefNet first.
        seed: RNG seed; replaced by a random one when randomize_seed is True.
        randomize_seed: If True, pick a fresh random seed in [0, MAX_SEED].
        guidance_scale: Classifier-free guidance scale.
        num_inference_steps: Number of diffusion steps.
        reference_conditioning_scale: Strength of reference-image conditioning.

    Returns:
        (images, preprocessed_image, seed) — list of generated views, the
        (possibly background-removed) input, and the seed actually used.
    """
    global mv_pipe, birefnet, transform_image
    print(f"πŸ”„ Starting MV-Adapter generation...")
    print(f" Input image type: {type(input_image_np)}")
    print(f" Prompt: {prompt}")
    print(f" Do rembg: {do_rembg}")
    # Inside a @spaces.GPU call CUDA is always available.
    device = "cuda"
    dtype = torch.bfloat16
    # Load pipeline if needed (first call only; cached in the module global).
    if mv_pipe is None:
        print("πŸ”„ Loading MV-Adapter SDXL pipeline...")
        try:
            mv_pipe = prepare_pipeline(
                base_model="stabilityai/stable-diffusion-xl-base-1.0",
                vae_model="madebyollin/sdxl-vae-fp16-fix",
                unet_model=None,
                lora_model=None,
                adapter_path="huanngzh/mv-adapter",
                scheduler=None,
                num_views=NUM_VIEWS,
                device=device,
                dtype=dtype,
            )
            print("βœ… MV-Adapter loaded!")
        except Exception as e:
            print(f"❌ Failed to load MV-Adapter: {e}")
            raise
    # Load BiRefNet if needed — only when background removal was requested.
    if birefnet is None and do_rembg:
        print("πŸ”„ Loading BiRefNet for background removal...")
        birefnet = AutoModelForImageSegmentation.from_pretrained(
            "ZhengPeng7/BiRefNet", trust_remote_code=True
        )
        birefnet.to(device)
        # ImageNet mean/std normalization at BiRefNet's expected 1024x1024 input.
        transform_image = transforms.Compose([
            transforms.Resize((1024, 1024)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
        print("βœ… BiRefNet loaded!")
    # Handle input image - could be PIL Image or numpy array (Gradio sends either).
    if isinstance(input_image_np, Image.Image):
        input_image = input_image_np
    else:
        input_image = Image.fromarray(input_image_np)
    # Setup background removal callback (None disables it inside run_pipeline).
    if do_rembg and birefnet is not None:
        remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, device)
    else:
        remove_bg_fn = None
    # Handle seed
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    print(f" Using seed: {seed}")
    print(f" Guidance scale: {guidance_scale}, Steps: {num_inference_steps}")
    negative_prompt = "watermark, ugly, deformed, noisy, blurry, low contrast"
    # Run pipeline
    print("πŸ”„ Running MV-Adapter pipeline...")
    try:
        images, preprocessed_image = run_pipeline(
            mv_pipe,
            num_views=NUM_VIEWS,
            text=prompt,
            image=input_image,
            height=HEIGHT,
            width=WIDTH,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            seed=seed,
            remove_bg_fn=remove_bg_fn,
            reference_conditioning_scale=reference_conditioning_scale,
            negative_prompt=negative_prompt,
            device=device,
        )
        print(f"βœ… Generated {len(images)} views!")
    except Exception as e:
        print(f"❌ Pipeline execution failed: {e}")
        raise
    return images, preprocessed_image, seed
def generate_multiview(input_image, prompt, do_rembg=True, seed=42, randomize_seed=False,
                       guidance_scale=3.0, num_inference_steps=30, reference_conditioning_scale=1.0):
    """Stage 2 entry point: generate multiple views from a single image.

    Returns (images, preprocessed_image, seed, status_message). Failures never
    raise; they come back as a placeholder image plus an error status string.
    """
    # Guard: nothing to work with.
    if input_image is None:
        return [create_placeholder_image("Please upload an image first")], None, 42, "❌ No input image"
    # Guard: the MV-Adapter inference module failed to import at startup.
    if prepare_pipeline is None:
        error_msg = "❌ MV-Adapter not available - inference_i2mv_sdxl module failed to import"
        return [create_placeholder_image(error_msg)], input_image, seed, error_msg
    try:
        divider = "=" * 60
        print(divider)
        print("πŸš€ Starting Multi-View Generation")
        print(f" GPU Available: {torch.cuda.is_available()}")
        print(f" Input type: {type(input_image)}")
        print(f" Prompt: {prompt}")
        print(divider)
        views, preprocessed, seed = load_and_run_mvadapter(
            input_image, prompt, do_rembg, seed, randomize_seed,
            guidance_scale, num_inference_steps, reference_conditioning_scale
        )
        print(f"βœ… Success! Generated {len(views)} images")
        print(f" Images type: {type(views)}")
        print(f" First image type: {type(views[0]) if views else 'None'}")
        # Normalize a single-image result so the gallery always gets a list.
        if not isinstance(views, list):
            views = [views]
        return views, preprocessed, seed, f"βœ… Generated {len(views)} multi-view images"
    except Exception as e:
        error_msg = f"❌ Error: {str(e)}"
        print(f"❌ MV-Adapter Error: {e}")
        import traceback
        traceback.print_exc()
        return [create_placeholder_image(error_msg)], input_image, seed, error_msg
# ============================================================================
# STAGE 3: VGGT - 3D Gaussian Splatting (ZeroGPU)
# ============================================================================
# Lazily loaded VGGT-1B model; populated on the first Stage 3 request.
vggt_model = None
@spaces.GPU(duration=120)
def run_vggt_reconstruction(target_dir, conf_thres, show_cam):
    """Run VGGT 3D reconstruction (uses ZeroGPU for up to 120s).

    Args:
        target_dir: Directory containing an "images/" subfolder of input views.
        conf_thres: Confidence threshold (percent) for point filtering.
        show_cam: Whether to include camera poses in the exported GLB scene.

    Returns:
        Path to the exported .glb file written inside target_dir.

    Raises:
        ValueError: If no images are found under target_dir/images.
    """
    global vggt_model
    device = "cuda"
    dtype = torch.bfloat16
    # Load model if needed; weights are fetched from the HF hub on first use.
    if vggt_model is None:
        print("🎭 Loading VGGT-1B model...")
        vggt_model = VGGT()
        _URL = "https://huggingface.co/facebook/VGGT-1B/resolve/main/model.pt"
        vggt_model.load_state_dict(torch.hub.load_state_dict_from_url(_URL))
        vggt_model.eval()
        print("βœ… VGGT loaded!")
    vggt_model.to(device)
    # Load images
    image_names = glob.glob(os.path.join(target_dir, "images", "*"))
    image_names = sorted(image_names)
    if len(image_names) == 0:
        raise ValueError("No images found")
    images = load_and_preprocess_images(image_names).to(device)
    # Run inference under autocast to keep memory within the ZeroGPU budget.
    with torch.no_grad():
        with torch.cuda.amp.autocast(dtype=dtype):
            predictions = vggt_model(images)
    # Recover camera extrinsics/intrinsics from the predicted pose encoding.
    extrinsic, intrinsic = pose_encoding_to_extri_intri(predictions["pose_enc"], images.shape[-2:])
    predictions["extrinsic"] = extrinsic
    predictions["intrinsic"] = intrinsic
    # Move every tensor to CPU numpy and drop the leading batch dimension.
    for key in predictions.keys():
        if isinstance(predictions[key], torch.Tensor):
            predictions[key] = predictions[key].cpu().numpy().squeeze(0)
    depth_map = predictions["depth"]
    # Back-project per-view depth maps into a world-space point map.
    world_points = unproject_depth_map_to_point_map(depth_map, predictions["extrinsic"], predictions["intrinsic"])
    predictions["world_points_from_depth"] = world_points
    # Save predictions so the scene can be re-filtered without re-running inference.
    prediction_save_path = os.path.join(target_dir, "predictions.npz")
    np.savez(prediction_save_path, **predictions)
    # Generate GLB (filename encodes the filter settings used).
    glbfile = os.path.join(target_dir, f"scene_{conf_thres}_cam{show_cam}.glb")
    glbscene = predictions_to_glb(
        predictions,
        conf_thres=conf_thres,
        filter_by_frames="All",
        mask_black_bg=False,
        mask_white_bg=False,
        show_cam=show_cam,
        mask_sky=False,
        target_dir=target_dir,
        prediction_mode="Depthmap and Camera Branch",
    )
    glbscene.export(file_obj=glbfile)
    # Free GPU memory before the ZeroGPU slot is released.
    del predictions
    torch.cuda.empty_cache()
    return glbfile
def handle_3d_uploads(input_images):
    """Copy uploaded files into a fresh timestamped directory for VGGT.

    Accepts Gradio file objects, dicts carrying a "name" key, or plain path
    strings. Returns (target_dir, sorted_copied_paths), or (None, []) when
    nothing was uploaded.
    """
    if not input_images:
        return None, []
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target_dir = f"input_images_{stamp}"
    images_dir = os.path.join(target_dir, "images")
    # Start from a clean directory in the unlikely event of a name collision.
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    os.makedirs(target_dir)
    os.makedirs(images_dir)
    copied = []
    for index, item in enumerate(input_images):
        # Gradio may hand us file objects, dicts, or raw path strings.
        if hasattr(item, 'name'):
            src = item.name
        elif isinstance(item, dict) and "name" in item:
            src = item["name"]
        else:
            src = str(item)
        # Zero-padded names keep the views in upload order when sorted.
        dest = os.path.join(images_dir, f"{index:06d}.png")
        shutil.copy(src, dest)
        copied.append(dest)
    return target_dir, sorted(copied)
def generate_3d_gaussian(input_images, conf_thres=50.0, show_cam=True):
    """Stage 3 entry point: build a GLB 3D model from a set of view images.

    Args:
        input_images: Uploaded files (Gradio file objects, dicts, or paths);
            may be None or empty.
        conf_thres: Confidence threshold percentage forwarded to VGGT filtering.
        show_cam: Whether to render camera poses in the exported GLB scene.

    Returns:
        (glb_file_path, status_message); glb_file_path is None on failure.
    """
    # Bug fix: the original body had a duplicated return statement plus an
    # orphaned second `except` clause after the try/except (merge artifact);
    # the duplicate is removed and the logging except branch kept.
    if input_images is None or len(input_images) == 0:
        return None, "❌ Please provide images for 3D reconstruction"
    try:
        # Free host and GPU memory before a heavy reconstruction run.
        gc.collect()
        torch.cuda.empty_cache()
        target_dir, image_paths = handle_3d_uploads(input_images)
        glbfile = run_vggt_reconstruction(target_dir, conf_thres, show_cam)
        return glbfile, f"βœ… 3D reconstruction complete! {len(image_paths)} images processed"
    except Exception as e:
        print(f"❌ Error in 3D generation: {str(e)}")
        return None, f"❌ Error: {str(e)}"
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
# Three-tab UI, one tab per pipeline stage; each tab's button is wired to the
# corresponding stage function defined above.
with gr.Blocks(title="Architexture 3D FULL", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# πŸ›οΈ Architexture 3D - Complete AI Architectural Design Platform
### Full 3-Stage Pipeline: Philosophy β†’ 2D Design β†’ Multi-View β†’ 3D Model
**Three integrated AI systems:**
1. **Architexture**: Generate architectural philosophy and 2D designs (CPU-friendly)
2. **MV-Adapter**: Create 6 multi-view images from single image (GPU required)
3. **VGGT**: Build 3D Gaussian splatting models (GPU required)
""")
    with gr.Tabs():
        # ===== TAB 1: ARCHITEXTURE =====
        with gr.Tab("🎨 Stage 1: Design Generation"):
            gr.Markdown("### Generate Architectural Design Philosophy and 2D Image")
            with gr.Row():
                with gr.Column():
                    style_input = gr.Dropdown(
                        choices=list(STYLES.keys()),
                        value="Brutalist",
                        label="Architectural Style"
                    )
                    building_input = gr.Textbox(
                        label="Building Type",
                        placeholder="e.g., university library, concert hall, museum",
                        value="university library"
                    )
                    generate_btn = gr.Button("πŸš€ Generate Design", variant="primary", size="lg")
                with gr.Column():
                    philosophy_output = gr.Textbox(
                        label="Design Philosophy",
                        lines=6
                    )
                    validation_output = gr.Textbox(
                        label="Validation Result"
                    )
                    image_2d_output = gr.Image(label="Generated 2D Design", type="pil", height=512)
            # Wire Stage 1 button -> architexture_generate.
            generate_btn.click(
                fn=architexture_generate,
                inputs=[style_input, building_input],
                outputs=[philosophy_output, validation_output, image_2d_output]
            )
            gr.Markdown(f"""
**Usage:**
1. Select architectural style
2. Enter building type
3. Click Generate β†’ Get philosophy + 2D image
4. Use image in Stage 2 for multi-view generation
**βœ… Status**: Fully functional (CPU-only, <200MB memory)
""")
        # ===== TAB 2: MULTI-VIEW =====
        with gr.Tab("πŸ”„ Stage 2: Multi-View Generation"):
            gr.Markdown("### Generate 6 Multi-View Images from Single Image (GPU Required)")
            with gr.Row():
                with gr.Column():
                    mv_input_image = gr.Image(
                        label="Input Image (from Stage 1 or upload)",
                        type="pil"
                    )
                    mv_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="high quality, detailed",
                        value="high quality"
                    )
                    mv_do_rembg = gr.Checkbox(label="Remove Background", value=False)
                    mv_generate_btn = gr.Button("πŸ”„ Generate Multi-View", variant="primary", size="lg")
                    with gr.Accordion("βš™οΈ Advanced Settings (Reduce for lower GPU quota usage)", open=False):
                        mv_seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
                        mv_randomize = gr.Checkbox(label="Randomize Seed", value=True)
                        mv_guidance = gr.Slider(0.0, 10.0, value=3.0, step=0.1, label="CFG Scale")
                        mv_steps = gr.Slider(1, 50, value=20, step=1, label="Inference Steps (⬇️ Lower = Less GPU)")
                        mv_img_scale = gr.Slider(0.0, 2.0, value=1.0, step=0.1, label="Image Conditioning Scale")
                with gr.Column():
                    mv_preprocessed = gr.Image(label="Preprocessed Image", type="pil")
                    mv_output_gallery = gr.Gallery(
                        label="Generated Multi-View Images",
                        columns=3,
                        rows=2,
                        height=600
                    )
                    mv_status = gr.Textbox(label="Status")
            # Wire Stage 2 button -> generate_multiview.
            mv_generate_btn.click(
                fn=generate_multiview,
                inputs=[mv_input_image, mv_prompt, mv_do_rembg, mv_seed, mv_randomize,
                        mv_guidance, mv_steps, mv_img_scale],
                outputs=[mv_output_gallery, mv_preprocessed, mv_seed, mv_status]
            )
            gr.Markdown(f"""
**πŸ’‘ Running on ZeroGPU (Serverless):**
- βœ… **No local GPU required** - Uses Hugging Face's free GPU
- βœ… **Login required** - Sign in to get your GPU quota
- βœ… Models: SDXL, BiRefNet, MV-Adapter (loaded on first use)
- ⏱️ Processing time: ~20-60 seconds per generation
**⚠️ GPU Quota Tips:**
- Lower **Inference Steps** (20 instead of 30) to save quota
- Disable **Remove Background** if not needed
- Daily quota resets every 24 hours
**πŸ”§ Troubleshooting:**
- If quota error persists, try refreshing the page
- Check the Space logs (Settings β†’ Logs) for detailed errors
- Verify you're logged in to Hugging Face
""")
        # ===== TAB 3: 3D GENERATION =====
        with gr.Tab("🎭 Stage 3: 3D Gaussian Splatting"):
            gr.Markdown("### Generate 3D Model from Multiple Views (GPU Required)")
            with gr.Row():
                with gr.Column():
                    gs_input_images = gr.File(
                        file_count="multiple",
                        label="Upload Images (from Stage 2 or multiple views)",
                        file_types=["image"]
                    )
                    gs_conf_thres = gr.Slider(0, 100, value=50, step=0.1, label="Confidence Threshold (%)")
                    gs_show_cam = gr.Checkbox(label="Show Camera Poses", value=True)
                    gs_generate_btn = gr.Button("🎭 Generate 3D Model", variant="primary", size="lg")
                with gr.Column():
                    gs_output_3d = gr.Model3D(label="3D Gaussian Splatting Model", height=600)
                    gs_status = gr.Textbox(label="Status")
            # Wire Stage 3 button -> generate_3d_gaussian.
            gs_generate_btn.click(
                fn=generate_3d_gaussian,
                inputs=[gs_input_images, gs_conf_thres, gs_show_cam],
                outputs=[gs_output_3d, gs_status]
            )
            gr.Markdown(f"""
**πŸ’‘ Running on ZeroGPU (Serverless):**
- βœ… **No local GPU required** - Uses Hugging Face's free GPU
- βœ… **Login required** - Sign in to get your GPU quota
- βœ… Model: VGGT-1B (~1GB, loaded on first use)
- ⏱️ Processing time: ~60-120 seconds (higher timeout)
- πŸ“¦ **Output**: GLB 3D model file (viewable in Blender, Three.js, etc)
""")
    gr.Markdown(f"""
---
### πŸš€ System Status:
- **Platform**: Hugging Face Spaces with **ZeroGPU** (Serverless)
- **Stage 1**: βœ… Always Available (CPU-only, no GPU needed)
- **Stage 2**: βœ… Available (ZeroGPU - requires login)
- **Stage 3**: βœ… Available (ZeroGPU - requires login)
### πŸ’‘ Tips:
- **Login required** for Stage 2 & 3 to access GPU quota
- Run Stage 1 first to generate architectural designs
- Use Stage 1 output as input for Stage 2
- Use Stage 2 multi-view outputs for Stage 3
- GPU features use lazy loading (models load on first use)
""")
if __name__ == "__main__":
    # Listen on all interfaces at the Hugging Face Spaces default port.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)