# matfuse-pbr / app.py — uploaded by j-bels ("Upload 3 files", commit ad92435, verified)
"""
MatFuse PBR Material Generator - HuggingFace Space (CPU Version)
"""
import os
import subprocess
import sys

import gradio as gr
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from omegaconf import OmegaConf
from PIL import Image
device = 'cpu'
model = None
def load_model():
    """Download the MatFuse checkpoint and source repo, build the model, cache it.

    Idempotent: the loaded model is cached in the module-level ``model`` global,
    so subsequent calls return immediately without re-downloading anything.

    Returns:
        The MatFuse LDM model in eval mode on the module-level ``device``.

    Raises:
        subprocess.CalledProcessError: if the git clone of matfuse-sd fails.
    """
    global model
    if model is not None:
        return model

    print("Downloading MatFuse checkpoint...")
    # NOTE: local_dir_use_symlinks is deprecated and ignored by recent
    # huggingface_hub releases, so it is intentionally omitted here.
    ckpt_path = hf_hub_download(
        repo_id="gvecchio/MatFuse",
        filename="matfuse-pruned.ckpt",
        local_dir="checkpoints",
    )

    print("Cloning MatFuse repository...")
    if not os.path.exists('matfuse-sd'):
        # subprocess.run with an argument list (shell=False) instead of
        # os.system: a failed clone now raises instead of being silently
        # ignored and crashing later on a missing config file.
        subprocess.run(
            ['git', 'clone', 'https://github.com/giuvecchio/matfuse-sd.git'],
            check=True,
        )
    # Guard against duplicate entries when this function is re-entered after
    # a failed load attempt (model stays None, so we get here again).
    if 'matfuse-sd/src' not in sys.path:
        sys.path.insert(0, 'matfuse-sd/src')
    # Deferred import: ldm only becomes importable after the clone above.
    from ldm.util import instantiate_from_config

    config_path = 'matfuse-sd/src/configs/diffusion/matfuse-ldm-vq-4ch.yaml'
    config = OmegaConf.load(config_path)

    print("Loading model...")
    model = instantiate_from_config(config.model)
    # weights_only=False is required on torch >= 2.6, where the default flipped
    # to True and Lightning-style checkpoints (non-tensor objects in the pickle)
    # fail to load. The checkpoint comes from the pinned repo above, so the
    # pickle-load risk is accepted deliberately.
    ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)
    # strict=False: the pruned checkpoint may omit some buffers/heads present
    # in the full config-instantiated model.
    model.load_state_dict(ckpt['state_dict'], strict=False)
    model = model.to(device)
    model.eval()
    print("Model loaded successfully!")
    return model
@torch.no_grad()
def generate_material(input_image):
    """Generate PBR texture maps from an input image.

    Args:
        input_image: PIL image from the Gradio widget, or None if the user
            submitted without uploading anything.

    Returns:
        A 4-tuple of 512x512 PIL RGB images: (diffuse, normal, roughness,
        specular). On any internal error, four black images are returned so
        the UI never crashes.
    """
    global model
    try:
        # Early-out BEFORE touching the model: a null submission must not
        # trigger a multi-minute checkpoint download just to return blanks.
        if input_image is None:
            blank = Image.new('RGB', (512, 512), (128, 128, 128))
            return blank, blank, blank, blank

        if model is None:
            model = load_model()

        # Normalize to [-1, 1] float tensor, NCHW layout.
        input_image = input_image.convert('RGB').resize((512, 512))
        img_np = np.array(input_image).astype(np.float32) / 127.5 - 1.0
        img_tensor = torch.from_numpy(img_np).permute(2, 0, 1).unsqueeze(0).to(device)

        # Prefer the dedicated conditioning encoder when the config defines
        # one; otherwise fall back to the first-stage (VAE/VQ) encoding.
        if hasattr(model, 'cond_stage_model') and model.cond_stage_model is not None:
            cond = model.cond_stage_model.encode(img_tensor)
        else:
            cond = model.encode_first_stage(img_tensor)
            cond = model.get_first_stage_encoding(cond)

        # NOTE(review): assumes model.sample returns (samples, extra) even
        # with return_intermediates=False — verify against the matfuse-sd
        # DDPM/DDIM implementation.
        samples, _ = model.sample(
            cond=cond,
            batch_size=1,
            return_intermediates=False,
            ddim_steps=25,  # low step count to keep CPU inference tolerable
            eta=0.0,
            unconditional_guidance_scale=7.5
        )

        # Decode latents back to image space and rescale [-1, 1] -> [0, 255].
        outputs = model.decode_first_stage(samples)
        outputs = outputs.cpu().numpy()
        outputs = ((outputs + 1) * 127.5).clip(0, 255).astype(np.uint8)

        if outputs.shape[1] >= 12:
            # Full material stack: 4 maps packed as consecutive RGB triplets.
            diffuse = Image.fromarray(outputs[0, 0:3].transpose(1, 2, 0))
            normal = Image.fromarray(outputs[0, 3:6].transpose(1, 2, 0))
            roughness = Image.fromarray(outputs[0, 6:9].transpose(1, 2, 0))
            specular = Image.fromarray(outputs[0, 9:12].transpose(1, 2, 0))
        elif outputs.shape[1] == 3:
            # Single RGB output: treat it as the normal map and fill the rest
            # with neutral placeholders.
            normal = Image.fromarray(outputs[0].transpose(1, 2, 0))
            diffuse = input_image
            roughness = Image.new('RGB', (512, 512), (128, 128, 128))
            specular = Image.new('RGB', (512, 512), (64, 64, 64))
        else:
            # Unexpected channel count: return placeholders rather than crash.
            diffuse = input_image
            normal = Image.new('RGB', (512, 512), (128, 128, 255))
            roughness = Image.new('RGB', (512, 512), (128, 128, 128))
            specular = Image.new('RGB', (512, 512), (64, 64, 64))

        return diffuse, normal, roughness, specular

    except Exception:
        # Broad catch is deliberate at this UI boundary: log the traceback
        # and keep the demo alive with black placeholder images.
        import traceback
        traceback.print_exc()
        blank = Image.new('RGB', (512, 512), (0, 0, 0))
        return blank, blank, blank, blank
# ---- Gradio UI wiring ---------------------------------------------------
# One input image in, four PBR maps out; flagging disabled for the demo.
_OUTPUT_LABELS = ("Diffuse/Albedo", "Normal", "Roughness", "Specular")

demo = gr.Interface(
    fn=generate_material,
    inputs=gr.Image(type="pil", label="Input Image"),
    outputs=[gr.Image(type="pil", label=label) for label in _OUTPUT_LABELS],
    title="MatFuse PBR Material Generator",
    description=(
        "Upload an image to generate PBR texture maps. Running on CPU - "
        "first request takes 5-10 min (model loading), then 2-5 min per image."
    ),
    allow_flagging="never",
)

demo.launch()