|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import cv2 |
|
|
from PIL import Image |
|
|
from PIFuHD.data import EvalWMetaDataset |
|
|
from PIFuHD.data.ImageBundle import ImageBundle |
|
|
from PIFuHD.options import BaseOptions |
|
|
from PIFuHD.recontructor import Reconstructor |
|
|
from huggingface_hub import hf_hub_download |
|
|
from human_pose_estimator import PoseEstimator |
|
|
from estimator import rect |
|
|
|
|
|
# Hugging Face Hub repository hosting the pretrained PIFuHD weights.
REPO_ID = "cxeep/PIFuHD"

# 2D human-pose detector used to find the subject's bounding box before
# reconstruction; pinned to CPU here.
pose_estimator = PoseEstimator("cpu")

# Download the PIFuHD checkpoint (served from the local HF cache on repeat runs).
checkpoint_path = hf_hub_download(repo_id=REPO_ID, filename="pifuhd.pt")

# CLI-style argument vector consumed by BaseOptions.parse() below.
cmd = [
    '--dataroot', './data',
    '--results_path', './results',
    '--loadSize', '1024',           # input images are resized to 1024px
    '--resolution', '256',          # marching-cubes / implicit-grid resolution
    '--load_netMR_checkpoint_path', checkpoint_path,
    '--start_id', '-1',             # -1: no dataset index range restriction
    '--end_id', '-1'
]
parser = BaseOptions()
opts = parser.parse(cmd)

# Shared reconstructor instance; loads the network once at import time and is
# reused by every request below.
reconstructor = Reconstructor(opts)
|
|
|
|
|
def make_bundle(image, name):
    """Package *image* plus its detected person rectangles into an ImageBundle.

    Runs the module-level pose estimator over the image to locate the subject,
    then wraps the (possibly adjusted) image, the given name, and the detected
    rectangles into the bundle format expected by EvalWMetaDataset.
    """
    cropped, boxes = rect(pose_estimator, image)
    bundle = ImageBundle(img=cropped, name=name, meta=boxes)
    return bundle
|
|
|
|
|
def process_video(video_path):
    """Reconstruct 3D models from the frames of a video file.

    Decodes every frame of *video_path* with OpenCV, converts each to RGB,
    runs the PIFuHD reconstructor on each frame, and returns the result for
    the first frame (matching the original behavior of this demo).

    Returns None when the video cannot be opened or contains no decodable
    frames, instead of crashing with an IndexError.
    """
    frames = []
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # OpenCV decodes BGR; PIL/the model pipeline expect RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(Image.fromarray(frame))
    finally:
        # Release the capture even if decoding raises mid-loop.
        cap.release()

    # BUG FIX: an unreadable or empty video used to fall through to
    # models[0] and raise IndexError; fail soft instead.
    if not frames:
        return None

    # NOTE(review): every frame is reconstructed but only the first result is
    # returned — presumably evaluate() also writes per-frame results to disk;
    # confirm before short-circuiting this loop.
    models = []
    for frame in frames:
        bundle = make_bundle(np.array(frame), "Model3D")
        dataset = EvalWMetaDataset(opts, [bundle])
        model = reconstructor.evaluate(dataset)
        models.append(model)

    return models[0]
|
|
|
|
|
def predict(input_img, input_video):
    """Dispatch a reconstruction request from the Gradio UI.

    Video input takes priority over the image input. With neither supplied,
    returns (None, None) to clear both output components.
    """
    # Guard clauses: handle each input source and bail out early.
    if input_video:
        return process_video(input_video)

    if input_img is not None:
        dataset = EvalWMetaDataset(opts, [make_bundle(input_img, "Model3D")])
        return reconstructor.evaluate(dataset)

    return None, None
|
|
|
|
|
# HTML appended at the bottom of the page; currently empty.
footer = r"""


"""

with gr.Blocks(title="PIFuHD") as app:
    gr.HTML("<center><h1>3D Human Digitization</h1></center>")
    gr.HTML("<center><h3>Multi-Level Pixel-Aligned Implicit Function for High-Resolution 3D Models</h3></center>")

    with gr.Row(equal_height=False):
        # Left column: inputs and the run trigger.
        with gr.Column():
            input_img = gr.Image(type="numpy", label="Input image")
            input_video = gr.Video(label="Input Video")
            run_btn = gr.Button(variant="primary")
        # Right column: reconstruction outputs and a clear-all button.
        with gr.Column():
            output_obj = gr.Model3D(label="Output model")
            output_img = gr.Image(type="filepath", label="Output image")
            gr.ClearButton(components=[input_img, input_video, output_img, output_obj], variant="stop")

    run_btn.click(predict, [input_img, input_video], [output_img, output_obj])

    # Clickable example gallery; selecting a sample copies it into the image input.
    with gr.Row():
        blobs = [[f"examples/{x:02d}.png"] for x in range(1, 4)]
        examples = gr.Dataset(components=[input_img], samples=blobs)
        examples.click(lambda x: x[0], [examples], [input_img])

    with gr.Row():
        gr.HTML(footer)

# BUG FIX: queue() must be enabled BEFORE launch(). launch(debug=True) blocks
# until the server shuts down, so the original post-launch queue() call never
# took effect and long-running reconstructions could hit request timeouts.
app.queue()
app.launch(share=False, debug=True, show_error=True)