Spaces:
Build error
Build error
import os
import pickle
import shutil
import sys

import gradio as gr
import torch
from easydict import EasyDict as edict
from huggingface_hub import hf_hub_download

# Make the local ./rome and ./DECA directories importable; this has to happen
# before the project imports below.
sys.path.append("./rome/")
sys.path.append('./DECA')

from rome.infer import Infer
from rome.src.utils.processing import process_black_shape, tensor2image
# Fetch the two pretrained checkpoints from the 'Pie31415/rome' Hub repo.
# hf_hub_download returns the local path of each downloaded file.
default_modnet_path = hf_hub_download(
    repo_id='Pie31415/rome',
    filename='modnet_photographic_portrait_matting.ckpt',
)
default_model_path = hf_hub_download(
    repo_id='Pie31415/rome',
    filename='rome.pth',
)
# Configuration handed to Infer() in place of command-line parser arguments.
# Options are kept in their original order; the section comments only group
# them by name prefix.
args = edict(
    # --- output / run options ---
    save_dir=".",
    save_render=True,
    model_checkpoint=default_model_path,  # downloaded rome.pth
    modnet_path=default_modnet_path,  # downloaded MODNet checkpoint
    random_seed=0,
    debug=False,
    verbose=False,
    model_image_size=256,
    # --- alignment ---
    align_source=True,
    align_target=False,
    align_scale=1.25,
    # --- mesh ---
    use_mesh_deformations=False,
    subdivide_mesh=False,
    # --- renderer_* ---
    renderer_sigma=1e-08,
    renderer_zfar=100.0,
    renderer_type="soft_mesh",
    renderer_texture_type="texture_uv",
    renderer_normalized_alphas=False,
    # --- data locations ---
    deca_path="DECA",
    rome_data_dir="rome/data",
    # --- autoenc_* ---
    autoenc_cat_alphas=False,
    autoenc_align_inputs=False,
    autoenc_use_warp=False,
    autoenc_num_channels=64,
    autoenc_max_channels=512,
    autoenc_num_groups=4,
    autoenc_num_bottleneck_groups=0,
    autoenc_num_blocks=2,
    autoenc_num_layers=4,
    autoenc_block_type="bottleneck",
    neural_texture_channels=8,
    num_harmonic_encoding_funcs=6,
    # --- unet_* ---
    unet_num_channels=64,
    unet_max_channels=512,
    unet_num_groups=4,
    unet_num_blocks=1,
    unet_num_layers=2,
    unet_block_type="conv",
    unet_skip_connection_type="cat",
    unet_use_normals_cond=True,
    unet_use_vertex_cond=False,
    unet_use_uvs_cond=False,
    unet_pred_mask=False,
    use_separate_seg_unet=True,
    # --- layer types ---
    norm_layer_type="gn",
    activation_type="relu",
    conv_layer_type="ws_conv",
    deform_norm_layer_type="gn",
    deform_activation_type="relu",
    deform_conv_layer_type="ws_conv",
    # --- segmentation / deformation options ---
    unet_seg_weight=0.0,
    unet_seg_type="bce_with_logits",
    deform_face_tightness=0.0001,
    use_whole_segmentation=False,
    mask_hair_for_neck=False,
    use_hair_from_avatar=False,
    use_scalp_deforms=True,
    use_neck_deforms=True,
    use_basis_deformer=False,
    use_unet_deformer=True,
    # --- basis options ---
    pretrained_encoder_basis_path="",
    pretrained_vertex_basis_path="",
    num_basis=50,
    basis_init="pca",
    num_vertex=5023,
    train_basis=True,
    # --- remaining paths and switches ---
    path_to_deca="DECA",
    path_to_linear_hair_model="data/linear_hair.pth",  # N/A
    path_to_mobile_model="data/disp_model.pth",  # N/A
    n_scalp=60,
    use_distill=False,
    use_mobile_version=False,
    deformer_path="data/rome.pth",
    output_unet_deformer_feats=32,
    use_deca_details=False,
    use_flametex=False,
    upsample_type="nearest",
    num_frequencies=6,
    deform_face_scale_coef=0.0,
    device="cpu",
)
# Download the FLAME generic model and the pretrained DECA weights, then stage
# both under ./DECA/data before the inference model is constructed.
generic_model_path = hf_hub_download('Pie31415/rome', 'generic_model.pkl')
deca_model_path = hf_hub_download('Pie31415/rome', 'deca_model.tar')

deca_data_dir = './DECA/data'
# Opening a file for writing inside a missing directory raises
# FileNotFoundError, so make sure the target directory exists first.
os.makedirs(deca_data_dir, exist_ok=True)

# Re-serialize the pickle: it is read with latin1 decoding and written back
# with the running interpreter's default pickle protocol.
# NOTE(review): pickle.load can execute arbitrary code from the file; this is
# only acceptable as long as the 'Pie31415/rome' repository is trusted.
with open(generic_model_path, 'rb') as src:
    generic_model = pickle.load(src, encoding='latin1')
with open(os.path.join(deca_data_dir, 'generic_model.pkl'), 'wb') as dst:
    pickle.dump(generic_model, dst)

# Plain byte-for-byte copy of the weights archive. copyfile streams the data
# in fixed-size chunks instead of splitting a binary file on newline bytes.
shutil.copyfile(deca_model_path, os.path.join(deca_data_dir, 'deca_model.tar'))

# load ROME inference model
infer = Infer(args)
def image_inference(source_img=None, driver_img=None):
    """Run ROME on one source/driver image pair and return a preview strip.

    Args:
        source_img: Source image. The UI components feeding this callback are
            created with ``type="pil"``, so a PIL image is expected.
        driver_img: Driver image, supplied the same way as ``source_img``.

    Returns:
        The result of ``tensor2image`` applied to four tensors concatenated
        along dim 2 (source image, target image, masked render, predicted
        target shape image), with its last axis reversed.

    Raises:
        gr.Error: If either image is missing, so the UI shows a readable
            message instead of a failure from inside the model.
    """
    if source_img is None or driver_img is None:
        raise gr.Error("Please provide both a source image and a driver image.")

    out = infer.evaluate(source_img, driver_img, crop_center=False)
    data_dict = out['source_information']['data_dict']

    # Everything is moved to the CPU before concatenation.
    # NOTE(review): 'render_masked' is used without the [0] indexing applied
    # to the other three tensors -- kept as in the original; confirm shapes.
    panels = [
        data_dict['source_img'][0].cpu(),
        data_dict['target_img'][0].cpu(),
        out['render_masked'].cpu(),
        out['pred_target_shape_img'][0].cpu(),
    ]
    res = tensor2image(torch.cat(panels, dim=2))

    # Reverse the last axis (presumably a BGR -> RGB channel swap; TODO confirm
    # against tensor2image).
    return res[..., ::-1]
def video_inference(source_video=None, driver_img=None):
    """Placeholder for video inference; not implemented yet.

    The two optional parameters mirror the inputs of the "Video Inference"
    tab (a source video and a driver image), so the function can be used as
    that tab's callback once an implementation exists. Calling it with no
    arguments keeps working as before.

    Args:
        source_video: Value of the source video input (currently unused).
        driver_img: Value of the driver image input (currently unused).

    Returns:
        None, always.
    """
    return None
# Gradio UI: one tab for image inference and one (not yet wired to a function)
# for video inference, followed by example inputs and the button callbacks.
with gr.Blocks() as demo:
    gr.Markdown("# **<p align='center'>ROME: Realistic one-shot mesh-based head avatars</p>**")

    with gr.Tab("Image Inference"):
        with gr.Row():
            source_img = gr.Image(type="pil", label="source image", show_label=True)
            driver_img = gr.Image(type="pil", label="driver image", show_label=True)
            image_output = gr.Image()
        image_button = gr.Button("Predict")

    with gr.Tab("Video Inference"):
        with gr.Row():
            source_video = gr.Video(label="source video")
            driver_image_for_vid = gr.Image(type="pil", label="driver image", show_label=True)
            video_output = gr.Image()
        video_button = gr.Button("Predict")

    # Each example is a [source image, driver image] pair. With
    # cache_examples=True, image_inference is used to produce cached outputs.
    gr.Examples(
        examples=[
            ["./examples/lincoln.jpg", "./examples/taras2.jpg"],
            ["./examples/lincoln.jpg", "./examples/taras1.jpg"],
        ],
        inputs=[source_img, driver_img],
        outputs=[image_output],
        fn=image_inference,
        cache_examples=True,
    )

    image_button.click(
        fn=image_inference,
        inputs=[source_img, driver_img],
        outputs=image_output,
    )
    # No function is attached to the video button yet (fn is None).
    video_button.click(
        fn=None,
        inputs=[source_video, driver_image_for_vid],
        outputs=video_output,
    )

demo.launch()