Spaces:
Runtime error
Runtime error
kxhit
committed on
Commit
·
00c4703
1
Parent(s):
869a648
rm main
Browse files
app.py
CHANGED
|
@@ -522,263 +522,263 @@ def preview_input(inputfiles):
|
|
| 522 |
imgs.append(img)
|
| 523 |
return imgs
|
| 524 |
|
| 525 |
-
def main():
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
|
| 610 |
|
| 611 |
|
| 612 |
|
| 613 |
|
| 614 |
-
|
| 615 |
-
with gr.Column():
|
| 616 |
-
|
| 617 |
-
with gr.Row():
|
| 618 |
-
outmodel = gr.Model3D()
|
| 619 |
-
|
| 620 |
-
with gr.Row():
|
| 621 |
-
gr.Markdown('''
|
| 622 |
-
<h4><b>Check if the pose and segmentation looks correct. If not, remove the incorrect images and try again.</b></h4>
|
| 623 |
-
''')
|
| 624 |
-
|
| 625 |
-
with gr.Row():
|
| 626 |
-
with gr.Group():
|
| 627 |
-
do_remove_background = gr.Checkbox(
|
| 628 |
-
label="Remove Background", value=True
|
| 629 |
-
)
|
| 630 |
-
sample_seed = gr.Number(value=42, label="Seed Value", precision=0)
|
| 631 |
-
|
| 632 |
-
sample_steps = gr.Slider(
|
| 633 |
-
label="Sample Steps",
|
| 634 |
-
minimum=30,
|
| 635 |
-
maximum=75,
|
| 636 |
-
value=50,
|
| 637 |
-
step=5,
|
| 638 |
-
visible=False
|
| 639 |
-
)
|
| 640 |
-
|
| 641 |
-
nvs_num = gr.Slider(
|
| 642 |
-
label="Number of Novel Views",
|
| 643 |
-
minimum=5,
|
| 644 |
-
maximum=100,
|
| 645 |
-
value=30,
|
| 646 |
-
step=1
|
| 647 |
-
)
|
| 648 |
-
|
| 649 |
-
nvs_mode = gr.Dropdown(["archimedes circle"], # "fixed 4 views", "fixed 8 views"
|
| 650 |
-
value="archimedes circle", label="Novel Views Pose Chosen", visible=True)
|
| 651 |
-
|
| 652 |
-
with gr.Row():
|
| 653 |
-
gr.Markdown('''
|
| 654 |
-
<h4><b>Choose your desired novel view poses number and generate! The more output images the longer it takes.</b></h4>
|
| 655 |
-
''')
|
| 656 |
-
|
| 657 |
-
with gr.Row():
|
| 658 |
-
submit = gr.Button("Submit", elem_id="eschernet", variant="primary")
|
| 659 |
-
|
| 660 |
-
with gr.Row():
|
| 661 |
-
# mv_show_images = gr.Image(
|
| 662 |
-
# label="Generated Multi-views",
|
| 663 |
-
# type="pil",
|
| 664 |
-
# width=379,
|
| 665 |
-
# interactive=False
|
| 666 |
-
# )
|
| 667 |
-
with gr.Column():
|
| 668 |
-
output_video = gr.Video(
|
| 669 |
-
label="video", format="mp4",
|
| 670 |
-
width=379,
|
| 671 |
-
autoplay=True,
|
| 672 |
-
interactive=False
|
| 673 |
-
)
|
| 674 |
-
|
| 675 |
-
# with gr.Row():
|
| 676 |
-
# with gr.Tab("OBJ"):
|
| 677 |
-
# output_model_obj = gr.Model3D(
|
| 678 |
-
# label="Output Model (OBJ Format)",
|
| 679 |
-
# #width=768,
|
| 680 |
-
# interactive=False,
|
| 681 |
-
# )
|
| 682 |
-
# gr.Markdown("Note: Downloaded .obj model will be flipped. Export .glb instead or manually flip it before usage.")
|
| 683 |
-
# with gr.Tab("GLB"):
|
| 684 |
-
# output_model_glb = gr.Model3D(
|
| 685 |
-
# label="Output Model (GLB Format)",
|
| 686 |
-
# #width=768,
|
| 687 |
-
# interactive=False,
|
| 688 |
-
# )
|
| 689 |
-
# gr.Markdown("Note: The model shown here has a darker appearance. Download to get correct results.")
|
| 690 |
-
|
| 691 |
-
with gr.Row():
|
| 692 |
-
gr.Markdown('''The novel views are generated on an archimedean spiral. You can download the video''')
|
| 693 |
-
|
| 694 |
-
gr.Markdown(_CITE_)
|
| 695 |
-
|
| 696 |
-
# set dust3r parameter invisible to be clean
|
| 697 |
with gr.Column():
|
|
|
|
| 698 |
with gr.Row():
|
| 699 |
-
|
| 700 |
-
value='linear', label="schedule", info="For global alignment!", visible=False)
|
| 701 |
-
niter = gr.Number(value=300, precision=0, minimum=0, maximum=5000,
|
| 702 |
-
label="num_iterations", info="For global alignment!", visible=False)
|
| 703 |
-
scenegraph_type = gr.Dropdown(["complete", "swin", "oneref"],
|
| 704 |
-
value='complete', label="Scenegraph",
|
| 705 |
-
info="Define how to make pairs",
|
| 706 |
-
interactive=True, visible=False)
|
| 707 |
-
same_focals = gr.Checkbox(value=True, label="Focal", info="Use the same focal for all cameras", visible=False)
|
| 708 |
-
winsize = gr.Slider(label="Scene Graph: Window Size", value=1,
|
| 709 |
-
minimum=1, maximum=1, step=1, visible=False)
|
| 710 |
-
refid = gr.Slider(label="Scene Graph: Id", value=0, minimum=0, maximum=0, step=1, visible=False)
|
| 711 |
|
| 712 |
with gr.Row():
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
with gr.Row():
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 733 |
-
# clean_depth, transparent_cams, cam_size, same_focals],
|
| 734 |
-
# outputs=outmodel)
|
| 735 |
-
# cam_size.change(fn=model_from_scene_fun,
|
| 736 |
-
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 737 |
-
# clean_depth, transparent_cams, cam_size, same_focals],
|
| 738 |
-
# outputs=outmodel)
|
| 739 |
-
# as_pointcloud.change(fn=model_from_scene_fun,
|
| 740 |
-
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 741 |
-
# clean_depth, transparent_cams, cam_size, same_focals],
|
| 742 |
-
# outputs=outmodel)
|
| 743 |
-
# mask_sky.change(fn=model_from_scene_fun,
|
| 744 |
-
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 745 |
-
# clean_depth, transparent_cams, cam_size, same_focals],
|
| 746 |
-
# outputs=outmodel)
|
| 747 |
-
# clean_depth.change(fn=model_from_scene_fun,
|
| 748 |
-
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 749 |
-
# clean_depth, transparent_cams, cam_size, same_focals],
|
| 750 |
-
# outputs=outmodel)
|
| 751 |
-
# transparent_cams.change(model_from_scene_fun,
|
| 752 |
-
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 753 |
-
# clean_depth, transparent_cams, cam_size, same_focals],
|
| 754 |
-
# outputs=outmodel)
|
| 755 |
-
run_dust3r.click(fn=recon_fun,
|
| 756 |
-
inputs=[input_image, schedule, niter, min_conf_thr, as_pointcloud,
|
| 757 |
-
mask_sky, clean_depth, transparent_cams, cam_size,
|
| 758 |
-
scenegraph_type, winsize, refid, same_focals],
|
| 759 |
-
outputs=[scene, outmodel, processed_image, eschernet_input])
|
| 760 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 761 |
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
|
|
|
|
|
|
| 767 |
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
nvs_num, nvs_mode],
|
| 771 |
-
outputs=[mv_images, output_video],
|
| 772 |
-
)#.success(
|
| 773 |
-
# # fn=make3d,
|
| 774 |
-
# # inputs=[mv_images],
|
| 775 |
-
# # outputs=[output_video, output_model_obj, output_model_glb]
|
| 776 |
-
# # )
|
| 777 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 778 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 779 |
|
| 780 |
-
|
| 781 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 782 |
|
| 783 |
-
|
| 784 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
imgs.append(img)
|
| 523 |
return imgs
|
| 524 |
|
| 525 |
+
# def main():
|
| 526 |
+
# dust3r init
|
| 527 |
+
silent = False
|
| 528 |
+
image_size = 224
|
| 529 |
+
weights_path = 'checkpoints/DUSt3R_ViTLarge_BaseDecoder_224_linear.pth'
|
| 530 |
+
model = AsymmetricCroCo3DStereo.from_pretrained(weights_path).to(device)
|
| 531 |
+
# dust3r will write the 3D model inside tmpdirname
|
| 532 |
+
# with tempfile.TemporaryDirectory(suffix='dust3r_gradio_demo') as tmpdirname:
|
| 533 |
+
tmpdirname = os.path.join('logs/user_object')
|
| 534 |
+
# remove the directory if it already exists
|
| 535 |
+
if os.path.exists(tmpdirname):
|
| 536 |
+
shutil.rmtree(tmpdirname)
|
| 537 |
+
os.makedirs(tmpdirname, exist_ok=True)
|
| 538 |
+
if not silent:
|
| 539 |
+
print('Outputing stuff in', tmpdirname)
|
| 540 |
+
|
| 541 |
+
recon_fun = functools.partial(get_reconstructed_scene, tmpdirname, model, device, silent, image_size)
|
| 542 |
+
model_from_scene_fun = functools.partial(get_3D_model_from_scene, tmpdirname, silent)
|
| 543 |
+
|
| 544 |
+
generate_mvs = functools.partial(run_eschernet, tmpdirname)
|
| 545 |
+
|
| 546 |
+
_HEADER_ = '''
|
| 547 |
+
<h2><b>[CVPR'24 Oral] EscherNet: A Generative Model for Scalable View Synthesis</b></h2>
|
| 548 |
+
<b>EscherNet</b> is a multiview diffusion model for scalable generative any-to-any number/pose novel view synthesis.
|
| 549 |
+
|
| 550 |
+
Image views are treated as tokens and the camera pose is encoded by <b>CaPE (Camera Positional Encoding)</b>.
|
| 551 |
+
|
| 552 |
+
<a href='https://kxhit.github.io/EscherNet' target='_blank'>Project</a> <b>|</b>
|
| 553 |
+
<a href='https://github.com/kxhit/EscherNet' target='_blank'>GitHub</a> <b>|</b>
|
| 554 |
+
<a href='https://arxiv.org/abs/2402.03908' target='_blank'>ArXiv</a>
|
| 555 |
+
|
| 556 |
+
<h4><b>Tips:</b></h4>
|
| 557 |
+
|
| 558 |
+
- Our model can take <b>any number input images</b>. The more images you provide, the better the results.
|
| 559 |
+
|
| 560 |
+
- Our model can generate <b>any number and any pose</b> novel views. You can specify the number of views you want to generate. In this demo, we set novel views on an <b>archemedian spiral</b> for simplicity.
|
| 561 |
+
|
| 562 |
+
- The pose estimation is done using <a href='https://github.com/naver/dust3r' target='_blank'>DUSt3R</a>. You can also provide your own poses or get pose via any SLAM system.
|
| 563 |
+
|
| 564 |
+
- The current checkpoint supports 6DoF camera pose and is trained on 30k 3D <a href='https://objaverse.allenai.org/' target='_blank'>Objaverse</a> objects for demo. Scaling is on the roadmap!
|
| 565 |
+
|
| 566 |
+
'''
|
| 567 |
+
|
| 568 |
+
_CITE_ = r"""
|
| 569 |
+
📝 <b>Citation</b>:
|
| 570 |
+
```bibtex
|
| 571 |
+
@article{kong2024eschernet,
|
| 572 |
+
title={EscherNet: A Generative Model for Scalable View Synthesis},
|
| 573 |
+
author={Kong, Xin and Liu, Shikun and Lyu, Xiaoyang and Taher, Marwan and Qi, Xiaojuan and Davison, Andrew J},
|
| 574 |
+
journal={arXiv preprint arXiv:2402.03908},
|
| 575 |
+
year={2024}
|
| 576 |
+
}
|
| 577 |
+
```
|
| 578 |
+
"""
|
| 579 |
+
|
| 580 |
+
with gr.Blocks() as demo:
|
| 581 |
+
gr.Markdown(_HEADER_)
|
| 582 |
+
mv_images = gr.State()
|
| 583 |
+
scene = gr.State(None)
|
| 584 |
+
eschernet_input = gr.State(None)
|
| 585 |
+
with gr.Row(variant="panel"):
|
| 586 |
+
# left column
|
| 587 |
+
with gr.Column():
|
| 588 |
+
with gr.Row():
|
| 589 |
+
input_image = gr.File(file_count="multiple")
|
| 590 |
+
# with gr.Row():
|
| 591 |
+
# # set the size of the window
|
| 592 |
+
# preview_image = gr.Gallery(label='Input Views', rows=1,
|
| 593 |
+
with gr.Row():
|
| 594 |
+
run_dust3r = gr.Button("Get Pose!", elem_id="dust3r")
|
| 595 |
+
with gr.Row():
|
| 596 |
+
processed_image = gr.Gallery(label='Input Views', columns=2, height="100%")
|
| 597 |
+
with gr.Row(variant="panel"):
|
| 598 |
+
# input examples under "examples" folder
|
| 599 |
+
gr.Examples(
|
| 600 |
+
examples=get_examples('examples'),
|
| 601 |
+
# examples=[
|
| 602 |
+
# [['examples/controller/frame000077.jpg', 'examples/controller/frame000032.jpg', 'examples/controller/frame000172.jpg']],
|
| 603 |
+
# [['examples/hairdryer/frame000081.jpg', 'examples/hairdryer/frame000162.jpg', 'examples/hairdryer/frame000003.jpg']],
|
| 604 |
+
# ],
|
| 605 |
+
inputs=[input_image],
|
| 606 |
+
label="Examples (click one set of images to start!)",
|
| 607 |
+
examples_per_page=20
|
| 608 |
+
)
|
| 609 |
|
| 610 |
|
| 611 |
|
| 612 |
|
| 613 |
|
| 614 |
+
# right column
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 615 |
with gr.Column():
|
| 616 |
+
|
| 617 |
with gr.Row():
|
| 618 |
+
outmodel = gr.Model3D()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
|
| 620 |
with gr.Row():
|
| 621 |
+
gr.Markdown('''
|
| 622 |
+
<h4><b>Check if the pose and segmentation looks correct. If not, remove the incorrect images and try again.</b></h4>
|
| 623 |
+
''')
|
| 624 |
+
|
| 625 |
with gr.Row():
|
| 626 |
+
with gr.Group():
|
| 627 |
+
do_remove_background = gr.Checkbox(
|
| 628 |
+
label="Remove Background", value=True
|
| 629 |
+
)
|
| 630 |
+
sample_seed = gr.Number(value=42, label="Seed Value", precision=0)
|
| 631 |
+
|
| 632 |
+
sample_steps = gr.Slider(
|
| 633 |
+
label="Sample Steps",
|
| 634 |
+
minimum=30,
|
| 635 |
+
maximum=75,
|
| 636 |
+
value=50,
|
| 637 |
+
step=5,
|
| 638 |
+
visible=False
|
| 639 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 640 |
|
| 641 |
+
nvs_num = gr.Slider(
|
| 642 |
+
label="Number of Novel Views",
|
| 643 |
+
minimum=5,
|
| 644 |
+
maximum=100,
|
| 645 |
+
value=30,
|
| 646 |
+
step=1
|
| 647 |
+
)
|
| 648 |
|
| 649 |
+
nvs_mode = gr.Dropdown(["archimedes circle"], # "fixed 4 views", "fixed 8 views"
|
| 650 |
+
value="archimedes circle", label="Novel Views Pose Chosen", visible=True)
|
| 651 |
+
|
| 652 |
+
with gr.Row():
|
| 653 |
+
gr.Markdown('''
|
| 654 |
+
<h4><b>Choose your desired novel view poses number and generate! The more output images the longer it takes.</b></h4>
|
| 655 |
+
''')
|
| 656 |
|
| 657 |
+
with gr.Row():
|
| 658 |
+
submit = gr.Button("Submit", elem_id="eschernet", variant="primary")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 659 |
|
| 660 |
+
with gr.Row():
|
| 661 |
+
# mv_show_images = gr.Image(
|
| 662 |
+
# label="Generated Multi-views",
|
| 663 |
+
# type="pil",
|
| 664 |
+
# width=379,
|
| 665 |
+
# interactive=False
|
| 666 |
+
# )
|
| 667 |
+
with gr.Column():
|
| 668 |
+
output_video = gr.Video(
|
| 669 |
+
label="video", format="mp4",
|
| 670 |
+
width=379,
|
| 671 |
+
autoplay=True,
|
| 672 |
+
interactive=False
|
| 673 |
+
)
|
| 674 |
|
| 675 |
+
# with gr.Row():
|
| 676 |
+
# with gr.Tab("OBJ"):
|
| 677 |
+
# output_model_obj = gr.Model3D(
|
| 678 |
+
# label="Output Model (OBJ Format)",
|
| 679 |
+
# #width=768,
|
| 680 |
+
# interactive=False,
|
| 681 |
+
# )
|
| 682 |
+
# gr.Markdown("Note: Downloaded .obj model will be flipped. Export .glb instead or manually flip it before usage.")
|
| 683 |
+
# with gr.Tab("GLB"):
|
| 684 |
+
# output_model_glb = gr.Model3D(
|
| 685 |
+
# label="Output Model (GLB Format)",
|
| 686 |
+
# #width=768,
|
| 687 |
+
# interactive=False,
|
| 688 |
+
# )
|
| 689 |
+
# gr.Markdown("Note: The model shown here has a darker appearance. Download to get correct results.")
|
| 690 |
|
| 691 |
+
with gr.Row():
|
| 692 |
+
gr.Markdown('''The novel views are generated on an archimedean spiral. You can download the video''')
|
| 693 |
+
|
| 694 |
+
gr.Markdown(_CITE_)
|
| 695 |
+
|
| 696 |
+
# keep the dust3r parameters invisible so the UI stays clean
|
| 697 |
+
with gr.Column():
|
| 698 |
+
with gr.Row():
|
| 699 |
+
schedule = gr.Dropdown(["linear", "cosine"],
|
| 700 |
+
value='linear', label="schedule", info="For global alignment!", visible=False)
|
| 701 |
+
niter = gr.Number(value=300, precision=0, minimum=0, maximum=5000,
|
| 702 |
+
label="num_iterations", info="For global alignment!", visible=False)
|
| 703 |
+
scenegraph_type = gr.Dropdown(["complete", "swin", "oneref"],
|
| 704 |
+
value='complete', label="Scenegraph",
|
| 705 |
+
info="Define how to make pairs",
|
| 706 |
+
interactive=True, visible=False)
|
| 707 |
+
same_focals = gr.Checkbox(value=True, label="Focal", info="Use the same focal for all cameras", visible=False)
|
| 708 |
+
winsize = gr.Slider(label="Scene Graph: Window Size", value=1,
|
| 709 |
+
minimum=1, maximum=1, step=1, visible=False)
|
| 710 |
+
refid = gr.Slider(label="Scene Graph: Id", value=0, minimum=0, maximum=0, step=1, visible=False)
|
| 711 |
+
|
| 712 |
+
with gr.Row():
|
| 713 |
+
# adjust the confidence threshold
|
| 714 |
+
min_conf_thr = gr.Slider(label="min_conf_thr", value=3.0, minimum=1.0, maximum=20, step=0.1, visible=False)
|
| 715 |
+
# adjust the camera size in the output pointcloud
|
| 716 |
+
cam_size = gr.Slider(label="cam_size", value=0.05, minimum=0.01, maximum=0.5, step=0.001, visible=False)
|
| 717 |
+
with gr.Row():
|
| 718 |
+
as_pointcloud = gr.Checkbox(value=False, label="As pointcloud", visible=False)
|
| 719 |
+
# two post-processing options are implemented
|
| 720 |
+
mask_sky = gr.Checkbox(value=False, label="Mask sky", visible=False)
|
| 721 |
+
clean_depth = gr.Checkbox(value=True, label="Clean-up depthmaps", visible=False)
|
| 722 |
+
transparent_cams = gr.Checkbox(value=False, label="Transparent cameras", visible=False)
|
| 723 |
|
| 724 |
+
# events
|
| 725 |
+
# scenegraph_type.change(set_scenegraph_options,
|
| 726 |
+
# inputs=[input_image, winsize, refid, scenegraph_type],
|
| 727 |
+
# outputs=[winsize, refid])
|
| 728 |
+
input_image.change(set_scenegraph_options,
|
| 729 |
+
inputs=[input_image, winsize, refid, scenegraph_type],
|
| 730 |
+
outputs=[winsize, refid])
|
| 731 |
+
# min_conf_thr.release(fn=model_from_scene_fun,
|
| 732 |
+
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 733 |
+
# clean_depth, transparent_cams, cam_size, same_focals],
|
| 734 |
+
# outputs=outmodel)
|
| 735 |
+
# cam_size.change(fn=model_from_scene_fun,
|
| 736 |
+
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 737 |
+
# clean_depth, transparent_cams, cam_size, same_focals],
|
| 738 |
+
# outputs=outmodel)
|
| 739 |
+
# as_pointcloud.change(fn=model_from_scene_fun,
|
| 740 |
+
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 741 |
+
# clean_depth, transparent_cams, cam_size, same_focals],
|
| 742 |
+
# outputs=outmodel)
|
| 743 |
+
# mask_sky.change(fn=model_from_scene_fun,
|
| 744 |
+
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 745 |
+
# clean_depth, transparent_cams, cam_size, same_focals],
|
| 746 |
+
# outputs=outmodel)
|
| 747 |
+
# clean_depth.change(fn=model_from_scene_fun,
|
| 748 |
+
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 749 |
+
# clean_depth, transparent_cams, cam_size, same_focals],
|
| 750 |
+
# outputs=outmodel)
|
| 751 |
+
# transparent_cams.change(model_from_scene_fun,
|
| 752 |
+
# inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
|
| 753 |
+
# clean_depth, transparent_cams, cam_size, same_focals],
|
| 754 |
+
# outputs=outmodel)
|
| 755 |
+
run_dust3r.click(fn=recon_fun,
|
| 756 |
+
inputs=[input_image, schedule, niter, min_conf_thr, as_pointcloud,
|
| 757 |
+
mask_sky, clean_depth, transparent_cams, cam_size,
|
| 758 |
+
scenegraph_type, winsize, refid, same_focals],
|
| 759 |
+
outputs=[scene, outmodel, processed_image, eschernet_input])
|
| 760 |
+
|
| 761 |
+
|
| 762 |
+
# events
|
| 763 |
+
# preview images on input change
|
| 764 |
+
input_image.change(fn=preview_input,
|
| 765 |
+
inputs=[input_image],
|
| 766 |
+
outputs=[processed_image])
|
| 767 |
+
|
| 768 |
+
submit.click(fn=generate_mvs,
|
| 769 |
+
inputs=[eschernet_input, sample_steps, sample_seed,
|
| 770 |
+
nvs_num, nvs_mode],
|
| 771 |
+
outputs=[mv_images, output_video],
|
| 772 |
+
)#.success(
|
| 773 |
+
# # fn=make3d,
|
| 774 |
+
# # inputs=[mv_images],
|
| 775 |
+
# # outputs=[output_video, output_model_obj, output_model_glb]
|
| 776 |
+
# # )
|
| 777 |
+
|
| 778 |
+
|
| 779 |
+
|
| 780 |
+
demo.queue(max_size=10)
|
| 781 |
+
demo.launch(share=True, server_name="0.0.0.0", server_port=None)
|
| 782 |
+
|
| 783 |
+
# if __name__ == '__main__':
|
| 784 |
+
# main()
|