Spaces:
Running
on
Zero
Running
on
Zero
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| from transformers import AutoModel | |
| from theia.decoding import load_feature_stats, prepare_depth_decoder, prepare_mask_generator, decode_everything | |
# Run on the first CUDA device when PyTorch reports one; fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
# Model ids whose feature statistics are read from the "feature_stats" directory.
_TARGET_MODEL_NAMES = [
    "google/vit-huge-patch14-224-in21k",
    "facebook/dinov2-large",
    "openai/clip-vit-large-patch14",
    "facebook/sam-vit-huge",
    "LiheYoung/depth-anything-large-hf",
]

# Model id handed to ``prepare_depth_decoder``.
_DEPTH_ANYTHING_MODEL_NAME = "LiheYoung/depth-anything-large-hf"

# Lazily populated cache of everything ``decode_everything`` needs besides the
# input images. Keys match the keyword arguments of ``decode_everything`` so
# the dict can be splatted directly into the call.
_THEIA_RESOURCES = {}


def _load_theia_resources():
    """Return the models and feature statistics, loading them on first use.

    Loading is done once per process instead of once per request: none of
    these objects depend on the input image, so re-creating them on every
    call only adds latency.

    Returns:
        A dict with the keys ``theia_model``, ``feature_means``,
        ``feature_vars``, ``mask_generator``, ``sam_model`` and
        ``depth_anything_decoder``.
    """
    if not _THEIA_RESOURCES:
        # NOTE: ``trust_remote_code=True`` lets transformers execute Python
        # code shipped in the model repository.
        theia_model = AutoModel.from_pretrained(
            "theaiinstitute/theia-base-patch16-224-cdiv", trust_remote_code=True
        )
        theia_model = theia_model.to(device)

        feature_means, feature_vars = load_feature_stats(
            _TARGET_MODEL_NAMES, stat_file_root="feature_stats"
        )
        mask_generator, sam_model = prepare_mask_generator(device)
        # The second return value of ``prepare_depth_decoder`` is not used.
        depth_anything_decoder, _ = prepare_depth_decoder(_DEPTH_ANYTHING_MODEL_NAME, device)

        # Populate the cache in a single step so that a failure in any of the
        # loaders above leaves it empty and the next call retries from scratch.
        _THEIA_RESOURCES.update(
            theia_model=theia_model,
            feature_means=feature_means,
            feature_vars=feature_vars,
            mask_generator=mask_generator,
            sam_model=sam_model,
            depth_anything_decoder=depth_anything_decoder,
        )
    return _THEIA_RESOURCES


def run_theia(image):
    """Decode one image with Theia and return the stacked visualization.

    Args:
        image: The value Gradio passes for the ``"image"`` input
            (presumably a numpy image array; confirm against the Gradio
            version in use).

    Returns:
        ``np.stack`` of one entry per input image, where each entry is the
        Theia decode result stacked vertically on top of the matching result
        from ``gt_decode_results``.
    """
    resources = _load_theia_resources()

    theia_decode_results, gt_decode_results = decode_everything(
        images=[image],  # decode_everything takes a list; wrap the single image
        pred_iou_thresh=0.5,
        stability_score_thresh=0.7,
        gt=True,
        device=device,
        **resources,
    )

    # NOTE(review): np.stack adds a leading axis, so with one input image the
    # result has a leading dimension of 1. Confirm that the Gradio "image"
    # output accepts this shape; if it does not, returning the first frame
    # (``vis_video[0]``) is the likely fix.
    vis_video = np.stack(
        [np.vstack([tr, gtr]) for tr, gtr in zip(theia_decode_results, gt_decode_results, strict=False)]
    )
    return vis_video
# Build the web UI: a single image goes in, run_theia's visualization comes out.
demo = gr.Interface(
    fn=run_theia,
    inputs="image",
    outputs="image",
)

# Start serving the interface.
demo.launch()