Spaces:

fffiloni
/

x-decoder-video

Paused

App Files Files Community

fffiloni committed on Feb 16, 2023

Commit

0beb2f8

1 Parent(s): c2bb385

Update app_d.py

Browse files

Files changed (1) hide show

app_d.py +61 -2

app_d.py CHANGED Viewed

@@ -5,7 +5,65 @@ import numpy as np
 from moviepy.editor import *
 #from share_btn import community_icon_html, loading_icon_html, share_js
-xdecoder = gr.Interface.load(name="spaces/xdecoder/Instruct-X-Decoder")
 def get_frames(video_in):
     frames = []
@@ -69,7 +127,8 @@ def infer(prompt,video_in, trim_value):
     print("set stop frames to: " + str(n_frame))
     for i in frames_list[0:int(n_frame)]:
-        xdecoder_img = xdecoder(i, prompt, fn_index=0)
         #res_image = xdecoder_img[0]
         #rgb_im = images[0].convert("RGB")

 from moviepy.editor import *
 #from share_btn import community_icon_html, loading_icon_html, share_js
+os.system("python -m pip install git+https://github.com/MaureenZOU/detectron2-xyz.git")
+import torch
+import argparse
+from xdecoder.BaseModel import BaseModel
+from xdecoder import build_model
+from utils.distributed import init_distributed
+from utils.arguments import load_opt_from_config_files
+from tasks import *
+def parse_option():
+    """Parse the demo's command-line options and return the resulting namespace.
+
+    The only option defined is ``--conf_files``, the path to the config file;
+    it defaults to ``configs/xdecoder/svlp_focalt_lang.yaml``.
+    """
+    # add_help=False: the parser does not register the automatic -h/--help option.
+    parser = argparse.ArgumentParser('X-Decoder All-in-One Demo', add_help=False)
+    parser.add_argument('--conf_files', default="configs/xdecoder/svlp_focalt_lang.yaml", metavar="FILE", help='path to config file', )
+    # parse_args() with no arguments reads sys.argv[1:]; an argument the parser
+    # does not recognise makes argparse print an error and exit.
+    args = parser.parse_args()
+    return args
+'''
+build args
+'''
+# Parse the command line, then load the options from the selected config file(s).
+args = parse_option()
+opt = load_opt_from_config_files(args.conf_files)
+# init_distributed is a project helper; opt is rebound to whatever it returns.
+opt = init_distributed(opt)
+# META DATA
+# Checkpoint file names, relative to the current working directory.
+# os.path.join with a single argument returns that argument unchanged.
+pretrained_pth_last = os.path.join("xdecoder_focalt_last.pt")
+pretrained_pth_novg = os.path.join("xdecoder_focalt_last_novg.pt")
+# Download each checkpoint with wget only when the file is not already present.
+# The exit status of os.system is not checked, so a failed download is not
+# detected at this point.
+if not os.path.exists(pretrained_pth_last):
+    os.system("wget {}".format("https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last.pt"))
+# NOTE(review): pretrained_pth_novg is downloaded but not referenced again in
+# the visible part of this file - confirm whether it is still needed.
+if not os.path.exists(pretrained_pth_novg):
+    os.system("wget {}".format("https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last_novg.pt"))
+'''
+build model
+'''
+# Wrap the built model in BaseModel, load the "last" checkpoint through
+# from_pretrained, then call .eval() and .cuda() on the result.
+model_last = BaseModel(opt, build_model(opt)).from_pretrained(pretrained_pth_last).eval().cuda()
+# Call the language encoder's get_text_embeddings once at start-up, with
+# gradient tracking disabled.
+# NOTE(review): presumably this initialises default text embeddings for a
+# "background" class before the first request - confirm against the xdecoder code.
+with torch.no_grad():
+    model_last.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(["background", "background"], is_eval=True)
+'''
+inference model
+'''
+# Gradient tracking is disabled for the whole call.
+@torch.no_grad()
+def xdecoder(image, instruction, *args, **kwargs):
+    """Run referring_inpainting_gpt3 on one image with the module-level model_last.
+
+    image must provide a ``convert("RGB")`` method; instruction and any extra
+    positional/keyword arguments are forwarded unchanged. Returns whatever
+    referring_inpainting_gpt3 returns.
+    """
+    # NOTE(review): the caller passes elements of frames_list here; confirm they
+    # are image objects (not file paths), since .convert is called on them.
+    image = image.convert("RGB")
+    # Run the task under CUDA autocast with float16 as the autocast dtype.
+    with torch.autocast(device_type='cuda', dtype=torch.float16):
+        return referring_inpainting_gpt3(model_last, image, instruction, *args, **kwargs)
+#xdecoder = gr.Interface.load(name="spaces/xdecoder/Instruct-X-Decoder")
 def get_frames(video_in):
     frames = []
     print("set stop frames to: " + str(n_frame))
     for i in frames_list[0:int(n_frame)]:
+        #xdecoder_img = xdecoder(i, prompt, fn_index=0)
+        xdecoder_img = xdecoder(i, prompt)
         #res_image = xdecoder_img[0]
         #rgb_im = images[0].convert("RGB")