Spaces:
Paused
Paused
Update app_d.py
Browse files
app_d.py
CHANGED
|
@@ -5,7 +5,65 @@ import numpy as np
|
|
| 5 |
from moviepy.editor import *
|
| 6 |
#from share_btn import community_icon_html, loading_icon_html, share_js
|
| 7 |
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
def get_frames(video_in):
|
| 11 |
frames = []
|
|
@@ -69,7 +127,8 @@ def infer(prompt,video_in, trim_value):
|
|
| 69 |
print("set stop frames to: " + str(n_frame))
|
| 70 |
|
| 71 |
for i in frames_list[0:int(n_frame)]:
|
| 72 |
-
xdecoder_img = xdecoder(i, prompt, fn_index=0)
|
|
|
|
| 73 |
#res_image = xdecoder_img[0]
|
| 74 |
#rgb_im = images[0].convert("RGB")
|
| 75 |
|
|
|
|
| 5 |
from moviepy.editor import *
|
| 6 |
#from share_btn import community_icon_html, loading_icon_html, share_js
|
| 7 |
|
| 8 |
+
|
| 9 |
+
# Install the detectron2 fork at start-up.  pip is run through the interpreter
# that is executing this script (sys.executable) instead of whatever `python`
# resolves to on PATH, so the package is installed into the running
# environment.
import subprocess
import sys

_pip_status = subprocess.run(
    [
        sys.executable, "-m", "pip", "install",
        "git+https://github.com/MaureenZOU/detectron2-xyz.git",
    ],
    check=False,  # stay best-effort: a failed install must not abort start-up here
).returncode
if _pip_status != 0:
    # Surface the failure so a later import error can be traced back to it.
    print("pip install of detectron2-xyz failed with exit status " + str(_pip_status))
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
import torch
|
| 13 |
+
import argparse
|
| 14 |
+
|
| 15 |
+
from xdecoder.BaseModel import BaseModel
|
| 16 |
+
from xdecoder import build_model
|
| 17 |
+
from utils.distributed import init_distributed
|
| 18 |
+
from utils.arguments import load_opt_from_config_files
|
| 19 |
+
|
| 20 |
+
from tasks import *
|
| 21 |
+
|
| 22 |
+
def parse_option():
    """Parse the demo's command-line options.

    The only recognised option is ``--conf_files`` (default
    ``configs/xdecoder/svlp_focalt_lang.yaml``), the path to the config file.

    Arguments the parser does not know are ignored rather than aborting the
    process: this function is called at module level, where ``sys.argv`` may
    belong to whatever launcher imported the module.

    Returns:
        argparse.Namespace: namespace carrying a ``conf_files`` attribute.
    """
    parser = argparse.ArgumentParser('X-Decoder All-in-One Demo', add_help=False)
    parser.add_argument(
        '--conf_files',
        default="configs/xdecoder/svlp_focalt_lang.yaml",
        metavar="FILE",
        help='path to config file',
    )
    # parse_known_args() returns (namespace, leftover_argv); the leftovers are
    # deliberately dropped.
    args, _unknown = parser.parse_known_args()
    return args
|
| 28 |
+
|
| 29 |
+
'''
|
| 30 |
+
build args
|
| 31 |
+
'''
|
| 32 |
+
# Parse the command line, load the options from the config file(s) it names,
# and pass them through init_distributed before anything else reads `opt`.
args = parse_option()
opt = init_distributed(load_opt_from_config_files(args.conf_files))
|
| 35 |
+
|
| 36 |
+
# META DATA
|
| 37 |
+
# Checkpoint file names, resolved relative to the current working directory
# (which is also where wget saves them).
pretrained_pth_last = "xdecoder_focalt_last.pt"
pretrained_pth_novg = "xdecoder_focalt_last_novg.pt"

# Download every checkpoint that is not on disk yet.
for _ckpt_name in (pretrained_pth_last, pretrained_pth_novg):
    if os.path.exists(_ckpt_name):
        continue
    _wget_status = os.system("wget {}".format(
        "https://projects4jw.blob.core.windows.net/x-decoder/release/" + _ckpt_name))
    if _wget_status != 0:
        # The file did not exist before this attempt, so anything present now
        # is an incomplete download; remove it so the existence check above
        # does not skip the download on the next start.
        if os.path.exists(_ckpt_name):
            os.remove(_ckpt_name)
        print("download of " + _ckpt_name + " failed with status " + str(_wget_status))
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
'''
|
| 48 |
+
build model
|
| 49 |
+
'''
|
| 50 |
+
# Wrap the built model, load the "last" checkpoint into it, then call
# .eval() and .cuda() on the result.
model_last = BaseModel(opt, build_model(opt))
model_last = model_last.from_pretrained(pretrained_pth_last)
model_last = model_last.eval().cuda()

# Call the language encoder once on a placeholder vocabulary with gradient
# tracking disabled.
# NOTE(review): presumably this primes the text embeddings used at inference
# time - confirm against the lang_encoder implementation.
with torch.no_grad():
    model_last.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(
        ["background", "background"],
        is_eval=True,
    )
|
| 54 |
+
|
| 55 |
+
'''
|
| 56 |
+
inference model
|
| 57 |
+
'''
|
| 58 |
+
|
| 59 |
+
@torch.no_grad()
def xdecoder(image, instruction, *args, **kwargs):
    """Call ``referring_inpainting_gpt3`` on one image using ``model_last``.

    Args:
        image: object exposing ``convert``; it is converted to "RGB" before
            use (presumably a PIL image - TODO confirm against callers).
        instruction: forwarded unchanged to ``referring_inpainting_gpt3``.
        *args: extra positional arguments, forwarded unchanged.
        **kwargs: extra keyword arguments, forwarded unchanged.

    Returns:
        Whatever ``referring_inpainting_gpt3`` returns.
    """
    rgb_image = image.convert("RGB")
    # The call runs under CUDA autocast with float16, and (via the decorator)
    # with gradient tracking disabled.
    fp16_autocast = torch.autocast(device_type='cuda', dtype=torch.float16)
    with fp16_autocast:
        result = referring_inpainting_gpt3(model_last, rgb_image, instruction, *args, **kwargs)
    return result
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
#xdecoder = gr.Interface.load(name="spaces/xdecoder/Instruct-X-Decoder")
|
| 67 |
|
| 68 |
def get_frames(video_in):
|
| 69 |
frames = []
|
|
|
|
| 127 |
print("set stop frames to: " + str(n_frame))
|
| 128 |
|
| 129 |
for i in frames_list[0:int(n_frame)]:
|
| 130 |
+
#xdecoder_img = xdecoder(i, prompt, fn_index=0)
|
| 131 |
+
xdecoder_img = xdecoder(i, prompt)
|
| 132 |
#res_image = xdecoder_img[0]
|
| 133 |
#rgb_im = images[0].convert("RGB")
|
| 134 |
|