Added @spaces.GPU decorator and switched to GPU officially
app.py
CHANGED
@@ -28,34 +28,18 @@ cwd = os.getcwd()
 print("Current working directory:", cwd)
 
 # Installing dependencies not in requirements.txt
-
-
-
-
-
-        script = file.read()
-    return call(script, shell=True)
-
-def build_custom_prompter():
-    with open('./build_custom_prompter.sh', 'rb') as file:
-        script = file.read()
-    return call(script, shell=True)
-
-def build_multiscale_deform():
-    with open('./build_multiscale_deform.sh', 'rb') as file:
-        script = file.read()
-    return call(script, shell=True)
-
-build_custom_prompter()
+subprocess.run(
+    shlex.split(
+        "pip install gradio_image_prompter-0.1.0-py3-none-any.whl"
+    )
+)
 from gradio_image_prompter import ImagePrompter
+
 subprocess.run(
     shlex.split(
         "pip install MultiScaleDeformableAttention-1.0-cp310-cp310-linux_x86_64.whl"
     )
 )
-#print("torch version")
-#print(torch.version.cuda)
-#install_add_dependencies()
 
 class AppSteps(Enum):
     JUST_TEXT = 1
@@ -124,6 +108,12 @@ def get_args_parser():
     parser.add_argument("--amp", action="store_true", help="Train with mixed precision")
     return parser
 
+@spaces.GPU
+def get_device():
+    if torch.cuda.is_available():
+        return torch.device('cuda')
+    else:
+        return torch.device('cpu')
 
 # Get counting model.
 @spaces.GPU
@@ -162,8 +152,6 @@ def build_model_and_transforms(args):
     build_func = MODULE_BUILD_FUNCS.get(args.modelname)
     model, _, _ = build_func(args)
 
-    #model.to(device)
-
     checkpoint = torch.load(args.pretrain_model_path, map_location="cpu")["model"]
     model.load_state_dict(checkpoint, strict=False)
 
@@ -174,11 +162,8 @@ def build_model_and_transforms(args):
 
 parser = argparse.ArgumentParser("Counting Application", parents=[get_args_parser()])
 args = parser.parse_args()
-
-
-#else:
-#    args.device = torch.device('cpu')
-args.device = torch.device('cpu')
+
+args.device = get_device()
 model, transform = build_model_and_transforms(args)
 
 examples = [
@@ -233,11 +218,12 @@ def get_ind_to_filter(text, word_ids, keywords):
 
     return inds_to_filter
 
-
+@spaces.GPU
 def count(image, text, prompts, state, device):
     model.to(device)
-
+
     keywords = "" # do not handle this for now
+
     # Handle no prompt case.
     if prompts is None:
         prompts = {"image": image, "points": []}
@@ -259,11 +245,7 @@ def count(image, text, prompts, state, device):
     )
 
     ind_to_filter = get_ind_to_filter(text, model_output["token"][0].word_ids, keywords)
-    print(model_output["token"][0].tokens)
-    print(ind_to_filter)
-    print(model_output["pred_logits"].sigmoid()[0].shape)
     logits = model_output["pred_logits"].sigmoid()[0][:, ind_to_filter]
-    print(logits.shape)
     boxes = model_output["pred_boxes"][0]
     if len(keywords.strip()) > 0:
         box_mask = (logits > CONF_THRESH).sum(dim=-1) == len(ind_to_filter)
@@ -339,7 +321,6 @@ def count_main(image, text, prompts, device):
     input_image_exemplars, exemplars = transform(prompts["image"], {"exemplars": torch.tensor(exemplars)})
     input_image_exemplars = input_image_exemplars.unsqueeze(0).to(device)
     exemplars = [exemplars["exemplars"].to(device)]
-    print("image device: " + str(input_image.device))
 
     with torch.no_grad():
         model_output = model(
@@ -351,11 +332,7 @@ def count_main(image, text, prompts, device):
     )
 
     ind_to_filter = get_ind_to_filter(text, model_output["token"][0].word_ids, keywords)
-    print(model_output["token"][0].tokens)
-    print(ind_to_filter)
-    print(model_output["pred_logits"].sigmoid()[0].shape)
     logits = model_output["pred_logits"].sigmoid()[0][:, ind_to_filter]
-    print(logits.shape)
     boxes = model_output["pred_boxes"][0]
     if len(keywords.strip()) > 0:
         box_mask = (logits > CONF_THRESH).sum(dim=-1) == len(ind_to_filter)
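
For context on the pattern this commit adopts: on ZeroGPU Spaces, decorating a function with @spaces.GPU requests a GPU for the duration of each call to that function, and no GPU is attached while the module is merely being imported. That is why the diff moves the model to the device inside the decorated count function rather than once at load time. Below is a minimal sketch of the pattern, assuming a ZeroGPU Space; the predict function and the placeholder model are illustrative and not part of this Space's code.

import spaces
import torch

# Load weights on CPU at startup; on ZeroGPU, CUDA is not available
# during module import, only inside @spaces.GPU-decorated calls.
model = torch.nn.Linear(4, 2)  # placeholder for the real model

@spaces.GPU  # a GPU is allocated for the duration of each call
def predict(x):
    # Inside the decorated function CUDA is available, so the model
    # and inputs can be moved onto the GPU just-in-time.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    with torch.no_grad():
        return model(x.to(device)).cpu()

Keeping the .to(device) calls inside the decorated function (as count does above) means the same code also runs on CPU-only hardware, since get_device() simply falls back to torch.device('cpu') when CUDA is absent.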