Upload app.py
Browse files
app.py
CHANGED
|
@@ -38,7 +38,7 @@ from transformers import BlipProcessor, BlipForConditionalGeneration
|
|
| 38 |
def generate_caption(processor, blip_model, raw_image):
|
| 39 |
# unconditional image captioning
|
| 40 |
inputs = processor(raw_image, return_tensors="pt").to(
|
| 41 |
-
|
| 42 |
out = blip_model.generate(**inputs)
|
| 43 |
caption = processor.decode(out[0], skip_special_tokens=True)
|
| 44 |
return caption
|
|
@@ -153,8 +153,6 @@ sam_checkpoint = 'sam_vit_b_01ec64.pth'
|
|
| 153 |
asam_checkpoint = 'asam_vit_b.pth'
|
| 154 |
output_dir = "outputs"
|
| 155 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 156 |
-
|
| 157 |
-
|
| 158 |
blip_processor = None
|
| 159 |
blip_model = None
|
| 160 |
groundingdino_model = None
|
|
@@ -185,7 +183,7 @@ def run_grounded_sam(input_image, text_prompt, task_type, box_threshold, text_th
|
|
| 185 |
blip_processor = blip_processor or BlipProcessor.from_pretrained(
|
| 186 |
"Salesforce/blip-image-captioning-large")
|
| 187 |
blip_model = blip_model or BlipForConditionalGeneration.from_pretrained(
|
| 188 |
-
"Salesforce/blip-image-captioning-large"
|
| 189 |
text_prompt = generate_caption(blip_processor, blip_model, image_pil)
|
| 190 |
print(f"Caption: {text_prompt}")
|
| 191 |
|
|
@@ -281,7 +279,7 @@ def run_grounded_sam(input_image, text_prompt, task_type, box_threshold, text_th
|
|
| 281 |
boxes=transformed_boxes,
|
| 282 |
multimask_output=False,
|
| 283 |
)
|
| 284 |
-
print(torch.sum(masks))
|
| 285 |
# masks: [1, 1, 512, 512]
|
| 286 |
mask_image = Image.new('RGBA', size, color=(0, 0, 0, 0))
|
| 287 |
mask_draw = ImageDraw.Draw(mask_image)
|
|
@@ -449,7 +447,7 @@ if __name__ == "__main__":
|
|
| 449 |
input_image = gr.Image(
|
| 450 |
source='upload', type="pil", value="example9.jpg", tool="sketch",brush_radius=20)
|
| 451 |
task_type = gr.Dropdown(
|
| 452 |
-
["default_box","automatic", "scribble_point", "scribble_box"
|
| 453 |
text_prompt = gr.Textbox(label="Text Prompt", placeholder="bench .", visible=False)
|
| 454 |
run_button = gr.Button(label="Run")
|
| 455 |
with gr.Accordion("Advanced options", open=False):
|
|
|
|
def generate_caption(processor, blip_model, raw_image):
    """Produce an unconditional BLIP caption for *raw_image*.

    Args:
        processor: BLIP processor that encodes the image into model inputs.
        blip_model: BlipForConditionalGeneration model used for generation.
        raw_image: input image (PIL) to caption.

    Returns:
        The decoded caption string with special tokens stripped.
    """
    # Encode the image and move the tensors to the module-level `device`.
    # (The original "#fp 16" note suggests half-precision was considered
    # here — TODO confirm before enabling it.)
    encoded = processor(raw_image, return_tensors="pt").to(device)
    generated_ids = blip_model.generate(**encoded)
    return processor.decode(generated_ids[0], skip_special_tokens=True)
|
|
|
|
# --- Checkpoints and runtime configuration --------------------------------
asam_checkpoint = 'asam_vit_b.pth'  # ASAM ViT-B weights file
output_dir = "outputs"              # directory where results are written

# Prefer the GPU whenever torch can see one; otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Model handles start unset and are loaded lazily on first use
# (e.g. `blip_processor or BlipProcessor.from_pretrained(...)` in
# `run_grounded_sam`), so startup stays cheap until a task needs them.
blip_processor = None
blip_model = None
groundingdino_model = None
|
|
|
|
| 183 |
blip_processor = blip_processor or BlipProcessor.from_pretrained(
|
| 184 |
"Salesforce/blip-image-captioning-large")
|
| 185 |
blip_model = blip_model or BlipForConditionalGeneration.from_pretrained(
|
| 186 |
+
"Salesforce/blip-image-captioning-large").to(device) #torch_dtype=torch.float16
|
| 187 |
text_prompt = generate_caption(blip_processor, blip_model, image_pil)
|
| 188 |
print(f"Caption: {text_prompt}")
|
| 189 |
|
|
|
|
| 279 |
boxes=transformed_boxes,
|
| 280 |
multimask_output=False,
|
| 281 |
)
|
| 282 |
+
print(torch.sum(masks), masks.device)
|
| 283 |
# masks: [1, 1, 512, 512]
|
| 284 |
mask_image = Image.new('RGBA', size, color=(0, 0, 0, 0))
|
| 285 |
mask_draw = ImageDraw.Draw(mask_image)
|
|
|
|
| 447 |
input_image = gr.Image(
|
| 448 |
source='upload', type="pil", value="example9.jpg", tool="sketch",brush_radius=20)
|
| 449 |
task_type = gr.Dropdown(
|
| 450 |
+
["default_box","automatic", "scribble_point", "scribble_box"], value="default_box", label="task_type")
|
| 451 |
text_prompt = gr.Textbox(label="Text Prompt", placeholder="bench .", visible=False)
|
| 452 |
run_button = gr.Button(label="Run")
|
| 453 |
with gr.Accordion("Advanced options", open=False):
|