ASAM-Team committed
Commit 5d47a5f · 1 Parent(s): 4fd08a4

Upload app.py

Files changed (1):
  1. app.py +4 -6
app.py CHANGED
@@ -38,7 +38,7 @@ from transformers import BlipProcessor, BlipForConditionalGeneration
 def generate_caption(processor, blip_model, raw_image):
     # unconditional image captioning
     inputs = processor(raw_image, return_tensors="pt").to(
-        "cuda", torch.float16)
+        device) #fp 16
     out = blip_model.generate(**inputs)
     caption = processor.decode(out[0], skip_special_tokens=True)
     return caption
@@ -153,8 +153,6 @@ sam_checkpoint = 'sam_vit_b_01ec64.pth'
 asam_checkpoint = 'asam_vit_b.pth'
 output_dir = "outputs"
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
-
-
 blip_processor = None
 blip_model = None
 groundingdino_model = None
@@ -185,7 +183,7 @@ def run_grounded_sam(input_image, text_prompt, task_type, box_threshold, text_th
         blip_processor = blip_processor or BlipProcessor.from_pretrained(
             "Salesforce/blip-image-captioning-large")
         blip_model = blip_model or BlipForConditionalGeneration.from_pretrained(
-            "Salesforce/blip-image-captioning-large", torch_dtype=torch.float16).to("cuda")
+            "Salesforce/blip-image-captioning-large").to(device) #torch_dtype=torch.float16
         text_prompt = generate_caption(blip_processor, blip_model, image_pil)
         print(f"Caption: {text_prompt}")
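The first and third hunks together move BLIP off the hard-coded "cuda"/fp16 path and onto the shared device flag, so the Space also runs on CPU-only hardware. A minimal standalone sketch of that pattern, using only the transformers calls already visible in the diff (the example image path is illustrative, not part of the commit):

    import torch
    from PIL import Image
    from transformers import BlipProcessor, BlipForConditionalGeneration

    # Pick the device once and reuse it everywhere.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
    # Load in the default dtype (fp32); the fp16 cast was dropped along with
    # the hard-coded "cuda", since half-precision inference assumes a GPU.
    blip_model = BlipForConditionalGeneration.from_pretrained(
        "Salesforce/blip-image-captioning-large").to(device)

    def generate_caption(raw_image):
        # unconditional image captioning: image in, caption out, no text prompt
        inputs = processor(raw_image, return_tensors="pt").to(device)
        out = blip_model.generate(**inputs)
        return processor.decode(out[0], skip_special_tokens=True)

    print(generate_caption(Image.open("example9.jpg").convert("RGB")))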
@@ -281,7 +279,7 @@ def run_grounded_sam(input_image, text_prompt, task_type, box_threshold, text_th
             boxes=transformed_boxes,
             multimask_output=False,
         )
-        print(torch.sum(masks))
+        print(torch.sum(masks), masks.device)
         # masks: [1, 1, 512, 512]
         mask_image = Image.new('RGBA', size, color=(0, 0, 0, 0))
         mask_draw = ImageDraw.Draw(mask_image)
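The widened print in this hunk is a sanity check: summing a boolean mask tensor counts its foreground pixels, and .device shows where the predictor left the tensor. A toy illustration with a made-up mask of the commented [1, 1, 512, 512] shape:

    import torch

    # stand-in for the predictor output: one box, one mask, 512x512
    masks = torch.zeros(1, 1, 512, 512, dtype=torch.bool)
    masks[0, 0, 100:200, 100:200] = True  # a 100x100 foreground square

    print(torch.sum(masks), masks.device)  # tensor(10000) cpu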
@@ -449,7 +447,7 @@ if __name__ == "__main__":
             input_image = gr.Image(
                 source='upload', type="pil", value="example9.jpg", tool="sketch",brush_radius=20)
             task_type = gr.Dropdown(
-                ["default_box","automatic", "scribble_point", "scribble_box", "text"], value="default_box", label="task_type")
+                ["default_box","automatic", "scribble_point", "scribble_box"], value="default_box", label="task_type")
             text_prompt = gr.Textbox(label="Text Prompt", placeholder="bench .", visible=False)
             run_button = gr.Button(label="Run")
             with gr.Accordion("Advanced options", open=False):
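For orientation, the widgets the last hunk touches can be sketched standalone. This assumes Gradio 3.x (where gr.Image still accepts source, tool, and brush_radius); dropping "text" from the choices removes the text-prompt task from the UI, while keeping the hidden textbox, presumably because the automatic path still fills text_prompt with a BLIP caption internally:

    import gradio as gr

    with gr.Blocks() as demo:
        input_image = gr.Image(
            source='upload', type="pil", value="example9.jpg",
            tool="sketch", brush_radius=20)
        # "text" removed: text-prompted grounding can no longer be selected
        task_type = gr.Dropdown(
            ["default_box", "automatic", "scribble_point", "scribble_box"],
            value="default_box", label="task_type")
        text_prompt = gr.Textbox(label="Text Prompt", placeholder="bench .",
                                 visible=False)
        run_button = gr.Button("Run")

    demo.launch()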