P3ngLiu commited on
Commit
4c7ae41
·
1 Parent(s): 5d9f81c
Files changed (2) hide show
  1. README.md +1 -1
  2. demo/gradio_demo_with_sam3.py +12 -3
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: pink
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.49.1
8
- app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
  short_description: Complex text label detection using SAM3 with VLM-FO1
 
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.49.1
8
+ app_file: demo/gradio_demo_with_sam3.py
9
  pinned: false
10
  license: apache-2.0
11
  short_description: Complex text label detection using SAM3 with VLM-FO1
demo/gradio_demo_with_sam3.py CHANGED
@@ -310,15 +310,24 @@ def launch_demo():
310
  if __name__ == "__main__":
311
  # model_path = './resources/VLM-FO1_Qwen2.5-VL-3B-v01'
312
  # sam3_model_path = './resources/sam3/sam3.pt'
313
- from modelscope import snapshot_download
314
- model_dir = snapshot_download('facebook/sam3', allow_patterns='sam3.pt')
 
 
 
 
 
 
 
 
 
315
 
316
  model_path = 'omlab/VLM-FO1_Qwen2.5-VL-3B-v01'
317
  tokenizer, model, image_processors = load_pretrained_model(
318
  model_path=model_path,
319
  device="cuda:0",
320
  )
321
- sam3_model = build_sam3_image_model(checkpoint_path=model_dir+'/sam3.pt', device="cuda",bpe_path='/home/user/app/detect_tools/sam3/assets/bpe_simple_vocab_16e6.txt.gz')
322
  sam3_processor = Sam3Processor(sam3_model, confidence_threshold=0.0, device="cuda")
323
 
324
  demo = launch_demo()
 
310
  if __name__ == "__main__":
311
  # model_path = './resources/VLM-FO1_Qwen2.5-VL-3B-v01'
312
  # sam3_model_path = './resources/sam3/sam3.pt'
313
+ # from modelscope import snapshot_download
314
+ # model_dir = snapshot_download('facebook/sam3', allow_patterns='sam3.pt')
315
+
316
+ # from huggingface_hub import hf_hub_download
317
+ # model_dir = hf_hub_download(
318
+ # repo_id='facebook/sam3',
319
+ # filename='sam3.pt',
320
+ # local_dir="./sam3_model"
321
+
322
+ import os
323
+ exit_code = os.system(f"wget -c https://airesources.oss-cn-hangzhou.aliyuncs.com/lp/wheel/sam3.pt")
324
 
325
  model_path = 'omlab/VLM-FO1_Qwen2.5-VL-3B-v01'
326
  tokenizer, model, image_processors = load_pretrained_model(
327
  model_path=model_path,
328
  device="cuda:0",
329
  )
330
+ sam3_model = build_sam3_image_model(checkpoint_path='./sam3.pt', device="cuda",bpe_path='/home/user/app/detect_tools/sam3/assets/bpe_simple_vocab_16e6.txt.gz')
331
  sam3_processor = Sam3Processor(sam3_model, confidence_threshold=0.0, device="cuda")
332
 
333
  demo = launch_demo()