zhouyik committed
Commit 5814bce · verified · 1 Parent(s): 7d2576b

Upload ./app.py with huggingface_hub

Files changed (1): app.py (+28 −13)
app.py CHANGED
@@ -1,5 +1,6 @@
 # Modified from https://huggingface.co/spaces/PolyU-ChenLab/UniPixel/blob/main/app.py
-
+import os
+from pathlib import Path
 import random
 import re
 import colorsys
@@ -11,13 +12,22 @@ import imageio.v3 as iio
 
 import torch
 from torchvision.transforms.functional import to_pil_image
+from huggingface_hub import hf_hub_download
 
 import spaces
 import gradio as gr
 
+GRADIO_TMP = os.path.join(os.path.dirname(__file__), ".gradio_tmp")
+Path(GRADIO_TMP).mkdir(parents=True, exist_ok=True)
+
+os.environ["GRADIO_TEMP_DIR"] = GRADIO_TMP
+os.environ["TMPDIR"] = GRADIO_TMP
+os.environ["TEMP"] = GRADIO_TMP
+os.environ["TMP"] = GRADIO_TMP
+
 from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
-from .sam2 import VQ_SAM2, VQ_SAM2Config, SAM2Config
-from .visualizer import sample_color, draw_mask
+from sam2 import VQ_SAM2, VQ_SAM2Config, SAM2Config
+from visualizer import sample_color, draw_mask
 
 class DirectResize:
     def __init__(self, target_length: int) -> None:
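Note: this hunk does two things. The relative imports (from .sam2, from .visualizer) become absolute, which is what lets app.py run as a top-level script rather than as a package module. And all temporary files are routed into a writable folder beside the script: Gradio reads GRADIO_TEMP_DIR when it stages uploads, and Python's tempfile module falls back on TMPDIR/TEMP/TMP. A minimal, self-contained sketch of the same redirection pattern (the .gradio_tmp name is taken from the diff):

import os
from pathlib import Path

# Route Gradio's staged uploads and tempfile output into a folder next to
# this script, which is writable on a Hugging Face Space.
TMP_DIR = Path(__file__).parent / ".gradio_tmp"
TMP_DIR.mkdir(parents=True, exist_ok=True)
for var in ("GRADIO_TEMP_DIR", "TMPDIR", "TEMP", "TMP"):
    os.environ[var] = str(TMP_DIR)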
@@ -85,10 +95,8 @@ function init() {
         document.querySelector('main').style.maxWidth = '1536px'
     }
     document.getElementById('query_1').addEventListener('keydown', function f1(e) { if (e.key === 'Enter') { document.getElementById('submit_1').click() } })
-    document.getElementById('query_2').addEventListener('keydown', function f2(e) { if (e.key === 'Enter') { document.getElementById('submit_2').click() } })
-    document.getElementById('query_3').addEventListener('keydown', function f3(e) { if (e.key === 'Enter') { document.getElementById('submit_3').click() } })
-    document.getElementById('query_4').addEventListener('keydown', function f4(e) { if (e.key === 'Enter') { document.getElementById('submit_4').click() } })
 }
+window.addEventListener('load', init);
 """
 
 device = torch.device('cuda')
@@ -100,10 +108,12 @@ model = Qwen3VLForConditionalGeneration.from_pretrained(
 processor = AutoProcessor.from_pretrained(MODEL)
 
 # build vq-sam2 model
+sam2_ckpt_local = hf_hub_download(repo_id=MODEL, filename="sam2.1_hiera_large.pt")
+mask_tokenizer_local = hf_hub_download(repo_id=MODEL, filename="mask_tokenizer_256x2.pth")
 CODEBOOK_SIZE = 256
 CODEBOOK_DEPTH = 2
 sam2_config = SAM2Config(
-    ckpt_path=MODEL+"/sam2.1_hiera_large.pt",
+    ckpt_path=sam2_ckpt_local,
 )
 vq_sam2_config = VQ_SAM2Config(
     sam2_config=sam2_config,
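Note: the switch to hf_hub_download suggests MODEL is a Hub repo id rather than a local directory, so the old MODEL+"/sam2.1_hiera_large.pt" concatenation never named a real file on disk. hf_hub_download fetches a single file from the repo (reusing the local cache on later calls) and returns the absolute path of the downloaded copy. A sketch of the call, with "user/repo" standing in for the actual repo id:

from huggingface_hub import hf_hub_download
import torch

# Download one file from the Hub (cached after the first call) and get
# back a local filesystem path that torch.load can open.
ckpt = hf_hub_download(repo_id="user/repo", filename="sam2.1_hiera_large.pt")  # placeholder repo id
state = torch.load(ckpt, map_location="cpu")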
@@ -113,7 +123,7 @@ vq_sam2_config = VQ_SAM2Config(
     latent_dim=256,
 )
 vq_sam2 = VQ_SAM2(vq_sam2_config).cuda().eval()
-state = torch.load(MODEL+"/mask_tokenizer_256x2.pth", map_location="cpu")
+state = torch.load(mask_tokenizer_local, map_location="cpu")
 vq_sam2.load_state_dict(state)
 sam2_image_processor = DirectResize(1024)
 
@@ -126,22 +136,23 @@ color_map_light = {
 }
 
 def enable_btns():
-    return (gr.Button(interactive=True), ) * 4
+    return (gr.update(interactive=True), ) * 4
 
 
 def disable_btns():
-    return (gr.Button(interactive=False), ) * 4
+    return (gr.update(interactive=False), ) * 4
 
 
 def reset_seg():
-    return 16, gr.Button(interactive=False)
+    return 16, gr.update(interactive=False)
 
 
 def reset_reg():
-    return 1, gr.Button(interactive=False)
+    return 1, gr.update(interactive=False)
 
 @spaces.GPU
 def infer_seg(media, query):
+    print("=======>>>enter infer seg")
     global model
 
     if not media:
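Note: the four button helpers now return gr.update(...) instead of constructing fresh gr.Button objects. gr.update emits a property patch that Gradio applies to the component already rendered in the UI, which is the idiomatic way to toggle interactivity from an event handler. A minimal self-contained example of the pattern (the component and handler names are illustrative):

import gradio as gr

def lock_button():
    # Patch the existing button's properties rather than replacing it.
    return gr.update(interactive=False)

with gr.Blocks() as demo:
    btn = gr.Button('Run')
    btn.click(lock_button, outputs=[btn])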
@@ -152,7 +163,7 @@ def infer_seg(media, query):
         gr.Warning('Please provide a text prompt.')
         return None, None, None
 
-    image = Image.open(path).convert('RGB')
+    image = Image.open(media).convert('RGB')
     ori_width, ori_height = image.size
     messages = [
         {
@@ -190,6 +201,9 @@ def infer_seg(media, query):
     output_text = processor.batch_decode(
         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )[0]
+
+    print("========>>>>output_text", output_text)
+    exit(0)
 
     quant_ids = extract_mt_token_ids_v1(output_text)
     if len(quant_ids) % CODEBOOK_DEPTH != 0:
@@ -284,6 +298,7 @@ def build_demo():
         # with gr.Tab('Mask Understanding'):
         #     pass
 
+    return demo
 
 if __name__ == '__main__':
     demo = build_demo()
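Note: the added return demo is what makes the entry point usable; without it build_demo() appears to fall off the end and return None, so demo = build_demo() would bind None. The presumed usage looks like the sketch below (the launch() call is an assumption, since the diff context ends at the assignment):

if __name__ == '__main__':
    demo = build_demo()
    demo.launch()  # assumed: the standard way to start a Blocks app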
 