Files changed (1) hide show
  1. app.py +28 -28
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import spaces
2
  import gradio as gr
3
  import torch
@@ -6,12 +7,20 @@ from PIL import Image
6
  from transformers import Sam3Processor, Sam3Model
7
  import requests
8
  import warnings
 
 
9
  warnings.filterwarnings("ignore")
10
 
 
 
 
 
11
  # Global model and processor
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
- model = Sam3Model.from_pretrained("facebook/sam3", torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32).to(device)
14
- processor = Sam3Processor.from_pretrained("facebook/sam3")
 
 
15
 
16
  @spaces.GPU()
17
  def segment(image: Image.Image, text: str, threshold: float, mask_threshold: float):
@@ -56,7 +65,8 @@ def segment(image: Image.Image, text: str, threshold: float, mask_threshold: flo
56
  annotations.append((mask_np, label))
57
 
58
  scores_text = ", ".join([f"{s:.2f}" for s in results['scores'].cpu().numpy()[:5]])
59
- info = f"βœ… Found **{n_masks}** objects matching **'{text}'**\nConfidence scores: {scores_text}{'...' if n_masks > 5 else ''}"
 
60
 
61
  # Return tuple: (base_image, list_of_annotations)
62
  return (image, annotations), info
@@ -78,20 +88,10 @@ def segment_example(image_path: str, prompt: str):
78
 
79
  # Gradio Interface
80
  with gr.Blocks(
81
- theme=gr.themes.Soft(),
82
- title="SAM3 - Promptable Concept Segmentation",
83
- css=".gradio-container {max-width: 1400px !important;}"
84
  ) as demo:
85
- gr.Markdown(
86
- """
87
- # SAM3 - Promptable Concept Segmentation (PCS)
88
-
89
- **SAM3** performs zero-shot instance segmentation using natural language prompts.
90
- Upload an image, enter a text prompt (e.g., "person", "car", "dog"), and get segmentation masks.
91
-
92
- Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
93
- """
94
- )
95
 
96
  gr.Markdown("### Inputs")
97
  with gr.Row(variant="panel"):
@@ -104,13 +104,13 @@ with gr.Blocks(
104
  image_output = gr.AnnotatedImage(
105
  label="Output (Segmented Image)",
106
  height=400,
107
- show_legend=True,
108
  )
109
 
110
  with gr.Row():
111
  text_input = gr.Textbox(
112
  label="Text Prompt",
113
- placeholder="e.g., person, ear, cat, bicycle...",
114
  scale=3
115
  )
116
  clear_btn = gr.Button("πŸ” Clear", size="sm", variant="secondary")
@@ -140,15 +140,15 @@ with gr.Blocks(
140
 
141
  segment_btn = gr.Button("🎯 Segment", variant="primary", size="lg")
142
 
143
- gr.Examples(
144
- examples=[
145
- ["http://images.cocodataset.org/val2017/000000077595.jpg", "cat"],
146
- ],
147
- inputs=[image_input, text_input],
148
- outputs=[image_output, info_output],
149
- fn=segment_example,
150
- cache_examples=False,
151
- )
152
 
153
  clear_btn.click(
154
  fn=clear_all,
@@ -164,7 +164,7 @@ with gr.Blocks(
164
  gr.Markdown(
165
  """
166
  ### Notes
167
- - **Model**: [facebook/sam3](https://huggingface.co/facebook/sam3)
168
  - Click on segments in the output to see labels
169
  - GPU recommended for faster inference
170
  """
 
1
+ import os
2
  import spaces
3
  import gradio as gr
4
  import torch
 
7
  from transformers import Sam3Processor, Sam3Model
8
  import requests
9
  import warnings
10
+ from huggingface_hub import login, list_repo_files
11
+
12
  warnings.filterwarnings("ignore")
13
 
14
+ HF_TOKEN = os.environ.get("HF_TOKEN")
15
+ login(token=HF_TOKEN)
16
+
17
+
18
  # Global model and processor
19
  device = "cuda" if torch.cuda.is_available() else "cpu"
20
+ model = Sam3Model.from_pretrained("giginho83/sa3-base", token=HF_TOKEN, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32).to(device)
21
+ processor = Sam3Processor.from_pretrained("giginho83/sa3-base", token=HF_TOKEN)
22
+
23
+ print("Model loaded successfully!")
24
 
25
  @spaces.GPU()
26
  def segment(image: Image.Image, text: str, threshold: float, mask_threshold: float):
 
65
  annotations.append((mask_np, label))
66
 
67
  scores_text = ", ".join([f"{s:.2f}" for s in results['scores'].cpu().numpy()[:5]])
68
+ #info = f"βœ… Found **{n_masks}** objects matching **'{text}'**\nConfidence scores: {scores_text}{'...' if n_masks > 5 else ''}"
69
+ info = f"βœ… Detected objects: **{n_masks}**"
70
 
71
  # Return tuple: (base_image, list_of_annotations)
72
  return (image, annotations), info
 
88
 
89
  # Gradio Interface
90
  with gr.Blocks(
91
+ #theme=gr.themes.Soft(),
92
+ title="Count items in supermarket",
93
+ #css=".gradio-container {max-width: 1400px !important;}"
94
  ) as demo:
 
 
 
 
 
 
 
 
 
 
95
 
96
  gr.Markdown("### Inputs")
97
  with gr.Row(variant="panel"):
 
104
  image_output = gr.AnnotatedImage(
105
  label="Output (Segmented Image)",
106
  height=400,
107
+ show_legend=False,
108
  )
109
 
110
  with gr.Row():
111
  text_input = gr.Textbox(
112
  label="Text Prompt",
113
+ placeholder="e.g., bottle box, jar, tube...",
114
  scale=3
115
  )
116
  clear_btn = gr.Button("πŸ” Clear", size="sm", variant="secondary")
 
140
 
141
  segment_btn = gr.Button("🎯 Segment", variant="primary", size="lg")
142
 
143
+ # gr.Examples(
144
+ # examples=[
145
+ # ["http://images.cocodataset.org/val2017/000000077595.jpg", "cat"],
146
+ # ],
147
+ # inputs=[image_input, text_input],
148
+ # outputs=[image_output, info_output],
149
+ # fn=segment_example,
150
+ # cache_examples=False,
151
+ # )
152
 
153
  clear_btn.click(
154
  fn=clear_all,
 
164
  gr.Markdown(
165
  """
166
  ### Notes
167
+ - **Model**: [giginho83/sa3-base](https://huggingface.co/giginho83/sa3-base)
168
  - Click on segments in the output to see labels
169
  - GPU recommended for faster inference
170
  """