🎨 Redesign from AnyCoder

#4
Files changed (1) hide show
  1. app.py +198 -96
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import spaces
2
  import gradio as gr
3
  import torch
@@ -20,10 +21,10 @@ def segment(image: Image.Image, text: str, threshold: float, mask_threshold: flo
20
  Returns format compatible with gr.AnnotatedImage: (image, [(mask, label), ...])
21
  """
22
  if image is None:
23
- return None, "❌ Please upload an image."
24
 
25
  if not text.strip():
26
- return (image, []), "❌ Please enter a text prompt."
27
 
28
  try:
29
  inputs = processor(images=image, text=text.strip(), return_tensors="pt").to(device)
@@ -44,29 +45,25 @@ def segment(image: Image.Image, text: str, threshold: float, mask_threshold: flo
44
 
45
  n_masks = len(results['masks'])
46
  if n_masks == 0:
47
- return (image, []), f"❌ No objects found matching '{text}' (try adjusting thresholds)."
48
 
49
- # Format for AnnotatedImage: list of (mask, label) tuples
50
- # mask should be numpy array with values 0-1 (float) matching image dimensions
51
  annotations = []
52
  for i, (mask, score) in enumerate(zip(results['masks'], results['scores'])):
53
- # Convert binary mask to float numpy array (0-1 range)
54
  mask_np = mask.cpu().numpy().astype(np.float32)
55
- label = f"{text} #{i+1} ({score:.2f})"
56
  annotations.append((mask_np, label))
57
 
58
  scores_text = ", ".join([f"{s:.2f}" for s in results['scores'].cpu().numpy()[:5]])
59
- info = f"βœ… Found **{n_masks}** objects matching **'{text}'**\nConfidence scores: {scores_text}{'...' if n_masks > 5 else ''}"
60
 
61
- # Return tuple: (base_image, list_of_annotations)
62
  return (image, annotations), info
63
 
64
  except Exception as e:
65
- return (image, []), f"❌ Error during segmentation: {str(e)}"
66
 
67
  def clear_all():
68
  """Clear all inputs and outputs"""
69
- return None, "", None, 0.5, 0.5, "πŸ“ Enter a prompt and click **Segment** to start."
70
 
71
  def segment_example(image_path: str, prompt: str):
72
  """Handle example clicks"""
@@ -76,99 +73,204 @@ def segment_example(image_path: str, prompt: str):
76
  image = Image.open(image_path).convert("RGB")
77
  return segment(image, prompt, 0.5, 0.5)
78
 
79
- # Gradio Interface
80
- with gr.Blocks(
81
- theme=gr.themes.Soft(),
82
- title="SAM3 - Promptable Concept Segmentation",
83
- css=".gradio-container {max-width: 1400px !important;}"
84
- ) as demo:
85
- gr.Markdown(
86
- """
87
- # SAM3 - Promptable Concept Segmentation (PCS)
88
-
89
- **SAM3** performs zero-shot instance segmentation using natural language prompts.
90
- Upload an image, enter a text prompt (e.g., "person", "car", "dog"), and get segmentation masks.
91
-
92
- Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
93
- """
94
- )
95
-
96
- gr.Markdown("### Inputs")
97
- with gr.Row(variant="panel"):
98
- image_input = gr.Image(
99
- label="Input Image",
100
- type="pil",
101
- height=400,
102
- )
103
- # AnnotatedImage expects: (base_image, [(mask, label), ...])
104
- image_output = gr.AnnotatedImage(
105
- label="Output (Segmented Image)",
106
- height=400,
107
- show_legend=True,
108
- )
109
-
110
- with gr.Row():
111
- text_input = gr.Textbox(
112
- label="Text Prompt",
113
- placeholder="e.g., person, ear, cat, bicycle...",
114
- scale=3
115
- )
116
- clear_btn = gr.Button("πŸ” Clear", size="sm", variant="secondary")
117
 
118
- with gr.Row():
119
- thresh_slider = gr.Slider(
120
- minimum=0.0,
121
- maximum=1.0,
122
- value=0.5,
123
- step=0.01,
124
- label="Detection Threshold",
125
- info="Higher = fewer detections"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  )
127
- mask_thresh_slider = gr.Slider(
128
- minimum=0.0,
129
- maximum=1.0,
130
- value=0.5,
131
- step=0.01,
132
- label="Mask Threshold",
133
- info="Higher = sharper masks"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  )
135
-
136
- info_output = gr.Markdown(
137
- value="πŸ“ Enter a prompt and click **Segment** to start.",
138
- label="Info / Results"
139
- )
140
-
141
- segment_btn = gr.Button("🎯 Segment", variant="primary", size="lg")
142
-
143
- gr.Examples(
144
- examples=[
145
- ["http://images.cocodataset.org/val2017/000000077595.jpg", "cat"],
146
- ],
147
- inputs=[image_input, text_input],
148
- outputs=[image_output, info_output],
149
- fn=segment_example,
150
- cache_examples=False,
151
- )
152
-
153
- clear_btn.click(
154
- fn=clear_all,
155
- outputs=[image_input, text_input, image_output, thresh_slider, mask_thresh_slider, info_output]
156
- )
157
-
158
  segment_btn.click(
159
  fn=segment,
160
  inputs=[image_input, text_input, thresh_slider, mask_thresh_slider],
161
- outputs=[image_output, info_output]
 
162
  )
163
 
164
- gr.Markdown(
165
- """
166
- ### Notes
167
- - **Model**: [facebook/sam3](https://huggingface.co/facebook/sam3)
168
- - Click on segments in the output to see labels
169
- - GPU recommended for faster inference
170
- """
171
  )
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  if __name__ == "__main__":
174
- demo.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NOTE(review): a bare `gradio` statement here raises NameError at import time —
+ # this line belongs in requirements.txt, not app.py.
2
  import spaces
3
  import gradio as gr
4
  import torch
 
21
  Returns format compatible with gr.AnnotatedImage: (image, [(mask, label), ...])
22
  """
23
  if image is None:
24
+ return None, "📷 Please upload an image to begin."
25
 
26
  if not text.strip():
27
+ return (image, []), "✏️ Enter a text prompt (e.g., 'person', 'cat', 'car')."
28
 
29
  try:
30
  inputs = processor(images=image, text=text.strip(), return_tensors="pt").to(device)
 
45
 
46
  n_masks = len(results['masks'])
47
  if n_masks == 0:
48
+ return (image, []), f"🔍 No objects found for **'{text}'** — try adjusting thresholds."
49
 
 
 
50
  annotations = []
51
  for i, (mask, score) in enumerate(zip(results['masks'], results['scores'])):
 
52
  mask_np = mask.cpu().numpy().astype(np.float32)
53
+ label = f"#{i+1} ({score:.2f})"
54
  annotations.append((mask_np, label))
55
 
56
  scores_text = ", ".join([f"{s:.2f}" for s in results['scores'].cpu().numpy()[:5]])
57
+ info = f"✨ **{n_masks}** objects found for **'{text}'**\n\nConfidence: {scores_text}{'...' if n_masks > 5 else ''}"
58
 
 
59
  return (image, annotations), info
60
 
61
  except Exception as e:
62
+ return (image, []), f"❌ Error: {str(e)}"
63
 
64
def clear_all():
    """Reset the UI to its initial state.

    Returns one value per component in clear_btn.click(outputs=[...]), in order:
    image_input, text_input, image_output, thresh_slider, mask_thresh_slider,
    info_output.
    """
    # 0.5 / 0.5 restore both sliders to their declared defaults.
    # Fixed mojibake: the hint emoji was mis-encoded ("πŸ’‘" -> 💡).
    return None, "", None, 0.5, 0.5, "💡 Enter a prompt and click **Segment** to find objects."
67
 
68
  def segment_example(image_path: str, prompt: str):
69
  """Handle example clicks"""
 
73
  image = Image.open(image_path).convert("RGB")
74
  return segment(image, prompt, 0.5, 0.5)
75
 
76
+ # Custom modern theme
77
+ custom_theme = gr.themes.Glass(
78
+ primary_hue="slate",
79
+ secondary_hue="zinc",
80
+ neutral_hue="slate",
81
+ font=gr.themes.GoogleFont("Inter"),
82
+ text_size="md",
83
+ spacing_size="lg",
84
+ radius_size="md"
85
+ ).set(
86
+ button_primary_background_fill="*neutral_800",
87
+ button_primary_background_fill_hover="*neutral_700",
88
+ button_secondary_background_fill="*neutral_100",
89
+ button_secondary_background_fill_hover="*neutral_200",
90
+ block_background_fill="white",
91
+ block_secondary_background_fill="*neutral_50",
92
+ block_title_text_weight="600",
93
+ )
94
+
95
+ # Main application
96
+ with gr.Blocks() as demo:
97
+ # Header
98
+ gr.HTML("""
99
+ <div style="text-align: center; padding: 0.5rem 0; margin-bottom: 0.5rem;">
100
+ <h1 style="font-size: 1.75rem; font-weight: 700; margin: 0; color: var(--neutral-800);">
101
+ SAM3 <span style="font-weight: 400; color: var(--neutral-500);">Promptable Segmentation</span>
102
+ </h1>
103
+ <p style="margin: 0.25rem 0 0 0; color: var(--neutral-600); font-size: 0.875rem;">
104
+ Zero-shot instance segmentation with natural language
105
+ </p>
106
+ <a href="https://huggingface.co/spaces/akhaliq/anycoder"
107
+ style="color: var(--primary-600); text-decoration: none; font-size: 0.8rem;"
108
+ target="_blank">Built with anycoder</a>
109
+ </div>
110
+ """)
 
 
 
111
 
112
+ # Main content
113
+ with gr.Column(elem_classes=["main-content"]):
114
+ # Image section
115
+ with gr.Row(equal_height=True):
116
+ with gr.Column(scale=1, min_width=280):
117
+ image_input = gr.Image(
118
+ label="📷 Upload Image",
119
+ type="pil",
120
+ height=320,
121
+ sources=["upload", "clipboard"],
122
+ )
123
+
124
+ with gr.Column(scale=1, min_width=280):
125
+ image_output = gr.AnnotatedImage(
126
+ label="🎯 Segmentation Result",
127
+ height=320,
128
+ show_legend=True,
129
+ )
130
+
131
+ # Info output
132
+ info_output = gr.Markdown(
133
+ value="💡 **Upload an image** and enter a prompt like 'person', 'cat', or 'car'",
134
+ elem_classes=["info-box"],
135
  )
136
+
137
+ # Controls section
138
+ with gr.Group(elem_classes=["controls"]):
139
+ with gr.Row():
140
+ text_input = gr.Textbox(
141
+ label="What to find",
142
+ placeholder="e.g., person, cat, bicycle...",
143
+ scale=4,
144
+ )
145
+ segment_btn = gr.Button(
146
+ "πŸ” Segment",
147
+ variant="primary",
148
+ size="lg",
149
+ scale=1,
150
+ min_width=120,
151
+ )
152
+
153
+ with gr.Row():
154
+ thresh_slider = gr.Slider(
155
+ minimum=0.0,
156
+ maximum=1.0,
157
+ value=0.5,
158
+ step=0.01,
159
+ label="Detection",
160
+ info="Confidence threshold",
161
+ scale=1,
162
+ )
163
+ mask_thresh_slider = gr.Slider(
164
+ minimum=0.0,
165
+ maximum=1.0,
166
+ value=0.5,
167
+ step=0.01,
168
+ label="Mask",
169
+ info="Edge sharpness",
170
+ scale=1,
171
+ )
172
+ clear_btn = gr.Button(
173
+ "β†Ί Clear",
174
+ variant="secondary",
175
+ size="lg",
176
+ scale=0,
177
+ min_width=80,
178
+ )
179
+
180
+ # Examples
181
+ gr.Markdown("### Quick Examples")
182
+ gr.Examples(
183
+ examples=[
184
+ ["http://images.cocodataset.org/val2017/000000077595.jpg", "cat"],
185
+ ["https://images.unsplash.com/photo-1535930483905-2c6d14342d7a", "dog"],
186
+ ["https://images.unsplash.com/photo-1558618666-fcd25c85cd64", "car"],
187
+ ],
188
+ inputs=[image_input, text_input],
189
+ outputs=[image_output, info_output],
190
+ fn=segment_example,
191
+ cache_examples=False,
192
+ examples_per_page=3,
193
  )
194
+
195
+ # Footer info
196
+ with gr.Accordion("ℹ️ About", open=False):
197
+ gr.Markdown("""
198
+ **SAM3** uses natural language prompts for zero-shot instance segmentation.
199
+
200
+ - **Model**: [facebook/sam3](https://huggingface.co/facebook/sam3)
201
+ - GPU recommended for faster processing
202
+ - Works best with specific, clear object names
203
+ """)
204
+
205
+ # Event handlers
 
 
 
 
 
 
 
 
 
 
 
206
  segment_btn.click(
207
  fn=segment,
208
  inputs=[image_input, text_input, thresh_slider, mask_thresh_slider],
209
+ outputs=[image_output, info_output],
210
+ api_name="segment",
211
  )
212
 
213
+ clear_btn.click(
214
+ fn=clear_all,
215
+ outputs=[image_input, text_input, image_output, thresh_slider, mask_thresh_slider, info_output],
216
+ api_name=False,
 
 
 
217
  )
218
 
219
+ # Custom CSS for responsive design
220
+ custom_css = """
221
+ @media (max-width: 768px) {
222
+ .main-content {
223
+ gap: 0.75rem !important;
224
+ }
225
+ .controls {
226
+ gap: 0.75rem !important;
227
+ }
228
+ .info-box {
229
+ font-size: 0.875rem !important;
230
+ padding: 0.75rem !important;
231
+ }
232
+ }
233
+
234
+ @media (max-width: 480px) {
235
+ .gradio-group {
236
+ gap: 0.5rem !important;
237
+ }
238
+ }
239
+
240
+ .info-box {
241
+ background: var(--neutral-50);
242
+ border-radius: var(--radius-lg);
243
+ padding: 1rem;
244
+ border: 1px solid var(--neutral-200);
245
+ }
246
+
247
+ .controls {
248
+ background: var(--neutral-50);
249
+ border-radius: var(--radius-lg);
250
+ padding: 1.25rem;
251
+ border: 1px solid var(--neutral-200);
252
+ }
253
+
254
+ .gradio-annotatedimage {
255
+ border: 2px dashed var(--neutral-300);
256
+ border-radius: var(--radius-lg);
257
+ }
258
+
259
+ .gradio-group {
260
+ gap: 1rem !important;
261
+ }
262
+ """
263
+
264
  if __name__ == "__main__":
+     # NOTE(review): `theme`, `css`, and `footer_links` are not launch() kwargs and
+     # would raise TypeError. Pass theme=custom_theme, css=custom_css to the
+     # gr.Blocks(...) constructor instead; `footer_links` is not a Gradio option.
+     demo.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=False,
+         debug=True,
+     )