🎨 Redesign from AnyCoder

#3
Files changed (1) hide show
  1. app.py +213 -91
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import spaces
2
  import gradio as gr
3
  import torch
@@ -20,10 +21,10 @@ def segment(image: Image.Image, text: str, threshold: float, mask_threshold: flo
20
  Returns format compatible with gr.AnnotatedImage: (image, [(mask, label), ...])
21
  """
22
  if image is None:
23
- return None, "❌ Please upload an image."
24
 
25
  if not text.strip():
26
- return (image, []), "❌ Please enter a text prompt."
27
 
28
  try:
29
  inputs = processor(images=image, text=text.strip(), return_tensors="pt").to(device)
@@ -44,29 +45,26 @@ def segment(image: Image.Image, text: str, threshold: float, mask_threshold: flo
44
 
45
  n_masks = len(results['masks'])
46
  if n_masks == 0:
47
- return (image, []), f"❌ No objects found matching '{text}' (try adjusting thresholds)."
48
 
49
  # Format for AnnotatedImage: list of (mask, label) tuples
50
- # mask should be numpy array with values 0-1 (float) matching image dimensions
51
  annotations = []
52
  for i, (mask, score) in enumerate(zip(results['masks'], results['scores'])):
53
- # Convert binary mask to float numpy array (0-1 range)
54
  mask_np = mask.cpu().numpy().astype(np.float32)
55
- label = f"{text} #{i+1} ({score:.2f})"
56
  annotations.append((mask_np, label))
57
 
58
- scores_text = ", ".join([f"{s:.2f}" for s in results['scores'].cpu().numpy()[:5]])
59
- info = f"✅ Found **{n_masks}** objects matching **'{text}'**\nConfidence scores: {scores_text}{'...' if n_masks > 5 else ''}"
60
 
61
- # Return tuple: (base_image, list_of_annotations)
62
  return (image, annotations), info
63
 
64
  except Exception as e:
65
- return (image, []), f"❌ Error during segmentation: {str(e)}"
66
 
67
  def clear_all():
68
  """Clear all inputs and outputs"""
69
- return None, "", None, 0.5, 0.5, "📝 Enter a prompt and click **Segment** to start."
70
 
71
  def segment_example(image_path: str, prompt: str):
72
  """Handle example clicks"""
@@ -76,80 +74,176 @@ def segment_example(image_path: str, prompt: str):
76
  image = Image.open(image_path).convert("RGB")
77
  return segment(image, prompt, 0.5, 0.5)
78
 
79
- # Gradio Interface
80
- with gr.Blocks(
81
- theme=gr.themes.Soft(),
82
- title="SAM3 - Promptable Concept Segmentation",
83
- css=".gradio-container {max-width: 1400px !important;}"
84
- ) as demo:
85
- gr.Markdown(
86
- """
87
- # SAM3 - Promptable Concept Segmentation (PCS)
88
-
89
- **SAM3** performs zero-shot instance segmentation using natural language prompts.
90
- Upload an image, enter a text prompt (e.g., "person", "car", "dog"), and get segmentation masks.
91
-
92
- Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
93
- """
94
- )
95
-
96
- gr.Markdown("### Inputs")
97
- with gr.Row(variant="panel"):
98
- image_input = gr.Image(
99
- label="Input Image",
100
- type="pil",
101
- height=400,
102
- )
103
- # AnnotatedImage expects: (base_image, [(mask, label), ...])
104
- image_output = gr.AnnotatedImage(
105
- label="Output (Segmented Image)",
106
- height=400,
107
- show_legend=True,
108
- )
109
-
110
- with gr.Row():
111
- text_input = gr.Textbox(
112
- label="Text Prompt",
113
- placeholder="e.g., person, ear, cat, bicycle...",
114
- scale=3
115
- )
116
- clear_btn = gr.Button("🔁 Clear", size="sm", variant="secondary")
117
-
118
- with gr.Row():
119
- thresh_slider = gr.Slider(
120
- minimum=0.0,
121
- maximum=1.0,
122
- value=0.5,
123
- step=0.01,
124
- label="Detection Threshold",
125
- info="Higher = fewer detections"
126
- )
127
- mask_thresh_slider = gr.Slider(
128
- minimum=0.0,
129
- maximum=1.0,
130
- value=0.5,
131
- step=0.01,
132
- label="Mask Threshold",
133
- info="Higher = sharper masks"
134
- )
135
-
136
- info_output = gr.Markdown(
137
- value="📝 Enter a prompt and click **Segment** to start.",
138
- label="Info / Results"
139
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
- segment_btn = gr.Button("🎯 Segment", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
- gr.Examples(
144
- examples=[
145
- ["http://images.cocodataset.org/val2017/000000077595.jpg", "cat"],
146
- ],
147
- inputs=[image_input, text_input],
148
- outputs=[image_output, info_output],
149
- fn=segment_example,
150
- cache_examples=False,
151
- )
152
 
 
153
  clear_btn.click(
154
  fn=clear_all,
155
  outputs=[image_input, text_input, image_output, thresh_slider, mask_thresh_slider, info_output]
@@ -161,14 +255,42 @@ with gr.Blocks(
161
  outputs=[image_output, info_output]
162
  )
163
 
164
- gr.Markdown(
165
- """
166
- ### Notes
167
- - **Model**: [facebook/sam3](https://huggingface.co/facebook/sam3)
168
- - Click on segments in the output to see labels
169
- - GPU recommended for faster inference
170
- """
171
  )
172
 
173
- if __name__ == "__main__":
174
- demo.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
  import spaces
3
  import gradio as gr
4
  import torch
 
21
  Returns format compatible with gr.AnnotatedImage: (image, [(mask, label), ...])
22
  """
23
  if image is None:
24
+ return None, "📷 Please upload an image to begin."
25
 
26
  if not text.strip():
27
+ return (image, []), "✏️ Enter a text prompt to find objects."
28
 
29
  try:
30
  inputs = processor(images=image, text=text.strip(), return_tensors="pt").to(device)
 
45
 
46
  n_masks = len(results['masks'])
47
  if n_masks == 0:
48
+ return (image, []), f"🔍 No objects found for \"{text}\". Try adjusting the thresholds."
49
 
50
  # Format for AnnotatedImage: list of (mask, label) tuples
 
51
  annotations = []
52
  for i, (mask, score) in enumerate(zip(results['masks'], results['scores'])):
 
53
  mask_np = mask.cpu().numpy().astype(np.float32)
54
+ label = f"{text} #{i+1}"
55
  annotations.append((mask_np, label))
56
 
57
+ scores_text = ", ".join([f"{s:.2f}" for s in results['scores'].cpu().numpy()[:3]])
58
+ info = f"**{n_masks}** object(s) found for \"{text}\" | Scores: {scores_text}"
59
 
 
60
  return (image, annotations), info
61
 
62
  except Exception as e:
63
+ return (image, []), f"⚠️ Segmentation error: {str(e)}"
64
 
65
  def clear_all():
66
  """Clear all inputs and outputs"""
67
+ return None, "", None, 0.5, 0.5, "✏️ Enter a prompt and click **Segment** to start."
68
 
69
  def segment_example(image_path: str, prompt: str):
70
  """Handle example clicks"""
 
74
  image = Image.open(image_path).convert("RGB")
75
  return segment(image, prompt, 0.5, 0.5)
76
 
77
+ # Custom CSS for mobile-first responsive design
78
+ custom_css = """
79
+ /* Mobile-first responsive styles */
80
+ @media (max-width: 768px) {
81
+ .main-header { text-align: center; padding: 1rem !important; }
82
+ .control-panel { padding: 0.75rem !important; }
83
+ .slider-group { flex-direction: column; gap: 0.5rem !important; }
84
+ .example-grid { grid-template-columns: repeat(2, 1fr) !important; }
85
+ }
86
+
87
+ @media (min-width: 769px) {
88
+ .app-layout {
89
+ display: grid !important;
90
+ grid-template-columns: 1fr 380px !important;
91
+ gap: 1.5rem !important;
92
+ }
93
+ .control-panel {
94
+ position: sticky !important;
95
+ top: 1rem !important;
96
+ height: fit-content !important;
97
+ }
98
+ }
99
+
100
+ /* Smooth transitions */
101
+ .gradio-container { transition: all 0.3s ease !important; }
102
+
103
+ /* Modern slider styling */
104
+ .slider-label { font-weight: 500 !important; color: var(--body-text-color) !important; }
105
+
106
+ /* Card-like panels */
107
+ .control-panel {
108
+ background: var(--background-fill-secondary);
109
+ border-radius: var(--radius-lg);
110
+ padding: 1.25rem;
111
+ border: 1px solid var(--border-color-primary);
112
+ }
113
+
114
+ /* Button improvements */
115
+ .primary-btn {
116
+ font-weight: 600 !important;
117
+ letter-spacing: 0.02em !important;
118
+ }
119
+
120
+ /* Image container */
121
+ .image-container {
122
+ border-radius: var(--radius-lg);
123
+ overflow: hidden;
124
+ border: 1px solid var(--border-color-primary);
125
+ }
126
+
127
+ /* Info panel */
128
+ .info-panel {
129
+ background: var(--background-fill-primary);
130
+ border-radius: var(--radius-md);
131
+ padding: 1rem;
132
+ border-left: 3px solid var(--color-accent);
133
+ }
134
+
135
+ /* Example items */
136
+ .example-item {
137
+ cursor: pointer !important;
138
+ transition: transform 0.2s ease, box-shadow 0.2s ease !important;
139
+ }
140
+ .example-item:hover {
141
+ transform: translateY(-2px);
142
+ box-shadow: var(--shadow-drop-lg);
143
+ }
144
+ """
145
+
146
+ # Gradio 6 - NO parameters in Blocks constructor!
147
+ with gr.Blocks() as demo:
148
+ # Header
149
+ gr.HTML("""
150
+ <div class="main-header" style="text-align: center; padding: 1.5rem; background: var(--background-fill-primary); border-bottom: 1px solid var(--border-color-primary); margin-bottom: 1rem;">
151
+ <h1 style="margin: 0; font-size: 1.75rem; font-weight: 700;">SAM3</h1>
152
+ <p style="margin: 0.5rem 0 0; opacity: 0.8; font-size: 0.95rem;">Promptable Concept Segmentation</p>
153
+ <div style="margin-top: 0.75rem;">
154
+ <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: var(--color-accent); text-decoration: none; font-size: 0.85rem;">Built with anycoder ↗</a>
155
+ </div>
156
+ </div>
157
+ """)
158
 
159
+ with gr.Row(elem_classes=["app-layout"]):
160
+ # Left: Image section
161
+ with gr.Column(scale=2):
162
+ with gr.Group(elem_classes=["image-container"]):
163
+ gr.Markdown("**📷 Image**", elem_classes="slider-label")
164
+ image_input = gr.Image(
165
+ type="pil",
166
+ sources=["upload", "clipboard"],
167
+ height=400,
168
+ elem_id="input-image"
169
+ )
170
+
171
+ with gr.Group(elem_classes=["image-container", "mt-4"]):
172
+ gr.Markdown("**🎯 Segmentation Result**", elem_classes="slider-label")
173
+ image_output = gr.AnnotatedImage(
174
+ height=400,
175
+ show_legend=True,
176
+ elem_id="output-image"
177
+ )
178
+
179
+ # Right: Control panel
180
+ with gr.Column(scale=1, elem_classes=["control-panel"]):
181
+ gr.Markdown("### ⚙️ Settings", elem_classes="slider-label")
182
+
183
+ # Text prompt
184
+ text_input = gr.Textbox(
185
+ label="Text Prompt",
186
+ placeholder="e.g., person, cat, car, cup...",
187
+ lines=2,
188
+ autoscroll=False
189
+ )
190
+
191
+ # Sliders in a row
192
+ with gr.Row(elem_classes=["slider-group"]):
193
+ thresh_slider = gr.Slider(
194
+ minimum=0.0,
195
+ maximum=1.0,
196
+ value=0.5,
197
+ step=0.01,
198
+ label="Detection",
199
+ info="Higher = fewer"
200
+ )
201
+ mask_thresh_slider = gr.Slider(
202
+ minimum=0.0,
203
+ maximum=1.0,
204
+ value=0.5,
205
+ step=0.01,
206
+ label="Mask",
207
+ info="Higher = sharper"
208
+ )
209
+
210
+ # Buttons
211
+ with gr.Row():
212
+ segment_btn = gr.Button("🎯 Segment", variant="primary", size="lg", scale=2, elem_classes="primary-btn")
213
+ clear_btn = gr.Button("Clear", variant="secondary", size="lg", scale=1)
214
+
215
+ # Info output
216
+ info_output = gr.Markdown(
217
+ value="✏️ Enter a prompt and click **Segment** to start.",
218
+ elem_classes="info-panel"
219
+ )
220
+
221
+ # Examples
222
+ gr.Markdown("### 💡 Examples", elem_classes="slider-label")
223
+ gr.Examples(
224
+ examples=[
225
+ ["http://images.cocodataset.org/val2017/000000077595.jpg", "cat"],
226
+ ["http://images.cocodataset.org/val2017/000000039769.jpg", "remote"],
227
+ ["http://images.cocodataset.org/val2017/000000000285.jpg", "person"],
228
+ ["http://images.cocodataset.org/val2017/000000003899.jpg", "dog"],
229
+ ],
230
+ inputs=[image_input, text_input],
231
+ outputs=[image_output, info_output],
232
+ fn=segment_example,
233
+ cache_examples=False,
234
+ examples_per_page=4,
235
+ elem_classes="example-grid"
236
+ )
237
 
238
+ # Model info at bottom
239
+ gr.HTML("""
240
+ <div style="text-align: center; padding: 1rem; opacity: 0.7; font-size: 0.85rem; border-top: 1px solid var(--border-color-primary); margin-top: 1rem;">
241
+ Model: <a href="https://huggingface.co/facebook/sam3" target="_blank" style="color: var(--color-accent);">facebook/sam3</a>
242
+ • Zero-shot segmentation with natural language prompts
243
+ </div>
244
+ """)
 
 
245
 
246
+ # Event handlers
247
  clear_btn.click(
248
  fn=clear_all,
249
  outputs=[image_input, text_input, image_output, thresh_slider, mask_thresh_slider, info_output]
 
255
  outputs=[image_output, info_output]
256
  )
257
 
258
+ # Also trigger on Enter key
259
+ text_input.submit(
260
+ fn=segment,
261
+ inputs=[image_input, text_input, thresh_slider, mask_thresh_slider],
262
+ outputs=[image_output, info_output]
 
 
263
  )
264
 
265
+ # Gradio 6 - ALL app parameters go in launch()!
266
+ demo.launch(
267
+ theme=gr.themes.Soft(
268
+ primary_hue="indigo",
269
+ secondary_hue="slate",
270
+ neutral_hue="gray",
271
+ font=gr.themes.GoogleFont("Inter"),
272
+ text_size="md",
273
+ spacing_size="md",
274
+ radius_size="lg"
275
+ ).set(
276
+ button_primary_background_fill="*primary_600",
277
+ button_primary_background_fill_hover="*primary_700",
278
+ button_secondary_background_fill="*neutral_100",
279
+ button_secondary_background_fill_hover="*neutral_200",
280
+ block_background_fill="*neutral_50",
281
+ block_label_background_fill="*neutral_100",
282
+ ),
283
+ css=custom_css,
284
+ css_paths=None,
285
+ js=None,
286
+ head=None,
287
+ title="SAM3 - Promptable Concept Segmentation",
288
+ server_name="0.0.0.0",
289
+ server_port=7860,
290
+ share=False,
291
+ debug=True,
292
+ footer_links=[
293
+ {"label": "anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
294
+ {"label": "Model", "url": "https://huggingface.co/facebook/sam3"}
295
+ ]
296
+ )