Boobs00 committed on
Commit
35c189d
·
verified ·
1 Parent(s): da0fbc3

Add 3 files

Browse files
Files changed (3) hide show
  1. README.md +7 -5
  2. index.html +580 -19
  3. prompts.txt +1 -0
README.md CHANGED
@@ -1,10 +1,12 @@
1
  ---
2
- title: Ai Powered Image Understanding
3
- emoji: 👁
4
- colorFrom: blue
5
- colorTo: green
6
  sdk: static
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: ai-powered-image-understanding
3
+ emoji: 🐳
4
+ colorFrom: purple
5
+ colorTo: pink
6
  sdk: static
7
  pinned: false
8
+ tags:
9
+ - deepsite
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html CHANGED
@@ -1,19 +1,580 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Describe Anything Model</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
9
+ <style>
10
+ .gradient-bg {
11
+ background: linear-gradient(135deg, #6b73ff 0%, #000dff 100%);
12
+ }
13
+ .card-hover:hover {
14
+ transform: translateY(-5px);
15
+ box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);
16
+ }
17
+ .fade-in {
18
+ animation: fadeIn 0.5s ease-in-out;
19
+ }
20
+ @keyframes fadeIn {
21
+ from { opacity: 0; transform: translateY(10px); }
22
+ to { opacity: 1; transform: translateY(0); }
23
+ }
24
+ .textarea-auto {
25
+ min-height: 120px;
26
+ resize: none;
27
+ }
28
+ .loading-dots:after {
29
+ content: '.';
30
+ animation: dots 1.5s steps(5, end) infinite;
31
+ }
32
+ @keyframes dots {
33
+ 0%, 20% { content: '.'; }
34
+ 40% { content: '..'; }
35
+ 60% { content: '...'; }
36
+ 80%, 100% { content: ''; }
37
+ }
38
+ </style>
39
+ </head>
40
+ <body class="bg-gray-50 min-h-screen">
41
+ <!-- Header -->
42
+ <header class="gradient-bg text-white shadow-lg">
43
+ <div class="container mx-auto px-4 py-6">
44
+ <div class="flex justify-between items-center">
45
+ <div class="flex items-center space-x-3">
46
+ <i class="fas fa-robot text-3xl"></i>
47
+ <h1 class="text-2xl font-bold">Describe Anything Model</h1>
48
+ </div>
49
+ <div class="hidden md:flex space-x-4">
50
+ <a href="#embedding" class="hover:text-blue-200 transition">Embedding</a>
51
+ <a href="#description" class="hover:text-blue-200 transition">Description</a>
52
+ <a href="#about" class="hover:text-blue-200 transition">About</a>
53
+ </div>
54
+ <button class="md:hidden text-xl" id="menu-toggle">
55
+ <i class="fas fa-bars"></i>
56
+ </button>
57
+ </div>
58
+ </div>
59
+ </header>
60
+
61
+ <!-- Mobile Menu -->
62
+ <div class="hidden bg-white shadow-md" id="mobile-menu">
63
+ <div class="container mx-auto px-4 py-2 flex flex-col space-y-2">
64
+ <a href="#embedding" class="py-2 hover:text-blue-600 transition">Embedding</a>
65
+ <a href="#description" class="py-2 hover:text-blue-600 transition">Description</a>
66
+ <a href="#about" class="py-2 hover:text-blue-600 transition">About</a>
67
+ </div>
68
+ </div>
69
+
70
+ <!-- Hero Section -->
71
+ <section class="gradient-bg text-white py-12">
72
+ <div class="container mx-auto px-4 text-center">
73
+ <h2 class="text-4xl font-bold mb-4">AI-Powered Image Understanding</h2>
74
+ <p class="text-xl mb-8 max-w-2xl mx-auto">Generate detailed descriptions of any object in your images with our advanced computer vision model.</p>
75
+ <div class="flex justify-center space-x-4">
76
+ <a href="#embedding" class="bg-white text-blue-600 px-6 py-3 rounded-lg font-medium hover:bg-blue-50 transition">Try Embedding</a>
77
+ <a href="#description" class="border-2 border-white px-6 py-3 rounded-lg font-medium hover:bg-white hover:bg-opacity-10 transition">Try Description</a>
78
+ </div>
79
+ </div>
80
+ </section>
81
+
82
+ <!-- Main Content -->
83
+ <main class="container mx-auto px-4 py-12">
84
+ <!-- Embedding Section -->
85
+ <section id="embedding" class="mb-16 fade-in">
86
+ <div class="flex items-center mb-8">
87
+ <div class="h-1 bg-blue-500 w-12 mr-4"></div>
88
+ <h2 class="text-3xl font-bold">Image Embedding Generator</h2>
89
+ </div>
90
+
91
+ <div class="grid grid-cols-1 lg:grid-cols-2 gap-8">
92
+ <div class="bg-white rounded-xl shadow-md p-6 card-hover transition">
93
+ <h3 class="text-xl font-semibold mb-4">Upload Your Image</h3>
94
+ <div class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center mb-4" id="drop-area">
95
+ <i class="fas fa-cloud-upload-alt text-4xl text-blue-500 mb-3"></i>
96
+ <p class="mb-2">Drag & drop your image here</p>
97
+ <p class="text-sm text-gray-500 mb-4">or</p>
98
+ <input type="file" id="embedding-file" accept="image/*" class="hidden">
99
+ <label for="embedding-file" class="bg-blue-500 text-white px-4 py-2 rounded-lg cursor-pointer hover:bg-blue-600 transition">Select Image</label>
100
+ </div>
101
+ <div class="flex justify-between items-center">
102
+ <span class="text-sm text-gray-500" id="embedding-file-name">No file selected</span>
103
+ <button id="clear-embedding" class="text-red-500 text-sm hover:text-red-700 hidden">Clear</button>
104
+ </div>
105
+ </div>
106
+
107
+ <div class="bg-white rounded-xl shadow-md p-6 card-hover transition">
108
+ <h3 class="text-xl font-semibold mb-4">Generated Embedding</h3>
109
+ <div class="relative">
110
+ <textarea id="embedding-result" class="w-full bg-gray-100 border border-gray-300 rounded-lg p-4 textarea-auto" placeholder="Your image embedding will appear here..." readonly></textarea>
111
+ <button id="copy-embedding" class="absolute top-2 right-2 bg-gray-200 hover:bg-gray-300 p-2 rounded-lg transition" title="Copy to clipboard">
112
+ <i class="fas fa-copy"></i>
113
+ </button>
114
+ </div>
115
+ <button id="generate-embedding" class="mt-4 w-full bg-blue-500 text-white py-3 rounded-lg font-medium hover:bg-blue-600 transition flex items-center justify-center">
116
+ <span id="embedding-button-text">Generate Embedding</span>
117
+ <span id="embedding-loading" class="hidden ml-2 loading-dots"></span>
118
+ </button>
119
+ </div>
120
+ </div>
121
+ </section>
122
+
123
+ <!-- Description Section -->
124
+ <section id="description" class="mb-16 fade-in">
125
+ <div class="flex items-center mb-8">
126
+ <div class="h-1 bg-blue-500 w-12 mr-4"></div>
127
+ <h2 class="text-3xl font-bold">Mask Description Generator</h2>
128
+ </div>
129
+
130
+ <div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
131
+ <div class="bg-white rounded-xl shadow-md p-6 card-hover transition">
132
+ <h3 class="text-xl font-semibold mb-4">Upload Your Image</h3>
133
+ <div class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center mb-4" id="image-drop-area">
134
+ <i class="fas fa-image text-4xl text-blue-500 mb-3"></i>
135
+ <p class="mb-2">Drag & drop your image here</p>
136
+ <p class="text-sm text-gray-500 mb-4">or</p>
137
+ <input type="file" id="image-file" accept="image/*" class="hidden">
138
+ <label for="image-file" class="bg-blue-500 text-white px-4 py-2 rounded-lg cursor-pointer hover:bg-blue-600 transition">Select Image</label>
139
+ </div>
140
+ <div class="flex justify-between items-center">
141
+ <span class="text-sm text-gray-500" id="image-file-name">No file selected</span>
142
+ <button id="clear-image" class="text-red-500 text-sm hover:text-red-700 hidden">Clear</button>
143
+ </div>
144
+ </div>
145
+
146
+ <div class="bg-white rounded-xl shadow-md p-6 card-hover transition">
147
+ <h3 class="text-xl font-semibold mb-4">Upload Your Mask</h3>
148
+ <div class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center mb-4" id="mask-drop-area">
149
+ <i class="fas fa-mask text-4xl text-blue-500 mb-3"></i>
150
+ <p class="mb-2">Drag & drop your mask here</p>
151
+ <p class="text-sm text-gray-500 mb-4">or</p>
152
+ <input type="file" id="mask-file" accept="image/*" class="hidden">
153
+ <label for="mask-file" class="bg-blue-500 text-white px-4 py-2 rounded-lg cursor-pointer hover:bg-blue-600 transition">Select Mask</label>
154
+ </div>
155
+ <div class="flex justify-between items-center">
156
+ <span class="text-sm text-gray-500" id="mask-file-name">No file selected</span>
157
+ <button id="clear-mask" class="text-red-500 text-sm hover:text-red-700 hidden">Clear</button>
158
+ </div>
159
+ </div>
160
+
161
+ <div class="bg-white rounded-xl shadow-md p-6 card-hover transition">
162
+ <h3 class="text-xl font-semibold mb-4">Generate Description</h3>
163
+ <div class="mb-4">
164
+ <label for="description-prompt" class="block text-sm font-medium text-gray-700 mb-1">Prompt (Optional)</label>
165
+ <input type="text" id="description-prompt" class="w-full border border-gray-300 rounded-lg p-3" placeholder="What would you like to know about this object?">
166
+ </div>
167
+ <div class="relative mb-4">
168
+ <label for="description-result" class="block text-sm font-medium text-gray-700 mb-1">Description</label>
169
+ <textarea id="description-result" class="w-full bg-gray-100 border border-gray-300 rounded-lg p-4 textarea-auto" placeholder="Your description will appear here..." readonly></textarea>
170
+ <button id="copy-description" class="absolute top-7 right-2 bg-gray-200 hover:bg-gray-300 p-2 rounded-lg transition" title="Copy to clipboard">
171
+ <i class="fas fa-copy"></i>
172
+ </button>
173
+ </div>
174
+ <div class="flex space-x-3">
175
+ <button id="generate-description" class="flex-1 bg-blue-500 text-white py-3 rounded-lg font-medium hover:bg-blue-600 transition flex items-center justify-center">
176
+ <span id="description-button-text">Generate</span>
177
+ <span id="description-loading" class="hidden ml-2 loading-dots"></span>
178
+ </button>
179
+ <button id="generate-streaming" class="flex-1 bg-purple-500 text-white py-3 rounded-lg font-medium hover:bg-purple-600 transition flex items-center justify-center">
180
+ <span id="streaming-button-text">Stream</span>
181
+ <span id="streaming-loading" class="hidden ml-2 loading-dots"></span>
182
+ </button>
183
+ </div>
184
+ </div>
185
+ </div>
186
+ </section>
187
+
188
+ <!-- Preview Section -->
189
+ <section id="preview" class="mb-16 fade-in">
190
+ <div class="flex items-center mb-8">
191
+ <div class="h-1 bg-blue-500 w-12 mr-4"></div>
192
+ <h2 class="text-3xl font-bold">Image & Mask Preview</h2>
193
+ </div>
194
+
195
+ <div class="grid grid-cols-1 md:grid-cols-2 gap-8">
196
+ <div class="bg-white rounded-xl shadow-md p-6">
197
+ <h3 class="text-xl font-semibold mb-4">Image Preview</h3>
198
+ <div class="border border-gray-200 rounded-lg overflow-hidden">
199
+ <img id="image-preview" src="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='400' height='300' viewBox='0 0 400 300' fill='%23f3f4f6'%3E%3Crect width='400' height='300'/%3E%3Ctext x='50%' y='50%' dominant-baseline='middle' text-anchor='middle' font-family='Arial' font-size='16' fill='%239ca3af'%3ENo image selected%3C/text%3E%3C/svg%3E" alt="Image preview" class="w-full h-auto">
200
+ </div>
201
+ </div>
202
+
203
+ <div class="bg-white rounded-xl shadow-md p-6">
204
+ <h3 class="text-xl font-semibold mb-4">Mask Preview</h3>
205
+ <div class="border border-gray-200 rounded-lg overflow-hidden">
206
+ <img id="mask-preview" src="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='400' height='300' viewBox='0 0 400 300' fill='%23f3f4f6'%3E%3Crect width='400' height='300'/%3E%3Ctext x='50%' y='50%' dominant-baseline='middle' text-anchor='middle' font-family='Arial' font-size='16' fill='%239ca3af'%3ENo mask selected%3C/text%3E%3C/svg%3E" alt="Mask preview" class="w-full h-auto">
207
+ </div>
208
+ </div>
209
+ </div>
210
+ </section>
211
+
212
+ <!-- About Section -->
213
+ <section id="about" class="fade-in">
214
+ <div class="flex items-center mb-8">
215
+ <div class="h-1 bg-blue-500 w-12 mr-4"></div>
216
+ <h2 class="text-3xl font-bold">About Describe Anything Model</h2>
217
+ </div>
218
+
219
+ <div class="bg-white rounded-xl shadow-md p-8">
220
+ <div class="prose max-w-none">
221
+ <h3 class="text-2xl font-semibold mb-4">Advanced Image Understanding</h3>
222
+ <p class="mb-4">The Describe Anything Model (DAM) is a cutting-edge AI system that combines computer vision with natural language processing to provide detailed descriptions of objects in images.</p>
223
+
224
+ <h4 class="text-xl font-semibold mt-6 mb-3">Key Features</h4>
225
+ <ul class="list-disc pl-6 mb-6 space-y-2">
226
+ <li>Generate rich semantic embeddings for any image</li>
227
+ <li>Describe specific objects using segmentation masks</li>
228
+ <li>Answer questions about objects in images</li>
229
+ <li>Streaming output for real-time results</li>
230
+ </ul>
231
+
232
+ <h4 class="text-xl font-semibold mt-6 mb-3">How It Works</h4>
233
+ <ol class="list-decimal pl-6 mb-6 space-y-2">
234
+ <li>Upload an image to generate its embedding</li>
235
+ <li>Optionally upload a mask to focus on specific objects</li>
236
+ <li>Provide a prompt to guide the description</li>
237
+ <li>Get detailed, AI-generated descriptions</li>
238
+ </ol>
239
+
240
+ <div class="bg-blue-50 border-l-4 border-blue-500 p-4 mt-6">
241
+ <p class="text-blue-700"><strong>Note:</strong> This is a demonstration interface. For production use, consider implementing proper API endpoints and error handling.</p>
242
+ </div>
243
+ </div>
244
+ </div>
245
+ </section>
246
+ </main>
247
+
248
+ <!-- Footer -->
249
+ <footer class="bg-gray-800 text-white py-8">
250
+ <div class="container mx-auto px-4">
251
+ <div class="flex flex-col md:flex-row justify-between items-center">
252
+ <div class="mb-4 md:mb-0">
253
+ <h3 class="text-xl font-bold mb-2">Describe Anything Model</h3>
254
+ <p class="text-gray-400">Advanced AI for image understanding</p>
255
+ </div>
256
+ <div class="flex space-x-6">
257
+ <a href="#" class="text-gray-400 hover:text-white transition"><i class="fab fa-github text-xl"></i></a>
258
+ <a href="#" class="text-gray-400 hover:text-white transition"><i class="fab fa-twitter text-xl"></i></a>
259
+ <a href="#" class="text-gray-400 hover:text-white transition"><i class="fab fa-linkedin text-xl"></i></a>
260
+ </div>
261
+ </div>
262
+ <div class="border-t border-gray-700 mt-8 pt-8 text-center text-gray-400">
263
+ <p>&copy; 2023 Describe Anything Model. All rights reserved.</p>
264
+ </div>
265
+ </div>
266
+ </footer>
267
+
268
+ <script>
269
+ // Mobile menu toggle
270
+ document.getElementById('menu-toggle').addEventListener('click', function() {
271
+ const menu = document.getElementById('mobile-menu');
272
+ menu.classList.toggle('hidden');
273
+ });
274
+
275
+ // File handling for embedding
276
+ const embeddingFileInput = document.getElementById('embedding-file');
277
+ const embeddingDropArea = document.getElementById('drop-area');
278
+ const embeddingFileName = document.getElementById('embedding-file-name');
279
+ const clearEmbeddingBtn = document.getElementById('clear-embedding');
280
+ const generateEmbeddingBtn = document.getElementById('generate-embedding');
281
+ const embeddingResult = document.getElementById('embedding-result');
282
+ const copyEmbeddingBtn = document.getElementById('copy-embedding');
283
+ const embeddingButtonText = document.getElementById('embedding-button-text');
284
+ const embeddingLoading = document.getElementById('embedding-loading');
285
+
286
+ // File handling for description
287
+ const imageFileInput = document.getElementById('image-file');
288
+ const imageDropArea = document.getElementById('image-drop-area');
289
+ const imageFileName = document.getElementById('image-file-name');
290
+ const clearImageBtn = document.getElementById('clear-image');
291
+ const maskFileInput = document.getElementById('mask-file');
292
+ const maskDropArea = document.getElementById('mask-drop-area');
293
+ const maskFileName = document.getElementById('mask-file-name');
294
+ const clearMaskBtn = document.getElementById('clear-mask');
295
+ const generateDescriptionBtn = document.getElementById('generate-description');
296
+ const generateStreamingBtn = document.getElementById('generate-streaming');
297
+ const descriptionResult = document.getElementById('description-result');
298
+ const copyDescriptionBtn = document.getElementById('copy-description');
299
+ const descriptionPrompt = document.getElementById('description-prompt');
300
+ const descriptionButtonText = document.getElementById('description-button-text');
301
+ const descriptionLoading = document.getElementById('description-loading');
302
+ const streamingButtonText = document.getElementById('streaming-button-text');
303
+ const streamingLoading = document.getElementById('streaming-loading');
304
+ const imagePreview = document.getElementById('image-preview');
305
+ const maskPreview = document.getElementById('mask-preview');
306
+
307
+ // Prevent default drag behaviors
308
+ [embeddingDropArea, imageDropArea, maskDropArea].forEach(area => {
309
+ ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
310
+ area.addEventListener(eventName, preventDefaults, false);
311
+ });
312
+ });
313
+
314
+ function preventDefaults(e) {
315
+ e.preventDefault();
316
+ e.stopPropagation();
317
+ }
318
+
319
+ // Highlight drop area when item is dragged over it
320
+ [embeddingDropArea, imageDropArea, maskDropArea].forEach(area => {
321
+ ['dragenter', 'dragover'].forEach(eventName => {
322
+ area.addEventListener(eventName, highlight, false);
323
+ });
324
+
325
+ ['dragleave', 'drop'].forEach(eventName => {
326
+ area.addEventListener(eventName, unhighlight, false);
327
+ });
328
+ });
329
+
330
+ function highlight() {
331
+ this.classList.add('border-blue-500', 'bg-blue-50');
332
+ }
333
+
334
+ function unhighlight() {
335
+ this.classList.remove('border-blue-500', 'bg-blue-50');
336
+ }
337
+
338
+ // Handle dropped files
339
+ embeddingDropArea.addEventListener('drop', handleEmbeddingDrop, false);
340
+ imageDropArea.addEventListener('drop', handleImageDrop, false);
341
+ maskDropArea.addEventListener('drop', handleMaskDrop, false);
342
+
343
+ function handleEmbeddingDrop(e) {
344
+ const dt = e.dataTransfer;
345
+ const files = dt.files;
346
+ if (files.length) {
347
+ handleEmbeddingFile(files[0]);
348
+ }
349
+ }
350
+
351
+ function handleImageDrop(e) {
352
+ const dt = e.dataTransfer;
353
+ const files = dt.files;
354
+ if (files.length) {
355
+ handleImageFile(files[0]);
356
+ }
357
+ }
358
+
359
+ function handleMaskDrop(e) {
360
+ const dt = e.dataTransfer;
361
+ const files = dt.files;
362
+ if (files.length) {
363
+ handleMaskFile(files[0]);
364
+ }
365
+ }
366
+
367
+ // Handle selected files
368
+ embeddingFileInput.addEventListener('change', function() {
369
+ if (this.files.length) {
370
+ handleEmbeddingFile(this.files[0]);
371
+ }
372
+ });
373
+
374
+ imageFileInput.addEventListener('change', function() {
375
+ if (this.files.length) {
376
+ handleImageFile(this.files[0]);
377
+ }
378
+ });
379
+
380
+ maskFileInput.addEventListener('change', function() {
381
+ if (this.files.length) {
382
+ handleMaskFile(this.files[0]);
383
+ }
384
+ });
385
+
386
+ function handleEmbeddingFile(file) {
387
+ if (!file.type.match('image.*')) {
388
+ alert('Please select an image file.');
389
+ return;
390
+ }
391
+
392
+ embeddingFileName.textContent = file.name;
393
+ clearEmbeddingBtn.classList.remove('hidden');
394
+
395
+ // Preview would be similar to image preview, but we're not showing it here
396
+ }
397
+
398
+ function handleImageFile(file) {
399
+ if (!file.type.match('image.*')) {
400
+ alert('Please select an image file.');
401
+ return;
402
+ }
403
+
404
+ imageFileName.textContent = file.name;
405
+ clearImageBtn.classList.remove('hidden');
406
+
407
+ // Preview the image
408
+ const reader = new FileReader();
409
+ reader.onload = function(e) {
410
+ imagePreview.src = e.target.result;
411
+ };
412
+ reader.readAsDataURL(file);
413
+ }
414
+
415
+ function handleMaskFile(file) {
416
+ if (!file.type.match('image.*')) {
417
+ alert('Please select an image file.');
418
+ return;
419
+ }
420
+
421
+ maskFileName.textContent = file.name;
422
+ clearMaskBtn.classList.remove('hidden');
423
+
424
+ // Preview the mask
425
+ const reader = new FileReader();
426
+ reader.onload = function(e) {
427
+ maskPreview.src = e.target.result;
428
+ };
429
+ reader.readAsDataURL(file);
430
+ }
431
+
432
+ // Clear buttons
433
+ clearEmbeddingBtn.addEventListener('click', function() {
434
+ embeddingFileInput.value = '';
435
+ embeddingFileName.textContent = 'No file selected';
436
+ this.classList.add('hidden');
437
+ embeddingResult.value = '';
438
+ });
439
+
440
+ clearImageBtn.addEventListener('click', function() {
441
+ imageFileInput.value = '';
442
+ imageFileName.textContent = 'No file selected';
443
+ this.classList.add('hidden');
444
+ imagePreview.src = "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='400' height='300' viewBox='0 0 400 300' fill='%23f3f4f6'%3E%3Crect width='400' height='300'/%3E%3Ctext x='50%' y='50%' dominant-baseline='middle' text-anchor='middle' font-family='Arial' font-size='16' fill='%239ca3af'%3ENo image selected%3C/text%3E%3C/svg%3E";
445
+ });
446
+
447
+ clearMaskBtn.addEventListener('click', function() {
448
+ maskFileInput.value = '';
449
+ maskFileName.textContent = 'No file selected';
450
+ this.classList.add('hidden');
451
+ maskPreview.src = "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='400' height='300' viewBox='0 0 400 300' fill='%23f3f4f6'%3E%3Crect width='400' height='300'/%3E%3Ctext x='50%' y='50%' dominant-baseline='middle' text-anchor='middle' font-family='Arial' font-size='16' fill='%239ca3af'%3ENo mask selected%3C/text%3E%3C/svg%3E";
452
+ });
453
+
454
+ // Copy buttons
455
+ copyEmbeddingBtn.addEventListener('click', function() {
456
+ embeddingResult.select();
457
+ document.execCommand('copy');
458
+
459
+ // Show copied feedback
460
+ const originalText = this.innerHTML;
461
+ this.innerHTML = '<i class="fas fa-check"></i>';
462
+ setTimeout(() => {
463
+ this.innerHTML = originalText;
464
+ }, 2000);
465
+ });
466
+
467
+ copyDescriptionBtn.addEventListener('click', function() {
468
+ descriptionResult.select();
469
+ document.execCommand('copy');
470
+
471
+ // Show copied feedback
472
+ const originalText = this.innerHTML;
473
+ this.innerHTML = '<i class="fas fa-check"></i>';
474
+ setTimeout(() => {
475
+ this.innerHTML = originalText;
476
+ }, 2000);
477
+ });
478
+
479
+ // Generate buttons - these would call your actual API endpoints
480
+ generateEmbeddingBtn.addEventListener('click', function() {
481
+ if (!embeddingFileInput.files.length) {
482
+ alert('Please select an image file first.');
483
+ return;
484
+ }
485
+
486
+ // Simulate API call
487
+ embeddingButtonText.textContent = 'Processing';
488
+ embeddingLoading.classList.remove('hidden');
489
+
490
+ setTimeout(() => {
491
+ // This is where you would call your image_to_sam_embedding API
492
+ // For demo purposes, we're just showing a placeholder
493
+ embeddingResult.value = "This would be the base64 encoded embedding from the API response.";
494
+
495
+ embeddingButtonText.textContent = 'Generate Embedding';
496
+ embeddingLoading.classList.add('hidden');
497
+ }, 2000);
498
+ });
499
+
500
+ generateDescriptionBtn.addEventListener('click', function() {
501
+ if (!imageFileInput.files.length || !maskFileInput.files.length) {
502
+ alert('Please select both an image and a mask file first.');
503
+ return;
504
+ }
505
+
506
+ // Simulate API call
507
+ descriptionButtonText.textContent = 'Processing';
508
+ descriptionLoading.classList.remove('hidden');
509
+ descriptionResult.value = '';
510
+
511
+ setTimeout(() => {
512
+ // This is where you would call your describe_without_streaming API
513
+ // For demo purposes, we're just showing a placeholder
514
+ const prompt = descriptionPrompt.value || "Describe the object in the mask";
515
+ descriptionResult.value = `This is a detailed description of the object in the mask based on the prompt: "${prompt}".\n\nThe description would include specific details about the object's appearance, context, and any other relevant information the model can provide.`;
516
+
517
+ descriptionButtonText.textContent = 'Generate';
518
+ descriptionLoading.classList.add('hidden');
519
+ }, 3000);
520
+ });
521
+
522
+ generateStreamingBtn.addEventListener('click', function() {
523
+ if (!imageFileInput.files.length || !maskFileInput.files.length) {
524
+ alert('Please select both an image and a mask file first.');
525
+ return;
526
+ }
527
+
528
+ // Simulate streaming API call
529
+ streamingButtonText.textContent = 'Streaming';
530
+ streamingLoading.classList.remove('hidden');
531
+ descriptionResult.value = '';
532
+
533
+ const prompt = descriptionPrompt.value || "Describe the object in the mask";
534
+ const demoTexts = [
535
+ "This is the first part of the description... ",
536
+ "The object appears to be a person standing in a park. ",
537
+ "They are wearing a blue jacket and holding a bag. ",
538
+ "The background shows trees and a clear sky. ",
539
+ "The lighting suggests it's late afternoon. ",
540
+ "This completes the description of the object."
541
+ ];
542
+
543
+ let i = 0;
544
+ const interval = setInterval(() => {
545
+ if (i < demoTexts.length) {
546
+ descriptionResult.value += demoTexts[i];
547
+ i++;
548
+ } else {
549
+ clearInterval(interval);
550
+ streamingButtonText.textContent = 'Stream';
551
+ streamingLoading.classList.add('hidden');
552
+ }
553
+ }, 500);
554
+ });
555
+
556
+ // Smooth scrolling for anchor links
557
+ document.querySelectorAll('a[href^="#"]').forEach(anchor => {
558
+ anchor.addEventListener('click', function(e) {
559
+ e.preventDefault();
560
+
561
+ const targetId = this.getAttribute('href');
562
+ if (targetId === '#') return;
563
+
564
+ const targetElement = document.querySelector(targetId);
565
+ if (targetElement) {
566
+ targetElement.scrollIntoView({
567
+ behavior: 'smooth'
568
+ });
569
+
570
+ // Close mobile menu if open
571
+ const mobileMenu = document.getElementById('mobile-menu');
572
+ if (!mobileMenu.classList.contains('hidden')) {
573
+ mobileMenu.classList.add('hidden');
574
+ }
575
+ }
576
+ });
577
+ });
578
+ </script>
579
+ <p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=Boobs00/ai-powered-image-understanding" style="color: #fff;text-decoration: underline;" target="_blank" >Remix</a></p></body>
580
+ </html>
prompts.txt ADDED
@@ -0,0 +1 @@
 
 
1
"""Gradio backend for the Describe Anything Model (DAM) demo.

Exposes three API endpoints -- ``image_to_sam_embedding``, ``describe`` and
``describe_without_streaming`` -- and mounts the static files in ``dist`` at
``/`` in place of the default Gradio page.
"""
import os

# Set ahead of the gradio import further down.
os.environ["GRADIO_SSR_MODE"] = "false"

# NOTE(review): the source text had lost its line breaks; the nesting here
# (install/download/unzip only when "checkpoints" does not exist yet) is a
# reconstruction -- confirm against the original script.
if not os.path.exists("checkpoints"):
    os.makedirs("checkpoints")
    os.system("pip install gdown")
    os.system("gdown https://drive.google.com/uc?id=1eQe6blJcyI7oy78C8ozwj1IUkbkFEItf; unzip -o dam_3b_v1.zip -d checkpoints")

from segment_anything import sam_model_registry, SamPredictor
import gradio as gr
import numpy as np
import cv2
import base64
import torch
from PIL import Image
import io
import argparse
from typing import Iterator
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from transformers import SamModel, SamProcessor
from dam import DescribeAnythingModel, disable_torch_init

try:
    from spaces import GPU
except ImportError:
    print("Spaces not installed, using dummy GPU decorator")

    def GPU(fn=None, **_options):
        """No-op stand-in for ``spaces.GPU``.

        Works both as ``@GPU`` and as ``@GPU(duration=...)``; the previous
        ``lambda fn: fn`` fallback raised ``TypeError`` for the keyword form,
        which is the only form this script uses.
        """
        if fn is None:
            return lambda wrapped: wrapped
        return fn


# Load SAM model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
sam_model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device)
sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-huge")


def _decode_base64_image(data: str) -> Image.Image:
    """Decode a base64 string into a PIL image.

    If *data* contains a comma (e.g. a ``data:image/png;base64,...`` URL), only
    the part after it is decoded; otherwise the whole string is decoded.
    """
    payload = data.split(',')[1] if ',' in data else data
    return Image.open(io.BytesIO(base64.b64decode(payload)))


def _load_image_and_mask(image_base64: str, mask_base64: str):
    """Return ``(image, mask)`` decoded from base64, with the mask binarized."""
    img = _decode_base64_image(image_base64)
    mask = _decode_base64_image(mask_base64)
    # Convert to grayscale; every non-zero pixel becomes 255, the rest 0.
    mask = Image.fromarray((np.array(mask.convert('L')) > 0).astype(np.uint8) * 255)
    return img, mask


@GPU(duration=75)
def image_to_sam_embedding(base64_image: str) -> str:
    """Compute the SAM image embedding for a base64-encoded image.

    Args:
        base64_image: Base64 image data, with or without a ``data:`` URL prefix.

    Returns:
        The embedding array's raw bytes (``ndarray.tobytes()``), base64-encoded.

    Raises:
        gr.Error: If decoding or model inference fails; the original exception
            is attached as the cause.
    """
    try:
        image = _decode_base64_image(base64_image)

        # Process image with SAM processor
        inputs = sam_processor(image, return_tensors="pt").to(device)

        # Get image embedding
        with torch.no_grad():
            image_embedding = sam_model.get_image_embeddings(inputs["pixel_values"])

        # Convert to CPU and numpy, then encode the raw bytes as base64
        embedding_bytes = image_embedding.cpu().numpy().tobytes()
        return base64.b64encode(embedding_bytes).decode('utf-8')
    except Exception as e:
        print(f"Error processing image: {str(e)}")
        raise gr.Error(f"Failed to process image: {str(e)}") from e


@GPU(duration=75)
def describe(image_base64: str, mask_base64: str, query: str) -> Iterator[str]:
    """Stream a DAM description of the masked region.

    Yields the accumulated text after each token produced by
    ``dam.get_description(..., streaming=True)``.

    NOTE: ``dam`` is a module global that is only assigned in the
    ``__main__`` block below.
    """
    img, mask = _load_image_and_mask(image_base64, mask_base64)

    # Get description using DAM with streaming
    description_generator = dam.get_description(img, mask, query, streaming=True)

    # Stream the tokens
    text = ""
    for token in description_generator:
        text += token
        yield text


@GPU(duration=75)
def describe_without_streaming(image_base64: str, mask_base64: str, query: str):
    """Return the DAM description of the masked region in one piece.

    NOTE: ``dam`` is a module global that is only assigned in the
    ``__main__`` block below.
    """
    img, mask = _load_image_and_mask(image_base64, mask_base64)

    # Get description using DAM
    return dam.get_description(img, mask, query)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Describe Anything gradio demo")
    parser.add_argument("--model-path", type=str, default="checkpoints/dam_3b_v1", help="Path to the model checkpoint")
    parser.add_argument("--prompt-mode", type=str, default="full+focal_crop", help="Prompt mode")
    parser.add_argument("--conv-mode", type=str, default="v1", help="Conversation mode")
    parser.add_argument("--temperature", type=float, default=0.2, help="Sampling temperature")
    parser.add_argument("--top_p", type=float, default=0.5, help="Top-p for sampling")

    args = parser.parse_args()

    # Initialize DAM model
    disable_torch_init()
    dam = DescribeAnythingModel(
        model_path=args.model_path,
        conv_mode=args.conv_mode,
        prompt_mode=args.prompt_mode,
        temperature=args.temperature,
        top_p=args.top_p,
        num_beams=1,
        max_new_tokens=512,
    ).to(device)

    # Create Gradio interface
    with gr.Blocks() as demo:
        gr.Interface(
            fn=image_to_sam_embedding,
            inputs=gr.Textbox(label="Image Base64"),
            outputs=gr.Textbox(label="Embedding Base64"),
            title="Image Embedding Generator",
            api_name="image_to_sam_embedding"
        )
        gr.Interface(
            fn=describe,
            inputs=[
                gr.Textbox(label="Image Base64"),
                gr.Text(label="Mask Base64"),
                gr.Text(label="Prompt")
            ],
            outputs=[
                gr.Text(label="Description")
            ],
            title="Mask Description Generator",
            api_name="describe"
        )
        gr.Interface(
            fn=describe_without_streaming,
            inputs=[
                gr.Textbox(label="Image Base64"),
                gr.Text(label="Mask Base64"),
                gr.Text(label="Prompt")
            ],
            outputs=[
                gr.Text(label="Description")
            ],
            title="Mask Description Generator (Non-Streaming)",
            api_name="describe_without_streaming"
        )

    # Keep launch() from blocking so the routes can be adjusted afterwards;
    # the saved original block_thread is invoked at the very end.
    demo._block_thread = demo.block_thread
    demo.block_thread = lambda: None
    demo.launch()

    # Drop the "/" route(s) so the static mount below takes over "/".
    # Iterate over a snapshot: removing from the list being iterated would
    # skip the entry that follows each removed one.
    for route in list(demo.app.routes):
        if route.path == "/":
            demo.app.routes.remove(route)

    demo.app.mount("/", StaticFiles(directory="dist", html=True), name="demo")

    demo._block_thread()