akhaliq HF Staff committed on
Commit
7da38e2
·
verified ·
1 Parent(s): ec424ff

Upload index.js with huggingface_hub

Browse files
Files changed (1) hide show
  1. index.js +324 -60
index.js CHANGED
@@ -1,76 +1,340 @@
1
- import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.4.1';
 
 
 
 
 
2
 
3
- // Reference the elements that we will need
4
- const status = document.getElementById('status');
5
- const fileUpload = document.getElementById('upload');
6
- const imageContainer = document.getElementById('container');
7
- const example = document.getElementById('example');
8
 
9
- const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg';
10
-
11
- // Create a new object detection pipeline
12
- status.textContent = 'Loading model...';
13
- const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
14
- status.textContent = 'Ready';
15
-
16
- example.addEventListener('click', (e) => {
17
- e.preventDefault();
18
- detect(EXAMPLE_URL);
19
- });
 
 
 
 
20
 
21
- fileUpload.addEventListener('change', function (e) {
22
- const file = e.target.files[0];
23
- if (!file) {
24
- return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
 
26
 
27
- const reader = new FileReader();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- // Set up a callback when the file is loaded
30
- reader.onload = e2 => detect(e2.target.result);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
- reader.readAsDataURL(file);
33
- });
 
 
 
 
34
 
 
 
 
 
 
35
 
36
- // Detect objects in the image
37
- async function detect(img) {
38
- imageContainer.innerHTML = '';
39
- imageContainer.style.backgroundImage = `url(${img})`;
40
 
41
- status.textContent = 'Analysing...';
42
- const output = await detector(img, {
43
- threshold: 0.5,
44
- percentage: true,
45
- });
46
- status.textContent = '';
47
- output.forEach(renderBox);
48
  }
49
 
50
- // Render a bounding box and label on the image
51
- function renderBox({ box, label }) {
52
- const { xmax, xmin, ymax, ymin } = box;
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- // Generate a random color for the box
55
- const color = '#' + Math.floor(Math.random() * 0xFFFFFF).toString(16).padStart(6, 0);
 
 
 
 
 
 
 
 
 
56
 
57
- // Draw the box
58
- const boxElement = document.createElement('div');
59
- boxElement.className = 'bounding-box';
60
- Object.assign(boxElement.style, {
61
- borderColor: color,
62
- left: 100 * xmin + '%',
63
- top: 100 * ymin + '%',
64
- width: 100 * (xmax - xmin) + '%',
65
- height: 100 * (ymax - ymin) + '%',
66
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- // Draw label
69
- const labelElement = document.createElement('span');
70
- labelElement.textContent = label;
71
- labelElement.className = 'bounding-box-label';
72
- labelElement.style.backgroundColor = color;
73
 
74
- boxElement.appendChild(labelElement);
75
- imageContainer.appendChild(boxElement);
76
- }
 
1
+ import {
2
+ AutoProcessor,
3
+ AutoModelForImageTextToText,
4
+ RawImage,
5
+ TextStreamer,
6
+ } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2';
7
 
8
// Shared module state.

// Processor instance; stays null until initializeModel() assigns it.
let processor = null;

// Model instance; stays null until initializeModel() assigns it.
let model = null;

// The File picked through the video input, or null when nothing is selected.
let currentVideo = null;

// One { frame, time, description } entry per analysed frame of the current run.
let frameDescriptions = [];
 
12
 
13
// Check WebGPU support
/**
 * Detects whether WebGPU can actually be used and reflects the result in the UI.
 *
 * `navigator.gpu` being present only means the API is exposed; an adapter is
 * requested as well, because `requestAdapter()` resolves to null when no
 * suitable adapter exists. When WebGPU is unusable the device selector is
 * forced to 'wasm' and disabled.
 *
 * @returns {Promise<boolean>} true when a WebGPU adapter is available.
 */
async function checkWebGPUSupport() {
  const statusEl = document.getElementById('webgpuStatus');

  let isAvailable = false;
  if ('gpu' in navigator) {
    try {
      const adapter = await navigator.gpu.requestAdapter();
      isAvailable = Boolean(adapter);
    } catch (error) {
      // Treat a failing adapter request the same as "no WebGPU".
      console.warn('WebGPU adapter request failed:', error);
    }
  }

  if (isAvailable) {
    statusEl.textContent = '✅ Available';
    statusEl.style.color = '#10b981';
    return true;
  }

  statusEl.textContent = '❌ Not Available';
  statusEl.style.color = '#ef4444';

  const deviceSelect = document.getElementById('deviceSelect');
  deviceSelect.value = 'wasm';
  deviceSelect.disabled = true;
  return false;
}
28
 
29
// Initialize the model
/**
 * Loads the processor and the model and stores them in the module-level
 * `processor` / `model` variables.
 *
 * The device is read from the `deviceSelect` element; `device: 'webgpu'` is
 * passed to the model loader only when that value is 'webgpu'.
 *
 * @returns {Promise<boolean>} true on success; false after the failure has
 *   been logged and reported through showError().
 */
async function initializeModel() {
  const { value: device } = document.getElementById('deviceSelect');
  updateStatus('Loading AI model...');

  const model_id = "onnx-community/FastVLM-0.5B-ONNX";
  const dtype = {
    embed_tokens: "fp16",
    vision_encoder: "q4",
    decoder_model_merged: "q4",
  };
  const modelOptions = device === 'webgpu' ? { dtype, device: 'webgpu' } : { dtype };

  try {
    processor = await AutoProcessor.from_pretrained(model_id);
    model = await AutoModelForImageTextToText.from_pretrained(model_id, modelOptions);

    updateStatus('Model loaded successfully!');
    return true;
  } catch (error) {
    console.error('Model initialization error:', error);
    showError('Failed to load AI model. Please try again.');
    return false;
  }
}
61
 
62
// Extract frames from video
/**
 * Samples `numFrames` evenly spaced frames from a video file.
 *
 * Frame i is taken at `i * duration / numFrames` seconds. Each frame is drawn
 * onto a canvas whose longest side is capped at 1024 px (aspect ratio is
 * preserved), then converted to a RawImage for the model and to a JPEG data
 * URL for the preview. Progress is reported through updateProgress() in the
 * 0–30 % range.
 *
 * @param {Blob} videoFile - The video to sample (a File from the input works).
 * @param {number} [numFrames=4] - How many frames to extract.
 * @returns {Promise<Array<{image: object, preview: string, time: number}>>}
 *   The extracted frames in chronological order.
 * @throws {Error} If the video fails to load, has no usable duration or
 *   picture size, or a frame cannot be captured. The returned promise rejects
 *   instead of staying pending.
 */
async function extractFramesFromVideo(videoFile, numFrames = 4) {
  const MAX_FRAME_SIDE = 1024;

  const video = document.createElement('video');
  const canvas = document.createElement('canvas');
  const ctx = canvas.getContext('2d');
  const objectUrl = URL.createObjectURL(videoFile);

  // Resolves on the next `eventName`; rejects if the element reports an error first.
  const waitForEvent = (eventName) =>
    new Promise((resolve, reject) => {
      const onEvent = () => {
        video.removeEventListener('error', onError);
        resolve();
      };
      const onError = () => {
        video.removeEventListener(eventName, onEvent);
        reject(new Error('Failed to load video'));
      };
      video.addEventListener(eventName, onEvent, { once: true });
      video.addEventListener('error', onError, { once: true });
    });

  try {
    // Subscribe before assigning src so the event cannot be missed.
    const metadataLoaded = waitForEvent('loadedmetadata');
    video.src = objectUrl;
    await metadataLoaded;

    const { duration, videoWidth, videoHeight } = video;
    if (!Number.isFinite(duration)) {
      throw new Error('Video duration is unavailable');
    }
    if (!(videoWidth > 0) || !(videoHeight > 0)) {
      throw new Error('Video has no picture dimensions');
    }

    // One uniform scale factor keeps the aspect ratio; never upscale.
    const scale = Math.min(1, MAX_FRAME_SIDE / videoWidth, MAX_FRAME_SIDE / videoHeight);
    canvas.width = Math.max(1, Math.round(videoWidth * scale));
    canvas.height = Math.max(1, Math.round(videoHeight * scale));

    const frameInterval = duration / numFrames;
    const frames = [];

    for (let i = 0; i < numFrames; i++) {
      const currentTime = i * frameInterval;

      // Listen for `seeked` before triggering the seek.
      const seeked = waitForEvent('seeked');
      video.currentTime = currentTime;
      await seeked;

      ctx.drawImage(video, 0, 0, canvas.width, canvas.height);

      const blob = await new Promise((done) => canvas.toBlob(done, 'image/png'));
      if (!blob) {
        throw new Error(`Failed to capture frame ${i + 1}`);
      }
      const image = await RawImage.fromBlob(blob);

      // Store frame preview
      const preview = canvas.toDataURL('image/jpeg', 0.8);
      frames.push({ image, preview, time: currentTime });

      updateProgress((i + 1) / numFrames * 30, `Extracting frame ${i + 1}/${numFrames}`);
    }

    return frames;
  } finally {
    // Release the object URL on success and on every failure path.
    URL.revokeObjectURL(objectUrl);
  }
}
111
 
112
// Process video

/** Replaces the children of `container` with one <p> holding `text` as plain text. */
function renderParagraph(container, text) {
  const paragraph = document.createElement('p');
  paragraph.textContent = text;
  container.replaceChildren(paragraph);
}

/**
 * Runs one greedy generation with the loaded processor/model.
 *
 * @param {object} image - RawImage passed to the processor.
 * @param {string} content - User message content for the chat template.
 * @param {number} maxNewTokens - Upper bound on generated tokens.
 * @param {(textSoFar: string) => void} onText - Called with the accumulated
 *   streamed text each time the streamer emits a chunk.
 * @returns {Promise<string>} The decoded completion without special tokens.
 */
async function generateText(image, content, maxNewTokens, onText) {
  const prompt = processor.apply_chat_template([{ role: "user", content }], {
    add_generation_prompt: true,
  });
  const inputs = await processor(image, prompt, {
    add_special_tokens: false,
  });

  let streamed = '';
  const outputs = await model.generate({
    ...inputs,
    max_new_tokens: maxNewTokens,
    do_sample: false,
    streamer: new TextStreamer(processor.tokenizer, {
      skip_prompt: true,
      // Special tokens are control markers, not text meant for the user.
      skip_special_tokens: true,
      callback_function: (text) => {
        streamed += text;
        onText(streamed);
      },
    }),
  });

  // Drop the prompt tokens so only the newly generated part is decoded.
  const [decoded] = processor.batch_decode(
    outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
    { skip_special_tokens: true },
  );
  return decoded;
}

/**
 * Click handler for the analyze button: extracts frames from `currentVideo`,
 * describes each frame with the model, then generates an overall summary.
 *
 * Results are rendered into `framesGrid` / `summaryContent` and collected in
 * the module-level `frameDescriptions`. Any failure is logged and reported
 * through showError(); the button is always restored afterwards.
 *
 * @returns {Promise<void>}
 */
async function processVideo() {
  const analyzeBtn = document.getElementById('analyzeBtn');
  const progressSection = document.getElementById('progressSection');
  const resultsSection = document.getElementById('resultsSection');

  analyzeBtn.disabled = true;
  analyzeBtn.querySelector('.spinner').classList.remove('hidden');
  analyzeBtn.querySelector('.btn-text').textContent = 'Processing...';

  progressSection.classList.remove('hidden');
  resultsSection.classList.add('hidden');
  // An error from a previous run would otherwise stay on screen.
  document.getElementById('errorSection').classList.add('hidden');
  frameDescriptions = [];

  try {
    if (!currentVideo) {
      throw new Error('No video selected');
    }

    // Initialize model if not already loaded
    if (!model || !processor) {
      if (!(await initializeModel())) {
        throw new Error('Model initialization failed');
      }
    }

    // Extract frames
    const numFrames = Number.parseInt(document.getElementById('frameCount').value, 10);
    if (!Number.isInteger(numFrames) || numFrames < 1) {
      throw new Error('Frame count must be a positive whole number');
    }
    updateProgress(0, 'Extracting frames from video...');
    const frames = await extractFramesFromVideo(currentVideo, numFrames);
    if (frames.length === 0) {
      throw new Error('No frames could be extracted');
    }

    // Display frame previews
    const framesGrid = document.getElementById('framesGrid');
    framesGrid.innerHTML = '';

    // Process each frame
    for (let i = 0; i < frames.length; i++) {
      const frame = frames[i];
      updateProgress(30 + (i / frames.length * 50), `Analyzing frame ${i + 1}/${frames.length}`);

      // Create frame card. Only values produced by this app (canvas data URL,
      // index, formatted time) are interpolated into the markup.
      const frameCard = document.createElement('div');
      frameCard.className = 'frame-card';
      frameCard.innerHTML = `
        <img src="${frame.preview}" alt="Frame ${i + 1}">
        <div class="frame-info">
          <h4>Frame ${i + 1}</h4>
          <span class="frame-time">${formatTime(frame.time)}</span>
        </div>
        <div class="frame-description">
          <div class="loading-dots">Analyzing...</div>
        </div>
      `;
      framesGrid.appendChild(frameCard);

      // Model output is untrusted text: render it via textContent, never innerHTML.
      const descriptionEl = frameCard.querySelector('.frame-description');
      const description = await generateText(
        frame.image,
        `<image>Describe what's happening in this frame of the video in detail.`,
        256,
        (textSoFar) => renderParagraph(descriptionEl, textSoFar),
      );
      // Show the final decoded text even if the streamer never fired.
      renderParagraph(descriptionEl, description);

      frameDescriptions.push({
        frame: i + 1,
        time: frame.time,
        description,
      });
    }

    // Generate overall summary
    updateProgress(80, 'Generating video summary...');
    const summaryCard = document.getElementById('summaryCard');
    const summaryContent = document.getElementById('summaryContent');

    const showSummary = (text) => {
      renderParagraph(summaryContent, text);
      summaryCard.classList.remove('hidden');
    };

    // NOTE(review): as in the original, the first frame is passed to the
    // processor although the summary prompt has no <image> placeholder —
    // confirm the processor handles this combination as intended.
    const summary = await generateText(
      frames[0].image,
      `Based on these video frame descriptions: ${frameDescriptions.map(f => f.description).join('. ')}. Provide a comprehensive summary of the entire video content.`,
      512,
      showSummary,
    );
    showSummary(summary);

    updateProgress(100, 'Analysis complete!');

    // Show results
    resultsSection.classList.remove('hidden');
    progressSection.classList.add('hidden');
  } catch (error) {
    console.error('Processing error:', error);
    showError(`Failed to process video: ${error.message}`);
  } finally {
    analyzeBtn.disabled = false;
    analyzeBtn.querySelector('.spinner').classList.add('hidden');
    analyzeBtn.querySelector('.btn-text').textContent = 'Analyze Video';
  }
}
257
 
258
+ // Utility functions
259
/**
 * Formats a duration in seconds as `m:ss` (minutes are not capped at 59).
 *
 * Fractions are truncated. Negative, NaN or infinite input is treated as 0 so
 * the UI never shows values such as "NaN:NaN" or "-1:-5".
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} The formatted time, e.g. "1:05".
 */
function formatTime(seconds) {
  const value = Number(seconds);
  const totalSeconds = Number.isFinite(value) && value > 0 ? Math.floor(value) : 0;
  const mins = Math.floor(totalSeconds / 60);
  const secs = totalSeconds % 60;
  return `${mins}:${String(secs).padStart(2, '0')}`;
}
264
 
265
/**
 * Updates the progress bar, its percentage label and the status line.
 *
 * The percentage is clamped to 0–100 and non-finite values fall back to 0, so
 * a bad computation upstream cannot produce an invalid CSS width or "NaN%".
 *
 * @param {number} percent - Progress in percent.
 * @param {string} status - Text for the `currentStatus` element.
 */
function updateProgress(percent, status) {
  const numeric = Number(percent);
  const clamped = Number.isFinite(numeric) ? Math.min(100, Math.max(0, numeric)) : 0;

  document.getElementById('progressFill').style.width = `${clamped}%`;
  document.getElementById('progressText').textContent = `${Math.round(clamped)}%`;
  document.getElementById('currentStatus').textContent = status;
}
270
 
271
/**
 * Writes `message` into the `currentStatus` element.
 *
 * @param {string} message - Status text to display.
 */
function updateStatus(message) {
  const statusEl = document.getElementById('currentStatus');
  statusEl.textContent = message;
}
 
274
 
275
/**
 * Displays an error: sets the message text, reveals the error section and
 * hides the progress section.
 *
 * @param {string} message - Text shown in the `errorMessage` element.
 */
function showError(message) {
  const byId = (id) => document.getElementById(id);

  byId('errorMessage').textContent = message;
  byId('errorSection').classList.remove('hidden');
  byId('progressSection').classList.add('hidden');
}
280
 
281
/**
 * Downloads the current analysis as a pretty-printed JSON file named
 * `video-analysis-<timestamp>.json`.
 *
 * The payload holds an ISO timestamp, the video file name (null when no video
 * is selected, e.g. after resetApp()), the collected frame descriptions and
 * the text of the `summaryContent` element.
 */
function downloadResults() {
  const results = {
    timestamp: new Date().toISOString(),
    // currentVideo is null after a reset; do not throw in that case.
    video: currentVideo?.name ?? null,
    frames: frameDescriptions,
    summary: document.getElementById('summaryContent').textContent,
  };

  const blob = new Blob([JSON.stringify(results, null, 2)], { type: 'application/json' });
  const url = URL.createObjectURL(blob);

  const link = document.createElement('a');
  link.href = url;
  link.download = `video-analysis-${Date.now()}.json`;
  link.click();

  // Revoke on a later task rather than synchronously, so the browser has
  // started reading the blob before its URL is invalidated.
  setTimeout(() => URL.revokeObjectURL(url), 100);
}
297
 
298
/**
 * Returns the UI and module state to the "no video selected" condition:
 * clears the file input and info, hides the preview, analyze button and the
 * progress/results/error sections, unloads the preview video and releases
 * its object URL, and resets `currentVideo` / `frameDescriptions`.
 */
function resetApp() {
  const byId = (id) => document.getElementById(id);

  byId('videoInput').value = '';
  byId('videoInfo').innerHTML = '';

  const hiddenIds = [
    'videoPreview',
    'analyzeBtn',
    'progressSection',
    'resultsSection',
    'errorSection',
  ];
  for (const id of hiddenIds) {
    byId(id).classList.add('hidden');
  }

  // The preview keeps a blob: URL alive (and the video loaded) until released.
  const videoElement = byId('videoElement');
  if (videoElement) {
    const previewUrl = videoElement.src;
    if (typeof previewUrl === 'string' && previewUrl.startsWith('blob:')) {
      URL.revokeObjectURL(previewUrl);
    }
    videoElement.removeAttribute('src');
    videoElement.load();
  }

  currentVideo = null;
  frameDescriptions = [];
}
309
 
310
// Event listeners

/**
 * Handles a new selection in the video file input.
 *
 * Files whose MIME type does not start with "video/" are ignored. For a video
 * the file becomes `currentVideo`, its name and size are shown, the preview
 * player is pointed at it and the analyze button is revealed and enabled.
 *
 * @param {Event} event - `change` event from the `videoInput` element.
 */
function handleVideoSelected(event) {
  const file = event.target.files[0];
  if (!file || !file.type.startsWith('video/')) {
    return;
  }
  currentVideo = file;

  // Display video info. The file name is user-controlled, so it is inserted
  // as text rather than through innerHTML.
  const nameEl = document.createElement('span');
  nameEl.className = 'file-name';
  nameEl.textContent = file.name;

  const sizeEl = document.createElement('span');
  sizeEl.className = 'file-size';
  sizeEl.textContent = `${(file.size / 1024 / 1024).toFixed(2)} MB`;

  const fileInfo = document.createElement('div');
  fileInfo.className = 'file-info';
  fileInfo.append(nameEl, sizeEl);
  document.getElementById('videoInfo').replaceChildren(fileInfo);

  // Show video preview, releasing the object URL of any previous selection.
  const videoElement = document.getElementById('videoElement');
  if (videoElement.src.startsWith('blob:')) {
    URL.revokeObjectURL(videoElement.src);
  }
  videoElement.src = URL.createObjectURL(file);
  document.getElementById('videoPreview').classList.remove('hidden');

  // Show analyze button
  const analyzeBtn = document.getElementById('analyzeBtn');
  analyzeBtn.classList.remove('hidden');
  analyzeBtn.disabled = false;
}

document.getElementById('videoInput').addEventListener('change', handleVideoSelected);

document.getElementById('analyzeBtn').addEventListener('click', processVideo);

// Initialize
checkWebGPUSupport().catch((error) => {
  // Do not leave the startup check as an unhandled rejection.
  console.error('WebGPU support check failed:', error);
});