BK-V committed on
Commit
f9d0191
·
1 Parent(s): 51bb590

initial commit

Browse files
Files changed (2) hide show
  1. index.html +354 -18
  2. style.css +0 -28
index.html CHANGED
@@ -1,19 +1,355 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Camera Interaction App</title>
7
+ <style>
8
+ body {
9
+ font-family: sans-serif;
10
+ display: flex;
11
+ flex-direction: column;
12
+ align-items: center;
13
+ gap: 20px;
14
+ padding: 20px;
15
+ background-color: #f0f0f0;
16
+ }
17
+ .controls,
18
+ .io-areas {
19
+ display: flex;
20
+ gap: 10px;
21
+ align-items: center;
22
+ background-color: #fff;
23
+ padding: 15px;
24
+ border-radius: 8px;
25
+ box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
26
+ }
27
+ .io-areas {
28
+ flex-direction: column;
29
+ align-items: stretch;
30
+ }
31
+ textarea {
32
+ width: 300px;
33
+ height: 80px;
34
+ padding: 8px;
35
+ border: 1px solid #ccc;
36
+ border-radius: 4px;
37
+ font-size: 14px;
38
+ }
39
+ #videoFeed {
40
+ display: block;
41
+ width: 100%;
42
+ height: 100%;
43
+ border-radius: 6px;
44
+ object-fit: cover;
45
+ }
46
+ #videoContainer {
47
+ position: relative;
48
+ width: 480px;
49
+ height: 360px;
50
+ border: 2px solid #333;
51
+ background-color: #000;
52
+ border-radius: 8px;
53
+ margin: 0 auto;
54
+ }
55
+ #loadingOverlay {
56
+ position: absolute;
57
+ top: 0;
58
+ left: 0;
59
+ width: 100%;
60
+ height: 100%;
61
+ display: none;
62
+ justify-content: center;
63
+ align-items: center;
64
+ background-color: rgba(0, 0, 0, 0.7);
65
+ z-index: 10;
66
+ border-radius: 6px;
67
+ color: #ffffff;
68
+ font-size: 1.5em;
69
+ font-weight: bold;
70
+ }
71
+ #startButton {
72
+ padding: 10px 20px;
73
+ font-size: 16px;
74
+ cursor: pointer;
75
+ border: none;
76
+ border-radius: 4px;
77
+ color: white;
78
+ }
79
+ #startButton.start {
80
+ background-color: #28a745; /* Green */
81
+ }
82
+ #startButton.stop {
83
+ background-color: #dc3545; /* Red */
84
+ }
85
+ label {
86
+ font-weight: bold;
87
+ }
88
+ select {
89
+ padding: 8px;
90
+ border-radius: 4px;
91
+ border: 1px solid #ccc;
92
+ }
93
+ .hidden {
94
+ display: none;
95
+ }
96
+ </style>
97
+ </head>
98
+ <body>
99
+ <h1>Camera Interaction App</h1>
100
+
101
+ <div id="videoContainer">
102
+ <video id="videoFeed" autoplay playsinline></video>
103
+ <div id="loadingOverlay">Loading...</div>
104
+ </div>
105
+ <canvas id="canvas" class="hidden"></canvas>
106
+ <!-- For capturing frames -->
107
+
108
+ <div class="io-areas">
109
+ <div>
110
+ <label for="instructionText">Instruction:</label><br />
111
+ <textarea
112
+ id="instructionText"
113
+ style="height: 2em; width: 40em"
114
+ name="Instruction"
115
+ ></textarea>
116
+ </div>
117
+ <div>
118
+ <label for="responseText">Response:</label><br />
119
+ <textarea
120
+ id="responseText"
121
+ style="height: 2em; width: 40em"
122
+ name="Response"
123
+ readonly
124
+ placeholder="Server response will appear here..."
125
+ ></textarea>
126
+ </div>
127
+ </div>
128
+
129
+ <div class="controls">
130
+ <label for="intervalSelect">Interval between 2 requests:</label>
131
+ <select id="intervalSelect" name="Interval between 2 requests">
132
+ <option value="0" selected>0ms</option>
133
+ <option value="100">100ms</option>
134
+ <option value="250">250ms</option>
135
+ <option value="500">500ms</option>
136
+ <option value="1000">1s</option>
137
+ <option value="2000">2s</option>
138
+ </select>
139
+ <button id="startButton" class="start">Start</button>
140
+ </div>
141
+
142
+ <script type="module">
143
+ import {
144
+ AutoProcessor,
145
+ AutoModelForVision2Seq,
146
+ RawImage,
147
+ } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers/dist/transformers.min.js";
148
+
149
+ const video = document.getElementById("videoFeed");
150
+ const canvas = document.getElementById("canvas");
151
+ const instructionText = document.getElementById("instructionText");
152
+ const responseText = document.getElementById("responseText");
153
+ const intervalSelect = document.getElementById("intervalSelect");
154
+ const startButton = document.getElementById("startButton");
155
+ const loadingOverlay = document.getElementById("loadingOverlay");
156
+
157
+ instructionText.value = "What do you see?"; // default instruction
158
+ const CONTEXT = `
159
+ Translate the text into persian and only return the translated text without any other text.
160
+ `
161
+
162
+ let stream;
163
+ let isProcessing = false;
164
+ let processor, model;
165
+ async function initModel() {
166
+ const modelId = "HuggingFaceTB/SmolVLM-500M-Instruct"; // or "HuggingFaceTB/SmolVLM-Instruct";
167
+ loadingOverlay.style.display = "flex";
168
+ responseText.value = "Loading processor...";
169
+ processor = await AutoProcessor.from_pretrained(modelId);
170
+ responseText.value = "Processor loaded. Loading model...";
171
+ model = await AutoModelForVision2Seq.from_pretrained(modelId, {
172
+ dtype: {
173
+ embed_tokens: "fp16",
174
+ vision_encoder: "q4",
175
+ decoder_model_merged: "q4",
176
+ },
177
+ device: "webgpu",
178
+ });
179
+ responseText.value = "Model loaded. Initializing camera...";
180
+ loadingOverlay.style.display = "none";
181
+ }
182
// Request webcam access and wire the stream into the <video> element.
// On failure, surfaces the error both in the response box and via alert().
async function initCamera() {
  const constraints = { video: true, audio: false };
  try {
    const mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
    stream = mediaStream;
    video.srcObject = mediaStream;
    responseText.value = "Camera access granted. Ready to start.";
  } catch (err) {
    console.error("Error accessing camera:", err);
    responseText.value = `Error accessing camera: ${err.name} - ${err.message}. Please ensure permissions are granted and you are on HTTPS or localhost.`;
    alert(
      `Error accessing camera: ${err.name}. Make sure you've granted permission and are on HTTPS or localhost.`
    );
  }
}
198
// Snapshot the current video frame via the hidden canvas and wrap the pixels
// as a RawImage (RGBA, 4 channels). Returns null when the stream is not yet
// producing frames (e.g. before the first decoded frame arrives).
function captureImage() {
  if (!stream || !video.videoWidth) {
    console.warn("Video stream not ready for capture.");
    return null;
  }
  const { videoWidth: width, videoHeight: height } = video;
  canvas.width = width;
  canvas.height = height;
  // willReadFrequently hints the browser to keep the backing store CPU-side.
  const ctx = canvas.getContext("2d", { willReadFrequently: true });
  ctx.drawImage(video, 0, 0, width, height);
  const pixels = ctx.getImageData(0, 0, width, height);
  return new RawImage(pixels.data, pixels.width, pixels.height, 4);
}
210
// Run one round of on-device VLM inference: format the chat prompt, encode the
// frame, generate up to 100 new tokens, and decode only the completion text.
async function runLocalVisionInference(imgElement, instruction) {
  const chat = [
    {
      role: "user",
      content: [{ type: "image" }, { type: "text", text: instruction }],
    },
  ];

  const prompt = processor.apply_chat_template(chat, {
    add_generation_prompt: true,
  });

  const modelInputs = await processor(prompt, [imgElement], {
    do_image_splitting: false,
  });

  const outputIds = await model.generate({
    ...modelInputs,
    max_new_tokens: 100,
  });

  // Slice off the prompt tokens so only the newly generated text is decoded.
  const promptLength = modelInputs.input_ids.dims.at(-1);
  const decoded = processor.batch_decode(
    outputIds.slice(null, [promptLength, null]),
    { skip_special_tokens: true }
  );
  return decoded[0].trim();
}
237
+
238
// Send `text` to an OpenRouter-hosted chat model with CONTEXT as the system
// prompt (used here for Persian translation) and return the reply string.
// Throws on non-2xx responses or a malformed response body.
async function callExternalLLmAPI(text) {
  // FIXME(security): this bearer token is committed in client-side source and
  // is visible to anyone who loads the page. Revoke it and proxy the request
  // through a server that holds the key instead.
  const apiKey =
    "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";
  const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${apiKey}`,
      "HTTP-Referer": "<YOUR_SITE_URL>", // Optional. Site URL for rankings on openrouter.ai.
      "X-Title": "<YOUR_SITE_NAME>", // Optional. Site title for rankings on openrouter.ai.
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "qwen/qwen-2.5-72b-instruct:free",
      messages: [
        { role: "system", content: CONTEXT },
        { role: "user", content: text },
      ],
    }),
  });

  if (!response.ok) {
    throw new Error(`HTTP error! Status: ${response.status}`);
  }

  const data = await response.json();
  // Robustness: fail with a clear message when the provider returns an
  // unexpected shape, instead of "Cannot read properties of undefined".
  const generatedText = data.choices?.[0]?.message?.content;
  if (generatedText == null) {
    throw new Error("Unexpected API response: no message content");
  }
  return generatedText;
}
270
+
271
// One capture → local-inference → translate → display cycle.
// No-op when processing has been stopped between scheduler ticks; any failure
// in the pipeline is reported in the response box instead of crashing the loop.
async function sendData() {
  if (!isProcessing) return;

  const instruction = instructionText.value;
  const rawImg = captureImage();
  if (rawImg === null) {
    responseText.value = "Capture failed";
    return;
  }

  try {
    const localReply = await runLocalVisionInference(rawImg, instruction);
    responseText.value = await callExternalLLmAPI(localReply);
  } catch (e) {
    console.error(e);
    responseText.value = `Error: ${e.message}`;
  }
}
288
// Promise-based delay helper used to pace the processing loop.
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
291
// Drive sendData() repeatedly until isProcessing is cleared, pausing for the
// user-selected interval between iterations. The interval is read once at
// loop start (the select is disabled while running anyway).
async function processingLoop() {
  const pauseMs = parseInt(intervalSelect.value, 10);
  for (;;) {
    if (!isProcessing) break;
    await sendData();
    if (!isProcessing) break;
    await sleep(pauseMs);
  }
}
299
// Flip the UI into the "running" state and kick off the processing loop.
// Refuses to start when camera permission was never granted.
function handleStart() {
  if (!stream) {
    responseText.value = "Camera not available. Cannot start.";
    alert("Camera not available. Please grant permission first.");
    return;
  }

  isProcessing = true;
  startButton.textContent = "Stop";
  startButton.classList.replace("start", "stop");
  // Lock the inputs so instruction/interval stay fixed for the session.
  for (const el of [instructionText, intervalSelect]) {
    el.disabled = true;
  }
  responseText.value = "Processing started...";
  processingLoop(); // intentionally not awaited — runs in the background
}
313
// Return the UI to the idle state and signal the loop to exit after its
// current iteration (the loop polls isProcessing).
function handleStop() {
  isProcessing = false;
  startButton.textContent = "Start";
  startButton.classList.replace("stop", "start");
  for (const el of [instructionText, intervalSelect]) {
    el.disabled = false;
  }
  // Only overwrite the status banner, never a model response.
  if (responseText.value.startsWith("Processing started...")) {
    responseText.value = "Processing stopped.";
  }
}
323
// Single toggle button: starts when idle, stops when running.
startButton.addEventListener("click", () => {
  if (isProcessing) handleStop();
  else handleStart();
});

// Boot sequence: warn when WebGPU is missing (the model requires it), then
// load the model and request camera access.
window.addEventListener("DOMContentLoaded", async () => {
  // Check for WebGPU support
  if (!navigator.gpu) {
    const videoElement = document.getElementById("videoFeed");
    const warningElement = document.createElement("p");
    warningElement.textContent = "WebGPU is not available in this browser.";
    warningElement.style.color = "red";
    warningElement.style.textAlign = "center";
    videoElement.parentNode.insertBefore(warningElement, videoElement.nextSibling);
  }
  await initModel();
  await initCamera();
});

// Release the camera when the page is closed or reloaded.
window.addEventListener("beforeunload", () => {
  stream?.getTracks().forEach((track) => track.stop());
});
352
+ </script>
353
+ </body>
354
  </html>
355
+
style.css DELETED
@@ -1,28 +0,0 @@
1
- body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
- }
5
-
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
9
- }
10
-
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
- }
17
-
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
- }
25
-
26
- .card p:last-child {
27
- margin-bottom: 0;
28
- }