Spaces:
Paused
Paused
Update script1.js
Browse files- script1.js +25 -68
script1.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
|
|
|
|
|
| 1 |
// Constants and Configuration
|
| 2 |
const USER_SPEECH_INTERRUPT_DELAY = 500;
|
| 3 |
-
const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
|
| 4 |
const CHUNK_SIZE = 300;
|
| 5 |
const MAX_PREFETCH_REQUESTS = 10;
|
| 6 |
const PREFETCH_CACHE_EXPIRATION = 60000; // 1 minute
|
|
@@ -10,7 +12,7 @@ const AUDIO_CACHE_EXPIRATION = 3600000; // 1 hour
|
|
| 10 |
const startStopButton = document.getElementById('startStopButton');
|
| 11 |
const voiceSelectionDropdown = document.getElementById('voiceSelect');
|
| 12 |
const modelSelectionDropdown = document.getElementById('modelSelect');
|
| 13 |
-
const noiseSuppressionCheckbox = document.getElementById('noiseSuppression');
|
| 14 |
const responseTimeDisplay = document.getElementById('responseTime');
|
| 15 |
const userActivityIndicator = document.getElementById('userIndicator');
|
| 16 |
const aiActivityIndicator = document.getElementById('aiIndicator');
|
|
@@ -44,19 +46,6 @@ let conversationHistory = [];
|
|
| 44 |
// Audio Caching
|
| 45 |
const audioCache = new Map();
|
| 46 |
|
| 47 |
-
// Webcam and Gradio Integration
|
| 48 |
-
import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
|
| 49 |
-
const video = document.getElementById('webcam');
|
| 50 |
-
const clients = [
|
| 51 |
-
"multimodalart/Florence-2-l4",
|
| 52 |
-
"gokaygokay/Florence-2",
|
| 53 |
-
"multimodalart/Florence-2-l4-2",
|
| 54 |
-
"gokaygokay/Florence-2",
|
| 55 |
-
]; // Or your preferred Gradio models
|
| 56 |
-
let app;
|
| 57 |
-
let lastCaption = "";
|
| 58 |
-
|
| 59 |
-
|
| 60 |
// Utility Functions
|
| 61 |
|
| 62 |
// Normalize query text
|
|
@@ -209,7 +198,7 @@ const cancelPrefetchRequests = (query) => {
|
|
| 209 |
// AI Interaction Functions
|
| 210 |
|
| 211 |
// Send a query to the AI
|
| 212 |
-
|
| 213 |
console.log("Sending query to AI:", query);
|
| 214 |
isRequestInProgress = true;
|
| 215 |
updateActivityIndicators();
|
|
@@ -234,7 +223,8 @@ const sendQueryToAI = async (query) => {
|
|
| 234 |
requestAbortController = new AbortController();
|
| 235 |
|
| 236 |
try {
|
| 237 |
-
|
|
|
|
| 238 |
} catch (error) {
|
| 239 |
if (error.name !== 'AbortError') {
|
| 240 |
console.error("Error sending query to AI:", error);
|
|
@@ -543,20 +533,32 @@ if ('webkitSpeechRecognition' in window) {
|
|
| 543 |
|
| 544 |
setInterval(updateLatency, 100);
|
| 545 |
|
| 546 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 547 |
|
| 548 |
async function startWebcam() {
|
| 549 |
try {
|
| 550 |
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
|
| 551 |
video.srcObject = stream;
|
| 552 |
-
setInterval(captureAndProcessImage, 5000);
|
| 553 |
} catch (error) {
|
| 554 |
console.error("Error accessing webcam: ", error);
|
| 555 |
-
// Consider adding user feedback here, e.g., alert or display a message.
|
| 556 |
}
|
| 557 |
}
|
| 558 |
|
| 559 |
-
|
| 560 |
async function captureAndProcessImage() {
|
| 561 |
const canvas = document.createElement('canvas');
|
| 562 |
canvas.width = video.videoWidth;
|
|
@@ -568,7 +570,6 @@ async function captureAndProcessImage() {
|
|
| 568 |
await processWithGradio(blob);
|
| 569 |
}
|
| 570 |
|
| 571 |
-
|
| 572 |
async function processWithGradio(imageBlob) {
|
| 573 |
try {
|
| 574 |
const randomClient = clients[Math.floor(Math.random() * clients.length)];
|
|
@@ -577,57 +578,13 @@ async function processWithGradio(imageBlob) {
|
|
| 577 |
|
| 578 |
const result = await app.predict("/process_image", [handledFile, "Detailed Caption"]);
|
| 579 |
|
| 580 |
-
const dataString = result.data[0];
|
| 581 |
-
lastCaption = dataString ||
|
| 582 |
} catch (error) {
|
| 583 |
console.error("Error processing with Gradio:", error);
|
| 584 |
-
// Add error handling here (e.g., display a message to the user).
|
| 585 |
-
lastCaption = ""; // Reset caption if there's an error.
|
| 586 |
}
|
| 587 |
}
|
| 588 |
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
// Modify sendQueryToAI to include the caption
|
| 592 |
-
async function sendQueryToAI(query) {
|
| 593 |
-
console.log("Sending query to AI:", query);
|
| 594 |
-
isRequestInProgress = true;
|
| 595 |
-
updateActivityIndicators();
|
| 596 |
-
firstResponseTextTimestamp = null;
|
| 597 |
-
|
| 598 |
-
const normalizedQuery = normalizeQueryText(query);
|
| 599 |
-
const cacheKey = generateCacheKey(normalizedQuery, modelSelectionDropdown.value, conversationHistory, modelSelectionDropdown.value);
|
| 600 |
-
|
| 601 |
-
queryStartTime = Date.now();
|
| 602 |
-
|
| 603 |
-
// Check prefetch cache
|
| 604 |
-
if (prefetchCache.has(cacheKey)) {
|
| 605 |
-
const cachedData = prefetchCache.get(cacheKey);
|
| 606 |
-
if (Date.now() - cachedData.timestamp < PREFETCH_CACHE_EXPIRATION) {
|
| 607 |
-
audioPlaybackQueue.push({ url: cachedData.url, isPrefetched: true });
|
| 608 |
-
playNextAudio();
|
| 609 |
-
} else {
|
| 610 |
-
prefetchCache.delete(cacheKey);
|
| 611 |
-
}
|
| 612 |
-
}
|
| 613 |
-
|
| 614 |
-
requestAbortController = new AbortController();
|
| 615 |
-
|
| 616 |
-
try {
|
| 617 |
-
const combinedQuery = `{USER: "${query}"}, ${lastCaption}, {USER: "${query}"}`;
|
| 618 |
-
await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
|
| 619 |
-
} catch (error) {
|
| 620 |
-
if (error.name !== 'AbortError') {
|
| 621 |
-
console.error("Error sending query to AI:", error);
|
| 622 |
-
}
|
| 623 |
-
} finally {
|
| 624 |
-
isRequestInProgress = false;
|
| 625 |
-
updateActivityIndicators();
|
| 626 |
-
}
|
| 627 |
-
};
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
// Initialize Webcam and Speech Recognition on Load
|
| 631 |
window.onload = () => {
|
| 632 |
startWebcam();
|
| 633 |
};
|
|
|
|
// script1.js

// Constants and Configuration

// Delay (ms) applied when the user starts speaking before acting on it.
const USER_SPEECH_INTERRUPT_DELAY = 500;

// StreamElements endpoint used to synthesize speech audio.
const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";

// Maximum characters sent per TTS request.
const CHUNK_SIZE = 300;

// Cap on speculative prefetch requests kept in flight.
const MAX_PREFETCH_REQUESTS = 10;

// Prefetch cache entries expire after 1 minute.
const PREFETCH_CACHE_EXPIRATION = 60000; // 1 minute
|
|
|
|
// Cached references to the UI controls this script reads and updates.
const byId = (id) => document.getElementById(id);

const startStopButton = byId('startStopButton');
const voiceSelectionDropdown = byId('voiceSelect');
const modelSelectionDropdown = byId('modelSelect');
const noiseSuppressionCheckbox = byId('noiseSuppression');
const responseTimeDisplay = byId('responseTime');
const userActivityIndicator = byId('userIndicator');
const aiActivityIndicator = byId('aiIndicator');
|
|
|
|
// Audio Caching
// Keys/values are produced elsewhere in this file; presumably cache-key
// strings mapping to synthesized-audio entries — confirm against usage.
const audioCache = new Map();
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
// Utility Functions
|
| 50 |
|
| 51 |
// Normalize query text
|
|
|
|
| 198 |
// AI Interaction Functions
|
| 199 |
|
| 200 |
// Send a query to the AI
|
| 201 |
+
async function sendQueryToAI(query) {
|
| 202 |
console.log("Sending query to AI:", query);
|
| 203 |
isRequestInProgress = true;
|
| 204 |
updateActivityIndicators();
|
|
|
|
| 223 |
requestAbortController = new AbortController();
|
| 224 |
|
| 225 |
try {
|
| 226 |
+
const combinedQuery = `{USER: "${query}"}, ${lastCaption}, {USER: "${query}"}`;
|
| 227 |
+
await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
|
| 228 |
} catch (error) {
|
| 229 |
if (error.name !== 'AbortError') {
|
| 230 |
console.error("Error sending query to AI:", error);
|
|
|
|
| 533 |
|
| 534 |
setInterval(updateLatency, 100);
|
| 535 |
|
| 536 |
// Webcam Integration
// NOTE(review): ES module imports are hoisted, so this mid-file import is
// legal, but it is confusing here — consider moving it to the top of the file.
import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';

const video = document.getElementById('webcam');

// Gradio app handle; assigned elsewhere (likely when connecting to a Space) — TODO confirm.
let app;

// Most recent webcam caption; mixed into the query sent to the AI.
let lastCaption = "";

// Gradio Spaces a caption request is randomly dispatched to.
// NOTE(review): "gokaygokay/Florence-2" is listed twice — possibly to weight
// the random pick toward it; confirm the duplicate is intentional.
const clients = [
  "multimodalart/Florence-2-l4",
  "gokaygokay/Florence-2",
  "multimodalart/Florence-2-l4-2",
  "gokaygokay/Florence-2",
];
|
| 551 |
|
| 552 |
// Ask for webcam access, pipe the stream into the <video> element, and kick
// off a frame capture (captureAndProcessImage) every 5 seconds.
// Errors (permission denied, no camera, unsupported browser) are logged and
// otherwise swallowed, so the rest of the app keeps running without video.
async function startWebcam() {
  try {
    const stream = await navigator.mediaDevices.getUserMedia({ video: true });
    video.srcObject = stream;
    setInterval(captureAndProcessImage, 5000);
  } catch (error) {
    console.error("Error accessing webcam: ", error);
  }
}
|
| 561 |
|
|
|
|
| 562 |
async function captureAndProcessImage() {
|
| 563 |
const canvas = document.createElement('canvas');
|
| 564 |
canvas.width = video.videoWidth;
|
|
|
|
| 570 |
await processWithGradio(blob);
|
| 571 |
}
|
| 572 |
|
|
|
|
| 573 |
async function processWithGradio(imageBlob) {
|
| 574 |
try {
|
| 575 |
const randomClient = clients[Math.floor(Math.random() * clients.length)];
|
|
|
|
| 578 |
|
| 579 |
const result = await app.predict("/process_image", [handledFile, "Detailed Caption"]);
|
| 580 |
|
| 581 |
+
const dataString = result.data[0];
|
| 582 |
+
lastCaption = dataString || lastCaption;
|
| 583 |
} catch (error) {
|
| 584 |
console.error("Error processing with Gradio:", error);
|
|
|
|
|
|
|
| 585 |
}
|
| 586 |
}
|
| 587 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 588 |
// Start webcam capture once the page has finished loading.
// startWebcam is async but intentionally fire-and-forget here; it handles
// its own errors internally.
window.onload = () => {
  startWebcam();
};
|