Upload 21 files
Browse files- private/admin/admin.css +0 -0
- public/app.js +128 -2
- public/styles.css +87 -0
- server.js +156 -68
private/admin/admin.css
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
public/app.js
CHANGED
|
@@ -3866,6 +3866,11 @@ class RoxAI {
|
|
| 3866 |
this._updateDeepResearchStatus(data.status);
|
| 3867 |
continue;
|
| 3868 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3869 |
// Handle DeepResearch started notification
|
| 3870 |
if (data.deepResearchStarted && data.info) {
|
| 3871 |
this._showDeepResearchNotice(data.info, data.searchCount, data.articlesRead);
|
|
@@ -4972,6 +4977,46 @@ class RoxAI {
|
|
| 4972 |
formData.append('chatId', this.currentConversationId || '');
|
| 4973 |
formData.append('conversationHistory', JSON.stringify(historyUpTo));
|
| 4974 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4975 |
this.requestController = new AbortController();
|
| 4976 |
|
| 4977 |
// Track streaming response for partial save on cancel
|
|
@@ -8417,6 +8462,87 @@ class RoxAI {
|
|
| 8417 |
}
|
| 8418 |
}
|
| 8419 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8420 |
/**
|
| 8421 |
* Update typing status with elapsed time and status messages
|
| 8422 |
* @private
|
|
@@ -8425,8 +8551,8 @@ class RoxAI {
|
|
| 8425 |
const statusEl = document.getElementById('typingStatus');
|
| 8426 |
if (!statusEl || !this._typingStartTime) return;
|
| 8427 |
|
| 8428 |
-
// Don't override DeepResearch status
|
| 8429 |
-
if (statusEl.classList.contains('deep-research-status')) {
|
| 8430 |
return;
|
| 8431 |
}
|
| 8432 |
|
|
|
|
| 3866 |
this._updateDeepResearchStatus(data.status);
|
| 3867 |
continue;
|
| 3868 |
}
|
| 3869 |
+
// Handle Vision processing real-time status updates
|
| 3870 |
+
if (data.visionStatus && data.status) {
|
| 3871 |
+
this._updateVisionStatus(data.status, data.phase, data.currentImage, data.totalImages, data.model);
|
| 3872 |
+
continue;
|
| 3873 |
+
}
|
| 3874 |
// Handle DeepResearch started notification
|
| 3875 |
if (data.deepResearchStarted && data.info) {
|
| 3876 |
this._showDeepResearchNotice(data.info, data.searchCount, data.articlesRead);
|
|
|
|
| 4977 |
formData.append('chatId', this.currentConversationId || '');
|
| 4978 |
formData.append('conversationHistory', JSON.stringify(historyUpTo));
|
| 4979 |
|
| 4980 |
+
// ==================== REGENERATE WITH ORIGINAL IMAGES ====================
|
| 4981 |
+
// If the original message had image attachments, include them in regeneration
|
| 4982 |
+
// This ensures vision analysis works correctly on regenerate
|
| 4983 |
+
if (userMsg.attachments && userMsg.attachments.length > 0) {
|
| 4984 |
+
const imageAttachments = userMsg.attachments.filter(att =>
|
| 4985 |
+
att.type && att.type.startsWith('image/') && att.preview
|
| 4986 |
+
);
|
| 4987 |
+
|
| 4988 |
+
if (imageAttachments.length > 0) {
|
| 4989 |
+
console.log(`🖼️ Regenerating with ${imageAttachments.length} original image(s)`);
|
| 4990 |
+
|
| 4991 |
+
// Convert base64 previews back to File objects for upload
|
| 4992 |
+
for (const att of imageAttachments) {
|
| 4993 |
+
try {
|
| 4994 |
+
// Extract base64 data from data URL
|
| 4995 |
+
const base64Match = att.preview.match(/^data:([^;]+);base64,(.+)$/);
|
| 4996 |
+
if (base64Match) {
|
| 4997 |
+
const mimeType = base64Match[1];
|
| 4998 |
+
const base64Data = base64Match[2];
|
| 4999 |
+
|
| 5000 |
+
// Convert base64 to Blob
|
| 5001 |
+
const byteCharacters = atob(base64Data);
|
| 5002 |
+
const byteNumbers = new Array(byteCharacters.length);
|
| 5003 |
+
for (let i = 0; i < byteCharacters.length; i++) {
|
| 5004 |
+
byteNumbers[i] = byteCharacters.charCodeAt(i);
|
| 5005 |
+
}
|
| 5006 |
+
const byteArray = new Uint8Array(byteNumbers);
|
| 5007 |
+
const blob = new Blob([byteArray], { type: mimeType });
|
| 5008 |
+
|
| 5009 |
+
// Create File from Blob
|
| 5010 |
+
const file = new File([blob], att.name, { type: mimeType });
|
| 5011 |
+
formData.append('files', file);
|
| 5012 |
+
}
|
| 5013 |
+
} catch (imgErr) {
|
| 5014 |
+
console.warn('Failed to restore image for regeneration:', att.name, imgErr);
|
| 5015 |
+
}
|
| 5016 |
+
}
|
| 5017 |
+
}
|
| 5018 |
+
}
|
| 5019 |
+
|
| 5020 |
this.requestController = new AbortController();
|
| 5021 |
|
| 5022 |
// Track streaming response for partial save on cancel
|
|
|
|
| 8462 |
}
|
| 8463 |
}
|
| 8464 |
|
| 8465 |
+
/**
|
| 8466 |
+
* Update Vision processing status in real-time
|
| 8467 |
+
* Shows premium status updates during image analysis pipeline
|
| 8468 |
+
* @param {string} status - Current status message
|
| 8469 |
+
* @param {string} phase - Current phase (analyzing, fallback, sending, generating, error)
|
| 8470 |
+
* @param {number|null} currentImage - Current image being processed (1-based)
|
| 8471 |
+
* @param {number|null} totalImages - Total number of images
|
| 8472 |
+
* @param {string} model - The main LLM model name
|
| 8473 |
+
* @private
|
| 8474 |
+
*/
|
| 8475 |
+
_updateVisionStatus(status, phase, currentImage, totalImages, model) {
|
| 8476 |
+
const statusEl = document.getElementById('typingStatus');
|
| 8477 |
+
const typingIndicator = document.querySelector('.typing-indicator');
|
| 8478 |
+
|
| 8479 |
+
// Format display status based on phase
|
| 8480 |
+
let displayStatus = status;
|
| 8481 |
+
let shortStatus = status;
|
| 8482 |
+
|
| 8483 |
+
// Create premium phase-specific messages
|
| 8484 |
+
switch (phase) {
|
| 8485 |
+
case 'analyzing':
|
| 8486 |
+
if (totalImages === 1) {
|
| 8487 |
+
shortStatus = '👁️ Rox Vision analyzing your image...';
|
| 8488 |
+
displayStatus = '👁️ Rox Vision is carefully analyzing your image...';
|
| 8489 |
+
} else if (currentImage && totalImages) {
|
| 8490 |
+
shortStatus = `👁️ Analyzing image ${currentImage}/${totalImages}...`;
|
| 8491 |
+
displayStatus = `👁️ Rox Vision analyzing image ${currentImage} of ${totalImages}...`;
|
| 8492 |
+
}
|
| 8493 |
+
break;
|
| 8494 |
+
case 'analyzed':
|
| 8495 |
+
if (totalImages === 1) {
|
| 8496 |
+
shortStatus = '✅ Image analyzed';
|
| 8497 |
+
displayStatus = '✅ Image analysis complete';
|
| 8498 |
+
} else if (currentImage && totalImages) {
|
| 8499 |
+
shortStatus = `✅ Image ${currentImage}/${totalImages} done`;
|
| 8500 |
+
displayStatus = `✅ Image ${currentImage} of ${totalImages} analyzed`;
|
| 8501 |
+
}
|
| 8502 |
+
break;
|
| 8503 |
+
case 'fallback':
|
| 8504 |
+
shortStatus = '🔄 Switching to Rox Vision Max...';
|
| 8505 |
+
displayStatus = '🔄 Primary vision busy, Rox Vision Max taking over...';
|
| 8506 |
+
break;
|
| 8507 |
+
case 'sending':
|
| 8508 |
+
shortStatus = `📤 Sending to ${model}...`;
|
| 8509 |
+
displayStatus = `📤 Image context ready! Sending to ${model}...`;
|
| 8510 |
+
break;
|
| 8511 |
+
case 'generating':
|
| 8512 |
+
shortStatus = `🧠 ${model} is thinking...`;
|
| 8513 |
+
displayStatus = `🧠 ${model} is crafting your response...`;
|
| 8514 |
+
break;
|
| 8515 |
+
case 'error':
|
| 8516 |
+
shortStatus = '⚠️ Vision unavailable';
|
| 8517 |
+
displayStatus = status;
|
| 8518 |
+
break;
|
| 8519 |
+
}
|
| 8520 |
+
|
| 8521 |
+
// Update the status element
|
| 8522 |
+
if (statusEl) {
|
| 8523 |
+
statusEl.textContent = displayStatus;
|
| 8524 |
+
statusEl.classList.add('vision-status');
|
| 8525 |
+
statusEl.classList.remove('deep-research-status');
|
| 8526 |
+
}
|
| 8527 |
+
|
| 8528 |
+
// Update the typing indicator
|
| 8529 |
+
if (typingIndicator) {
|
| 8530 |
+
typingIndicator.classList.add('vision-active');
|
| 8531 |
+
typingIndicator.classList.remove('deep-research-active');
|
| 8532 |
+
|
| 8533 |
+
const textEl = typingIndicator.querySelector('.typing-text');
|
| 8534 |
+
if (textEl) {
|
| 8535 |
+
textEl.textContent = shortStatus;
|
| 8536 |
+
}
|
| 8537 |
+
|
| 8538 |
+
// Add progress indicator for multiple images
|
| 8539 |
+
if (totalImages > 1 && currentImage) {
|
| 8540 |
+
const progressPercent = Math.round((currentImage / totalImages) * 100);
|
| 8541 |
+
typingIndicator.style.setProperty('--vision-progress', `${progressPercent}%`);
|
| 8542 |
+
}
|
| 8543 |
+
}
|
| 8544 |
+
}
|
| 8545 |
+
|
| 8546 |
/**
|
| 8547 |
* Update typing status with elapsed time and status messages
|
| 8548 |
* @private
|
|
|
|
| 8551 |
const statusEl = document.getElementById('typingStatus');
|
| 8552 |
if (!statusEl || !this._typingStartTime) return;
|
| 8553 |
|
| 8554 |
+
// Don't override DeepResearch or Vision status
|
| 8555 |
+
if (statusEl.classList.contains('deep-research-status') || statusEl.classList.contains('vision-status')) {
|
| 8556 |
return;
|
| 8557 |
}
|
| 8558 |
|
public/styles.css
CHANGED
|
@@ -7510,6 +7510,93 @@ textarea:focus:not(:focus-visible) {
|
|
| 7510 |
background: #10b981;
|
| 7511 |
}
|
| 7512 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7513 |
/* ===== ATTACH MENU ===== */
|
| 7514 |
.attach-menu {
|
| 7515 |
position: fixed;
|
|
|
|
| 7510 |
background: #10b981;
|
| 7511 |
}
|
| 7512 |
|
| 7513 |
+
/* ===== VISION STATUS STYLES ===== */
|
| 7514 |
+
/* Premium Vision Processing Status */
|
| 7515 |
+
.vision-status {
|
| 7516 |
+
color: #8b5cf6 !important;
|
| 7517 |
+
font-weight: 500;
|
| 7518 |
+
animation: visionStatusPulse 2s ease-in-out infinite;
|
| 7519 |
+
}
|
| 7520 |
+
|
| 7521 |
+
/* Vision Status Animation */
|
| 7522 |
+
@keyframes visionStatusPulse {
|
| 7523 |
+
0%, 100% {
|
| 7524 |
+
opacity: 1;
|
| 7525 |
+
}
|
| 7526 |
+
50% {
|
| 7527 |
+
opacity: 0.7;
|
| 7528 |
+
}
|
| 7529 |
+
}
|
| 7530 |
+
|
| 7531 |
+
/* Enhanced Typing Indicator for Vision Processing */
|
| 7532 |
+
.typing-indicator.vision-active {
|
| 7533 |
+
background: linear-gradient(135deg, rgba(139, 92, 246, 0.1) 0%, rgba(167, 139, 250, 0.15) 100%);
|
| 7534 |
+
border: 1px solid rgba(139, 92, 246, 0.3);
|
| 7535 |
+
position: relative;
|
| 7536 |
+
overflow: hidden;
|
| 7537 |
+
}
|
| 7538 |
+
|
| 7539 |
+
.typing-indicator.vision-active::before {
|
| 7540 |
+
content: '';
|
| 7541 |
+
position: absolute;
|
| 7542 |
+
top: 0;
|
| 7543 |
+
left: 0;
|
| 7544 |
+
height: 100%;
|
| 7545 |
+
width: var(--vision-progress, 0%);
|
| 7546 |
+
background: linear-gradient(90deg, rgba(139, 92, 246, 0.15) 0%, rgba(167, 139, 250, 0.2) 100%);
|
| 7547 |
+
transition: width 0.3s ease;
|
| 7548 |
+
z-index: 0;
|
| 7549 |
+
}
|
| 7550 |
+
|
| 7551 |
+
.typing-indicator.vision-active .typing-text {
|
| 7552 |
+
color: #8b5cf6;
|
| 7553 |
+
font-weight: 500;
|
| 7554 |
+
position: relative;
|
| 7555 |
+
z-index: 1;
|
| 7556 |
+
}
|
| 7557 |
+
|
| 7558 |
+
.typing-indicator.vision-active .typing-dots span {
|
| 7559 |
+
background: #8b5cf6;
|
| 7560 |
+
position: relative;
|
| 7561 |
+
z-index: 1;
|
| 7562 |
+
}
|
| 7563 |
+
|
| 7564 |
+
/* Vision Eye Animation */
|
| 7565 |
+
@keyframes visionEyePulse {
|
| 7566 |
+
0%, 100% {
|
| 7567 |
+
transform: scale(1);
|
| 7568 |
+
}
|
| 7569 |
+
50% {
|
| 7570 |
+
transform: scale(1.1);
|
| 7571 |
+
}
|
| 7572 |
+
}
|
| 7573 |
+
|
| 7574 |
+
.typing-indicator.vision-active .typing-dots {
|
| 7575 |
+
animation: visionEyePulse 1.5s ease-in-out infinite;
|
| 7576 |
+
}
|
| 7577 |
+
|
| 7578 |
+
/* Light theme adjustments */
|
| 7579 |
+
[data-theme="light"] .vision-status {
|
| 7580 |
+
color: #7c3aed !important;
|
| 7581 |
+
}
|
| 7582 |
+
|
| 7583 |
+
[data-theme="light"] .typing-indicator.vision-active {
|
| 7584 |
+
background: linear-gradient(135deg, rgba(124, 58, 237, 0.08) 0%, rgba(139, 92, 246, 0.12) 100%);
|
| 7585 |
+
border-color: rgba(124, 58, 237, 0.25);
|
| 7586 |
+
}
|
| 7587 |
+
|
| 7588 |
+
[data-theme="light"] .typing-indicator.vision-active::before {
|
| 7589 |
+
background: linear-gradient(90deg, rgba(124, 58, 237, 0.1) 0%, rgba(139, 92, 246, 0.15) 100%);
|
| 7590 |
+
}
|
| 7591 |
+
|
| 7592 |
+
[data-theme="light"] .typing-indicator.vision-active .typing-text {
|
| 7593 |
+
color: #7c3aed;
|
| 7594 |
+
}
|
| 7595 |
+
|
| 7596 |
+
[data-theme="light"] .typing-indicator.vision-active .typing-dots span {
|
| 7597 |
+
background: #7c3aed;
|
| 7598 |
+
}
|
| 7599 |
+
|
| 7600 |
/* ===== ATTACH MENU ===== */
|
| 7601 |
.attach-menu {
|
| 7602 |
position: fixed;
|
server.js
CHANGED
|
@@ -10293,95 +10293,144 @@ app.post('/api/chat', upload.array('files', 50), async (req, res) => {
|
|
| 10293 |
return false;
|
| 10294 |
};
|
| 10295 |
|
| 10296 |
-
|
| 10297 |
-
|
| 10298 |
-
|
| 10299 |
-
|
| 10300 |
-
|
| 10301 |
-
|
| 10302 |
-
|
| 10303 |
-
|
| 10304 |
-
|
| 10305 |
-
|
| 10306 |
-
|
| 10307 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10308 |
}
|
| 10309 |
-
|
| 10310 |
-
|
| 10311 |
try {
|
| 10312 |
-
|
| 10313 |
-
|
| 10314 |
-
|
| 10315 |
-
|
| 10316 |
-
|
| 10317 |
-
|
| 10318 |
-
|
| 10319 |
-
|
| 10320 |
-
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10321 |
|
| 10322 |
-
|
|
|
|
|
|
|
| 10323 |
|
| 10324 |
-
|
| 10325 |
-
|
| 10326 |
-
|
| 10327 |
|
| 10328 |
-
|
| 10329 |
-
|
| 10330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10331 |
{
|
| 10332 |
role: 'user',
|
| 10333 |
content: [
|
| 10334 |
-
{ type: 'text', text:
|
| 10335 |
-
|
| 10336 |
]
|
| 10337 |
}
|
| 10338 |
];
|
| 10339 |
|
| 10340 |
-
const
|
| 10341 |
-
model: config.
|
| 10342 |
-
messages:
|
| 10343 |
temperature: 0.3,
|
| 10344 |
top_p: 0.9,
|
| 10345 |
max_tokens: 4096,
|
| 10346 |
stream: false
|
| 10347 |
});
|
| 10348 |
|
| 10349 |
-
const
|
| 10350 |
|
| 10351 |
-
// Check if
|
| 10352 |
-
if (
|
| 10353 |
-
|
| 10354 |
-
|
| 10355 |
-
|
| 10356 |
-
|
| 10357 |
-
|
| 10358 |
-
|
| 10359 |
}
|
| 10360 |
-
|
| 10361 |
-
|
| 10362 |
-
|
| 10363 |
-
|
|
|
|
|
|
|
|
|
|
| 10364 |
}
|
| 10365 |
-
} catch (visionError) {
|
| 10366 |
-
log.error(`❌ Vision analysis error: ${visionError.message || 'Unknown'}`);
|
| 10367 |
|
| 10368 |
-
// Try
|
| 10369 |
try {
|
| 10370 |
-
|
| 10371 |
-
|
|
|
|
|
|
|
|
|
|
| 10372 |
{ role: 'system', content: ROX_VISION_MAX_ANALYSIS_PROMPT },
|
| 10373 |
{
|
| 10374 |
role: 'user',
|
| 10375 |
content: [
|
| 10376 |
-
{ type: 'text', text:
|
| 10377 |
-
|
| 10378 |
]
|
| 10379 |
}
|
| 10380 |
];
|
| 10381 |
|
| 10382 |
const fallbackResponse = await openai.chat.completions.create({
|
| 10383 |
model: config.fallbackVisionModel,
|
| 10384 |
-
messages:
|
| 10385 |
temperature: 0.3,
|
| 10386 |
top_p: 0.9,
|
| 10387 |
max_tokens: 4096,
|
|
@@ -10390,20 +10439,53 @@ app.post('/api/chat', upload.array('files', 50), async (req, res) => {
|
|
| 10390 |
|
| 10391 |
const fallbackAnalysis = fallbackResponse.choices?.[0]?.message?.content || null;
|
| 10392 |
|
| 10393 |
-
// Check if fallback also refused
|
| 10394 |
if (fallbackAnalysis && !isVisionRefusal(fallbackAnalysis)) {
|
| 10395 |
-
|
| 10396 |
-
|
| 10397 |
-
|
| 10398 |
-
|
| 10399 |
-
|
| 10400 |
-
visionAnalysis = null;
|
| 10401 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10402 |
} catch (fallbackError) {
|
| 10403 |
-
log.error(`โ
|
| 10404 |
-
|
| 10405 |
-
visionAnalysis = null;
|
| 10406 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10407 |
}
|
| 10408 |
}
|
| 10409 |
|
|
@@ -10417,10 +10499,16 @@ app.post('/api/chat', upload.array('files', 50), async (req, res) => {
|
|
| 10417 |
// Inject vision analysis into the user message for the main LLM
|
| 10418 |
// The main LLM will use this analysis to generate the response
|
| 10419 |
const visionSibling = usingFallbackVision ? 'Rox Vision Max' : 'Rox Vision';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10420 |
const visionContext = `
|
| 10421 |
## 👁️ IMAGE ANALYSIS FROM YOUR VISION SIBLING (${visionSibling})
|
| 10422 |
|
| 10423 |
-
Your vision sibling has analyzed
|
| 10424 |
|
| 10425 |
${visionAnalysis}
|
| 10426 |
|
|
@@ -10428,7 +10516,7 @@ ${visionAnalysis}
|
|
| 10428 |
|
| 10429 |
**USER'S ORIGINAL QUESTION:** ${message}
|
| 10430 |
|
| 10431 |
-
**YOUR TASK:** Using the image analysis above from your vision sibling, provide a helpful response to the user's question. You can reference the visual details your sibling identified. Remember, you're working as a team - your vision sibling sees the image, and you provide the intelligent response!
|
| 10432 |
`;
|
| 10433 |
|
| 10434 |
// Replace the last user message with the enhanced version (text only, no images)
|
|
|
|
| 10293 |
return false;
|
| 10294 |
};
|
| 10295 |
|
| 10296 |
+
// ==================== VISION STATUS HELPER ====================
|
| 10297 |
+
// Helper function to send vision processing status updates to frontend
|
| 10298 |
+
// Defined here so it can be used in both vision processing blocks
|
| 10299 |
+
let visionSseHeadersSent = false;
|
| 10300 |
+
|
| 10301 |
+
/**
|
| 10302 |
+
* Send vision status update to frontend
|
| 10303 |
+
* @param {string} status - Status message to display
|
| 10304 |
+
* @param {string} phase - Current phase (analyzing, fallback, sending, generating, error)
|
| 10305 |
+
* @param {number} [currentImage] - Current image number (1-based)
|
| 10306 |
+
* @param {number} [totalImgs] - Total number of images
|
| 10307 |
+
*/
|
| 10308 |
+
const sendVisionStatus = (status, phase, currentImage = null, totalImgs = null) => {
|
| 10309 |
+
// Ensure SSE headers are set up
|
| 10310 |
+
if (!sseHeadersSent && !visionSseHeadersSent) {
|
| 10311 |
+
try {
|
| 10312 |
+
res.setHeader('Content-Type', 'text/event-stream');
|
| 10313 |
+
res.setHeader('Cache-Control', 'no-cache, no-transform');
|
| 10314 |
+
res.setHeader('Connection', 'keep-alive');
|
| 10315 |
+
res.setHeader('X-Accel-Buffering', 'no');
|
| 10316 |
+
res.setHeader('Content-Encoding', 'none');
|
| 10317 |
+
res.setHeader('Transfer-Encoding', 'chunked');
|
| 10318 |
+
res.flushHeaders();
|
| 10319 |
+
visionSseHeadersSent = true;
|
| 10320 |
+
sseHeadersSent = true;
|
| 10321 |
+
} catch (e) {
|
| 10322 |
+
// Headers may already be sent
|
| 10323 |
}
|
| 10324 |
+
}
|
| 10325 |
+
|
| 10326 |
try {
|
| 10327 |
+
const statusEvent = {
|
| 10328 |
+
visionStatus: true,
|
| 10329 |
+
status: status,
|
| 10330 |
+
phase: phase,
|
| 10331 |
+
currentImage: currentImage,
|
| 10332 |
+
totalImages: totalImgs,
|
| 10333 |
+
model: config.name
|
| 10334 |
+
};
|
| 10335 |
+
res.write(`data: ${JSON.stringify(statusEvent)}\n\n`);
|
| 10336 |
+
if (typeof res.flush === 'function') res.flush();
|
| 10337 |
+
} catch (e) {
|
| 10338 |
+
// Ignore write errors - connection may have closed
|
| 10339 |
+
}
|
| 10340 |
+
};
|
| 10341 |
|
| 10342 |
+
if (hasImages) {
|
| 10343 |
+
const totalImages = imageContents.length;
|
| 10344 |
+
log.info(`🖼️ Step 1: Rox Vision analyzing ${totalImages} image(s) sequentially...`);
|
| 10345 |
|
| 10346 |
+
// Send initial vision status
|
| 10347 |
+
const imageText = totalImages === 1 ? 'your image' : `${totalImages} images`;
|
| 10348 |
+
sendVisionStatus(`👁️ Rox Vision is analyzing ${imageText}...`, 'analyzing', 1, totalImages);
|
| 10349 |
|
| 10350 |
+
// ==================== SEQUENTIAL IMAGE PROCESSING ====================
|
| 10351 |
+
// Vision models only support 1 image at a time, so we process each image
|
| 10352 |
+
// separately and combine the analyses with clear image numbering
|
| 10353 |
+
const imageAnalyses = [];
|
| 10354 |
+
|
| 10355 |
+
/**
|
| 10356 |
+
* Analyze a single image with primary vision model, fallback to Rox Vision Max if needed
|
| 10357 |
+
* @param {Object} imageContent - Single image content object
|
| 10358 |
+
* @param {number} imageIndex - 1-based index of the image
|
| 10359 |
+
* @returns {Promise<{analysis: string|null, usedFallback: boolean}>}
|
| 10360 |
+
*/
|
| 10361 |
+
const analyzeSingleImage = async (imageContent, imageIndex) => {
|
| 10362 |
+
const imageLabel = totalImages > 1 ? `Image ${imageIndex} of ${totalImages}` : 'the image';
|
| 10363 |
+
const imagePrompt = totalImages > 1
|
| 10364 |
+
? `You are analyzing ${imageLabel}. The user has uploaded ${totalImages} images total.\n\nUser's question: "${message}"\n\nProvide a detailed analysis of THIS specific image (Image ${imageIndex}). Start your analysis with "**Image ${imageIndex}:**" so the user knows which image you're describing.`
|
| 10365 |
+
: `Please analyze the following image thoroughly. The user's question is: "${message}"\n\nProvide a detailed analysis that will help answer their question.`;
|
| 10366 |
+
|
| 10367 |
+
// Send status: analyzing this specific image
|
| 10368 |
+
if (totalImages > 1) {
|
| 10369 |
+
sendVisionStatus(`👁️ Rox Vision analyzing image ${imageIndex} of ${totalImages}...`, 'analyzing', imageIndex, totalImages);
|
| 10370 |
+
}
|
| 10371 |
+
|
| 10372 |
+
// Try primary Rox Vision first
|
| 10373 |
+
try {
|
| 10374 |
+
const visionMessages = [
|
| 10375 |
+
{ role: 'system', content: ROX_VISION_ANALYSIS_PROMPT },
|
| 10376 |
{
|
| 10377 |
role: 'user',
|
| 10378 |
content: [
|
| 10379 |
+
{ type: 'text', text: imagePrompt },
|
| 10380 |
+
imageContent
|
| 10381 |
]
|
| 10382 |
}
|
| 10383 |
];
|
| 10384 |
|
| 10385 |
+
const visionResponse = await openai.chat.completions.create({
|
| 10386 |
+
model: config.visionModel,
|
| 10387 |
+
messages: visionMessages,
|
| 10388 |
temperature: 0.3,
|
| 10389 |
top_p: 0.9,
|
| 10390 |
max_tokens: 4096,
|
| 10391 |
stream: false
|
| 10392 |
});
|
| 10393 |
|
| 10394 |
+
const analysis = visionResponse.choices?.[0]?.message?.content || null;
|
| 10395 |
|
| 10396 |
+
// Check if vision model refused
|
| 10397 |
+
if (analysis && !isVisionRefusal(analysis)) {
|
| 10398 |
+
log.info(`👁️ Rox Vision analyzed Image ${imageIndex}/${totalImages}`);
|
| 10399 |
+
// Send success status for this image
|
| 10400 |
+
if (totalImages > 1) {
|
| 10401 |
+
sendVisionStatus(`✅ Image ${imageIndex} analyzed successfully`, 'analyzed', imageIndex, totalImages);
|
| 10402 |
+
}
|
| 10403 |
+
return { analysis, usedFallback: false };
|
| 10404 |
}
|
| 10405 |
+
|
| 10406 |
+
// Primary refused, try fallback
|
| 10407 |
+
log.warn(`⚠️ Rox Vision refused Image ${imageIndex}, trying Rox Vision Max...`);
|
| 10408 |
+
sendVisionStatus(`🔄 Switching to Rox Vision Max for image ${totalImages > 1 ? imageIndex : ''}...`, 'fallback', imageIndex, totalImages);
|
| 10409 |
+
} catch (primaryError) {
|
| 10410 |
+
log.warn(`⚠️ Rox Vision error on Image ${imageIndex}: ${primaryError.message || 'Unknown'}`);
|
| 10411 |
+
sendVisionStatus(`🔄 Trying Rox Vision Max...`, 'fallback', imageIndex, totalImages);
|
| 10412 |
}
|
|
|
|
|
|
|
| 10413 |
|
| 10414 |
+
// Try Rox Vision Max as fallback
|
| 10415 |
try {
|
| 10416 |
+
const fallbackPrompt = totalImages > 1
|
| 10417 |
+
? `You are analyzing ${imageLabel}. The user has uploaded ${totalImages} images total.\n\nUser's question: "${message}"\n\nProvide a detailed analysis of THIS specific image (Image ${imageIndex}). Start your analysis with "**Image ${imageIndex}:**"`
|
| 10418 |
+
: `Please analyze the following image. User question: "${message}"`;
|
| 10419 |
+
|
| 10420 |
+
const fallbackMessages = [
|
| 10421 |
{ role: 'system', content: ROX_VISION_MAX_ANALYSIS_PROMPT },
|
| 10422 |
{
|
| 10423 |
role: 'user',
|
| 10424 |
content: [
|
| 10425 |
+
{ type: 'text', text: fallbackPrompt },
|
| 10426 |
+
imageContent
|
| 10427 |
]
|
| 10428 |
}
|
| 10429 |
];
|
| 10430 |
|
| 10431 |
const fallbackResponse = await openai.chat.completions.create({
|
| 10432 |
model: config.fallbackVisionModel,
|
| 10433 |
+
messages: fallbackMessages,
|
| 10434 |
temperature: 0.3,
|
| 10435 |
top_p: 0.9,
|
| 10436 |
max_tokens: 4096,
|
|
|
|
| 10439 |
|
| 10440 |
const fallbackAnalysis = fallbackResponse.choices?.[0]?.message?.content || null;
|
| 10441 |
|
|
|
|
| 10442 |
if (fallbackAnalysis && !isVisionRefusal(fallbackAnalysis)) {
|
| 10443 |
+
log.info(`👁️✨ Rox Vision Max analyzed Image ${imageIndex}/${totalImages}`);
|
| 10444 |
+
if (totalImages > 1) {
|
| 10445 |
+
sendVisionStatus(`✅ Image ${imageIndex} analyzed by Rox Vision Max`, 'analyzed', imageIndex, totalImages);
|
| 10446 |
+
}
|
| 10447 |
+
return { analysis: fallbackAnalysis, usedFallback: true };
|
|
|
|
| 10448 |
}
|
| 10449 |
+
|
| 10450 |
+
log.warn(`⚠️ Rox Vision Max also refused Image ${imageIndex}`);
|
| 10451 |
+
sendVisionStatus(`⚠️ Could not analyze image ${totalImages > 1 ? imageIndex : ''}`, 'error', imageIndex, totalImages);
|
| 10452 |
+
return { analysis: null, usedFallback: true };
|
| 10453 |
} catch (fallbackError) {
|
| 10454 |
+
log.error(`❌ Both vision models failed on Image ${imageIndex}: ${fallbackError.message || 'Unknown'}`);
|
| 10455 |
+
return { analysis: null, usedFallback: true };
|
|
|
|
| 10456 |
}
|
| 10457 |
+
};
|
| 10458 |
+
|
| 10459 |
+
// Process each image sequentially (vision models don't support multiple images)
|
| 10460 |
+
let anyUsedFallback = false;
|
| 10461 |
+
for (let i = 0; i < imageContents.length; i++) {
|
| 10462 |
+
const result = await analyzeSingleImage(imageContents[i], i + 1);
|
| 10463 |
+
if (result.analysis) {
|
| 10464 |
+
imageAnalyses.push(result.analysis);
|
| 10465 |
+
}
|
| 10466 |
+
if (result.usedFallback) {
|
| 10467 |
+
anyUsedFallback = true;
|
| 10468 |
+
}
|
| 10469 |
+
}
|
| 10470 |
+
|
| 10471 |
+
// Combine all image analyses
|
| 10472 |
+
if (imageAnalyses.length > 0) {
|
| 10473 |
+
if (totalImages === 1) {
|
| 10474 |
+
// Single image - use analysis directly
|
| 10475 |
+
visionAnalysis = imageAnalyses[0];
|
| 10476 |
+
} else {
|
| 10477 |
+
// Multiple images - combine with clear separation
|
| 10478 |
+
visionAnalysis = `## Analysis of ${totalImages} Images\n\n` + imageAnalyses.join('\n\n---\n\n');
|
| 10479 |
+
}
|
| 10480 |
+
usingFallbackVision = anyUsedFallback;
|
| 10481 |
+
log.info(`👁️ Vision analysis complete: ${imageAnalyses.length}/${totalImages} images analyzed`);
|
| 10482 |
+
|
| 10483 |
+
// Send status: sending image context to main LLM
|
| 10484 |
+
sendVisionStatus(`📤 Sending image context to ${config.name}...`, 'sending', totalImages, totalImages);
|
| 10485 |
+
} else {
|
| 10486 |
+
log.warn(`⚠️ No images could be analyzed`);
|
| 10487 |
+
sendVisionStatus(`⚠️ Vision analysis unavailable`, 'error', 0, totalImages);
|
| 10488 |
+
visionAnalysis = null;
|
| 10489 |
}
|
| 10490 |
}
|
| 10491 |
|
|
|
|
| 10499 |
// Inject vision analysis into the user message for the main LLM
|
| 10500 |
// The main LLM will use this analysis to generate the response
|
| 10501 |
const visionSibling = usingFallbackVision ? 'Rox Vision Max' : 'Rox Vision';
|
| 10502 |
+
const imageCount = imageContents.length;
|
| 10503 |
+
const imageCountText = imageCount === 1 ? '1 image' : `${imageCount} images`;
|
| 10504 |
+
|
| 10505 |
+
// Send status: main LLM is now generating response
|
| 10506 |
+
sendVisionStatus(`🧠 ${config.name} is thinking about your ${imageCountText}...`, 'generating', imageCount, imageCount);
|
| 10507 |
+
|
| 10508 |
const visionContext = `
|
| 10509 |
## 👁️ IMAGE ANALYSIS FROM YOUR VISION SIBLING (${visionSibling})
|
| 10510 |
|
| 10511 |
+
Your vision sibling has analyzed ${imageCountText} attached by the user and provided the following information:
|
| 10512 |
|
| 10513 |
${visionAnalysis}
|
| 10514 |
|
|
|
|
| 10516 |
|
| 10517 |
**USER'S ORIGINAL QUESTION:** ${message}
|
| 10518 |
|
| 10519 |
+
**YOUR TASK:** Using the image analysis above from your vision sibling, provide a helpful response to the user's question. ${imageCount > 1 ? `The user uploaded ${imageCount} images - make sure to address each image in your response (e.g., "In the 1st image...", "In the 2nd image...").` : 'You can reference the visual details your sibling identified.'} Remember, you're working as a team - your vision sibling sees the image(s), and you provide the intelligent response!
|
| 10520 |
`;
|
| 10521 |
|
| 10522 |
// Replace the last user message with the enhanced version (text only, no images)
|