File size: 11,237 Bytes
f0743f4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 | import { useCallback, useEffect, useRef } from 'react';
import copy from 'copy-to-clipboard';
import { ContentTypes, SearchResultData } from 'librechat-data-provider';
import type { TMessage } from 'librechat-data-provider';
import {
SPAN_REGEX,
CLEANUP_REGEX,
COMPOSITE_REGEX,
STANDALONE_PATTERN,
INVALID_CITATION_REGEX,
} from '~/utils/citations';
/**
 * Description of one cited search result, stored alongside each numbered
 * citation that `processCitations` builds.
 */
type Source = {
// URL of the cited result; results without a link are never turned into a Source
link: string;
// Result title, falling back to its `name`, or '' when neither is set
title: string;
// Result `attribution`, falling back to its `source`, or '' when neither is set
attribution?: string;
// Name of the result list the entry was read from (e.g. 'organic', 'topStories')
type: string;
// Position of the entry inside that result list
typeIndex: number;
// `turn-type-index` identifier; results sharing a URL reuse the key of the first one seen
citationKey: string; // Used for deduplication
};
/**
 * Translates the (lower-cased) type token captured from a citation marker into
 * the name of the result list that is looked up on a turn's search data.
 * Tokens missing from this table are used as the list name unchanged.
 */
const refTypeMap: Record<string, string> = {
search: 'organic',
ref: 'references',
news: 'topStories',
image: 'images',
video: 'videos',
};
/**
 * Hook that returns a callback for copying a message to the clipboard as plain text.
 *
 * The message body is taken from the TEXT parts of `content` when `content` is given,
 * otherwise from `text`. Without search data, citation markers are simply stripped.
 * With search data, markers are converted to numbered references by `processCitations`
 * and a "Citations:" list of `[n] link` lines is appended.
 *
 * The returned callback takes the caller's `setIsCopied` setter: it is set to `true`
 * right away and back to `false` 3 seconds after the copy. A pending reset is cancelled
 * when the callback runs again and when the component unmounts.
 */
export default function useCopyToClipboard({
  text,
  content,
  searchResults,
}: Partial<Pick<TMessage, 'text' | 'content'>> & {
  searchResults?: { [key: string]: SearchResultData };
}) {
  const copyTimeoutRef = useRef<NodeJS.Timeout | null>(null);

  // Drop a still-pending reset timer on unmount
  useEffect(
    () => () => {
      if (copyTimeoutRef.current) {
        clearTimeout(copyTimeoutRef.current);
      }
    },
    [],
  );

  const copyToClipboard = useCallback(
    (setIsCopied: React.Dispatch<React.SetStateAction<boolean>>) => {
      // Restart the "copied" indicator window from scratch
      if (copyTimeoutRef.current) {
        clearTimeout(copyTimeoutRef.current);
      }
      setIsCopied(true);

      // Shared tail of both paths: write the payload, then schedule the indicator reset
      const writeAndScheduleReset = (payload: string) => {
        copy(payload, { format: 'text/plain' });
        copyTimeoutRef.current = setTimeout(() => {
          setIsCopied(false);
        }, 3000);
      };

      // Prefer the structured content parts over the flat text when both exist
      let messageText = text ?? '';
      if (content) {
        const lastPosition = content.length - 1;
        let joined = '';
        content.forEach((part, position) => {
          if (part.type !== ContentTypes.TEXT) {
            return;
          }
          const partText = typeof part.text === 'string' ? part.text : part.text.value;
          // Every text part except one sitting in the very last slot is followed by a newline
          joined += partText + (position === lastPosition ? '' : '\n');
        });
        messageText = joined;
      }

      if (searchResults && Object.keys(searchResults).length > 0) {
        // Convert markers to numbered references and list the cited links underneath
        const { formattedText, citations } = processCitations(messageText, searchResults);
        let processedText = formattedText;
        if (citations.size > 0) {
          const citationLines = Array.from(citations.values())
            .sort((first, second) => first.referenceNumber - second.referenceNumber)
            .map((citation) => `[${citation.referenceNumber}] ${citation.link}\n`);
          processedText += '\n\nCitations:\n' + citationLines.join('');
        }
        writeAndScheduleReset(processedText);
      } else {
        // No search data: there is nothing to resolve, so only strip the markers
        const cleanedText = messageText
          .replace(INVALID_CITATION_REGEX, '')
          .replace(CLEANUP_REGEX, '');
        writeAndScheduleReset(cleanedText);
      }
    },
    [text, content, searchResults],
  );

  return copyToClipboard;
}
/**
 * Converts the citation markers in a message into numbered `[n]` references.
 *
 * Highlight spans matched by `SPAN_REGEX` become markdown bold. Every marker matched by
 * `STANDALONE_PATTERN` is resolved against `searchResults` (turn -> result list -> index).
 * A marker whose source has a link is replaced by its reference number; a block matched by
 * `COMPOSITE_REGEX` is replaced as a whole by the ascending, de-duplicated numbers of the
 * markers inside it. Sources that share a URL share one number, and numbers are handed out
 * in order of first appearance, starting at 1. Markers that cannot be resolved are left
 * untouched here and only see the final `INVALID_CITATION_REGEX` / `CLEANUP_REGEX` pass.
 *
 * Substitution is done by match position instead of searching for the marker text again,
 * so a block that occurs several times is rewritten every time, and a marker that is a
 * textual prefix of a longer one (index 1 vs. 12) is never substituted inside it.
 *
 * @param text - Message text that may contain citation markers.
 * @param searchResults - Search data keyed by the turn captured from each marker.
 * @returns The rewritten text and the cited sources keyed by citation key.
 */
function processCitations(text: string, searchResults: { [key: string]: SearchResultData }) {
  // Maps citation keys to their info including reference numbers
  const citations = new Map<
    string,
    {
      referenceNumber: number;
      link: string;
      title?: string;
      source: Source;
    }
  >();
  // Maps each source URL to the key of the first citation that used it (deduplication)
  const urlToCitationKey = new Map<string, string>();
  let nextReferenceNumber = 1;

  /** Collects every match of `pattern` in `input`, in order of position. */
  const findAll = (pattern: RegExp, input: string): RegExpExecArray[] => {
    // Scan with a private global copy so the imported regex's lastIndex is never involved
    const scanner = new RegExp(pattern.source, 'g');
    const found: RegExpExecArray[] = [];
    let match: RegExpExecArray | null;
    while ((match = scanner.exec(input)) !== null) {
      found.push(match);
      // An empty match does not advance lastIndex; step past it so the scan terminates
      if (match[0] === '') {
        scanner.lastIndex++;
      }
    }
    return found;
  };

  /**
   * Returns the reference number for one marker, registering the source on first use.
   * Yields `undefined` when the marker does not point at a search result with a link.
   */
  const resolveReferenceNumber = (
    turn: string,
    type: string,
    index: string,
  ): number | undefined => {
    const dataType = refTypeMap[type.toLowerCase()] || type.toLowerCase();
    const idx = parseInt(index, 10);
    const sourceData = searchResults[turn]?.[dataType]?.[idx];
    if (!sourceData || !sourceData.link) {
      return undefined;
    }
    const sourceUrl = sourceData.link;

    // A URL that was cited before keeps the key (and therefore the number) of its first use
    let citationKey = urlToCitationKey.get(sourceUrl);
    if (!citationKey) {
      citationKey = `${turn}-${dataType}-${idx}`;
      urlToCitationKey.set(sourceUrl, citationKey);
    }

    let citation = citations.get(citationKey);
    if (!citation) {
      const title = sourceData.title || sourceData.name || '';
      citation = {
        referenceNumber: nextReferenceNumber++,
        link: sourceUrl,
        title,
        source: {
          link: sourceUrl,
          title,
          attribution: sourceData.attribution || sourceData.source || '',
          type: dataType,
          typeIndex: idx,
          citationKey,
        },
      };
      citations.set(citationKey, citation);
    }
    return citation.referenceNumber;
  };

  // Step 1: Process highlighted text first (simplify by just making it bold in markdown)
  const formattedText = text.replace(SPAN_REGEX, (match) => {
    const highlighted = match.replace(/\\ue203|\\ue204|\ue203|\ue204/g, '');
    return `**${highlighted}**`;
  });

  // Step 2: Locate every replaceable span: whole composite blocks, plus standalone
  // markers that are not already covered by a composite block
  const compositeSpans = findAll(COMPOSITE_REGEX, formattedText).map((blockMatch) => ({
    start: blockMatch.index,
    end: blockMatch.index + blockMatch[0].length,
    parts: findAll(STANDALONE_PATTERN, blockMatch[0]),
  }));
  const standaloneSpans = findAll(STANDALONE_PATTERN, formattedText)
    .filter(
      (single) =>
        !compositeSpans.some((span) => single.index >= span.start && single.index < span.end),
    )
    .map((single) => ({
      start: single.index,
      end: single.index + single[0].length,
      parts: [single],
    }));
  // Text order decides the numbering, so walk the spans by position
  const spans = [...compositeSpans, ...standaloneSpans].sort((a, b) => a.start - b.start);

  // Step 3: Resolve each span and splice its reference text in at the recorded position
  let output = '';
  let cursor = 0;
  for (const span of spans) {
    // Defensive: ignore a span that overlaps text which has already been replaced
    if (span.start < cursor) {
      continue;
    }
    const referenceNumbers: number[] = [];
    for (const part of span.parts) {
      const referenceNumber = resolveReferenceNumber(part[1], part[2], part[3]);
      if (referenceNumber !== undefined) {
        referenceNumbers.push(referenceNumber);
      }
    }
    // Nothing resolvable in this span: keep the raw markers for the cleanup step
    if (referenceNumbers.length === 0) {
      continue;
    }
    const referenceText = [...new Set(referenceNumbers)]
      .sort((a, b) => a - b)
      .map((num) => `[${num}]`)
      .join('');
    output += formattedText.slice(cursor, span.start) + referenceText;
    cursor = span.end;
  }
  output += formattedText.slice(cursor);

  // Step 4: Remove any orphaned composite blocks at the end of the text
  // This prevents the [1][2][3][4] list that might appear at the end if there's a composite there
  output = output.replace(/\n\s*\[\d+\](\[\d+\])*\s*$/g, '');

  // Step 5: Clean up any remaining citation markers
  output = output.replace(INVALID_CITATION_REGEX, '');
  output = output.replace(CLEANUP_REGEX, '');

  return {
    formattedText: output,
    citations,
  };
}
|