Spaces:
Running
Running
/**
 * Grounding / citation extraction.
 *
 * Gemini (Antigravity/CloudCode) returns search grounding on
 * `candidate.groundingMetadata`. Upstream converters drop it; this helper
 * normalizes it into:
 *   - `queries`: the web search queries the model issued
 *   - `sources`: the grounded source URLs/titles
 *   - `annotations`: OpenAI-style `url_citation` entries (per answer segment
 *     when groundingSupports is present, else one per source)
 *
 * The annotations shape matches OpenAI Chat Completions web-search output so
 * OpenAI-compatible clients can consume citations with no special-casing.
 */
/**
 * Reads a non-negative whole-number setting from the environment.
 *
 * Unset, empty, non-numeric, negative or non-finite values fall back to
 * `fallback`, so a typo in the environment cannot turn a limit into `NaN`
 * (which would silently disable the cache bound and the expiry check).
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is missing or invalid.
 * @returns {number} The parsed setting, truncated to an integer, or `fallback`.
 */
function readNonNegativeIntEnv(name, fallback) {
  const raw = process.env[name];
  if (!raw) return fallback;
  const value = Number(raw);
  if (!Number.isFinite(value) || value < 0) return fallback;
  // Settings are millisecond durations or entry counts; keep them integral.
  return Math.floor(value);
}

// Host that serves Gemini's grounding redirect links.
const GROUNDING_REDIRECT_HOST = 'vertexaisearch.cloud.google.com';
// Per-request timeout used when following a redirect link.
const GROUNDING_REDIRECT_TIMEOUT_MS = readNonNegativeIntEnv('GROUNDING_REDIRECT_TIMEOUT_MS', 1500);
// How long a resolved redirect stays valid in the cache (default: 24 hours).
const GROUNDING_REDIRECT_CACHE_TTL_MS = readNonNegativeIntEnv('GROUNDING_REDIRECT_CACHE_TTL_MS', 24 * 60 * 60 * 1000);
// Maximum number of entries kept in the redirect cache.
const GROUNDING_REDIRECT_CACHE_MAX = readNonNegativeIntEnv('GROUNDING_REDIRECT_CACHE_MAX', 1000);
// raw redirect URL -> { url, expiresAt }
const redirectCache = new Map();
/**
 * Returns the source URI of a grounding chunk.
 *
 * `chunk.web.uri` takes precedence over `chunk.retrievedContext.uri`.
 *
 * @param {object|null|undefined} chunk - One grounding chunk; may be missing.
 * @returns {*} The first truthy `uri` found, or `null` when there is none.
 */
function pickUri(chunk) {
  return chunk?.web?.uri || chunk?.retrievedContext?.uri || null;
}
/**
 * Returns the display title of a grounding chunk.
 *
 * `chunk.web.title` takes precedence over `chunk.retrievedContext.title`.
 *
 * @param {object|null|undefined} chunk - One grounding chunk; may be missing.
 * @returns {*} The first truthy `title` found, or `''` when there is none.
 */
function pickTitle(chunk) {
  return chunk?.web?.title || chunk?.retrievedContext?.title || '';
}
/**
 * Tells whether a URL is a grounding redirect link served by
 * `GROUNDING_REDIRECT_HOST`.
 *
 * @param {string} rawUrl - Candidate URL.
 * @returns {boolean} `true` when the hostname equals `GROUNDING_REDIRECT_HOST`
 *   and the path contains `/grounding-api-redirect/`; `false` otherwise,
 *   including when `rawUrl` cannot be parsed as a URL.
 */
function isVertexGroundingRedirect(rawUrl) {
  let parsed;
  try {
    parsed = new URL(rawUrl);
  } catch {
    // Not an absolute URL, so it cannot be a redirect link.
    return false;
  }
  return parsed.hostname === GROUNDING_REDIRECT_HOST
    && parsed.pathname.includes('/grounding-api-redirect/');
}
/**
 * Looks up a previously resolved redirect in `redirectCache`.
 *
 * An entry whose `expiresAt` is in the past is removed from the cache and
 * treated as a miss.
 *
 * @param {string} rawUrl - The redirect URL used as the cache key.
 * @returns {*} The cached `url` value, or `null` on a miss or expired entry.
 */
function getCachedRedirect(rawUrl) {
  const entry = redirectCache.get(rawUrl);
  if (!entry) return null;

  const isExpired = Date.now() > entry.expiresAt;
  if (isExpired) {
    redirectCache.delete(rawUrl);
    return null;
  }
  return entry.url;
}
/**
 * Stores a resolved redirect in `redirectCache`, expiring
 * `GROUNDING_REDIRECT_CACHE_TTL_MS` from now.
 *
 * The cache is bounded by `GROUNDING_REDIRECT_CACHE_MAX`: when a new key
 * would exceed the bound, the oldest inserted entry is evicted. Refreshing a
 * key that is already cached never evicts another entry.
 *
 * @param {string} rawUrl - The redirect URL used as the cache key.
 * @param {string} resolvedUrl - The URL the redirect resolved to.
 * @returns {void}
 */
function setCachedRedirect(rawUrl, resolvedUrl) {
  if (redirectCache.has(rawUrl)) {
    // Delete first so the refreshed entry moves to the end of the Map's
    // insertion order and is not the next one evicted.
    redirectCache.delete(rawUrl);
  } else if (redirectCache.size >= GROUNDING_REDIRECT_CACHE_MAX) {
    const oldestKey = redirectCache.keys().next().value;
    // `undefined` means the Map is empty; any other key, even '', is real.
    if (oldestKey !== undefined) redirectCache.delete(oldestKey);
  }
  redirectCache.set(rawUrl, {
    url: resolvedUrl,
    expiresAt: Date.now() + GROUNDING_REDIRECT_CACHE_TTL_MS
  });
}
/**
 * Requests `rawUrl` without following redirects and returns where it points.
 *
 * The request is aborted after `GROUNDING_REDIRECT_TIMEOUT_MS`.
 *
 * @param {string} rawUrl - URL to probe.
 * @param {string} method - HTTP method to use (callers pass 'HEAD' or 'GET').
 * @returns {Promise<string|null>} The `Location` header resolved against
 *   `rawUrl`, the raw header value when it cannot be parsed as a URL, or
 *   `null` when the response has no `Location` header.
 * @throws Whatever `fetch` rejects with (network failure, timeout abort).
 */
async function readRedirectLocation(rawUrl, method) {
  const response = await fetch(rawUrl, {
    method,
    redirect: 'manual',
    signal: AbortSignal.timeout(GROUNDING_REDIRECT_TIMEOUT_MS)
  });

  // Only the headers are needed. Cancel the body so a GET does not keep the
  // connection busy until the unread stream is garbage collected.
  try {
    await response.body?.cancel();
  } catch {
    // Best effort: failing to cancel must not hide a usable Location header.
  }

  const location = response.headers.get('location');
  if (!location) return null;
  try {
    // Location may be relative; resolve it against the requested URL.
    return new URL(location, rawUrl).toString();
  } catch {
    return location;
  }
}
/**
 * Resolves a grounding redirect link to the URL it points at.
 *
 * URLs that are not grounding redirects are returned unchanged. Redirect
 * links are probed with HEAD first and then with GET when HEAD fails or
 * returns no `Location` header. Successful resolutions are cached; failures
 * are not, so a transient error is retried on the next call instead of
 * pinning the raw link for the whole cache TTL.
 *
 * Never rejects because of a network error: on failure the raw URL is
 * returned.
 *
 * @param {string} rawUrl - Citation URL as emitted by the model.
 * @returns {Promise<string>} The resolved URL, or `rawUrl` when it is not a
 *   redirect link or could not be resolved.
 */
export async function resolveCitationUrl(rawUrl) {
  if (!rawUrl || !isVertexGroundingRedirect(rawUrl)) return rawUrl;

  const cached = getCachedRedirect(rawUrl);
  if (cached) return cached;

  let resolvedUrl = null;
  for (const method of ['HEAD', 'GET']) {
    try {
      resolvedUrl = await readRedirectLocation(rawUrl, method);
    } catch {
      // Best effort: a timeout or network error just moves on to the next
      // method, and finally to returning the raw URL.
      resolvedUrl = null;
    }
    if (resolvedUrl) break;
  }

  if (!resolvedUrl) return rawUrl;
  setCachedRedirect(rawUrl, resolvedUrl);
  return resolvedUrl;
}
/**
 * Returns a copy of a grounding result with redirect links resolved.
 *
 * Every distinct URL found on `grounding.sources` and on
 * `grounding.annotations[].url_citation` is passed through
 * `resolveCitationUrl` once, in parallel. In the returned copy each
 * source/citation keeps the original link in `raw_url` and carries the
 * resolved link in `url`. The input object is not mutated.
 *
 * Null entries and annotations without a `url_citation` are passed through
 * unchanged rather than dereferenced.
 *
 * @param {object|null|undefined} grounding - Object with optional `sources`
 *   and `annotations` arrays; other properties are copied as-is.
 * @returns {Promise<object|null|undefined>} The resolved copy, or the input
 *   itself when it is falsy.
 */
export async function resolveGroundingRedirects(grounding) {
  if (!grounding) return grounding;

  const sources = Array.isArray(grounding.sources) ? grounding.sources : [];
  const annotations = Array.isArray(grounding.annotations) ? grounding.annotations : [];

  // Collect each distinct URL once so a link cited many times is resolved once.
  const urls = new Set();
  for (const source of sources) {
    if (source?.raw_url) urls.add(source.raw_url);
    if (source?.url) urls.add(source.url);
  }
  for (const ann of annotations) {
    const citation = ann?.url_citation;
    if (citation?.raw_url) urls.add(citation.raw_url);
    if (citation?.url) urls.add(citation.url);
  }

  const pairs = await Promise.all(
    [...urls].map(async (url) => [url, await resolveCitationUrl(url)])
  );
  const resolvedByRawUrl = new Map(pairs);

  // Prefer the resolution of the raw link, then of the current url, then the
  // current url itself.
  const pickResolved = (rawUrl, url) =>
    resolvedByRawUrl.get(rawUrl) || resolvedByRawUrl.get(url) || url;

  return {
    ...grounding,
    sources: sources.map((source) => {
      if (!source || typeof source !== 'object') return source;
      const rawUrl = source.raw_url || source.url;
      return {
        ...source,
        raw_url: rawUrl,
        url: pickResolved(rawUrl, source.url)
      };
    }),
    annotations: annotations.map((ann) => {
      const citation = ann?.url_citation;
      if (!citation || typeof citation !== 'object') return ann;
      const rawUrl = citation.raw_url || citation.url;
      return {
        ...ann,
        url_citation: {
          ...citation,
          raw_url: rawUrl,
          url: pickResolved(rawUrl, citation.url)
        }
      };
    })
  };
}
/**
 * Normalizes Gemini `groundingMetadata` into queries, sources and
 * OpenAI-style `url_citation` annotations.
 *
 * - `queries` is `groundingMetadata.webSearchQueries` (or `[]`).
 * - `sources` has one `{ url, title }` per grounding chunk that has a URI.
 * - `annotations` has one entry per (support, chunk index) pair whose chunk
 *   has a URI, carrying the support segment's start/end indices. When that
 *   yields nothing, it falls back to one annotation per source with both
 *   indices set to 0.
 *
 * NOTE(review): segment indices are copied through as-is. Gemini appears to
 * document them as byte offsets while OpenAI clients may expect character
 * offsets — confirm before relying on them for non-ASCII text.
 *
 * @param {object|null|undefined} groundingMetadata - The candidate's
 *   `groundingMetadata`; fields that are missing or not arrays are treated
 *   as empty.
 * @returns {{queries: Array, sources: Array, annotations: Array}|null}
 *   The normalized grounding, or `null` when the input is not an object or
 *   contains no queries, sources or annotations.
 */
export function extractGrounding(groundingMetadata) {
  if (!groundingMetadata || typeof groundingMetadata !== 'object') return null;

  const asArray = (value) => (Array.isArray(value) ? value : []);
  const chunks = asArray(groundingMetadata.groundingChunks);
  const supports = asArray(groundingMetadata.groundingSupports);
  const queries = asArray(groundingMetadata.webSearchQueries);

  const toAnnotation = (url, title, startIndex, endIndex) => ({
    type: 'url_citation',
    url_citation: {
      url,
      title,
      start_index: startIndex,
      end_index: endIndex
    }
  });

  const sources = [];
  for (const chunk of chunks) {
    const url = pickUri(chunk);
    if (url) sources.push({ url, title: pickTitle(chunk) });
  }

  const annotations = [];
  for (const support of supports) {
    const segment = support?.segment || {};
    for (const chunkIndex of asArray(support?.groundingChunkIndices)) {
      // Out-of-range indices yield an undefined chunk, which has no URI.
      const chunk = chunks[chunkIndex];
      const url = pickUri(chunk);
      if (!url) continue;
      annotations.push(
        toAnnotation(url, pickTitle(chunk), segment.startIndex || 0, segment.endIndex || 0)
      );
    }
  }

  // No per-segment support mapping → fall back to one annotation per source.
  if (annotations.length === 0) {
    for (const { url, title } of sources) {
      annotations.push(toAnnotation(url, title, 0, 0));
    }
  }

  const isEmpty = sources.length === 0 && queries.length === 0 && annotations.length === 0;
  return isEmpty ? null : { queries, sources, annotations };
}