gravityyy-proxyyy / src /format /grounding.js
farooquiowais
Resolve Gemini grounding redirect citation URLs
cd036d7
Raw
History Blame Contribute Delete
6.58 kB
/**
* Grounding / citation extraction.
*
* Gemini (Antigravity/CloudCode) returns search grounding on
* `candidate.groundingMetadata`. Upstream converters drop it; this helper
* normalizes it into:
* - `queries`: the web search queries the model issued
* - `sources`: the grounded source URLs/titles
* - `annotations`: OpenAI-style `url_citation` entries (per answer segment
* when groundingSupports is present, else one per source)
*
* The annotations shape matches OpenAI Chat Completions web-search output so
* OpenAI-compatible clients can consume citations with no special-casing.
*/
/**
 * Read a numeric setting from the environment.
 *
 * An unset or empty variable yields `fallback`. A value that does not parse
 * to a number, or that is negative, also yields `fallback` instead of leaking
 * `NaN`/negative numbers into timeout and cache-size arithmetic (a `NaN`
 * cache limit would make every `size >= limit` check false).
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or invalid.
 * @returns {number} The parsed value or `fallback`.
 */
function readNonNegativeNumberEnv(name, fallback) {
  const raw = process.env[name];
  if (raw === undefined || raw === '') return fallback;
  const parsed = Number(raw);
  return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
}

// Hostname that grounding redirect links are matched against.
const GROUNDING_REDIRECT_HOST = 'vertexaisearch.cloud.google.com';
// Per-request timeout (ms) when probing a redirect link.
const GROUNDING_REDIRECT_TIMEOUT_MS = readNonNegativeNumberEnv('GROUNDING_REDIRECT_TIMEOUT_MS', 1500);
// Lifetime (ms) of a cached resolution; defaults to 24 hours.
const GROUNDING_REDIRECT_CACHE_TTL_MS = readNonNegativeNumberEnv('GROUNDING_REDIRECT_CACHE_TTL_MS', 24 * 60 * 60 * 1000);
// Entry count at which the cache starts evicting before an insert.
const GROUNDING_REDIRECT_CACHE_MAX = readNonNegativeNumberEnv('GROUNDING_REDIRECT_CACHE_MAX', 1000);
// In-memory cache: raw redirect URL -> { url, expiresAt }.
const redirectCache = new Map();
/**
 * Pick the URI of a grounding chunk.
 *
 * Prefers `chunk.web.uri`, then `chunk.retrievedContext.uri`. Empty or
 * otherwise falsy values count as missing.
 *
 * @param {object|null|undefined} chunk - A grounding chunk.
 * @returns {*} The first truthy URI found, or `null` when there is none.
 */
function pickUri(chunk) {
  const webUri = chunk?.web?.uri;
  if (webUri) return webUri;
  const retrievedUri = chunk?.retrievedContext?.uri;
  return retrievedUri ? retrievedUri : null;
}
/**
 * Pick the title of a grounding chunk.
 *
 * Prefers `chunk.web.title`, then `chunk.retrievedContext.title`. Falsy
 * values count as missing.
 *
 * @param {object|null|undefined} chunk - A grounding chunk.
 * @returns {*} The first truthy title found, or `''` when there is none.
 */
function pickTitle(chunk) {
  const webTitle = chunk?.web?.title;
  if (webTitle) return webTitle;
  const retrievedTitle = chunk?.retrievedContext?.title;
  return retrievedTitle ? retrievedTitle : '';
}
/**
 * Tell whether a URL is a grounding redirect link.
 *
 * A URL qualifies when it parses, its hostname equals
 * `GROUNDING_REDIRECT_HOST`, and its path contains
 * `/grounding-api-redirect/`.
 *
 * @param {*} rawUrl - Candidate URL.
 * @returns {boolean} `true` for a matching redirect link; `false` otherwise,
 *   including when the value cannot be parsed as a URL.
 */
function isVertexGroundingRedirect(rawUrl) {
  let target;
  try {
    target = new URL(rawUrl);
  } catch {
    // Unparseable input is simply not a redirect link.
    return false;
  }
  if (target.hostname !== GROUNDING_REDIRECT_HOST) return false;
  return target.pathname.includes('/grounding-api-redirect/');
}
/**
 * Look up a previously resolved redirect in `redirectCache`.
 *
 * An entry whose `expiresAt` is in the past is removed from the cache and
 * reported as a miss.
 *
 * @param {string} rawUrl - The redirect URL used as the cache key.
 * @returns {*} The cached resolved URL, or `null` on a miss or expired entry.
 */
function getCachedRedirect(rawUrl) {
  const entry = redirectCache.get(rawUrl);
  if (!entry) return null;

  const hasExpired = Date.now() > entry.expiresAt;
  if (!hasExpired) return entry.url;

  // Evict the stale entry so it does not keep occupying a cache slot.
  redirectCache.delete(rawUrl);
  return null;
}
/**
 * Store a resolved redirect in `redirectCache`.
 *
 * The entry expires `GROUNDING_REDIRECT_CACHE_TTL_MS` milliseconds from now.
 * When the cache already holds `GROUNDING_REDIRECT_CACHE_MAX` entries, the
 * oldest-inserted entry is evicted to make room.
 *
 * @param {string} rawUrl - The redirect URL used as the cache key.
 * @param {string} resolvedUrl - The URL the redirect resolved to.
 * @returns {void}
 */
function setCachedRedirect(rawUrl, resolvedUrl) {
  // Remove any existing entry for this key first: overwriting does not grow
  // the map, so it must not cost an unrelated entry its slot, and re-inserting
  // moves the refreshed key to the newest position in insertion order.
  redirectCache.delete(rawUrl);

  if (redirectCache.size >= GROUNDING_REDIRECT_CACHE_MAX) {
    // Map iteration follows insertion order, so the first key is the oldest.
    const oldest = redirectCache.keys().next();
    if (!oldest.done) redirectCache.delete(oldest.value);
  }

  redirectCache.set(rawUrl, {
    url: resolvedUrl,
    expiresAt: Date.now() + GROUNDING_REDIRECT_CACHE_TTL_MS
  });
}
/**
 * Request `rawUrl` without following redirects and report where it points.
 *
 * The request is aborted after `GROUNDING_REDIRECT_TIMEOUT_MS` milliseconds.
 *
 * @param {string} rawUrl - The URL to probe.
 * @param {string} method - HTTP method to use (callers pass 'HEAD' or 'GET').
 * @returns {Promise<string|null>} The `Location` header resolved against
 *   `rawUrl`, the raw header value if it cannot be parsed as a URL, or `null`
 *   when the response carries no `Location` header.
 * @throws Whatever `fetch` rejects with (network failure, timeout abort).
 */
async function readRedirectLocation(rawUrl, method) {
  const response = await fetch(rawUrl, {
    method,
    redirect: 'manual',
    signal: AbortSignal.timeout(GROUNDING_REDIRECT_TIMEOUT_MS)
  });
  const location = response.headers.get('location');

  // Only the headers matter here. Cancel any body (GET responses have one) so
  // the underlying connection is released promptly instead of waiting for GC.
  try {
    await response.body?.cancel();
  } catch {
    // Best-effort cleanup: a failed cancel must not mask the header we read.
  }

  if (!location) return null;
  try {
    // Location may be relative; resolve it against the requested URL.
    return new URL(location, rawUrl).toString();
  } catch {
    return location;
  }
}
/**
 * Resolve a grounding redirect link to the URL it points at.
 *
 * URLs that are not grounding redirect links are returned unchanged. For
 * redirect links the cache is consulted first; on a miss the link is probed
 * with HEAD and, if that yields no target (no `Location` header, or the
 * request failed), probed again with GET.
 *
 * Resolution is best-effort: the function never rejects because of a failed
 * probe and falls back to `rawUrl`. Only successful resolutions are cached,
 * so a transient failure is retried on the next call instead of pinning the
 * unresolved link for the whole cache TTL.
 *
 * @param {string} rawUrl - Citation URL, possibly a redirect link.
 * @returns {Promise<string>} The resolved URL, or `rawUrl` when it is not a
 *   redirect link or could not be resolved.
 */
export async function resolveCitationUrl(rawUrl) {
  if (!rawUrl || !isVertexGroundingRedirect(rawUrl)) return rawUrl;

  const cached = getCachedRedirect(rawUrl);
  if (cached) return cached;

  let resolvedUrl = null;
  // HEAD is cheapest; GET covers endpoints that reject or ignore HEAD.
  for (const method of ['HEAD', 'GET']) {
    try {
      resolvedUrl = await readRedirectLocation(rawUrl, method);
    } catch {
      // Deliberately swallowed: a failed probe just means "try the next one".
      resolvedUrl = null;
    }
    if (resolvedUrl) break;
  }

  if (!resolvedUrl) return rawUrl;
  setCachedRedirect(rawUrl, resolvedUrl);
  return resolvedUrl;
}
/**
 * Return a copy of a source / `url_citation` record with its URL resolved.
 *
 * The pre-resolution URL is kept in `raw_url`; `url` becomes the resolved
 * target when one is known and is otherwise left as it was.
 *
 * @param {object} entry - Record carrying `url` and optionally `raw_url`.
 * @param {Map<string, string>} resolvedByRawUrl - Lookup of resolved URLs.
 * @returns {object} A shallow copy with `raw_url` and `url` set.
 */
function applyResolvedUrl(entry, resolvedByRawUrl) {
  const rawUrl = entry.raw_url || entry.url;
  return {
    ...entry,
    raw_url: rawUrl,
    url: resolvedByRawUrl.get(rawUrl) || resolvedByRawUrl.get(entry.url) || entry.url
  };
}

/**
 * Resolve every citation URL in a grounding result.
 *
 * Collects the distinct `url` / `raw_url` values from `grounding.sources`
 * and from each annotation's `url_citation`, resolves them concurrently via
 * `resolveCitationUrl`, and returns a new object in which each source and
 * `url_citation` has `url` set to the resolved target and `raw_url` set to
 * the original. The input object is not mutated.
 *
 * Falsy sources and annotations without a `url_citation` are passed through
 * unchanged rather than throwing, matching the tolerance of the collection
 * step.
 *
 * @param {object|null|undefined} grounding - Result with optional `sources`
 *   and `annotations` arrays.
 * @returns {Promise<object|null|undefined>} The rewritten grounding, or the
 *   input itself when it is falsy.
 */
export async function resolveGroundingRedirects(grounding) {
  if (!grounding) return grounding;

  const sources = grounding.sources || [];
  const annotations = grounding.annotations || [];

  // Deduplicate first so each distinct URL is resolved only once.
  const urls = new Set();
  const collect = (entry) => {
    if (entry?.raw_url) urls.add(entry.raw_url);
    if (entry?.url) urls.add(entry.url);
  };
  for (const source of sources) collect(source);
  for (const ann of annotations) collect(ann?.url_citation);

  const pairs = await Promise.all(
    [...urls].map(async (url) => [url, await resolveCitationUrl(url)])
  );
  const resolvedByRawUrl = new Map(pairs);

  return {
    ...grounding,
    sources: sources.map((source) => (
      source ? applyResolvedUrl(source, resolvedByRawUrl) : source
    )),
    annotations: annotations.map((ann) => (
      ann?.url_citation
        ? { ...ann, url_citation: applyResolvedUrl(ann.url_citation, resolvedByRawUrl) }
        : ann
    ))
  };
}
/**
 * Normalize Gemini `groundingMetadata` into queries, sources and
 * OpenAI-style `url_citation` annotations.
 *
 * - `queries` is `webSearchQueries` as given (or `[]` when not an array).
 * - `sources` has one `{ url, title }` per grounding chunk that has a URI.
 * - `annotations` has one entry per (support, chunk index) pair whose chunk
 *   has a URI, carrying the support segment's start/end index. When that
 *   yields nothing, one annotation per source is emitted with both indices 0.
 *
 * @param {object|null|undefined} groundingMetadata - Candidate metadata.
 * @returns {{queries: Array, sources: Array, annotations: Array}|null}
 *   The normalized grounding, or `null` when the input is not an object or
 *   contains no queries, sources or annotations.
 */
export function extractGrounding(groundingMetadata) {
  if (!groundingMetadata || typeof groundingMetadata !== 'object') return null;

  const asList = (value) => (Array.isArray(value) ? value : []);
  const chunks = asList(groundingMetadata.groundingChunks);
  const supports = asList(groundingMetadata.groundingSupports);
  const queries = asList(groundingMetadata.webSearchQueries);

  const toCitation = (url, title, startIndex, endIndex) => ({
    type: 'url_citation',
    url_citation: {
      url,
      title,
      start_index: startIndex,
      end_index: endIndex
    }
  });

  // Chunks without a URI contribute nothing.
  const sources = chunks.flatMap((chunk) => {
    const url = pickUri(chunk);
    return url ? [{ url, title: pickTitle(chunk) }] : [];
  });

  let annotations = supports.flatMap((support) => {
    const segment = support?.segment || {};
    return asList(support?.groundingChunkIndices).flatMap((chunkIndex) => {
      const chunk = chunks[chunkIndex];
      const url = pickUri(chunk);
      if (!url) return [];
      return [toCitation(url, pickTitle(chunk), segment.startIndex || 0, segment.endIndex || 0)];
    });
  });

  // Without any per-segment mapping, cite each source once at position 0.
  if (annotations.length === 0) {
    annotations = sources.map(({ url, title }) => toCitation(url, title, 0, 0));
  }

  const isEmpty = sources.length === 0 && queries.length === 0 && annotations.length === 0;
  return isEmpty ? null : { queries, sources, annotations };
}