|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import { GroundingMetadata } from '@google/genai'; |
|
|
import { SchemaValidator } from '../utils/schemaValidator.js'; |
|
|
import { |
|
|
BaseTool, |
|
|
ToolResult, |
|
|
ToolCallConfirmationDetails, |
|
|
ToolConfirmationOutcome, |
|
|
} from './tools.js'; |
|
|
import { getErrorMessage } from '../utils/errors.js'; |
|
|
import { Config, ApprovalMode } from '../config/config.js'; |
|
|
import { getResponseText } from '../utils/generateContentResponseUtilities.js'; |
|
|
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js'; |
|
|
import { convert } from 'html-to-text'; |
|
|
|
|
|
/** Timeout value handed to `fetchWithTimeout` for the fallback fetch (the name indicates milliseconds). */
const URL_FETCH_TIMEOUT_MS = 10_000;

/** Upper bound on the number of characters of converted page text forwarded to the model. */
const MAX_CONTENT_LENGTH = 100_000;
|
|
|
|
|
|
|
|
/**
 * Removes characters from the end of a URL candidate that almost certainly
 * belong to the surrounding sentence rather than to the URL itself:
 * sentence punctuation, quotes, and closing brackets that have no matching
 * opening bracket inside the candidate.
 */
function stripTrailingUrlPunctuation(candidate: string): string {
  const sentencePunctuation = '.,;:!?\'"`';
  const openerFor = new Map<string, string>([
    [')', '('],
    [']', '['],
    ['}', '{'],
    ['>', '<'],
  ]);
  const occurrences = (haystack: string, needle: string): number =>
    haystack.split(needle).length - 1;

  let end = candidate.length;
  while (end > 0) {
    const last = candidate.charAt(end - 1);
    if (sentencePunctuation.includes(last)) {
      end--;
      continue;
    }
    const opener = openerFor.get(last);
    if (opener !== undefined) {
      const current = candidate.slice(0, end);
      // Only drop a closing bracket when it is unbalanced, so URLs such as
      // ".../wiki/Rust_(programming_language)" keep their final ")".
      if (occurrences(current, last) > occurrences(current, opener)) {
        end--;
        continue;
      }
    }
    break;
  }
  return candidate.slice(0, end);
}

/**
 * Extracts every http:// or https:// URL from free-form text, in order of
 * appearance.
 *
 * A URL candidate runs up to the next whitespace character. Trailing
 * punctuation that belongs to the surrounding prose is stripped, and
 * candidates that contain nothing after the scheme (e.g. "https://.") are
 * discarded.
 *
 * @param text Text that may contain URLs.
 * @returns The URLs found; an empty array when there are none.
 */
function extractUrls(text: string): string[] {
  const candidates = text.match(/https?:\/\/[^\s]+/g) ?? [];
  const urls: string[] = [];
  for (const candidate of candidates) {
    const url = stripTrailingUrlPunctuation(candidate);
    // Require at least one character after the scheme separator.
    if (/^https?:\/\/[^\s]/.test(url)) {
      urls.push(url);
    }
  }
  return urls;
}
|
|
|
|
|
|
|
|
/**
 * Web source attached to a grounding chunk. Both fields are optional; the
 * tool substitutes placeholder text when either is missing.
 */
interface GroundingChunkWeb {
  /** Human-readable title of the source. */
  title?: string;
  /** Address of the source. */
  uri?: string;
}
|
|
|
|
|
/**
 * One entry of the grounding chunk list read from the response's grounding
 * metadata. Only the web-source portion is used by this file.
 */
interface GroundingChunkItem {
  /** Web source details, when the chunk carries them. */
  web?: GroundingChunkWeb;
}
|
|
|
|
|
/**
 * Span of the response text that a grounding support refers to. The tool
 * places citation markers at `endIndex`.
 */
interface GroundingSupportSegment {
  /** Text of the segment, when provided. */
  text?: string;
  /** Offset at which the segment starts. */
  startIndex: number;
  /** Offset at which the segment ends; citation markers are inserted here. */
  endIndex: number;
}
|
|
|
|
|
/**
 * Links a segment of the response text to the grounding chunks (sources)
 * that support it.
 */
interface GroundingSupportItem {
  /** Span of the response text being supported. */
  segment?: GroundingSupportSegment;
  /** Zero-based positions in the grounding chunk list; rendered one-based as "[n]" citation markers. */
  groundingChunkIndices?: Array<number>;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Parameters accepted by the WebFetch tool.
 */
export interface WebFetchToolParams {
  /**
   * Prompt containing the URL(s) to fetch (each starting with http:// or
   * https://) together with instructions on how to process their content.
   */
  prompt: string;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Tool that processes the content of URL(s) embedded in a prompt.
 *
 * The primary path sends the prompt to the Gemini client with the
 * `urlContext` tool enabled and decorates the answer with citation markers
 * and a source list. When the first URL is a private address, or the primary
 * path yields nothing usable, the tool falls back to fetching the first URL
 * itself, converting the HTML to text, and asking the model to answer from
 * that text.
 */
export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
  static readonly Name: string = 'web_fetch';

  /**
   * @param config Configuration used to obtain the Gemini client and to read
   *   or update the approval mode.
   */
  constructor(private readonly config: Config) {
    super(
      WebFetchTool.Name,
      'WebFetch',
      "Processes content from URL(s), including local and private network addresses (e.g., localhost), embedded in a prompt. Include up to 20 URLs and instructions (e.g., summarize, extract specific data) directly in the 'prompt' parameter.",
      {
        properties: {
          prompt: {
            description:
              'A comprehensive prompt that includes the URL(s) (up to 20) to fetch and specific instructions on how to process their content (e.g., "Summarize https://example.com/article and extract key points from https://another.com/data"). Must contain at least one URL starting with http:// or https://.',
            type: 'string',
          },
        },
        required: ['prompt'],
        type: 'object',
      },
    );
  }

  /**
   * Rewrites a GitHub "blob" page URL to the corresponding
   * raw.githubusercontent.com URL so the file content itself is fetched.
   * URLs on other hosts, URLs without a `/blob/` path segment, and strings
   * that cannot be parsed as URLs are returned unchanged.
   */
  private static toRawGitHubUrl(url: string): string {
    let parsed: URL;
    try {
      parsed = new URL(url);
    } catch {
      return url;
    }
    // Match on the parsed hostname and path rather than on substrings, so
    // look-alike hosts and query strings mentioning github.com are left alone.
    const isGitHubHost =
      parsed.hostname === 'github.com' || parsed.hostname === 'www.github.com';
    if (!isGitHubHost || !parsed.pathname.includes('/blob/')) {
      return url;
    }
    parsed.hostname = 'raw.githubusercontent.com';
    parsed.pathname = parsed.pathname.replace('/blob/', '/');
    return parsed.toString();
  }

  /**
   * Inserts "[n]" citation markers into `text` at the end of every supported
   * segment.
   *
   * Segment indices are treated as offsets into the UTF-8 encoding of the
   * text (the Gemini API documents them as measured in bytes), so the
   * insertion is performed on bytes and decoded afterwards. For ASCII-only
   * text this is identical to inserting at character offsets.
   */
  private static insertCitationMarkers(
    text: string,
    supports: readonly GroundingSupportItem[],
  ): string {
    const insertions: Array<{ index: number; marker: string }> = [];
    for (const support of supports) {
      if (support.segment && support.groundingChunkIndices) {
        insertions.push({
          index: support.segment.endIndex,
          marker: support.groundingChunkIndices
            .map((chunkIndex) => `[${chunkIndex + 1}]`)
            .join(''),
        });
      }
    }
    if (insertions.length === 0) {
      return text;
    }

    // Work from the end of the text towards the start so earlier offsets
    // stay valid while later pieces are being assembled.
    insertions.sort((a, b) => b.index - a.index);

    const encoder = new TextEncoder();
    const textBytes = encoder.encode(text);
    const parts: Uint8Array[] = [];
    let tailStart = textBytes.length;
    for (const { index, marker } of insertions) {
      // Clamp out-of-range offsets instead of corrupting the output.
      const position = Math.max(0, Math.min(index, tailStart));
      parts.unshift(
        encoder.encode(marker),
        textBytes.subarray(position, tailStart),
      );
      tailStart = position;
    }
    parts.unshift(textBytes.subarray(0, tailStart));

    const totalLength = parts.reduce((sum, part) => sum + part.length, 0);
    const merged = new Uint8Array(totalLength);
    let offset = 0;
    for (const part of parts) {
      merged.set(part, offset);
      offset += part.length;
    }
    return new TextDecoder().decode(merged);
  }

  /**
   * Fallback path: fetches the first URL of the prompt directly, converts
   * the HTML to plain text (truncated to MAX_CONTENT_LENGTH characters) and
   * asks the model to answer the prompt from that text.
   *
   * @param params Tool parameters containing the prompt.
   * @param signal Abort signal forwarded to the model call.
   * @returns The model's answer, or an error result if no URL is present or
   *   the fetch/model call fails.
   */
  private async executeFallback(
    params: WebFetchToolParams,
    signal: AbortSignal,
  ): Promise<ToolResult> {
    const firstUrl = extractUrls(params.prompt)[0];
    if (firstUrl === undefined) {
      return {
        llmContent: 'Error: No URL found in the prompt for fallback.',
        returnDisplay: 'Error: No URL found in the prompt for fallback.',
      };
    }

    const url = WebFetchTool.toRawGitHubUrl(firstUrl);

    try {
      const response = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS);
      if (!response.ok) {
        throw new Error(
          `Request failed with status code ${response.status} ${response.statusText}`,
        );
      }
      const html = await response.text();
      const textContent = convert(html, {
        wordwrap: false,
        selectors: [
          { selector: 'a', options: { ignoreHref: true } },
          { selector: 'img', format: 'skip' },
        ],
      }).substring(0, MAX_CONTENT_LENGTH);

      const geminiClient = this.config.getGeminiClient();
      const fallbackPrompt = `The user requested the following: "${params.prompt}".

I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the user's request. Do not attempt to access the URL again.

---
${textContent}
---`;
      const result = await geminiClient.generateContent(
        [{ role: 'user', parts: [{ text: fallbackPrompt }] }],
        {},
        signal,
      );
      const resultText = getResponseText(result) || '';
      return {
        llmContent: resultText,
        returnDisplay: `Content for ${url} processed using fallback fetch.`,
      };
    } catch (e: unknown) {
      // getErrorMessage also copes with thrown values that are not Error
      // instances, unlike an unchecked `as Error` cast.
      const errorMessage = `Error during fallback fetch for ${url}: ${getErrorMessage(e)}`;
      return {
        llmContent: `Error: ${errorMessage}`,
        returnDisplay: `Error: ${errorMessage}`,
      };
    }
  }

  /**
   * Validates the tool parameters.
   *
   * @param params Parameters to validate.
   * @returns A human-readable error message, or `null` when the parameters
   *   are valid.
   */
  validateParams(params: WebFetchToolParams): string | null {
    if (
      this.schema.parameters &&
      !SchemaValidator.validate(
        this.schema.parameters as Record<string, unknown>,
        params,
      )
    ) {
      return 'Parameters failed schema validation.';
    }
    if (!params.prompt || params.prompt.trim() === '') {
      return "The 'prompt' parameter cannot be empty and must contain URL(s) and instructions.";
    }
    // Use the same extraction as execute() so a bare "http://" without a
    // host is rejected here instead of producing an undefined URL later.
    if (extractUrls(params.prompt).length === 0) {
      return "The 'prompt' must contain at least one valid URL (starting with http:// or https://).";
    }
    return null;
  }

  /**
   * Builds a short description of the call, truncating the prompt to 100
   * characters (97 plus an ellipsis).
   */
  getDescription(params: WebFetchToolParams): string {
    const displayPrompt =
      params.prompt.length > 100
        ? params.prompt.substring(0, 97) + '...'
        : params.prompt;
    return `Processing URLs and instructions from prompt: "${displayPrompt}"`;
  }

  /**
   * Decides whether the user must confirm the call.
   *
   * @returns `false` when the approval mode is AUTO_EDIT or the parameters
   *   are invalid (execute() reports the validation error); otherwise the
   *   confirmation details listing the URLs that would be fetched.
   */
  async shouldConfirmExecute(
    params: WebFetchToolParams,
  ): Promise<ToolCallConfirmationDetails | false> {
    if (this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT) {
      return false;
    }

    const validationError = this.validateParams(params);
    if (validationError) {
      return false;
    }

    // Show the URLs in the form the fallback fetch would request them.
    const urls = extractUrls(params.prompt).map((url) =>
      WebFetchTool.toRawGitHubUrl(url),
    );

    const confirmationDetails: ToolCallConfirmationDetails = {
      type: 'info',
      title: `Confirm Web Fetch`,
      prompt: params.prompt,
      urls,
      onConfirm: async (outcome: ToolConfirmationOutcome) => {
        if (outcome === ToolConfirmationOutcome.ProceedAlways) {
          this.config.setApprovalMode(ApprovalMode.AUTO_EDIT);
        }
      },
    };
    return confirmationDetails;
  }

  /**
   * Runs the tool.
   *
   * @param params Tool parameters containing the prompt.
   * @param signal Abort signal forwarded to the model calls.
   * @returns The processed content, or an error result. Failures are
   *   reported in the result rather than thrown.
   */
  async execute(
    params: WebFetchToolParams,
    signal: AbortSignal,
  ): Promise<ToolResult> {
    const validationError = this.validateParams(params);
    if (validationError) {
      return {
        llmContent: `Error: Invalid parameters provided. Reason: ${validationError}`,
        returnDisplay: validationError,
      };
    }

    const userPrompt = params.prompt;
    const url = extractUrls(userPrompt)[0];

    // Private addresses go straight to the local fallback fetch
    // (presumably because the model-side URL retrieval cannot reach them).
    if (url !== undefined && isPrivateIp(url)) {
      return this.executeFallback(params, signal);
    }

    const geminiClient = this.config.getGeminiClient();

    try {
      const response = await geminiClient.generateContent(
        [{ role: 'user', parts: [{ text: userPrompt }] }],
        { tools: [{ urlContext: {} }] },
        signal,
      );

      console.debug(
        `[WebFetchTool] Full response for prompt "${userPrompt.substring(
          0,
          50,
        )}...":`,
        JSON.stringify(response, null, 2),
      );

      let responseText = getResponseText(response) || '';
      const candidate = response.candidates?.[0];
      const urlMetadata = candidate?.urlContextMetadata?.urlMetadata;
      const groundingMetadata = candidate?.groundingMetadata as
        | GroundingMetadata
        | undefined;
      const sources = groundingMetadata?.groundingChunks as
        | GroundingChunkItem[]
        | undefined;
      const groundingSupports = groundingMetadata?.groundingSupports as
        | GroundingSupportItem[]
        | undefined;

      // Fall back when every URL retrieval failed, or when the model
      // returned neither text nor sources.
      const allRetrievalsFailed =
        urlMetadata !== undefined &&
        urlMetadata.length > 0 &&
        urlMetadata.every(
          (m) => m.urlRetrievalStatus !== 'URL_RETRIEVAL_STATUS_SUCCESS',
        );
      const hasNoContent = !responseText.trim() && !sources?.length;
      if (allRetrievalsFailed || hasNoContent) {
        return this.executeFallback(params, signal);
      }

      if (sources && sources.length > 0) {
        if (groundingSupports && groundingSupports.length > 0) {
          responseText = WebFetchTool.insertCitationMarkers(
            responseText,
            groundingSupports,
          );
        }

        // `||` is deliberate: an empty title or URI also gets the placeholder.
        const sourceListFormatted = sources.map(
          (source, index) =>
            `[${index + 1}] ${source.web?.title || 'Untitled'} (${
              source.web?.uri || 'Unknown URI'
            })`,
        );
        responseText += `\n\nSources:\n${sourceListFormatted.join('\n')}`;
      }

      const llmContent = responseText;

      console.debug(
        `[WebFetchTool] Formatted tool response for prompt "${userPrompt}":\n\n`,
        llmContent,
      );

      return {
        llmContent,
        returnDisplay: `Content processed from prompt.`,
      };
    } catch (error: unknown) {
      const errorMessage = `Error processing web content for prompt "${userPrompt.substring(
        0,
        50,
      )}...": ${getErrorMessage(error)}`;
      console.error(errorMessage, error);
      return {
        llmContent: `Error: ${errorMessage}`,
        returnDisplay: `Error: ${errorMessage}`,
      };
    }
  }
}
|
|
|