|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import { |
|
|
Content, |
|
|
GenerateContentConfig, |
|
|
SchemaUnion, |
|
|
Type, |
|
|
} from '@google/genai'; |
|
|
import { GeminiClient } from '../core/client.js'; |
|
|
import { EditToolParams } from '../tools/edit.js'; |
|
|
import { LruCache } from './LruCache.js'; |
|
|
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js'; |
|
|
|
|
|
const EditModel = DEFAULT_GEMINI_FLASH_MODEL; |
|
|
const EditConfig: GenerateContentConfig = { |
|
|
thinkingConfig: { |
|
|
thinkingBudget: 0, |
|
|
}, |
|
|
}; |
|
|
|
|
|
const MAX_CACHE_SIZE = 50; |
|
|
|
|
|
|
|
|
const editCorrectionCache = new LruCache<string, CorrectedEditResult>( |
|
|
MAX_CACHE_SIZE, |
|
|
); |
|
|
|
|
|
|
|
|
const fileContentCorrectionCache = new LruCache<string, string>(MAX_CACHE_SIZE); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
interface CorrectedEditParams { |
|
|
file_path: string; |
|
|
old_string: string; |
|
|
new_string: string; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
export interface CorrectedEditResult { |
|
|
params: CorrectedEditParams; |
|
|
occurrences: number; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
export async function ensureCorrectEdit( |
|
|
currentContent: string, |
|
|
originalParams: EditToolParams, |
|
|
client: GeminiClient, |
|
|
abortSignal: AbortSignal, |
|
|
): Promise<CorrectedEditResult> { |
|
|
const cacheKey = `${currentContent}---${originalParams.old_string}---${originalParams.new_string}`; |
|
|
const cachedResult = editCorrectionCache.get(cacheKey); |
|
|
if (cachedResult) { |
|
|
return cachedResult; |
|
|
} |
|
|
|
|
|
let finalNewString = originalParams.new_string; |
|
|
const newStringPotentiallyEscaped = |
|
|
unescapeStringForGeminiBug(originalParams.new_string) !== |
|
|
originalParams.new_string; |
|
|
|
|
|
const expectedReplacements = originalParams.expected_replacements ?? 1; |
|
|
|
|
|
let finalOldString = originalParams.old_string; |
|
|
let occurrences = countOccurrences(currentContent, finalOldString); |
|
|
|
|
|
if (occurrences === expectedReplacements) { |
|
|
if (newStringPotentiallyEscaped) { |
|
|
finalNewString = await correctNewStringEscaping( |
|
|
client, |
|
|
finalOldString, |
|
|
originalParams.new_string, |
|
|
abortSignal, |
|
|
); |
|
|
} |
|
|
} else if (occurrences > expectedReplacements) { |
|
|
const expectedReplacements = originalParams.expected_replacements ?? 1; |
|
|
|
|
|
|
|
|
if (occurrences === expectedReplacements) { |
|
|
const result: CorrectedEditResult = { |
|
|
params: { ...originalParams }, |
|
|
occurrences, |
|
|
}; |
|
|
editCorrectionCache.set(cacheKey, result); |
|
|
return result; |
|
|
} |
|
|
|
|
|
|
|
|
if (expectedReplacements === 1) { |
|
|
const result: CorrectedEditResult = { |
|
|
params: { ...originalParams }, |
|
|
occurrences, |
|
|
}; |
|
|
editCorrectionCache.set(cacheKey, result); |
|
|
return result; |
|
|
} |
|
|
|
|
|
|
|
|
const result: CorrectedEditResult = { |
|
|
params: { ...originalParams }, |
|
|
occurrences, |
|
|
}; |
|
|
editCorrectionCache.set(cacheKey, result); |
|
|
return result; |
|
|
} else { |
|
|
|
|
|
const unescapedOldStringAttempt = unescapeStringForGeminiBug( |
|
|
originalParams.old_string, |
|
|
); |
|
|
occurrences = countOccurrences(currentContent, unescapedOldStringAttempt); |
|
|
|
|
|
if (occurrences === expectedReplacements) { |
|
|
finalOldString = unescapedOldStringAttempt; |
|
|
if (newStringPotentiallyEscaped) { |
|
|
finalNewString = await correctNewString( |
|
|
client, |
|
|
originalParams.old_string, |
|
|
unescapedOldStringAttempt, |
|
|
originalParams.new_string, |
|
|
abortSignal, |
|
|
); |
|
|
} |
|
|
} else if (occurrences === 0) { |
|
|
const llmCorrectedOldString = await correctOldStringMismatch( |
|
|
client, |
|
|
currentContent, |
|
|
unescapedOldStringAttempt, |
|
|
abortSignal, |
|
|
); |
|
|
const llmOldOccurrences = countOccurrences( |
|
|
currentContent, |
|
|
llmCorrectedOldString, |
|
|
); |
|
|
|
|
|
if (llmOldOccurrences === expectedReplacements) { |
|
|
finalOldString = llmCorrectedOldString; |
|
|
occurrences = llmOldOccurrences; |
|
|
|
|
|
if (newStringPotentiallyEscaped) { |
|
|
const baseNewStringForLLMCorrection = unescapeStringForGeminiBug( |
|
|
originalParams.new_string, |
|
|
); |
|
|
finalNewString = await correctNewString( |
|
|
client, |
|
|
originalParams.old_string, |
|
|
llmCorrectedOldString, |
|
|
baseNewStringForLLMCorrection, |
|
|
abortSignal, |
|
|
); |
|
|
} |
|
|
} else { |
|
|
|
|
|
const result: CorrectedEditResult = { |
|
|
params: { ...originalParams }, |
|
|
occurrences: 0, |
|
|
}; |
|
|
editCorrectionCache.set(cacheKey, result); |
|
|
return result; |
|
|
} |
|
|
} else { |
|
|
|
|
|
const result: CorrectedEditResult = { |
|
|
params: { ...originalParams }, |
|
|
occurrences, |
|
|
}; |
|
|
editCorrectionCache.set(cacheKey, result); |
|
|
return result; |
|
|
} |
|
|
} |
|
|
|
|
|
const { targetString, pair } = trimPairIfPossible( |
|
|
finalOldString, |
|
|
finalNewString, |
|
|
currentContent, |
|
|
expectedReplacements, |
|
|
); |
|
|
finalOldString = targetString; |
|
|
finalNewString = pair; |
|
|
|
|
|
|
|
|
const result: CorrectedEditResult = { |
|
|
params: { |
|
|
file_path: originalParams.file_path, |
|
|
old_string: finalOldString, |
|
|
new_string: finalNewString, |
|
|
}, |
|
|
occurrences: countOccurrences(currentContent, finalOldString), |
|
|
}; |
|
|
editCorrectionCache.set(cacheKey, result); |
|
|
return result; |
|
|
} |
|
|
|
|
|
export async function ensureCorrectFileContent( |
|
|
content: string, |
|
|
client: GeminiClient, |
|
|
abortSignal: AbortSignal, |
|
|
): Promise<string> { |
|
|
const cachedResult = fileContentCorrectionCache.get(content); |
|
|
if (cachedResult) { |
|
|
return cachedResult; |
|
|
} |
|
|
|
|
|
const contentPotentiallyEscaped = |
|
|
unescapeStringForGeminiBug(content) !== content; |
|
|
if (!contentPotentiallyEscaped) { |
|
|
fileContentCorrectionCache.set(content, content); |
|
|
return content; |
|
|
} |
|
|
|
|
|
const correctedContent = await correctStringEscaping( |
|
|
content, |
|
|
client, |
|
|
abortSignal, |
|
|
); |
|
|
fileContentCorrectionCache.set(content, correctedContent); |
|
|
return correctedContent; |
|
|
} |
|
|
|
|
|
|
|
|
const OLD_STRING_CORRECTION_SCHEMA: SchemaUnion = { |
|
|
type: Type.OBJECT, |
|
|
properties: { |
|
|
corrected_target_snippet: { |
|
|
type: Type.STRING, |
|
|
description: |
|
|
'The corrected version of the target snippet that exactly and uniquely matches a segment within the provided file content.', |
|
|
}, |
|
|
}, |
|
|
required: ['corrected_target_snippet'], |
|
|
}; |
|
|
|
|
|
export async function correctOldStringMismatch( |
|
|
geminiClient: GeminiClient, |
|
|
fileContent: string, |
|
|
problematicSnippet: string, |
|
|
abortSignal: AbortSignal, |
|
|
): Promise<string> { |
|
|
const prompt = ` |
|
|
Context: A process needs to find an exact literal, unique match for a specific text snippet within a file's content. The provided snippet failed to match exactly. This is most likely because it has been overly escaped. |
|
|
|
|
|
Task: Analyze the provided file content and the problematic target snippet. Identify the segment in the file content that the snippet was *most likely* intended to match. Output the *exact*, literal text of that segment from the file content. Focus *only* on removing extra escape characters and correcting formatting, whitespace, or minor differences to achieve a PERFECT literal match. The output must be the exact literal text as it appears in the file. |
|
|
|
|
|
Problematic target snippet: |
|
|
\`\`\` |
|
|
${problematicSnippet} |
|
|
\`\`\` |
|
|
|
|
|
File Content: |
|
|
\`\`\` |
|
|
${fileContent} |
|
|
\`\`\` |
|
|
|
|
|
For example, if the problematic target snippet was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and the file content had content that looked like "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", then corrected_target_snippet should likely be "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;" to fix the incorrect escaping to match the original file content. |
|
|
If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_target_snippet. |
|
|
|
|
|
Return ONLY the corrected target snippet in the specified JSON format with the key 'corrected_target_snippet'. If no clear, unique match can be found, return an empty string for 'corrected_target_snippet'. |
|
|
`.trim(); |
|
|
|
|
|
const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }]; |
|
|
|
|
|
try { |
|
|
const result = await geminiClient.generateJson( |
|
|
contents, |
|
|
OLD_STRING_CORRECTION_SCHEMA, |
|
|
abortSignal, |
|
|
EditModel, |
|
|
EditConfig, |
|
|
); |
|
|
|
|
|
if ( |
|
|
result && |
|
|
typeof result.corrected_target_snippet === 'string' && |
|
|
result.corrected_target_snippet.length > 0 |
|
|
) { |
|
|
return result.corrected_target_snippet; |
|
|
} else { |
|
|
return problematicSnippet; |
|
|
} |
|
|
} catch (error) { |
|
|
if (abortSignal.aborted) { |
|
|
throw error; |
|
|
} |
|
|
|
|
|
console.error( |
|
|
'Error during LLM call for old string snippet correction:', |
|
|
error, |
|
|
); |
|
|
|
|
|
return problematicSnippet; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
const NEW_STRING_CORRECTION_SCHEMA: SchemaUnion = { |
|
|
type: Type.OBJECT, |
|
|
properties: { |
|
|
corrected_new_string: { |
|
|
type: Type.STRING, |
|
|
description: |
|
|
'The original_new_string adjusted to be a suitable replacement for the corrected_old_string, while maintaining the original intent of the change.', |
|
|
}, |
|
|
}, |
|
|
required: ['corrected_new_string'], |
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
export async function correctNewString( |
|
|
geminiClient: GeminiClient, |
|
|
originalOldString: string, |
|
|
correctedOldString: string, |
|
|
originalNewString: string, |
|
|
abortSignal: AbortSignal, |
|
|
): Promise<string> { |
|
|
if (originalOldString === correctedOldString) { |
|
|
return originalNewString; |
|
|
} |
|
|
|
|
|
const prompt = ` |
|
|
Context: A text replacement operation was planned. The original text to be replaced (original_old_string) was slightly different from the actual text in the file (corrected_old_string). The original_old_string has now been corrected to match the file content. |
|
|
We now need to adjust the replacement text (original_new_string) so that it makes sense as a replacement for the corrected_old_string, while preserving the original intent of the change. |
|
|
|
|
|
original_old_string (what was initially intended to be found): |
|
|
\`\`\` |
|
|
${originalOldString} |
|
|
\`\`\` |
|
|
|
|
|
corrected_old_string (what was actually found in the file and will be replaced): |
|
|
\`\`\` |
|
|
${correctedOldString} |
|
|
\`\`\` |
|
|
|
|
|
original_new_string (what was intended to replace original_old_string): |
|
|
\`\`\` |
|
|
${originalNewString} |
|
|
\`\`\` |
|
|
|
|
|
Task: Based on the differences between original_old_string and corrected_old_string, and the content of original_new_string, generate a corrected_new_string. This corrected_new_string should be what original_new_string would have been if it was designed to replace corrected_old_string directly, while maintaining the spirit of the original transformation. |
|
|
|
|
|
For example, if original_old_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and corrected_old_string is "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", and original_new_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name} \${lastName}\\\\\`\`;", then corrected_new_string should likely be "\nconst greeting = \`Hello ${'\\`'}\${name} \${lastName}${'\\`'}\`;" to fix the incorrect escaping. |
|
|
If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_new_string. |
|
|
|
|
|
Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string'. If no adjustment is deemed necessary or possible, return the original_new_string. |
|
|
`.trim(); |
|
|
|
|
|
const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }]; |
|
|
|
|
|
try { |
|
|
const result = await geminiClient.generateJson( |
|
|
contents, |
|
|
NEW_STRING_CORRECTION_SCHEMA, |
|
|
abortSignal, |
|
|
EditModel, |
|
|
EditConfig, |
|
|
); |
|
|
|
|
|
if ( |
|
|
result && |
|
|
typeof result.corrected_new_string === 'string' && |
|
|
result.corrected_new_string.length > 0 |
|
|
) { |
|
|
return result.corrected_new_string; |
|
|
} else { |
|
|
return originalNewString; |
|
|
} |
|
|
} catch (error) { |
|
|
if (abortSignal.aborted) { |
|
|
throw error; |
|
|
} |
|
|
|
|
|
console.error('Error during LLM call for new_string correction:', error); |
|
|
return originalNewString; |
|
|
} |
|
|
} |
|
|
|
|
|
const CORRECT_NEW_STRING_ESCAPING_SCHEMA: SchemaUnion = { |
|
|
type: Type.OBJECT, |
|
|
properties: { |
|
|
corrected_new_string_escaping: { |
|
|
type: Type.STRING, |
|
|
description: |
|
|
'The new_string with corrected escaping, ensuring it is a proper replacement for the old_string, especially considering potential over-escaping issues from previous LLM generations.', |
|
|
}, |
|
|
}, |
|
|
required: ['corrected_new_string_escaping'], |
|
|
}; |
|
|
|
|
|
export async function correctNewStringEscaping( |
|
|
geminiClient: GeminiClient, |
|
|
oldString: string, |
|
|
potentiallyProblematicNewString: string, |
|
|
abortSignal: AbortSignal, |
|
|
): Promise<string> { |
|
|
const prompt = ` |
|
|
Context: A text replacement operation is planned. The text to be replaced (old_string) has been correctly identified in the file. However, the replacement text (new_string) might have been improperly escaped by a previous LLM generation (e.g. too many backslashes for newlines like \\n instead of \n, or unnecessarily quotes like \\"Hello\\" instead of "Hello"). |
|
|
|
|
|
old_string (this is the exact text that will be replaced): |
|
|
\`\`\` |
|
|
${oldString} |
|
|
\`\`\` |
|
|
|
|
|
potentially_problematic_new_string (this is the text that should replace old_string, but MIGHT have bad escaping, or might be entirely correct): |
|
|
\`\`\` |
|
|
${potentiallyProblematicNewString} |
|
|
\`\`\` |
|
|
|
|
|
Task: Analyze the potentially_problematic_new_string. If it's syntactically invalid due to incorrect escaping (e.g., "\n", "\t", "\\", "\\'", "\\""), correct the invalid syntax. The goal is to ensure the new_string, when inserted into the code, will be a valid and correctly interpreted. |
|
|
|
|
|
For example, if old_string is "foo" and potentially_problematic_new_string is "bar\\nbaz", the corrected_new_string_escaping should be "bar\nbaz". |
|
|
If potentially_problematic_new_string is console.log(\\"Hello World\\"), it should be console.log("Hello World"). |
|
|
|
|
|
Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string_escaping'. If no escaping correction is needed, return the original potentially_problematic_new_string. |
|
|
`.trim(); |
|
|
|
|
|
const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }]; |
|
|
|
|
|
try { |
|
|
const result = await geminiClient.generateJson( |
|
|
contents, |
|
|
CORRECT_NEW_STRING_ESCAPING_SCHEMA, |
|
|
abortSignal, |
|
|
EditModel, |
|
|
EditConfig, |
|
|
); |
|
|
|
|
|
if ( |
|
|
result && |
|
|
typeof result.corrected_new_string_escaping === 'string' && |
|
|
result.corrected_new_string_escaping.length > 0 |
|
|
) { |
|
|
return result.corrected_new_string_escaping; |
|
|
} else { |
|
|
return potentiallyProblematicNewString; |
|
|
} |
|
|
} catch (error) { |
|
|
if (abortSignal.aborted) { |
|
|
throw error; |
|
|
} |
|
|
|
|
|
console.error( |
|
|
'Error during LLM call for new_string escaping correction:', |
|
|
error, |
|
|
); |
|
|
return potentiallyProblematicNewString; |
|
|
} |
|
|
} |
|
|
|
|
|
const CORRECT_STRING_ESCAPING_SCHEMA: SchemaUnion = { |
|
|
type: Type.OBJECT, |
|
|
properties: { |
|
|
corrected_string_escaping: { |
|
|
type: Type.STRING, |
|
|
description: |
|
|
'The string with corrected escaping, ensuring it is valid, specially considering potential over-escaping issues from previous LLM generations.', |
|
|
}, |
|
|
}, |
|
|
required: ['corrected_string_escaping'], |
|
|
}; |
|
|
|
|
|
export async function correctStringEscaping( |
|
|
potentiallyProblematicString: string, |
|
|
client: GeminiClient, |
|
|
abortSignal: AbortSignal, |
|
|
): Promise<string> { |
|
|
const prompt = ` |
|
|
Context: An LLM has just generated potentially_problematic_string and the text might have been improperly escaped (e.g. too many backslashes for newlines like \\n instead of \n, or unnecessarily quotes like \\"Hello\\" instead of "Hello"). |
|
|
|
|
|
potentially_problematic_string (this text MIGHT have bad escaping, or might be entirely correct): |
|
|
\`\`\` |
|
|
${potentiallyProblematicString} |
|
|
\`\`\` |
|
|
|
|
|
Task: Analyze the potentially_problematic_string. If it's syntactically invalid due to incorrect escaping (e.g., "\n", "\t", "\\", "\\'", "\\""), correct the invalid syntax. The goal is to ensure the text will be a valid and correctly interpreted. |
|
|
|
|
|
For example, if potentially_problematic_string is "bar\\nbaz", the corrected_new_string_escaping should be "bar\nbaz". |
|
|
If potentially_problematic_string is console.log(\\"Hello World\\"), it should be console.log("Hello World"). |
|
|
|
|
|
Return ONLY the corrected string in the specified JSON format with the key 'corrected_string_escaping'. If no escaping correction is needed, return the original potentially_problematic_string. |
|
|
`.trim(); |
|
|
|
|
|
const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }]; |
|
|
|
|
|
try { |
|
|
const result = await client.generateJson( |
|
|
contents, |
|
|
CORRECT_STRING_ESCAPING_SCHEMA, |
|
|
abortSignal, |
|
|
EditModel, |
|
|
EditConfig, |
|
|
); |
|
|
|
|
|
if ( |
|
|
result && |
|
|
typeof result.corrected_string_escaping === 'string' && |
|
|
result.corrected_string_escaping.length > 0 |
|
|
) { |
|
|
return result.corrected_string_escaping; |
|
|
} else { |
|
|
return potentiallyProblematicString; |
|
|
} |
|
|
} catch (error) { |
|
|
if (abortSignal.aborted) { |
|
|
throw error; |
|
|
} |
|
|
|
|
|
console.error( |
|
|
'Error during LLM call for string escaping correction:', |
|
|
error, |
|
|
); |
|
|
return potentiallyProblematicString; |
|
|
} |
|
|
} |
|
|
|
|
|
function trimPairIfPossible( |
|
|
target: string, |
|
|
trimIfTargetTrims: string, |
|
|
currentContent: string, |
|
|
expectedReplacements: number, |
|
|
) { |
|
|
const trimmedTargetString = target.trim(); |
|
|
if (target.length !== trimmedTargetString.length) { |
|
|
const trimmedTargetOccurrences = countOccurrences( |
|
|
currentContent, |
|
|
trimmedTargetString, |
|
|
); |
|
|
|
|
|
if (trimmedTargetOccurrences === expectedReplacements) { |
|
|
const trimmedReactiveString = trimIfTargetTrims.trim(); |
|
|
return { |
|
|
targetString: trimmedTargetString, |
|
|
pair: trimmedReactiveString, |
|
|
}; |
|
|
} |
|
|
} |
|
|
|
|
|
return { |
|
|
targetString: target, |
|
|
pair: trimIfTargetTrims, |
|
|
}; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
export function unescapeStringForGeminiBug(inputString: string): string { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return inputString.replace( |
|
|
/\\+(n|t|r|'|"|`|\\|\n)/g, |
|
|
(match, capturedChar) => { |
|
|
|
|
|
|
|
|
|
|
|
switch (capturedChar) { |
|
|
case 'n': |
|
|
return '\n'; |
|
|
case 't': |
|
|
return '\t'; |
|
|
case 'r': |
|
|
return '\r'; |
|
|
case "'": |
|
|
return "'"; |
|
|
case '"': |
|
|
return '"'; |
|
|
case '`': |
|
|
return '`'; |
|
|
case '\\': |
|
|
return '\\'; |
|
|
case '\n': |
|
|
return '\n'; |
|
|
default: |
|
|
|
|
|
|
|
|
return match; |
|
|
} |
|
|
}, |
|
|
); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
export function countOccurrences(str: string, substr: string): number { |
|
|
if (substr === '') { |
|
|
return 0; |
|
|
} |
|
|
let count = 0; |
|
|
let pos = str.indexOf(substr); |
|
|
while (pos !== -1) { |
|
|
count++; |
|
|
pos = str.indexOf(substr, pos + substr.length); |
|
|
} |
|
|
return count; |
|
|
} |
|
|
|
|
|
export function resetEditCorrectorCaches_TEST_ONLY() { |
|
|
editCorrectionCache.clear(); |
|
|
fileContentCorrectionCache.clear(); |
|
|
} |
|
|
|