File size: 22,170 Bytes
40e575e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 |
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import {
Content,
GenerateContentConfig,
SchemaUnion,
Type,
} from '@google/genai';
import { GeminiClient } from '../core/client.js';
import { EditToolParams } from '../tools/edit.js';
import { LruCache } from './LruCache.js';
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
// Model used for every LLM correction request issued from this module.
const EditModel = DEFAULT_GEMINI_FLASH_MODEL;
// Generation config passed together with EditModel on each generateJson call below.
// NOTE(review): a thinkingBudget of 0 presumably switches model "thinking" off so
// these helper calls stay quick — confirm against the @google/genai documentation.
const EditConfig: GenerateContentConfig = {
thinkingConfig: {
thinkingBudget: 0,
},
};
// Capacity argument handed to each LruCache created below.
const MAX_CACHE_SIZE = 50;
// Cache for ensureCorrectEdit results, keyed by a string built from the file
// content and the requested edit.
const editCorrectionCache = new LruCache<string, CorrectedEditResult>(
MAX_CACHE_SIZE,
);
// Cache for ensureCorrectFileContent results: original content -> content returned for it.
const fileContentCorrectionCache = new LruCache<string, string>(MAX_CACHE_SIZE);
/**
* Defines the structure of the parameters within CorrectedEditResult:
* the edit that should actually be applied after any correction.
*/
interface CorrectedEditParams {
// Path of the file being edited; ensureCorrectEdit copies it from the original params.
file_path: string;
// Text to look for in the file (possibly unescaped, LLM-corrected and/or trimmed).
old_string: string;
// Replacement text (possibly corrected and/or trimmed alongside old_string).
new_string: string;
}
/**
* Defines the result structure for ensureCorrectEdit.
*/
export interface CorrectedEditResult {
// The edit parameters to use; the original params when correction was abandoned.
params: CorrectedEditParams;
// Number of matches counted in the file content for the old_string that was
// last tried (0 when even the LLM-suggested old_string did not match as expected).
occurrences: number;
}
/**
 * Attempts to correct edit parameters when the original old_string does not
 * occur in the file the expected number of times
 * (`originalParams.expected_replacements`, default 1).
 *
 * Strategy, in order:
 *  1. old_string already matches the expected number of times: keep it, and
 *     only ask the LLM to repair new_string when unescaping would change it.
 *  2. old_string matches MORE often than expected: no correction is attempted;
 *     the original params are returned together with the count found.
 *  3. old_string matches less often than expected: retry with an unescaped
 *     old_string, and if that yields zero matches ask the LLM for a corrected
 *     old_string. If nothing produces the expected count, the original params
 *     are returned.
 * After a successful path, both strings are trimmed when the trimmed
 * old_string still matches the expected number of times.
 *
 * Results (including "gave up" results) are cached. The cache key covers the
 * file path, the expected replacement count, the file content and both
 * strings, so a result computed for one expectation or file is never served
 * for another.
 *
 * @param currentContent The current content of the file.
 * @param originalParams The original EditToolParams (from edit.ts).
 * @param client The GeminiClient for LLM calls.
 * @param abortSignal Signal forwarded to every LLM call made here.
 * @returns A promise resolving to an object containing the (potentially corrected)
 * EditToolParams (as CorrectedEditParams) and the final occurrences count.
 */
export async function ensureCorrectEdit(
  currentContent: string,
  originalParams: EditToolParams,
  client: GeminiClient,
  abortSignal: AbortSignal,
): Promise<CorrectedEditResult> {
  const expectedReplacements = originalParams.expected_replacements ?? 1;

  // file_path and expectedReplacements are part of the key because both
  // influence the result (returned file_path, and which branch is taken).
  const cacheKey = `${originalParams.file_path}---${expectedReplacements}---${currentContent}---${originalParams.old_string}---${originalParams.new_string}`;
  const cachedResult = editCorrectionCache.get(cacheKey);
  if (cachedResult) {
    return cachedResult;
  }

  // Stores a result in the cache and hands it back, so every exit path caches.
  const cacheAndReturn = (result: CorrectedEditResult): CorrectedEditResult => {
    editCorrectionCache.set(cacheKey, result);
    return result;
  };
  // Result used whenever correction is abandoned: the caller's params, untouched.
  const giveUp = (foundOccurrences: number): CorrectedEditResult =>
    cacheAndReturn({
      params: { ...originalParams },
      occurrences: foundOccurrences,
    });

  let finalNewString = originalParams.new_string;
  const newStringPotentiallyEscaped =
    unescapeStringForGeminiBug(originalParams.new_string) !==
    originalParams.new_string;

  let finalOldString = originalParams.old_string;
  let occurrences = countOccurrences(currentContent, finalOldString);

  if (occurrences === expectedReplacements) {
    // old_string is fine; only new_string may need its escaping repaired.
    if (newStringPotentiallyEscaped) {
      finalNewString = await correctNewStringEscaping(
        client,
        finalOldString,
        originalParams.new_string,
        abortSignal,
      );
    }
  } else if (occurrences > expectedReplacements) {
    // More matches than expected: return as-is (will fail validation later).
    return giveUp(occurrences);
  } else {
    // Fewer matches than expected (typically 0): try the unescaped old_string.
    const unescapedOldStringAttempt = unescapeStringForGeminiBug(
      originalParams.old_string,
    );
    occurrences = countOccurrences(currentContent, unescapedOldStringAttempt);

    if (occurrences === expectedReplacements) {
      finalOldString = unescapedOldStringAttempt;
      if (newStringPotentiallyEscaped) {
        finalNewString = await correctNewString(
          client,
          originalParams.old_string, // original old
          unescapedOldStringAttempt, // corrected old
          originalParams.new_string, // original new (which is potentially escaped)
          abortSignal,
        );
      }
    } else if (occurrences === 0) {
      // Unescaping did not help either; ask the LLM for the intended snippet.
      const llmCorrectedOldString = await correctOldStringMismatch(
        client,
        currentContent,
        unescapedOldStringAttempt,
        abortSignal,
      );
      const llmOldOccurrences = countOccurrences(
        currentContent,
        llmCorrectedOldString,
      );
      if (llmOldOccurrences !== expectedReplacements) {
        // LLM correction also failed for old_string: explicitly report 0.
        return giveUp(0);
      }
      finalOldString = llmCorrectedOldString;
      if (newStringPotentiallyEscaped) {
        const baseNewStringForLLMCorrection = unescapeStringForGeminiBug(
          originalParams.new_string,
        );
        finalNewString = await correctNewString(
          client,
          originalParams.old_string, // original old
          llmCorrectedOldString, // corrected old
          baseNewStringForLLMCorrection, // base new for correction
          abortSignal,
        );
      }
    } else {
      // The unescaped old_string matches, but not the expected number of times.
      return giveUp(occurrences);
    }
  }

  const { targetString, pair } = trimPairIfPossible(
    finalOldString,
    finalNewString,
    currentContent,
    expectedReplacements,
  );
  finalOldString = targetString;
  finalNewString = pair;

  return cacheAndReturn({
    params: {
      file_path: originalParams.file_path,
      old_string: finalOldString,
      new_string: finalNewString,
    },
    // Recalculate occurrences with the final old_string.
    occurrences: countOccurrences(currentContent, finalOldString),
  });
}
/**
 * Returns `content` with LLM over-escaping repaired, when there appears to be any.
 *
 * Content that unescapeStringForGeminiBug would leave unchanged is returned
 * as-is; otherwise correctStringEscaping is asked for a corrected version.
 * Either outcome is stored in the file-content cache under the original content.
 *
 * @param content The text to check.
 * @param client The GeminiClient used if an LLM correction is required.
 * @param abortSignal Signal forwarded to the LLM call.
 * @returns The cached, unchanged, or corrected content.
 */
export async function ensureCorrectFileContent(
  content: string,
  client: GeminiClient,
  abortSignal: AbortSignal,
): Promise<string> {
  const cacheHit = fileContentCorrectionCache.get(content);
  if (cacheHit) {
    return cacheHit;
  }

  const looksOverEscaped = unescapeStringForGeminiBug(content) !== content;
  const resolvedContent = looksOverEscaped
    ? await correctStringEscaping(content, client, abortSignal)
    : content;

  fileContentCorrectionCache.set(content, resolvedContent);
  return resolvedContent;
}
// Expected JSON schema for the LLM response used for old_string correction
// (passed to generateJson by correctOldStringMismatch): an object with a single
// required string property, corrected_target_snippet.
const OLD_STRING_CORRECTION_SCHEMA: SchemaUnion = {
type: Type.OBJECT,
properties: {
corrected_target_snippet: {
type: Type.STRING,
description:
'The corrected version of the target snippet that exactly and uniquely matches a segment within the provided file content.',
},
},
required: ['corrected_target_snippet'],
};
/**
 * Asks the LLM which segment of the file a non-matching snippet was meant to be.
 *
 * Sends one user message containing the snippet and the whole file content and
 * requests JSON shaped like OLD_STRING_CORRECTION_SCHEMA.
 *
 * @param geminiClient Client used for the generateJson call.
 * @param fileContent Full content of the file being edited.
 * @param problematicSnippet The snippet that failed to match.
 * @param abortSignal Signal forwarded to the LLM call.
 * @returns The non-empty `corrected_target_snippet` from the response; otherwise
 * (empty/missing value, or a failed call) `problematicSnippet` unchanged.
 * @throws Rethrows the LLM call's error when `abortSignal` is aborted.
 */
export async function correctOldStringMismatch(
  geminiClient: GeminiClient,
  fileContent: string,
  problematicSnippet: string,
  abortSignal: AbortSignal,
): Promise<string> {
  // The template lines stay flush-left: leading whitespace would become part of the prompt.
  const instructions = `
Context: A process needs to find an exact literal, unique match for a specific text snippet within a file's content. The provided snippet failed to match exactly. This is most likely because it has been overly escaped.
Task: Analyze the provided file content and the problematic target snippet. Identify the segment in the file content that the snippet was *most likely* intended to match. Output the *exact*, literal text of that segment from the file content. Focus *only* on removing extra escape characters and correcting formatting, whitespace, or minor differences to achieve a PERFECT literal match. The output must be the exact literal text as it appears in the file.
Problematic target snippet:
\`\`\`
${problematicSnippet}
\`\`\`
File Content:
\`\`\`
${fileContent}
\`\`\`
For example, if the problematic target snippet was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and the file content had content that looked like "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", then corrected_target_snippet should likely be "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;" to fix the incorrect escaping to match the original file content.
If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_target_snippet.
Return ONLY the corrected target snippet in the specified JSON format with the key 'corrected_target_snippet'. If no clear, unique match can be found, return an empty string for 'corrected_target_snippet'.
`.trim();

  const request: Content[] = [{ role: 'user', parts: [{ text: instructions }] }];

  try {
    const reply = await geminiClient.generateJson(
      request,
      OLD_STRING_CORRECTION_SCHEMA,
      abortSignal,
      EditModel,
      EditConfig,
    );
    const candidate = reply ? reply.corrected_target_snippet : undefined;
    if (typeof candidate === 'string' && candidate.length > 0) {
      return candidate;
    }
    // Empty or missing answer: keep what we were given.
    return problematicSnippet;
  } catch (error) {
    // An aborted request must surface to the caller; anything else is best-effort.
    if (abortSignal.aborted) {
      throw error;
    }
    console.error(
      'Error during LLM call for old string snippet correction:',
      error,
    );
    return problematicSnippet;
  }
}
// Expected JSON schema for the new_string correction LLM response
// (passed to generateJson by correctNewString): an object with a single
// required string property, corrected_new_string.
const NEW_STRING_CORRECTION_SCHEMA: SchemaUnion = {
type: Type.OBJECT,
properties: {
corrected_new_string: {
type: Type.STRING,
description:
'The original_new_string adjusted to be a suitable replacement for the corrected_old_string, while maintaining the original intent of the change.',
},
},
required: ['corrected_new_string'],
};
/**
 * Adapts new_string so it still expresses the intended change once old_string
 * has been corrected.
 *
 * When the original and corrected old strings are identical nothing is sent to
 * the LLM and `originalNewString` is returned directly. Otherwise one user
 * message with all three strings is sent and JSON shaped like
 * NEW_STRING_CORRECTION_SCHEMA is requested.
 *
 * @param geminiClient Client used for the generateJson call.
 * @param originalOldString The old_string as originally supplied.
 * @param correctedOldString The old_string that actually matches the file.
 * @param originalNewString The replacement text to adjust.
 * @param abortSignal Signal forwarded to the LLM call.
 * @returns The non-empty `corrected_new_string` from the response; otherwise
 * (empty/missing value, or a failed call) `originalNewString` unchanged.
 * @throws Rethrows the LLM call's error when `abortSignal` is aborted.
 */
export async function correctNewString(
  geminiClient: GeminiClient,
  originalOldString: string,
  correctedOldString: string,
  originalNewString: string,
  abortSignal: AbortSignal,
): Promise<string> {
  const oldStringWasChanged = originalOldString !== correctedOldString;
  if (!oldStringWasChanged) {
    return originalNewString;
  }

  // The template lines stay flush-left: leading whitespace would become part of the prompt.
  const instructions = `
Context: A text replacement operation was planned. The original text to be replaced (original_old_string) was slightly different from the actual text in the file (corrected_old_string). The original_old_string has now been corrected to match the file content.
We now need to adjust the replacement text (original_new_string) so that it makes sense as a replacement for the corrected_old_string, while preserving the original intent of the change.
original_old_string (what was initially intended to be found):
\`\`\`
${originalOldString}
\`\`\`
corrected_old_string (what was actually found in the file and will be replaced):
\`\`\`
${correctedOldString}
\`\`\`
original_new_string (what was intended to replace original_old_string):
\`\`\`
${originalNewString}
\`\`\`
Task: Based on the differences between original_old_string and corrected_old_string, and the content of original_new_string, generate a corrected_new_string. This corrected_new_string should be what original_new_string would have been if it was designed to replace corrected_old_string directly, while maintaining the spirit of the original transformation.
For example, if original_old_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name}\\\\\`\`;" and corrected_old_string is "\nconst greeting = \`Hello ${'\\`'}\${name}${'\\`'}\`;", and original_new_string was "\\\\\\nconst greeting = \`Hello \\\\\`\${name} \${lastName}\\\\\`\`;", then corrected_new_string should likely be "\nconst greeting = \`Hello ${'\\`'}\${name} \${lastName}${'\\`'}\`;" to fix the incorrect escaping.
If the differences are only in whitespace or formatting, apply similar whitespace/formatting changes to the corrected_new_string.
Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string'. If no adjustment is deemed necessary or possible, return the original_new_string.
`.trim();

  const request: Content[] = [{ role: 'user', parts: [{ text: instructions }] }];

  try {
    const reply = await geminiClient.generateJson(
      request,
      NEW_STRING_CORRECTION_SCHEMA,
      abortSignal,
      EditModel,
      EditConfig,
    );
    const candidate = reply ? reply.corrected_new_string : undefined;
    if (typeof candidate === 'string' && candidate.length > 0) {
      return candidate;
    }
    // Empty or missing answer: keep the caller's replacement text.
    return originalNewString;
  } catch (error) {
    // An aborted request must surface to the caller; anything else is best-effort.
    if (abortSignal.aborted) {
      throw error;
    }
    console.error('Error during LLM call for new_string correction:', error);
    return originalNewString;
  }
}
// Expected JSON schema for the LLM response used for new_string escaping
// correction (passed to generateJson by correctNewStringEscaping): an object
// with a single required string property, corrected_new_string_escaping.
const CORRECT_NEW_STRING_ESCAPING_SCHEMA: SchemaUnion = {
type: Type.OBJECT,
properties: {
corrected_new_string_escaping: {
type: Type.STRING,
description:
'The new_string with corrected escaping, ensuring it is a proper replacement for the old_string, especially considering potential over-escaping issues from previous LLM generations.',
},
},
required: ['corrected_new_string_escaping'],
};
/**
 * Asks the LLM to repair escaping in a new_string whose old_string already
 * matches the file.
 *
 * Sends one user message with both strings and requests JSON shaped like
 * CORRECT_NEW_STRING_ESCAPING_SCHEMA.
 *
 * @param geminiClient Client used for the generateJson call.
 * @param oldString The exact text that will be replaced.
 * @param potentiallyProblematicNewString Replacement text that may be over-escaped.
 * @param abortSignal Signal forwarded to the LLM call.
 * @returns The non-empty `corrected_new_string_escaping` from the response;
 * otherwise (empty/missing value, or a failed call) the input new string unchanged.
 * @throws Rethrows the LLM call's error when `abortSignal` is aborted.
 */
export async function correctNewStringEscaping(
  geminiClient: GeminiClient,
  oldString: string,
  potentiallyProblematicNewString: string,
  abortSignal: AbortSignal,
): Promise<string> {
  // The template lines stay flush-left: leading whitespace would become part of the prompt.
  const instructions = `
Context: A text replacement operation is planned. The text to be replaced (old_string) has been correctly identified in the file. However, the replacement text (new_string) might have been improperly escaped by a previous LLM generation (e.g. too many backslashes for newlines like \\n instead of \n, or unnecessarily quotes like \\"Hello\\" instead of "Hello").
old_string (this is the exact text that will be replaced):
\`\`\`
${oldString}
\`\`\`
potentially_problematic_new_string (this is the text that should replace old_string, but MIGHT have bad escaping, or might be entirely correct):
\`\`\`
${potentiallyProblematicNewString}
\`\`\`
Task: Analyze the potentially_problematic_new_string. If it's syntactically invalid due to incorrect escaping (e.g., "\n", "\t", "\\", "\\'", "\\""), correct the invalid syntax. The goal is to ensure the new_string, when inserted into the code, will be a valid and correctly interpreted.
For example, if old_string is "foo" and potentially_problematic_new_string is "bar\\nbaz", the corrected_new_string_escaping should be "bar\nbaz".
If potentially_problematic_new_string is console.log(\\"Hello World\\"), it should be console.log("Hello World").
Return ONLY the corrected string in the specified JSON format with the key 'corrected_new_string_escaping'. If no escaping correction is needed, return the original potentially_problematic_new_string.
`.trim();

  const request: Content[] = [{ role: 'user', parts: [{ text: instructions }] }];

  try {
    const reply = await geminiClient.generateJson(
      request,
      CORRECT_NEW_STRING_ESCAPING_SCHEMA,
      abortSignal,
      EditModel,
      EditConfig,
    );
    const candidate = reply ? reply.corrected_new_string_escaping : undefined;
    if (typeof candidate === 'string' && candidate.length > 0) {
      return candidate;
    }
    // Empty or missing answer: keep the caller's replacement text.
    return potentiallyProblematicNewString;
  } catch (error) {
    // An aborted request must surface to the caller; anything else is best-effort.
    if (abortSignal.aborted) {
      throw error;
    }
    console.error(
      'Error during LLM call for new_string escaping correction:',
      error,
    );
    return potentiallyProblematicNewString;
  }
}
// Expected JSON schema for the LLM response used for generic string escaping
// correction (passed to generateJson by correctStringEscaping): an object with
// a single required string property, corrected_string_escaping.
const CORRECT_STRING_ESCAPING_SCHEMA: SchemaUnion = {
type: Type.OBJECT,
properties: {
corrected_string_escaping: {
type: Type.STRING,
description:
'The string with corrected escaping, ensuring it is valid, specially considering potential over-escaping issues from previous LLM generations.',
},
},
required: ['corrected_string_escaping'],
};
/**
 * Asks the LLM to repair escaping in an arbitrary LLM-generated string.
 *
 * Sends one user message with the string and requests JSON shaped like
 * CORRECT_STRING_ESCAPING_SCHEMA.
 *
 * @param potentiallyProblematicString Text that may be over-escaped.
 * @param client Client used for the generateJson call.
 * @param abortSignal Signal forwarded to the LLM call.
 * @returns The non-empty `corrected_string_escaping` from the response;
 * otherwise (empty/missing value, or a failed call) the input string unchanged.
 * @throws Rethrows the LLM call's error when `abortSignal` is aborted.
 */
export async function correctStringEscaping(
  potentiallyProblematicString: string,
  client: GeminiClient,
  abortSignal: AbortSignal,
): Promise<string> {
  // The template lines stay flush-left: leading whitespace would become part of the prompt.
  // The worked example names the same key as the schema and the final
  // instruction ('corrected_string_escaping'), so the prompt is self-consistent.
  const prompt = `
Context: An LLM has just generated potentially_problematic_string and the text might have been improperly escaped (e.g. too many backslashes for newlines like \\n instead of \n, or unnecessarily quotes like \\"Hello\\" instead of "Hello").
potentially_problematic_string (this text MIGHT have bad escaping, or might be entirely correct):
\`\`\`
${potentiallyProblematicString}
\`\`\`
Task: Analyze the potentially_problematic_string. If it's syntactically invalid due to incorrect escaping (e.g., "\n", "\t", "\\", "\\'", "\\""), correct the invalid syntax. The goal is to ensure the text will be a valid and correctly interpreted.
For example, if potentially_problematic_string is "bar\\nbaz", the corrected_string_escaping should be "bar\nbaz".
If potentially_problematic_string is console.log(\\"Hello World\\"), it should be console.log("Hello World").
Return ONLY the corrected string in the specified JSON format with the key 'corrected_string_escaping'. If no escaping correction is needed, return the original potentially_problematic_string.
`.trim();

  const contents: Content[] = [{ role: 'user', parts: [{ text: prompt }] }];

  try {
    const result = await client.generateJson(
      contents,
      CORRECT_STRING_ESCAPING_SCHEMA,
      abortSignal,
      EditModel,
      EditConfig,
    );
    if (
      result &&
      typeof result.corrected_string_escaping === 'string' &&
      result.corrected_string_escaping.length > 0
    ) {
      return result.corrected_string_escaping;
    }
    // Empty or missing answer: keep the caller's text.
    return potentiallyProblematicString;
  } catch (error) {
    // An aborted request must surface to the caller; anything else is best-effort.
    if (abortSignal.aborted) {
      throw error;
    }
    console.error(
      'Error during LLM call for string escaping correction:',
      error,
    );
    return potentiallyProblematicString;
  }
}
/**
 * Trims `target` and its companion string together, but only when that is safe.
 *
 * If `target` has leading/trailing whitespace and its trimmed form occurs in
 * `currentContent` exactly `expectedReplacements` times, both strings are
 * returned trimmed. In every other case both are returned unchanged.
 *
 * @param target The string searched for in the content.
 * @param trimIfTargetTrims The companion string, trimmed only when `target` is.
 * @param currentContent The content in which occurrences are counted.
 * @param expectedReplacements Required occurrence count for the trimmed target.
 * @returns `{ targetString, pair }` — the (possibly trimmed) target and companion.
 */
function trimPairIfPossible(
  target: string,
  trimIfTargetTrims: string,
  currentContent: string,
  expectedReplacements: number,
) {
  const untouched = { targetString: target, pair: trimIfTargetTrims };

  const strippedTarget = target.trim();
  if (strippedTarget.length === target.length) {
    // Nothing to trim.
    return untouched;
  }

  const strippedMatches = countOccurrences(currentContent, strippedTarget);
  if (strippedMatches !== expectedReplacements) {
    // Trimming would change how often the target matches; leave the pair alone.
    return untouched;
  }

  return {
    targetString: strippedTarget,
    pair: trimIfTargetTrims.trim(),
  };
}
/**
 * Unescapes a string that might have been overly escaped by an LLM.
 *
 * Every run of one or more backslashes that is followed by one of
 * n, t, r, ', ", `, another backslash, or an actual newline character is
 * replaced as a whole:
 *  - n, t, r become the newline, tab and carriage-return characters;
 *  - ', ", `, backslash and newline replace the run with just that character.
 * Backslashes followed by any other character are left untouched.
 *
 * @param inputString The possibly over-escaped text.
 * @returns The text with the sequences above collapsed.
 */
export function unescapeStringForGeminiBug(inputString: string): string {
  // Letters that stand for a control character; every other captured
  // character simply stands for itself.
  const controlCharacters: Record<string, string> = {
    n: '\n',
    t: '\t',
    r: '\r',
  };
  return inputString.replace(
    /\\+(n|t|r|'|"|`|\\|\n)/g,
    (_backslashRun: string, marker: string) =>
      controlCharacters[marker] ?? marker,
  );
}
/**
 * Counts the non-overlapping occurrences of `substr` in `str`, scanning from
 * left to right. An empty `substr` is defined to occur zero times.
 *
 * @param str The text to search in.
 * @param substr The text to search for.
 * @returns How many times `substr` was found.
 */
export function countOccurrences(str: string, substr: string): number {
  if (substr.length === 0) {
    return 0;
  }
  // Each search resumes right after the previous match, so matches never overlap.
  const step = substr.length;
  let total = 0;
  for (
    let at = str.indexOf(substr);
    at !== -1;
    at = str.indexOf(substr, at + step)
  ) {
    total += 1;
  }
  return total;
}
/**
 * Empties both module-level correction caches (edit results and file content).
 * As the name says, intended for use from tests only.
 */
export function resetEditCorrectorCaches_TEST_ONLY() {
  for (const cache of [fileContentCorrectionCache, editCorrectionCache]) {
    cache.clear();
  }
}
|