import { createServerFn } from "@tanstack/react-start";
import { fetchAIWithFallback, getAIConfig } from "./ai-config.server";
import { z } from "zod";
/**
 * Shape of the request body for diff-mode analysis.
 * Both fields must be non-empty strings; the upper bounds cap request size.
 */
const InputSchema = z.object({
  // Unified diff text: 1 to 200,000 characters.
  diff: z.string().min(1).max(200_000),
  // Free-text description of the observed failure: 1 to 5,000 characters.
  failureDescription: z.string().min(1).max(5_000),
});
/** One ranked candidate location for the root cause of a failure. */
export type Suspect = {
  /** Path of the file the suspect lives in. */
  filePath: string;
  /** Name of the enclosing function/block, or null when none is available. */
  functionName: string | null;
  /** First line of the suspect range. */
  lineStart: number;
  /** Last line of the suspect range. */
  lineEnd: number;
  /** How strongly the change is believed to explain the failure. */
  confidence: "high" | "medium" | "low";
  /** Cause-and-effect explanation of why this location is suspected. */
  mechanism: string;
  /** Short label describing the change or suspicious pattern. */
  changeSummary: string;
  /** Code as it was before the change, or null when not applicable. */
  beforeSnippet: string | null;
  /** Code as it is after the change, or null when not applicable. */
  afterSnippet: string | null;
};
/** One row of the audit token map: a real identifier, its opaque token, and how often it was seen. */
export type AuditEntry = {
  token: string;
  real: string;
  occurrences: number;
};
/** A pair of text excerpts showing the input before and after sanitization. */
export type AuditSample = {
  original: string;
  sanitized: string;
};
/** Full result of an analysis run: ranked suspects plus a record of what the sanitizer did. */
export type DebugResult = {
  /** Ranked root-cause candidates. */
  suspects: Suspect[];
  /** Short overall verdict. */
  summary: string;
  /** Counters reported by the sanitizer. */
  sanitizationStats: {
    identifiersTokenized: number;
    commentsStripped: number;
    secretsBlocked: number;
  };
  /** Details the caller can display to show what was sent in sanitized form. */
  audit: {
    /** Real identifier -> token rows, sorted by occurrence. */
    tokenMap: AuditEntry[];
    /** Up to 20 pieces of text removed by comment stripping. */
    redactedComments: string[];
    /** One entry per redacted secret; never contains the secret itself. */
    secretMatches: { pattern: string; replaced: string }[];
    /** Roughly the first 30 lines, original next to sanitized. */
    sample: AuditSample;
  };
};
// ───────────────────────── IP Shield: sanitizer ─────────────────────────
// Strips comments + secrets, replaces identifiers with fn_NNNN tokens.
// Token map stays server-side and is used to restore real names afterwards.
/**
 * Patterns for credential-looking text. The order matters to consumers that
 * label matches by array position, so new entries belong at the end.
 */
const SECRET_PATTERNS = [
  // keyword (api key / secret / token / password / bearer), then ":" or "=",
  // then an optionally quoted run of 8+ word-or-dash characters
  /(?:api[_-]?key|secret|token|password|bearer)\s*[:=]\s*["']?[A-Za-z0-9_\-]{8,}["']?/gi,
  // "sk-" followed by 20+ alphanumerics
  /sk-[A-Za-z0-9]{20,}/g,
  // three dot-separated base64url-style segments starting with "eyJ" (JWT shape)
  /eyJ[A-Za-z0-9_\-]{20,}\.[A-Za-z0-9_\-]{20,}\.[A-Za-z0-9_\-]{10,}/g,
];
/**
 * Words the sanitizer leaves untouched instead of replacing with a token:
 * common language keywords plus vocabulary that shows up in git diffs.
 */
const RESERVED = new Set([
  // control flow and declarations
  "if", "else", "for", "while", "return", "def", "class", "import", "from",
  "const", "let", "var", "function", "async", "await", "try", "catch", "throw",
  // literals and receivers
  "new", "null", "true", "false", "this", "self",
  // type names and modifiers
  "int", "str", "bool", "void", "public", "private", "static", "export", "default",
  // diff / branch vocabulary
  "diff", "git", "index", "main", "feature", "a", "b", "fix", "add", "remove", "update",
]);
/**
 * IP Shield sanitizer. Runs three passes over the input text:
 *
 *  1. every match of SECRET_PATTERNS is replaced with "[SECRET_REDACTED]";
 *  2. comments are stripped line by line (`#`, `//`, and single-line block comments);
 *  3. every identifier-like word of 3+ characters that is not in RESERVED is
 *     replaced by an opaque `fn_NNNN` token (the same word always gets the same token).
 *
 * The comment pass is a per-line heuristic: block comments that span several
 * lines are not removed by it (their words are still tokenized by pass 3).
 *
 * @param diff Text to sanitize (a unified diff, a code snippet, or free text).
 * @returns `sanitized` text, `reverseMap` (token -> real name) for restoring
 *          names later, `stats` counters, and `audit` details for display.
 */
export function sanitize(diff: string) {
  const SECRET_MARKER = "[SECRET_REDACTED]";
  // The word inside the marker; it must survive pass 3 untouched, otherwise the
  // marker reaches the model as "[fn_NNNN]" and shows up as a fake identifier
  // in the stats and the audit map.
  const SECRET_MARKER_WORD = "SECRET_REDACTED";
  // Index-aligned with SECRET_PATTERNS; unlabeled patterns fall back to "secret".
  const PATTERN_LABELS = ["api-key/secret/token assignment", "OpenAI key (sk-…)", "JWT bearer"];
  const MAX_AUDIT_COMMENTS = 20;
  const SAMPLE_LINES = 30;

  const tokenMap = new Map<string, string>(); // real -> token
  const reverseMap = new Map<string, string>(); // token -> real
  const occurrences = new Map<string, number>(); // real -> count
  const redactedComments: string[] = [];
  const secretMatches: { pattern: string; replaced: string }[] = [];
  let counter = 1;
  let commentsStripped = 0;
  let secretsBlocked = 0;

  // Pass 1: secrets. Only a length summary is recorded, never the secret itself.
  let redacted = diff;
  SECRET_PATTERNS.forEach((re, idx) => {
    redacted = redacted.replace(re, (match) => {
      secretsBlocked++;
      secretMatches.push({
        pattern: PATTERN_LABELS[idx] ?? "secret",
        replaced: `[REDACTED ${match.length} chars]`,
      });
      return SECRET_MARKER;
    });
  });

  // Pass 2: comments, one line at a time.
  const lines = redacted.split("\n").map((original) => {
    const stripped = original
      .replace(/(^|\s)#.*$/g, "$1")
      .replace(/\/\/.*$/g, "")
      .replace(/\/\*[\s\S]*?\*\//g, "");
    if (stripped !== original) {
      commentsStripped++;
      // Approximation: treats everything past the kept prefix as the removed text.
      const removed = original.slice(stripped.length).trim();
      if (removed && redactedComments.length < MAX_AUDIT_COMMENTS) redactedComments.push(removed);
    }
    return stripped;
  });

  // Pass 3: identifiers. The regex below only yields words of 3+ characters that
  // start with a letter or underscore, so no extra length/number checks are needed.
  const tokenize = (name: string): string => {
    if (name === SECRET_MARKER_WORD || RESERVED.has(name)) return name;
    occurrences.set(name, (occurrences.get(name) ?? 0) + 1);
    let tok = tokenMap.get(name);
    if (!tok) {
      tok = `fn_${String(counter++).padStart(4, "0")}`;
      tokenMap.set(name, tok);
      reverseMap.set(tok, name);
    }
    return tok;
  };
  // Diff header lines ("diff --git", "index", "@@") are tokenized like any other
  // line, so file paths in headers are anonymized as well.
  const sanitized = lines
    .map((line) => line.replace(/[A-Za-z_][A-Za-z0-9_]{2,}/g, (word) => tokenize(word)))
    .join("\n");

  // Audit token map, most-used identifiers first.
  const auditMap: AuditEntry[] = Array.from(tokenMap.entries())
    .map(([real, token]) => ({ token, real, occurrences: occurrences.get(real) ?? 0 }))
    .sort((a, b) => b.occurrences - a.occurrences);

  // Sample: first 30 lines of the secret-redacted input next to the sanitized output.
  const sample: AuditSample = {
    original: redacted.split("\n").slice(0, SAMPLE_LINES).join("\n"),
    sanitized: sanitized.split("\n").slice(0, SAMPLE_LINES).join("\n"),
  };

  return {
    sanitized,
    reverseMap,
    stats: { identifiersTokenized: tokenMap.size, commentsStripped, secretsBlocked },
    audit: { tokenMap: auditMap, redactedComments, secretMatches, sample },
  };
}
/**
 * Replaces sanitizer tokens in `text` with the real names they stand for.
 *
 * Tokens are matched with all of their digits: the sanitizer zero-pads to four
 * digits but does not truncate, so the 10,000th identifier becomes `fn_10000`.
 * Matching exactly four digits would read that as `fn_1000` followed by "0".
 *
 * @param text       Text that may contain `fn_NNNN` tokens.
 * @param reverseMap Token -> real name map.
 * @returns The text with every known token replaced; unknown tokens are left as they are.
 */
export function restore(text: string, reverseMap: Map<string, string>): string {
  return text.replace(/fn_\d{4,}/g, (tok) => reverseMap.get(tok) ?? tok);
}
// ───────────────────────── Diff parser (file path + line numbers) ─────────────────────────
// Parses unified diff so we can report real file paths and the exact added-line range
// for each hunk. The AI returns a hunk index + reasoning; we look up the location here.
/** One `@@ … @@` section of a unified diff, with its location in the new file. */
type Hunk = {
  /** Path of the file the hunk belongs to. */
  filePath: string;
  /** 0-based position of the hunk within the whole diff. */
  hunkIndex: number;
  /** First new-file line of the hunk. */
  newStart: number;
  /** Last new-file line that was added/changed. */
  newEnd: number;
  /** Added lines, without the leading "+". */
  addedLines: string[];
  /** Removed lines, without the leading "-". */
  removedLines: string[];
  /** Text following the closing "@@" of the hunk header, or null when empty. */
  functionContext: string | null;
};
/**
 * Parses a unified diff into its hunks.
 *
 * The current file is taken from `diff --git a/… b/…` lines and from `+++ …`
 * lines (ignoring `/dev/null`); a hunk seen before any file header is filed
 * under "unknown". For each `@@` header the body is scanned up to the next
 * `@@` or `diff --git` line, collecting added and removed lines and tracking
 * the new-file line number of the last added line.
 *
 * `\ No newline at end of file` markers are skipped: they are not lines of the
 * file, so they must not advance the new-file line counter.
 *
 * @param diff Unified diff text.
 * @returns Hunks in order of appearance; empty when no `@@` header is found.
 */
function parseDiff(diff: string): Hunk[] {
  const hunks: Hunk[] = [];
  const lines = diff.split("\n");
  let currentFile: string | null = null;
  let hunkIndex = 0;

  // Every line is visited by this outer loop, including hunk body lines that the
  // inner scan has already read: for diffs without "diff --git" lines this is
  // what picks up the "+++" header of the next file.
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    const gitMatch = line.match(/^diff --git a\/(.+?) b\/(.+)$/);
    if (gitMatch) {
      currentFile = gitMatch[2];
      continue;
    }

    const plusFile = line.match(/^\+\+\+ (?:b\/)?(.+?)(?:\s|$)/);
    if (plusFile && plusFile[1] !== "/dev/null") {
      currentFile = plusFile[1];
      continue;
    }

    const hunkHeader = line.match(/^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@(.*)$/);
    if (!hunkHeader) continue;

    if (!currentFile) currentFile = "unknown";
    const newStart = parseInt(hunkHeader[1], 10);
    const functionContext = hunkHeader[3].trim() || null;
    const addedLines: string[] = [];
    const removedLines: string[] = [];
    let cursor = newStart; // new-file line number of the next body line
    let lastChanged = newStart;

    for (let j = i + 1; j < lines.length; j++) {
      const body = lines[j];
      if (body.startsWith("@@") || body.startsWith("diff --git")) break;
      // "\ No newline at end of file": metadata, not a file line.
      if (body.startsWith("\\")) continue;
      if (body.startsWith("+") && !body.startsWith("+++")) {
        addedLines.push(body.slice(1));
        lastChanged = cursor;
        cursor++;
      } else if (body.startsWith("-") && !body.startsWith("---")) {
        // Removed lines exist only in the old file, so the cursor stays put.
        removedLines.push(body.slice(1));
      } else {
        cursor++;
      }
    }

    hunks.push({
      filePath: currentFile,
      hunkIndex: hunkIndex++,
      newStart,
      newEnd: Math.max(newStart, lastChanged),
      addedLines,
      removedLines,
      functionContext,
    });
  }
  return hunks;
}
// ───────────────────────── AI tool schema ─────────────────────────
/**
 * Function-calling tool definition for diff mode. The model is asked to answer
 * by calling `submit_root_cause_analysis` with a summary and a list of suspects,
 * each pointing at a hunk by its 0-based index in the HUNKS list.
 */
const analysisTool = {
  type: "function" as const,
  function: {
    name: "submit_root_cause_analysis",
    description: "Submit ranked root-cause suspects derived from a sanitized git diff and a failure description.",
    parameters: {
      type: "object",
      properties: {
        summary: { type: "string", description: "1-2 sentence overall verdict." },
        suspects: {
          type: "array",
          minItems: 1,
          items: {
            type: "object",
            properties: {
              hunkIndex: { type: "number", description: "Index into the provided HUNKS list (0-based)." },
              functionToken: { type: "string", description: "Anonymized function token (e.g. fn_0019), or empty string." },
              confidence: { type: "string", enum: ["high", "medium", "low"] },
              mechanism: { type: "string", description: "Plain-English explanation of why this change causes the failure." },
              // The arrow in the example was mis-encoded; restored to a real "→".
              changeSummary: { type: "string", description: "Short label of the change (e.g. 'Threshold change 15→16')." },
            },
            required: ["hunkIndex", "functionToken", "confidence", "mechanism", "changeSummary"],
            additionalProperties: false,
          },
        },
      },
      required: ["summary", "suspects"],
      additionalProperties: false,
    },
  },
};
// System prompt for diff mode. It is sent verbatim to the model, so the dashes
// that had been mis-encoded are restored to real em dashes here.
const SYSTEM_PROMPT = `You are BranchDebug Bot, a code-aware root-cause analyzer powered by Qwen3 reasoning. Inputs are:
1. A SANITIZED unified git diff where real identifiers have been replaced with opaque tokens like fn_0019.
2. A natural-language description of an observed failure.
3. A numbered list of HUNKS (filePath, lineRange, function context).
For each hunk that plausibly caused the failure, return a suspect entry with:
- hunkIndex (the number from the HUNKS list)
- confidence (high/medium/low) — only mark "high" when the mechanism directly explains the failure
- mechanism — concrete cause-and-effect
- changeSummary — a short label
Rank by likelihood. Be conservative; if a hunk is unrelated, do not include it. Always call submit_root_cause_analysis.`;
/**
 * Diff-mode analysis: sanitizes a unified diff and a failure description, asks
 * the model for ranked root-cause suspects, and maps the answer back to real
 * file paths, line ranges and identifier names.
 *
 * @param diff               Unified git diff text.
 * @param failureDescription Free-text description of the observed failure.
 * @param userId             Passed through to `fetchAIWithFallback`.
 * @returns Suspects sorted high -> medium -> low confidence, a summary, and the
 *          sanitizer's stats and audit data for the diff.
 * @throws Error when the diff contains no hunks, when the AI call fails
 *         (dedicated messages for HTTP 429 and 402), or when the response does
 *         not contain a usable structured analysis.
 */
export async function analyzeDiff(diff: string, failureDescription: string, userId?: string | null): Promise<DebugResult> {
  // Return value unused. NOTE(review): presumably throws when the AI settings are missing — confirm.
  getAIConfig();

  const { sanitized, reverseMap, stats, audit } = sanitize(diff);
  const hunks = parseDiff(diff);
  if (hunks.length === 0) throw new Error("No hunks found in diff. Make sure the input is a unified git diff.");

  // The description is sanitized by a separate sanitize() call, whose tokens are
  // numbered from fn_0001 again and therefore collide with unrelated tokens of the
  // diff. Re-key them into the diff's token space: a name that also occurs in the
  // diff gets the diff's token, any other name gets a fresh token that is added to
  // reverseMap so restore() can translate it back.
  const description = sanitize(failureDescription);
  const tokenByName = new Map<string, string>();
  reverseMap.forEach((real, token) => tokenByName.set(real, token));
  let nextTokenId = reverseMap.size + 1;
  const sanitizedDescription = description.sanitized.replace(/fn_\d{4,}/g, (tok) => {
    const real = description.reverseMap.get(tok);
    if (real === undefined) return tok;
    let shared = tokenByName.get(real);
    if (shared === undefined) {
      do {
        shared = `fn_${String(nextTokenId++).padStart(4, "0")}`;
      } while (reverseMap.has(shared));
      tokenByName.set(real, shared);
      reverseMap.set(shared, real);
    }
    return shared;
  });

  // Hunk list shown to the model: sanitized content, real line ranges.
  const sanitizedHunks = parseDiff(sanitized);
  const hunkList = sanitizedHunks.map((h, i) => {
    const realLoc = hunks[i];
    const range = realLoc ? `lines ${realLoc.newStart}-${realLoc.newEnd}` : `lines ?`;
    const ctx = h.functionContext ? ` in ${h.functionContext}` : "";
    const added = h.addedLines.slice(0, 8).map((l) => `+ ${l}`).join("\n");
    const removed = h.removedLines.slice(0, 8).map((l) => `- ${l}`).join("\n");
    return `[${i}] ${h.filePath} (${range})${ctx}\n${removed}\n${added}`;
  }).join("\n\n");
  const userContent = `FAILURE DESCRIPTION (sanitized):\n${sanitizedDescription}\n\nHUNKS:\n${hunkList}\n\nFULL SANITIZED DIFF:\n${sanitized.slice(0, 40_000)}`;

  const resp = await fetchAIWithFallback(JSON.stringify({
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: userContent },
    ],
    tools: [analysisTool],
    tool_choice: { type: "function", function: { name: "submit_root_cause_analysis" } },
  }), "google/gemini-2.5-pro", "debugBranch", userId);
  if (!resp.ok) {
    const text = await resp.text();
    if (resp.status === 429) throw new Error("Rate limit reached. Try again shortly.");
    if (resp.status === 402) throw new Error("AI credits exhausted. Add credits in Workspace > Usage.");
    throw new Error(`AI gateway error ${resp.status}: ${text.slice(0, 300)}`);
  }

  const json = await resp.json();
  const toolCall = json.choices?.[0]?.message?.tool_calls?.[0];
  if (!toolCall?.function?.arguments) throw new Error("AI did not return structured analysis.");

  // The arguments are model output: parse defensively and check the shape before use.
  let parsed: unknown;
  try {
    parsed = JSON.parse(toolCall.function.arguments);
  } catch {
    throw new Error("AI did not return structured analysis.");
  }
  const payload = (typeof parsed === "object" && parsed !== null ? parsed : {}) as {
    summary?: unknown;
    suspects?: unknown;
  };
  if (!Array.isArray(payload.suspects)) throw new Error("AI did not return structured analysis.");

  const asText = (value: unknown): string => (typeof value === "string" ? value : "");
  const rank = { high: 0, medium: 1, low: 2 } as const;
  const suspects: Suspect[] = [];
  for (const raw of payload.suspects as unknown[]) {
    if (typeof raw !== "object" || raw === null) continue;
    const s = raw as Record<string, unknown>;
    // Suspects that point at a hunk we do not have are dropped.
    const h: Hunk | undefined = hunks[s.hunkIndex as number];
    if (!h) continue;
    const functionToken = asText(s.functionToken);
    // An unexpected confidence value is treated as "low" so the sort below stays well-defined.
    const confidence = s.confidence === "high" || s.confidence === "medium" ? s.confidence : "low";
    suspects.push({
      filePath: h.filePath,
      functionName: functionToken ? restore(functionToken, reverseMap) : (h.functionContext ?? null),
      lineStart: h.newStart,
      lineEnd: h.newEnd,
      confidence,
      mechanism: restore(asText(s.mechanism), reverseMap),
      changeSummary: restore(asText(s.changeSummary), reverseMap),
      beforeSnippet: h.removedLines.slice(0, 6).join("\n") || null,
      afterSnippet: h.addedLines.slice(0, 6).join("\n") || null,
    });
  }
  suspects.sort((a, b) => rank[a.confidence] - rank[b.confidence]);

  return {
    summary: restore(asText(payload.summary), reverseMap),
    suspects,
    sanitizationStats: stats,
    audit,
  };
}
/**
 * POST server function for diff mode: validates the payload against
 * InputSchema, then hands the diff and failure description to analyzeDiff.
 */
export const debugBranch = createServerFn({ method: "POST" })
  .inputValidator((raw: unknown) => InputSchema.parse(raw))
  .handler(async ({ data }): Promise<DebugResult> => {
    const { diff, failureDescription } = data;
    return analyzeDiff(diff, failureDescription);
  });
// ───────────────────────── Snippet mode (no diff) ─────────────────────────
// For users pasting a raw code snippet instead of a unified diff.
// We still run the IP Shield sanitizer, then ask the AI to locate bugs by line.
/**
 * Shape of the request body for snippet-mode analysis.
 * `snippet` and `failureDescription` must be non-empty; `language` may be omitted.
 */
const SnippetInputSchema = z.object({
  // Raw code: 1 to 200,000 characters.
  snippet: z.string().min(1).max(200_000),
  // Free-text description of the observed failure: 1 to 5,000 characters.
  failureDescription: z.string().min(1).max(5_000),
  // Optional language hint, at most 40 characters.
  language: z.string().max(40).optional(),
});
/**
 * Function-calling tool definition for snippet mode. The model is asked to
 * answer by calling `submit_snippet_analysis` with a summary and a list of
 * suspects, each located by a 1-based line number in the snippet.
 */
const snippetTool = {
  type: "function" as const,
  function: {
    name: "submit_snippet_analysis",
    description: "Submit ranked bug suspects for a raw code snippet (no diff).",
    parameters: {
      type: "object",
      properties: {
        summary: { type: "string" },
        suspects: {
          type: "array",
          minItems: 1,
          items: {
            type: "object",
            properties: {
              line: { type: "number", description: "1-based line number in the snippet." },
              functionToken: { type: "string", description: "Anonymized function/block token, or empty string." },
              confidence: { type: "string", enum: ["high", "medium", "low"] },
              mechanism: { type: "string" },
              changeSummary: { type: "string", description: "Short label of the suspicious pattern." },
              codeFragment: { type: "string", description: "The exact suspect line(s), anonymized." },
            },
            required: ["line", "functionToken", "confidence", "mechanism", "changeSummary", "codeFragment"],
            additionalProperties: false,
          },
        },
      },
      required: ["summary", "suspects"],
      additionalProperties: false,
    },
  },
};
// System prompt for snippet mode. It is sent verbatim to the model, so the
// dashes and list bullets that had been mis-encoded are restored to real
// em dashes and "•" bullets here.
const SNIPPET_SYSTEM = `You are BranchDebug Bot in SNIPPET mode, powered by Qwen3 reasoning — an expert code reviewer with deep knowledge of every mainstream programming language (Python, TypeScript/JavaScript, C/C++, C#, Java, Kotlin, Swift, Go, Rust, Ruby, PHP, Scala, Elixir, Haskell, Lua, R, Dart, SQL, Bash, HTML/CSS, YAML/JSON/TOML, and more).
The user pasted a raw code snippet (not a diff). Identifiers are tokenized as fn_NNNN; treat them as opaque names. Carefully analyze the snippet and find ANY of the following classes of bugs that match the failure description (or are obvious defects, even if not described):
• Syntax errors (missing colons, brackets, semicolons, quotes, indentation)
• Type errors / wrong argument count / missing or extra parameters
• Off-by-one errors, bad thresholds, wrong comparison operators
• Null / undefined / None / nil dereferences
• Uninitialized variables, scope/closure mistakes, shadowing
• Logic errors, wrong control flow, unreachable code, infinite loops
• Race conditions, async/await misuse, unhandled promise rejections
• Resource leaks (unclosed files, connections, listeners)
• Security issues (SQL injection, XSS, path traversal, weak crypto, secrets)
• Performance pitfalls (N+1 queries, quadratic loops on large input)
• API misuse, deprecated calls, framework-specific anti-patterns
• Incorrect return values, missing return statements
For EACH defect you find, return one suspect with the 1-based line number from the snippet, a confidence rating, and a clear cause-and-effect mechanism. Be thorough but precise — return multiple suspects when there are multiple bugs (e.g. a syntax error AND a wrong argument count). Only mark "high" when the mechanism directly explains the failure or is an obvious defect. Always call submit_snippet_analysis.`;
/**
 * Snippet-mode analysis: sanitizes a raw code snippet and a failure
 * description, asks the model for suspects located by line number, and maps
 * the answer back to real identifier names and the original source lines.
 *
 * @param snippet            Raw source code (not a diff).
 * @param failureDescription Free-text description of the observed failure.
 * @param language           Optional language hint; also used as the suffix of the reported file path.
 * @param userId             Passed through to `fetchAIWithFallback`.
 * @returns Suspects sorted high -> medium -> low confidence, a summary, and the
 *          sanitizer's stats and audit data for the snippet.
 * @throws Error when the AI call fails (dedicated messages for HTTP 429 and
 *         402) or when the response does not contain a usable structured analysis.
 */
export async function analyzeSnippet(
  snippet: string,
  failureDescription: string,
  language?: string,
  userId?: string | null,
): Promise<DebugResult> {
  // Return value unused. NOTE(review): presumably throws when the AI settings are missing — confirm.
  getAIConfig();

  const { sanitized, reverseMap, stats, audit } = sanitize(snippet);
  const numbered = sanitized.split("\n").map((l, i) => `${String(i + 1).padStart(4, " ")} | ${l}`).join("\n");

  // The description is sanitized by a separate sanitize() call, whose tokens are
  // numbered from fn_0001 again and therefore collide with unrelated tokens of the
  // snippet. Re-key them into the snippet's token space: a name that also occurs in
  // the snippet gets the snippet's token, any other name gets a fresh token that is
  // added to reverseMap so restore() can translate it back.
  const description = sanitize(failureDescription);
  const tokenByName = new Map<string, string>();
  reverseMap.forEach((real, token) => tokenByName.set(real, token));
  let nextTokenId = reverseMap.size + 1;
  const sanitizedDescription = description.sanitized.replace(/fn_\d{4,}/g, (tok) => {
    const real = description.reverseMap.get(tok);
    if (real === undefined) return tok;
    let shared = tokenByName.get(real);
    if (shared === undefined) {
      do {
        shared = `fn_${String(nextTokenId++).padStart(4, "0")}`;
      } while (reverseMap.has(shared));
      tokenByName.set(real, shared);
      reverseMap.set(shared, real);
    }
    return shared;
  });

  const userContent = `LANGUAGE: ${language || "auto-detect"}\n\nFAILURE DESCRIPTION (sanitized):\n${sanitizedDescription}\n\nCODE SNIPPET (line-numbered, sanitized):\n${numbered.slice(0, 40_000)}`;

  const resp = await fetchAIWithFallback(JSON.stringify({
    messages: [
      { role: "system", content: SNIPPET_SYSTEM },
      { role: "user", content: userContent },
    ],
    tools: [snippetTool],
    tool_choice: { type: "function", function: { name: "submit_snippet_analysis" } },
  }), "google/gemini-2.5-pro", "debugSnippet", userId);
  if (!resp.ok) {
    const text = await resp.text();
    if (resp.status === 429) throw new Error("Rate limit reached. Try again shortly.");
    if (resp.status === 402) throw new Error("AI credits exhausted. Add credits in Workspace > Usage.");
    throw new Error(`AI gateway error ${resp.status}: ${text.slice(0, 300)}`);
  }

  const json = await resp.json();
  const toolCall = json.choices?.[0]?.message?.tool_calls?.[0];
  if (!toolCall?.function?.arguments) throw new Error("AI did not return structured analysis.");

  // The arguments are model output: parse defensively and check the shape before use.
  let parsed: unknown;
  try {
    parsed = JSON.parse(toolCall.function.arguments);
  } catch {
    throw new Error("AI did not return structured analysis.");
  }
  const payload = (typeof parsed === "object" && parsed !== null ? parsed : {}) as {
    summary?: unknown;
    suspects?: unknown;
  };
  if (!Array.isArray(payload.suspects)) throw new Error("AI did not return structured analysis.");

  const asText = (value: unknown): string => (typeof value === "string" ? value : "");
  const rank = { high: 0, medium: 1, low: 2 } as const;
  const snippetLines = snippet.split("\n");
  const filePath = language ? `snippet.${language}` : "snippet";
  const suspects: Suspect[] = [];
  for (const raw of payload.suspects as unknown[]) {
    if (typeof raw !== "object" || raw === null) continue;
    const s = raw as Record<string, unknown>;
    // A suspect without a usable line number cannot be located, so it is dropped.
    const line = s.line;
    if (typeof line !== "number" || !Number.isFinite(line)) continue;
    const functionToken = asText(s.functionToken);
    // An unexpected confidence value is treated as "low" so the sort below stays well-defined.
    const confidence = s.confidence === "high" || s.confidence === "medium" ? s.confidence : "low";
    suspects.push({
      filePath,
      functionName: functionToken ? restore(functionToken, reverseMap) : null,
      lineStart: line,
      lineEnd: line,
      confidence,
      mechanism: restore(asText(s.mechanism), reverseMap),
      changeSummary: restore(asText(s.changeSummary), reverseMap),
      beforeSnippet: null,
      // Prefer the real source line; fall back to the model's fragment when the
      // line number does not exist in the snippet.
      afterSnippet: snippetLines[line - 1] ?? restore(asText(s.codeFragment), reverseMap),
    });
  }
  suspects.sort((a, b) => rank[a.confidence] - rank[b.confidence]);

  return {
    summary: restore(asText(payload.summary), reverseMap),
    suspects,
    sanitizationStats: stats,
    audit,
  };
}
/**
 * POST server function for snippet mode: validates the payload against
 * SnippetInputSchema, then hands its fields to analyzeSnippet.
 */
export const debugSnippet = createServerFn({ method: "POST" })
  .inputValidator((raw: unknown) => SnippetInputSchema.parse(raw))
  .handler(async ({ data }): Promise<DebugResult> => {
    const { snippet, failureDescription, language } = data;
    return analyzeSnippet(snippet, failureDescription, language);
  });
|