Spaces:

lablab-ai-amd-developer-hackathon
/

drivecore

Running

drivecore / src /lib /forensic.functions.ts

gpt-engineer-app[bot]

Changes

58360ec about 2 months ago

20.9 kB

	import { createServerFn } from "@tanstack/react-start";
	import { z } from "zod";
	import { sanitize, restore } from "./branch-debug.functions";
	import { fetchAIWithFallback, getAIConfig } from "./ai-config.server";

	// ───────────────────────── SSRF Guard ─────────────────────────
	// Reject loopback, private (RFC1918), link-local, and cloud metadata addresses
	// before any server-side fetch of a user-supplied URL.
	/**
	 * Validates a user-supplied URL before the server fetches it (SSRF guard).
	 *
	 * Rejects anything that is not http(s), and any host that is a loopback,
	 * private (RFC 1918), carrier-grade NAT, link-local, multicast/reserved or
	 * cloud-metadata address, for both IPv4 and IPv6 literals.
	 *
	 * This is a purely syntactic check on the URL. It does not resolve DNS, so a
	 * public hostname that resolves to a private address is not detected here,
	 * and it cannot see where a redirect would lead.
	 *
	 * @param raw - URL string supplied by the caller.
	 * @returns The parsed URL when it is acceptable.
	 * @throws Response with status 400 when the URL is malformed or not allowed.
	 */
	function assertSafePublicUrl(raw: string): URL {
	  let u: URL;
	  try {
	    u = new URL(raw);
	  } catch {
	    throw new Response("Invalid URL", { status: 400 });
	  }
	  if (u.protocol !== "https:" && u.protocol !== "http:") {
	    throw new Response("Only http(s) URLs are allowed", { status: 400 });
	  }

	  // URL.hostname keeps the brackets around IPv6 literals ("[::1]") and keeps a
	  // trailing root dot ("localhost."); strip both so the checks below can match.
	  const host = u.hostname
	    .toLowerCase()
	    .replace(/^\[|\]$/g, "")
	    .replace(/\.$/, "");

	  // Block obvious literals
	  const blockedHosts = new Set([
	    "localhost", "127.0.0.1", "0.0.0.0", "::1", "::",
	    "169.254.169.254", "metadata.google.internal", "metadata.goog",
	  ]);
	  if (blockedHosts.has(host) || host.endsWith(".localhost")) {
	    throw new Response("Host not allowed", { status: 400 });
	  }

	  // True when the first two octets place an IPv4 address in a non-public range.
	  const isReservedIpv4 = (a: number, b: number): boolean =>
	    a === 10 ||
	    a === 127 ||
	    a === 0 ||
	    (a === 100 && b >= 64 && b <= 127) || // carrier-grade NAT 100.64.0.0/10
	    (a === 172 && b >= 16 && b <= 31) ||
	    (a === 192 && b === 168) ||
	    (a === 169 && b === 254) ||
	    a >= 224; // multicast/reserved

	  // Block IPv4 private/reserved ranges
	  const ipv4 = host.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
	  if (ipv4) {
	    const [a, b] = ipv4.slice(1).map(Number);
	    if (isReservedIpv4(a, b)) {
	      throw new Response("Private/reserved IP not allowed", { status: 400 });
	    }
	  }

	  // Block IPv6 loopback / link-local / unique-local / multicast
	  if (host.includes(":")) {
	    if (
	      host === "::1" ||
	      /^fe[89ab][0-9a-f]:/.test(host) || // fe80::/10 link-local
	      host.startsWith("fc") ||
	      host.startsWith("fd") ||
	      host.startsWith("ff") // ff00::/8 multicast
	    ) {
	      throw new Response("Private IPv6 not allowed", { status: 400 });
	    }
	    // IPv4-mapped IPv6 (::ffff:a.b.c.d). The URL parser serialises the IPv4
	    // part as two hex groups, e.g. ::ffff:7f00:1 for 127.0.0.1.
	    const mappedHex = host.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/);
	    const mappedDotted = host.match(/^::ffff:(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/);
	    if (mappedHex) {
	      const hi = parseInt(mappedHex[1], 16);
	      if (isReservedIpv4(hi >> 8, hi & 0xff)) {
	        throw new Response("Private/reserved IP not allowed", { status: 400 });
	      }
	    } else if (mappedDotted) {
	      if (isReservedIpv4(Number(mappedDotted[1]), Number(mappedDotted[2]))) {
	        throw new Response("Private/reserved IP not allowed", { status: 400 });
	      }
	    }
	  }
	  return u;
	}

	// ───────────────────────── Types ─────────────────────────

	/** Build information resolved for one vehicle, plus how it was resolved. */
	export type VehicleManifest = {
	  vehicleId: string;
	  /** Commit the analysed files were fetched at. */
	  commitHash: string;
	  branch: string;
	  /** Where the commit hash came from. */
	  source: "manifest" | "git" | "github" | "manual";
	  /** Timestamp taken with Date.now() when the fetch completed. */
	  fetchedAt: number;
	  /** Non-fatal problems encountered while resolving the commit or files. */
	  warnings: string[];
	};

	/** Result of fetching a vehicle's source files at its resolved commit. */
	export type FetchedCode = {
	  /** Commit/branch information the files were fetched against. */
	  manifest: VehicleManifest;
	  /** Fetched files, each with its repository path and text content. */
	  files: Array<{ path: string; content: string }>;
	  /** Label describing which strategy produced the commit and files. */
	  fetchMethod: string;
	  /** Non-fatal problems encountered during the fetch. */
	  warnings: string[];
	};

	/** One ranked failure hypothesis produced by the Stage 1 analysis. */
	export type Hypothesis = {
	  id: string;
	  title: string;
	  confidence: "High" | "Medium" | "Low";
	  mechanism: string;
	  code_evidence: string;
	  /** Log patterns predicted to appear if this hypothesis is true. */
	  log_signatures: string[];
	  /** Observation that would rule this hypothesis out. */
	  eliminates_if: string;
	};

	/** Stage 1 output: analysis from code and failure description only. */
	export type Stage1Result = {
	  /** Discriminant identifying this as a Stage 1 result. */
	  stage: 1;
	  summary: string;
	  critical_path: string[];
	  /** Candidate explanations for the failure. */
	  hypotheses: Hypothesis[];
	  /** Search terms to look for in logs. */
	  what_to_grep: string[];
	  timeline_question: string;
	  unknowns: string[];
	};

	/** Stage 2 output: root cause derived from code plus system logs. */
	export type Stage2Result = {
	  /** Discriminant identifying this as a Stage 2 result. */
	  stage: 2;
	  summary: string;
	  /** Reconstructed sequence of events. */
	  timeline: Array<{ offset: string; event: string; source: string }>;
	  /** Verdict for each Stage 1 hypothesis, keyed by hypothesis id. */
	  hypothesis_verdicts: Array<{
	    id: string;
	    verdict: "CONFIRMED" | "ELIMINATED" | "POSSIBLE";
	    evidence: string;
	  }>;
	  root_cause: {
	    function: string;
	    line_hint: string;
	    mechanism: string;
	    confidence: "High" | "Medium" | "Low";
	  };
	  what_vehicle_saw: string;
	  still_unknown: string[];
	};

	/** Stage 3 output: definitive cause from code, logs and sensor data. */
	export type Stage3Result = {
	  /** Discriminant identifying this as a Stage 3 result. */
	  stage: 3;
	  summary: string;
	  /** Layer of the stack in which the failure originated. */
	  failure_layer: "SENSING" | "PROCESSING" | "PLANNING" | "CONTROL";
	  perception_state: {
	    what_vehicle_saw: string;
	    what_it_should_have_seen: string;
	    discrepancy: string;
	  };
	  /** Per-layer trace of inputs, outputs and any issue found. */
	  full_chain: Array<{ layer: string; input: string; output: string; issue: string }>;
	  /** Stage 3 only ever reports a high-confidence cause. */
	  definitive_cause: { function: string; mechanism: string; confidence: "High" };
	  fix: string;
	  test_case: string;
	};

	/** Any stage's result; discriminate on the numeric `stage` field. */
	export type StageResult = Stage1Result | Stage2Result | Stage3Result;

	// ───────────────────────── Connect / Fetch ─────────────────────────
	//
	// The Worker runtime cannot SSH. Two real fetch strategies work here:
	// 1. Central manifest endpoint (HTTP GET) – vehicles push build info on startup
	// 2. GitHub raw / API (HTTP) – fetch exact files at the manifest commit
	// SSH-into-vehicle is intentionally surfaced as "agent required" to the user.

	// Input accepted by testVehicleConnection. Only vehicleId is required.
	const ConnectSchema = z.object({
	vehicleId: z.string().min(1).max(80),
	// Either a syntactically valid URL, omitted, or the empty string.
	manifestUrl: z.string().url().optional().or(z.literal("")),
	// Length-limited only; the "org/repo" shape is not enforced by this schema.
	githubRepo: z.string().max(160).optional(), // "org/repo"
	// Accepted here but not read by testVehicleConnection.
	githubBranch: z.string().max(80).optional(),
	// Sent as a Bearer token on the GitHub request when present.
	githubToken: z.string().max(200).optional(),
	});

	/**
	 * Server function that probes the configured data sources for a vehicle.
	 *
	 * When `manifestUrl` is set, it requests
	 * `<manifestUrl>/vehicles/<vehicleId>/manifest` and reports reachability,
	 * HTTP status and the `commit` / `branch` strings found in the JSON body.
	 * When `githubRepo` is set, it requests the repository on the GitHub API and
	 * reports reachability and HTTP status. Failures are recorded in the result
	 * rather than thrown.
	 *
	 * @returns `{ vehicleId, results }`, where `results` also carries `latencyMs`
	 *   for the whole probe.
	 */
	export const testVehicleConnection = createServerFn({ method: "POST" })
	  .inputValidator((d: unknown) => ConnectSchema.parse(d))
	  .handler(async ({ data }) => {
	    // Loosely typed result bag; the type is kept as-is so existing callers compile.
	    // eslint-disable-next-line @typescript-eslint/no-explicit-any
	    const results: Record<string, any> = {};
	    const t0 = Date.now();

	    // assertSafePublicUrl throws a Response, which has no `.message`; read its
	    // body so the caller sees why the URL was rejected.
	    const describeError = async (e: unknown): Promise<string> => {
	      if (e instanceof Response) {
	        const body = await e.text().catch(() => "");
	        return body || `HTTP ${e.status}`;
	      }
	      return e instanceof Error ? e.message : String(e);
	    };

	    if (data.manifestUrl) {
	      try {
	        assertSafePublicUrl(data.manifestUrl);
	        const url = `${data.manifestUrl.replace(/\/$/, "")}/vehicles/${encodeURIComponent(data.vehicleId)}/manifest`;
	        // Do not follow redirects: the guard only vetted the original host, and
	        // a redirect could point at an internal address.
	        const res = await fetch(url, { redirect: "manual", signal: AbortSignal.timeout(8000) });
	        results.manifestReachable = res.ok;
	        results.manifestStatus = res.status;
	        if (res.ok) {
	          const m: unknown = await res.json().catch(() => null);
	          if (m !== null && typeof m === "object") {
	            // Only reflect known typed fields back to caller
	            const { commit, branch } = m as { commit?: unknown; branch?: unknown };
	            results.deployedCommit = typeof commit === "string" ? commit.slice(0, 80) : null;
	            results.branch = typeof branch === "string" ? branch.slice(0, 80) : null;
	          }
	        }
	      } catch (e) {
	        results.manifestReachable = false;
	        results.manifestError = await describeError(e);
	      }
	    }

	    if (data.githubRepo) {
	      // The repo is interpolated into the API path, so require exactly
	      // "owner/name" with no traversal segments before sending the token.
	      const repoParts = data.githubRepo.split("/");
	      const repoIsValid =
	        repoParts.length === 2 &&
	        repoParts.every((part) => /^[\w.-]+$/.test(part) && part !== "." && part !== "..");
	      if (!repoIsValid) {
	        results.githubReachable = false;
	        results.githubError = 'GitHub repo must be in "org/repo" form';
	      } else {
	        try {
	          const headers: Record<string, string> = { Accept: "application/vnd.github+json" };
	          if (data.githubToken) headers.Authorization = `Bearer ${data.githubToken}`;
	          const r = await fetch(`https://api.github.com/repos/${data.githubRepo}`, {
	            headers,
	            signal: AbortSignal.timeout(8000),
	          });
	          results.githubReachable = r.ok;
	          results.githubStatus = r.status;
	        } catch (e) {
	          results.githubReachable = false;
	          results.githubError = await describeError(e);
	        }
	      }
	    }

	    results.latencyMs = Date.now() - t0;
	    return { vehicleId: data.vehicleId, results };
	  });

	// Input accepted by fetchVehicleCode. String fields are length-bounded with
	// the same limits ConnectSchema uses, so oversized input is rejected up front.
	const FetchSchema = z.object({
	  vehicleId: z.string().min(1).max(80),
	  // Repository-relative paths of the files to fetch (1 to 20 entries).
	  targetFiles: z.array(z.string().min(1).max(300)).min(1).max(20),
	  // Not URL-validated here; fetchVehicleCode runs the SSRF guard on it.
	  manifestUrl: z.string().max(2048).optional(),
	  githubRepo: z.string().max(160).optional(), // "org/repo"
	  githubBranch: z.string().max(80).optional().default("main"),
	  githubToken: z.string().max(200).optional(),
	  manualCommit: z.string().max(100).optional(), // override commit when no manifest
	});

	/**
	 * Server function that resolves the commit deployed on a vehicle and fetches
	 * the requested files at that commit from raw.githubusercontent.com.
	 *
	 * The commit is resolved in this order: `manualCommit`, then the manifest
	 * endpoint (`<manifestUrl>/vehicles/<vehicleId>/manifest`), then the HEAD of
	 * `githubBranch` via the GitHub API. Per-file failures become warnings; the
	 * call only fails when no commit can be resolved, the repo or commit is
	 * unusable, or no file could be fetched at all.
	 *
	 * @returns The resolved manifest, the fetched files (each capped at 80,000
	 *   characters) and the collected warnings.
	 * @throws Error when the repo is missing or malformed, no commit can be
	 *   determined, the commit is not a usable ref, or every file fetch failed.
	 */
	export const fetchVehicleCode = createServerFn({ method: "POST" })
	  .inputValidator((d: unknown) => FetchSchema.parse(d))
	  .handler(async ({ data }): Promise<FetchedCode> => {
	    const MAX_FILE_CHARS = 80_000;
	    const warnings: string[] = [];
	    let commit = data.manualCommit?.trim() || "";
	    let branch = data.githubBranch || "main";
	    let source: VehicleManifest["source"] = "manual";

	    // assertSafePublicUrl throws a Response, which has no `.message`; read its
	    // body so the warning explains why the URL was rejected.
	    const describeError = async (e: unknown): Promise<string> => {
	      if (e instanceof Response) {
	        const body = await e.text().catch(() => "");
	        return body || `HTTP ${e.status}`;
	      }
	      return e instanceof Error ? e.message : String(e);
	    };

	    // Percent-encodes each "/"-separated segment. Returns null when the value
	    // is empty or contains "." / ".." segments, which URL normalisation would
	    // otherwise use to escape the intended repo/commit prefix.
	    const encodeSegments = (raw: string): string | null => {
	      const segments = raw.split("/").filter((segment) => segment !== "");
	      if (segments.length === 0 || segments.some((s) => s === "." || s === "..")) return null;
	      return segments.map((segment) => encodeURIComponent(segment)).join("/");
	    };

	    // The repo is interpolated into request paths that may carry the token, so
	    // require exactly "owner/name".
	    const repo = data.githubRepo?.trim() || "";
	    if (repo) {
	      const repoParts = repo.split("/");
	      const repoIsValid =
	        repoParts.length === 2 &&
	        repoParts.every((part) => /^[\w.-]+$/.test(part) && part !== "." && part !== "..");
	      if (!repoIsValid) throw new Error('GitHub repo must be in "org/repo" form.');
	    }

	    // Strategy 1: manifest endpoint
	    if (!commit && data.manifestUrl) {
	      try {
	        assertSafePublicUrl(data.manifestUrl);
	        const url = `${data.manifestUrl.replace(/\/$/, "")}/vehicles/${encodeURIComponent(data.vehicleId)}/manifest`;
	        // Do not follow redirects: the guard only vetted the original host.
	        const r = await fetch(url, { redirect: "manual", signal: AbortSignal.timeout(8000) });
	        if (r.ok) {
	          const m: unknown = await r.json();
	          const fields = (m !== null && typeof m === "object" ? m : {}) as {
	            commit?: unknown;
	            branch?: unknown;
	          };
	          // The manifest is remote data: accept only non-empty strings.
	          if (typeof fields.commit === "string" && fields.commit.trim()) {
	            commit = fields.commit.trim();
	            if (typeof fields.branch === "string" && fields.branch) branch = fields.branch;
	            source = "manifest";
	          } else {
	            warnings.push("Manifest response did not contain a commit");
	          }
	        } else {
	          warnings.push(`Manifest endpoint returned ${r.status}`);
	        }
	      } catch (e) {
	        warnings.push(`Manifest fetch failed: ${await describeError(e)}`);
	      }
	    }

	    // If still no commit, fall back to branch HEAD via GitHub
	    if (!commit && repo) {
	      try {
	        const headers: Record<string, string> = { Accept: "application/vnd.github+json" };
	        if (data.githubToken) headers.Authorization = `Bearer ${data.githubToken}`;
	        const r = await fetch(
	          `https://api.github.com/repos/${repo}/commits/${encodeURIComponent(branch)}`,
	          { headers, signal: AbortSignal.timeout(8000) },
	        );
	        if (r.ok) {
	          const j = (await r.json()) as { sha?: unknown };
	          if (typeof j.sha === "string" && j.sha) {
	            commit = j.sha;
	            source = "github";
	            warnings.push(`No manifest — fell back to ${branch} HEAD on GitHub`);
	          }
	        } else {
	          const body = await r.text().catch(() => "");
	          warnings.push(
	            `GitHub HEAD lookup returned ${r.status} for ${repo}@${branch}` +
	              (body ? `: ${body.slice(0, 160)}` : ""),
	          );
	        }
	      } catch (e) {
	        warnings.push(`GitHub HEAD lookup failed: ${await describeError(e)}`);
	      }
	    }

	    if (!commit) {
	      const detail = warnings.length ? `\n\nDetails:\n• ${warnings.join("\n• ")}` : "";
	      throw new Error(
	        "Could not determine deployed commit. Provide a manifest URL, a valid GitHub repo (org/repo) with a reachable branch, or paste the commit hash manually." +
	          detail,
	      );
	    }

	    // Fetch each file from GitHub raw at that commit
	    if (!repo) {
	      throw new Error("GitHub repo (org/repo) is required to fetch the file contents at the resolved commit.");
	    }
	    const commitPath = encodeSegments(commit);
	    if (commitPath === null) {
	      throw new Error(`Resolved commit "${commit.slice(0, 80)}" is not a usable git ref.`);
	    }

	    const headers: Record<string, string> = {};
	    if (data.githubToken) headers.Authorization = `Bearer ${data.githubToken}`;

	    // Fetch files concurrently; outcomes stay in request order so that files
	    // and warnings are deterministic.
	    const outcomes = await Promise.all(
	      data.targetFiles.map(async (path): Promise<{ path: string; content?: string; warning?: string }> => {
	        const filePath = encodeSegments(path);
	        if (filePath === null) return { path, warning: `${path}: invalid path` };
	        const url = `https://raw.githubusercontent.com/${repo}/${commitPath}/${filePath}`;
	        try {
	          const r = await fetch(url, { headers, signal: AbortSignal.timeout(10_000) });
	          if (!r.ok) return { path, warning: `${path}: HTTP ${r.status}` };
	          const content = await r.text();
	          if (content.length > MAX_FILE_CHARS) {
	            return {
	              path,
	              content: content.slice(0, MAX_FILE_CHARS),
	              warning: `${path}: truncated to ${MAX_FILE_CHARS} characters`,
	            };
	          }
	          return { path, content };
	        } catch (e) {
	          return { path, warning: `${path}: ${await describeError(e)}` };
	        }
	      }),
	    );

	    const files: { path: string; content: string }[] = [];
	    for (const outcome of outcomes) {
	      if (outcome.content !== undefined) files.push({ path: outcome.path, content: outcome.content });
	      if (outcome.warning !== undefined) warnings.push(outcome.warning);
	    }

	    if (files.length === 0) {
	      throw new Error(`Could not fetch any of the requested files at commit ${commit.slice(0, 8)}.`);
	    }

	    return {
	      manifest: {
	        vehicleId: data.vehicleId,
	        commitHash: commit,
	        branch,
	        source,
	        fetchedAt: Date.now(),
	        warnings,
	      },
	      files,
	      fetchMethod: source === "manifest" ? "manifest+github" : source === "github" ? "github-head" : "manual+github",
	      warnings,
	    };
	  });

	// ───────────────────────── Stage analysis ─────────────────────────

	// System prompt for each forensic stage, keyed by stage number. Each prompt
	// tells the model which submit_stageN tool it must call.
	const STAGE_PROMPTS: Record<1 | 2 | 3, string> = {
	1: `You are Forensics Bot, an expert AV engineer powered by Qwen3 reasoning, specializing in post-deployment forensic debugging.
	Inputs: deployed vehicle code (identifiers anonymized as fn_NNNN) and a failure description.
	Logs are NOT yet available. Analyze the code statically. Generate ranked hypotheses and predict log signatures.
	Always call submit_stage1.`,
	2: `You are Forensics Bot, an expert AV engineer powered by Qwen3 reasoning. You have anonymized deployed code, a failure description, AND system logs.
	Correlate log evidence against the prior hypotheses (passed in). Reconstruct the timeline. Find the root cause.
	Always call submit_stage2.`,
	3: `You are Forensics Bot, an expert AV engineer powered by Qwen3 reasoning. You have anonymized code, logs, AND ROS bag / sensor data excerpts.
	Trace the full perception → planning → control chain. Determine the failure layer and the definitive root cause.
	Always call submit_stage3.`,
	};

	// Function-tool definition the model must call to submit Stage 1 output.
	// The parameter schema mirrors Stage1Result (minus the `stage` field).
	const stage1Tool = (() => {
	  const text = () => ({ type: "string" });
	  const textList = () => ({ type: "array", items: text() });
	  const choice = (options: string[]) => ({ type: "string", enum: options });
	  // Closed object schema: every declared property is required, in declaration
	  // order, and no extra properties are allowed.
	  const closed = <P extends Record<string, unknown>>(properties: P) => ({
	    type: "object",
	    properties,
	    required: Object.keys(properties),
	    additionalProperties: false,
	  });

	  const hypothesis = closed({
	    id: text(),
	    title: text(),
	    confidence: choice(["High", "Medium", "Low"]),
	    mechanism: text(),
	    code_evidence: text(),
	    log_signatures: textList(),
	    eliminates_if: text(),
	  });

	  return {
	    type: "function" as const,
	    function: {
	      name: "submit_stage1",
	      description: "Submit Stage 1 hypotheses.",
	      parameters: closed({
	        summary: text(),
	        critical_path: textList(),
	        hypotheses: { type: "array", minItems: 1, items: hypothesis },
	        what_to_grep: textList(),
	        timeline_question: text(),
	        unknowns: textList(),
	      }),
	    },
	  };
	})();

	// Function-tool definition the model must call to submit Stage 2 output.
	// The parameter schema mirrors Stage2Result (minus the `stage` field).
	const stage2Tool = (() => {
	  const text = () => ({ type: "string" });
	  const choice = (options: string[]) => ({ type: "string", enum: options });
	  const listOf = <T>(items: T) => ({ type: "array", items });
	  // Closed object schema: every declared property is required, in declaration
	  // order, and no extra properties are allowed.
	  const closed = <P extends Record<string, unknown>>(properties: P) => ({
	    type: "object",
	    properties,
	    required: Object.keys(properties),
	    additionalProperties: false,
	  });

	  const timelineEntry = closed({ offset: text(), event: text(), source: text() });
	  const verdictEntry = closed({
	    id: text(),
	    verdict: choice(["CONFIRMED", "ELIMINATED", "POSSIBLE"]),
	    evidence: text(),
	  });
	  const rootCause = closed({
	    function: text(),
	    line_hint: text(),
	    mechanism: text(),
	    confidence: choice(["High", "Medium", "Low"]),
	  });

	  return {
	    type: "function" as const,
	    function: {
	      name: "submit_stage2",
	      description: "Submit Stage 2 root cause from code + logs.",
	      parameters: closed({
	        summary: text(),
	        timeline: listOf(timelineEntry),
	        hypothesis_verdicts: listOf(verdictEntry),
	        root_cause: rootCause,
	        what_vehicle_saw: text(),
	        still_unknown: listOf(text()),
	      }),
	    },
	  };
	})();

	// Function-tool definition the model must call to submit Stage 3 output.
	// The parameter schema mirrors Stage3Result (minus the `stage` field).
	const stage3Tool = (() => {
	  const text = () => ({ type: "string" });
	  const choice = (options: string[]) => ({ type: "string", enum: options });
	  const listOf = <T>(items: T) => ({ type: "array", items });
	  // Closed object schema: every declared property is required, in declaration
	  // order, and no extra properties are allowed.
	  const closed = <P extends Record<string, unknown>>(properties: P) => ({
	    type: "object",
	    properties,
	    required: Object.keys(properties),
	    additionalProperties: false,
	  });

	  const perceptionState = closed({
	    what_vehicle_saw: text(),
	    what_it_should_have_seen: text(),
	    discrepancy: text(),
	  });
	  const chainLink = closed({ layer: text(), input: text(), output: text(), issue: text() });
	  const definitiveCause = closed({
	    function: text(),
	    mechanism: text(),
	    confidence: choice(["High"]),
	  });

	  return {
	    type: "function" as const,
	    function: {
	      name: "submit_stage3",
	      description: "Submit Stage 3 definitive cause from code + logs + sensor data.",
	      parameters: closed({
	        summary: text(),
	        failure_layer: choice(["SENSING", "PROCESSING", "PLANNING", "CONTROL"]),
	        perception_state: perceptionState,
	        full_chain: listOf(chainLink),
	        definitive_cause: definitiveCause,
	        fix: text(),
	        test_case: text(),
	      }),
	    },
	  };
	})();

	// Input accepted by runForensicStage.
	const StageInput = z.object({
	// Which stage to run; selects the system prompt and the forced tool call.
	stage: z.union([z.literal(1), z.literal(2), z.literal(3)]),
	// Accepted up to 200,000 characters; runForensicStage sends only the first
	// 50,000 characters of the sanitized code to the model.
	code: z.string().min(1).max(200_000),
	failureDescription: z.string().min(1).max(5_000),
	// Accepted up to 120,000 characters each; only the first 40,000 sanitized
	// characters of each are sent to the model.
	logs: z.string().max(120_000).optional(),
	bagData: z.string().max(120_000).optional(),
	// Results of earlier stages as returned to the client; not shape-checked here.
	priorStage1: z.unknown().optional(),
	priorStage2: z.unknown().optional(),
	});

	/**
	 * Recursively applies `restore` to every string inside a JSON-like value.
	 *
	 * Strings are passed through `restore(value, rev)`; arrays and plain objects
	 * are rebuilt with their elements/values restored; every other value is
	 * returned unchanged. Object keys are not rewritten.
	 *
	 * @param value - Parsed JSON value (typically the model's tool-call arguments).
	 * @param rev - Reverse map handed to `restore` for each string.
	 * @returns A new structure of the same shape with strings restored.
	 */
	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- return type kept for existing callers
	function deepRestore(value: unknown, rev: Map<string, string>): any {
	  if (typeof value === "string") return restore(value, rev);
	  if (Array.isArray(value)) return value.map((item) => deepRestore(item, rev));
	  if (value !== null && typeof value === "object") {
	    // Object.fromEntries defines own properties, so a "__proto__" key coming
	    // from JSON.parse is preserved as data instead of replacing the prototype
	    // (which plain `out[key] = …` assignment would do).
	    return Object.fromEntries(
	      Object.entries(value).map(([key, inner]) => [key, deepRestore(inner, rev)]),
	    );
	  }
	  return value;
	}

	/**
	 * Server function that runs one forensic analysis stage through the AI gateway.
	 *
	 * All free-text inputs are passed through `sanitize` before being sent, the
	 * model is forced to answer via the stage's `submit_stageN` tool, and the
	 * tool-call arguments are passed through `restore` (via `deepRestore`) before
	 * being returned.
	 *
	 * @returns The restored stage result (with `stage` set to the requested
	 *   stage) and counters summed from the sanitizer's stats.
	 * @throws Error on gateway failure (429 and 402 get dedicated messages) or
	 *   when the model does not return a usable structured tool call.
	 */
	export const runForensicStage = createServerFn({ method: "POST" })
	  .inputValidator((d: unknown) => StageInput.parse(d))
	  .handler(async ({ data }): Promise<{ result: StageResult; sanitizationStats: { identifiersTokenized: number; commentsStripped: number; secretsBlocked: number } }> => {
	    // Return value is unused; presumably this throws early when the AI
	    // configuration is missing — NOTE(review): confirm in ai-config.server.
	    getAIConfig();

	    const codeS = sanitize(data.code);
	    const failureS = sanitize(data.failureDescription);
	    const logsS = data.logs ? sanitize(data.logs) : null;
	    const bagS = data.bagData ? sanitize(data.bagData) : null;
	    // Prior-stage results were run through restore() before being returned to
	    // the client, so they can contain real identifiers again; sanitize them
	    // before they go back to the model.
	    // NOTE(review): assumes sanitize() assigns the same token to the same
	    // identifier across calls, as the merged reverse map below already does.
	    const prior1S = data.priorStage1 ? sanitize(JSON.stringify(data.priorStage1)) : null;
	    const prior2S = data.priorStage2 ? sanitize(JSON.stringify(data.priorStage2)) : null;

	    // Combine reverse maps so restore() works on AI output that references any token
	    const reverse = new Map<string, string>();
	    const reverseMaps = [
	      codeS.reverseMap,
	      failureS.reverseMap,
	      logsS?.reverseMap,
	      bagS?.reverseMap,
	      prior1S?.reverseMap,
	      prior2S?.reverseMap,
	    ];
	    for (const m of reverseMaps) {
	      if (m) for (const [k, v] of m.entries()) reverse.set(k, v);
	    }

	    // Only the tool for the requested stage is sent; the call is forced anyway.
	    const toolsByStage = { 1: stage1Tool, 2: stage2Tool, 3: stage3Tool };
	    const tool = toolsByStage[data.stage];
	    const toolName = tool.function.name;

	    const userParts: string[] = [
	      `DEPLOYED CODE (anonymized):\n${codeS.sanitized.slice(0, 50_000)}`,
	      `\n\nFAILURE DESCRIPTION:\n${failureS.sanitized}`,
	    ];
	    if (prior1S) userParts.push(`\n\nSTAGE 1 HYPOTHESES:\n${prior1S.sanitized}`);
	    if (prior2S) userParts.push(`\n\nSTAGE 2 ROOT CAUSE:\n${prior2S.sanitized}`);
	    if (logsS) userParts.push(`\n\nSYSTEM LOGS (anonymized):\n${logsS.sanitized.slice(0, 40_000)}`);
	    if (bagS) userParts.push(`\n\nROS BAG / SENSOR DATA (anonymized):\n${bagS.sanitized.slice(0, 40_000)}`);

	    const resp = await fetchAIWithFallback(JSON.stringify({
	      messages: [
	        { role: "system", content: STAGE_PROMPTS[data.stage] },
	        { role: "user", content: userParts.join("") },
	      ],
	      tools: [tool],
	      tool_choice: { type: "function", function: { name: toolName } },
	    }), "google/gemini-2.5-flash", `forensicStage${data.stage}`);

	    if (!resp.ok) {
	      if (resp.status === 429) throw new Error("Rate limit reached. Try again shortly.");
	      if (resp.status === 402) throw new Error("AI credits exhausted. Add credits in Workspace > Usage.");
	      const text = await resp.text().catch(() => "");
	      throw new Error(`AI gateway error ${resp.status}: ${text.slice(0, 300)}`);
	    }

	    const json = await resp.json();
	    const toolCall = json.choices?.[0]?.message?.tool_calls?.[0];
	    const rawArgs: unknown = toolCall?.function?.arguments;
	    if (!rawArgs) throw new Error("AI did not return structured analysis.");

	    // Arguments normally arrive as a JSON string; malformed JSON should become
	    // a readable error rather than a raw SyntaxError.
	    let parsedRaw: unknown = rawArgs;
	    if (typeof rawArgs === "string") {
	      try {
	        parsedRaw = JSON.parse(rawArgs);
	      } catch {
	        throw new Error("AI returned malformed structured analysis.");
	      }
	    }
	    if (parsedRaw === null || typeof parsedRaw !== "object" || Array.isArray(parsedRaw)) {
	      throw new Error("AI did not return structured analysis.");
	    }

	    // The tool schemas carry no `stage` field, so stamp it from the request.
	    const restored = { ...deepRestore(parsedRaw, reverse), stage: data.stage } as StageResult;

	    return {
	      result: restored,
	      sanitizationStats: {
	        identifiersTokenized:
	          codeS.stats.identifiersTokenized +
	          failureS.stats.identifiersTokenized +
	          (logsS?.stats.identifiersTokenized ?? 0) +
	          (bagS?.stats.identifiersTokenized ?? 0),
	        commentsStripped:
	          codeS.stats.commentsStripped +
	          (logsS?.stats.commentsStripped ?? 0) +
	          (bagS?.stats.commentsStripped ?? 0),
	        secretsBlocked:
	          codeS.stats.secretsBlocked +
	          failureS.stats.secretsBlocked +
	          (logsS?.stats.secretsBlocked ?? 0) +
	          (bagS?.stats.secretsBlocked ?? 0),
	      },
	    };
	  });