// Gemma4-WebGPU/src/utils/detection-parser.ts
// Gemma 4 multimodal WebGPU detection Space (BryanBradfo, commit 576d07a, 1.84 kB)
import type { Detection } from "../hooks/LLMContext";
/**
 * Parse JSON detection output from Gemma 4.
 *
 * The model's reply is not guaranteed to be clean JSON, so parsing is attempted
 * in three increasingly lenient steps:
 *
 * 1. Strip markdown code fences and parse the whole remaining text.
 * 2. Look for bracket-balanced `[...]` spans embedded in surrounding prose and
 *    return the first one that parses to an array containing at least one
 *    valid detection.
 * 3. Salvage every complete, brace-balanced `{...}` object. This covers
 *    truncated output whose outer array was never closed, as well as a bare
 *    single-object reply.
 *
 * @param text Raw text produced by the model.
 * @returns The entries that have a `box_2d` of four finite numbers
 *   (`[y1, x1, y2, x2]`); an empty array when nothing usable is found.
 *   This function never throws on malformed input.
 */
export function parseJsonDetection(text: string): Detection[] {
  // Strip markdown code fences (```json ... ```).
  const cleaned = text.replace(/```json\s*/g, "").replace(/```\s*/g, "").trim();

  // Step 1: the whole payload is a JSON array.
  const whole = tryParseJson(cleaned);
  if (Array.isArray(whole)) return validateDetections(whole);

  // Step 2: a JSON array embedded in extra text. Balanced matching (instead of
  // a greedy first-"[" to last-"]" match) keeps unrelated brackets in the
  // prose from corrupting the candidate.
  for (const span of extractBalancedSpans(cleaned, "[", "]")) {
    const candidate = tryParseJson(span);
    if (Array.isArray(candidate)) {
      const found = validateDetections(candidate);
      if (found.length > 0) return found;
    }
  }

  // Step 3: partial output. Keep every object that was emitted completely.
  const objects = extractBalancedSpans(cleaned, "{", "}").map(tryParseJson);
  return validateDetections(objects);
}

/** Parse `raw` as JSON, returning `undefined` instead of throwing on failure. */
function tryParseJson(raw: string): unknown {
  try {
    return JSON.parse(raw);
  } catch {
    return undefined;
  }
}

/**
 * Collect the outermost `open`...`close` balanced substrings of `text`, in order.
 *
 * Double-quoted strings are tracked only while inside a span, so delimiters
 * inside JSON string values are ignored and quotes in the surrounding prose
 * cannot confuse the scan. A span that is still open at the end of the text
 * is dropped.
 */
function extractBalancedSpans(text: string, open: string, close: string): string[] {
  const spans: string[] = [];
  let depth = 0;
  let start = -1;
  let inString = false;
  let escaped = false;

  for (let i = 0; i < text.length; i++) {
    const ch = text[i];

    if (depth === 0) {
      // Outside any span: only an opening delimiter is interesting.
      if (ch === open) {
        depth = 1;
        start = i;
        inString = false;
        escaped = false;
      }
      continue;
    }

    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === open) {
      depth++;
    } else if (ch === close) {
      depth--;
      if (depth === 0) spans.push(text.slice(start, i + 1));
    }
  }

  return spans;
}

/**
 * Keep only the entries that look like a detection.
 *
 * Only `box_2d` is checked; `label` is not validated here.
 */
function validateDetections(arr: unknown[]): Detection[] {
  return arr.filter(isDetection);
}

/**
 * Type guard: `value` is a non-null object whose `box_2d` is an array of
 * exactly four finite numbers.
 */
function isDetection(value: unknown): value is Detection {
  if (typeof value !== "object" || value === null) return false;
  const box = (value as { box_2d?: unknown }).box_2d;
  return (
    Array.isArray(box) &&
    box.length === 4 &&
    box.every((n) => typeof n === "number" && Number.isFinite(n))
  );
}
/**
 * Map a Gemma 4 bounding box from the normalized 0-1000 grid onto pixels.
 *
 * The axis order is swapped on the way out:
 * - input is Y-first: `[y1, x1, y2, x2]` (Gemma format)
 * - output is X-first: `[x1, y1, x2, y2]` (canvas format)
 *
 * @param box_2d Normalized box in Gemma order.
 * @param width Target width in pixels; scales the X coordinates.
 * @param height Target height in pixels; scales the Y coordinates.
 * @returns The box in pixel units, X-first. Values are not rounded or clamped.
 */
export function toPixelCoords(
  box_2d: [number, number, number, number],
  width: number,
  height: number,
): [number, number, number, number] {
  // Per-axis scaling from the 0-1000 grid to pixels.
  const scaleX = (norm: number): number => (norm / 1000) * width;
  const scaleY = (norm: number): number => (norm / 1000) * height;

  const top = box_2d[0];
  const left = box_2d[1];
  const bottom = box_2d[2];
  const right = box_2d[3];

  return [scaleX(left), scaleY(top), scaleX(right), scaleY(bottom)];
}
/** Palette of twelve hex color strings provided for rendering detections. */
export const DETECTION_COLORS = [
  "#ff6b6b",
  "#4ecdc4",
  "#45b7d1",
  "#f9ca24",
  "#6c5ce7",
  "#a8e6cf",
  "#fd79a8",
  "#fdcb6e",
  "#0984e3",
  "#00b894",
  "#e17055",
  "#74b9ff",
];