Spaces:

lerobot
/

visualize_dataset

Running on CPU Upgrade

App Files Files Community

visualize_dataset / src /components /annotations-panel.tsx

mishig HF Staff

Sync from GitHub via hub-sync

461b74e verified 22 days ago

Raw

History Blame Contribute Delete

29.9 kB

	"use client";

	import "./annotations-skin.css";

	/**
	* Editor UI for v3.1 language atoms.
	*
	* Three vertical sections:
	* 1. Inline quick-add bar above the timeline (style picker + label + Add).
	* 2. Annotations timeline (in `annotations-timeline.tsx`).
	* 3. Workspace below the timeline:
	* - Left rail: full atom list grouped by style; click to select.
	* - Right pane: editor for the selected atom (or empty state).
	*
	* Bbox / keypoint VQA atoms are still added through the canvas overlay's
	* quick-label popup; the inline quick-add covers task_aug / subtask / plan /
	* memory / interjection / speech / count / attribute / spatial.
	*/

	import React, { useMemo, useState } from "react";
	import { useTime } from "../context/time-context";
	import { useAnnotations } from "../context/annotations-context";
	import {
	buildSpeechAtom,
	classifyVqa,
	isSpeechAtom,
	parseVqaAnswer,
	speechText,
	type LanguageAtom,
	} from "../types/language.types";
	import {
	exportDataset as apiExport,
	isAnnotateBackendEnabled,
	} from "../utils/annotationsClient";

	/** Props accepted by `AnnotationsPanel`. */
	interface Props {
	/** Camera keys for the episode; the first one seeds the active camera when none is set yet. */
	cameraKeys: string[];
	}

	/**
	 * Render a time in seconds with millisecond precision and an "s" suffix,
	 * e.g. `1.5` becomes `"1.500s"`.
	 */
	function fmtTime(s: number): string {
	  const fixed = s.toFixed(3);
	  return `${fixed}s`;
	}

	/**
	 * Small tag naming an atom's style.
	 *
	 * A `null` style is shown as "speech"; the same name is used both as the
	 * CSS modifier class and as the visible text.
	 */
	function StylePill({ style }: { style: string | null }) {
	  const name = style ?? "speech";
	  return <span className={`style-pill ${name}`}>{name}</span>;
	}

	/**
	 * Tell whether `ts` lies strictly within half a frame of `currentTime`,
	 * which is what decides if a row gets the "now" highlight.
	 *
	 * @param ts          Timestamp of the row, in seconds.
	 * @param currentTime Playback position, in seconds.
	 * @param fps         Frame rate used to size the half-frame window (default 30).
	 */
	function isActiveAt(ts: number, currentTime: number, fps = 30): boolean {
	  const halfFrame = 0.5 / fps;
	  const distance = Math.abs(currentTime - ts);
	  return distance < halfFrame;
	}

	/** Identifier of each entry offered by the inline quick-add dropdown. */
	type QuickAddKind =
	  | "task_aug"
	  | "subtask"
	  | "plan"
	  | "memory"
	  | "interjection"
	  | "speech"
	  | "count"
	  | "attribute"
	  | "spatial";

	/** One input rendered in the quick-add bar for the selected kind. */
	interface QuickAddField {
	  /** Key under which the typed value is stored and handed to `build`. */
	  name: string;
	  /** Placeholder text shown in the empty input. */
	  placeholder: string;
	  /** Input type; anything other than "number" renders a text input. */
	  type?: "text" | "number";
	  /** Optional fixed CSS width applied through an inline style. */
	  width?: string;
	  /** When true the input gets the "grow" class. */
	  grow?: boolean;
	}

	/** Per-click context passed to a quick-add `build` function. */
	interface QuickAddBuildCtx {
	  /** Timestamp (seconds) the new atoms should be placed at. */
	  ts: number;
	  /** Camera to tag VQA atoms with, or `null` when no camera is available. */
	  vqaCamera: string | null;
	}

	/** Declarative description of one quick-add kind. */
	interface QuickAddDef {
	  /** Identifier used as the dropdown option value. */
	  kind: QuickAddKind;
	  /** Text shown for the dropdown option. */
	  label: string;
	  /** When true, the displayed timestamp is 0 (atom is pinned to episode start). */
	  atEpisodeStart?: boolean;
	  /** Inputs rendered for this kind, in display order. */
	  fields: QuickAddField[];
	  /**
	   * Turn the typed values into the atoms to add.
	   * Returns `null` when the input is incomplete and nothing should be added.
	   */
	  build: (
	    values: Record<string, string>,
	    ctx: QuickAddBuildCtx,
	  ) => LanguageAtom[] | null;
	}

	/**
	 * Read one quick-add field as a trimmed string.
	 *
	 * The values record only holds fields the user has typed into, so a lookup
	 * can yield `undefined` at runtime even though the type says `string`;
	 * treat that as an empty string instead of crashing on `.trim()`.
	 */
	function quickAddValue(values: Record<string, string>, name: string): string {
	  return (values[name] ?? "").trim();
	}

	/**
	 * Build the definition for a kind that takes a single free-text "label"
	 * field and produces exactly one camera-agnostic atom.
	 */
	function singleTextDef(spec: {
	  kind: QuickAddKind;
	  label: string;
	  placeholder: string;
	  role: LanguageAtom["role"];
	  style: LanguageAtom["style"];
	  atEpisodeStart?: boolean;
	}): QuickAddDef {
	  const def: QuickAddDef = {
	    kind: spec.kind,
	    label: spec.label,
	    fields: [{ name: "label", placeholder: spec.placeholder, grow: true }],
	    build: (values, { ts }) => {
	      const text = quickAddValue(values, "label");
	      if (!text) return null;
	      return [
	        {
	          role: spec.role,
	          content: text,
	          style: spec.style,
	          // Kinds pinned to the episode start ignore the playhead.
	          timestamp: spec.atEpisodeStart ? 0 : ts,
	          camera: null,
	          tool_calls: null,
	        },
	      ];
	    },
	  };
	  if (spec.atEpisodeStart) def.atEpisodeStart = true;
	  return def;
	}

	/**
	 * Build the question / answer atom pair shared by the simple VQA kinds.
	 * The answer object is serialised as JSON in the key order given.
	 */
	function vqaPair(
	  question: string,
	  answer: Record<string, unknown>,
	  { ts, vqaCamera }: QuickAddBuildCtx,
	): LanguageAtom[] {
	  return [
	    {
	      role: "user",
	      content: question,
	      style: "vqa",
	      timestamp: ts,
	      camera: vqaCamera,
	      tool_calls: null,
	    },
	    {
	      role: "assistant",
	      content: JSON.stringify(answer),
	      style: "vqa",
	      timestamp: ts,
	      camera: vqaCamera,
	      tool_calls: null,
	    },
	  ];
	}

	// Each text-style atom kind (and the simpler VQA shapes) is one entry: how
	// it appears in the dropdown, what fields the user fills, and how those
	// values map to one or two language atoms. Every builder returns null when
	// a required field is empty or whitespace-only.
	const QUICK_ADD_DEFS: QuickAddDef[] = [
	  singleTextDef({
	    kind: "task_aug",
	    label: "task augmentation",
	    placeholder: "pick up the blue cube and place it in the green box",
	    role: "user",
	    style: "task_aug",
	    atEpisodeStart: true,
	  }),
	  singleTextDef({
	    kind: "subtask",
	    label: "subtask",
	    placeholder: "grasp the handle of the sponge",
	    role: "assistant",
	    style: "subtask",
	  }),
	  singleTextDef({
	    kind: "plan",
	    label: "plan",
	    placeholder: "1. grab sponge / 2. wipe / 3. tidy",
	    role: "assistant",
	    style: "plan",
	  }),
	  singleTextDef({
	    kind: "memory",
	    label: "memory",
	    placeholder: "sponge picked up; counter still dirty",
	    role: "assistant",
	    style: "memory",
	  }),
	  singleTextDef({
	    kind: "interjection",
	    label: "interjection (user)",
	    placeholder: "user: actually skip the wipe…",
	    role: "user",
	    style: "interjection",
	  }),
	  {
	    kind: "speech",
	    label: "speech (robot say)",
	    fields: [
	      {
	        name: "label",
	        placeholder: "robot say: Got it, skipping the wipe.",
	        grow: true,
	      },
	    ],
	    build: (values, { ts }) => {
	      const text = quickAddValue(values, "label");
	      if (!text) return null;
	      return [buildSpeechAtom(ts, text)];
	    },
	  },
	  {
	    kind: "count",
	    label: "vqa: count",
	    fields: [
	      { name: "label", placeholder: "object label (e.g. cup)", grow: true },
	      { name: "count", placeholder: "count", type: "number", width: "80px" },
	    ],
	    build: (values, ctx) => {
	      const text = quickAddValue(values, "label");
	      const rawCount = quickAddValue(values, "count");
	      const count = Number(rawCount);
	      // An empty string converts to 0 and junk to NaN (which JSON would
	      // write as null), so both are rejected explicitly.
	      if (!text || rawCount === "" || !Number.isFinite(count)) return null;
	      return vqaPair(`How many ${text}?`, { label: text, count }, ctx);
	    },
	  },
	  {
	    kind: "attribute",
	    label: "vqa: attribute",
	    fields: [
	      { name: "label", placeholder: "label", width: "120px" },
	      { name: "attribute", placeholder: "attribute (color)", width: "120px" },
	      { name: "value", placeholder: "value (red)", grow: true },
	    ],
	    build: (values, ctx) => {
	      const text = quickAddValue(values, "label");
	      const attribute = quickAddValue(values, "attribute");
	      const value = quickAddValue(values, "value");
	      if (!text || !attribute || !value) return null;
	      return vqaPair(
	        `What ${attribute} is the ${text}?`,
	        { label: text, attribute, value },
	        ctx,
	      );
	    },
	  },
	  {
	    kind: "spatial",
	    label: "vqa: spatial relation",
	    fields: [
	      { name: "subject", placeholder: "subject", width: "100px" },
	      { name: "relation", placeholder: "relation (right_of)", width: "130px" },
	      { name: "object", placeholder: "object", grow: true },
	    ],
	    build: (values, ctx) => {
	      const subject = quickAddValue(values, "subject");
	      const relation = quickAddValue(values, "relation");
	      const object = quickAddValue(values, "object");
	      if (!subject || !relation || !object) return null;
	      return vqaPair(
	        `Where is the ${subject} relative to the ${object}?`,
	        { subject, relation, object },
	        ctx,
	      );
	    },
	  },
	];

	/** Lookup table from quick-add kind to its definition. */
	const QUICK_ADD_DEFS_BY_KIND: Record<QuickAddKind, QuickAddDef> = (() => {
	  const byKind = {} as Record<QuickAddKind, QuickAddDef>;
	  for (const entry of QUICK_ADD_DEFS) {
	    byKind[entry.kind] = entry;
	  }
	  return byKind;
	})();

	/** Configuration of one section of the annotation rail. */
	interface RailGroupDef {
	  /** Stable identifier, used as the React key of the section. */
	  key: string;
	  /** Heading shown for the section. */
	  title: string;
	  /** CSS class of the coloured dot next to the heading. */
	  dotClass: string;
	  // Which v3.1 language column this style is written to. Used to group the
	  // rail under "Persistent" vs "Events" headers so it's clear at a glance
	  // that task_aug / subtask / plan / memory broadcast across the whole
	  // episode (language_persistent) while interjection / speech / vqa fire on
	  // a single frame (language_events). Mirrors columnForStyle() exactly.
	  column: "persistent" | "events";
	  /**
	   * Decide whether an atom belongs to this section. `otherCamera` reports
	   * whether the atom is tagged with a camera other than the active one.
	   */
	  match: (
	    atom: LanguageAtom,
	    otherCamera: (a: LanguageAtom) => boolean,
	  ) => boolean;
	  /** Produce the row text for an atom of this section. */
	  label: (
	    atom: LanguageAtom,
	    helpers: {
	      activeCamera: string | null;
	      firstLine: (s: string | null) => string;
	    },
	  ) => string;
	}

	/**
	 * Rail sections in display order. Each entry says which atoms it owns and
	 * how a row for such an atom is labelled; empty text falls back to
	 * "(empty)".
	 */
	const RAIL_GROUPS: RailGroupDef[] = [
	  {
	    key: "task_aug",
	    title: "task aug",
	    dotClass: "dot-task-aug",
	    column: "persistent",
	    match: (a) => a.style === "task_aug",
	    label: (a) => a.content || "(empty)",
	  },
	  {
	    key: "subtask",
	    title: "subtask",
	    dotClass: "dot-subtask",
	    column: "persistent",
	    match: (a) => a.style === "subtask",
	    label: (a) => a.content || "(empty)",
	  },
	  {
	    key: "plan",
	    title: "plan",
	    dotClass: "dot-plan",
	    column: "persistent",
	    match: (a) => a.style === "plan",
	    // Plans can span several lines; the row shows only the first.
	    label: (a, { firstLine }) => firstLine(a.content),
	  },
	  {
	    key: "memory",
	    title: "memory",
	    dotClass: "dot-memory",
	    column: "persistent",
	    match: (a) => a.style === "memory",
	    label: (a, { firstLine }) => firstLine(a.content),
	  },
	  {
	    key: "interjection",
	    title: "interjection",
	    dotClass: "dot-interjection",
	    column: "events",
	    match: (a) => a.style === "interjection",
	    label: (a) => a.content || "(empty)",
	  },
	  {
	    key: "speech",
	    title: "speech",
	    dotClass: "dot-speech",
	    column: "events",
	    match: (a) => isSpeechAtom(a),
	    label: (a) => speechText(a) || "(empty)",
	  },
	  {
	    key: "vqa",
	    title: "vqa",
	    dotClass: "dot-vqa",
	    column: "events",
	    // Atoms tagged with a different camera are left out of the rail.
	    match: (a, otherCamera) => a.style === "vqa" && !otherCamera(a),
	    label: (a, { activeCamera }) => {
	      const role = a.role === "user" ? "Q" : "A";
	      const t = a.content || "";
	      // Name the camera only when it differs from the one being viewed.
	      const cameraSuffix =
	        a.camera && a.camera !== activeCamera ? ` [${a.camera}]` : "";
	      return `${role}: ${t.slice(0, 60)}${t.length > 60 ? "…" : ""}${cameraSuffix}`;
	    },
	  },
	];

	/**
	 * Hook returning a stable callback that seeks to a timestamp (tagged as an
	 * "external" seek) and then pauses playback.
	 */
	function useJump(): (ts: number) => void {
	  const { seek: seekTo, setIsPlaying: setPlaying } = useTime();
	  const seekAndPause = (ts: number): void => {
	    seekTo(ts, "external");
	    setPlaying(false);
	  };
	  return React.useCallback(seekAndPause, [seekTo, setPlaying]);
	}

	/**
	 * Annotation workbench for the current episode.
	 *
	 * Renders, top to bottom: an action bar (save episode / save dataset), the
	 * latest save status, the inline quick-add composer, and a workspace with
	 * the grouped atom rail on the left and the editor for the selected atom on
	 * the right.
	 *
	 * @param cameraKeys Camera keys of the episode; the first seeds the active
	 *                   camera when none is set.
	 */
	export const AnnotationsPanel: React.FC<Props> = ({ cameraKeys }) => {
	  const {
	    atoms,
	    addAtoms,
	    updateAtom,
	    deleteAtom,
	    snap,
	    save,
	    saving,
	    dirty,
	    backendEnabled,
	    activeCamera,
	    setActiveCamera,
	    setDrawMode,
	    selectedIdx,
	    selectAtom,
	    ident,
	  } = useAnnotations();
	  const { currentTime } = useTime();

	  // ============ Inline quick-add state ============
	  const [qaKind, setQaKind] = useState<QuickAddKind>("subtask");
	  const [qaValues, setQaValues] = useState<Record<string, string>>({});
	  const [exportStatus, setExportStatus] = useState<string | null>(null);
	  const qaDef = QUICK_ADD_DEFS_BY_KIND[qaKind];

	  // Initialize active camera once cameras arrive.
	  React.useEffect(() => {
	    if (!activeCamera && cameraKeys.length > 0) setActiveCamera(cameraKeys[0]);
	  }, [activeCamera, cameraKeys, setActiveCamera]);

	  // The Annotations tab keeps the canvas overlay in "auto" mode the whole
	  // time — drag = bbox, click = keypoint.
	  React.useEffect(() => {
	    setDrawMode("auto");
	    return () => setDrawMode("off");
	  }, [setDrawMode]);

	  // ============ Atom grouping for the rail ============
	  // The rail shows one section per atom-kind. Each kind is a single config
	  // entry: how to detect atoms in this kind, and how to label them in the row.
	  // VQA filters out other-camera answers when the dataset has multiple
	  // cameras so the rail mirrors the active video.
	  const groups = useMemo(() => {
	    const firstLine = (s: string | null) =>
	      (s || "").split("\n")[0] || "(empty)";
	    const otherCamera = (a: LanguageAtom): boolean =>
	      !!activeCamera &&
	      cameraKeys.length > 1 &&
	      a.camera != null &&
	      a.camera !== activeCamera;
	    return RAIL_GROUPS.map((def) => {
	      const entries = atoms
	        .map((atom, idx) => ({ atom, idx }))
	        .filter(({ atom }) => def.match(atom, otherCamera))
	        .map(({ atom, idx }) => ({
	          atom,
	          idx,
	          label: def.label(atom, { activeCamera, firstLine }),
	        }))
	        .sort((a, b) => a.atom.timestamp - b.atom.timestamp);
	      return { def, entries };
	    });
	  }, [atoms, activeCamera, cameraKeys.length]);

	  // ============ Quick-add handler ============
	  // VQA quick-adds inherit the active camera so per-camera filtering shows
	  // them in the right rail / overlay. Non-VQA atoms stay camera-agnostic
	  // (the def's `build` ignores `vqaCamera` for those).
	  const handleQuickAdd = () => {
	    const ts = snap(currentTime);
	    const vqaCamera = activeCamera ?? cameraKeys[0] ?? null;
	    // `qaValues` only contains fields the user has typed into. Give the
	    // builder an empty string for every other declared field so it never
	    // receives `undefined` where its signature promises a string.
	    const values: Record<string, string> = {};
	    for (const field of qaDef.fields) {
	      values[field.name] = qaValues[field.name] ?? "";
	    }
	    const newAtoms = qaDef.build(values, { ts, vqaCamera });
	    if (!newAtoms || !newAtoms.length) return;
	    addAtoms(newAtoms);
	    // Select the freshly added atom (last one added) so the editor opens for it.
	    selectAtom(atoms.length + newAtoms.length - 1);
	    setQaValues({});
	  };

	  // ============ Save / export ============
	  const handleSave = async () => {
	    // Mirror handleSaveDataset: a rejected save() must surface in the status
	    // line rather than become an unhandled promise rejection.
	    try {
	      const r = await save();
	      if (!r.ok) {
	        setExportStatus(`Save failed: ${r.error || "unknown"}`);
	      } else {
	        setExportStatus(
	          r.path
	            ? `Saved episode to ${r.path}`
	            : "Saved episode (backend did not report a path — update/restart backend/app.py).",
	        );
	      }
	    } catch (e) {
	      setExportStatus(
	        `Save failed: ${e instanceof Error ? e.message : String(e)}`,
	      );
	    }
	  };

	  const handleSaveDataset = async () => {
	    if (!isAnnotateBackendEnabled()) {
	      setExportStatus(
	        "Backend not configured. Set NEXT_PUBLIC_ANNOTATE_BACKEND_URL and run backend/app.py.",
	      );
	      return;
	    }
	    setExportStatus("Saving dataset…");
	    try {
	      const r = await apiExport(ident);
	      setExportStatus(
	        `Saved dataset to ${r.output_dir} (persistent: ${r.persistent_rows}, events: ${r.event_rows}).`,
	      );
	    } catch (e) {
	      setExportStatus(
	        `Save dataset failed: ${e instanceof Error ? e.message : String(e)}`,
	      );
	    }
	  };

	  // Keep the validated index next to its atom so the editor callbacks do not
	  // need a type assertion on `selectedIdx`.
	  const selected =
	    selectedIdx != null && selectedIdx >= 0 && selectedIdx < atoms.length
	      ? { idx: selectedIdx, atom: atoms[selectedIdx] }
	      : null;

	  // ============ Render ============
	  return (
	    <div className="annotation-workbench">
	      <div className="annotation-actionbar">
	        <div>
	          <h3>
	            Language annotations
	            {dirty && <span className="dirty-pill">unsaved</span>}
	          </h3>
	          <p>
	            Select an atom from the timeline or list, then edit it in the
	            inspector.
	          </p>
	        </div>
	        <div className="actionbar-actions">
	          {!backendEnabled && (
	            <span className="backend-offline">
	              backend offline — edits saved to sessionStorage only
	            </span>
	          )}
	          <button
	            disabled={saving || !dirty}
	            onClick={handleSave}
	            className="text-xs h-7 px-3 rounded border border-cyan-500/40 bg-cyan-500/10 text-cyan-200 hover:bg-cyan-500/20 disabled:opacity-40"
	          >
	            {saving ? "Saving…" : "Save episode"}
	          </button>
	          <button
	            disabled={!backendEnabled}
	            onClick={handleSaveDataset}
	            className="text-xs h-7 px-3 rounded border border-emerald-500/40 bg-emerald-500/10 text-emerald-200 hover:bg-emerald-500/20 disabled:opacity-40"
	          >
	            Save dataset
	          </button>
	        </div>
	      </div>

	      {exportStatus && <div className="save-status">{exportStatus}</div>}

	      <section className="annotation-composer">
	        <div className="composer-copy">
	          <span className="section-kicker">Add text annotation</span>
	          <p>
	            Adds task phrasing, subtask, plan, memory, speech, or non-spatial
	            VQA atoms. Task phrasings are saved at episode start.
	          </p>
	        </div>
	        <div className="quick-add">
	          <span className="ts-pill">
	            t = {qaDef.atEpisodeStart ? fmtTime(0) : fmtTime(currentTime)}
	          </span>
	          <select
	            value={qaKind}
	            onChange={(e) => {
	              setQaKind(e.target.value as QuickAddKind);
	              setQaValues({});
	            }}
	          >
	            {QUICK_ADD_DEFS.map((d) => (
	              <option key={d.kind} value={d.kind}>
	                {d.label}
	              </option>
	            ))}
	          </select>
	          {qaDef.fields.map((f, i) => (
	            <input
	              key={f.name}
	              type={f.type === "number" ? "number" : "text"}
	              placeholder={f.placeholder}
	              className={f.grow ? "grow" : undefined}
	              style={f.width ? { width: f.width } : undefined}
	              value={qaValues[f.name] ?? ""}
	              onChange={(e) =>
	                setQaValues((v) => ({ ...v, [f.name]: e.target.value }))
	              }
	              // Enter submits only from the last field of the row.
	              onKeyDown={
	                i === qaDef.fields.length - 1
	                  ? (e) => e.key === "Enter" && handleQuickAdd()
	                  : undefined
	              }
	            />
	          ))}
	          <button className="add-btn" onClick={handleQuickAdd}>
	            + Add at frame
	          </button>
	        </div>
	      </section>

	      <div className="workspace inspector-workspace">
	        <div className="rail annotation-list">
	          <div className="list-head">
	            <div>
	              <span className="section-kicker">Annotations</span>
	              <p>{atoms.length} atoms in this episode</p>
	            </div>
	            <span className="ts-pill">{fmtTime(currentTime)}</span>
	          </div>
	          {atoms.length === 0 && (
	            <div className="rail-empty">
	              No annotations yet.
	              <br />
	              Add text above or draw on the active video.
	            </div>
	          )}
	          {(["persistent", "events"] as const).map((column) => {
	            const colGroups = groups.filter(({ def }) => def.column === column);
	            const total = colGroups.reduce(
	              (n, { entries }) => n + entries.length,
	              0,
	            );
	            // Hide a column entirely when none of its sections has rows.
	            if (total === 0) return null;
	            return (
	              <div className="rail-column" key={column}>
	                <div className={`rail-column-head ${column}`}>
	                  <span className="rail-column-title">
	                    {column === "persistent" ? "Persistent" : "Events"}
	                  </span>
	                  <span className="rail-column-sub">
	                    {column === "persistent"
	                      ? "language_persistent · broadcast across every frame"
	                      : "language_events · fire on a single frame"}
	                  </span>
	                </div>
	                {colGroups.map(({ def, entries }) => (
	                  <RailGroup
	                    key={def.key}
	                    title={def.title}
	                    dotClass={def.dotClass}
	                    entries={entries}
	                    currentTime={currentTime}
	                  />
	                ))}
	              </div>
	            );
	          })}
	        </div>

	        <div className="editor inspector">
	          {selected == null ? (
	            <div className="editor-empty">
	              <span className="section-kicker">Inspector</span>
	              <p>
	                Select an annotation from the list or timeline, or draw a new
	                bbox/keypoint on the video.
	              </p>
	            </div>
	          ) : (
	            <AtomEditor
	              atom={selected.atom}
	              cameraKeys={cameraKeys}
	              onChange={(updates) => updateAtom(selected.idx, updates)}
	              onDelete={() => deleteAtom(selected.atom)}
	            />
	          )}
	        </div>
	      </div>
	    </div>
	  );
	};

	// ---------------------------------------------------------------------------
	// Rail group — one row per atom, click selects.
	// ---------------------------------------------------------------------------

	/**
	 * One titled section of the rail: a heading with a coloured dot and a row
	 * count, followed by one clickable row per entry. Renders nothing when the
	 * section has no entries. Clicking a row selects its atom and jumps the
	 * player to the atom's timestamp.
	 */
	const RailGroup: React.FC<{
	  title: string;
	  dotClass: string;
	  entries: { atom: LanguageAtom; idx: number; label: string }[];
	  currentTime: number;
	}> = (props) => {
	  const { title, dotClass, entries, currentTime } = props;
	  const { selectedIdx: chosenIdx, selectAtom: choose } = useAnnotations();
	  const jumpTo = useJump();

	  // Hooks run first on every render; only then may the component bail out.
	  if (entries.length === 0) return null;

	  const rows = entries.map((entry) => {
	    const isSelected = entry.idx === chosenIdx;
	    const isCurrent = isActiveAt(entry.atom.timestamp, currentTime);
	    const handleClick = () => {
	      choose(entry.idx);
	      jumpTo(entry.atom.timestamp);
	    };
	    return (
	      <div
	        key={entry.idx}
	        className={`rail-row ${isSelected ? "selected" : ""} ${isCurrent ? "active-now" : ""}`}
	        onClick={handleClick}
	      >
	        <span className="ts">{fmtTime(entry.atom.timestamp)}</span>
	        <span className="body">{entry.label}</span>
	      </div>
	    );
	  });

	  return (
	    <div className="rail-group">
	      <div className="rail-group-head">
	        <span style={{ display: "inline-flex", alignItems: "center", gap: 6 }}>
	          <span className={`style-dot ${dotClass}`} />
	          {title}
	        </span>
	        <span className="count">{entries.length}</span>
	      </div>
	      {rows}
	    </div>
	  );
	};

	// ---------------------------------------------------------------------------
	// AtomEditor — form for the currently selected atom.
	// ---------------------------------------------------------------------------

	/**
	 * Parse the text of the timestamp field.
	 *
	 * Returns `null` for anything that is not a finite, non-negative number.
	 * Empty and whitespace-only text is rejected explicitly because `Number("")`
	 * is 0, which would otherwise move the atom to t=0 when the field is cleared.
	 */
	function parseTimestampDraft(raw: string): number | null {
	  const trimmed = raw.trim();
	  if (trimmed === "") return null;
	  const value = Number(trimmed);
	  return Number.isFinite(value) && value >= 0 ? value : null;
	}

	/** Field label and textarea height for the styles edited as plain text. */
	const TEXT_STYLE_EDITORS = new Map<string, { label: string; rows: number }>([
	  ["task_aug", { label: "Task augmentation", rows: 3 }],
	  ["subtask", { label: "Subtask", rows: 3 }],
	  ["plan", { label: "Plan", rows: 4 }],
	  ["memory", { label: "Memory", rows: 4 }],
	  ["interjection", { label: "Interjection", rows: 3 }],
	]);

	/**
	 * Editor form for the selected atom: a header with jump / delete buttons, a
	 * timestamp field with "snap to frame", and a style-specific body (plain
	 * text, robot speech, or VQA camera + question/answer).
	 *
	 * @param atom       Atom being edited.
	 * @param cameraKeys Camera keys offered by the VQA camera picker.
	 * @param onChange   Receives the partial update to apply to the atom.
	 * @param onDelete   Called when the delete button is pressed.
	 */
	const AtomEditor: React.FC<{
	  atom: LanguageAtom;
	  cameraKeys: string[];
	  onChange: (updates: Partial<LanguageAtom>) => void;
	  onDelete: () => void;
	}> = ({ atom, cameraKeys, onChange, onDelete }) => {
	  const jump = useJump();
	  const { snap } = useAnnotations();
	  const isSpeech = isSpeechAtom(atom);
	  const cameraLabel = atom.camera ?? "all cameras";
	  const roleLabel = isSpeech ? "speech" : atom.role;
	  const textEditor = TEXT_STYLE_EDITORS.get(atom.style ?? "");
	  // The field is edited as free text and committed on blur / Enter, so the
	  // in-progress text lives in local state.
	  const [timestampDraft, setTimestampDraft] = useState(() =>
	    String(atom.timestamp),
	  );

	  // Re-sync the draft whenever the atom's timestamp changes from outside.
	  React.useEffect(() => {
	    setTimestampDraft(String(atom.timestamp));
	  }, [atom.timestamp]);

	  const commitTimestamp = React.useCallback(
	    (raw = timestampDraft) => {
	      const next = parseTimestampDraft(raw);
	      if (next === null) {
	        // Invalid or empty input: restore the stored value.
	        setTimestampDraft(String(atom.timestamp));
	        return;
	      }
	      // Blurring an untouched field should not emit a redundant update.
	      if (next !== atom.timestamp) onChange({ timestamp: next });
	      setTimestampDraft(String(next));
	    },
	    [atom.timestamp, onChange, timestampDraft],
	  );

	  const commitSnappedTimestamp = () => {
	    // Snap the typed value when it is valid, else the stored timestamp.
	    const next = snap(parseTimestampDraft(timestampDraft) ?? atom.timestamp);
	    if (next !== atom.timestamp) onChange({ timestamp: next });
	    setTimestampDraft(String(next));
	  };

	  return (
	    <div className="inspector-body">
	      <div className="editor-head inspector-head">
	        <div className="inspector-title">
	          <StylePill style={atom.style} />
	          <div>
	            <strong>{fmtTime(atom.timestamp)}</strong>
	            <span>
	              {roleLabel} · {cameraLabel}
	            </span>
	          </div>
	        </div>
	        <div className="right">
	          <button
	            className="icon-btn"
	            title="Jump to this atom's frame"
	            onClick={() => jump(atom.timestamp)}
	          >
	            ▶
	          </button>
	          <button
	            className="icon-btn danger"
	            title="Delete this atom"
	            onClick={onDelete}
	          >
	            ×
	          </button>
	        </div>
	      </div>

	      <div className="field">
	        <label className="field-label">Timestamp (s)</label>
	        <div className="ts-row">
	          <input
	            type="text"
	            inputMode="decimal"
	            value={timestampDraft}
	            onChange={(e) => setTimestampDraft(e.target.value)}
	            onBlur={() => commitTimestamp()}
	            onKeyDown={(e) => {
	              if (e.key === "Enter") commitTimestamp();
	              if (e.key === "Escape") setTimestampDraft(String(atom.timestamp));
	            }}
	          />
	          <button
	            type="button"
	            className="frame-pill"
	            // Pointer-down with preventDefault keeps focus in the input, so
	            // the blur handler does not commit the unsnapped value first.
	            onPointerDown={(e) => {
	              e.preventDefault();
	              commitSnappedTimestamp();
	            }}
	            onKeyDown={(e) => {
	              if (e.key === "Enter" || e.key === " ") {
	                e.preventDefault();
	                commitSnappedTimestamp();
	              }
	            }}
	          >
	            snap to frame
	          </button>
	        </div>
	      </div>

	      {/* Content / role-specific fields */}
	      {textEditor && (
	        <div className="field">
	          <label className="field-label">{textEditor.label}</label>
	          <textarea
	            rows={textEditor.rows}
	            value={atom.content || ""}
	            onChange={(e) => onChange({ content: e.target.value })}
	          />
	        </div>
	      )}

	      {isSpeech && atom.tool_calls && (
	        <div className="field">
	          <label className="field-label">Robot speech (say tool call)</label>
	          <input
	            type="text"
	            value={speechText(atom) || ""}
	            onChange={(e) => {
	              // Only the first tool call is rewritten; any others are kept.
	              const next = atom.tool_calls
	                ? atom.tool_calls.map((tc, i) =>
	                    i === 0
	                      ? {
	                          ...tc,
	                          function: {
	                            ...tc.function,
	                            arguments: { text: e.target.value },
	                          },
	                        }
	                      : tc,
	                  )
	                : null;
	              onChange({ tool_calls: next });
	            }}
	          />
	        </div>
	      )}

	      {atom.style === "vqa" && (
	        <>
	          <CameraField
	            atom={atom}
	            cameraKeys={cameraKeys}
	            onChange={onChange}
	          />
	          <VqaEditorFields atom={atom} onChange={onChange} />
	        </>
	      )}
	    </div>
	  );
	};

	// ---------------------------------------------------------------------------
	// CameraField — surface the row-level camera tag for VQA atoms (PR 3467).
	// ---------------------------------------------------------------------------

	/**
	 * Camera picker for a VQA atom. Renders nothing for other styles or when
	 * no cameras are known. The empty option clears the tag (`camera: null`).
	 *
	 * If the atom is tagged with a camera that is not in `cameraKeys`, that
	 * camera is listed as an extra option. Without it the controlled select
	 * has no matching option and would show the "(any …)" entry even though
	 * the atom still carries a camera tag.
	 */
	const CameraField: React.FC<{
	  atom: LanguageAtom;
	  cameraKeys: string[];
	  onChange: (updates: Partial<LanguageAtom>) => void;
	}> = ({ atom, cameraKeys, onChange }) => {
	  if (atom.style !== "vqa") return null;
	  if (cameraKeys.length === 0) return null;
	  const value = atom.camera ?? "";
	  const isUnlistedCamera = value !== "" && !cameraKeys.includes(value);
	  return (
	    <div className="field">
	      <label className="field-label">Camera</label>
	      <select
	        value={value}
	        onChange={(e) =>
	          onChange({ camera: e.target.value === "" ? null : e.target.value })
	        }
	      >
	        <option value="">(any — renders on every camera)</option>
	        {cameraKeys.map((k) => (
	          <option key={k} value={k}>
	            {k}
	          </option>
	        ))}
	        {isUnlistedCamera && (
	          <option value={value}>{value} (not in camera list)</option>
	        )}
	      </select>
	    </div>
	  );
	};

	/**
	 * Content editor for a VQA atom: a single-line question input for `user`
	 * atoms, and a monospace textarea for the answer otherwise. The answer
	 * label shows the kind reported by `classifyVqa` ("unknown" when the
	 * content does not parse), and bbox / keypoint answers get a coordinate tip.
	 */
	const VqaEditorFields: React.FC<{
	  atom: LanguageAtom;
	  onChange: (updates: Partial<LanguageAtom>) => void;
	}> = ({ atom, onChange }) => {
	  if (atom.role === "user") {
	    return (
	      <div className="field">
	        <label className="field-label">Question</label>
	        <input
	          type="text"
	          value={atom.content || ""}
	          onChange={(e) => onChange({ content: e.target.value })}
	        />
	      </div>
	    );
	  }

	  // Parsing is only needed on the answer path, so it happens after the
	  // question branch has returned.
	  const parsed = parseVqaAnswer(atom.content);
	  const kind = parsed ? classifyVqa(parsed) : null;

	  // Assistant atom — answer JSON (raw + structured viewer)
	  return (
	    <div className="field">
	      <label className="field-label">Answer ({kind || "unknown"})</label>
	      <textarea
	        rows={5}
	        style={{
	          fontFamily:
	            "ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace",
	        }}
	        value={atom.content || ""}
	        onChange={(e) => onChange({ content: e.target.value })}
	      />
	      {parsed && kind === "bbox" && (
	        <p className="text-[11px] text-slate-400 mt-1">
	          Tip: bbox values are 0..1 image-relative (xyxy). Edit on the video
	          itself by deleting this and re-drawing.
	        </p>
	      )}
	      {parsed && kind === "keypoint" && (
	        <p className="text-[11px] text-slate-400 mt-1">
	          Tip: point values are 0..1 image-relative (xy).
	        </p>
	      )}
	    </div>
	  );
	};