File size: 12,444 Bytes
461b74e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
"use client";

/**
 * Per-episode annotation state for the v3.1 language schema.
 *
 * - Atoms live in memory + sessionStorage so the user can browse without a
 *   backend (read/edit, but no parquet rewrite).
 * - When `NEXT_PUBLIC_ANNOTATE_BACKEND_URL` is set, the context syncs with
 *   the FastAPI service in `backend/`: GET on episode entry, POST on save,
 *   plus frame-timestamp fetches used to snap event-style atoms to exact
 *   source-frame timestamps (the writer in lerobot#3471 enforces exact match).
 *
 * - VQA drawings (active `pendingDraw`) live here too so the panel and the
 *   video overlay component share a single source of truth.
 */

import React, {
  createContext,
  useCallback,
  useContext,
  useEffect,
  useMemo,
  useRef,
  useState,
} from "react";
import type { LanguageAtom } from "../types/language.types";
import { snapToFrame } from "../types/language.types";
import {
  fetchEpisodeAtoms,
  saveEpisodeAtoms,
  fetchFrameTimestamps,
  isAnnotateBackendEnabled,
} from "../utils/annotationsClient";

/** Prefix shared by every sessionStorage key this module writes. */
const STORAGE_PREFIX = "lerobot-annotations:v2:";

/**
 * Build the sessionStorage key for one episode of one dataset.
 *
 * @param repoOrPath - Dataset identifier (repo id or local path).
 * @param episodeId - Episode number within that dataset.
 * @returns The prefix, the dataset identifier, `::`, and the episode id,
 *   joined in that order.
 */
function storageKey(repoOrPath: string, episodeId: number): string {
  const datasetPart = STORAGE_PREFIX + repoOrPath;
  return datasetPart + "::" + episodeId;
}

/**
 * A bounding box the user has drawn but that is not yet an atom.
 * Discriminated from `PendingPointDraw` by `kind`.
 */
export interface PendingBboxDraw {
  kind: "bbox";
  // Four image-relative numbers in 0..1.
  // NOTE(review): component order (x/y/w/h vs. x1/y1/x2/y2) is not visible
  // in this file — confirm against the overlay component.
  bbox: [number, number, number, number]; // 0..1, image-relative
  // Text label attached to the drawn box.
  label: string;
  // Camera the box was drawn on, when known.
  camera?: string;
}

/**
 * A single keypoint the user has placed but that is not yet an atom.
 * Discriminated from `PendingBboxDraw` by `kind`.
 */
export interface PendingPointDraw {
  kind: "keypoint";
  // Two image-relative numbers in 0..1 (presumably x then y — TODO confirm).
  point: [number, number]; // 0..1, image-relative
  // Text label attached to the point.
  label: string;
  // Camera the point was placed on, when known.
  camera?: string;
}

/** The in-progress drawing, if any; `null` means nothing is pending. */
export type PendingDraw = PendingBboxDraw | PendingPointDraw | null;
/**
 * Gesture mode for the drawing overlay.
 *
 * `"auto"` — drag = bbox, single click = keypoint (the natural mode the
 * Annotations tab boots into). `"bbox"` and `"keypoint"` force a single
 * gesture and exist for the legacy panel-driven flow.
 * `"off"` is the provider's initial value (presumably drawing is disabled —
 * confirm against the overlay component).
 */
export type DrawMode = "off" | "auto" | "bbox" | "keypoint";

/**
 * Identifies the dataset an episode belongs to. All fields are optional;
 * `identKey` prefers `localPath` over `repoId` when deriving the storage key.
 * The whole object is passed through to the annotations client helpers.
 */
interface DatasetIdent {
  // Remote repository identifier, when the dataset is not local.
  repoId?: string | null;
  // Local filesystem path; takes precedence over `repoId` for storage keys.
  localPath?: string | null;
  // Not read in this file; forwarded to the backend client as part of the
  // ident (presumably a dataset revision/branch — TODO confirm).
  revision?: string | null;
}

/** Everything the annotations context exposes to consumers of `useAnnotations`. */
interface AnnotationsContextType {
  // Episode currently loaded; `null` until `setEpisode` is first called.
  episodeId: number | null;
  // Dataset identity given to the most recent `setEpisode` call (`{}` initially).
  ident: DatasetIdent;
  // In-memory atoms for the current episode.
  atoms: LanguageAtom[];
  // Source-frame timestamps used by `snap`; empty when unknown.
  frameTimestamps: number[];
  /**
   * Index in `atoms` of the currently selected atom (the one the right-rail
   * editor is bound to). `null` means nothing is selected — the editor shows
   * an empty state. Selection survives content edits because `updateAtom`
   * replaces the atom at the same index; we clear it on delete or when atoms
   * reset.
   */
  selectedIdx: number | null;
  // Set (or clear, with `null`) the selected atom index.
  selectAtom: (idx: number | null) => void;
  /**
   * Active <video> element for the camera the user is currently drawing on.
   * Registered by `VideoOverlayCanvas`. Used by the panel to read the
   * authoritative `currentTime` (the time-context's value is throttled and
   * can lag the real video by tens of ms — enough to land an annotation on
   * the wrong frame after a snap to the nearest frame timestamp).
   */
  activeVideoEl: HTMLVideoElement | null;
  setActiveVideoEl: (el: HTMLVideoElement | null) => void;
  // Drawing in progress, shared between the panel and the video overlay.
  // Cleared whenever `setEpisode` runs.
  pendingDraw: PendingDraw;
  // Selected camera for the drawing overlay (e.g. "observation.images.top").
  // Determines which video the next drawn bbox/point should be associated with.
  activeCamera: string | null;
  // Current gesture mode; starts as "off".
  drawMode: DrawMode;
  // Label to attach to the next drawing; starts as the empty string.
  drawLabel: string;
  // Result of `isAnnotateBackendEnabled()` at render time.
  backendEnabled: boolean;
  // True when the JSON serialisation of `atoms` differs from the snapshot
  // taken at the last hydrate/save.
  dirty: boolean;
  // True while a backend save request is in flight.
  saving: boolean;

  // Switch the context to another episode. Atoms are hydrated from
  // sessionStorage first, falling back to `initialAtoms`; frame timestamps
  // are seeded from `initialFrameTimestamps`. When the backend is enabled,
  // both are then fetched from it as well.
  setEpisode: (
    episodeId: number,
    ident: DatasetIdent,
    initialAtoms?: LanguageAtom[],
    initialFrameTimestamps?: number[],
  ) => void;
  setActiveCamera: (camera: string | null) => void;
  setDrawMode: (mode: DrawMode) => void;
  setDrawLabel: (label: string) => void;

  // Append one atom to the end of `atoms`.
  addAtom: (atom: LanguageAtom) => void;
  // Append several atoms, preserving their order.
  addAtoms: (atoms: LanguageAtom[]) => void;
  // Shallow-merge `updates` into the atom at `index`; out-of-range is a no-op.
  updateAtom: (index: number, updates: Partial<LanguageAtom>) => void;
  // Remove an atom by reference identity. Selection is cleared if it pointed
  // at the removed atom and shifted down by one if it pointed past it.
  deleteAtom: (atom: LanguageAtom) => void;
  // Empty the atom list and clear the selection.
  resetAtoms: () => void;

  setPendingDraw: (draw: PendingDraw) => void;
  // Equivalent to `setPendingDraw(null)`.
  clearPendingDraw: () => void;

  // Persist the current atoms. Resolves (never rejects) with `ok: false` and
  // an `error` message when no episode is loaded or the backend call fails.
  // Without a backend, `path` is a `sessionStorage://…` pseudo-URL.
  save: () => Promise<{ ok: boolean; error?: string; path?: string | null }>;
  // Snap an arbitrary timestamp to the nearest source frame (when known).
  // Returns `ts` unchanged when `frameTimestamps` is empty.
  snap: (ts: number) => number;
}

// React context carrying the annotation state. The default value is
// `undefined` so `useAnnotations` can detect use outside of a provider.
const AnnotationsContext = createContext<AnnotationsContextType | undefined>(
  undefined,
);

/**
 * Read the annotations context.
 *
 * @returns The value supplied by the nearest `AnnotationsProvider`.
 * @throws Error when no provider value is available in the calling tree.
 */
export function useAnnotations(): AnnotationsContextType {
  const provided = useContext(AnnotationsContext);
  if (provided) return provided;
  throw new Error("useAnnotations must be used within AnnotationsProvider");
}

/**
 * Derive the string that identifies a dataset in storage keys.
 *
 * Picks the first non-empty value of `localPath`, then `repoId`; empty
 * strings and `null` count as missing. Falls back to `"unknown"`.
 */
function identKey(ident: DatasetIdent): string {
  const { localPath, repoId } = ident;
  if (localPath) return localPath;
  if (repoId) return repoId;
  return "unknown";
}

/**
 * Provider for the per-episode annotation state exposed by `useAnnotations`.
 *
 * Owns the atom list, the selection, the drawing state and the frame
 * timestamps for the current episode. Atoms are mirrored to sessionStorage on
 * every change; when the annotate backend is enabled they are also fetched on
 * episode entry and written by `save`.
 */
export const AnnotationsProvider: React.FC<{ children: React.ReactNode }> = ({
  children,
}) => {
  const [episodeId, setEpisodeId] = useState<number | null>(null);
  const [ident, setIdent] = useState<DatasetIdent>({});
  const [atoms, setAtoms] = useState<LanguageAtom[]>([]);
  const [frameTimestamps, setFrameTimestamps] = useState<number[]>([]);
  const [pendingDraw, setPendingDrawState] = useState<PendingDraw>(null);
  const [activeCamera, setActiveCameraState] = useState<string | null>(null);
  const [drawMode, setDrawModeState] = useState<DrawMode>("off");
  const [drawLabel, setDrawLabelState] = useState<string>("");
  const [activeVideoEl, setActiveVideoElState] =
    useState<HTMLVideoElement | null>(null);
  const [selectedIdx, setSelectedIdxState] = useState<number | null>(null);
  const [dirty, setDirty] = useState(false);
  const [saving, setSaving] = useState(false);
  const backendEnabled = isAnnotateBackendEnabled();

  // Track the last saved snapshot to detect dirtiness honestly.
  const savedSnapshotRef = useRef<string>("[]");

  // Bumped on every `setEpisode` call. Async work captures the value it
  // started with and bails out if the episode has changed in the meantime, so
  // a slow response for a previous episode can never clobber the current one.
  const loadTokenRef = useRef(0);

  // Most recently committed atoms, readable from async callbacks whose
  // closure may hold an older list.
  const latestAtomsRef = useRef<LanguageAtom[]>(atoms);
  useEffect(() => {
    latestAtomsRef.current = atoms;
  }, [atoms]);

  // Hydrate from sessionStorage when episode/ident changes; if the backend
  // is enabled, also fetch authoritative atoms + frame timestamps.
  const setEpisode = useCallback(
    (
      newEpisodeId: number,
      newIdent: DatasetIdent,
      initialAtoms?: LanguageAtom[],
      initialFrameTimestamps?: number[],
    ) => {
      const token = ++loadTokenRef.current;
      const isCurrent = (): boolean => loadTokenRef.current === token;

      setEpisodeId(newEpisodeId);
      setIdent(newIdent);
      setPendingDrawState(null);
      setSelectedIdxState(null);

      // Hydrate from session first (so user edits survive episode toggles).
      // If session is empty, fall back to initialAtoms (parquet-extracted).
      let initial: LanguageAtom[] = [];
      try {
        const raw = sessionStorage.getItem(
          storageKey(identKey(newIdent), newEpisodeId),
        );
        if (raw) {
          // Only accept an array: a stored `null`/object would otherwise
          // crash on `.length` below or corrupt the atom list.
          const parsed: unknown = JSON.parse(raw);
          if (Array.isArray(parsed)) initial = parsed as LanguageAtom[];
        }
      } catch {
        /* storage unavailable or payload unparsable — start empty */
      }
      if (initial.length === 0 && initialAtoms && initialAtoms.length > 0) {
        initial = initialAtoms;
      }
      setAtoms(initial);
      savedSnapshotRef.current = JSON.stringify(initial);
      setDirty(false);
      // Seed frame timestamps from the parquet (no backend dependency); the
      // backend will optionally overwrite this below.
      setFrameTimestamps(initialFrameTimestamps ?? []);

      // Fetch from backend if available.
      if (!isAnnotateBackendEnabled()) return;

      fetchEpisodeAtoms(newEpisodeId, newIdent)
        .then((remoteAtoms) => {
          if (!isCurrent()) return; // response belongs to a previous episode
          // Prefer backend if it has anything; otherwise keep session-cached
          // edits the user made before the backend came online.
          if (remoteAtoms && remoteAtoms.length > 0) {
            setAtoms(remoteAtoms);
            savedSnapshotRef.current = JSON.stringify(remoteAtoms);
            setDirty(false);
          }
        })
        .catch(() => {
          /* backend offline — silent fallback to sessionStorage */
        });

      fetchFrameTimestamps(newEpisodeId, newIdent)
        .then((remoteTimestamps) => {
          // Overwrite the parquet seed only with a usable, up-to-date result.
          if (isCurrent() && remoteTimestamps && remoteTimestamps.length > 0) {
            setFrameTimestamps(remoteTimestamps);
          }
        })
        .catch(() => {
          /* backend offline — keep the parquet-seeded timestamps */
        });
    },
    [],
  );

  // Persist to sessionStorage on every change once we have an episode.
  useEffect(() => {
    if (episodeId == null) return;
    const serialized = JSON.stringify(atoms);
    try {
      sessionStorage.setItem(storageKey(identKey(ident), episodeId), serialized);
    } catch {
      /* ignore */
    }
    setDirty(serialized !== savedSnapshotRef.current);
  }, [atoms, episodeId, ident]);

  const snap = useCallback(
    (ts: number) =>
      frameTimestamps.length > 0 ? snapToFrame(frameTimestamps, ts) : ts,
    [frameTimestamps],
  );

  const addAtom = useCallback((atom: LanguageAtom) => {
    setAtoms((prev) => [...prev, atom]);
  }, []);

  const addAtoms = useCallback((newAtoms: LanguageAtom[]) => {
    setAtoms((prev) => [...prev, ...newAtoms]);
  }, []);

  const updateAtom = useCallback(
    (index: number, updates: Partial<LanguageAtom>) => {
      setAtoms((prev) => {
        if (index < 0 || index >= prev.length) return prev;
        const next = prev.slice();
        next[index] = { ...next[index], ...updates };
        return next;
      });
    },
    [],
  );

  const deleteAtom = useCallback(
    (atom: LanguageAtom) => {
      // Resolve the index up front and issue two independent state updates.
      // State updaters must stay pure: calling another setter from inside the
      // `setAtoms` updater would be repeated when React re-invokes updaters
      // (e.g. under StrictMode) and could shift the selection twice.
      const removedIdx = atoms.indexOf(atom);
      setAtoms((prev) => prev.filter((a) => a !== atom));
      if (removedIdx < 0) return;
      // Clear the selection if it pointed at the removed atom; shift it down
      // by one if it pointed past it so it keeps tracking the same atom.
      setSelectedIdxState((cur) => {
        if (cur == null || cur === removedIdx) return null;
        return cur > removedIdx ? cur - 1 : cur;
      });
    },
    [atoms],
  );

  const resetAtoms = useCallback(() => {
    setAtoms([]);
    setSelectedIdxState(null);
  }, []);

  const setPendingDraw = useCallback((draw: PendingDraw) => {
    setPendingDrawState(draw);
  }, []);

  const clearPendingDraw = useCallback(() => setPendingDrawState(null), []);

  const setActiveCamera = useCallback((c: string | null) => {
    setActiveCameraState(c);
  }, []);

  const setDrawMode = useCallback((m: DrawMode) => setDrawModeState(m), []);
  const setDrawLabel = useCallback((l: string) => setDrawLabelState(l), []);
  const setActiveVideoEl = useCallback(
    (el: HTMLVideoElement | null) => setActiveVideoElState(el),
    [],
  );

  const selectAtom = useCallback(
    (idx: number | null) => setSelectedIdxState(idx),
    [],
  );

  // Persist the current atoms. Never rejects: failures are reported through
  // the `ok` / `error` fields of the resolved value.
  const save = useCallback(async (): Promise<{
    ok: boolean;
    error?: string;
    path?: string | null;
  }> => {
    if (episodeId == null) return { ok: false, error: "no episode" };
    const savedJson = JSON.stringify(atoms);
    if (!isAnnotateBackendEnabled()) {
      // Persistence is sessionStorage-only — that already happened in the
      // effect above. Report the storage key as the location so the UI can
      // show a concrete "path" instead of a vague offline message.
      savedSnapshotRef.current = savedJson;
      setDirty(false);
      return {
        ok: true,
        path: `sessionStorage://${storageKey(identKey(ident), episodeId)}`,
      };
    }
    const token = loadTokenRef.current;
    setSaving(true);
    try {
      const { path } = await saveEpisodeAtoms(episodeId, ident, atoms);
      // Only touch the snapshot if we are still on the episode that was
      // saved; otherwise it now describes a different episode.
      if (loadTokenRef.current === token) {
        savedSnapshotRef.current = savedJson;
        // Edits made while the request was in flight are still unsaved.
        setDirty(JSON.stringify(latestAtomsRef.current) !== savedJson);
      }
      return { ok: true, path };
    } catch (e) {
      return { ok: false, error: e instanceof Error ? e.message : String(e) };
    } finally {
      setSaving(false);
    }
  }, [atoms, episodeId, ident]);

  const value = useMemo<AnnotationsContextType>(
    () => ({
      episodeId,
      ident,
      atoms,
      frameTimestamps,
      pendingDraw,
      activeCamera,
      activeVideoEl,
      setActiveVideoEl,
      drawMode,
      drawLabel,
      selectedIdx,
      selectAtom,
      backendEnabled,
      dirty,
      saving,
      setEpisode,
      setActiveCamera,
      setDrawMode,
      setDrawLabel,
      addAtom,
      addAtoms,
      updateAtom,
      deleteAtom,
      resetAtoms,
      setPendingDraw,
      clearPendingDraw,
      save,
      snap,
    }),
    [
      episodeId,
      ident,
      atoms,
      frameTimestamps,
      pendingDraw,
      activeCamera,
      activeVideoEl,
      setActiveVideoEl,
      drawMode,
      drawLabel,
      selectedIdx,
      selectAtom,
      backendEnabled,
      dirty,
      saving,
      setEpisode,
      setActiveCamera,
      setDrawMode,
      setDrawLabel,
      addAtom,
      addAtoms,
      updateAtom,
      deleteAtom,
      resetAtoms,
      setPendingDraw,
      clearPendingDraw,
      save,
      snap,
    ],
  );

  return (
    <AnnotationsContext.Provider value={value}>
      {children}
    </AnnotationsContext.Provider>
  );
};