aractingi committed on
Commit
aa2bc0d
·
1 Parent(s): 5fe6175

Refactor episode data loading to support v3.0 structure and enhance video player segmentation features. Added debug logging for version checks and data loading processes (will remove later).

Browse files
src/app/[org]/[dataset]/[episode]/episode-viewer.tsx CHANGED
@@ -214,14 +214,6 @@ function EpisodeViewerInner({ data }: { data: any }) {
214
  onChartsReady={() => setChartsReady(true)}
215
  />
216
 
217
- {ignoredColumns.length > 0 && (
218
- <p className="mt-2 text-orange-700">
219
- Columns{" "}
220
- <span className="font-mono">{ignoredColumns.join(", ")}</span> are
221
- NOT shown since the visualizer currently does not support 2D or 3D
222
- data.
223
- </p>
224
- )}
225
  </div>
226
 
227
  <PlaybackBar />
 
214
  onChartsReady={() => setChartsReady(true)}
215
  />
216
 
 
 
 
 
 
 
 
 
217
  </div>
218
 
219
  <PlaybackBar />
src/app/[org]/[dataset]/[episode]/fetch-data.ts CHANGED
@@ -20,253 +20,941 @@ export async function getEpisodeData(
20
  ) {
21
  const repoId = `${org}/${dataset}`;
22
  try {
23
- const episode_chunk = Math.floor(0 / 1000);
24
-
25
- // Check for compatible dataset version (v2.1 or v2.0)
26
  const version = await getDatasetVersion(repoId);
 
 
27
  const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
28
 
29
  const info = await fetchJson<DatasetMetadata>(jsonUrl);
30
 
31
- // Dataset information
32
- const datasetInfo = {
33
- repoId,
34
- total_frames: info.total_frames,
35
- total_episodes: info.total_episodes,
36
- fps: info.fps,
37
- };
 
 
 
 
 
 
38
 
39
- // Generate list of episodes
40
- const episodes =
41
- process.env.EPISODES === undefined
42
- ? Array.from(
43
- { length: datasetInfo.total_episodes },
44
- // episode id starts from 0
45
- (_, i) => i,
46
- )
47
- : process.env.EPISODES
48
- .split(/\s+/)
49
- .map((x) => parseInt(x.trim(), 10))
50
- .filter((x) => !isNaN(x));
51
-
52
- // Videos information
53
- const videosInfo = Object.entries(info.features)
54
- .filter(([key, value]) => value.dtype === "video")
55
- .map(([key, _]) => {
56
- const videoPath = formatStringWithVars(info.video_path, {
57
- video_key: key,
58
- episode_chunk: episode_chunk.toString().padStart(3, "0"),
59
- episode_index: episodeId.toString().padStart(6, "0"),
60
- });
61
- return {
62
- filename: key,
63
- url: buildVersionedUrl(repoId, version, videoPath),
64
- };
65
- });
66
 
67
- // Column data
68
- const columnNames = Object.entries(info.features)
69
- .filter(
70
- ([key, value]) =>
71
- ["float32", "int32"].includes(value.dtype) &&
72
- value.shape.length === 1,
73
- )
74
- .map(([key, { shape }]) => ({ key, length: shape[0] }));
75
 
76
- // Exclude specific columns
77
- const excludedColumns = [
78
- "timestamp",
79
- "frame_index",
80
- "episode_index",
81
- "index",
82
- "task_index",
83
- ];
84
- const filteredColumns = columnNames.filter(
85
- (column) => !excludedColumns.includes(column.key),
86
- );
87
- const filteredColumnNames = [
88
- "timestamp",
89
- ...filteredColumns.map((column) => column.key),
90
- ];
91
 
92
- const columns = filteredColumns.map(({ key }) => {
93
- let column_names = info.features[key].names;
94
- while (typeof column_names === "object") {
95
- if (Array.isArray(column_names)) break;
96
- column_names = Object.values(column_names ?? {})[0];
97
- }
 
 
 
98
  return {
99
- key,
100
- value: Array.isArray(column_names)
101
- ? column_names.map((name) => `${key}${SERIES_NAME_DELIMITER}${name}`)
102
- : Array.from(
103
- { length: columnNames.find((c) => c.key === key)?.length ?? 1 },
104
- (_, i) => `${key}${SERIES_NAME_DELIMITER}${i}`,
105
- ),
106
  };
107
  });
108
 
109
- const parquetUrl = buildVersionedUrl(
110
- repoId,
111
- version,
112
- formatStringWithVars(info.data_path, {
113
- episode_chunk: episode_chunk.toString().padStart(3, "0"),
114
- episode_index: episodeId.toString().padStart(6, "0"),
115
- })
116
- );
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
- const arrayBuffer = await fetchParquetFile(parquetUrl);
119
- const data = await readParquetColumn(arrayBuffer, filteredColumnNames);
120
- // Flatten and map to array of objects for chartData
121
- const seriesNames = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  "timestamp",
123
- ...columns.map(({ value }) => value).flat(),
124
  ];
 
125
 
126
- const chartData = data.map((row) => {
127
- const flatRow = row.flat();
128
- const obj: Record<string, number> = {};
129
- seriesNames.forEach((key, idx) => {
130
- obj[key] = flatRow[idx];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  });
132
- return obj;
133
- });
 
 
134
 
135
- // List of columns that are ignored (e.g., 2D or 3D data)
136
- const ignoredColumns = Object.entries(info.features)
 
137
  .filter(
138
  ([key, value]) =>
139
- ["float32", "int32"].includes(value.dtype) && value.shape.length > 1,
140
  )
141
- .map(([key]) => key);
 
 
142
 
143
- // 1. Group all numeric keys by suffix (excluding 'timestamp')
144
- const numericKeys = seriesNames.filter((k) => k !== "timestamp");
145
- const suffixGroupsMap: Record<string, string[]> = {};
 
 
 
 
 
 
 
 
146
  for (const key of numericKeys) {
147
- const parts = key.split(SERIES_NAME_DELIMITER);
148
- const suffix = parts[1] || parts[0]; // fallback to key if no delimiter
149
- if (!suffixGroupsMap[suffix]) suffixGroupsMap[suffix] = [];
150
- suffixGroupsMap[suffix].push(key);
151
- }
152
- const suffixGroups = Object.values(suffixGroupsMap);
153
-
154
- // 2. Compute min/max for each suffix group as a whole
155
- const groupStats: Record<string, { min: number; max: number }> = {};
156
- suffixGroups.forEach((group) => {
157
- let min = Infinity,
158
- max = -Infinity;
159
- for (const row of chartData) {
160
- for (const key of group) {
161
- const v = row[key];
162
- if (typeof v === "number" && !isNaN(v)) {
163
- if (v < min) min = v;
164
- if (v > max) max = v;
165
- }
166
  }
 
167
  }
168
- // Use the first key in the group as the group id
169
- groupStats[group[0]] = { min, max };
170
- });
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
- // 3. Group suffix groups by similar scale (treat each suffix group as a unit)
173
- const scaleGroups: Record<string, string[][]> = {};
174
- const used = new Set<string>();
175
- const SCALE_THRESHOLD = 2;
176
- for (const group of suffixGroups) {
177
- const groupId = group[0];
178
- if (used.has(groupId)) continue;
179
- const { min, max } = groupStats[groupId];
180
- if (!isFinite(min) || !isFinite(max)) continue;
181
- const logMin = Math.log10(Math.abs(min) + 1e-9);
182
- const logMax = Math.log10(Math.abs(max) + 1e-9);
183
- const unit: string[][] = [group];
184
- used.add(groupId);
185
- for (const other of suffixGroups) {
186
- const otherId = other[0];
187
- if (used.has(otherId) || otherId === groupId) continue;
188
- const { min: omin, max: omax } = groupStats[otherId];
189
- if (!isFinite(omin) || !isFinite(omax) || omin === omax) continue;
190
- const ologMin = Math.log10(Math.abs(omin) + 1e-9);
191
- const ologMax = Math.log10(Math.abs(omax) + 1e-9);
192
- if (
193
- Math.abs(logMin - ologMin) <= SCALE_THRESHOLD &&
194
- Math.abs(logMax - ologMax) <= SCALE_THRESHOLD
195
- ) {
196
- unit.push(other);
197
- used.add(otherId);
198
  }
199
  }
200
- scaleGroups[groupId] = unit;
201
  }
 
 
202
 
203
- // 4. Flatten scaleGroups into chartGroups (array of arrays of keys)
204
- const chartGroups: string[][] = Object.values(scaleGroups)
205
- .sort((a, b) => b.length - a.length)
206
- .flatMap((suffixGroupArr) => {
207
- // suffixGroupArr is array of suffix groups (each is array of keys)
208
- const merged = suffixGroupArr.flat();
209
- if (merged.length > 6) {
210
- const subgroups = [];
211
- for (let i = 0; i < merged.length; i += 6) {
212
- subgroups.push(merged.slice(i, i + 6));
213
- }
214
- return subgroups;
215
- }
216
- return [merged];
217
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
- const duration = chartData[chartData.length - 1].timestamp;
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
- // Utility: group row keys by suffix
222
- function groupRowBySuffix(row: Record<string, number>): Record<string, any> {
223
- const result: Record<string, any> = {};
224
- const suffixGroups: Record<string, Record<string, number>> = {};
 
 
 
 
 
 
 
225
  for (const [key, value] of Object.entries(row)) {
226
  if (key === "timestamp") {
227
  result["timestamp"] = value;
228
  continue;
229
  }
230
- const parts = key.split(SERIES_NAME_DELIMITER);
231
- if (parts.length === 2) {
232
- const [prefix, suffix] = parts;
233
- if (!suffixGroups[suffix]) suffixGroups[suffix] = {};
234
- suffixGroups[suffix][prefix] = value;
 
 
 
 
 
 
 
 
 
235
  } else {
 
236
  result[key] = value;
237
  }
238
  }
239
- for (const [suffix, group] of Object.entries(suffixGroups)) {
 
 
240
  const keys = Object.keys(group);
241
  if (keys.length === 1) {
242
- // Use the full original name as the key
243
- const fullName = `${keys[0]}${SERIES_NAME_DELIMITER}${suffix}`;
244
- result[fullName] = group[keys[0]];
245
  } else {
 
246
  result[suffix] = group;
247
  }
248
  }
249
- return result;
 
 
 
 
250
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
- const chartDataGroups = chartGroups.map((group) =>
253
- chartData.map((row) => groupRowBySuffix(pick(row, [...group, "timestamp"])))
254
- );
 
 
 
 
 
 
 
 
 
255
 
 
 
 
 
256
  return {
257
- datasetInfo,
258
- episodeId,
259
- videosInfo,
260
- chartDataGroups,
261
- episodes,
262
- ignoredColumns,
263
- duration,
264
  };
265
- } catch (err) {
266
- console.error("Error loading episode data:", err);
267
- throw err;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  }
 
270
 
271
  // Safe wrapper for UI error display
272
  export async function getEpisodeDataSafe(
 
20
  ) {
21
  const repoId = `${org}/${dataset}`;
22
  try {
23
+ // Check for compatible dataset version (v3.0, v2.1, or v2.0)
 
 
24
  const version = await getDatasetVersion(repoId);
25
+ console.log(`[DEBUG] Detected dataset version: ${version} for ${repoId}`);
26
+
27
  const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
28
 
29
  const info = await fetchJson<DatasetMetadata>(jsonUrl);
30
 
31
+ // Handle different versions
32
+ if (version === "v3.0") {
33
+ console.log(`[DEBUG] Using v3.0 data loader for ${repoId}`);
34
+ return await getEpisodeDataV3(repoId, version, info, episodeId);
35
+ } else {
36
+ console.log(`[DEBUG] Using v2.x data loader for ${repoId} (version: ${version})`);
37
+ return await getEpisodeDataV2(repoId, version, info, episodeId);
38
+ }
39
+ } catch (err) {
40
+ console.error("Error loading episode data:", err);
41
+ throw err;
42
+ }
43
+ }
44
 
45
/**
 * Legacy v2.x data loading.
 *
 * Assembles everything the episode viewer needs for one episode of a
 * v2.0 / v2.1 dataset: dataset summary, the list of selectable episodes,
 * one video URL per video feature, and the numeric series grouped into
 * charts of similar scale.
 *
 * @param repoId    `${org}/${dataset}` identifier passed to `buildVersionedUrl`.
 * @param version   Dataset version string used to build URLs.
 * @param info      Parsed `meta/info.json` of the dataset.
 * @param episodeId Index of the episode to load.
 * @returns The viewer payload (`datasetInfo`, `episodeId`, `videosInfo`,
 *          `chartDataGroups`, `episodes`, `ignoredColumns`, `duration`).
 */
async function getEpisodeDataV2(
  repoId: string,
  version: string,
  info: DatasetMetadata,
  episodeId: number,
) {
  // The path templates take a chunk number and an episode index. The chunk
  // must be derived from the requested episode; a constant 0 breaks every
  // episode that does not live in the first chunk.
  // NOTE(review): 1000 episodes per chunk is assumed here — confirm against
  // the dataset's info.json if it declares a different chunk size.
  const EPISODES_PER_CHUNK = 1000;
  const episode_chunk = Math.floor(episodeId / EPISODES_PER_CHUNK);
  const chunkLabel = episode_chunk.toString().padStart(3, "0");
  const episodeLabel = episodeId.toString().padStart(6, "0");

  // Dataset information
  const datasetInfo = {
    repoId,
    total_frames: info.total_frames,
    total_episodes: info.total_episodes,
    fps: info.fps,
  };

  // Generate list of episodes: either every episode (ids start from 0) or
  // the whitespace-separated subset given in the EPISODES env variable.
  const episodes =
    process.env.EPISODES === undefined
      ? Array.from({ length: datasetInfo.total_episodes }, (_, i) => i)
      : process.env.EPISODES.split(/\s+/)
          .map((x) => parseInt(x.trim(), 10))
          .filter((x) => !isNaN(x));

  // Videos information: one entry per feature whose dtype is "video".
  const videosInfo = Object.entries(info.features)
    .filter(([, value]) => value.dtype === "video")
    .map(([key]) => {
      const videoPath = formatStringWithVars(info.video_path, {
        video_key: key,
        episode_chunk: chunkLabel,
        episode_index: episodeLabel,
      });
      return {
        filename: key,
        url: buildVersionedUrl(repoId, version, videoPath),
      };
    });

  // Column data: only 1-D float32/int32 features can be charted.
  const columnNames = Object.entries(info.features)
    .filter(
      ([, value]) =>
        ["float32", "int32"].includes(value.dtype) &&
        value.shape.length === 1,
    )
    .map(([key, { shape }]) => ({ key, length: shape[0] }));

  // Exclude bookkeeping columns; "timestamp" is re-added first on purpose
  // so that it is always the leading column of every row.
  const excludedColumns = [
    "timestamp",
    "frame_index",
    "episode_index",
    "index",
    "task_index",
  ];
  const filteredColumns = columnNames.filter(
    (column) => !excludedColumns.includes(column.key),
  );
  const filteredColumnNames = [
    "timestamp",
    ...filteredColumns.map((column) => column.key),
  ];

  // Resolve a display name for every element of every column.
  const columns = filteredColumns.map(({ key, length }) => {
    // `names` may be nested objects wrapping the actual array; unwrap by
    // repeatedly taking the first value until an array (or non-object) is hit.
    let column_names = info.features[key].names;
    while (typeof column_names === "object") {
      if (Array.isArray(column_names)) break;
      column_names = Object.values(column_names ?? {})[0];
    }
    return {
      key,
      value: Array.isArray(column_names)
        ? column_names.map((name) => `${key}${SERIES_NAME_DELIMITER}${name}`)
        : Array.from(
            { length: length ?? 1 },
            (_, i) => `${key}${SERIES_NAME_DELIMITER}${i}`,
          ),
    };
  });

  const parquetUrl = buildVersionedUrl(
    repoId,
    version,
    formatStringWithVars(info.data_path, {
      episode_chunk: chunkLabel,
      episode_index: episodeLabel,
    }),
  );

  const arrayBuffer = await fetchParquetFile(parquetUrl);
  const data = await readParquetColumn(arrayBuffer, filteredColumnNames);

  // Flatten and map to array of objects for chartData
  const seriesNames = ["timestamp", ...columns.flatMap(({ value }) => value)];

  const chartData = data.map((row) => {
    const flatRow = row.flat();
    const obj: Record<string, number> = {};
    seriesNames.forEach((key, idx) => {
      obj[key] = flatRow[idx];
    });
    return obj;
  });

  // List of columns that are ignored (e.g., 2D or 3D data)
  const ignoredColumns = Object.entries(info.features)
    .filter(
      ([, value]) =>
        ["float32", "int32"].includes(value.dtype) && value.shape.length > 1,
    )
    .map(([key]) => key);

  // 1. Group all numeric keys by suffix (excluding 'timestamp')
  const numericKeys = seriesNames.filter((k) => k !== "timestamp");
  const suffixGroupsMap: Record<string, string[]> = {};
  for (const key of numericKeys) {
    const parts = key.split(SERIES_NAME_DELIMITER);
    const suffix = parts[1] || parts[0]; // fallback to key if no delimiter
    if (!suffixGroupsMap[suffix]) suffixGroupsMap[suffix] = [];
    suffixGroupsMap[suffix].push(key);
  }
  const suffixGroups = Object.values(suffixGroupsMap);

  // 2. Compute min/max for each suffix group as a whole
  const groupStats: Record<string, { min: number; max: number }> = {};
  suffixGroups.forEach((group) => {
    let min = Infinity,
      max = -Infinity;
    for (const row of chartData) {
      for (const key of group) {
        const v = row[key];
        if (typeof v === "number" && !isNaN(v)) {
          if (v < min) min = v;
          if (v > max) max = v;
        }
      }
    }
    // Use the first key in the group as the group id
    groupStats[group[0]] = { min, max };
  });

  // 3. Group suffix groups by similar scale (treat each suffix group as a unit)
  const scaleGroups: Record<string, string[][]> = {};
  const used = new Set<string>();
  const SCALE_THRESHOLD = 2;
  for (const group of suffixGroups) {
    const groupId = group[0];
    if (used.has(groupId)) continue;
    const { min, max } = groupStats[groupId];
    if (!isFinite(min) || !isFinite(max)) continue;
    // Compare magnitudes in log10 space; the epsilon avoids log10(0).
    const logMin = Math.log10(Math.abs(min) + 1e-9);
    const logMax = Math.log10(Math.abs(max) + 1e-9);
    const unit: string[][] = [group];
    used.add(groupId);
    for (const other of suffixGroups) {
      const otherId = other[0];
      if (used.has(otherId) || otherId === groupId) continue;
      const { min: omin, max: omax } = groupStats[otherId];
      // Constant series are never merged into another group's chart.
      if (!isFinite(omin) || !isFinite(omax) || omin === omax) continue;
      const ologMin = Math.log10(Math.abs(omin) + 1e-9);
      const ologMax = Math.log10(Math.abs(omax) + 1e-9);
      if (
        Math.abs(logMin - ologMin) <= SCALE_THRESHOLD &&
        Math.abs(logMax - ologMax) <= SCALE_THRESHOLD
      ) {
        unit.push(other);
        used.add(otherId);
      }
    }
    scaleGroups[groupId] = unit;
  }

  // 4. Flatten scaleGroups into chartGroups (array of arrays of keys),
  //    largest units first, at most 6 series per chart.
  const chartGroups: string[][] = Object.values(scaleGroups)
    .sort((a, b) => b.length - a.length)
    .flatMap((suffixGroupArr) => {
      // suffixGroupArr is array of suffix groups (each is array of keys)
      const merged = suffixGroupArr.flat();
      if (merged.length > 6) {
        const subgroups: string[][] = [];
        for (let i = 0; i < merged.length; i += 6) {
          subgroups.push(merged.slice(i, i + 6));
        }
        return subgroups;
      }
      return [merged];
    });

  // An empty parquet result must not crash the page; report a zero duration.
  const lastRow = chartData[chartData.length - 1];
  const duration = lastRow ? lastRow.timestamp : 0;

  // Utility: group row keys by suffix
  function groupRowBySuffix(row: Record<string, number>): Record<string, any> {
    const result: Record<string, any> = {};
    const bySuffix: Record<string, Record<string, number>> = {};
    for (const [key, value] of Object.entries(row)) {
      if (key === "timestamp") {
        result["timestamp"] = value;
        continue;
      }
      const parts = key.split(SERIES_NAME_DELIMITER);
      if (parts.length === 2) {
        const [prefix, suffix] = parts;
        if (!bySuffix[suffix]) bySuffix[suffix] = {};
        bySuffix[suffix][prefix] = value;
      } else {
        result[key] = value;
      }
    }
    for (const [suffix, group] of Object.entries(bySuffix)) {
      const keys = Object.keys(group);
      if (keys.length === 1) {
        // Use the full original name as the key
        const fullName = `${keys[0]}${SERIES_NAME_DELIMITER}${suffix}`;
        result[fullName] = group[keys[0]];
      } else {
        result[suffix] = group;
      }
    }
    return result;
  }

  const chartDataGroups = chartGroups.map((group) =>
    chartData.map((row) => groupRowBySuffix(pick(row, [...group, "timestamp"]))),
  );

  return {
    datasetInfo,
    episodeId,
    videosInfo,
    chartDataGroups,
    episodes,
    ignoredColumns,
    duration,
  };
}
289
+
290
/**
 * v3.0 implementation with segmentation support for all episodes.
 *
 * Loads the metadata of the requested episode, derives the video segment
 * info and the chart data from it, and returns a payload with the same
 * fields as the v2.x loader.
 *
 * @param repoId    `${org}/${dataset}` identifier.
 * @param version   Dataset version string used to build URLs.
 * @param info      Parsed `meta/info.json` of the dataset.
 * @param episodeId Index of the episode to load.
 * @returns The viewer payload (`datasetInfo`, `episodeId`, `videosInfo`,
 *          `chartDataGroups`, `episodes`, `ignoredColumns`, `duration`).
 */
async function getEpisodeDataV3(
  repoId: string,
  version: string,
  info: DatasetMetadata,
  episodeId: number,
) {
  console.log(`[DEBUG] Loading v3.0 episode data for ${repoId}, episode ${episodeId}`);

  // Create dataset info structure (like v2.x)
  const datasetInfo = {
    repoId,
    total_frames: info.total_frames,
    total_episodes: info.total_episodes,
    fps: info.fps,
  };

  // Generate episodes list based on total_episodes from dataset info
  const episodes = Array.from({ length: info.total_episodes }, (_, i) => i);
  console.log(`[DEBUG] Available episodes: ${episodes.length} (0 to ${episodes.length - 1})`);

  // Load the metadata (timestamps, file indices) of the requested episode
  const episodeMetadata = await loadEpisodeMetadataV3Simple(repoId, version, episodeId);

  // Create video info with segmentation using the metadata
  const videosInfo = extractVideoInfoV3WithSegmentation(repoId, version, info, episodeMetadata);

  // Load episode data for charts
  const { chartDataGroups, ignoredColumns } = await loadEpisodeDataV3(repoId, version, info, episodeMetadata);

  // Use the actual episode duration. Coerce through Number() and fall back
  // to 0 so that missing or non-numeric timestamps never surface as NaN.
  const segmentStart = Number(episodeMetadata.video_from_timestamp ?? 0);
  const segmentEnd = Number(episodeMetadata.video_to_timestamp ?? 0);
  const segmentSpan = segmentEnd - segmentStart;
  const duration = Number.isFinite(segmentSpan) && segmentSpan > 0 ? segmentSpan : 0;

  return {
    datasetInfo,
    episodeId,
    videosInfo,
    chartDataGroups,
    episodes,
    ignoredColumns,
    duration,
  };
}
330
+
331
/**
 * Load episode data for v3.0 charts.
 *
 * Fetches the parquet file named by the episode's chunk/file indices, cuts
 * out the rows `[dataset_from_index, dataset_to_index)` and hands them to
 * `processEpisodeDataForCharts`.
 *
 * This is best-effort: any failure is logged and an empty result is
 * returned instead of throwing.
 *
 * @param repoId          `${org}/${dataset}` identifier.
 * @param version         Dataset version string used to build URLs.
 * @param info            Parsed `meta/info.json` of the dataset.
 * @param episodeMetadata Metadata row of the episode; the fields read here
 *                        are `episode_index`, `data_chunk_index`,
 *                        `data_file_index`, `dataset_from_index` and
 *                        `dataset_to_index` (values may be BigInt).
 * @returns Chart groups plus the list of ignored columns; both empty when
 *          the episode has no rows or loading failed.
 */
async function loadEpisodeDataV3(
  repoId: string,
  version: string,
  info: DatasetMetadata,
  episodeMetadata: any,
): Promise<{ chartDataGroups: any[]; ignoredColumns: string[] }> {
  console.log(`[DEBUG] Loading v3.0 data for episode ${episodeMetadata.episode_index}`);

  // Normalise a metadata value (number, BigInt, or absent) to a plain number.
  // Only null/undefined/non-numeric input takes the fallback, so a genuine 0
  // is preserved.
  const toIndexOr = (raw: unknown, fallback: number): number => {
    if (raw === null || raw === undefined) return fallback;
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : fallback;
  };

  // Build data file path using chunk and file indices
  const dataChunkIndex = toIndexOr(episodeMetadata.data_chunk_index, 0);
  const dataFileIndex = toIndexOr(episodeMetadata.data_file_index, 0);
  const dataPath = `data/chunk-${dataChunkIndex.toString().padStart(3, "0")}/file-${dataFileIndex.toString().padStart(3, "0")}.parquet`;

  console.log(`[DEBUG] Loading data from: ${dataPath}`);
  console.log(`[DEBUG] Data range: ${episodeMetadata.dataset_from_index} to ${episodeMetadata.dataset_to_index}`);

  try {
    const dataUrl = buildVersionedUrl(repoId, version, dataPath);
    const arrayBuffer = await fetchParquetFile(dataUrl);
    const fullData = await readParquetColumn(arrayBuffer, []);

    console.log(`[DEBUG] Loaded ${fullData.length} total data rows`);

    // Extract the episode-specific data slice (converting BigInt if needed).
    // NOTE(review): the indices are applied directly to the rows of this one
    // file. If they are dataset-global offsets and this is not the first
    // data file, the slice will be empty or misaligned — confirm against
    // the code that produces the episode metadata.
    const fromIndex = toIndexOr(episodeMetadata.dataset_from_index, 0);
    const toIndex = toIndexOr(episodeMetadata.dataset_to_index, fullData.length);

    console.log(`[DEBUG] Converting indices: ${episodeMetadata.dataset_from_index} → ${fromIndex}, ${episodeMetadata.dataset_to_index} → ${toIndex}`);

    const episodeData = fullData.slice(fromIndex, toIndex);

    console.log(`[DEBUG] Episode data slice: ${episodeData.length} rows (${fromIndex} to ${toIndex})`);

    if (episodeData.length === 0) {
      console.log(`[DEBUG] No data found for episode ${episodeMetadata.episode_index}`);
      return { chartDataGroups: [], ignoredColumns: [] };
    }

    // Convert to the same format as v2.x for compatibility with existing chart code
    return processEpisodeDataForCharts(episodeData, info, episodeMetadata);
  } catch (error) {
    // Deliberate best-effort: the viewer can still show videos without charts.
    console.error(`[DEBUG] Failed to load episode data:`, error);
    return { chartDataGroups: [], ignoredColumns: [] };
  }
}
380
+
381
+ // Process episode data for charts (v3.0 compatible)
382
+ function processEpisodeDataForCharts(
383
+ episodeData: any[],
384
+ info: DatasetMetadata,
385
+ episodeMetadata?: any,
386
+ ): { chartDataGroups: any[]; ignoredColumns: string[] } {
387
+ const SERIES_NAME_DELIMITER = ".";
388
+
389
+ // Get numeric column features
390
+ const columnNames = Object.entries(info.features)
391
+ .filter(
392
+ ([key, value]) =>
393
+ ["float32", "int32"].includes(value.dtype) &&
394
+ value.shape.length === 1,
395
+ )
396
+ .map(([key, value]) => ({ key, value }));
397
+
398
+ // Convert parquet data to chart format
399
+ let seriesNames: string[] = [];
400
+
401
+ // Create a mapping from numeric indices to feature names for v3.0 data
402
+ const v3IndexToFeatureMap: Record<string, string> = {
403
+ '0': 'observation.state',
404
+ '1': 'action',
405
+ '2': 'timestamp',
406
+ '3': 'episode_index',
407
+ '4': 'frame_index',
408
+ '5': 'next.reward',
409
+ '6': 'next.done',
410
+ '7': 'index',
411
+ '8': 'task_index'
412
+ };
413
+
414
+ // Columns to exclude from charts
415
+ const excludedColumns = ['index', 'task_index', 'episode_index', 'frame_index'];
416
+
417
+ // First, extract all series from the first data row to understand the structure
418
+ if (episodeData.length > 0) {
419
+ const firstRow = episodeData[0];
420
+ const allKeys: string[] = [];
421
+
422
+ Object.entries(firstRow || {}).forEach(([key, value]) => {
423
+ if (key === 'timestamp') return; // Skip timestamp, we'll add it separately
424
+
425
+ // Map numeric key to feature name if available
426
+ const featureName = v3IndexToFeatureMap[key] || key;
427
+
428
+ // Skip excluded columns
429
+ if (excludedColumns.includes(featureName)) return;
430
+
431
+ if (Array.isArray(value) && value.length > 0) {
432
+ // For array values like observation.state and action, create a key for each element
433
+ value.forEach((_, idx) => {
434
+ allKeys.push(`${featureName}[${idx}]`);
435
+ });
436
+ } else if (typeof value === 'number' && !isNaN(value)) {
437
+ // For scalar numeric values
438
+ allKeys.push(featureName);
439
+ } else if (typeof value === 'bigint') {
440
+ // For BigInt values
441
+ allKeys.push(featureName);
442
+ }
443
+ });
444
+
445
+ seriesNames = ["timestamp", ...allKeys];
446
+ console.log(`[DEBUG] Detected series:`, allKeys);
447
+ console.log(`[DEBUG] First row sample:`, firstRow);
448
+ } else {
449
+ // Fallback to feature-based approach
450
+ seriesNames = [
451
  "timestamp",
452
+ ...columnNames.map(({ key }) => key),
453
  ];
454
+ }
455
 
456
+ const chartData = episodeData.map((row, index) => {
457
+ const obj: Record<string, number> = {};
458
+
459
+ // Add timestamp aligned with video timing
460
+ // For v3.0, we need to map the episode data index to the actual video duration
461
+ let videoDuration = episodeData.length; // Fallback to data length
462
+ if (episodeMetadata) {
463
+ // Use actual video segment duration if available
464
+ videoDuration = (episodeMetadata.video_to_timestamp || 30) - (episodeMetadata.video_from_timestamp || 0);
465
+ }
466
+ obj["timestamp"] = (index / Math.max(episodeData.length - 1, 1)) * videoDuration;
467
+
468
+ // For v3.0, data might have numeric string keys, so we need to map them
469
+ // Get all available keys from the first row to understand the structure
470
+ if (index === 0) {
471
+ console.log(`[DEBUG] Data row keys:`, Object.keys(row || {}));
472
+ console.log(`[DEBUG] Available features:`, Object.keys(info.features));
473
+ }
474
+
475
+ // Add all data columns
476
+ if (row && typeof row === 'object') {
477
+ Object.entries(row).forEach(([key, value]) => {
478
+ if (key === 'timestamp') {
479
+ // Timestamp is already handled above
480
+ return;
481
+ }
482
+
483
+ // Map numeric key to feature name if available
484
+ const featureName = v3IndexToFeatureMap[key] || key;
485
+
486
+ // Skip excluded columns
487
+ if (excludedColumns.includes(featureName)) return;
488
+
489
+ if (Array.isArray(value)) {
490
+ // For array values like observation.state and action
491
+ value.forEach((val, idx) => {
492
+ const elementKey = `${featureName}[${idx}]`;
493
+ obj[elementKey] = typeof val === 'number' ? val : Number(val);
494
+ });
495
+ } else if (typeof value === 'number' && !isNaN(value)) {
496
+ obj[featureName] = value;
497
+ } else if (typeof value === 'bigint') {
498
+ obj[featureName] = Number(value);
499
+ } else if (typeof value === 'boolean') {
500
+ // Convert boolean to number for charts
501
+ obj[featureName] = value ? 1 : 0;
502
+ }
503
  });
504
+ }
505
+
506
+ return obj;
507
+ });
508
 
509
+ // List of columns that are ignored (now we handle 2D data by flattening)
510
+ const ignoredColumns = [
511
+ ...Object.entries(info.features)
512
  .filter(
513
  ([key, value]) =>
514
+ ["float32", "int32"].includes(value.dtype) && value.shape.length > 2, // Only ignore 3D+ data
515
  )
516
+ .map(([key]) => key),
517
+ ...excludedColumns // Also include the manually excluded columns
518
+ ];
519
 
520
+ // Group processing logic (adapted for v3.0 numeric keys)
521
+ const numericKeys = seriesNames.filter((k) => k !== "timestamp");
522
+
523
+ // Group keys by suffix, i.e. everything after the first dot (hierarchical structure like v2)
524
+ const suffixGroupsMap: Record<string, string[]> = {};
525
+
526
+ // First, let's check if we have keys with dots (hierarchical structure)
527
+ const hasHierarchicalKeys = numericKeys.some(key => key.includes('.') && !key.includes('['));
528
+
529
+ if (hasHierarchicalKeys) {
530
+ // Group by suffix after the dot (like v2 does)
531
  for (const key of numericKeys) {
532
+ const cleanKey = key.replace(/\[\d+\]$/, ''); // Remove array indices
533
+ const parts = cleanKey.split('.');
534
+
535
+ if (parts.length >= 2) {
536
+ // For keys like "observation.state" or "action.main_shoulder_pan"
537
+ const suffix = parts.slice(1).join('.'); // Everything after first dot
538
+ if (!suffixGroupsMap[suffix]) {
539
+ suffixGroupsMap[suffix] = [];
540
+ }
541
+ suffixGroupsMap[suffix].push(key);
542
+ } else {
543
+ // Keys without dots go in their own group
544
+ if (!suffixGroupsMap[key]) {
545
+ suffixGroupsMap[key] = [];
 
 
 
 
 
546
  }
547
+ suffixGroupsMap[key].push(key);
548
  }
549
+ }
550
+ } else {
551
+ // For v3 data without hierarchical keys, group by base name (removing array indices)
552
+ for (const key of numericKeys) {
553
+ const baseKey = key.replace(/\[\d+\]$/, '');
554
+
555
+ if (!suffixGroupsMap[baseKey]) {
556
+ suffixGroupsMap[baseKey] = [];
557
+ }
558
+ suffixGroupsMap[baseKey].push(key);
559
+ }
560
+ }
561
+ const suffixGroups = Object.values(suffixGroupsMap);
562
+
563
+ console.log(`[DEBUG] Created suffix groups:`, suffixGroupsMap);
564
 
565
+ // Compute min/max for each suffix group
566
+ const groupStats: Record<string, { min: number; max: number }> = {};
567
+ suffixGroups.forEach((group) => {
568
+ let min = Infinity, max = -Infinity;
569
+ for (const row of chartData) {
570
+ for (const key of group) {
571
+ const v = row[key];
572
+ if (typeof v === "number" && !isNaN(v)) {
573
+ if (v < min) min = v;
574
+ if (v > max) max = v;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
575
  }
576
  }
 
577
  }
578
+ groupStats[group[0]] = { min, max };
579
+ });
580
 
581
+ // Group by similar scale
582
+ const scaleGroups: Record<string, string[][]> = {};
583
+ const used = new Set<string>();
584
+ const SCALE_THRESHOLD = 2;
585
+ for (const group of suffixGroups) {
586
+ const groupId = group[0];
587
+ if (used.has(groupId)) continue;
588
+ const { min, max } = groupStats[groupId];
589
+ if (!isFinite(min) || !isFinite(max)) continue;
590
+ const logMin = Math.log10(Math.abs(min) + 1e-9);
591
+ const logMax = Math.log10(Math.abs(max) + 1e-9);
592
+ const unit: string[][] = [group];
593
+ used.add(groupId);
594
+ for (const other of suffixGroups) {
595
+ const otherId = other[0];
596
+ if (used.has(otherId) || otherId === groupId) continue;
597
+ const { min: omin, max: omax } = groupStats[otherId];
598
+ if (!isFinite(omin) || !isFinite(omax) || omin === omax) continue;
599
+ const ologMin = Math.log10(Math.abs(omin) + 1e-9);
600
+ const ologMax = Math.log10(Math.abs(omax) + 1e-9);
601
+ if (
602
+ Math.abs(logMin - ologMin) <= SCALE_THRESHOLD &&
603
+ Math.abs(logMax - ologMax) <= SCALE_THRESHOLD
604
+ ) {
605
+ unit.push(other);
606
+ used.add(otherId);
607
+ }
608
+ }
609
+ scaleGroups[groupId] = unit;
610
+ }
611
 
612
+ // Flatten into chartGroups
613
+ const chartGroups: string[][] = Object.values(scaleGroups)
614
+ .sort((a, b) => b.length - a.length)
615
+ .flatMap((suffixGroupArr) => {
616
+ const merged = suffixGroupArr.flat();
617
+ if (merged.length > 6) {
618
+ const subgroups = [];
619
+ for (let i = 0; i < merged.length; i += 6) {
620
+ subgroups.push(merged.slice(i, i + 6));
621
+ }
622
+ return subgroups;
623
+ }
624
+ return [merged];
625
+ });
626
 
627
/**
 * Regroups the flat keys of a single chart row for display.
 *
 * When at least one key is "hierarchical" (contains a `.` and no `[`), every
 * dotted key is split at its first `.` into `prefix` and `suffix`:
 *   - if several prefixes share a suffix, they are nested as
 *     `result[suffix] = { prefixA: value, prefixB: value }`;
 *   - if a suffix has a single prefix, it is emitted flat as `prefix.suffix`.
 * Keys without a `.` are copied through unchanged, and `timestamp` is always
 * copied through unchanged. When no key is hierarchical the row is returned
 * as a shallow copy.
 *
 * A trailing array index (`name[3]`) is dropped before splitting, but only
 * when doing so is lossless. If two keys of the row would collapse to the same
 * index-free name (e.g. `a.x[0]` and `a.x[1]`), their indices are kept so that
 * neither value overwrites the other.
 *
 * @param row Flat mapping of series name to value for one time step.
 * @returns A new object containing the regrouped values.
 */
function groupRowBySuffix(row: Record<string, number>): Record<string, any> {
  const result: Record<string, any> = {};
  const entries = Object.entries(row);

  const hasHierarchicalKeys = entries.some(
    ([key]) => key.includes('.') && !key.includes('[') && key !== 'timestamp',
  );

  if (!hasHierarchicalKeys) {
    // For non-hierarchical data, just pass through
    for (const [key, value] of entries) {
      result[key] = value;
    }
    return result;
  }

  const stripIndex = (key: string): string => key.replace(/\[\d+\]$/, '');

  // Count how many keys share each index-free name so collisions are known
  // before any value is stored.
  const strippedCounts = new Map<string, number>();
  for (const [key] of entries) {
    if (key === "timestamp") continue;
    const stripped = stripIndex(key);
    strippedCounts.set(stripped, (strippedCounts.get(stripped) ?? 0) + 1);
  }

  // suffix -> (prefix -> value)
  const prefixGroups: Record<string, Record<string, number>> = {};

  for (const [key, value] of entries) {
    if (key === "timestamp") {
      result["timestamp"] = value;
      continue;
    }

    // Only drop the array index when no sibling key would land on the same name.
    const stripped = stripIndex(key);
    const cleanKey = (strippedCounts.get(stripped) ?? 0) > 1 ? key : stripped;
    const parts = cleanKey.split('.');

    if (parts.length >= 2) {
      const prefix = parts[0];
      const suffix = parts.slice(1).join('.');

      if (!prefixGroups[suffix]) {
        prefixGroups[suffix] = {};
      }
      prefixGroups[suffix][prefix] = value;
    } else {
      // Non-hierarchical keys go directly to result
      result[key] = value;
    }
  }

  for (const [suffix, group] of Object.entries(prefixGroups)) {
    const keys = Object.keys(group);
    if (keys.length === 1) {
      // Single value, use full name
      result[`${keys[0]}.${suffix}`] = group[keys[0]];
    } else {
      // Multiple values, create nested structure
      result[suffix] = group;
    }
  }

  return result;
}
683
+
684
+ const chartDataGroups = chartGroups.map((group) =>
685
+ chartData.map((row) => groupRowBySuffix(pick(row, [...group, "timestamp"])))
686
+ );
687
+
688
+ console.log(`[DEBUG] Generated ${chartDataGroups.length} chart groups`);
689
+ console.log(`[DEBUG] Chart groups structure:`, chartGroups);
690
+ if (chartDataGroups.length > 0 && chartDataGroups[0].length > 0) {
691
+ console.log(`[DEBUG] Sample chart data:`, chartDataGroups[0][0]);
692
+ }
693
 
694
+ return { chartDataGroups, ignoredColumns };
695
+ }
696
+
697
/**
 * Builds one un-segmented video descriptor per feature whose dtype is "video".
 *
 * Every descriptor points at the fixed path
 * `videos/<feature>/chunk-000/file-000.mp4`, resolved through
 * `buildVersionedUrl`, and is flagged `isSegmented: false`.
 *
 * @param repoId  Dataset repository identifier passed to `buildVersionedUrl`.
 * @param version Dataset version passed to `buildVersionedUrl`.
 * @param info    Dataset metadata; only `info.features` is read.
 * @returns One `{ filename, url, isSegmented }` object per video feature.
 */
function extractSimpleVideoInfoV3(
  repoId: string,
  version: string,
  info: DatasetMetadata,
): any[] {
  const videosInfo: any[] = [];

  for (const [videoKey, feature] of Object.entries(info.features)) {
    if (feature.dtype !== "video") {
      continue;
    }

    // Simplified version: always the first file of the first chunk.
    const firstFilePath = `videos/${videoKey}/chunk-000/file-000.mp4`;
    videosInfo.push({
      filename: videoKey,
      url: buildVersionedUrl(repoId, version, firstFilePath),
      // No segmentation - the whole file is shown.
      isSegmented: false,
    });
  }

  return videosInfo;
}
721
+
722
/**
 * Builds one segmented video descriptor per feature whose dtype is "video".
 *
 * The chunk and file indices and the segment boundaries all come from
 * `episodeMetadata` and are therefore identical for every video feature.
 * Falsy indices fall back to 0, a falsy start timestamp to 0 and a falsy end
 * timestamp to 30.
 *
 * @param repoId          Dataset repository identifier passed to `buildVersionedUrl`.
 * @param version         Dataset version passed to `buildVersionedUrl`.
 * @param info            Dataset metadata; only `info.features` is read.
 * @param episodeMetadata Object providing `video_chunk_index`, `video_file_index`,
 *                        `video_from_timestamp` and `video_to_timestamp`.
 * @returns One `{ filename, url, isSegmented, segmentStart, segmentEnd,
 *          segmentDuration }` object per video feature.
 */
function extractVideoInfoV3WithSegmentation(
  repoId: string,
  version: string,
  info: DatasetMetadata,
  episodeMetadata: any,
): any[] {
  const pad3 = (index: any): string => index.toString().padStart(3, "0");

  const videosInfo: any[] = [];
  for (const [videoKey, feature] of Object.entries(info.features)) {
    if (feature.dtype !== "video") {
      continue;
    }

    // Locate the file that holds this episode.
    const chunkIndex = episodeMetadata.video_chunk_index || 0;
    const fileIndex = episodeMetadata.video_file_index || 0;
    const videoPath = `videos/${videoKey}/chunk-${pad3(chunkIndex)}/file-${pad3(fileIndex)}.mp4`;
    const fullUrl = buildVersionedUrl(repoId, version, videoPath);

    console.log(`[DEBUG] Video URL for ${videoKey}: ${fullUrl}`);
    console.log(`[DEBUG] Chunk index: ${chunkIndex}, File index: ${fileIndex}`);
    console.log(`[DEBUG] Timestamps: ${episodeMetadata.video_from_timestamp} to ${episodeMetadata.video_to_timestamp}`);

    // Segment boundaries inside that file.
    const segmentStart = episodeMetadata.video_from_timestamp || 0;
    const segmentEnd = episodeMetadata.video_to_timestamp || 30;

    videosInfo.push({
      filename: videoKey,
      url: fullUrl,
      isSegmented: true,
      segmentStart,
      segmentEnd,
      segmentDuration: segmentEnd - segmentStart,
    });
  }

  console.log(`[DEBUG] Created segmented video info:`, videosInfo);
  return videosInfo;
}
759
+
760
/**
 * Loads the metadata row describing one episode of a v3.0 dataset.
 *
 * Only `meta/episodes/chunk-000/file-000.parquet` is read. The row whose
 * parsed `episode_index` strictly equals `episodeId` is used; when no row
 * matches, the row at position `episodeId` is used instead if it exists.
 *
 * @param repoId    Dataset repository identifier passed to `buildVersionedUrl`.
 * @param version   Dataset version passed to `buildVersionedUrl`.
 * @param episodeId Episode to look up.
 * @returns The row as parsed by `parseEpisodeRowSimple`.
 * @throws Error when the file holds no rows or the episode cannot be located;
 *         any failure is logged with `console.error` and re-thrown.
 */
async function loadEpisodeMetadataV3Simple(
  repoId: string,
  version: string,
  episodeId: number,
): Promise<any> {
  console.log(`[DEBUG] Loading v3.0 metadata for episode ${episodeId}`);

  const episodesMetadataUrl = buildVersionedUrl(
    repoId,
    version,
    "meta/episodes/chunk-000/file-000.parquet"
  );

  try {
    const arrayBuffer = await fetchParquetFile(episodesMetadataUrl);
    const episodesData = await readParquetColumn(arrayBuffer, []);

    console.log(`[DEBUG] Loaded ${episodesData.length} episode rows`);

    if (episodesData.length === 0) {
      throw new Error("No episode metadata found");
    }

    // Scan every row (logging disabled) for the requested episode index.
    const matchIndex = episodesData.findIndex(
      (candidate) => parseEpisodeRowSimple(candidate, false).episode_index === episodeId,
    );

    let episodeRow = null;
    if (matchIndex !== -1) {
      episodeRow = episodesData[matchIndex];
      console.log(`[DEBUG] Found episode ${episodeId} at row ${matchIndex}`);
    }

    if (!episodeRow) {
      // Positional fallback: treat the episode id as a row number.
      if (!(episodeId < episodesData.length)) {
        throw new Error(`Episode ${episodeId} not found in metadata`);
      }
      episodeRow = episodesData[episodeId];
      console.log(`[DEBUG] Using fallback row ${episodeId} for episode ${episodeId}`);
    }

    // Final parse, this time with logging enabled.
    return parseEpisodeRowSimple(episodeRow, true);
  } catch (error) {
    console.error(`Failed to load episode metadata:`, error);
    throw error;
  }
}
815
+
816
/**
 * Converts a raw episode-metadata row into an object with named fields.
 *
 * The row is read positionally through the keys `'0'`..`'9'` (this works for
 * both arrays and objects with numeric string keys). The column order is a
 * best-guess mapping and may need adjusting to the real file layout.
 *
 * BigInt cells are converted to plain numbers. NOTE(review): parquet readers
 * commonly surface 64-bit integer columns as BigInt - confirm for the reader
 * used here. Without the conversion a BigInt `episode_index` can never be
 * strictly equal to a numeric episode id, and mixing BigInt with numbers in
 * arithmetic throws.
 *
 * Falsy cells (missing, 0, NaN, ...) are replaced by the field's default:
 * 30 for `video_to_timestamp` and `length`, 0 for everything else.
 *
 * @param row           Raw row; anything that is not a non-null object yields
 *                      the all-defaults record.
 * @param enableLogging When true (default), debug details are logged.
 * @returns Object with `episode_index`, `data_chunk_index`, `data_file_index`,
 *          `dataset_from_index`, `dataset_to_index`, `video_chunk_index`,
 *          `video_file_index`, `video_from_timestamp`, `video_to_timestamp`
 *          and `length`.
 */
function parseEpisodeRowSimple(row: any, enableLogging: boolean = true): any {
  if (enableLogging) {
    console.log(`[DEBUG] Parsing episode row with keys:`, Object.keys(row || {}));
    console.log(`[DEBUG] Row type:`, typeof row);
  }

  // Normalise BigInt to number, then apply the same falsy fallback as before.
  const cell = (value: any, fallback: number): any => {
    const normalized = typeof value === 'bigint' ? Number(value) : value;
    return normalized || fallback;
  };

  if (row && typeof row === 'object') {
    const episodeData = {
      episode_index: cell(row['0'], 0), // First column likely episode index
      data_chunk_index: cell(row['1'], 0), // Data chunk index
      data_file_index: cell(row['2'], 0), // Data file index
      dataset_from_index: cell(row['3'], 0), // Dataset start index
      dataset_to_index: cell(row['4'], 0), // Dataset end index
      video_chunk_index: cell(row['5'], 0), // Video chunk index
      video_file_index: cell(row['6'], 0), // Video file index
      video_from_timestamp: cell(row['7'], 0), // Video from timestamp
      video_to_timestamp: cell(row['8'], 30), // Video to timestamp
      length: cell(row['9'], 30), // Episode length
    };

    if (enableLogging) {
      console.log(`[DEBUG] Raw row values:`);
      console.log(` Row['0'] (episode_index): ${row['0']}`);
      console.log(` Row['1'] (data_chunk_index): ${row['1']}`);
      console.log(` Row['2'] (data_file_index): ${row['2']}`);
      console.log(` Row['3'] (dataset_from_index): ${row['3']}`);
      console.log(` Row['4'] (dataset_to_index): ${row['4']}`);
      console.log(` Row['5'] (video_chunk_index): ${row['5']}`);
      console.log(` Row['6'] (video_file_index): ${row['6']}`);
      console.log(` Row['7'] (video_from_timestamp): ${row['7']}`);
      console.log(` Row['8'] (video_to_timestamp): ${row['8']}`);
      console.log(` Row['9'] (length): ${row['9']}`);
      console.log(`[DEBUG] Parsed episode data:`, episodeData);
    }
    return episodeData;
  }

  // Fallback if the row cannot be read at all
  const fallback = {
    episode_index: 0,
    data_chunk_index: 0,
    data_file_index: 0,
    dataset_from_index: 0,
    dataset_to_index: 0,
    video_chunk_index: 0,
    video_file_index: 0,
    video_from_timestamp: 0,
    video_to_timestamp: 30,
    length: 30,
  };

  if (enableLogging) {
    console.log(`[DEBUG] Using fallback episode data:`, fallback);
  }
  return fallback;
}
882
+
883
/**
 * Turns an episode-metadata row into an object with named fields.
 *
 * Array rows are mapped positionally (elements 0..9) onto the field names
 * below; this ordering is a placeholder and may need adjusting to the real
 * column layout. Any non-array value is returned untouched.
 *
 * @param row Raw row, either an array of cells or an already-keyed object.
 * @returns The named-field object for arrays, otherwise `row` itself.
 */
function parseEpisodeRow(row: any): any {
  // Already keyed (or not a row at all): hand it back as-is.
  if (!Array.isArray(row)) {
    return row;
  }

  const [
    episode_index,
    data_chunk_index,
    data_file_index,
    dataset_from_index,
    dataset_to_index,
    video_chunk_index,
    video_file_index,
    video_from_timestamp,
    video_to_timestamp,
    length,
  ] = row;

  return {
    episode_index,
    data_chunk_index,
    data_file_index,
    dataset_from_index,
    dataset_to_index,
    video_chunk_index,
    video_file_index,
    video_from_timestamp,
    video_to_timestamp,
    length,
  };
}
908
+
909
/**
 * Builds one segmented video descriptor per feature whose dtype is "video",
 * using per-camera metadata keys.
 *
 * For each video feature the chunk index, file index and segment boundaries
 * are looked up on `episodeMetadata` under `videos/<feature>/chunk_index`,
 * `.../file_index`, `.../from_timestamp` and `.../to_timestamp`; any falsy
 * value is treated as 0.
 *
 * @param repoId          Dataset repository identifier passed to `buildVersionedUrl`.
 * @param version         Dataset version passed to `buildVersionedUrl`.
 * @param info            Dataset metadata; only `info.features` is read.
 * @param episodeMetadata Object holding the per-camera keys described above.
 * @returns Promise of one `{ filename, url, isSegmented, segmentStart,
 *          segmentEnd, segmentDuration }` object per video feature.
 */
async function extractVideoInfoV3(
  repoId: string,
  version: string,
  info: DatasetMetadata,
  episodeMetadata: any,
): Promise<any[]> {
  const pad3 = (index: any): string => index.toString().padStart(3, "0");

  const videosInfo: any[] = [];
  for (const [videoKey, feature] of Object.entries(info.features)) {
    if (feature.dtype !== "video") {
      continue;
    }

    // Per-camera lookup; falsy entries count as 0.
    const readField = (field: string) =>
      episodeMetadata[`videos/${videoKey}/${field}`] || 0;

    const chunkIndex = readField("chunk_index");
    const fileIndex = readField("file_index");
    const fromTimestamp = readField("from_timestamp");
    const toTimestamp = readField("to_timestamp");

    // v3.0 layout: videos/<camera>/chunk-XXX/file-XXX.mp4
    const videoPath = `videos/${videoKey}/chunk-${pad3(chunkIndex)}/file-${pad3(fileIndex)}.mp4`;

    videosInfo.push({
      filename: videoKey,
      url: buildVersionedUrl(repoId, version, videoPath),
      isSegmented: true,
      segmentStart: fromTimestamp,
      segmentEnd: toTimestamp,
      segmentDuration: toTimestamp - fromTimestamp,
    });
  }

  return videosInfo;
}
948
+
949
+ // DISABLED: Complex episode data loading for simplified v3.0 implementation
950
+ /*
951
+ async function loadEpisodeDataV3(
952
+ episodeMetadata: any,
953
+ ): Promise<{ chartDataGroups: any[]; ignoredColumns: string[]; duration: number }> {
954
+ // Complex data loading disabled for simplified implementation
955
+ throw new Error("Complex data loading disabled in simplified v3.0 implementation");
956
  }
957
+ */
958
 
959
  // Safe wrapper for UI error display
960
  export async function getEpisodeDataSafe(
src/components/videos-player.tsx CHANGED
@@ -7,6 +7,10 @@ import { FaExpand, FaCompress, FaTimes, FaEye } from "react-icons/fa";
7
  type VideoInfo = {
8
  filename: string;
9
  url: string;
 
 
 
 
10
  };
11
 
12
  type VideoPlayerProps = {
@@ -142,27 +146,87 @@ export const VideosPlayer = ({
142
  }
143
  }, [hiddenVideos, showHiddenMenu, enlargedVideo]);
144
 
145
- // Sync video times
146
  useEffect(() => {
147
- videoRefs.current.forEach((video) => {
148
  if (video && Math.abs(video.currentTime - currentTime) > 0.2) {
149
- video.currentTime = currentTime;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  }
151
  });
152
- }, [currentTime]);
153
 
154
  // Handle time update
155
  const handleTimeUpdate = (e: React.SyntheticEvent<HTMLVideoElement>) => {
156
  const video = e.target as HTMLVideoElement;
157
  if (video && video.duration) {
158
- setCurrentTime(video.currentTime);
 
 
 
 
 
 
 
 
 
 
 
 
159
  }
160
  };
161
 
162
- // Handle video ready
163
  useEffect(() => {
164
  let videosReadyCount = 0;
165
- const onCanPlayThrough = () => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  videosReadyCount += 1;
167
  if (videosReadyCount === videosInfo.length) {
168
  if (typeof onVideosReady === "function") {
@@ -172,13 +236,15 @@ export const VideosPlayer = ({
172
  }
173
  };
174
 
175
- videoRefs.current.forEach((video) => {
176
  if (video) {
177
  // If already ready, call the handler immediately
178
  if (video.readyState >= 4) {
179
- onCanPlayThrough();
180
  } else {
181
- video.addEventListener("canplaythrough", onCanPlayThrough);
 
 
182
  }
183
  }
184
  });
@@ -186,11 +252,18 @@ export const VideosPlayer = ({
186
  return () => {
187
  videoRefs.current.forEach((video) => {
188
  if (video) {
189
- video.removeEventListener("canplaythrough", onCanPlayThrough);
 
 
 
 
 
 
 
190
  }
191
  });
192
  };
193
- }, []);
194
 
195
  return (
196
  <>
@@ -323,6 +396,7 @@ export const VideosPlayer = ({
323
  }}
324
  muted
325
  loop
 
326
  className={`w-full object-contain ${isEnlarged ? "max-h-[90vh] max-w-[90vw]" : ""}`}
327
  onTimeUpdate={
328
  idx === firstVisibleIdx ? handleTimeUpdate : undefined
 
7
  type VideoInfo = {
8
  filename: string;
9
  url: string;
10
+ isSegmented?: boolean;
11
+ segmentStart?: number;
12
+ segmentEnd?: number;
13
+ segmentDuration?: number;
14
  };
15
 
16
  type VideoPlayerProps = {
 
146
  }
147
  }, [hiddenVideos, showHiddenMenu, enlargedVideo]);
148
 
149
// Sync video times (with segment awareness).
//
// `currentTime` is the shared playback position. For a segmented video it is
// relative to the start of the segment, so the element must be positioned at
// `segmentStart + currentTime`; for any other video it maps one-to-one.
//
// The 0.2 s drift check is made against that mapped target, not against the
// raw `currentTime`. Comparing a segmented element's absolute time with the
// segment-relative time would exceed the tolerance whenever `segmentStart`
// is above 0.2 s and re-seek the element on every update.
useEffect(() => {
  videoRefs.current.forEach((video, index) => {
    if (!video) return;

    const videoInfo = videosInfo[index];
    let targetTime = currentTime;

    if (videoInfo?.isSegmented) {
      const segmentDuration = videoInfo.segmentDuration || 0;
      // Segments without a positive duration are never repositioned.
      if (!(segmentDuration > 0)) return;
      targetTime = (videoInfo.segmentStart || 0) + currentTime;
    }

    if (Math.abs(video.currentTime - targetTime) > 0.2) {
      video.currentTime = targetTime;
    }
  });
}, [currentTime, videosInfo]);
172
 
173
// Handle time update: publish the playing element's position as the shared
// playback time. Segmented videos report time relative to their segment start
// (never below 0); all other videos report their raw position.
const handleTimeUpdate = (e: React.SyntheticEvent<HTMLVideoElement>) => {
  const video = e.target as HTMLVideoElement;
  if (!video || !video.duration) {
    return;
  }

  // Look up the descriptor that belongs to this element.
  const videoInfo = videosInfo[videoRefs.current.indexOf(video)];

  const sharedTime = videoInfo?.isSegmented
    ? Math.max(0, video.currentTime - (videoInfo.segmentStart || 0))
    : video.currentTime;

  setCurrentTime(sharedTime);
};
192
 
193
+ // Handle video ready and setup segmentation
194
  useEffect(() => {
195
  let videosReadyCount = 0;
196
+ const onCanPlayThrough = (videoIndex: number) => {
197
+ const video = videoRefs.current[videoIndex];
198
+ const videoInfo = videosInfo[videoIndex];
199
+
200
+ // Setup video segmentation for v3.0 chunked videos
201
+ if (video && videoInfo?.isSegmented) {
202
+ const segmentStart = videoInfo.segmentStart || 0;
203
+ const segmentEnd = videoInfo.segmentEnd || video.duration || 0;
204
+
205
+ console.log(`[VIDEO DEBUG] Setting up segmentation for ${videoInfo.filename}: ${segmentStart}s to ${segmentEnd}s`);
206
+
207
+ // Set initial time to segment start if not already set
208
+ if (video.currentTime < segmentStart || video.currentTime > segmentEnd) {
209
+ video.currentTime = segmentStart;
210
+ }
211
+
212
+ // Add event listener to handle segment boundaries
213
+ const handleTimeUpdate = () => {
214
+ if (video.currentTime > segmentEnd) {
215
+ video.currentTime = segmentStart;
216
+ if (!video.loop) {
217
+ video.pause();
218
+ }
219
+ }
220
+ };
221
+
222
+ video.addEventListener('timeupdate', handleTimeUpdate);
223
+
224
+ // Store cleanup function
225
+ (video as any)._segmentCleanup = () => {
226
+ video.removeEventListener('timeupdate', handleTimeUpdate);
227
+ };
228
+ }
229
+
230
  videosReadyCount += 1;
231
  if (videosReadyCount === videosInfo.length) {
232
  if (typeof onVideosReady === "function") {
 
236
  }
237
  };
238
 
239
+ videoRefs.current.forEach((video, index) => {
240
  if (video) {
241
  // If already ready, call the handler immediately
242
  if (video.readyState >= 4) {
243
+ onCanPlayThrough(index);
244
  } else {
245
+ const readyHandler = () => onCanPlayThrough(index);
246
+ video.addEventListener("canplaythrough", readyHandler);
247
+ (video as any)._readyHandler = readyHandler;
248
  }
249
  }
250
  });
 
252
  return () => {
253
  videoRefs.current.forEach((video) => {
254
  if (video) {
255
+ // Remove ready handler
256
+ if ((video as any)._readyHandler) {
257
+ video.removeEventListener("canplaythrough", (video as any)._readyHandler);
258
+ }
259
+ // Remove segment handler
260
+ if ((video as any)._segmentCleanup) {
261
+ (video as any)._segmentCleanup();
262
+ }
263
  }
264
  });
265
  };
266
+ }, [videosInfo, onVideosReady, setIsPlaying]);
267
 
268
  return (
269
  <>
 
396
  }}
397
  muted
398
  loop
399
+ preload="auto"
400
  className={`w-full object-contain ${isEnlarged ? "max-h-[90vh] max-w-[90vw]" : ""}`}
401
  onTimeUpdate={
402
  idx === firstVisibleIdx ? handleTimeUpdate : undefined
src/utils/parquetUtils.ts CHANGED
@@ -52,11 +52,21 @@ export async function readParquetColumn(
52
  fileBuffer: ArrayBuffer,
53
  columns: string[],
54
  ): Promise<any[]> {
55
- return new Promise((resolve) => {
56
  parquetRead({
57
  file: fileBuffer,
58
- columns,
59
- onComplete: (data: any[]) => resolve(data),
 
 
 
 
 
 
 
 
 
 
60
  });
61
  });
62
  }
 
52
  fileBuffer: ArrayBuffer,
53
  columns: string[],
54
  ): Promise<any[]> {
55
+ return new Promise((resolve, reject) => {
56
  parquetRead({
57
  file: fileBuffer,
58
+ columns: columns.length > 0 ? columns : undefined, // Let hyparquet read all columns if empty array
59
+ onComplete: (data: any[]) => {
60
+ console.log('[DEBUG] Parquet read completed, data length:', data.length);
61
+ if (data.length > 0) {
62
+ console.log('[DEBUG] First data row structure:', typeof data[0], Object.keys(data[0] || {}));
63
+ }
64
+ resolve(data);
65
+ },
66
+ onError: (error: any) => {
67
+ console.error('[DEBUG] Parquet read error:', error);
68
+ reject(error);
69
+ }
70
  });
71
  });
72
  }
src/utils/versionUtils.ts CHANGED
@@ -41,27 +41,69 @@ async function checkVersionExists(repoId: string, version: string): Promise<bool
41
  }
42
  }
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  /**
45
  * Determines the best available version for a dataset.
46
  * Prefers v3.0, falls back to v2.1, then v2.0, or throws an error if none exist.
47
  */
48
  export async function getDatasetVersion(repoId: string): Promise<string> {
49
- // Check for v3.0 first
50
- if (await checkVersionExists(repoId, "v3.0")) {
51
- return "v3.0";
 
 
 
 
 
 
 
 
 
 
 
52
  }
53
 
54
  // Check for v2.1
55
- if (await checkVersionExists(repoId, "v2.1")) {
 
 
 
56
  return "v2.1";
57
  }
58
 
59
  // Fall back to v2.0
60
- if (await checkVersionExists(repoId, "v2.0")) {
 
 
 
61
  return "v2.0";
62
  }
63
 
64
  // If none of the supported versions exist, throw an error
 
65
  throw new Error(
66
  `Dataset ${repoId} is not compatible with this visualizer. ` +
67
  "This tool only works with dataset versions 3.0, 2.1, or 2.0. " +
 
41
  }
42
  }
43
 
44
/**
 * Checks if a dataset has v3.0 chunked structure.
 *
 * Sends a HEAD request for
 * `<DATASET_URL>/<repoId>/resolve/v3.0/meta/episodes/chunk-000/file-000.parquet`
 * and aborts it after 10 seconds.
 *
 * @param repoId Dataset repository identifier used to build the probe URL.
 * @returns `true` when the response status is OK; `false` on a non-OK
 *          response, a network error or a timeout.
 */
async function checkV3ChunkedStructure(repoId: string): Promise<boolean> {
  const testUrl = `${DATASET_URL}/${repoId}/resolve/v3.0/meta/episodes/chunk-000/file-000.parquet`;

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 10000);

  try {
    const response = await fetch(testUrl, {
      method: "HEAD",
      cache: "no-store",
      signal: controller.signal
    });

    return response.ok;
  } catch {
    // Any failure (network error, abort) means the structure is not usable.
    return false;
  } finally {
    // Always release the timer, including when fetch rejects, so it does not
    // linger for the remainder of the 10 s window.
    clearTimeout(timeoutId);
  }
}
67
+
68
  /**
69
  * Determines the best available version for a dataset.
70
  * Prefers v3.0, falls back to v2.1, then v2.0, or throws an error if none exist.
71
  */
72
  export async function getDatasetVersion(repoId: string): Promise<string> {
73
+ console.log(`[VERSION DEBUG] Checking versions for ${repoId}`);
74
+
75
+ // Check for v3.0 first - must have both info.json AND chunked episode structure
76
+ const hasV3Info = await checkVersionExists(repoId, "v3.0");
77
+ console.log(`[VERSION DEBUG] v3.0 info.json exists: ${hasV3Info}`);
78
+
79
+ if (hasV3Info) {
80
+ const hasV3Structure = await checkV3ChunkedStructure(repoId);
81
+ console.log(`[VERSION DEBUG] v3.0 chunked structure exists: ${hasV3Structure}`);
82
+
83
+ if (hasV3Structure) {
84
+ console.log(`[VERSION DEBUG] Using v3.0 for ${repoId}`);
85
+ return "v3.0";
86
+ }
87
  }
88
 
89
  // Check for v2.1
90
+ const hasV21 = await checkVersionExists(repoId, "v2.1");
91
+ console.log(`[VERSION DEBUG] v2.1 exists: ${hasV21}`);
92
+ if (hasV21) {
93
+ console.log(`[VERSION DEBUG] Using v2.1 for ${repoId}`);
94
  return "v2.1";
95
  }
96
 
97
  // Fall back to v2.0
98
+ const hasV20 = await checkVersionExists(repoId, "v2.0");
99
+ console.log(`[VERSION DEBUG] v2.0 exists: ${hasV20}`);
100
+ if (hasV20) {
101
+ console.log(`[VERSION DEBUG] Using v2.0 for ${repoId}`);
102
  return "v2.0";
103
  }
104
 
105
  // If none of the supported versions exist, throw an error
106
+ console.log(`[VERSION DEBUG] No compatible versions found for ${repoId}`);
107
  throw new Error(
108
  `Dataset ${repoId} is not compatible with this visualizer. ` +
109
  "This tool only works with dataset versions 3.0, 2.1, or 2.0. " +