pepijn223 HF Staff committed on
Commit
9196260
·
unverified ·
1 Parent(s): b52a552

perf: faster video loading

Browse files
src/app/[org]/[dataset]/[episode]/episode-viewer.tsx CHANGED
@@ -69,20 +69,29 @@ function EpisodeViewerInner({ data, org, dataset }: { data: any; org?: string; d
69
  currentPage * pageSize,
70
  );
71
 
72
- // Preload adjacent episodes' videos
73
  useEffect(() => {
74
  if (!org || !dataset) return;
75
-
76
- const preloadAdjacent = async () => {
77
- try {
78
- await getAdjacentEpisodesVideoInfo(org, dataset, episodeId, 2);
79
- // Preload adjacent episodes for smoother navigation
80
- } catch {
81
- // Skip preloading on error
82
- }
 
 
 
 
 
 
 
 
 
 
 
83
  };
84
-
85
- preloadAdjacent();
86
  }, [org, dataset, episodeId]);
87
 
88
  // Initialize based on URL time parameter
 
69
  currentPage * pageSize,
70
  );
71
 
72
+ // Preload adjacent episodes' videos via <link rel="preload"> tags
73
  useEffect(() => {
74
  if (!org || !dataset) return;
75
+ const links: HTMLLinkElement[] = [];
76
+
77
+ getAdjacentEpisodesVideoInfo(org, dataset, episodeId, 2)
78
+ .then((adjacentVideos) => {
79
+ for (const ep of adjacentVideos) {
80
+ for (const v of ep.videosInfo) {
81
+ const link = document.createElement("link");
82
+ link.rel = "preload";
83
+ link.as = "video";
84
+ link.href = v.url;
85
+ document.head.appendChild(link);
86
+ links.push(link);
87
+ }
88
+ }
89
+ })
90
+ .catch(() => {});
91
+
92
+ return () => {
93
+ links.forEach((l) => l.remove());
94
  };
 
 
95
  }, [org, dataset, episodeId]);
96
 
97
  // Initialize based on URL time parameter
src/app/[org]/[dataset]/[episode]/fetch-data.ts CHANGED
@@ -1,13 +1,11 @@
1
  import {
2
  DatasetMetadata,
3
- fetchJson,
4
  fetchParquetFile,
5
  formatStringWithVars,
6
- readParquetColumn,
7
  readParquetAsObjects,
8
  } from "@/utils/parquetUtils";
9
  import { pick } from "@/utils/pick";
10
- import { getDatasetVersion, buildVersionedUrl } from "@/utils/versionUtils";
11
 
12
  const SERIES_NAME_DELIMITER = " | ";
13
 
@@ -18,16 +16,13 @@ export async function getEpisodeData(
18
  ) {
19
  const repoId = `${org}/${dataset}`;
20
  try {
21
- // Check for compatible dataset version (v3.0, v2.1, or v2.0)
22
- const version = await getDatasetVersion(repoId);
23
- const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
24
- const info = await fetchJson<DatasetMetadata>(jsonUrl);
25
 
26
  if (info.video_path === null) {
27
  throw new Error("Only videos datasets are supported in this visualizer.\nPlease use Rerun visualizer for images datasets.");
28
  }
29
 
30
- // Handle different versions
31
  if (version === "v3.0") {
32
  return await getEpisodeDataV3(repoId, version, info, episodeId);
33
  } else {
@@ -39,7 +34,6 @@ export async function getEpisodeData(
39
  }
40
  }
41
 
42
- // Get video info for adjacent episodes (for preloading)
43
  export async function getAdjacentEpisodesVideoInfo(
44
  org: string,
45
  dataset: string,
@@ -48,9 +42,8 @@ export async function getAdjacentEpisodesVideoInfo(
48
  ) {
49
  const repoId = `${org}/${dataset}`;
50
  try {
51
- const version = await getDatasetVersion(repoId);
52
- const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
53
- const info = await fetchJson<DatasetMetadata>(jsonUrl);
54
 
55
  const totalEpisodes = info.total_episodes;
56
  const adjacentVideos: Array<{episodeId: number; videosInfo: any[]}> = [];
@@ -196,47 +189,34 @@ async function getEpisodeDataV2(
196
  );
197
 
198
  const arrayBuffer = await fetchParquetFile(parquetUrl);
 
199
 
200
- // Extract task - first check for language instructions (preferred), then fallback to task field or tasks.jsonl
201
  let task: string | undefined;
202
- let allData: any[] = [];
203
 
204
- // Load data first
205
- try {
206
- allData = await readParquetAsObjects(arrayBuffer, []);
207
- } catch (error) {
208
- // Could not read parquet data
209
- }
210
-
211
- // First check for language_instruction fields in the data (preferred)
212
  if (allData.length > 0) {
213
  const firstRow = allData[0];
214
  const languageInstructions: string[] = [];
215
 
216
- // Check for language_instruction field
217
  if (firstRow.language_instruction) {
218
  languageInstructions.push(firstRow.language_instruction);
219
  }
220
 
221
- // Check for numbered language_instruction fields
222
  let instructionNum = 2;
223
  while (firstRow[`language_instruction_${instructionNum}`]) {
224
  languageInstructions.push(firstRow[`language_instruction_${instructionNum}`]);
225
  instructionNum++;
226
  }
227
 
228
- // Join all instructions with line breaks
229
  if (languageInstructions.length > 0) {
230
  task = languageInstructions.join('\n');
231
  }
232
  }
233
 
234
- // If no language instructions found, try direct task field
235
  if (!task && allData.length > 0 && allData[0].task) {
236
  task = allData[0].task;
237
  }
238
 
239
- // If still no task found, try loading from tasks.jsonl metadata file (v2.x format)
240
  if (!task && allData.length > 0) {
241
  try {
242
  const tasksUrl = buildVersionedUrl(repoId, version, "meta/tasks.jsonl");
@@ -244,7 +224,6 @@ async function getEpisodeDataV2(
244
 
245
  if (tasksResponse.ok) {
246
  const tasksText = await tasksResponse.text();
247
- // Parse JSONL format (one JSON object per line)
248
  const tasksData = tasksText
249
  .split('\n')
250
  .filter(line => line.trim())
@@ -252,11 +231,7 @@ async function getEpisodeDataV2(
252
 
253
  if (tasksData && tasksData.length > 0) {
254
  const taskIndex = allData[0].task_index;
255
-
256
- // Convert BigInt to number for comparison
257
  const taskIndexNum = typeof taskIndex === 'bigint' ? Number(taskIndex) : taskIndex;
258
-
259
- // Find task by task_index
260
  const taskData = tasksData.find(t => t.task_index === taskIndexNum);
261
  if (taskData) {
262
  task = taskData.task;
@@ -268,19 +243,25 @@ async function getEpisodeDataV2(
268
  }
269
  }
270
 
271
- const data = await readParquetColumn(arrayBuffer, filteredColumnNames);
272
- // Flatten and map to array of objects for chartData
273
  const seriesNames = [
274
  "timestamp",
275
  ...columns.map(({ value }) => value).flat(),
276
  ];
277
 
278
- const chartData = data.map((row) => {
279
- const flatRow = row.flat();
280
  const obj: Record<string, number> = {};
281
- seriesNames.forEach((key, idx) => {
282
- obj[key] = flatRow[idx];
283
- });
 
 
 
 
 
 
 
 
284
  return obj;
285
  });
286
 
 
1
  import {
2
  DatasetMetadata,
 
3
  fetchParquetFile,
4
  formatStringWithVars,
 
5
  readParquetAsObjects,
6
  } from "@/utils/parquetUtils";
7
  import { pick } from "@/utils/pick";
8
+ import { getDatasetVersionAndInfo, buildVersionedUrl } from "@/utils/versionUtils";
9
 
10
  const SERIES_NAME_DELIMITER = " | ";
11
 
 
16
  ) {
17
  const repoId = `${org}/${dataset}`;
18
  try {
19
+ const { version, info: rawInfo } = await getDatasetVersionAndInfo(repoId);
20
+ const info = rawInfo as unknown as DatasetMetadata;
 
 
21
 
22
  if (info.video_path === null) {
23
  throw new Error("Only videos datasets are supported in this visualizer.\nPlease use Rerun visualizer for images datasets.");
24
  }
25
 
 
26
  if (version === "v3.0") {
27
  return await getEpisodeDataV3(repoId, version, info, episodeId);
28
  } else {
 
34
  }
35
  }
36
 
 
37
  export async function getAdjacentEpisodesVideoInfo(
38
  org: string,
39
  dataset: string,
 
42
  ) {
43
  const repoId = `${org}/${dataset}`;
44
  try {
45
+ const { version, info: rawInfo } = await getDatasetVersionAndInfo(repoId);
46
+ const info = rawInfo as unknown as DatasetMetadata;
 
47
 
48
  const totalEpisodes = info.total_episodes;
49
  const adjacentVideos: Array<{episodeId: number; videosInfo: any[]}> = [];
 
189
  );
190
 
191
  const arrayBuffer = await fetchParquetFile(parquetUrl);
192
+ const allData = await readParquetAsObjects(arrayBuffer, []);
193
 
194
+ // Extract task from language_instruction fields, task field, or tasks.jsonl
195
  let task: string | undefined;
 
196
 
 
 
 
 
 
 
 
 
197
  if (allData.length > 0) {
198
  const firstRow = allData[0];
199
  const languageInstructions: string[] = [];
200
 
 
201
  if (firstRow.language_instruction) {
202
  languageInstructions.push(firstRow.language_instruction);
203
  }
204
 
 
205
  let instructionNum = 2;
206
  while (firstRow[`language_instruction_${instructionNum}`]) {
207
  languageInstructions.push(firstRow[`language_instruction_${instructionNum}`]);
208
  instructionNum++;
209
  }
210
 
 
211
  if (languageInstructions.length > 0) {
212
  task = languageInstructions.join('\n');
213
  }
214
  }
215
 
 
216
  if (!task && allData.length > 0 && allData[0].task) {
217
  task = allData[0].task;
218
  }
219
 
 
220
  if (!task && allData.length > 0) {
221
  try {
222
  const tasksUrl = buildVersionedUrl(repoId, version, "meta/tasks.jsonl");
 
224
 
225
  if (tasksResponse.ok) {
226
  const tasksText = await tasksResponse.text();
 
227
  const tasksData = tasksText
228
  .split('\n')
229
  .filter(line => line.trim())
 
231
 
232
  if (tasksData && tasksData.length > 0) {
233
  const taskIndex = allData[0].task_index;
 
 
234
  const taskIndexNum = typeof taskIndex === 'bigint' ? Number(taskIndex) : taskIndex;
 
 
235
  const taskData = tasksData.find(t => t.task_index === taskIndexNum);
236
  if (taskData) {
237
  task = taskData.task;
 
243
  }
244
  }
245
 
246
+ // Build chart data from already-parsed allData (no second parquet parse)
 
247
  const seriesNames = [
248
  "timestamp",
249
  ...columns.map(({ value }) => value).flat(),
250
  ];
251
 
252
+ const chartData = allData.map((row) => {
 
253
  const obj: Record<string, number> = {};
254
+ obj["timestamp"] = row.timestamp;
255
+ for (const col of columns) {
256
+ const rawVal = row[col.key];
257
+ if (Array.isArray(rawVal)) {
258
+ rawVal.forEach((v: any, i: number) => {
259
+ if (i < col.value.length) obj[col.value[i]] = Number(v);
260
+ });
261
+ } else if (rawVal !== undefined) {
262
+ obj[col.value[0]] = Number(rawVal);
263
+ }
264
+ }
265
  return obj;
266
  });
267
 
src/app/[org]/[dataset]/[episode]/page.tsx CHANGED
@@ -27,7 +27,7 @@ export default async function EpisodePage({
27
  const { data, error } = await getEpisodeDataSafe(org, dataset, episodeNumber);
28
  return (
29
  <Suspense fallback={null}>
30
- <EpisodeViewer data={data} error={error} />
31
  </Suspense>
32
  );
33
  }
 
27
  const { data, error } = await getEpisodeDataSafe(org, dataset, episodeNumber);
28
  return (
29
  <Suspense fallback={null}>
30
+ <EpisodeViewer data={data} error={error} org={org} dataset={dataset} />
31
  </Suspense>
32
  );
33
  }
src/components/simple-videos-player.tsx CHANGED
@@ -252,7 +252,7 @@ export const SimpleVideosPlayer = ({
252
  isEnlarged ? "max-h-[90vh] max-w-[90vw]" : ""
253
  }`}
254
  muted
255
- preload="auto"
256
  onPlay={(e) => handlePlay(e.currentTarget, info)}
257
  onTimeUpdate={isFirstVisible ? handleTimeUpdate : undefined}
258
  >
 
252
  isEnlarged ? "max-h-[90vh] max-w-[90vw]" : ""
253
  }`}
254
  muted
255
+ preload={isFirstVisible ? "auto" : "metadata"}
256
  onPlay={(e) => handlePlay(e.currentTarget, info)}
257
  onTimeUpdate={isFirstVisible ? handleTimeUpdate : undefined}
258
  >
src/components/videos-player.tsx CHANGED
@@ -395,7 +395,7 @@ export const VideosPlayer = ({
395
  }}
396
  muted
397
  loop
398
- preload="auto"
399
  className={`w-full object-contain ${isEnlarged ? "max-h-[90vh] max-w-[90vw]" : ""}`}
400
  onTimeUpdate={
401
  idx === firstVisibleIdx ? handleTimeUpdate : undefined
 
395
  }}
396
  muted
397
  loop
398
+ preload={idx === firstVisibleIdx ? "auto" : "metadata"}
399
  className={`w-full object-contain ${isEnlarged ? "max-h-[90vh] max-w-[90vw]" : ""}`}
400
  onTimeUpdate={
401
  idx === firstVisibleIdx ? handleTimeUpdate : undefined
src/utils/versionUtils.ts CHANGED
@@ -23,19 +23,24 @@ interface DatasetInfo {
23
  features: Record<string, any>;
24
  }
25
 
26
- /**
27
- * Fetches dataset information from the main revision
28
- */
 
29
  export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
 
 
 
 
 
30
  try {
31
  const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
32
 
33
  const controller = new AbortController();
34
- const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout
35
 
36
  const response = await fetch(testUrl, {
37
  method: "GET",
38
- cache: "no-store",
39
  signal: controller.signal
40
  });
41
 
@@ -47,11 +52,11 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
47
 
48
  const data = await response.json();
49
 
50
- // Check if it has the required structure
51
  if (!data.features) {
52
  throw new Error("Dataset info.json does not have the expected features structure");
53
  }
54
 
 
55
  return data as DatasetInfo;
56
  } catch (error) {
57
  if (error instanceof Error) {
@@ -64,40 +69,33 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
64
  }
65
  }
66
 
 
67
 
68
  /**
69
- * Gets the dataset version by reading the codebase_version from the main revision's info.json
 
70
  */
71
- export async function getDatasetVersion(repoId: string): Promise<string> {
72
- try {
73
- const datasetInfo = await getDatasetInfo(repoId);
74
-
75
- // Extract codebase_version
76
- const codebaseVersion = datasetInfo.codebase_version;
77
- if (!codebaseVersion) {
78
- throw new Error("Dataset info.json does not contain codebase_version");
79
- }
80
-
81
- // Validate that it's a supported version
82
- const supportedVersions = ["v3.0", "v2.1", "v2.0"];
83
- if (!supportedVersions.includes(codebaseVersion)) {
84
- throw new Error(
85
- `Dataset ${repoId} has codebase version ${codebaseVersion}, which is not supported. ` +
86
- "This tool only works with dataset versions 3.0, 2.1, or 2.0. " +
87
- "Please use a compatible dataset version."
88
- );
89
- }
90
-
91
- return codebaseVersion;
92
- } catch (error) {
93
- if (error instanceof Error) {
94
- throw error;
95
- }
96
  throw new Error(
97
- `Dataset ${repoId} is not compatible with this visualizer. ` +
98
- "Failed to read dataset information from the main revision."
 
99
  );
100
  }
 
 
 
 
 
 
 
101
  }
102
 
103
  export function buildVersionedUrl(repoId: string, version: string, path: string): string {
 
23
  features: Record<string, any>;
24
  }
25
 
26
+ // In-memory cache for dataset info (5 min TTL)
27
+ const datasetInfoCache = new Map<string, { data: DatasetInfo; expiry: number }>();
28
+ const CACHE_TTL_MS = 5 * 60 * 1000;
29
+
30
  export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
31
+ const cached = datasetInfoCache.get(repoId);
32
+ if (cached && Date.now() < cached.expiry) {
33
+ return cached.data;
34
+ }
35
+
36
  try {
37
  const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
38
 
39
  const controller = new AbortController();
40
+ const timeoutId = setTimeout(() => controller.abort(), 10000);
41
 
42
  const response = await fetch(testUrl, {
43
  method: "GET",
 
44
  signal: controller.signal
45
  });
46
 
 
52
 
53
  const data = await response.json();
54
 
 
55
  if (!data.features) {
56
  throw new Error("Dataset info.json does not have the expected features structure");
57
  }
58
 
59
+ datasetInfoCache.set(repoId, { data: data as DatasetInfo, expiry: Date.now() + CACHE_TTL_MS });
60
  return data as DatasetInfo;
61
  } catch (error) {
62
  if (error instanceof Error) {
 
69
  }
70
  }
71
 
72
+ const SUPPORTED_VERSIONS = ["v3.0", "v2.1", "v2.0"];
73
 
74
  /**
75
+ * Returns both the validated version string and the dataset info in one call,
76
+ * avoiding a duplicate info.json fetch.
77
  */
78
+ export async function getDatasetVersionAndInfo(repoId: string): Promise<{ version: string; info: DatasetInfo }> {
79
+ const info = await getDatasetInfo(repoId);
80
+
81
+ const version = info.codebase_version;
82
+ if (!version) {
83
+ throw new Error("Dataset info.json does not contain codebase_version");
84
+ }
85
+ if (!SUPPORTED_VERSIONS.includes(version)) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  throw new Error(
87
+ `Dataset ${repoId} has codebase version ${version}, which is not supported. ` +
88
+ "This tool only works with dataset versions 3.0, 2.1, or 2.0. " +
89
+ "Please use a compatible dataset version."
90
  );
91
  }
92
+
93
+ return { version, info };
94
+ }
95
+
96
+ export async function getDatasetVersion(repoId: string): Promise<string> {
97
+ const { version } = await getDatasetVersionAndInfo(repoId);
98
+ return version;
99
  }
100
 
101
  export function buildVersionedUrl(repoId: string, version: string, path: string): string {