Spaces:
Running
Running
feat: local dataset paths, API route, and versioned fetch routing
#4
by alexis779 - opened
- .gitignore +8 -0
- .prettierignore +3 -0
- src/app/[org]/[dataset]/[episode]/episode-viewer.tsx +19 -7
- src/app/[org]/[dataset]/[episode]/fetch-data.ts +10 -0
- src/app/[org]/[dataset]/[episode]/page.tsx +6 -1
- src/app/api/local-dataset/file/route.ts +14 -0
- src/app/page.tsx +13 -2
- src/components/filtering-panel.tsx +32 -13
- src/components/stats-panel.tsx +2 -2
- src/lib/local-dataset-fs.ts +143 -0
- src/utils/__tests__/localDataset.test.ts +57 -0
- src/utils/__tests__/versionUtils.test.ts +8 -0
- src/utils/internalFetch.ts +12 -0
- src/utils/localDataset.ts +116 -0
- src/utils/parquetUtils.ts +3 -1
- src/utils/versionUtils.ts +28 -7
.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
node_modules
|
| 2 |
+
.next
|
| 3 |
+
out
|
| 4 |
+
tsconfig.tsbuildinfo
|
| 5 |
+
next-env.d.ts
|
| 6 |
+
.env*.local
|
| 7 |
+
*.log
|
| 8 |
+
.DS_Store
|
.prettierignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.next
|
| 2 |
+
node_modules
|
| 3 |
+
tsconfig.tsbuildinfo
|
src/app/[org]/[dataset]/[episode]/episode-viewer.tsx
CHANGED
|
@@ -34,6 +34,7 @@ import {
|
|
| 34 |
type CrossEpisodeVarianceData,
|
| 35 |
} from "./fetch-data";
|
| 36 |
import { getDatasetVersionAndInfo } from "@/utils/versionUtils";
|
|
|
|
| 37 |
import type { DatasetMetadata } from "@/utils/parquetUtils";
|
| 38 |
|
| 39 |
const URDFViewer = lazy(() => import("@/components/urdf-viewer"));
|
|
@@ -614,14 +615,24 @@ function EpisodeViewerInner({
|
|
| 614 |
</a>
|
| 615 |
|
| 616 |
<div>
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
{datasetInfo.repoId}
|
| 623 |
</p>
|
| 624 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 625 |
|
| 626 |
<p className="font-mono text-lg font-semibold">
|
| 627 |
episode {episodeId}
|
|
@@ -701,6 +712,7 @@ function EpisodeViewerInner({
|
|
| 701 |
<Suspense fallback={<Loading />}>
|
| 702 |
<FilteringPanel
|
| 703 |
repoId={datasetInfo.repoId}
|
|
|
|
| 704 |
crossEpisodeData={crossEpData}
|
| 705 |
crossEpisodeLoading={insightsLoading}
|
| 706 |
episodeLengthStats={episodeLengthStats}
|
|
|
|
| 34 |
type CrossEpisodeVarianceData,
|
| 35 |
} from "./fetch-data";
|
| 36 |
import { getDatasetVersionAndInfo } from "@/utils/versionUtils";
|
| 37 |
+
import { isLocalRepoId } from "@/utils/localDataset";
|
| 38 |
import type { DatasetMetadata } from "@/utils/parquetUtils";
|
| 39 |
|
| 40 |
const URDFViewer = lazy(() => import("@/components/urdf-viewer"));
|
|
|
|
| 615 |
</a>
|
| 616 |
|
| 617 |
<div>
|
| 618 |
+
{isLocalRepoId(datasetInfo.repoId) ? (
|
| 619 |
+
<p
|
| 620 |
+
className="text-lg font-semibold text-left max-w-md break-all"
|
| 621 |
+
title={datasetInfo.localDiskPath}
|
| 622 |
+
>
|
| 623 |
+
{datasetInfo.displayLabel ?? datasetInfo.repoId}
|
| 624 |
</p>
|
| 625 |
+
) : (
|
| 626 |
+
<a
|
| 627 |
+
href={`https://huggingface.co/datasets/${datasetInfo.repoId}`}
|
| 628 |
+
target="_blank"
|
| 629 |
+
rel="noopener noreferrer"
|
| 630 |
+
>
|
| 631 |
+
<p className="text-lg font-semibold">
|
| 632 |
+
{datasetInfo.displayLabel ?? datasetInfo.repoId}
|
| 633 |
+
</p>
|
| 634 |
+
</a>
|
| 635 |
+
)}
|
| 636 |
|
| 637 |
<p className="font-mono text-lg font-semibold">
|
| 638 |
episode {episodeId}
|
|
|
|
| 712 |
<Suspense fallback={<Loading />}>
|
| 713 |
<FilteringPanel
|
| 714 |
repoId={datasetInfo.repoId}
|
| 715 |
+
localDiskPath={datasetInfo.localDiskPath}
|
| 716 |
crossEpisodeData={crossEpData}
|
| 717 |
crossEpisodeLoading={insightsLoading}
|
| 718 |
episodeLengthStats={episodeLengthStats}
|
src/app/[org]/[dataset]/[episode]/fetch-data.ts
CHANGED
|
@@ -20,6 +20,7 @@ import {
|
|
| 20 |
buildV3EpisodesMetadataPath,
|
| 21 |
} from "@/utils/stringFormatting";
|
| 22 |
import { bigIntToNumber } from "@/utils/typeGuards";
|
|
|
|
| 23 |
import type { VideoInfo, AdjacentEpisodeVideos } from "@/types";
|
| 24 |
|
| 25 |
const SERIES_NAME_DELIMITER = CHART_CONFIG.SERIES_NAME_DELIMITER;
|
|
@@ -28,6 +29,10 @@ export type CameraInfo = { name: string; width: number; height: number };
|
|
| 28 |
|
| 29 |
export type DatasetDisplayInfo = {
|
| 30 |
repoId: string;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
total_frames: number;
|
| 32 |
total_episodes: number;
|
| 33 |
fps: number;
|
|
@@ -321,6 +326,9 @@ export async function getEpisodeData(
|
|
| 321 |
.filter(([, f]) => f.dtype === "video" && f.shape.length >= 2)
|
| 322 |
.map(([name, f]) => ({ name, height: f.shape[0], width: f.shape[1] }));
|
| 323 |
|
|
|
|
|
|
|
|
|
|
| 324 |
result.datasetInfo = {
|
| 325 |
...result.datasetInfo,
|
| 326 |
robot_type: rawInfo.robot_type ?? null,
|
|
@@ -333,6 +341,8 @@ export async function getEpisodeData(
|
|
| 333 |
10,
|
| 334 |
) / 10,
|
| 335 |
cameras,
|
|
|
|
|
|
|
| 336 |
};
|
| 337 |
|
| 338 |
const progressGroup = await loadEpisodeProgressGroup(
|
|
|
|
| 20 |
buildV3EpisodesMetadataPath,
|
| 21 |
} from "@/utils/stringFormatting";
|
| 22 |
import { bigIntToNumber } from "@/utils/typeGuards";
|
| 23 |
+
import { LOCAL_ORG, decodeLocalDatasetSegment } from "@/utils/localDataset";
|
| 24 |
import type { VideoInfo, AdjacentEpisodeVideos } from "@/types";
|
| 25 |
|
| 26 |
const SERIES_NAME_DELIMITER = CHART_CONFIG.SERIES_NAME_DELIMITER;
|
|
|
|
| 29 |
|
| 30 |
export type DatasetDisplayInfo = {
|
| 31 |
repoId: string;
|
| 32 |
+
/** Shown in the UI for local datasets (includes absolute path). */
|
| 33 |
+
displayLabel?: string;
|
| 34 |
+
/** Decoded dataset root on disk when using a local dataset. */
|
| 35 |
+
localDiskPath?: string;
|
| 36 |
total_frames: number;
|
| 37 |
total_episodes: number;
|
| 38 |
fps: number;
|
|
|
|
| 326 |
.filter(([, f]) => f.dtype === "video" && f.shape.length >= 2)
|
| 327 |
.map(([name, f]) => ({ name, height: f.shape[0], width: f.shape[1] }));
|
| 328 |
|
| 329 |
+
const decodedPath =
|
| 330 |
+
org === LOCAL_ORG ? decodeLocalDatasetSegment(dataset) : null;
|
| 331 |
+
|
| 332 |
result.datasetInfo = {
|
| 333 |
...result.datasetInfo,
|
| 334 |
robot_type: rawInfo.robot_type ?? null,
|
|
|
|
| 341 |
10,
|
| 342 |
) / 10,
|
| 343 |
cameras,
|
| 344 |
+
displayLabel: decodedPath !== null ? `Local: ${decodedPath}` : undefined,
|
| 345 |
+
localDiskPath: decodedPath ?? undefined,
|
| 346 |
};
|
| 347 |
|
| 348 |
const progressGroup = await loadEpisodeProgressGroup(
|
src/app/[org]/[dataset]/[episode]/page.tsx
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import EpisodeViewer from "./episode-viewer";
|
| 2 |
import { Suspense } from "react";
|
|
|
|
| 3 |
|
| 4 |
export const dynamic = "force-dynamic";
|
| 5 |
|
|
@@ -9,8 +10,12 @@ export async function generateMetadata({
|
|
| 9 |
params: Promise<{ org: string; dataset: string; episode: string }>;
|
| 10 |
}) {
|
| 11 |
const { org, dataset, episode } = await params;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
return {
|
| 13 |
-
title: `${
|
| 14 |
};
|
| 15 |
}
|
| 16 |
|
|
|
|
| 1 |
import EpisodeViewer from "./episode-viewer";
|
| 2 |
import { Suspense } from "react";
|
| 3 |
+
import { LOCAL_ORG, decodeLocalDatasetSegment } from "@/utils/localDataset";
|
| 4 |
|
| 5 |
export const dynamic = "force-dynamic";
|
| 6 |
|
|
|
|
| 10 |
params: Promise<{ org: string; dataset: string; episode: string }>;
|
| 11 |
}) {
|
| 12 |
const { org, dataset, episode } = await params;
|
| 13 |
+
const label =
|
| 14 |
+
org === LOCAL_ORG
|
| 15 |
+
? (decodeLocalDatasetSegment(dataset) ?? `${org}/${dataset}`)
|
| 16 |
+
: `${org}/${dataset}`;
|
| 17 |
return {
|
| 18 |
+
title: `${label} | episode ${episode}`,
|
| 19 |
};
|
| 20 |
}
|
| 21 |
|
src/app/api/local-dataset/file/route.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { readLocalDatasetFileResponse } from "@/lib/local-dataset-fs";
|
| 2 |
+
|
| 3 |
+
/**
 * GET handler for `/api/local-dataset/file`.
 *
 * Reads the `root` and `rel` query parameters from the request URL and hands
 * them to `readLocalDatasetFileResponse`. Responds with HTTP 400 when either
 * parameter is missing or empty.
 */
export async function GET(request: Request) {
  const query = new URL(request.url).searchParams;
  const rootEncoded = query.get("root");
  const rel = query.get("rel");

  if (rootEncoded && rel) {
    return readLocalDatasetFileResponse(rootEncoded, rel, request);
  }

  return Response.json(
    { error: "Missing root or rel query parameter" },
    { status: 400 },
  );
}
|
src/app/page.tsx
CHANGED
|
@@ -3,6 +3,11 @@ import { useEffect, useRef, useState, useCallback, Suspense } from "react";
|
|
| 3 |
import Link from "next/link";
|
| 4 |
import { useRouter } from "next/navigation";
|
| 5 |
import { useSearchParams } from "next/navigation";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
declare global {
|
| 8 |
interface Window {
|
|
@@ -189,10 +194,16 @@ function HomeInner() {
|
|
| 189 |
|
| 190 |
const handleSubmit = (e: { preventDefault: () => void }) => {
|
| 191 |
e.preventDefault();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
const target =
|
| 193 |
activeIndex >= 0 && suggestions[activeIndex]
|
| 194 |
? suggestions[activeIndex]
|
| 195 |
-
:
|
| 196 |
if (target) navigate(target);
|
| 197 |
};
|
| 198 |
|
|
@@ -261,7 +272,7 @@ function HomeInner() {
|
|
| 261 |
onChange={(e) => setQuery(e.target.value)}
|
| 262 |
onKeyDown={handleKeyDown}
|
| 263 |
onFocus={() => query.trim() && setShowSuggestions(true)}
|
| 264 |
-
placeholder="
|
| 265 |
className="pl-10 pr-4 py-2.5 rounded-md text-base text-white bg-white/10 backdrop-blur-sm border border-white/30 focus:outline-none focus:border-sky-400 focus:bg-white/15 w-[380px] shadow-md placeholder:text-white/40 transition-colors"
|
| 266 |
autoComplete="off"
|
| 267 |
/>
|
|
|
|
| 3 |
import Link from "next/link";
|
| 4 |
import { useRouter } from "next/navigation";
|
| 5 |
import { useSearchParams } from "next/navigation";
|
| 6 |
+
import {
|
| 7 |
+
encodeLocalDatasetRoot,
|
| 8 |
+
looksLikeAbsoluteFilesystemPath,
|
| 9 |
+
normalizeUserLocalPathInput,
|
| 10 |
+
} from "@/utils/localDataset";
|
| 11 |
|
| 12 |
declare global {
|
| 13 |
interface Window {
|
|
|
|
| 194 |
|
| 195 |
// Submit handler for the search form. An absolute filesystem path (or
// file:// URL) is routed to `/local/<encoded root>`; otherwise the highlighted
// suggestion wins, falling back to the trimmed text the user typed.
const handleSubmit = (e: { preventDefault: () => void }) => {
  e.preventDefault();

  const typed = query.trim();
  if (looksLikeAbsoluteFilesystemPath(typed)) {
    const encodedRoot = encodeLocalDatasetRoot(
      normalizeUserLocalPathInput(typed),
    );
    navigate(`/local/${encodedRoot}`);
    return;
  }

  const highlighted = activeIndex >= 0 ? suggestions[activeIndex] : undefined;
  const target = highlighted ? highlighted : typed;
  if (target) navigate(target);
};
|
| 209 |
|
|
|
|
| 272 |
onChange={(e) => setQuery(e.target.value)}
|
| 273 |
onKeyDown={handleKeyDown}
|
| 274 |
onFocus={() => query.trim() && setShowSuggestions(true)}
|
| 275 |
+
placeholder="Dataset id or local path (e.g. /tmp/my_dataset)"
|
| 276 |
className="pl-10 pr-4 py-2.5 rounded-md text-base text-white bg-white/10 backdrop-blur-sm border border-white/30 focus:outline-none focus:border-sky-400 focus:bg-white/15 w-[380px] shadow-md placeholder:text-white/40 transition-colors"
|
| 277 |
autoComplete="off"
|
| 278 |
/>
|
src/components/filtering-panel.tsx
CHANGED
|
@@ -225,6 +225,8 @@ function EpisodeLengthFilter({ episodes }: { episodes: EpisodeLengthInfo[] }) {
|
|
| 225 |
|
| 226 |
interface FilteringPanelProps {
|
| 227 |
repoId: string;
|
|
|
|
|
|
|
| 228 |
crossEpisodeData: CrossEpisodeVarianceData | null;
|
| 229 |
crossEpisodeLoading: boolean;
|
| 230 |
episodeLengthStats: EpisodeLengthStats | null;
|
|
@@ -234,9 +236,11 @@ interface FilteringPanelProps {
|
|
| 234 |
|
| 235 |
function FlaggedIdsCopyBar({
|
| 236 |
repoId,
|
|
|
|
| 237 |
onViewEpisodes,
|
| 238 |
}: {
|
| 239 |
repoId: string;
|
|
|
|
| 240 |
onViewEpisodes?: () => void;
|
| 241 |
}) {
|
| 242 |
const { flagged, count, clear } = useFlaggedEpisodes();
|
|
@@ -331,19 +335,32 @@ function FlaggedIdsCopyBar({
|
|
| 331 |
</button>
|
| 332 |
)}
|
| 333 |
<div className="bg-slate-900/60 rounded-md px-3 py-2 border border-slate-700/60 space-y-2.5">
|
| 334 |
-
|
| 335 |
-
<
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
</
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
</div>
|
| 348 |
</div>
|
| 349 |
);
|
|
@@ -351,6 +368,7 @@ function FlaggedIdsCopyBar({
|
|
| 351 |
|
| 352 |
function FilteringPanel({
|
| 353 |
repoId,
|
|
|
|
| 354 |
crossEpisodeData,
|
| 355 |
crossEpisodeLoading,
|
| 356 |
episodeLengthStats,
|
|
@@ -369,6 +387,7 @@ function FilteringPanel({
|
|
| 369 |
|
| 370 |
<FlaggedIdsCopyBar
|
| 371 |
repoId={repoId}
|
|
|
|
| 372 |
onViewEpisodes={onViewFlaggedEpisodes}
|
| 373 |
/>
|
| 374 |
|
|
|
|
| 225 |
|
| 226 |
interface FilteringPanelProps {
|
| 227 |
repoId: string;
|
| 228 |
+
/** When set, Hugging Face CLI snippets are hidden (local folder dataset). */
|
| 229 |
+
localDiskPath?: string;
|
| 230 |
crossEpisodeData: CrossEpisodeVarianceData | null;
|
| 231 |
crossEpisodeLoading: boolean;
|
| 232 |
episodeLengthStats: EpisodeLengthStats | null;
|
|
|
|
| 236 |
|
| 237 |
function FlaggedIdsCopyBar({
|
| 238 |
repoId,
|
| 239 |
+
localDiskPath,
|
| 240 |
onViewEpisodes,
|
| 241 |
}: {
|
| 242 |
repoId: string;
|
| 243 |
+
localDiskPath?: string;
|
| 244 |
onViewEpisodes?: () => void;
|
| 245 |
}) {
|
| 246 |
const { flagged, count, clear } = useFlaggedEpisodes();
|
|
|
|
| 335 |
</button>
|
| 336 |
)}
|
| 337 |
<div className="bg-slate-900/60 rounded-md px-3 py-2 border border-slate-700/60 space-y-2.5">
|
| 338 |
+
{localDiskPath ? (
|
| 339 |
+
<p className="text-xs text-slate-400 leading-relaxed">
|
| 340 |
+
This is a local folder dataset (not a Hugging Face repo). Flagged
|
| 341 |
+
indices are listed above; edit or remove episodes on disk under{" "}
|
| 342 |
+
<span className="text-slate-200 font-mono break-all">
|
| 343 |
+
{localDiskPath}
|
| 344 |
+
</span>
|
| 345 |
+
.
|
| 346 |
+
</p>
|
| 347 |
+
) : (
|
| 348 |
+
<>
|
| 349 |
+
<p className="text-xs text-slate-400">
|
| 350 |
+
<a
|
| 351 |
+
href="https://github.com/huggingface/lerobot"
|
| 352 |
+
target="_blank"
|
| 353 |
+
rel="noopener noreferrer"
|
| 354 |
+
className="text-orange-400 underline"
|
| 355 |
+
>
|
| 356 |
+
LeRobot CLI
|
| 357 |
+
</a>{" "}
|
| 358 |
+
— delete flagged episodes:
|
| 359 |
+
</p>
|
| 360 |
+
<pre className="text-xs text-slate-300 bg-slate-950/50 rounded px-2 py-1.5 overflow-x-auto select-all">{`# Delete episodes (modifies original dataset)\nlerobot-edit-dataset \\\n --repo_id ${repoId} \\\n --operation.type delete_episodes \\\n --operation.episode_indices "[${ids.join(", ")}]"`}</pre>
|
| 361 |
+
<pre className="text-xs text-slate-300 bg-slate-950/50 rounded px-2 py-1.5 overflow-x-auto select-all">{`# Delete episodes and save to a new dataset (preserves original)\nlerobot-edit-dataset \\\n --repo_id ${repoId} \\\n --new_repo_id ${repoId}_filtered \\\n --operation.type delete_episodes \\\n --operation.episode_indices "[${ids.join(", ")}]"`}</pre>
|
| 362 |
+
</>
|
| 363 |
+
)}
|
| 364 |
</div>
|
| 365 |
</div>
|
| 366 |
);
|
|
|
|
| 368 |
|
| 369 |
function FilteringPanel({
|
| 370 |
repoId,
|
| 371 |
+
localDiskPath,
|
| 372 |
crossEpisodeData,
|
| 373 |
crossEpisodeLoading,
|
| 374 |
episodeLengthStats,
|
|
|
|
| 387 |
|
| 388 |
<FlaggedIdsCopyBar
|
| 389 |
repoId={repoId}
|
| 390 |
+
localDiskPath={localDiskPath}
|
| 391 |
onViewEpisodes={onViewFlaggedEpisodes}
|
| 392 |
/>
|
| 393 |
|
src/components/stats-panel.tsx
CHANGED
|
@@ -123,8 +123,8 @@ function StatsPanel({
|
|
| 123 |
<div>
|
| 124 |
<h2 className="text-xl text-slate-100">
|
| 125 |
<span className="font-bold">Dataset Statistics:</span>{" "}
|
| 126 |
-
<span className="font-normal text-slate-400">
|
| 127 |
-
{datasetInfo.repoId}
|
| 128 |
</span>
|
| 129 |
</h2>
|
| 130 |
</div>
|
|
|
|
| 123 |
<div>
|
| 124 |
<h2 className="text-xl text-slate-100">
|
| 125 |
<span className="font-bold">Dataset Statistics:</span>{" "}
|
| 126 |
+
<span className="font-normal text-slate-400 break-all">
|
| 127 |
+
{datasetInfo.displayLabel ?? datasetInfo.repoId}
|
| 128 |
</span>
|
| 129 |
</h2>
|
| 130 |
</div>
|
src/lib/local-dataset-fs.ts
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import fs from "fs/promises";
|
| 2 |
+
import path from "path";
|
| 3 |
+
import { decodeLocalDatasetSegment } from "@/utils/localDataset";
|
| 4 |
+
|
| 5 |
+
/**
 * Decode an encoded dataset-root segment and normalise it with `path.resolve`.
 *
 * @param rootEncoded - Encoded root segment as accepted by `decodeLocalDatasetSegment`.
 * @returns The resolved path, or `null` when decoding fails or yields an empty string.
 */
export function resolveLocalDatasetRoot(rootEncoded: string): string | null {
  const decodedRoot = decodeLocalDatasetSegment(rootEncoded);
  return decodedRoot ? path.resolve(decodedRoot) : null;
}
|
| 10 |
+
|
| 11 |
+
/**
 * Resolve `rel` beneath `root`, refusing anything that could leave `root`.
 *
 * Backslashes and forward slashes are both treated as separators and empty
 * segments are ignored, so a leading `/` in `rel` does not make it absolute.
 *
 * @param root - Directory the result must stay inside.
 * @param rel - Relative path requested by the caller.
 * @returns The resolved path, or `null` when `rel` is empty, contains a NUL
 *   byte, has no non-empty segments, contains a `..` segment, or resolves
 *   outside `root`.
 */
export function safePathUnderRoot(root: string, rel: string): string | null {
  if (!rel || rel.indexOf("\0") !== -1) return null;

  const parts: string[] = [];
  for (const piece of rel.split(/[\\/]/)) {
    if (piece === "..") return null;
    if (piece !== "") parts.push(piece);
  }
  if (parts.length === 0) return null;

  const base = path.resolve(root);
  const candidate = path.resolve(base, ...parts);
  if (candidate === base) return candidate;

  // Compare against `base` plus a separator so that a sibling directory such
  // as `/data/root-other` is not mistaken for a child of `/data/root`.
  const endsWithSeparator = base.endsWith(path.sep) || base.endsWith("/");
  const basePrefix = endsWithSeparator ? base : base + path.sep;
  return candidate.startsWith(basePrefix) ? candidate : null;
}
|
| 33 |
+
|
| 34 |
+
/**
 * Pick a `Content-Type` from the file extension (case-insensitive).
 *
 * Extensions without a dedicated entry, including `.parquet`, are served as
 * `application/octet-stream`.
 */
function guessContentType(filePath: string): string {
  const typesByExtension = new Map<string, string>([
    [".json", "application/json"],
    [".jsonl", "application/x-ndjson"],
    [".mp4", "video/mp4"],
    [".webm", "video/webm"],
  ]);
  const extension = path.extname(filePath).toLowerCase();
  return typesByExtension.get(extension) ?? "application/octet-stream";
}
|
| 51 |
+
|
| 52 |
+
/**
 * Parse the first range of an HTTP `Range: bytes=...` header.
 *
 * Supported forms are `start-end`, `start-` (to end of file) and `-suffix`
 * (last `suffix` bytes). Only the first range of a multi-range header is
 * considered. Bounds must be plain decimal digits; anything else (signs,
 * exponents, trailing garbage) is rejected rather than partially parsed.
 *
 * @param rangeHeader - Raw header value, or `null` when the header is absent.
 * @param size - Total size of the resource in bytes.
 * @returns Inclusive `{ start, end }` offsets clamped to the resource, or
 *   `null` when the header is absent, malformed, or unsatisfiable (which
 *   includes every range on an empty resource).
 */
function parseBytesRange(
  rangeHeader: string | null,
  size: number,
): { start: number; end: number } | null {
  if (!rangeHeader || !rangeHeader.startsWith("bytes=")) return null;
  // No byte of an empty resource can satisfy a range; without this guard a
  // suffix range would yield the invalid pair { start: 0, end: -1 }.
  if (!Number.isInteger(size) || size <= 0) return null;

  const spec = rangeHeader.slice("bytes=".length).split(",")[0]?.trim();
  if (!spec) return null;
  const dash = spec.indexOf("-");
  if (dash < 0) return null;

  const startStr = spec.slice(0, dash).trim();
  const endStr = spec.slice(dash + 1).trim();
  const digitsOnly = /^\d+$/;
  const lastByte = size - 1;

  if (startStr === "") {
    // Suffix form: "-N" means the final N bytes.
    if (!digitsOnly.test(endStr)) return null;
    const suffix = Number(endStr);
    if (suffix <= 0) return null;
    return { start: Math.max(0, size - suffix), end: lastByte };
  }

  if (!digitsOnly.test(startStr)) return null;
  if (endStr !== "" && !digitsOnly.test(endStr)) return null;

  const start = Number(startStr);
  const end = Math.min(endStr === "" ? lastByte : Number(endStr), lastByte);
  if (start > end || start >= size) return null;

  return { start, end };
}
|
| 82 |
+
|
| 83 |
+
/**
 * Serve a file from a local dataset directory as an HTTP response.
 *
 * The root is decoded with `resolveLocalDatasetRoot`, and `rel` is confined to
 * it with `safePathUnderRoot`. A valid `Range` header produces a
 * `206 Partial Content` response for any file type (the route advertises
 * `Accept-Ranges: bytes` on every response); otherwise the whole file is
 * returned with status 200.
 *
 * NOTE(review): `rootEncoded` comes from the request, so this can serve any
 * file the server process is able to read. Presumably the route is intended
 * for local/trusted use only — confirm before exposing it publicly.
 *
 * @param rootEncoded - Encoded dataset root taken from the `root` query parameter.
 * @param rel - Path of the file relative to the dataset root.
 * @param request - Incoming request; only its `Range` header is consulted.
 * @returns 400 for an invalid root/path or a non-file target, 404 when the
 *   file cannot be stat'ed, 500 when the file shrinks while a range is being
 *   read, otherwise 206 or 200 with the file bytes.
 */
export async function readLocalDatasetFileResponse(
  rootEncoded: string,
  rel: string,
  request: Request,
): Promise<Response> {
  const root = resolveLocalDatasetRoot(rootEncoded);
  if (!root) {
    return Response.json({ error: "Invalid root" }, { status: 400 });
  }

  const fullPath = safePathUnderRoot(root, rel);
  if (!fullPath) {
    return Response.json({ error: "Invalid path" }, { status: 400 });
  }

  let stat;
  try {
    stat = await fs.stat(fullPath);
  } catch {
    // Any stat failure (missing file, permissions, ...) is reported as 404.
    return Response.json({ error: "Not found" }, { status: 404 });
  }

  if (!stat.isFile()) {
    return Response.json({ error: "Not a file" }, { status: 400 });
  }

  const size = stat.size;
  const contentType = guessContentType(fullPath);
  const range = parseBytesRange(request.headers.get("range"), size);

  if (range && range.end >= range.start) {
    const { start, end } = range;
    const length = end - start + 1;
    const buf = Buffer.alloc(length);
    let filled = 0;

    const fh = await fs.open(fullPath, "r");
    try {
      // A single read() may return fewer bytes than requested, so keep
      // reading until the buffer is full or end-of-file is reached.
      while (filled < length) {
        const { bytesRead } = await fh.read(
          buf,
          filled,
          length - filled,
          start + filled,
        );
        if (bytesRead === 0) break;
        filled += bytesRead;
      }
    } finally {
      await fh.close();
    }

    if (filled < length) {
      // The file became shorter after stat(); the promised range no longer exists.
      return Response.json(
        { error: "File changed while reading" },
        { status: 500 },
      );
    }

    return new Response(new Uint8Array(buf), {
      status: 206,
      headers: {
        "Content-Type": contentType,
        "Content-Length": String(length),
        "Content-Range": `bytes ${start}-${end}/${size}`,
        "Accept-Ranges": "bytes",
      },
    });
  }

  const body = await fs.readFile(fullPath);
  return new Response(new Uint8Array(body), {
    status: 200,
    headers: {
      "Content-Type": contentType,
      "Content-Length": String(size),
      "Accept-Ranges": "bytes",
    },
  });
}
|
src/utils/__tests__/localDataset.test.ts
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { describe, expect, test } from "bun:test";
|
| 2 |
+
import {
|
| 3 |
+
decodeLocalDatasetSegment,
|
| 4 |
+
encodeLocalDatasetRoot,
|
| 5 |
+
isLocalRepoId,
|
| 6 |
+
localDatasetFileApiPath,
|
| 7 |
+
localRepoIdFromAbsolutePath,
|
| 8 |
+
looksLikeAbsoluteFilesystemPath,
|
| 9 |
+
normalizeUserLocalPathInput,
|
| 10 |
+
tryParseLocalDatasetFileApiUrl,
|
| 11 |
+
} from "@/utils/localDataset";
|
| 12 |
+
|
| 13 |
+
// Unit tests for the pure helpers exported by "@/utils/localDataset".
describe("localDataset", () => {
  test("encode/decode round trip for unix path", () => {
    const unixPath = "/tmp/aic_lerobot";
    const roundTripped = decodeLocalDatasetSegment(
      encodeLocalDatasetRoot(unixPath),
    );
    expect(roundTripped).toBe(unixPath);
  });

  test("localRepoIdFromAbsolutePath and isLocalRepoId", () => {
    expect(isLocalRepoId(localRepoIdFromAbsolutePath("/data/foo"))).toBe(true);
    expect(isLocalRepoId("lerobot/pusht")).toBe(false);
  });

  test("looksLikeAbsoluteFilesystemPath", () => {
    const expectations: Array<[string, boolean]> = [
      ["/tmp/x", true],
      ["file:///tmp/x", true],
      ["C:\\data\\x", true],
      ["lerobot/foo", false],
    ];
    for (const [input, expected] of expectations) {
      expect(looksLikeAbsoluteFilesystemPath(input)).toBe(expected);
    }
  });

  test("normalizeUserLocalPathInput strips file://", () => {
    const normalized = normalizeUserLocalPathInput("file:///tmp/a");
    expect(normalized).toBe("/tmp/a");
  });

  test("localDatasetFileApiPath builds query string", () => {
    const apiPath = localDatasetFileApiPath("abc", "meta/info.json");
    for (const fragment of ["/api/local-dataset/file?", "root=abc", "rel="]) {
      expect(apiPath).toContain(fragment);
    }
  });

  test("tryParseLocalDatasetFileApiUrl", () => {
    // Absolute URL.
    const absolute =
      "http://127.0.0.1:3000/api/local-dataset/file?root=x&rel=meta%2Finfo.json";
    expect(tryParseLocalDatasetFileApiUrl(absolute)).toEqual({
      rootEncoded: "x",
      relPath: "meta/info.json",
    });

    // Relative URL.
    const relative = "/api/local-dataset/file?root=y&rel=data%2F0.parquet";
    expect(tryParseLocalDatasetFileApiUrl(relative)).toEqual({
      rootEncoded: "y",
      relPath: "data/0.parquet",
    });
  });
});
|
src/utils/__tests__/versionUtils.test.ts
CHANGED
|
@@ -55,6 +55,14 @@ describe("buildVersionedUrl", () => {
|
|
| 55 |
"https://huggingface.co/datasets/myorg/mydataset/resolve/main/meta/info.json",
|
| 56 |
);
|
| 57 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
});
|
| 59 |
|
| 60 |
// ---------------------------------------------------------------------------
|
|
|
|
| 55 |
"https://huggingface.co/datasets/myorg/mydataset/resolve/main/meta/info.json",
|
| 56 |
);
|
| 57 |
});
|
| 58 |
+
|
| 59 |
+
test("builds local API URL for dataset files", () => {
|
| 60 |
+
const url = buildVersionedUrl("local/abc123", "v3.0", "meta/info.json");
|
| 61 |
+
expect(url.startsWith("/api/local-dataset/file?")).toBe(true);
|
| 62 |
+
const params = new URLSearchParams(url.split("?")[1]!);
|
| 63 |
+
expect(params.get("root")).toBe("abc123");
|
| 64 |
+
expect(params.get("rel")).toBe("meta/info.json");
|
| 65 |
+
});
|
| 66 |
});
|
| 67 |
|
| 68 |
// ---------------------------------------------------------------------------
|
src/utils/internalFetch.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * Turn an app-relative URL into an absolute one when running on the server.
 *
 * Server-side code (e.g. server actions) cannot use relative fetch URLs, so a
 * path beginning with `/` is prefixed with the app origin. In the browser, and
 * for URLs that are already absolute `http(s)` or that do not start with `/`,
 * the input is returned unchanged.
 *
 * The origin is `NEXT_PUBLIC_APP_URL` (one trailing slash removed) when it is
 * set to a non-empty value, otherwise `http://127.0.0.1:<PORT>` with `PORT`
 * defaulting to 3000. Empty or whitespace-only variables count as unset so
 * that a blank `.env` entry cannot produce a relative or malformed URL.
 *
 * @param url - URL about to be fetched.
 * @returns A URL that is safe to pass to `fetch` in the current environment.
 */
export function resolveInternalFetchUrl(url: string): string {
  if (typeof window !== "undefined") return url;
  if (url.startsWith("http://") || url.startsWith("https://")) return url;
  if (!url.startsWith("/")) return url;

  const configuredOrigin = process.env.NEXT_PUBLIC_APP_URL?.trim().replace(
    /\/$/,
    "",
  );
  const port = process.env.PORT?.trim() || "3000";
  // `||` rather than `??`: an empty string must fall through to the default.
  const base = configuredOrigin || `http://127.0.0.1:${port}`;
  return `${base}${url}`;
}
|
src/utils/localDataset.ts
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Local LeRobot dataset roots (absolute filesystem paths) are encoded in URLs as
|
| 3 |
+
* `/local/<base64url(utf8 path)>/episode_0`. File bytes are served via
|
| 4 |
+
* `/api/local-dataset/file?root=...&rel=...`.
|
| 5 |
+
*/
|
| 6 |
+
|
| 7 |
+
/** URL "org" segment that marks a dataset as a local filesystem dataset. */
export const LOCAL_ORG = "local";

/** Prefix shared by every local repo id, i.e. `local/<encoded root>`. */
export const LOCAL_REPO_PREFIX = `${LOCAL_ORG}/`;
|
| 9 |
+
|
| 10 |
+
/**
 * Encode a string as unpadded base64url of its UTF-8 bytes.
 *
 * Uses `btoa` when it exists and falls back to `Buffer` otherwise. `+` and `/`
 * are mapped to `-` and `_`, and trailing `=` padding is removed.
 */
function utf8ToBase64Url(s: string): string {
  const utf8 = new TextEncoder().encode(s);

  let standard: string;
  if (typeof btoa !== "undefined") {
    // btoa expects a "binary string": one character per byte.
    const binary = Array.from(utf8, (byte) => String.fromCharCode(byte)).join(
      "",
    );
    standard = btoa(binary);
  } else {
    standard = Buffer.from(utf8).toString("base64");
  }

  return standard
    .replace(/[+/]/g, (ch) => (ch === "+" ? "-" : "_"))
    .replace(/=+$/, "");
}
|
| 22 |
+
|
| 23 |
+
/**
 * Decode unpadded base64url text into the string its bytes spell in UTF-8.
 *
 * `-` and `_` are mapped back to `+` and `/` and `=` padding is restored
 * before decoding with `atob` (or `Buffer` when `atob` does not exist).
 * Errors raised by the underlying decoder propagate to the caller.
 */
function base64UrlToUtf8(s: string): string {
  const missingPadding = (4 - (s.length % 4)) % 4;
  const standard =
    s.replace(/[-_]/g, (ch) => (ch === "-" ? "+" : "/")) +
    "=".repeat(missingPadding);

  const binary =
    typeof atob !== "undefined"
      ? atob(standard)
      : Buffer.from(standard, "base64").toString("binary");

  // Each character of the binary string carries exactly one byte.
  const decodedBytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
  return new TextDecoder().decode(decodedBytes);
}
|
| 38 |
+
|
| 39 |
+
/**
 * Normalise a path typed by the user.
 *
 * Surrounding whitespace is trimmed. A `file://` URL is converted to the
 * percent-decoded path it names; for a Windows drive URL such as
 * `file:///C:/data` the URL's leading slash is dropped so the result is
 * `C:/data` rather than `/C:/data`. Input that is not a `file://` URL, or that
 * cannot be parsed or decoded, is returned trimmed but otherwise untouched.
 *
 * @param input - Raw text from the user.
 * @returns The normalised path string.
 */
export function normalizeUserLocalPathInput(input: string): string {
  const trimmed = input.trim();
  if (!trimmed.startsWith("file://")) return trimmed;

  try {
    const pathname = decodeURIComponent(new URL(trimmed).pathname);
    // URL pathnames always begin with "/", which is not part of a drive path.
    return /^\/[A-Za-z]:[\\/]/.test(pathname) ? pathname.slice(1) : pathname;
  } catch {
    // Malformed URL or invalid percent-escape: keep what the user typed.
    return trimmed;
  }
}
|
| 51 |
+
|
| 52 |
+
/**
 * Report whether user input looks like an absolute filesystem path.
 *
 * The input is first passed through `normalizeUserLocalPathInput`. It counts
 * as absolute when the result starts with `/` or with a drive letter followed
 * by `:` and a slash or backslash.
 */
export function looksLikeAbsoluteFilesystemPath(input: string): boolean {
  const candidate = normalizeUserLocalPathInput(input);
  if (candidate === "") return false;
  return candidate.startsWith("/") || /^[A-Za-z]:[\\/]/.test(candidate);
}
|
| 58 |
+
|
| 59 |
+
/**
 * Encode a dataset root path as a base64url string via `utf8ToBase64Url`.
 *
 * @param absolutePath - Dataset root on disk.
 * @returns The encoded segment.
 */
export function encodeLocalDatasetRoot(absolutePath: string): string {
  const encodedRoot = utf8ToBase64Url(absolutePath);
  return encodedRoot;
}
|
| 62 |
+
|
| 63 |
+
/**
 * Decode an encoded dataset-root segment back to the original path string.
 *
 * @param encodedSegment - Base64url text produced by `encodeLocalDatasetRoot`.
 * @returns The decoded string, or `null` when the segment is empty or
 *   `base64UrlToUtf8` throws.
 */
export function decodeLocalDatasetSegment(
  encodedSegment: string,
): string | null {
  if (encodedSegment) {
    try {
      return base64UrlToUtf8(encodedSegment);
    } catch {
      // Undecodable input is reported as null below.
    }
  }
  return null;
}
|
| 73 |
+
|
| 74 |
+
/**
 * Build the repo id for a local dataset: `LOCAL_REPO_PREFIX` followed by the
 * encoded form of `absolutePath`.
 */
export function localRepoIdFromAbsolutePath(absolutePath: string): string {
  const encodedRoot = encodeLocalDatasetRoot(absolutePath);
  return LOCAL_REPO_PREFIX + encodedRoot;
}
|
| 77 |
+
|
| 78 |
+
/**
 * Report whether `repoId` is a local dataset id: it must start with
 * `LOCAL_REPO_PREFIX` and have at least one character after the prefix.
 */
export function isLocalRepoId(repoId: string): boolean {
  if (!repoId.startsWith(LOCAL_REPO_PREFIX)) return false;
  return repoId.length > LOCAL_REPO_PREFIX.length;
}
|
| 84 |
+
|
| 85 |
+
/**
 * Build the app-relative URL that serves one file of a local dataset.
 *
 * @param rootEncoded - Encoded dataset root, sent as the `root` parameter.
 * @param relPath - File path relative to the root, sent as the `rel` parameter.
 * @returns `/api/local-dataset/file?root=...&rel=...` with both values URL-encoded.
 */
export function localDatasetFileApiPath(
  rootEncoded: string,
  relPath: string,
): string {
  const query = new URLSearchParams();
  query.set("root", rootEncoded);
  query.set("rel", relPath);
  return "/api/local-dataset/file?" + query.toString();
}
|
| 95 |
+
|
| 96 |
+
/**
 * Parse our local file API URL (absolute or relative) for server-side fs shortcut.
 *
 * The URL is recognised only when its path — everything before the first `?`,
 * ignoring one optional trailing slash — ends with `/api/local-dataset/file`.
 * Look-alike paths such as `/api/local-dataset/file-extra`, and URLs that
 * merely mention the route inside their query string, are therefore rejected.
 * Any `#fragment` is discarded before the query is read.
 *
 * @param url - URL to inspect.
 * @returns The `root` and `rel` query values, or `null` when the URL is not a
 *   local file API URL or either value is missing or empty.
 */
export function tryParseLocalDatasetFileApiUrl(
  url: string,
): { rootEncoded: string; relPath: string } | null {
  const marker = "/api/local-dataset/file";

  const hashAt = url.indexOf("#");
  const withoutFragment = hashAt === -1 ? url : url.slice(0, hashAt);

  const queryAt = withoutFragment.indexOf("?");
  if (queryAt === -1) return null;

  const pathPart = withoutFragment.slice(0, queryAt).replace(/\/$/, "");
  if (!pathPart.endsWith(marker)) return null;

  const params = new URLSearchParams(withoutFragment.slice(queryAt + 1));
  const root = params.get("root");
  const rel = params.get("rel");
  if (!root || !rel) return null;

  return { rootEncoded: root, relPath: rel };
}
|
src/utils/parquetUtils.ts
CHANGED
|
@@ -5,6 +5,7 @@ import {
|
|
| 5 |
parquetReadObjects,
|
| 6 |
type AsyncBuffer,
|
| 7 |
} from "hyparquet";
|
|
|
|
| 8 |
|
| 9 |
export interface DatasetMetadata {
|
| 10 |
codebase_version: string;
|
|
@@ -56,8 +57,9 @@ export async function fetchParquetFile(url: string): Promise<ParquetFile> {
|
|
| 56 |
const cached = parquetFileCache.get(url);
|
| 57 |
if (cached) return cached;
|
| 58 |
|
|
|
|
| 59 |
const file = await asyncBufferFromUrl({
|
| 60 |
-
url,
|
| 61 |
requestInit: { cache: "no-store" },
|
| 62 |
});
|
| 63 |
const wrapped = cachedAsyncBuffer(file);
|
|
|
|
| 5 |
parquetReadObjects,
|
| 6 |
type AsyncBuffer,
|
| 7 |
} from "hyparquet";
|
| 8 |
+
import { resolveInternalFetchUrl } from "@/utils/internalFetch";
|
| 9 |
|
| 10 |
export interface DatasetMetadata {
|
| 11 |
codebase_version: string;
|
|
|
|
| 57 |
const cached = parquetFileCache.get(url);
|
| 58 |
if (cached) return cached;
|
| 59 |
|
| 60 |
+
const fetchUrl = resolveInternalFetchUrl(url);
|
| 61 |
const file = await asyncBufferFromUrl({
|
| 62 |
+
url: fetchUrl,
|
| 63 |
requestInit: { cache: "no-store" },
|
| 64 |
});
|
| 65 |
const wrapped = cachedAsyncBuffer(file);
|
src/utils/versionUtils.ts
CHANGED
|
@@ -2,6 +2,13 @@
|
|
| 2 |
* Utility functions for checking dataset version compatibility
|
| 3 |
*/
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
const DATASET_URL =
|
| 6 |
process.env.DATASET_URL || "https://huggingface.co/datasets";
|
| 7 |
|
|
@@ -73,16 +80,26 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
|
|
| 73 |
console.log(`[perf] getDatasetInfo cache MISS for ${repoId} — fetching`);
|
| 74 |
|
| 75 |
try {
|
| 76 |
-
const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
|
| 77 |
-
|
| 78 |
const controller = new AbortController();
|
| 79 |
const timeoutId = setTimeout(() => controller.abort(), 10000);
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
clearTimeout(timeoutId);
|
| 88 |
|
|
@@ -149,5 +166,9 @@ export function buildVersionedUrl(
|
|
| 149 |
version: string,
|
| 150 |
path: string,
|
| 151 |
): string {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
return `${DATASET_URL}/${repoId}/resolve/main/${path}`;
|
| 153 |
}
|
|
|
|
| 2 |
* Utility functions for checking dataset version compatibility
|
| 3 |
*/
|
| 4 |
|
| 5 |
+
import {
|
| 6 |
+
isLocalRepoId,
|
| 7 |
+
localDatasetFileApiPath,
|
| 8 |
+
LOCAL_REPO_PREFIX,
|
| 9 |
+
} from "@/utils/localDataset";
|
| 10 |
+
import { resolveInternalFetchUrl } from "@/utils/internalFetch";
|
| 11 |
+
|
| 12 |
const DATASET_URL =
|
| 13 |
process.env.DATASET_URL || "https://huggingface.co/datasets";
|
| 14 |
|
|
|
|
| 80 |
console.log(`[perf] getDatasetInfo cache MISS for ${repoId} — fetching`);
|
| 81 |
|
| 82 |
try {
|
|
|
|
|
|
|
| 83 |
const controller = new AbortController();
|
| 84 |
const timeoutId = setTimeout(() => controller.abort(), 10000);
|
| 85 |
|
| 86 |
+
let response: Response;
|
| 87 |
+
if (isLocalRepoId(repoId)) {
|
| 88 |
+
const rootEncoded = repoId.slice(LOCAL_REPO_PREFIX.length);
|
| 89 |
+
const localUrl = localDatasetFileApiPath(rootEncoded, "meta/info.json");
|
| 90 |
+
response = await fetch(resolveInternalFetchUrl(localUrl), {
|
| 91 |
+
method: "GET",
|
| 92 |
+
cache: "no-store",
|
| 93 |
+
signal: controller.signal,
|
| 94 |
+
});
|
| 95 |
+
} else {
|
| 96 |
+
const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
|
| 97 |
+
response = await fetch(testUrl, {
|
| 98 |
+
method: "GET",
|
| 99 |
+
cache: "no-store",
|
| 100 |
+
signal: controller.signal,
|
| 101 |
+
});
|
| 102 |
+
}
|
| 103 |
|
| 104 |
clearTimeout(timeoutId);
|
| 105 |
|
|
|
|
| 166 |
version: string,
|
| 167 |
path: string,
|
| 168 |
): string {
|
| 169 |
+
if (isLocalRepoId(repoId)) {
|
| 170 |
+
const rootEncoded = repoId.slice(LOCAL_REPO_PREFIX.length);
|
| 171 |
+
return localDatasetFileApiPath(rootEncoded, path);
|
| 172 |
+
}
|
| 173 |
return `${DATASET_URL}/${repoId}/resolve/main/${path}`;
|
| 174 |
}
|