/**
* Utility functions for checking dataset version compatibility
*/
// Base URL for resolving dataset files; override with the DATASET_URL env var.
const DATASET_URL =
  process.env.DATASET_URL || "https://huggingface.co/datasets";
/**
 * Per-feature metadata for one entry under `features` in a dataset's
 * meta/info.json file.
 */
type FeatureInfo = {
  // Data type label (e.g. "float32", "video") — TODO confirm exact vocabulary.
  dtype: string;
  shape: number[];
  // Axis/channel names; representation varies by feature kind, may be null.
  names: string[] | Record<string, unknown> | null;
  // Optional extra metadata; schema not visible from here.
  info?: Record<string, unknown>;
};
/**
 * Dataset information structure parsed from meta/info.json.
 */
export interface DatasetInfo {
  // Dataset format version, e.g. "v2.1"; validated by getDatasetVersionAndInfo.
  codebase_version: string;
  robot_type: string | null;
  total_episodes: number;
  total_frames: number;
  total_tasks: number;
  chunks_size: number;
  data_files_size_in_mb: number;
  video_files_size_in_mb: number;
  fps: number;
  // Split name → value; presumably episode ranges — verify against callers.
  splits: Record<string, string>;
  // Path templates for data/video files within the repo.
  data_path: string;
  video_path: string;
  features: Record<string, FeatureInfo>;
}
// In-memory cache for dataset info, keyed by repo id. Entries expire after
// CACHE_TTL_MS (5 min); total size is capped by MAX_CACHE_ENTRIES below
// (env-configurable, default 64, floor of 8 — not 200 as previously stated).
const datasetInfoCache = new Map<
  string,
  { data: DatasetInfo; expiry: number }
>();
const CACHE_TTL_MS = 5 * 60 * 1000;
// Cap on cache entries: env override, falling back to 64 when the variable is
// unset or not a valid integer; never allowed below 8.
const MAX_CACHE_ENTRIES = Math.max(
  8,
  parseInt(process.env.MAX_DATASET_INFO_CACHE_ENTRIES ?? "64", 10) || 64,
);
/**
 * Evicts stale entries and enforces the size cap on datasetInfoCache.
 * Map iteration follows insertion order, so trimming from the front drops
 * the oldest-inserted entries first.
 */
function pruneDatasetInfoCache(now: number) {
  // Collect expired keys first, then delete, to keep iteration simple.
  const expired: string[] = [];
  for (const [key, entry] of datasetInfoCache) {
    if (entry.expiry <= now) {
      expired.push(key);
    }
  }
  for (const key of expired) {
    datasetInfoCache.delete(key);
  }
  // Enforce the hard cap so the cache cannot grow without bound.
  while (datasetInfoCache.size > MAX_CACHE_ENTRIES) {
    const { value: oldest } = datasetInfoCache.keys().next();
    if (!oldest) break;
    datasetInfoCache.delete(oldest);
  }
}
/**
 * Fetches and caches `meta/info.json` for a dataset repository.
 *
 * Cache hits are re-inserted so insertion order approximates LRU; the
 * network request is aborted after 10 seconds.
 *
 * @param repoId - Dataset repository id, e.g. "user/dataset".
 * @returns The parsed dataset info.
 * @throws Error when the request fails, times out, or the payload lacks
 *   the expected `features` structure.
 */
export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
  const now = Date.now();
  pruneDatasetInfoCache(now);
  const cached = datasetInfoCache.get(repoId);
  if (cached && now < cached.expiry) {
    // Re-insert so the entry moves to the back of insertion order (LRU-ish).
    datasetInfoCache.delete(repoId);
    datasetInfoCache.set(repoId, cached);
    console.log(`[perf] getDatasetInfo cache HIT for ${repoId}`);
    return cached.data;
  }
  console.log(`[perf] getDatasetInfo cache MISS for ${repoId} — fetching`);
  try {
    const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
    const controller = new AbortController();
    // Abort after 10s so a hung request cannot stall callers indefinitely.
    const timeoutId = setTimeout(() => controller.abort(), 10000);
    let response: Response;
    try {
      response = await fetch(testUrl, {
        method: "GET",
        cache: "no-store",
        signal: controller.signal,
      });
    } finally {
      // BUG FIX: clearTimeout previously ran only on the success path, so a
      // rejected fetch leaked the timer (which later fired a useless abort).
      clearTimeout(timeoutId);
    }
    if (!response.ok) {
      throw new Error(`Failed to fetch dataset info: ${response.status}`);
    }
    const data = await response.json();
    // Minimal shape validation before trusting the payload as DatasetInfo.
    if (!data.features) {
      throw new Error(
        "Dataset info.json does not have the expected features structure",
      );
    }
    datasetInfoCache.set(repoId, {
      data: data as DatasetInfo,
      expiry: Date.now() + CACHE_TTL_MS,
    });
    pruneDatasetInfoCache(Date.now());
    return data as DatasetInfo;
  } catch (error) {
    // Real Error instances (including AbortError) propagate unchanged; only
    // non-Error throwables get wrapped in the generic compatibility message.
    if (error instanceof Error) {
      throw error;
    }
    throw new Error(
      `Dataset ${repoId} is not compatible with this visualizer. ` +
        "Failed to read dataset information from the main revision.",
    );
  }
}
// Codebase versions this visualizer knows how to render.
const SUPPORTED_VERSIONS = ["v3.0", "v2.1", "v2.0"];
/**
 * Fetches the dataset info once and validates its codebase version, so
 * callers get both without issuing a duplicate info.json request.
 *
 * @param repoId - Dataset repository id, e.g. "user/dataset".
 * @returns The validated version string together with the full info.
 * @throws Error when codebase_version is missing or unsupported.
 */
export async function getDatasetVersionAndInfo(
  repoId: string,
): Promise<{ version: string; info: DatasetInfo }> {
  const info = await getDatasetInfo(repoId);
  const { codebase_version: version } = info;
  if (!version) {
    throw new Error("Dataset info.json does not contain codebase_version");
  }
  const isSupported = SUPPORTED_VERSIONS.includes(version);
  if (!isSupported) {
    throw new Error(
      `Dataset ${repoId} has codebase version ${version}, which is not supported. ` +
        "This tool only works with dataset versions 3.0, 2.1, or 2.0. " +
        "Please use a compatible dataset version.",
    );
  }
  return { version, info };
}
/** Convenience wrapper returning only the validated version string. */
export async function getDatasetVersion(repoId: string): Promise<string> {
  const result = await getDatasetVersionAndInfo(repoId);
  return result.version;
}
/**
 * Builds a download URL for a file inside a dataset repository.
 *
 * NOTE(review): `version` is accepted but currently unused — the URL is
 * always pinned to the `main` revision. Confirm with callers whether
 * version-specific resolution was intended before wiring it up or
 * removing the parameter.
 *
 * @param repoId - Dataset repository id, e.g. "user/dataset".
 * @param version - Dataset codebase version (currently ignored).
 * @param path - Repo-relative file path, e.g. "meta/info.json".
 */
export function buildVersionedUrl(
  repoId: string,
  version: string,
  path: string,
): string {
  return `${DATASET_URL}/${repoId}/resolve/main/${path}`;
}