aractingi committed on
Commit
8ffbccb
·
1 Parent(s): a17ffcc

change the way we check the dataset version (#8)

Browse files
Files changed (1) hide show
  1. src/utils/versionUtils.ts +65 -69
src/utils/versionUtils.ts CHANGED
@@ -5,13 +5,31 @@
5
  const DATASET_URL = process.env.DATASET_URL || "https://huggingface.co/datasets";
6
 
7
  /**
8
- * Checks if a specific version/branch exists for a dataset
9
  */
10
- async function checkVersionExists(repoId: string, version: string): Promise<boolean> {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  try {
12
- const testUrl = `${DATASET_URL}/${repoId}/resolve/${version}/meta/info.json`;
13
 
14
- // Try a simple GET request with a timeout
15
  const controller = new AbortController();
16
  const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout
17
 
@@ -23,88 +41,66 @@ async function checkVersionExists(repoId: string, version: string): Promise<bool
23
 
24
  clearTimeout(timeoutId);
25
 
26
- // Check if it's a successful response
27
- if (response.ok) {
28
- // Try to parse a bit of the JSON to make sure it's valid
29
- try {
30
- const text = await response.text();
31
- const data = JSON.parse(text);
32
- return !!data.features; // Only return true if it has features
33
- } catch (parseError) {
34
- return false;
35
- }
36
  }
37
 
38
- return false;
39
  } catch (error) {
40
- return false;
 
 
 
 
 
 
41
  }
42
  }
43
 
 
44
  /**
45
- * Checks if a dataset has v3.0 chunked structure
46
  */
47
- async function checkV3ChunkedStructure(repoId: string): Promise<boolean> {
48
  try {
49
- const testUrl = `${DATASET_URL}/${repoId}/resolve/v3.0/meta/episodes/chunk-000/file-000.parquet`;
50
-
51
- const controller = new AbortController();
52
- const timeoutId = setTimeout(() => controller.abort(), 10000);
53
 
54
- const response = await fetch(testUrl, {
55
- method: "HEAD",
56
- cache: "no-store",
57
- signal: controller.signal
58
- });
59
 
60
- clearTimeout(timeoutId);
 
 
 
 
 
 
 
 
61
 
62
- return response.ok;
63
  } catch (error) {
64
- return false;
65
- }
66
- }
67
-
68
- /**
69
- * Determines the best available version for a dataset.
70
- * Prefers v3.0, falls back to v2.1, then v2.0, or throws an error if none exist.
71
- */
72
- export async function getDatasetVersion(repoId: string): Promise<string> {
73
- // Check for v3.0 first - must have both info.json AND chunked episode structure
74
- const hasV3Info = await checkVersionExists(repoId, "v3.0");
75
-
76
- if (hasV3Info) {
77
- const hasV3Structure = await checkV3ChunkedStructure(repoId);
78
-
79
- if (hasV3Structure) {
80
- return "v3.0";
81
  }
 
 
 
 
82
  }
83
-
84
- // Check for v2.1
85
- const hasV21 = await checkVersionExists(repoId, "v2.1");
86
- if (hasV21) {
87
- return "v2.1";
88
- }
89
-
90
- // Fall back to v2.0
91
- const hasV20 = await checkVersionExists(repoId, "v2.0");
92
- if (hasV20) {
93
- return "v2.0";
94
- }
95
-
96
- // If none of the supported versions exist, throw an error
97
- throw new Error(
98
- `Dataset ${repoId} is not compatible with this visualizer. ` +
99
- "This tool only works with dataset versions 3.0, 2.1, or 2.0. " +
100
- "Please use a compatible dataset version."
101
- );
102
  }
103
 
104
- /**
105
- * Constructs a versioned URL for dataset resources
106
- */
107
  export function buildVersionedUrl(repoId: string, version: string, path: string): string {
108
- return `${DATASET_URL}/${repoId}/resolve/${version}/${path}`;
109
  }
110
 
 
5
  const DATASET_URL = process.env.DATASET_URL || "https://huggingface.co/datasets";
6
 
7
  /**
8
+ * Dataset information structure from info.json
9
  */
10
+ interface DatasetInfo {
11
+ codebase_version: string;
12
+ robot_type: string | null;
13
+ total_episodes: number;
14
+ total_frames: number;
15
+ total_tasks: number;
16
+ chunks_size: number;
17
+ data_files_size_in_mb: number;
18
+ video_files_size_in_mb: number;
19
+ fps: number;
20
+ splits: Record<string, string>;
21
+ data_path: string;
22
+ video_path: string;
23
+ features: Record<string, any>;
24
+ }
25
+
26
+ /**
27
+ * Fetches dataset information from the main revision
28
+ */
29
+ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
30
  try {
31
+ const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
32
 
 
33
  const controller = new AbortController();
34
  const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout
35
 
 
41
 
42
  clearTimeout(timeoutId);
43
 
44
+ if (!response.ok) {
45
+ throw new Error(`Failed to fetch dataset info: ${response.status}`);
46
+ }
47
+
48
+ const data = await response.json();
49
+
50
+ // Check if it has the required structure
51
+ if (!data.features) {
52
+ throw new Error("Dataset info.json does not have the expected features structure");
 
53
  }
54
 
55
+ return data as DatasetInfo;
56
  } catch (error) {
57
+ if (error instanceof Error) {
58
+ throw error;
59
+ }
60
+ throw new Error(
61
+ `Dataset ${repoId} is not compatible with this visualizer. ` +
62
+ "Failed to read dataset information from the main revision."
63
+ );
64
  }
65
  }
66
 
67
+
68
  /**
69
+ * Gets the dataset version by reading the codebase_version from the main revision's info.json
70
  */
71
+ export async function getDatasetVersion(repoId: string): Promise<string> {
72
  try {
73
+ const datasetInfo = await getDatasetInfo(repoId);
 
 
 
74
 
75
+ // Extract codebase_version
76
+ const codebaseVersion = datasetInfo.codebase_version;
77
+ if (!codebaseVersion) {
78
+ throw new Error("Dataset info.json does not contain codebase_version");
79
+ }
80
 
81
+ // Validate that it's a supported version
82
+ const supportedVersions = ["v3.0", "v2.1", "v2.0"];
83
+ if (!supportedVersions.includes(codebaseVersion)) {
84
+ throw new Error(
85
+ `Dataset ${repoId} has codebase version ${codebaseVersion}, which is not supported. ` +
86
+ "This tool only works with dataset versions 3.0, 2.1, or 2.0. " +
87
+ "Please use a compatible dataset version."
88
+ );
89
+ }
90
 
91
+ return codebaseVersion;
92
  } catch (error) {
93
+ if (error instanceof Error) {
94
+ throw error;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  }
96
+ throw new Error(
97
+ `Dataset ${repoId} is not compatible with this visualizer. ` +
98
+ "Failed to read dataset information from the main revision."
99
+ );
100
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  }
102
 
 
 
 
103
  export function buildVersionedUrl(repoId: string, version: string, path: string): string {
104
+ return `${DATASET_URL}/${repoId}/resolve/main/${path}`;
105
  }
106