File size: 4,230 Bytes
6f0655f
 
 
 
9b45fba
 
6f0655f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
abd54b8
6f0655f
 
 
 
 
abd54b8
46f3967
abd54b8
6f0655f
 
 
abd54b8
6f0655f
 
 
 
 
 
 
 
cbae1bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f0655f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/**
 * Utility functions for checking dataset version compatibility
 */

import { buildProxyUrl } from './apiHelpers';

/** Fallback base URL used when the environment does not provide one. */
const DEFAULT_DATASET_URL = "https://huggingface.co/datasets";

/**
 * Base URL under which dataset repositories are addressed.
 * Taken from the `DATASET_URL` environment variable; an unset or empty value
 * falls back to the default above.
 */
const DATASET_URL = process.env.DATASET_URL || DEFAULT_DATASET_URL;

/**
 * Shape of a dataset's `meta/info.json` document as used by this module.
 *
 * This is a compile-time description only: apart from the presence check on
 * `features` done in `getDatasetInfo`, fields are not validated at runtime.
 */
interface DatasetInfo {
  // ---- Members typed as always present ----
  total_episodes: number;
  total_frames: number;
  fps: number;
  data_path: string;
  video_path: string;
  /** Feature descriptors keyed by feature name; value shape is not modelled here. */
  features: Record<string, any>;

  // ---- Members that may be missing ----
  /** When absent, `getDatasetVersion` treats the dataset as "v2.0". */
  codebase_version?: string;
  robot_type?: string | null;
  total_tasks?: number;
  chunks_size?: number;
  data_files_size_in_mb?: number;
  video_files_size_in_mb?: number;
  splits?: Record<string, string>;
}

/**
 * Fetches `meta/info.json` from the dataset's `main` revision and checks that
 * it carries a `features` entry.
 *
 * The request goes to the URL returned by `buildProxyUrl` (the app's API
 * proxy, intended for authenticated access to private datasets) and is
 * aborted if no response arrives within 10 seconds.
 *
 * @param repoId - Dataset repository identifier, interpolated into the URL path.
 * @returns The parsed document cast to `DatasetInfo`; only the presence of
 *   `features` is verified.
 * @throws Error when the response status is not OK (dedicated messages for
 *   404 and 401), when the payload lacks `features`, or when the request or
 *   JSON parsing fails. Non-`Error` throwables are wrapped in a generic
 *   "not compatible" error.
 */
export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
  try {
    const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;

    // Use API proxy for authenticated requests
    const proxyUrl = await buildProxyUrl(testUrl);

    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout

    // Clear the timer whether the fetch resolves or rejects, so a failed
    // request does not leave a pending 10 s timer behind.
    const response = await fetch(proxyUrl, {
      method: "GET",
      cache: "no-store",
      signal: controller.signal
    }).finally(() => clearTimeout(timeoutId));

    if (!response.ok) {
      // A response body can be consumed only once, so read it as text first
      // and then attempt to interpret that text as JSON.
      const statusLine = `Status: ${response.status} ${response.statusText}`;
      const rawBody = await response.text().catch(() => "");
      let errorText = rawBody.trim() === "" ? statusLine : rawBody;
      try {
        const errorData: unknown = JSON.parse(rawBody);
        if (errorData !== null && typeof errorData === "object") {
          const { error, details } = errorData as { error?: unknown; details?: unknown };
          const detail = error || details || errorData;
          // Stringify non-string details so they do not render as "[object Object]".
          errorText = typeof detail === "string" ? detail : JSON.stringify(detail);
        }
      } catch {
        // Body is not JSON; keep the raw text (or the status line) as the detail.
      }

      // Provide helpful error message
      if (response.status === 404) {
        throw new Error(
          `Dataset ${repoId} not found (404). ` +
          `This usually means the dataset is PRIVATE and the HF_TOKEN has no access. ` +
          `URL tried: ${testUrl}. ` +
          `Error: ${errorText}`
        );
      } else if (response.status === 401) {
        throw new Error(
          `Unauthorized access to dataset ${repoId} (401). ` +
          `The HF_TOKEN in the Space needs access to this private dataset. ` +
          `Go to dataset settings → Gated user access → Add access for the token account. ` +
          `Error: ${errorText}`
        );
      } else {
        throw new Error(`Failed to fetch dataset info: ${response.status} ${response.statusText}. ${errorText}`);
      }
    }

    const data = await response.json();

    // Check if it has the required structure. Optional chaining makes a
    // `null` payload fail with this message instead of a TypeError.
    if (!data?.features) {
      throw new Error("Dataset info.json does not have the expected features structure");
    }

    return data as DatasetInfo;
  } catch (error) {
    if (error instanceof Error) {
      throw error;
    }
    throw new Error(
      `Dataset ${repoId} is not compatible with this visualizer. ` +
      "Failed to read dataset information from the main revision."
    );
  }
}


/**
 * Resolves the codebase version of a dataset from its main-revision info.json.
 *
 * A dataset whose info.json has no `codebase_version` is treated as "v2.0".
 *
 * @param repoId - Dataset repository identifier passed on to `getDatasetInfo`.
 * @returns One of "v3.0", "v2.1" or "v2.0".
 * @throws Error when the info cannot be fetched or the declared version is
 *   not one of the supported values.
 */
export async function getDatasetVersion(repoId: string): Promise<string> {
  const supported = new Set<string>(["v3.0", "v2.1", "v2.0"]);

  try {
    const { codebase_version: declared } = await getDatasetInfo(repoId);
    const version = declared ?? "v2.0";

    // Happy path first: a recognised version is returned as-is.
    if (supported.has(version)) {
      return version;
    }

    throw new Error(
      `Dataset ${repoId} has codebase version ${version}, which is not supported. ` +
      "This tool only works with dataset versions 3.0, 2.1, or 2.0. " +
      "Please use a compatible dataset version."
    );
  } catch (caught) {
    // Only non-Error throwables are replaced by the generic message;
    // real errors propagate unchanged.
    if (!(caught instanceof Error)) {
      throw new Error(
        `Dataset ${repoId} is not compatible with this visualizer. ` +
        "Failed to read dataset information from the main revision."
      );
    }
    throw caught;
  }
}

/**
 * Builds the resolve URL of a file inside a dataset repository.
 *
 * NOTE(review): `version` is accepted but not used — the URL always targets
 * the `main` revision. This looks deliberate; confirm before changing it.
 *
 * @param repoId - Dataset repository identifier.
 * @param version - Currently ignored.
 * @param path - File path relative to the repository root.
 * @returns `<DATASET_URL>/<repoId>/resolve/main/<path>`.
 */
export function buildVersionedUrl(repoId: string, version: string, path: string): string {
  const repoRoot = `${DATASET_URL}/${repoId}`;
  return `${repoRoot}/resolve/main/${path}`;
}