File size: 2,920 Bytes
1070765
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/**
 * UI elements that a dataset library can configure:
 * its display label and the links to its code repository and documentation.
 */
export interface DatasetLibraryUiElement {
	/**
	 * Human-readable name of the library.
	 * Displayed on the main call-to-action button on the dataset page
	 * (and possibly in tags — TODO confirm).
	 */
	prettyLabel: string;
	/**
	 * Name of the library's code repository (usually hosted on GitHub).
	 */
	repoName: string;
	/**
	 * URL of the library's code repository (usually hosted on GitHub).
	 */
	repoUrl: string;
	/**
	 * URL of the library's documentation. Optional.
	 */
	docsUrl?: string;
}

/**
 * UI metadata for each dataset library, keyed by library identifier.
 *
 * `satisfies` checks every entry against `DatasetLibraryUiElement` without
 * widening the object's type, so the literal keys stay available to
 * `keyof typeof DATASET_LIBRARIES_UI_ELEMENTS`.
 */
export const DATASET_LIBRARIES_UI_ELEMENTS = {
	mlcroissant: {
		prettyLabel: "Croissant",
		repoName: "croissant",
		repoUrl: "https://github.com/mlcommons/croissant/tree/main/python/mlcroissant",
		docsUrl: "https://huggingface.co/docs/dataset-viewer/mlcroissant",
	},
	webdataset: {
		prettyLabel: "WebDataset",
		repoName: "webdataset",
		repoUrl: "https://github.com/webdataset/webdataset",
		docsUrl: "https://huggingface.co/docs/hub/datasets-webdataset",
	},
	datasets: {
		prettyLabel: "Datasets",
		repoName: "datasets",
		repoUrl: "https://github.com/huggingface/datasets",
		docsUrl: "https://huggingface.co/docs/hub/datasets-usage",
	},
	pandas: {
		prettyLabel: "pandas",
		repoName: "pandas",
		repoUrl: "https://github.com/pandas-dev/pandas",
		docsUrl: "https://huggingface.co/docs/hub/datasets-pandas",
	},
	dask: {
		prettyLabel: "Dask",
		repoName: "dask",
		repoUrl: "https://github.com/dask/dask",
		docsUrl: "https://huggingface.co/docs/hub/datasets-dask",
	},
	distilabel: {
		prettyLabel: "Distilabel",
		repoName: "distilabel",
		repoUrl: "https://github.com/argilla-io/distilabel",
		docsUrl: "https://huggingface.co/docs/hub/datasets-distilabel",
	},
	fiftyone: {
		prettyLabel: "FiftyOne",
		repoName: "fiftyone",
		repoUrl: "https://github.com/voxel51/fiftyone",
		docsUrl: "https://huggingface.co/docs/hub/datasets-fiftyone",
	},
	lance: {
		prettyLabel: "Lance",
		repoName: "lance",
		repoUrl: "https://github.com/lance-format/lance",
		docsUrl: "https://huggingface.co/docs/hub/datasets-lance",
	},
	argilla: {
		prettyLabel: "Argilla",
		repoName: "argilla",
		repoUrl: "https://github.com/argilla-io/argilla",
		docsUrl: "https://huggingface.co/docs/hub/datasets-argilla",
	},
	polars: {
		prettyLabel: "Polars",
		repoName: "polars",
		repoUrl: "https://github.com/pola-rs/polars",
		docsUrl: "https://huggingface.co/docs/hub/datasets-polars",
	},
	duckdb: {
		prettyLabel: "DuckDB",
		repoName: "duckdb",
		repoUrl: "https://github.com/duckdb/duckdb",
		docsUrl: "https://huggingface.co/docs/hub/datasets-duckdb",
	},
	datadesigner: {
		prettyLabel: "NeMo Data Designer",
		repoName: "datadesigner",
		repoUrl: "https://github.com/NVIDIA-NeMo/DataDesigner",
		docsUrl: "https://nvidia-nemo.github.io/DataDesigner/",
	},
} satisfies Record<string, DatasetLibraryUiElement>;

/**
 * Identifiers of the dataset libraries supported by the Hub.
 * Derived from the keys of `DATASET_LIBRARIES_UI_ELEMENTS`, so adding an
 * entry there extends this union automatically.
 */
export type DatasetLibraryKey = keyof typeof DATASET_LIBRARIES_UI_ELEMENTS;