File size: 2,847 Bytes
ad19202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/env tsx

/**
 * Script to create the Hugging Face dataset for hf-wrapped cache.
 *
 * Usage:
 *   pnpm tsx scripts/create-dataset.ts
 *
 * If you want to create the dataset, set HF_TOKEN in your environment
 * or .env.local file. You can also set:
 *   - WRAPPED_DATASET_NAME (default: "hf-wrapped-2025")
 *   - WRAPPED_DATASET_ORG (default: "hf-wrapped")
 */

import { existsSync } from "node:fs";
import { resolve } from "node:path";
import { config } from "dotenv";

// Pull in overrides from a project-local .env.local, when one is present in
// the current working directory. This must happen before process.env is read.
const localEnvFile = resolve(process.cwd(), ".env.local");
const hasLocalEnvFile = existsSync(localEnvFile);
if (hasLocalEnvFile) {
	config({ path: localEnvFile });
}

// Read all configuration in one place; empty or unset overrides fall back to
// the defaults below.
const {
	WRAPPED_DATASET_NAME: datasetNameOverride,
	WRAPPED_DATASET_ORG: organizationOverride,
	HF_TOKEN,
} = process.env;
const DATASET_NAME = datasetNameOverride || "hf-wrapped-2025";
const ORGANIZATION = organizationOverride || "hf-wrapped";

// Fully qualified dataset id in "<organization>/<name>" form.
const DATASET_ID = [ORGANIZATION, DATASET_NAME].join("/");

// TODO: Dataset publish is pending due to earlier blockers; keep this
//       script ready to resume once resolved.

// Without a token there is nothing to do: explain how to enable the script
// and finish with a success exit code.
if (HF_TOKEN === undefined || HF_TOKEN === "") {
	const skipNotice = [
		"ℹ️ HF_TOKEN not set; skipping dataset creation.",
		"   Add HF_TOKEN to .env.local to enable this script.",
	];
	for (const line of skipNotice) {
		console.log(line);
	}
	process.exit(0);
}

// Past the guard above HF_TOKEN is a non-empty string; keep it under a
// narrowed alias for the rest of the script.
const token = HF_TOKEN;

/**
 * Create the dataset repository `DATASET_ID` on the Hugging Face Hub.
 *
 * Sends an authenticated POST (bearer `token`) to the Hub's repo-creation
 * endpoint with the dataset name and organization, then prints the
 * `.env.local` entries to add. A dataset that already exists is reported as
 * a success. Any other failure (non-OK response or a rejected `fetch`) is
 * logged to stderr and terminates the process with exit code 1.
 *
 * @returns Promise that resolves once the outcome has been printed.
 */
async function createDataset() {
	console.log(`📦 Creating dataset: ${DATASET_ID}`);
	console.log(`   Organization: ${ORGANIZATION}`);
	console.log(`   Dataset name: ${DATASET_NAME}\n`);

	try {
		const response = await fetch(
			"https://huggingface.co/api/repos/create",
			{
				method: "POST",
				headers: {
					Authorization: `Bearer ${token}`,
					"Content-Type": "application/json",
				},
				body: JSON.stringify({
					type: "dataset",
					name: DATASET_NAME,
					organization: ORGANIZATION,
				}),
			},
		);

		if (!response.ok) {
			const error = await response.text();

			// The Hub signals an existing repo with 409 Conflict. Rely on the
			// status code first so a reworded error body does not turn this
			// benign case into a failure; the text match is kept as a fallback.
			const alreadyExists =
				response.status === 409 || error.includes("already exists");

			if (alreadyExists) {
				console.log(`✅ Dataset ${DATASET_ID} already exists!`);
				console.log("\n📝 Next steps:");
				console.log("   1. Add to your .env.local:");
				console.log(`      WRAPPED_DATASET_ID="${DATASET_ID}"`);
				console.log("      WRAPPED_DATASET_WRITE=true");
				// Only a short prefix of the token is echoed back.
				console.log(`      HF_TOKEN=${token.substring(0, 10)}...`);
				return;
			}

			// Include the status so an empty error body is still diagnosable.
			throw new Error(
				`Failed to create dataset (HTTP ${response.status}): ${error}`,
			);
		}

		console.log("✅ Dataset created successfully!");
		console.log(`   URL: https://huggingface.co/datasets/${DATASET_ID}\n`);
		console.log("📝 Add these to your .env.local file:");
		console.log(`   WRAPPED_DATASET_ID="${DATASET_ID}"`);
		console.log("   WRAPPED_DATASET_WRITE=true");
		// Only a short prefix of the token is echoed back.
		console.log(`   HF_TOKEN=${token.substring(0, 10)}...`);
	} catch (error) {
		// Covers network failures as well as the error thrown above.
		console.error("❌ Error creating dataset:", error);
		process.exit(1);
	}
}

// Script entry point. The returned promise is intentionally not awaited:
// createDataset() reports its own failures and sets the exit code itself.
void createDataset();