support control dataset

- ui/src/app/api/hf-hub/route.ts +99 -28
- ui/src/app/api/hf-jobs/route.ts +108 -50
- ui/src/components/HFJobsWorkflow.tsx +123 -6
ui/src/app/api/hf-hub/route.ts
CHANGED

@@ -2,11 +2,12 @@ import { NextRequest, NextResponse } from 'next/server';
 import { whoAmI, createRepo, uploadFiles, datasetInfo } from '@huggingface/hub';
 import { readdir, stat } from 'fs/promises';
 import path from 'path';
+import { pathToFileURL } from 'url';
 
 export async function POST(request: NextRequest) {
   try {
     const body = await request.json();
-    const { action, token, namespace, datasetName, datasetPath, datasetId } = body;
+    const { action, token, namespace, datasetName, datasetPath, datasetId, artifacts, manifest } = body;
 
     if (!token) {
       return NextResponse.json({ error: 'HF token is required' }, { status: 400 });

@@ -46,43 +47,113 @@ export async function POST(request: NextRequest) {
 
       case 'uploadDataset':
         try {
-          if (!namespace || !datasetName
+          if (!namespace || !datasetName) {
             return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 });
           }
 
           const repoId = `datasets/${namespace}/${datasetName}`;
-            return NextResponse.json({ error: 'Dataset path does not exist' }, { status: 400 });
+          const structuredArtifacts = Array.isArray(artifacts) ? artifacts : [];
+          const hasStructuredArtifacts = structuredArtifacts.length > 0;
+
+          if (!hasStructuredArtifacts && !datasetPath) {
+            return NextResponse.json({ error: 'Dataset path could not be resolved' }, { status: 400 });
           }
 
+          const filesToUpload: { path: string; content: any }[] = [];
+          const uploadedPaths = new Set<string>();
+
+          const normalizeRepoPath = (value: string) => value.replace(/\\/g, '/').replace(/^\/+/, '');
+
+          const addUploadContent = (repoFilePath: string, content: any) => {
+            const normalizedRepoPath = normalizeRepoPath(repoFilePath);
+            if (!normalizedRepoPath || uploadedPaths.has(normalizedRepoPath)) {
+              return;
+            }
+            uploadedPaths.add(normalizedRepoPath);
+            filesToUpload.push({ path: normalizedRepoPath, content });
+          };
+
+          const addUploadFile = (absolutePath: string, repoFilePath: string) => {
+            addUploadContent(repoFilePath, pathToFileURL(absolutePath));
+          };
+
+          const walkDirectory = async (basePath: string, repoPrefix: string) => {
+            const entries = await readdir(basePath, { withFileTypes: true });
+            for (const entry of entries) {
+              const entryPath = path.join(basePath, entry.name);
+              if (entry.isDirectory()) {
+                const nextPrefix = repoPrefix ? `${repoPrefix}/${entry.name}` : entry.name;
+                await walkDirectory(entryPath, nextPrefix);
+              } else if (entry.isFile()) {
+                const repoFilePath = repoPrefix ? `${repoPrefix}/${entry.name}` : entry.name;
+                addUploadFile(entryPath, repoFilePath);
+              }
+            }
+          };
+
+          const processArtifact = async (localPath: string, repoPath: string) => {
+            const resolvedPath = path.resolve(localPath);
+            let stats;
+            try {
+              stats = await stat(resolvedPath);
+            } catch {
+              throw new Error(`Dataset path does not exist: ${localPath}`);
+            }
+
+            const normalizedRepoPrefix = repoPath ? normalizeRepoPath(repoPath) : '';
+
+            if (stats.isDirectory()) {
+              await walkDirectory(resolvedPath, normalizedRepoPrefix);
+            } else if (stats.isFile()) {
+              let destination = normalizedRepoPrefix;
+              if (!destination || destination.endsWith('/')) {
+                destination = `${destination}${path.basename(resolvedPath)}`;
+              } else if (!path.posix.extname(destination)) {
+                destination = `${destination}/${path.basename(resolvedPath)}`;
+              }
+              addUploadFile(resolvedPath, destination);
+            } else {
+              throw new Error(`Unsupported artifact type for path: ${localPath}`);
+            }
+          };
+
+          if (hasStructuredArtifacts) {
+            for (const artifact of structuredArtifacts) {
+              if (!artifact?.localPath || !artifact?.repoPath) {
+                continue;
+              }
+              await processArtifact(artifact.localPath, artifact.repoPath);
             }
+          } else {
+            const resolvedDatasetPath = path.resolve(datasetPath);
+            let datasetStats;
+            try {
+              datasetStats = await stat(resolvedDatasetPath);
+            } catch {
+              return NextResponse.json({ error: 'Dataset path does not exist' }, { status: 400 });
+            }
+
+            if (!datasetStats.isDirectory()) {
+              return NextResponse.json({ error: 'Dataset path must be a directory' }, { status: 400 });
+            }
+
+            await walkDirectory(resolvedDatasetPath, '');
+          }
+
+          if (manifest) {
+            addUploadContent('manifest.json', Buffer.from(JSON.stringify(manifest, null, 2), 'utf-8'));
           }
 
-          if (filesToUpload.length
-              repo: repoId,
-              accessToken: token,
-              files: filesToUpload,
-            });
+          if (filesToUpload.length === 0) {
+            return NextResponse.json({ error: 'No files found to upload for dataset' }, { status: 400 });
           }
 
+          await uploadFiles({
+            repo: repoId,
+            accessToken: token,
+            files: filesToUpload,
+          });
+
           return NextResponse.json({ success: true, repoId });
         } catch (error: any) {
           console.error('Upload error:', error);

@@ -162,4 +233,4 @@ export async function POST(request: NextRequest) {
     console.error('HF Hub API error:', error);
     return NextResponse.json({ error: error.message || 'Internal server error' }, { status: 500 });
   }
-}
+}
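Reviewer note: the `uploadDataset` action now accepts two request shapes: the legacy flat `datasetPath` (walked recursively as before) and a new structured form, where the client sends an `artifacts` array of local-to-repo path mappings plus a `manifest` that the route serializes into `manifest.json` at the repo root. A minimal sketch of the structured body; the token, namespace, and paths below are illustrative placeholders, not values from this commit:

const uploadDatasetBody = {
  action: 'uploadDataset',
  token: 'hf_xxx',                  // caller's HF access token (placeholder)
  namespace: 'my-user',             // hypothetical namespace
  datasetName: 'my-control-dataset',
  // Each artifact maps a local file or directory to a path inside the dataset repo.
  artifacts: [
    { localPath: '/data/train/images', repoPath: 'datasets/dataset_0/images' },
    { localPath: '/data/train/control', repoPath: 'datasets/dataset_0/control' },
  ],
  // The manifest mirrors the repo layout so the training job can re-resolve paths.
  manifest: {
    datasets: [{ folder_path: 'datasets/dataset_0/images', control_path: 'datasets/dataset_0/control' }],
    samples: [{ index: 0, ctrl_img: 'samples/ctrl/sample_0.png' }],
  },
};

When `artifacts` is empty or absent, the route falls back to walking `datasetPath`, so existing callers keep working.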
ui/src/app/api/hf-jobs/route.ts
CHANGED

@@ -205,46 +205,41 @@ def normalize_repo_id(dataset_repo: str) -> str:
 def copy_dataset_files(source_dir: str, local_path: str):
     print(f"Collecting data files from {source_dir}")
 
-                print(f"Error copying text file {txt_file}: {txt_error}")
-                continue
-
-    print(f"Prepared {len(image_files)} images and {captions_to_copy} captions in {local_path}")
-    return len(image_files), captions_to_copy
+    image_exts = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'}
+    copied_images = 0
+    copied_captions = 0
+
+    for root, _, files in os.walk(source_dir):
+        for file_name in files:
+            ext = os.path.splitext(file_name)[1].lower()
+            src_path = os.path.join(root, file_name)
+            rel_path = os.path.relpath(src_path, source_dir)
+            dest_path = os.path.join(local_path, rel_path)
+
+            dest_dir = os.path.dirname(dest_path)
+            if dest_dir and not os.path.exists(dest_dir):
+                os.makedirs(dest_dir, exist_ok=True)
+
+            if ext in image_exts:
+                try:
+                    shutil.copy2(src_path, dest_path)
+                    copied_images += 1
+                except Exception as img_error:
+                    print(f"Error copying image {src_path}: {img_error}")
+            elif ext == '.txt':
+                try:
+                    shutil.copy2(src_path, dest_path)
+                    copied_captions += 1
+                except Exception as txt_error:
+                    print(f"Error copying text file {src_path}: {txt_error}")
+            else:
+                try:
+                    shutil.copy2(src_path, dest_path)
+                except Exception as other_error:
+                    print(f"Error copying file {src_path}: {other_error}")
+
+    print(f"Prepared {copied_images} images and {copied_captions} captions in {local_path}")
+    return copied_images, copied_captions
 
 
 def download_dataset(dataset_repo: str, local_path: str):

@@ -324,18 +319,81 @@ def create_config(dataset_path: str, output_path: str):
     config_str = config_str.replace('true', 'True').replace('false', 'False').replace('null', 'None')
     config = eval(config_str)
 
+    def resolve_manifest_value(value):
+        if value is None:
+            return None
+        if isinstance(value, list):
+            resolved_list = [resolve_manifest_value(v) for v in value]
+            return [v for v in resolved_list if v is not None]
+
+        if not isinstance(value, str) or value.strip() == "":
+            return None
+
+        normalized = value.replace("\\", "/")
+        parts = [part for part in normalized.split("/") if part not in ("", ".")]
+        return os.path.normpath(os.path.join(dataset_path, *parts))
+
+    manifest_path = os.path.join(dataset_path, "manifest.json")
+    manifest_data = None
+    if os.path.isfile(manifest_path):
+        try:
+            with open(manifest_path, "r", encoding="utf-8") as manifest_file:
+                manifest_data = json.load(manifest_file)
+        except Exception as manifest_error:
+            print(f"Failed to load dataset manifest: {manifest_error}")
+            manifest_data = None
+
+    process_config = config["config"]["process"][0]
+
+    datasets_config = process_config.get("datasets", [])
+    if manifest_data and isinstance(manifest_data, dict) and "datasets" in manifest_data:
+        manifest_datasets = manifest_data.get("datasets", [])
+        for idx, dataset_cfg in enumerate(datasets_config):
+            manifest_entry = manifest_datasets[idx] if idx < len(manifest_datasets) else {}
+            if isinstance(manifest_entry, dict):
+                for key, value in manifest_entry.items():
+                    resolved_value = resolve_manifest_value(value)
+                    if resolved_value is not None and resolved_value != []:
+                        dataset_cfg[key] = resolved_value
+                        if key == "folder_path":
+                            dataset_cfg["dataset_path"] = resolved_value
+
+            if "folder_path" not in dataset_cfg or not dataset_cfg["folder_path"]:
+                dataset_cfg["folder_path"] = dataset_path
+                dataset_cfg["dataset_path"] = dataset_path
+    else:
+        for dataset_cfg in datasets_config:
+            dataset_cfg["folder_path"] = dataset_path
+            dataset_cfg["dataset_path"] = dataset_path
+
+    samples_config = process_config.get("sample", {}).get("samples", [])
+    if manifest_data and isinstance(manifest_data, dict):
+        manifest_samples = manifest_data.get("samples", [])
+        for sample_entry in manifest_samples:
+            if not isinstance(sample_entry, dict):
+                continue
+            index = sample_entry.get("index")
+            ctrl_img_rel = sample_entry.get("ctrl_img")
+            if (
+                isinstance(index, int)
+                and 0 <= index < len(samples_config)
+                and ctrl_img_rel is not None
+            ):
+                resolved_ctrl_img = resolve_manifest_value(ctrl_img_rel)
+                if resolved_ctrl_img:
+                    samples_config[index]["ctrl_img"] = resolved_ctrl_img
+
+    # Update training folder for cloud environment
+    process_config["training_folder"] = output_path
+
     # Remove sqlite_db_path as it's not needed for cloud training
-    if "sqlite_db_path" in
-        del
+    if "sqlite_db_path" in process_config:
+        del process_config["sqlite_db_path"]
+
     # Also change trainer type from ui_trainer to standard trainer to avoid UI dependencies
-    if
+    if process_config["type"] == "ui_trainer":
+        process_config["type"] = "sd_trainer"
+
     return config
 
 def upload_results(output_path: str, model_name: str, namespace: str, token: str, config: dict):
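Reviewer note on the manifest contract: `create_config` loads `manifest.json` from the downloaded dataset root and rewrites each manifest-relative path to an absolute one by joining it onto `dataset_path` (backslashes normalized to `/`, empty and `.` segments dropped); a resolved `folder_path` is also mirrored into `dataset_path` on the same dataset entry. A sketch of a hypothetical manifest and the absolute paths it would resolve to, assuming the dataset was downloaded to `/data/dataset`; all values are illustrative:

// Hypothetical manifest.json content; resolution results shown in comments.
const manifest = {
  datasets: [
    {
      folder_path: 'datasets/dataset_0/images',   // -> /data/dataset/datasets/dataset_0/images
      control_path: 'datasets/dataset_0/control', // -> /data/dataset/datasets/dataset_0/control
    },
  ],
  samples: [
    { index: 0, ctrl_img: 'samples/ctrl/sample_0.png' }, // -> /data/dataset/samples/ctrl/sample_0.png
  ],
};

Values that are missing, empty, or non-string resolve to None and leave the existing config value untouched, and a dataset entry that ends up without a usable folder_path falls back to the dataset root.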
ui/src/components/HFJobsWorkflow.tsx
CHANGED

@@ -6,6 +6,123 @@ import { SelectInput, TextInput, Checkbox } from '@/components/formInputs';
 import Card from '@/components/Card';
 import { apiClient } from '@/utils/api';
 import { JobConfig } from '@/types';
+
+type DatasetUploadArtifact = {
+  localPath: string;
+  repoPath: string;
+};
+
+type DatasetManifest = {
+  datasets: any[];
+  samples: any[];
+};
+
+type DatasetUploadPlan = {
+  artifacts: DatasetUploadArtifact[];
+  manifest: DatasetManifest;
+};
+
+const ensurePosixPath = (value: string) => value.replace(/\\/g, '/').replace(/^\/+/, '');
+
+const buildDatasetUploadPlan = (jobConfig: JobConfig): DatasetUploadPlan => {
+  const datasetEntries = jobConfig?.config?.process?.[0]?.datasets ?? [];
+  const sampleEntries = jobConfig?.config?.process?.[0]?.sample?.samples ?? [];
+
+  const artifactMap = new Map<string, DatasetUploadArtifact>();
+  const manifestDatasets: any[] = [];
+  const manifestSamples: any[] = [];
+
+  const recordArtifact = (localPath: string | null | undefined, repoPath: string) => {
+    if (!localPath) {
+      return;
+    }
+    const trimmedLocalPath = localPath.trim();
+    if (trimmedLocalPath === '') {
+      return;
+    }
+    const normalizedRepoPath = ensurePosixPath(repoPath);
+    if (!artifactMap.has(normalizedRepoPath)) {
+      artifactMap.set(normalizedRepoPath, {
+        localPath: trimmedLocalPath,
+        repoPath: normalizedRepoPath,
+      });
+    }
+  };
+
+  const pathFieldMappings: Record<string, string> = {
+    control_path: 'control',
+    inpaint_path: 'inpaint',
+    mask_path: 'mask',
+    unconditional_path: 'unconditional',
+    clip_image_path: 'clip_images',
+  };
+
+  datasetEntries.forEach((dataset, index) => {
+    const datasetPrefix = `datasets/dataset_${index}`;
+    const manifestEntry: Record<string, any> = {};
+
+    const folderPath = (dataset as any).folder_path as string | null | undefined;
+    if (folderPath && folderPath.trim() !== '') {
+      const repoPath = `${datasetPrefix}/images`;
+      recordArtifact(folderPath, repoPath);
+      manifestEntry.folder_path = ensurePosixPath(repoPath);
+    }
+
+    Object.entries(pathFieldMappings).forEach(([field, suffix]) => {
+      const rawValue = (dataset as any)[field];
+      if (rawValue === null || rawValue === undefined) {
+        return;
+      }
+
+      const values = Array.isArray(rawValue) ? rawValue : [rawValue];
+      const normalizedValues = values
+        .map(value => (typeof value === 'string' ? value.trim() : value))
+        .filter(value => typeof value === 'string' && value !== '') as string[];
+
+      if (normalizedValues.length === 0) {
+        return;
+      }
+
+      if (normalizedValues.length === 1) {
+        const repoPath = `${datasetPrefix}/${suffix}`;
+        recordArtifact(normalizedValues[0], repoPath);
+        manifestEntry[field] = ensurePosixPath(repoPath);
+      } else {
+        const repoLocations = normalizedValues.map((value, idx) => {
+          const repoPath = `${datasetPrefix}/${suffix}_${idx}`;
+          recordArtifact(value, repoPath);
+          return ensurePosixPath(repoPath);
+        });
+        manifestEntry[field] = repoLocations;
+      }
+    });
+
+    manifestDatasets.push(manifestEntry);
+  });
+
+  sampleEntries.forEach((sample, index) => {
+    const ctrlImg = (sample as any)?.ctrl_img as string | undefined;
+    if (!ctrlImg || ctrlImg.trim() === '') {
+      return;
+    }
+
+    const trimmedCtrlImg = ctrlImg.trim();
+    const extensionMatch = trimmedCtrlImg.match(/\.([a-zA-Z0-9]+)$/);
+    const extension = extensionMatch ? extensionMatch[0].toLowerCase() : '.png';
+    const repoPath = ensurePosixPath(`samples/ctrl/sample_${index}${extension}`);
+
+    recordArtifact(trimmedCtrlImg, repoPath);
+    manifestSamples.push({ index, ctrl_img: repoPath });
+  });
+
+  return {
+    artifacts: Array.from(artifactMap.values()),
+    manifest: {
+      datasets: manifestDatasets,
+      samples: manifestSamples,
+    },
+  };
+};
 import useSettings from '@/hooks/useSettings';
 import { upsertJob } from '@/utils/storage/jobStorage';
 import { useAuth } from '@/contexts/AuthContext';

@@ -126,19 +243,19 @@ export default function HFJobsWorkflow({ jobConfig, onComplete }: HFJobsWorkflow
         throw new Error('Failed to create dataset repository');
       }
 
-      if (!
-        throw new Error('Dataset path could not be resolved. Please ensure the dataset
+      const uploadPlan = buildDatasetUploadPlan(jobConfig);
+
+      if (!uploadPlan.artifacts || uploadPlan.artifacts.length === 0) {
+        throw new Error('Dataset path could not be resolved. Please ensure the dataset folders exist on the host.');
       }
 
-      // Upload dataset files
       const uploadResponse = await apiClient.post('/api/hf-hub', {
         action: 'uploadDataset',
        token: effectiveToken,
         namespace: resolvedNamespace,
         datasetName,
+        artifacts: uploadPlan.artifacts,
+        manifest: uploadPlan.manifest,
       });
 
       if (uploadResponse.data.success) {
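Reviewer note: with the `DatasetUploadPlan` types and `buildDatasetUploadPlan` from the hunk above in scope, a job config containing one dataset (a `folder_path` plus a single `control_path`) and one sample with a `ctrl_img` would produce a plan like this sketch; the local paths are illustrative placeholders:

const expectedPlan: DatasetUploadPlan = {
  artifacts: [
    { localPath: '/home/user/ds/images', repoPath: 'datasets/dataset_0/images' },
    { localPath: '/home/user/ds/control', repoPath: 'datasets/dataset_0/control' },
  ],
  manifest: {
    datasets: [{ folder_path: 'datasets/dataset_0/images', control_path: 'datasets/dataset_0/control' }],
    samples: [{ index: 0, ctrl_img: 'samples/ctrl/sample_0.png' }],
  },
};

List-valued fields such as multiple control paths get numbered repo suffixes (control_0, control_1, ...) and a list in the manifest, and duplicate repo paths are deduplicated through the artifact map.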
|