// ai-toolkit/ui/src/components/HFJobsWorkflow.tsx
// (imported from commit e2a89a3 — "disable frenzi")
'use client';
import { useEffect, useState } from 'react';
import { Button } from '@headlessui/react';
import { SelectInput, TextInput, Checkbox } from '@/components/formInputs';
import Card from '@/components/Card';
import { apiClient } from '@/utils/api';
import { JobConfig } from '@/types';
// One local file or folder queued for upload, paired with its destination
// path (always POSIX-style, repo-relative) inside the HF dataset repository.
type DatasetUploadArtifact = {
localPath: string;
repoPath: string;
};
// Repo-relative rewrite of the job config's dataset and sample path fields,
// uploaded alongside the artifacts so the cloud job can resolve the same
// datasets/samples from within the repository.
type DatasetManifest = {
datasets: any[];
samples: any[];
};
// Full plan for mirroring local training data into an HF dataset repo:
// the artifacts to push plus the manifest describing their new locations.
type DatasetUploadPlan = {
artifacts: DatasetUploadArtifact[];
manifest: DatasetManifest;
};
// Normalize a path for use inside the dataset repo: backslashes become
// forward slashes and leading slashes are stripped (repo paths are relative).
const ensurePosixPath = (value: string) => {
  const forwardSlashed = value.replace(/\\/g, '/');
  return forwardSlashed.replace(/^\/+/, '');
};
// Model architectures that train on instruction/edit pairs and therefore
// require control images for both datasets and sample prompts.
// Compared against a lower-cased arch string, so entries must be lowercase.
const INSTRUCTION_ARCHES = new Set([
'flux_kontext',
'hidream_e1',
'qwen_image_edit',
'qwen_image_edit_plus',
]);
/**
 * Build the upload plan for a job config: collect every local dataset path
 * (image folders, auxiliary control/mask/etc. paths, sample control images),
 * assign each a deterministic repo-relative destination, and produce a
 * manifest mapping the original config fields to those destinations.
 *
 * Only the first process entry of the config is considered. Blank/empty
 * paths are skipped; the first artifact registered for a given repo path
 * wins (deduplication by destination).
 */
const buildDatasetUploadPlan = (jobConfig: JobConfig): DatasetUploadPlan => {
  const processEntry = jobConfig?.config?.process?.[0];
  const datasetList = processEntry?.datasets ?? [];
  const sampleList = processEntry?.sample?.samples ?? [];

  const artifactsByRepoPath = new Map<string, DatasetUploadArtifact>();
  const datasetManifest: any[] = [];
  const sampleManifest: any[] = [];

  // Register a local path for upload under a repo destination.
  // Skips blank paths; first registration for a destination wins.
  const addArtifact = (localPath: string | null | undefined, repoPath: string) => {
    if (!localPath) {
      return;
    }
    const cleanedLocal = localPath.trim();
    if (cleanedLocal === '') {
      return;
    }
    const repoKey = ensurePosixPath(repoPath);
    if (artifactsByRepoPath.has(repoKey)) {
      return;
    }
    artifactsByRepoPath.set(repoKey, { localPath: cleanedLocal, repoPath: repoKey });
  };

  // Auxiliary dataset path fields and the repo sub-folder each maps to.
  const auxiliaryPathFields: Record<string, string> = {
    control_path: 'control',
    inpaint_path: 'inpaint',
    mask_path: 'mask',
    unconditional_path: 'unconditional',
    clip_image_path: 'clip_images',
  };

  datasetList.forEach((dataset, datasetIdx) => {
    const prefix = `datasets/dataset_${datasetIdx}`;
    const entry: Record<string, any> = {};

    // Main image folder for this dataset.
    const folderPath = (dataset as any).folder_path as string | null | undefined;
    if (folderPath && folderPath.trim() !== '') {
      const imagesRepoPath = `${prefix}/images`;
      addArtifact(folderPath, imagesRepoPath);
      entry.folder_path = ensurePosixPath(imagesRepoPath);
    }

    // Auxiliary paths may be a single string or an array of strings.
    for (const [field, suffix] of Object.entries(auxiliaryPathFields)) {
      const rawValue = (dataset as any)[field];
      if (rawValue === null || rawValue === undefined) {
        continue;
      }
      const candidates = Array.isArray(rawValue) ? rawValue : [rawValue];
      const cleaned = candidates
        .map(value => (typeof value === 'string' ? value.trim() : value))
        .filter(value => typeof value === 'string' && value !== '') as string[];
      if (cleaned.length === 0) {
        continue;
      }
      if (cleaned.length === 1) {
        // Single path keeps a scalar manifest value.
        const repoPath = `${prefix}/${suffix}`;
        addArtifact(cleaned[0], repoPath);
        entry[field] = ensurePosixPath(repoPath);
      } else {
        // Multiple paths become an indexed list of destinations.
        entry[field] = cleaned.map((value, idx) => {
          const repoPath = `${prefix}/${suffix}_${idx}`;
          addArtifact(value, repoPath);
          return ensurePosixPath(repoPath);
        });
      }
    }

    datasetManifest.push(entry);
  });

  // Sample-prompt control images: keep the original extension when present,
  // default to .png otherwise.
  sampleList.forEach((sample, sampleIdx) => {
    const ctrlImg = (sample as any)?.ctrl_img as string | undefined;
    if (!ctrlImg || ctrlImg.trim() === '') {
      return;
    }
    const cleanedCtrl = ctrlImg.trim();
    const extMatch = cleanedCtrl.match(/\.([a-zA-Z0-9]+)$/);
    const extension = extMatch ? extMatch[0].toLowerCase() : '.png';
    const repoPath = ensurePosixPath(`samples/ctrl/sample_${sampleIdx}${extension}`);
    addArtifact(cleanedCtrl, repoPath);
    sampleManifest.push({ index: sampleIdx, ctrl_img: repoPath });
  });

  return {
    artifacts: Array.from(artifactsByRepoPath.values()),
    manifest: {
      datasets: datasetManifest,
      samples: sampleManifest,
    },
  };
};
import useSettings from '@/hooks/useSettings';
import { upsertJob } from '@/utils/storage/jobStorage';
import { useAuth } from '@/contexts/AuthContext';
// Props for the HFJobsWorkflow wizard.
interface HFJobsWorkflowProps {
// Fully configured training job to submit to HF Jobs.
jobConfig: JobConfig;
// Invoked after a successful submission with the HF job id and, when the
// job could also be saved locally, the local job record id.
onComplete: (jobId: string, localJobId?: string) => void;
// When true, the UI offers opt-in to the hackathon ("LoRA Frenzi") flow.
hackathonEligible?: boolean;
}
// Linear wizard steps, rendered in this order by the progress indicator.
type Step = 'validate' | 'upload' | 'submit' | 'complete';
export default function HFJobsWorkflow({ jobConfig, onComplete, hackathonEligible = false }: HFJobsWorkflowProps) {
const { settings } = useSettings();
const { token: authToken } = useAuth();
const [defaultNamespace, setDefaultNamespace] = useState('');
const [currentStep, setCurrentStep] = useState<Step>('validate');
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
// Form state
const [datasetSource, setDatasetSource] = useState<'upload' | 'existing'>('upload');
const [datasetName, setDatasetName] = useState(`${jobConfig.config.name}-dataset`);
const [existingDatasetId, setExistingDatasetId] = useState('');
const [hardware, setHardware] = useState(settings.HF_JOBS_DEFAULT_HARDWARE || 'a100-large');
const [namespace, setNamespace] = useState(settings.HF_JOBS_NAMESPACE || '');
const [autoUpload, setAutoUpload] = useState(true);
const [participateHackathon, setParticipateHackathon] = useState(true);
const [participationTouched, setParticipationTouched] = useState(false);
const requiresControlImages = (() => {
try {
const arch = jobConfig?.config?.process?.[0]?.model?.arch;
return typeof arch === 'string' && INSTRUCTION_ARCHES.has(arch.toLowerCase());
} catch (error) {
return false;
}
})();
const samplingDisabled = (() => {
try {
return Boolean(jobConfig?.config?.process?.[0]?.train?.disable_sampling);
} catch (error) {
return false;
}
})();
const hasControlDataset = (() => {
try {
const datasets = jobConfig?.config?.process?.[0]?.datasets ?? [];
return datasets.some((dataset: any) => {
const controlPath = dataset?.control_path;
if (Array.isArray(controlPath)) {
return controlPath.some(path => typeof path === 'string' && path.trim() !== '');
}
return typeof controlPath === 'string' && controlPath.trim() !== '';
});
} catch (error) {
return false;
}
})();
useEffect(() => {
if (!hackathonEligible) {
if (participateHackathon) {
setParticipateHackathon(false);
}
if (participationTouched) {
setParticipationTouched(false);
}
} else if (!participateHackathon && !participationTouched) {
setParticipateHackathon(true);
}
}, [hackathonEligible, participateHackathon, participationTouched]);
// Progress state
const [validationResult, setValidationResult] = useState<any>(null);
const [uploadResult, setUploadResult] = useState<any>(null);
const [jobResult, setJobResult] = useState<any>(null);
const validateToken = async () => {
setLoading(true);
setError(null);
const effectiveToken = authToken || settings.HF_TOKEN;
try {
if (!effectiveToken) {
throw new Error('A valid Hugging Face token is required to continue.');
}
// Validate token first
const response = await apiClient.post('/api/hf-hub', {
action: 'whoami',
token: effectiveToken,
});
if (response.data.user) {
setValidationResult(response.data.user);
const resolvedName = response.data.user.name || '';
setDefaultNamespace(resolvedName);
if (!namespace) {
setNamespace(resolvedName);
}
// After token is validated, check capacity if participating in hackathon
if (hackathonEligible && participateHackathon) {
console.log('Checking HF Jobs capacity for hackathon namespace...');
const capacityResponse = await apiClient.post('/api/hf-jobs', {
action: 'checkCapacity',
token: effectiveToken,
});
console.log('Capacity check response:', capacityResponse.data);
console.log('Running jobs:', capacityResponse.data.runningJobs);
console.log('At capacity:', capacityResponse.data.atCapacity);
if (capacityResponse.data.atCapacity) {
throw new Error('Whoa, our GPUs are going brr 🔥, we are at capacity right now. Try again soon, hitting the Get Started button again');
}
}
setCurrentStep('upload');
}
} catch (err: any) {
setError(err.response?.data?.error || err.message || 'Failed to validate token');
} finally {
setLoading(false);
}
};
const uploadDataset = async () => {
setLoading(true);
setError(null);
const effectiveToken = authToken || settings.HF_TOKEN;
try {
if (!effectiveToken) {
throw new Error('A valid Hugging Face token is required to continue.');
}
const resolvedNamespace = namespace || defaultNamespace;
if (!resolvedNamespace) {
throw new Error('Unable to determine a namespace. Validate your HF token or set a namespace in Settings.');
}
if (requiresControlImages) {
if (!hasControlDataset) {
throw new Error('Instruction models require a control dataset. Please select or upload a control dataset before continuing.');
}
if (!samplingDisabled) {
const samples = jobConfig?.config?.process?.[0]?.sample?.samples ?? [];
const missingCtrl = samples.filter((sample: any) => !sample?.ctrl_img || !String(sample.ctrl_img).trim());
if (missingCtrl.length > 0) {
throw new Error('Instruction models require a control image for every sample prompt. Please add control images before continuing.');
}
}
}
if (datasetSource === 'existing') {
// Use existing dataset - just validate it exists
if (!existingDatasetId) {
throw new Error('Please enter a dataset ID');
}
// Validate dataset exists
const validateResponse = await apiClient.post('/api/hf-hub', {
action: 'validateDataset',
token: effectiveToken,
datasetId: existingDatasetId,
});
if (validateResponse.data.exists) {
setUploadResult({
repoId: existingDatasetId,
url: `https://huggingface.co/datasets/${existingDatasetId}`,
existing: true,
});
setCurrentStep('submit');
} else {
throw new Error(`Dataset ${existingDatasetId} not found or not accessible`);
}
} else {
if (!resolvedNamespace) {
throw new Error('Unable to determine a namespace. Validate your HF token or set a namespace in Settings.');
}
// Upload new dataset
// First, create the dataset repository
const createResponse = await apiClient.post('/api/hf-hub', {
action: 'createDataset',
token: effectiveToken,
namespace: resolvedNamespace,
datasetName,
});
if (!createResponse.data.success) {
throw new Error('Failed to create dataset repository');
}
const uploadPlan = buildDatasetUploadPlan(jobConfig);
if (!uploadPlan.artifacts || uploadPlan.artifacts.length === 0) {
throw new Error('Dataset path could not be resolved. Please ensure the dataset folders exist on the host.');
}
const uploadResponse = await apiClient.post('/api/hf-hub', {
action: 'uploadDataset',
token: effectiveToken,
namespace: resolvedNamespace,
datasetName,
artifacts: uploadPlan.artifacts,
manifest: uploadPlan.manifest,
});
if (uploadResponse.data.success) {
setUploadResult({
repoId: uploadResponse.data.repoId,
url: `https://huggingface.co/datasets/${uploadResponse.data.repoId}`,
existing: false,
});
setCurrentStep('submit');
}
}
} catch (err: any) {
setError(err.response?.data?.error || err.message || 'Failed to process dataset');
} finally {
setLoading(false);
}
};
const submitJob = async () => {
setLoading(true);
setError(null);
const effectiveToken = authToken || settings.HF_TOKEN;
try {
const resolvedNamespace = namespace || defaultNamespace;
if (!resolvedNamespace) {
throw new Error('Unable to determine a namespace. Validate your HF token or set a namespace in Settings.');
}
if (!effectiveToken) {
throw new Error('A valid Hugging Face token is required to continue.');
}
if (requiresControlImages) {
if (!hasControlDataset) {
setError('Instruction models require a control dataset. Please select one before submitting.');
setLoading(false);
return;
}
if (!samplingDisabled) {
const samples = jobConfig?.config?.process?.[0]?.sample?.samples ?? [];
const missingCtrl = samples.filter((sample: any) => !sample?.ctrl_img || !String(sample.ctrl_img).trim());
if (missingCtrl.length > 0) {
setError('Instruction models require a control image for every sample prompt. Please add control images before submitting.');
setLoading(false);
return;
}
}
}
const datasetRepo =
uploadResult?.repoId ||
(datasetSource === 'existing'
? existingDatasetId
: `${resolvedNamespace}/${datasetName}`);
const response = await apiClient.post('/api/hf-jobs', {
action: 'submitJob',
token: effectiveToken,
hardware,
namespace: resolvedNamespace,
jobConfig,
datasetRepo,
participateHackathon: hackathonEligible && participateHackathon,
});
if (response.data.success) {
const hfJobId = response.data.jobId;
const jobNamespace = response.data.jobNamespace || resolvedNamespace;
// Save job to local database for tracking
let localJobId = undefined;
try {
const savedJob = await upsertJob({
name: `${jobConfig.config.name}-hf-cloud`,
gpu_ids: hardware,
job_config: {
...jobConfig,
hf_job_id: hfJobId,
hf_job_url:
hfJobId !== 'unknown'
? `https://huggingface.co/jobs/${jobNamespace}/${hfJobId}`
: null,
dataset_repo: datasetRepo,
hardware,
is_hf_job: true,
training_backend: 'hf-jobs',
hf_job_submitted: true,
hf_job_namespace: jobNamespace,
},
info: response.data.message || 'HF Job submitted',
status: 'submitted',
});
localJobId = savedJob.id;
console.log('Saved HF Job to local storage:', savedJob);
} catch (localSaveError: any) {
console.warn('Failed to save HF Job locally:', localSaveError);
// Attempt to create a fallback entry with a unique name if the conflict is due to duplicates
if (localSaveError?.code === 'P2002') {
const fallbackName = `${jobConfig.config.name}-${hfJobId?.slice(-6) || Date.now()}`.replace(/[^a-zA-Z0-9-_]/g, '_');
try {
const savedJob = await upsertJob({
name: `${fallbackName}-hf-cloud`,
gpu_ids: hardware,
job_config: {
...jobConfig,
hf_job_id: hfJobId,
hf_job_url:
hfJobId !== 'unknown'
? `https://huggingface.co/jobs/${jobNamespace}/${hfJobId}`
: null,
dataset_repo: datasetRepo,
hardware,
is_hf_job: true,
training_backend: 'hf-jobs',
hf_job_submitted: true,
hf_job_namespace: jobNamespace,
},
info: response.data.message || 'HF Job submitted',
status: 'submitted',
});
localJobId = savedJob.id;
console.log('Saved HF Job with fallback name:', savedJob);
} catch (fallbackError) {
console.warn('Fallback save for HF Job failed:', fallbackError);
}
}
}
setJobResult({
jobId: hfJobId,
message: response.data.message,
localJobId: localJobId,
jobNamespace,
});
setCurrentStep('complete');
onComplete(hfJobId, localJobId);
}
} catch (err: any) {
setError(err.response?.data?.error || 'Failed to submit job');
} finally {
setLoading(false);
}
};
const renderStepContent = () => {
switch (currentStep) {
case 'validate':
return (
<Card title="Validate HF Token">
<div className="space-y-4">
{hackathonEligible && (
<div className="space-y-3">
<Checkbox
label="Participate in LoRA Frenzi"
checked={participateHackathon}
onChange={value => {
setParticipationTouched(true);
setParticipateHackathon(value);
}}
/>
{participateHackathon && (
<ul className="text-xs text-gray-400 space-y-1 pl-4 list-disc">
<li>Maximum 5,000 training steps per run</li>
<li>Jobs longer than 6 hours will time out</li>
<li>Train only one LoRA simultaneously</li>
<li>Do not train on likenesses without consent or NSFW content</li>
</ul>
)}
</div>
)}
<p className="text-sm text-gray-400">
{hackathonEligible && participateHackathon
? "To continue, accept the rules above and we'll validate your Hugging Face token."
: "Click below to validate your Hugging Face token and start training. Train LoRAs at $0.042/minute if you are a PRO user."}
</p>
{validationResult && (
<div className="p-3 bg-green-900/20 border border-green-700 rounded">
<p className="text-green-400">
✓ Token valid! Logged in as: <strong>{validationResult.name}</strong>
</p>
</div>
)}
<Button
onClick={validateToken}
disabled={
loading ||
!(authToken || settings.HF_TOKEN)
}
className="w-full px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded disabled:opacity-50"
>
{loading ? 'Validating...' : (hackathonEligible && participateHackathon ? 'I accept the rules, get started' : 'Get started')}
</Button>
</div>
</Card>
);
case 'upload':
return (
<Card title="Dataset Configuration">
<div className="space-y-4">
<p className="text-sm text-gray-400">
Choose whether to upload a new dataset or use an existing one from HF Hub.
</p>
<SelectInput
label="Dataset Source"
value={datasetSource}
onChange={(value) => setDatasetSource(value as 'upload' | 'existing')}
options={[
{ value: 'upload', label: 'Upload New Dataset' },
{ value: 'existing', label: 'Use Existing HF Dataset' }
]}
/>
{datasetSource === 'upload' ? (
<>
<TextInput
label="Dataset Name"
value={datasetName}
onChange={setDatasetName}
placeholder="my-training-dataset"
required
/>
<TextInput
label="Namespace"
value={namespace}
onChange={setNamespace}
placeholder="your-username or org-name"
required
/>
</>
) : (
<>
<TextInput
label="Existing Dataset ID"
value={existingDatasetId}
onChange={setExistingDatasetId}
placeholder="e.g. multimodalart/flux-tarot-v1 or username/dataset-name"
required
/>
<p className="text-xs text-gray-500">
Enter the full dataset ID (namespace/name) from HuggingFace Hub
</p>
</>
)}
{uploadResult && (
<div className="p-3 bg-green-900/20 border border-green-700 rounded">
<p className="text-green-400">
✓ Dataset {uploadResult.existing ? 'validated' : 'uploaded'} successfully!
</p>
<p className="text-sm text-gray-400 mt-1">
{uploadResult.existing ? 'Using dataset:' : 'View at:'} <a href={uploadResult.url} target="_blank" rel="noopener noreferrer" className="text-blue-400 underline">{uploadResult.repoId}</a>
</p>
</div>
)}
<Button
onClick={uploadDataset}
disabled={loading || (datasetSource === 'upload' ? (!datasetName || !namespace) : !existingDatasetId)}
className="w-full px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded disabled:opacity-50"
>
{loading ? (datasetSource === 'upload' ? 'Uploading...' : 'Validating...') : (datasetSource === 'upload' ? 'Upload Dataset' : 'Validate Dataset')}
</Button>
</div>
</Card>
);
case 'submit':
return (
<Card title="Submit Training Job">
<div className="space-y-4">
<p className="text-sm text-gray-400">
Configure and submit your training job to HF Jobs.
</p>
<SelectInput
label="Hardware"
value={hardware}
onChange={setHardware}
options={[
{ value: 'cpu-basic', label: 'CPU Basic' },
{ value: 'cpu-upgrade', label: 'CPU Upgrade' },
{ value: 't4-small', label: 'T4 Small' },
{ value: 't4-medium', label: 'T4 Medium' },
{ value: 'l4x1', label: 'L4x1' },
{ value: 'l4x4', label: 'L4x4' },
{ value: 'a10g-small', label: 'A10G Small' },
{ value: 'a10g-large', label: 'A10G Large' },
{ value: 'a10g-largex2', label: 'A10G Large x2' },
{ value: 'a10g-largex4', label: 'A10G Large x4' },
{ value: 'a100-large', label: 'A100 Large' },
{ value: 'v5e-1x1', label: 'TPU v5e-1x1' },
{ value: 'v5e-2x2', label: 'TPU v5e-2x2' },
{ value: 'v5e-2x4', label: 'TPU v5e-2x4' },
]}
/>
<Checkbox
label="Auto-upload trained model to HF Hub"
checked={autoUpload}
onChange={setAutoUpload}
/>
{jobResult && (
<div className="p-3 bg-green-900/20 border border-green-700 rounded">
<p className="text-green-400">
✓ Job submitted successfully!
</p>
<p className="text-sm text-gray-400 mt-1">
Job ID: <code className="bg-gray-800 px-2 py-1 rounded text-xs">{jobResult.jobId}</code>
</p>
</div>
)}
<Button
onClick={submitJob}
disabled={loading || !hardware}
className="w-full px-4 py-2 bg-green-600 hover:bg-green-700 text-white rounded disabled:opacity-50"
>
{loading ? 'Submitting...' : 'Submit Training Job'}
</Button>
</div>
</Card>
);
case 'complete':
return (
<Card title="Job Submitted Successfully">
<div className="space-y-4">
<div className="p-4 bg-green-900/20 border border-green-700 rounded">
<h3 className="text-lg font-semibold text-green-400 mb-2">🎉 Training job submitted!</h3>
<p className="text-sm text-gray-300 mb-2">
Your training job has been submitted to Hugging Face Jobs and is now running in the cloud.
</p>
<div className="space-y-2">
<p className="text-sm">
<strong>Job ID:</strong> <code className="bg-gray-800 px-2 py-1 rounded text-xs">{jobResult?.jobId}</code>
</p>
{jobResult?.jobId && jobResult.jobId !== 'unknown' && (
<p className="text-sm">
<strong>Monitor Job:</strong>{' '}
<a
href={`https://huggingface.co/jobs/${jobResult.jobNamespace || namespace}/${jobResult.jobId}`}
target="_blank"
rel="noopener noreferrer"
className="text-blue-400 underline"
>
View on HF Jobs →
</a>
</p>
)}
<p className="text-sm">
<strong>Dataset:</strong>{' '}
<a
href={uploadResult?.url}
target="_blank"
rel="noopener noreferrer"
className="text-blue-400 underline"
>
{uploadResult?.repoId}
</a>
</p>
<p className="text-sm">
<strong>Hardware:</strong> {hardware}
</p>
</div>
</div>
<div className="text-sm text-gray-400 space-y-2">
<p><strong>Next steps:</strong></p>
<ul className="list-disc list-inside space-y-1 ml-4">
<li>Monitor your job progress using: <code className="bg-gray-800 px-2 py-1 rounded text-xs">hf jobs logs {jobResult?.jobId}</code></li>
<li>The trained model will be uploaded to: <code className="bg-gray-800 px-2 py-1 rounded text-xs">{namespace}/{jobConfig.config.name}-lora</code></li>
<li>You'll receive notifications when training completes</li>
</ul>
</div>
</div>
</Card>
);
default:
return null;
}
};
return (
<div className="space-y-6">
<h2 id="hf-start-training" className="text-lg font-semibold text-gray-100">Start training</h2>
{/* Progress indicator */}
<div className="flex items-center justify-between mb-6">
{(['validate', 'upload', 'submit', 'complete'] as Step[]).map((step, index) => (
<div key={step} className="flex items-center">
<div className={`w-8 h-8 rounded-full flex items-center justify-center text-sm font-semibold ${
currentStep === step
? 'bg-blue-600 text-white'
: index < (['validate', 'upload', 'submit', 'complete'] as Step[]).indexOf(currentStep)
? 'bg-green-600 text-white'
: 'bg-gray-600 text-gray-300'
}`}>
{index + 1}
</div>
{index < 3 && (
<div className={`w-16 h-1 mx-2 ${
index < (['validate', 'upload', 'submit', 'complete'] as Step[]).indexOf(currentStep)
? 'bg-green-600'
: 'bg-gray-600'
}`} />
)}
</div>
))}
</div>
{/* Error display */}
{error && (
<div className="p-4 bg-red-900/20 border border-red-700 rounded">
<p className="text-red-400">{error}</p>
</div>
)}
{/* Current step content */}
{renderStepContent()}
</div>
);
}