// ai-toolkit/ui/src/components/HFJobsWorkflow.tsx
// Hugging Face Hub listing metadata: uploaded by multimodalart ("Upload 121 files"), commit f555806 (verified), 18.3 kB.
'use client';
import { useState } from 'react';
import { Button } from '@headlessui/react';
import { SelectInput, TextInput, Checkbox } from '@/components/formInputs';
import Card from '@/components/Card';
import { apiClient } from '@/utils/api';
import { JobConfig } from '@/types';
import useSettings from '@/hooks/useSettings';
import { upsertJob } from '@/utils/storage/jobStorage';
import { useAuth } from '@/contexts/AuthContext';
/** Props accepted by the HFJobsWorkflow component. */
interface HFJobsWorkflowProps {
/** Job configuration; forwarded as-is in the job submission request and used to derive default names. */
jobConfig: JobConfig;
/**
 * Invoked after a job has been submitted successfully.
 * `jobId` is the id returned by the submission endpoint; `localJobId` is the id of the
 * locally saved job record and is undefined when the local save failed.
 */
onComplete: (jobId: string, localJobId?: string) => void;
}
/** Stages of the workflow, in the order the user moves through them. */
type Step = 'validate' | 'upload' | 'submit' | 'complete';
/** Workflow steps in display order; the progress indicator is rendered from this list. */
const WORKFLOW_STEPS: readonly Step[] = ['validate', 'upload', 'submit', 'complete'];

/** Dataset repository selected by the dataset step (freshly uploaded or pre-existing). */
interface DatasetResult {
  repoId: string;
  url: string;
  existing: boolean;
}

/** Identifiers recorded once a job has been submitted. */
interface SubmittedJob {
  jobId: string;
  message?: string;
  localJobId?: string;
}

/**
 * Picks the most specific user-facing message available from a caught value.
 *
 * Priority:
 *  1. `err.response.data.error` when it is a non-empty string (error reported by the API);
 *  2. `err.message` for errors that carry no `response` (e.g. the validation errors thrown
 *     locally by the handlers below);
 *  3. the supplied fallback.
 *
 * NOTE(review): the `response.data.error` shape mirrors what the original handlers read;
 * presumably `apiClient` is axios-like — confirm against `@/utils/api`.
 *
 * @param err      Value caught by a `catch` clause.
 * @param fallback Generic message used when nothing more specific is available.
 * @returns The message to display.
 */
function resolveErrorMessage(err: unknown, fallback: string): string {
  const response = (err as { response?: { data?: { error?: unknown } } } | null | undefined)?.response;
  const apiError = response?.data?.error;
  if (typeof apiError === 'string' && apiError !== '') {
    return apiError;
  }
  if (!response && err instanceof Error && err.message) {
    return err.message;
  }
  return fallback;
}

/**
 * Four-step wizard that validates the Hugging Face token, prepares a dataset repository
 * (upload a new one or validate an existing one), submits the training job, and shows a
 * summary of the submitted job.
 *
 * @param jobConfig  Job configuration sent with the submission request.
 * @param onComplete Called with the submitted job id (and the local record id, if saved).
 */
export default function HFJobsWorkflow({ jobConfig, onComplete }: HFJobsWorkflowProps) {
  const { settings } = useSettings();
  const { token: authToken } = useAuth();

  const [defaultNamespace, setDefaultNamespace] = useState('');
  const [currentStep, setCurrentStep] = useState<Step>('validate');
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  // Form state
  const [datasetSource, setDatasetSource] = useState<'upload' | 'existing'>('upload');
  const [datasetName, setDatasetName] = useState(`${jobConfig.config.name}-dataset`);
  const [existingDatasetId, setExistingDatasetId] = useState('');
  const [hardware, setHardware] = useState(settings.HF_JOBS_DEFAULT_HARDWARE || 'a100-large');
  const [namespace, setNamespace] = useState(settings.HF_JOBS_NAMESPACE || '');
  // NOTE(review): this flag is bound to the checkbox in the submit step but is not included
  // in the submitJob request below — confirm whether the API should receive it.
  const [autoUpload, setAutoUpload] = useState(true);

  // Progress state
  const [validationResult, setValidationResult] = useState<{ name?: string } | null>(null);
  const [uploadResult, setUploadResult] = useState<DatasetResult | null>(null);
  const [jobResult, setJobResult] = useState<SubmittedJob | null>(null);

  // Values shared by every handler and by the rendered output. The token from the auth
  // context wins over the one stored in settings; the namespace typed by the user wins over
  // the one resolved from the token.
  const effectiveToken = authToken || settings.HF_TOKEN;
  const resolvedNamespace = namespace || defaultNamespace;

  /** Step 1: checks the token via the `whoami` action and seeds the namespace from the user name. */
  const validateToken = async () => {
    setLoading(true);
    setError(null);
    try {
      if (!effectiveToken) {
        throw new Error('A valid Hugging Face token is required to continue.');
      }
      const response = await apiClient.post('/api/hf-hub', {
        action: 'whoami',
        token: effectiveToken,
      });
      const user = response.data?.user;
      if (!user) {
        // A response without a user previously left the UI idle with no feedback.
        throw new Error('Failed to validate token');
      }
      setValidationResult(user);
      const resolvedName = user.name || '';
      setDefaultNamespace(resolvedName);
      if (!namespace) {
        setNamespace(resolvedName);
      }
      setCurrentStep('upload');
    } catch (err: unknown) {
      setError(resolveErrorMessage(err, 'Failed to validate token'));
    } finally {
      setLoading(false);
    }
  };

  /**
   * Step 2: either validates that an existing dataset is reachable, or creates a dataset
   * repository and uploads the first dataset folder referenced by the job config.
   */
  const uploadDataset = async () => {
    setLoading(true);
    setError(null);
    try {
      if (!effectiveToken) {
        throw new Error('A valid Hugging Face token is required to continue.');
      }
      if (!resolvedNamespace) {
        throw new Error('Unable to determine a namespace. Validate your HF token or set a namespace in Settings.');
      }

      if (datasetSource === 'existing') {
        // Use existing dataset - just validate it exists
        if (!existingDatasetId) {
          throw new Error('Please enter a dataset ID');
        }
        const validateResponse = await apiClient.post('/api/hf-hub', {
          action: 'validateDataset',
          token: effectiveToken,
          datasetId: existingDatasetId,
        });
        if (!validateResponse.data.exists) {
          throw new Error(`Dataset ${existingDatasetId} not found or not accessible`);
        }
        setUploadResult({
          repoId: existingDatasetId,
          url: `https://huggingface.co/datasets/${existingDatasetId}`,
          existing: true,
        });
        setCurrentStep('submit');
        return;
      }

      // Resolve the local dataset path from the first dataset in the config BEFORE creating
      // the remote repository, so an invalid path does not leave an empty repo behind.
      const datasetPath = jobConfig.config.process[0]?.datasets?.[0]?.folder_path;
      if (!datasetPath || datasetPath.trim() === '' || datasetPath === datasetName) {
        throw new Error('Dataset path could not be resolved. Please ensure the dataset folder exists on the host.');
      }

      // Create the dataset repository
      const createResponse = await apiClient.post('/api/hf-hub', {
        action: 'createDataset',
        token: effectiveToken,
        namespace: resolvedNamespace,
        datasetName,
      });
      if (!createResponse.data.success) {
        throw new Error('Failed to create dataset repository');
      }

      // Upload dataset files
      const uploadResponse = await apiClient.post('/api/hf-hub', {
        action: 'uploadDataset',
        token: effectiveToken,
        namespace: resolvedNamespace,
        datasetName,
        datasetPath,
      });
      if (!uploadResponse.data.success) {
        // An unsuccessful upload previously left the UI idle with no feedback.
        throw new Error('Failed to upload dataset files');
      }
      setUploadResult({
        repoId: uploadResponse.data.repoId,
        url: `https://huggingface.co/datasets/${uploadResponse.data.repoId}`,
        existing: false,
      });
      setCurrentStep('submit');
    } catch (err: unknown) {
      setError(resolveErrorMessage(err, 'Failed to process dataset'));
    } finally {
      setLoading(false);
    }
  };

  /**
   * Step 3: submits the job, stores a local tracking record (best effort), then moves to the
   * summary step and notifies the parent through `onComplete`.
   */
  const submitJob = async () => {
    setLoading(true);
    setError(null);
    try {
      if (!resolvedNamespace) {
        throw new Error('Unable to determine a namespace. Validate your HF token or set a namespace in Settings.');
      }
      if (!effectiveToken) {
        throw new Error('A valid Hugging Face token is required to continue.');
      }

      // Prefer the repo recorded by the dataset step; otherwise derive it from the form.
      const datasetRepo =
        uploadResult?.repoId ||
        (datasetSource === 'existing' ? existingDatasetId : `${resolvedNamespace}/${datasetName}`);

      const response = await apiClient.post('/api/hf-jobs', {
        action: 'submitJob',
        token: effectiveToken,
        hardware,
        namespace: resolvedNamespace,
        jobConfig,
        datasetRepo,
      });
      if (!response.data.success) {
        // An unsuccessful submission previously left the UI idle with no feedback.
        throw new Error('Failed to submit job');
      }

      const hfJobId = response.data.jobId;

      // Save job to local database for tracking
      let localJobId: string | undefined;
      try {
        const savedJob = await upsertJob({
          name: `${jobConfig.config.name}-hf-cloud`,
          gpu_ids: hardware,
          job_config: {
            ...jobConfig,
            hf_job_id: hfJobId,
            hf_job_url:
              hfJobId !== 'unknown' && resolvedNamespace
                ? `https://huggingface.co/jobs/${resolvedNamespace}/${hfJobId}`
                : null,
            dataset_repo: datasetRepo,
            hardware,
            is_hf_job: true,
            training_backend: 'hf-jobs',
            hf_job_submitted: true,
          },
          info: response.data.message || 'HF Job submitted',
          status: 'submitted',
        });
        localJobId = savedJob.id;
        console.log('Saved HF Job to local storage:', savedJob);
      } catch (localSaveError: unknown) {
        // Don't fail the whole process if local save fails
        console.warn('Failed to save HF Job locally:', localSaveError);
      }

      setJobResult({
        jobId: hfJobId,
        message: response.data.message,
        localJobId,
      });
      setCurrentStep('complete');
      onComplete(hfJobId, localJobId);
    } catch (err: unknown) {
      setError(resolveErrorMessage(err, 'Failed to submit job'));
    } finally {
      setLoading(false);
    }
  };

  /** Renders the card that belongs to the current step. */
  const renderStepContent = () => {
    switch (currentStep) {
      case 'validate':
        return (
          <Card title="Validate HF Token">
            <div className="space-y-4">
              <p className="text-sm text-gray-400">
                First, let's validate your Hugging Face token and get your username for dataset uploads.
              </p>
              {validationResult && (
                <div className="p-3 bg-green-900/20 border border-green-700 rounded">
                  <p className="text-green-400">
                    ✓ Token valid! Logged in as: <strong>{validationResult.name}</strong>
                  </p>
                </div>
              )}
              <Button
                onClick={validateToken}
                disabled={loading || !effectiveToken}
                className="w-full px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded disabled:opacity-50"
              >
                {loading ? 'Validating...' : 'Validate Token'}
              </Button>
            </div>
          </Card>
        );
      case 'upload': {
        const isUpload = datasetSource === 'upload';
        const missingInput = isUpload ? !datasetName || !namespace : !existingDatasetId;
        const busyLabel = isUpload ? 'Uploading...' : 'Validating...';
        const idleLabel = isUpload ? 'Upload Dataset' : 'Validate Dataset';
        return (
          <Card title="Dataset Configuration">
            <div className="space-y-4">
              <p className="text-sm text-gray-400">
                Choose whether to upload a new dataset or use an existing one from HF Hub.
              </p>
              <SelectInput
                label="Dataset Source"
                value={datasetSource}
                onChange={(value) => setDatasetSource(value as 'upload' | 'existing')}
                options={[
                  { value: 'upload', label: 'Upload New Dataset' },
                  { value: 'existing', label: 'Use Existing HF Dataset' }
                ]}
              />
              {isUpload ? (
                <>
                  <TextInput
                    label="Dataset Name"
                    value={datasetName}
                    onChange={setDatasetName}
                    placeholder="my-training-dataset"
                    required
                  />
                  <TextInput
                    label="Namespace"
                    value={namespace}
                    onChange={setNamespace}
                    placeholder="your-username or org-name"
                    required
                  />
                </>
              ) : (
                <>
                  <TextInput
                    label="Existing Dataset ID"
                    value={existingDatasetId}
                    onChange={setExistingDatasetId}
                    placeholder="e.g. multimodalart/flux-tarot-v1 or username/dataset-name"
                    required
                  />
                  <p className="text-xs text-gray-500">
                    Enter the full dataset ID (namespace/name) from HuggingFace Hub
                  </p>
                </>
              )}
              {uploadResult && (
                <div className="p-3 bg-green-900/20 border border-green-700 rounded">
                  <p className="text-green-400">
                    ✓ Dataset {uploadResult.existing ? 'validated' : 'uploaded'} successfully!
                  </p>
                  <p className="text-sm text-gray-400 mt-1">
                    {uploadResult.existing ? 'Using dataset:' : 'View at:'} <a href={uploadResult.url} target="_blank" rel="noopener noreferrer" className="text-blue-400 underline">{uploadResult.repoId}</a>
                  </p>
                </div>
              )}
              <Button
                onClick={uploadDataset}
                disabled={loading || missingInput}
                className="w-full px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded disabled:opacity-50"
              >
                {loading ? busyLabel : idleLabel}
              </Button>
            </div>
          </Card>
        );
      }
      case 'submit':
        return (
          <Card title="Submit Training Job">
            <div className="space-y-4">
              <p className="text-sm text-gray-400">
                Configure and submit your training job to HF Jobs.
              </p>
              <SelectInput
                label="Hardware"
                value={hardware}
                onChange={setHardware}
                options={[
                  { value: 'cpu-basic', label: 'CPU Basic' },
                  { value: 'cpu-upgrade', label: 'CPU Upgrade' },
                  { value: 't4-small', label: 'T4 Small' },
                  { value: 't4-medium', label: 'T4 Medium' },
                  { value: 'l4x1', label: 'L4x1' },
                  { value: 'l4x4', label: 'L4x4' },
                  { value: 'a10g-small', label: 'A10G Small' },
                  { value: 'a10g-large', label: 'A10G Large' },
                  { value: 'a10g-largex2', label: 'A10G Large x2' },
                  { value: 'a10g-largex4', label: 'A10G Large x4' },
                  { value: 'a100-large', label: 'A100 Large' },
                  { value: 'v5e-1x1', label: 'TPU v5e-1x1' },
                  { value: 'v5e-2x2', label: 'TPU v5e-2x2' },
                  { value: 'v5e-2x4', label: 'TPU v5e-2x4' },
                ]}
              />
              <Checkbox
                label="Auto-upload trained model to HF Hub"
                checked={autoUpload}
                onChange={setAutoUpload}
              />
              {jobResult && (
                <div className="p-3 bg-green-900/20 border border-green-700 rounded">
                  <p className="text-green-400">
                    ✓ Job submitted successfully!
                  </p>
                  <p className="text-sm text-gray-400 mt-1">
                    Job ID: <code className="bg-gray-800 px-2 py-1 rounded text-xs">{jobResult.jobId}</code>
                  </p>
                </div>
              )}
              <Button
                onClick={submitJob}
                disabled={loading || !hardware}
                className="w-full px-4 py-2 bg-green-600 hover:bg-green-700 text-white rounded disabled:opacity-50"
              >
                {loading ? 'Submitting...' : 'Submit Training Job'}
              </Button>
            </div>
          </Card>
        );
      case 'complete':
        return (
          <Card title="Job Submitted Successfully">
            <div className="space-y-4">
              <div className="p-4 bg-green-900/20 border border-green-700 rounded">
                <h3 className="text-lg font-semibold text-green-400 mb-2">🎉 Training job submitted!</h3>
                <p className="text-sm text-gray-300 mb-2">
                  Your training job has been submitted to Hugging Face Jobs and is now running in the cloud.
                </p>
                <div className="space-y-2">
                  <p className="text-sm">
                    <strong>Job ID:</strong> <code className="bg-gray-800 px-2 py-1 rounded text-xs">{jobResult?.jobId}</code>
                  </p>
                  {jobResult?.jobId && jobResult.jobId !== 'unknown' && (
                    <p className="text-sm">
                      <strong>Monitor Job:</strong> <a
                        // Same namespace the job was submitted under (see submitJob).
                        href={`https://huggingface.co/jobs/${resolvedNamespace}/${jobResult.jobId}`}
                        target="_blank"
                        rel="noopener noreferrer"
                        className="text-blue-400 underline"
                      >
                        View on HF Jobs →
                      </a>
                    </p>
                  )}
                  <p className="text-sm">
                    <strong>Dataset:</strong> <a href={uploadResult?.url} target="_blank" rel="noopener noreferrer" className="text-blue-400 underline">{uploadResult?.repoId}</a>
                  </p>
                  <p className="text-sm">
                    <strong>Hardware:</strong> {hardware}
                  </p>
                </div>
              </div>
              <div className="text-sm text-gray-400 space-y-2">
                <p><strong>Next steps:</strong></p>
                <ul className="list-disc list-inside space-y-1 ml-4">
                  <li>Monitor your job progress using: <code className="bg-gray-800 px-2 py-1 rounded text-xs">hf jobs logs {jobResult?.jobId}</code></li>
                  <li>The trained model will be uploaded to: <code className="bg-gray-800 px-2 py-1 rounded text-xs">{resolvedNamespace}/{jobConfig.config.name}-lora</code></li>
                  <li>You'll receive notifications when training completes</li>
                </ul>
              </div>
            </div>
          </Card>
        );
      default:
        return null;
    }
  };

  // Position of the active step; steps before it are drawn as completed.
  const currentStepIndex = WORKFLOW_STEPS.indexOf(currentStep);

  return (
    <div className="space-y-6">
      {/* Progress indicator */}
      <div className="flex items-center justify-between mb-6">
        {WORKFLOW_STEPS.map((step, index) => {
          const isDone = index < currentStepIndex;
          const badgeTone =
            currentStep === step
              ? 'bg-blue-600 text-white'
              : isDone
                ? 'bg-green-600 text-white'
                : 'bg-gray-600 text-gray-300';
          return (
            <div key={step} className="flex items-center">
              <div className={`w-8 h-8 rounded-full flex items-center justify-center text-sm font-semibold ${badgeTone}`}>
                {index + 1}
              </div>
              {/* Connector after every step except the last one */}
              {index < WORKFLOW_STEPS.length - 1 && (
                <div className={`w-16 h-1 mx-2 ${isDone ? 'bg-green-600' : 'bg-gray-600'}`} />
              )}
            </div>
          );
        })}
      </div>
      {/* Error display */}
      {error && (
        <div className="p-4 bg-red-900/20 border border-red-700 rounded">
          <p className="text-red-400">{error}</p>
        </div>
      )}
      {/* Current step content */}
      {renderStepContent()}
    </div>
  );
}