// NOTE(review): the lines "Spaces: Running on CPU Upgrade" that previously appeared
// here were Hugging Face Space badge text captured by page extraction, not source
// code. Replaced with this comment so the file parses.
| 'use client'; | |
| import { useState } from 'react'; | |
| import { Button } from '@headlessui/react'; | |
| import { SelectInput, TextInput, Checkbox } from '@/components/formInputs'; | |
| import Card from '@/components/Card'; | |
| import { apiClient } from '@/utils/api'; | |
| import { JobConfig } from '@/types'; | |
| import useSettings from '@/hooks/useSettings'; | |
| import { upsertJob } from '@/utils/storage/jobStorage'; | |
| import { useAuth } from '@/contexts/AuthContext'; | |
/** Props for the HF Jobs submission wizard. */
interface HFJobsWorkflowProps {
  /** Training job configuration that will be submitted to Hugging Face Jobs. */
  jobConfig: JobConfig;
  /** Invoked after submission with the HF job id and, when the local save succeeded, the local job id. */
  onComplete: (jobId: string, localJobId?: string) => void;
}

/** Wizard steps, listed in order of progression. */
type Step = 'validate' | 'upload' | 'submit' | 'complete';
/**
 * Multi-step wizard that validates a Hugging Face token, prepares a dataset
 * repository on HF Hub, and submits the training job to HF Jobs.
 */
export default function HFJobsWorkflow({ jobConfig, onComplete }: HFJobsWorkflowProps) {
  const { settings } = useSettings();
  const { token: authToken } = useAuth();
  // Namespace resolved from the token's whoami response; used as a fallback
  // when the namespace form field is left empty.
  const [defaultNamespace, setDefaultNamespace] = useState('');
  const [currentStep, setCurrentStep] = useState<Step>('validate');
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  // Form state
  const [datasetSource, setDatasetSource] = useState<'upload' | 'existing'>('upload');
  const [datasetName, setDatasetName] = useState(`${jobConfig.config.name}-dataset`);
  const [existingDatasetId, setExistingDatasetId] = useState('');
  const [hardware, setHardware] = useState(settings.HF_JOBS_DEFAULT_HARDWARE || 'a100-large');
  const [namespace, setNamespace] = useState(settings.HF_JOBS_NAMESPACE || '');
  const [autoUpload, setAutoUpload] = useState(true);
  // Progress state: result payloads from each completed step, used by the
  // step cards below to render success banners and links.
  const [validationResult, setValidationResult] = useState<any>(null);
  const [uploadResult, setUploadResult] = useState<any>(null);
  const [jobResult, setJobResult] = useState<any>(null);
| const validateToken = async () => { | |
| setLoading(true); | |
| setError(null); | |
| const effectiveToken = authToken || settings.HF_TOKEN; | |
| try { | |
| if (!effectiveToken) { | |
| throw new Error('A valid Hugging Face token is required to continue.'); | |
| } | |
| const response = await apiClient.post('/api/hf-hub', { | |
| action: 'whoami', | |
| token: effectiveToken, | |
| }); | |
| if (response.data.user) { | |
| setValidationResult(response.data.user); | |
| const resolvedName = response.data.user.name || ''; | |
| setDefaultNamespace(resolvedName); | |
| if (!namespace) { | |
| setNamespace(resolvedName); | |
| } | |
| setCurrentStep('upload'); | |
| } | |
| } catch (err: any) { | |
| setError(err.response?.data?.error || 'Failed to validate token'); | |
| } finally { | |
| setLoading(false); | |
| } | |
| }; | |
| const uploadDataset = async () => { | |
| setLoading(true); | |
| setError(null); | |
| const effectiveToken = authToken || settings.HF_TOKEN; | |
| try { | |
| if (!effectiveToken) { | |
| throw new Error('A valid Hugging Face token is required to continue.'); | |
| } | |
| const resolvedNamespace = namespace || defaultNamespace; | |
| if (!resolvedNamespace) { | |
| throw new Error('Unable to determine a namespace. Validate your HF token or set a namespace in Settings.'); | |
| } | |
| if (datasetSource === 'existing') { | |
| // Use existing dataset - just validate it exists | |
| if (!existingDatasetId) { | |
| throw new Error('Please enter a dataset ID'); | |
| } | |
| // Validate dataset exists | |
| const validateResponse = await apiClient.post('/api/hf-hub', { | |
| action: 'validateDataset', | |
| token: effectiveToken, | |
| datasetId: existingDatasetId, | |
| }); | |
| if (validateResponse.data.exists) { | |
| setUploadResult({ | |
| repoId: existingDatasetId, | |
| url: `https://huggingface.co/datasets/${existingDatasetId}`, | |
| existing: true, | |
| }); | |
| setCurrentStep('submit'); | |
| } else { | |
| throw new Error(`Dataset ${existingDatasetId} not found or not accessible`); | |
| } | |
| } else { | |
| if (!resolvedNamespace) { | |
| throw new Error('Unable to determine a namespace. Validate your HF token or set a namespace in Settings.'); | |
| } | |
| // Upload new dataset | |
| // First, create the dataset repository | |
| const createResponse = await apiClient.post('/api/hf-hub', { | |
| action: 'createDataset', | |
| token: effectiveToken, | |
| namespace: resolvedNamespace, | |
| datasetName, | |
| }); | |
| if (!createResponse.data.success) { | |
| throw new Error('Failed to create dataset repository'); | |
| } | |
| // Get dataset path from first dataset in config | |
| const datasetPath = jobConfig.config.process[0].datasets[0]?.folder_path; | |
| if (!datasetPath || datasetPath.trim() === '' || datasetPath === datasetName) { | |
| throw new Error('Dataset path could not be resolved. Please ensure the dataset folder exists on the host.'); | |
| } | |
| // Upload dataset files | |
| const uploadResponse = await apiClient.post('/api/hf-hub', { | |
| action: 'uploadDataset', | |
| token: effectiveToken, | |
| namespace: resolvedNamespace, | |
| datasetName, | |
| datasetPath, | |
| }); | |
| if (uploadResponse.data.success) { | |
| setUploadResult({ | |
| repoId: uploadResponse.data.repoId, | |
| url: `https://huggingface.co/datasets/${uploadResponse.data.repoId}`, | |
| existing: false, | |
| }); | |
| setCurrentStep('submit'); | |
| } | |
| } | |
| } catch (err: any) { | |
| setError(err.response?.data?.error || 'Failed to process dataset'); | |
| } finally { | |
| setLoading(false); | |
| } | |
| }; | |
| const submitJob = async () => { | |
| setLoading(true); | |
| setError(null); | |
| const effectiveToken = authToken || settings.HF_TOKEN; | |
| try { | |
| const resolvedNamespace = namespace || defaultNamespace; | |
| if (!resolvedNamespace) { | |
| throw new Error('Unable to determine a namespace. Validate your HF token or set a namespace in Settings.'); | |
| } | |
| if (!effectiveToken) { | |
| throw new Error('A valid Hugging Face token is required to continue.'); | |
| } | |
| const datasetRepo = | |
| uploadResult?.repoId || | |
| (datasetSource === 'existing' | |
| ? existingDatasetId | |
| : `${resolvedNamespace}/${datasetName}`); | |
| const response = await apiClient.post('/api/hf-jobs', { | |
| action: 'submitJob', | |
| token: effectiveToken, | |
| hardware, | |
| namespace: resolvedNamespace, | |
| jobConfig, | |
| datasetRepo, | |
| }); | |
| if (response.data.success) { | |
| const hfJobId = response.data.jobId; | |
| // Save job to local database for tracking | |
| let localJobId = undefined; | |
| try { | |
| const savedJob = await upsertJob({ | |
| name: `${jobConfig.config.name}-hf-cloud`, | |
| gpu_ids: hardware, | |
| job_config: { | |
| ...jobConfig, | |
| hf_job_id: hfJobId, | |
| hf_job_url: | |
| hfJobId !== 'unknown' && resolvedNamespace | |
| ? `https://huggingface.co/jobs/${resolvedNamespace}/${hfJobId}` | |
| : null, | |
| dataset_repo: datasetRepo, | |
| hardware, | |
| is_hf_job: true, | |
| training_backend: 'hf-jobs', | |
| hf_job_submitted: true, | |
| }, | |
| info: response.data.message || 'HF Job submitted', | |
| status: 'submitted', | |
| }); | |
| localJobId = savedJob.id; | |
| console.log('Saved HF Job to local storage:', savedJob); | |
| } catch (localSaveError: any) { | |
| console.warn('Failed to save HF Job locally:', localSaveError); | |
| // Don't fail the whole process if local save fails | |
| } | |
| setJobResult({ | |
| jobId: hfJobId, | |
| message: response.data.message, | |
| localJobId: localJobId, | |
| }); | |
| setCurrentStep('complete'); | |
| onComplete(hfJobId, localJobId); | |
| } | |
| } catch (err: any) { | |
| setError(err.response?.data?.error || 'Failed to submit job'); | |
| } finally { | |
| setLoading(false); | |
| } | |
| }; | |
  /**
   * Render the card for the current wizard step. Each case returns a complete
   * step UI; success banners appear once the matching result state is set.
   */
  const renderStepContent = () => {
    switch (currentStep) {
      // Step 1: validate the HF token (button disabled until a token exists).
      case 'validate':
        return (
          <Card title="Validate HF Token">
            <div className="space-y-4">
              <p className="text-sm text-gray-400">
                First, let's validate your Hugging Face token and get your username for dataset uploads.
              </p>
              {validationResult && (
                <div className="p-3 bg-green-900/20 border border-green-700 rounded">
                  <p className="text-green-400">
                    ✓ Token valid! Logged in as: <strong>{validationResult.name}</strong>
                  </p>
                </div>
              )}
              <Button
                onClick={validateToken}
                disabled={loading || !(authToken || settings.HF_TOKEN)}
                className="w-full px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded disabled:opacity-50"
              >
                {loading ? 'Validating...' : 'Validate Token'}
              </Button>
            </div>
          </Card>
        );
      // Step 2: choose between uploading a new dataset and reusing an
      // existing HF Hub dataset; required inputs gate the action button.
      case 'upload':
        return (
          <Card title="Dataset Configuration">
            <div className="space-y-4">
              <p className="text-sm text-gray-400">
                Choose whether to upload a new dataset or use an existing one from HF Hub.
              </p>
              <SelectInput
                label="Dataset Source"
                value={datasetSource}
                onChange={(value) => setDatasetSource(value as 'upload' | 'existing')}
                options={[
                  { value: 'upload', label: 'Upload New Dataset' },
                  { value: 'existing', label: 'Use Existing HF Dataset' }
                ]}
              />
              {datasetSource === 'upload' ? (
                <>
                  <TextInput
                    label="Dataset Name"
                    value={datasetName}
                    onChange={setDatasetName}
                    placeholder="my-training-dataset"
                    required
                  />
                  <TextInput
                    label="Namespace"
                    value={namespace}
                    onChange={setNamespace}
                    placeholder="your-username or org-name"
                    required
                  />
                </>
              ) : (
                <>
                  <TextInput
                    label="Existing Dataset ID"
                    value={existingDatasetId}
                    onChange={setExistingDatasetId}
                    placeholder="e.g. multimodalart/flux-tarot-v1 or username/dataset-name"
                    required
                  />
                  <p className="text-xs text-gray-500">
                    Enter the full dataset ID (namespace/name) from HuggingFace Hub
                  </p>
                </>
              )}
              {uploadResult && (
                <div className="p-3 bg-green-900/20 border border-green-700 rounded">
                  <p className="text-green-400">
                    ✓ Dataset {uploadResult.existing ? 'validated' : 'uploaded'} successfully!
                  </p>
                  <p className="text-sm text-gray-400 mt-1">
                    {uploadResult.existing ? 'Using dataset:' : 'View at:'} <a href={uploadResult.url} target="_blank" rel="noopener noreferrer" className="text-blue-400 underline">{uploadResult.repoId}</a>
                  </p>
                </div>
              )}
              <Button
                onClick={uploadDataset}
                disabled={loading || (datasetSource === 'upload' ? (!datasetName || !namespace) : !existingDatasetId)}
                className="w-full px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded disabled:opacity-50"
              >
                {loading ? (datasetSource === 'upload' ? 'Uploading...' : 'Validating...') : (datasetSource === 'upload' ? 'Upload Dataset' : 'Validate Dataset')}
              </Button>
            </div>
          </Card>
        );
      // Step 3: pick hardware (flavor names match the HF Jobs catalog) and
      // submit the training job.
      case 'submit':
        return (
          <Card title="Submit Training Job">
            <div className="space-y-4">
              <p className="text-sm text-gray-400">
                Configure and submit your training job to HF Jobs.
              </p>
              <SelectInput
                label="Hardware"
                value={hardware}
                onChange={setHardware}
                options={[
                  { value: 'cpu-basic', label: 'CPU Basic' },
                  { value: 'cpu-upgrade', label: 'CPU Upgrade' },
                  { value: 't4-small', label: 'T4 Small' },
                  { value: 't4-medium', label: 'T4 Medium' },
                  { value: 'l4x1', label: 'L4x1' },
                  { value: 'l4x4', label: 'L4x4' },
                  { value: 'a10g-small', label: 'A10G Small' },
                  { value: 'a10g-large', label: 'A10G Large' },
                  { value: 'a10g-largex2', label: 'A10G Large x2' },
                  { value: 'a10g-largex4', label: 'A10G Large x4' },
                  { value: 'a100-large', label: 'A100 Large' },
                  { value: 'v5e-1x1', label: 'TPU v5e-1x1' },
                  { value: 'v5e-2x2', label: 'TPU v5e-2x2' },
                  { value: 'v5e-2x4', label: 'TPU v5e-2x4' },
                ]}
              />
              <Checkbox
                label="Auto-upload trained model to HF Hub"
                checked={autoUpload}
                onChange={setAutoUpload}
              />
              {jobResult && (
                <div className="p-3 bg-green-900/20 border border-green-700 rounded">
                  <p className="text-green-400">
                    ✓ Job submitted successfully!
                  </p>
                  <p className="text-sm text-gray-400 mt-1">
                    Job ID: <code className="bg-gray-800 px-2 py-1 rounded text-xs">{jobResult.jobId}</code>
                  </p>
                </div>
              )}
              <Button
                onClick={submitJob}
                disabled={loading || !hardware}
                className="w-full px-4 py-2 bg-green-600 hover:bg-green-700 text-white rounded disabled:opacity-50"
              >
                {loading ? 'Submitting...' : 'Submit Training Job'}
              </Button>
            </div>
          </Card>
        );
      // Step 4: confirmation with monitoring links and next steps. The
      // monitor link is hidden when the backend reported jobId 'unknown'.
      case 'complete':
        return (
          <Card title="Job Submitted Successfully">
            <div className="space-y-4">
              <div className="p-4 bg-green-900/20 border border-green-700 rounded">
                <h3 className="text-lg font-semibold text-green-400 mb-2">🎉 Training job submitted!</h3>
                <p className="text-sm text-gray-300 mb-2">
                  Your training job has been submitted to Hugging Face Jobs and is now running in the cloud.
                </p>
                <div className="space-y-2">
                  <p className="text-sm">
                    <strong>Job ID:</strong> <code className="bg-gray-800 px-2 py-1 rounded text-xs">{jobResult?.jobId}</code>
                  </p>
                  {jobResult?.jobId && jobResult.jobId !== 'unknown' && (
                    <p className="text-sm">
                      <strong>Monitor Job:</strong> <a
                        href={`https://huggingface.co/jobs/${namespace}/${jobResult.jobId}`}
                        target="_blank"
                        rel="noopener noreferrer"
                        className="text-blue-400 underline"
                      >
                        View on HF Jobs →
                      </a>
                    </p>
                  )}
                  <p className="text-sm">
                    <strong>Dataset:</strong> <a href={uploadResult?.url} target="_blank" rel="noopener noreferrer" className="text-blue-400 underline">{uploadResult?.repoId}</a>
                  </p>
                  <p className="text-sm">
                    <strong>Hardware:</strong> {hardware}
                  </p>
                </div>
              </div>
              <div className="text-sm text-gray-400 space-y-2">
                <p><strong>Next steps:</strong></p>
                <ul className="list-disc list-inside space-y-1 ml-4">
                  <li>Monitor your job progress using: <code className="bg-gray-800 px-2 py-1 rounded text-xs">hf jobs logs {jobResult?.jobId}</code></li>
                  <li>The trained model will be uploaded to: <code className="bg-gray-800 px-2 py-1 rounded text-xs">{namespace}/{jobConfig.config.name}-lora</code></li>
                  <li>You'll receive notifications when training completes</li>
                </ul>
              </div>
            </div>
          </Card>
        );
      default:
        return null;
    }
  };
| return ( | |
| <div className="space-y-6"> | |
| {/* Progress indicator */} | |
| <div className="flex items-center justify-between mb-6"> | |
| {(['validate', 'upload', 'submit', 'complete'] as Step[]).map((step, index) => ( | |
| <div key={step} className="flex items-center"> | |
| <div className={`w-8 h-8 rounded-full flex items-center justify-center text-sm font-semibold ${ | |
| currentStep === step | |
| ? 'bg-blue-600 text-white' | |
| : index < (['validate', 'upload', 'submit', 'complete'] as Step[]).indexOf(currentStep) | |
| ? 'bg-green-600 text-white' | |
| : 'bg-gray-600 text-gray-300' | |
| }`}> | |
| {index + 1} | |
| </div> | |
| {index < 3 && ( | |
| <div className={`w-16 h-1 mx-2 ${ | |
| index < (['validate', 'upload', 'submit', 'complete'] as Step[]).indexOf(currentStep) | |
| ? 'bg-green-600' | |
| : 'bg-gray-600' | |
| }`} /> | |
| )} | |
| </div> | |
| ))} | |
| </div> | |
| {/* Error display */} | |
| {error && ( | |
| <div className="p-4 bg-red-900/20 border border-red-700 rounded"> | |
| <p className="text-red-400">{error}</p> | |
| </div> | |
| )} | |
| {/* Current step content */} | |
| {renderStepContent()} | |
| </div> | |
| ); | |
| } | |