import React, { useCallback, useEffect, useState } from "react";
import { ChevronDown, Database, FolderPlus, X } from "lucide-react";
import { useApp } from "../App";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Progress } from "@/components/ui/progress";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";

/** A dataset entry as held in the `datasets` state (loaded from `/api/lora-training/datasets`). */
interface Dataset {
  id: string;
  name: string;
  description: string | null;
  image_count: number | null;
  created_at: string;
}

/** A training sample: `name` is the raw path from the API, `step` is parsed from its file name. */
interface Sample {
  name: string;
  step: number | null;
}

/** A checkpoint entry as rendered in the expanded job row. */
interface Checkpoint {
  name: string;
  step: number | null;
  path: string;
  size_bytes: number;
  modified_at: string;
}

/** Samples and checkpoints cached for one expanded job. */
interface JobArtifacts {
  samples: Sample[];
  checkpoints: Checkpoint[];
}

/**
 * Maps a job status string to a badge variant.
 *
 * @param status - Job status ("training", "running", "completed", "failed", or anything else).
 * @returns "default" for active jobs, "secondary" for completed, "destructive" for failed,
 *          and "outline" for every other status.
 */
function statusVariant(status: string): "default" | "secondary" | "destructive" | "outline" {
  if (status === "training" || status === "running") return "default";
  if (status === "completed") return "secondary";
  if (status === "failed") return "destructive";
  return "outline";
}

/**
 * Formats an ISO timestamp for display in the user's locale.
 *
 * @param iso - Timestamp string; may be missing.
 * @returns "--" when `iso` is empty/null/undefined, the raw input when it cannot be
 *          parsed as a date, otherwise the locale-formatted date and time.
 */
function formatDate(iso: string | undefined | null): string {
  if (!iso) return "--";
  const parsed = new Date(iso);
  // An unparseable string does not throw: it produces an Invalid Date whose time
  // value is NaN, so the fallback has to be an explicit check rather than a catch.
  if (Number.isNaN(parsed.getTime())) return iso;
  return parsed.toLocaleString();
}

/** True for the two status strings that denote an active job ("running" / "training"). */
function isActiveStatus(status: unknown): boolean {
  return status === "running" || status === "training";
}

/** Extracts a displayable message from a caught value of unknown type. */
function errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Reads the `detail` string from a failed response's JSON body.
 *
 * @param res - The non-OK response.
 * @param fallback - Message to use when the body is not JSON or has no string `detail`.
 * @returns The server-provided detail, or `fallback`.
 */
async function readErrorDetail(res: Response, fallback: string): Promise<string> {
  try {
    const body: unknown = await res.json();
    if (body !== null && typeof body === "object" && "detail" in body) {
      const detail = (body as { detail?: unknown }).detail;
      if (typeof detail === "string" && detail) return detail;
    }
  } catch {
    // Body was not JSON (e.g. an HTML error page) — use the fallback message.
  }
  return fallback;
}

/**
 * LoRA training page: dataset registration, a form to start training jobs, and a
 * jobs table whose rows expand to show samples and checkpoints.
 */
export function LoraTrainingPage() {
  const ctx = useApp();
  const jobs = ctx.trainingJobs ?? [];
  const live = ctx.training;

  // Datasets
  const [datasets, setDatasets] = useState<Dataset[]>([]);
  const [datasetsLoading, setDatasetsLoading] = useState(true);
  const [showAddDataset, setShowAddDataset] = useState(false);
  const [addDatasetId, setAddDatasetId] = useState("");
  const [addDatasetName, setAddDatasetName] = useState("");
  const [addDatasetDesc, setAddDatasetDesc] = useState("");
  const [addDatasetCount, setAddDatasetCount] = useState("");
  const [addDatasetSubmitting, setAddDatasetSubmitting] = useState(false);
  const [addDatasetError, setAddDatasetError] = useState<string | null>(null);

  // Fetches the dataset list; failures leave the current list untouched.
  const loadDatasets = useCallback(async () => {
    try {
      const res = await fetch("/api/lora-training/datasets");
      const data = await res.json();
      setDatasets(data.datasets || []);
    } catch {
      // ignore — non-critical
    } finally {
      setDatasetsLoading(false);
    }
  }, []);

  useEffect(() => {
    void loadDatasets();
  }, [loadDatasets]);

  // Registers a new dataset, then clears the form and reloads the list.
  const submitAddDataset = async () => {
    setAddDatasetError(null);
    setAddDatasetSubmitting(true);
    try {
      // A non-numeric count would serialise as null; omit it instead.
      const parsedCount = addDatasetCount ? parseInt(addDatasetCount, 10) : undefined;
      const imageCount =
        parsedCount !== undefined && Number.isFinite(parsedCount) ? parsedCount : undefined;

      const res = await fetch("/api/lora-training/datasets", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          id: addDatasetId,
          name: addDatasetName || addDatasetId,
          description: addDatasetDesc || undefined,
          image_count: imageCount,
        }),
      });
      if (!res.ok) {
        throw new Error(await readErrorDetail(res, "Failed to register dataset"));
      }
      setAddDatasetId("");
      setAddDatasetName("");
      setAddDatasetDesc("");
      setAddDatasetCount("");
      setShowAddDataset(false);
      void loadDatasets();
    } catch (e: unknown) {
      setAddDatasetError(errorMessage(e));
    } finally {
      setAddDatasetSubmitting(false);
    }
  };

  const mergedJobs = jobs.map((job: any) => {
    const isLiveJob = Boolean(live) && live.job_name === job.job_name;

    // Merge live ai-toolkit data into the matching job row so we get real
    // current_step / total_steps / loss / info. ai-toolkit uses "running"
    // while the DB-backed job list may say "training".
    if (isLiveJob && isActiveStatus(live.status)) {
      return { ...job, ...live, _live: true };
    }

    // Job says running/training in the DB but ai-toolkit has no matching
    // live process. The job died or was abandoned — treat as failed.
    if (!isLiveJob && isActiveStatus(job.status)) {
      return { ...job, status: "failed", _dead: true };
    }
    return job;
  });

  const [showForm] = useState(true);
  const [formJobName, setFormJobName] = useState("");
  const [formTrigger, setFormTrigger] = useState("");
  const [formDataset, setFormDataset] = useState("rigo_flux2_lora_v1_dop");
  const [submitting, setSubmitting] = useState(false);
  const [submitError, setSubmitError] = useState<string | null>(null);

  // Starts a training job, then clears the name/trigger fields and refreshes app state.
  const submitTraining = async () => {
    setSubmitError(null);
    setSubmitting(true);
    try {
      const res = await fetch("/api/lora-training/start", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          job_name: formJobName,
          trigger_word: formTrigger,
          dataset: formDataset,
        }),
      });
      // The body is only needed for the error detail; a successful start must not
      // be reported as a failure just because its body is not JSON.
      if (!res.ok) {
        throw new Error(await readErrorDetail(res, "Failed to start training"));
      }
      setFormJobName("");
      setFormTrigger("");
      ctx.load();
    } catch (e: unknown) {
      setSubmitError(errorMessage(e));
    } finally {
      setSubmitting(false);
    }
  };

  const [expandedJob, setExpandedJob] = useState<string | null>(null);
  // Cache samples + checkpoints per job so expanding one doesn't overwrite another.
  const [expandedData, setExpandedData] = useState<Map<string, JobArtifacts>>(new Map());
  const [loadingExpanded, setLoadingExpanded] = useState(false);

  // Stores one job's artifacts in the cache without mutating the previous Map.
  const cacheArtifacts = (jobName: string, artifacts: JobArtifacts) => {
    setExpandedData((prev) => {
      const next = new Map(prev);
      next.set(jobName, artifacts);
      return next;
    });
  };

  // Loads samples and checkpoints for a job unless they are already cached.
  const loadExpanded = useCallback(
    async (jobName: string) => {
      if (expandedData.has(jobName)) return; // already cached
      setLoadingExpanded(true);
      try {
        // Job names are user-entered; encode so "&", "#", "+" or spaces cannot
        // corrupt the query string.
        const query = `job_name=${encodeURIComponent(jobName)}`;
        const [sRes, cRes] = await Promise.all([
          fetch(`/api/lora-training/samples?${query}`),
          fetch(`/api/lora-training/checkpoints?${query}`),
        ]);
        const sData = await sRes.json();
        const cData = await cRes.json();

        // ai-toolkit returns samples as flat file paths. Parse step number
        // from the filename pattern: ...__000000250_0.jpg
        const rawSamples: string[] = sData.samples || [];
        const parsedSamples: Sample[] = rawSamples.map((path) => {
          const basename = path.split("/").pop() ?? path;
          const match = basename.match(/__([0-9]+)_/);
          return { name: path, step: match ? Number(match[1]) : null };
        });

        cacheArtifacts(jobName, {
          samples: parsedSamples,
          checkpoints: cData.checkpoints || [],
        });
      } catch {
        // A failed load is cached as empty, so this job is not re-fetched on
        // a later expand.
        cacheArtifacts(jobName, { samples: [], checkpoints: [] });
      } finally {
        setLoadingExpanded(false);
      }
    },
    [expandedData],
  );

  // Collapses the row if it is already open; otherwise opens it and loads its data.
  const toggleJob = (jobName: string) => {
    if (expandedJob === jobName) {
      setExpandedJob(null);
    } else {
      setExpandedJob(jobName);
      void loadExpanded(jobName);
    }
  };

  // Count from mergedJobs so the counters agree with the table: a job whose
  // process died is shown (and counted) as failed, not as still running.
  const completed = mergedJobs.filter((j: any) => j.status === "completed").length;
  const running = mergedJobs.filter((j: any) => isActiveStatus(j.status)).length;
  const failed = mergedJobs.filter((j: any) => j.status === "failed").length;

  return (
{/* Header */}

Characters & LoRA Training

Train fine-tuned character LoRAs on AMD MI300X. Track all jobs, checkpoints, and stats.

{/* New training form */} {showForm && (

Start Training Job

setFormJobName(e.target.value)} className="bg-black/40 border-gray-700 text-white placeholder:text-gray-600" />
setFormTrigger(e.target.value)} className="bg-black/40 border-gray-700 text-white placeholder:text-gray-600" />
{datasets.length > 0 ? ( ) : ( setFormDataset(e.target.value)} className="bg-black/40 border-gray-700 text-white" /> )}
{submitError &&

{submitError}

}
)} {/* Datasets */}

Training Datasets {datasets.length}

{showAddDataset && (

Register a dataset folder. Place your images at{" "} /root/nemoflix-training/datasets/<id>/{" "} on the AMD node before starting a training job.

setAddDatasetId(e.target.value)} className="bg-black/40 border-gray-700 text-white text-sm placeholder:text-gray-600" />
setAddDatasetName(e.target.value)} className="bg-black/40 border-gray-700 text-white text-sm placeholder:text-gray-600" />
setAddDatasetDesc(e.target.value)} className="bg-black/40 border-gray-700 text-white text-sm placeholder:text-gray-600" />
setAddDatasetCount(e.target.value)} className="bg-black/40 border-gray-700 text-white text-sm placeholder:text-gray-600" />
{addDatasetError &&

{addDatasetError}

}
)} {datasetsLoading ? (

Loading…

) : datasets.length === 0 ? (

No datasets registered. Add one above, then reference it when starting a training job.

) : (
{datasets.map(ds => (
setFormDataset(ds.id)} title="Use in training form" >

{ds.name}

{ds.id}{ds.description ? ` · ${ds.description}` : ""}

{ds.image_count != null && ( {ds.image_count} images )} {new Date(ds.created_at).toLocaleDateString()}
))}
)}
{/* Stats */}
{/* Jobs table */}

Training Jobs {jobs.length}

{jobs.length === 0 ? (

No training jobs yet.

) : ( Job Status Progress Loss Model Created {mergedJobs.map((job: any) => { // ai-toolkit returns status="running" when actually training. The info // field is "Training" when steps are executing and "Initializing" when // models/latents are loading. Trust the live data, not hardcoded logic. const hasLiveProgress = job.current_step > 0 && job.total_steps > 0; const isTraining = (job.status === "training" || job.status === "running") && hasLiveProgress; const isInitializing = (job.status === "running" || job.status === "training") && !hasLiveProgress && job._live; const progress = hasLiveProgress ? Math.round((job.current_step / job.total_steps) * 100) : 0; const isExpanded = expandedJob === job.job_name; const isLoading = isExpanded && loadingExpanded && !expandedData.has(job.job_name); const jobData = expandedData.get(job.job_name); const samples = jobData?.samples ?? []; const checkpoints = jobData?.checkpoints ?? []; return ( {/* Main row — clickable */} toggleJob(job.job_name)} >

{job.job_name}

{[job.trigger_word && `trigger: ${job.trigger_word}`, job.dataset].filter(Boolean).join(" · ")}

{job.status} {isTraining ? (
{progress}%
Step {job.current_step}/{job.total_steps} {job.seconds_per_step ? {job.seconds_per_step.toFixed(1)}s/step : null} {job.lr != null ? lr {Number(job.lr).toExponential(1)} : null} {job.eta ? {job.eta} left : null}
{job.info &&
ai-toolkit: {job.info}
}
) : isInitializing ? (
Initializing — loading models, caching latents…
) : job.status === "completed" ? (

{job.total_steps || 0} steps{job.elapsed ? ` · ${job.elapsed}` : ""}

{job.loss != null ?

final loss {job.loss.toFixed(4)}

: null}
) : job.status === "failed" ? ( {job._dead ? "Process died or was abandoned" : job.error || "Job failed"} ) : ( )}
{job.loss != null ? job.loss.toFixed(4) : "—"} {job.model || "—"} {formatDate(job.created_at)}
{/* Expanded sub-row — inline, directly under the clicked row */} {isExpanded && ( {isLoading ? (
Loading samples and checkpoints…
) : (
{/* Samples */} {samples.length > 0 && (

Training Samples ({samples.length})

{samples.map((s) => (
{`Sample
Step {s.step ?? "?"}
))}
)} {/* Checkpoints */} {checkpoints.length > 0 && (

Checkpoints ({checkpoints.length})

Step File Size Date {checkpoints.map((ck) => ( {ck.step ?? "final"} {ck.name} {(ck.size_bytes / 1024 / 1024).toFixed(0)} MB {new Date(ck.modified_at).toLocaleDateString()} ))}
)} {samples.length === 0 && checkpoints.length === 0 && (

No samples or checkpoints for this job.

)} )} )} ); })} )} ); } function StatBox({ label, value, color }: { label: string; value: number; color?: string }) { return (

{value}

{label}

); }