| import React, { useEffect, useRef, useState } from "react"; |
| import { Button } from "@/components/ui/button"; |
| import { Input } from "@/components/ui/input"; |
| import { Label } from "@/components/ui/label"; |
| import { |
| Dialog, |
| DialogContent, |
| DialogHeader, |
| DialogTitle, |
| DialogDescription, |
| } from "@/components/ui/dialog"; |
| import { Alert, AlertDescription } from "@/components/ui/alert"; |
| import { |
| Select, |
| SelectContent, |
| SelectItem, |
| SelectTrigger, |
| SelectValue, |
| } from "@/components/ui/select"; |
| import { AlertTriangle, CheckCircle, Loader2, Play, VideoOff } from "lucide-react"; |
| import { RobotRecord } from "@/hooks/useRobots"; |
| import { useApi } from "@/contexts/ApiContext"; |
| import { useToast } from "@/hooks/use-toast"; |
| import { useNavigate } from "react-router-dom"; |
| import { |
| JobCheckpoint, |
| PolicyConfigSummary, |
| getCheckpointPolicyConfig, |
| listJobCheckpoints, |
| } from "@/lib/checkpointsApi"; |
| import { startInference } from "@/lib/inferenceApi"; |
| import CheckpointDropdown from "@/components/jobs/CheckpointDropdown"; |
|
|
| interface AvailableCamera { |
| index: number; |
| name: string; |
| deviceId: string; |
| available: boolean; |
| } |
|
|
| const CameraPreview: React.FC<{ deviceId: string; paused: boolean }> = ({ |
| deviceId, |
| paused, |
| }) => { |
| const videoRef = useRef<HTMLVideoElement>(null); |
| const [error, setError] = useState(false); |
|
|
| useEffect(() => { |
| if (paused || !deviceId) { |
| if (!deviceId) setError(true); |
| return; |
| } |
| let cancelled = false; |
| let stream: MediaStream | null = null; |
| setError(false); |
| (async () => { |
| try { |
| stream = await navigator.mediaDevices.getUserMedia({ |
| video: { deviceId: { exact: deviceId } }, |
| }); |
| if (cancelled) { |
| stream.getTracks().forEach((t) => t.stop()); |
| return; |
| } |
| if (videoRef.current) { |
| videoRef.current.srcObject = stream; |
| await videoRef.current.play().catch(() => {}); |
| } |
| } catch { |
| setError(true); |
| } |
| })(); |
| return () => { |
| cancelled = true; |
| if (stream) stream.getTracks().forEach((t) => t.stop()); |
| }; |
| }, [deviceId, paused]); |
|
|
| if (paused || error || !deviceId) { |
| return ( |
| <div className="w-32 h-24 bg-gray-800 rounded border border-gray-700 flex flex-col items-center justify-center"> |
| <VideoOff className="w-5 h-5 text-gray-500 mb-1" /> |
| <span className="text-[10px] text-gray-500"> |
| {paused ? "Released" : "No preview"} |
| </span> |
| </div> |
| ); |
| } |
| return ( |
| <video |
| ref={videoRef} |
| autoPlay |
| muted |
| playsInline |
| className="w-32 h-24 object-cover rounded border border-gray-700 bg-black" |
| /> |
| ); |
| }; |
|
|
| interface Props { |
| open: boolean; |
| onOpenChange: (open: boolean) => void; |
| robot: RobotRecord | null; |
| jobId: string; |
| initialStep: number | null; |
| } |
|
|
| const DEFAULT_FPS = 30; |
|
|
| const InferenceModal: React.FC<Props> = ({ |
| open, |
| onOpenChange, |
| robot, |
| jobId, |
| initialStep, |
| }) => { |
| const { baseUrl, fetchWithHeaders } = useApi(); |
| const { toast } = useToast(); |
| const navigate = useNavigate(); |
|
|
| const [checkpoints, setCheckpoints] = useState<JobCheckpoint[]>([]); |
| const [selectedStep, setSelectedStep] = useState<number | null>(initialStep); |
| const [task, setTask] = useState(""); |
| const [durationS, setDurationS] = useState(60); |
| const [submitting, setSubmitting] = useState(false); |
|
|
| const [policyConfig, setPolicyConfig] = useState<PolicyConfigSummary | null>(null); |
| const [policyConfigLoading, setPolicyConfigLoading] = useState(false); |
| const [policyConfigError, setPolicyConfigError] = useState<string | null>(null); |
|
|
| |
| const [cameraBindings, setCameraBindings] = useState<Record<string, number | null>>({}); |
| const [availableCameras, setAvailableCameras] = useState<AvailableCamera[]>([]); |
|
|
| |
| useEffect(() => { |
| if (!open) return; |
| let cancelled = false; |
| listJobCheckpoints(baseUrl, fetchWithHeaders, jobId) |
| .then((cks) => { |
| if (cancelled) return; |
| setCheckpoints(cks); |
| if (cks.length > 0) { |
| const latest = cks[cks.length - 1].step; |
| setSelectedStep((prev) => (prev != null ? prev : latest)); |
| } |
| }) |
| .catch(() => { |
| if (cancelled) return; |
| setCheckpoints([]); |
| }); |
| return () => { |
| cancelled = true; |
| }; |
| }, [open, baseUrl, fetchWithHeaders, jobId]); |
|
|
| |
| |
| |
| useEffect(() => { |
| if (!open) return; |
| let cancelled = false; |
| (async () => { |
| try { |
| |
| try { |
| const probe = await navigator.mediaDevices.getUserMedia({ video: true }); |
| probe.getTracks().forEach((t) => t.stop()); |
| } catch { |
| |
| } |
| const browserDevices = (await navigator.mediaDevices.enumerateDevices()) |
| .filter((d) => d.kind === "videoinput") |
| .map((d) => ({ deviceId: d.deviceId, label: d.label })); |
| const r = await fetchWithHeaders(`${baseUrl}/available-cameras`); |
| if (!r.ok) { |
| if (!cancelled) setAvailableCameras([]); |
| return; |
| } |
| const body = await r.json(); |
| const backend: { index: number; name?: string; available: boolean }[] = |
| body.cameras ?? []; |
| const norm = (s: string) => s.toLowerCase().replace(/\s+/g, " ").trim(); |
| const used = new Set<string>(); |
| const merged: AvailableCamera[] = backend.map((cam) => { |
| const label = cam.name || `Camera ${cam.index}`; |
| const target = norm(label); |
| const candidate = |
| browserDevices.find( |
| (d) => !used.has(d.deviceId) && d.label && norm(d.label) === target, |
| ) || |
| browserDevices.find( |
| (d) => |
| !used.has(d.deviceId) && |
| d.label && |
| norm(d.label).startsWith(target), |
| ) || |
| browserDevices.find( |
| (d) => |
| !used.has(d.deviceId) && |
| d.label && |
| (norm(d.label).includes(target) || target.includes(norm(d.label))), |
| ); |
| if (candidate) used.add(candidate.deviceId); |
| return { |
| index: cam.index, |
| name: label, |
| deviceId: candidate?.deviceId ?? "", |
| available: cam.available, |
| }; |
| }); |
| if (!cancelled) setAvailableCameras(merged); |
| } catch { |
| if (!cancelled) setAvailableCameras([]); |
| } |
| })(); |
| return () => { |
| cancelled = true; |
| }; |
| }, [open, baseUrl, fetchWithHeaders]); |
|
|
| |
| useEffect(() => { |
| if (!open || selectedStep == null) { |
| setPolicyConfig(null); |
| setPolicyConfigError(null); |
| return; |
| } |
| let cancelled = false; |
| setPolicyConfigLoading(true); |
| setPolicyConfigError(null); |
| getCheckpointPolicyConfig(baseUrl, fetchWithHeaders, jobId, selectedStep) |
| .then((cfg) => { |
| if (cancelled) return; |
| setPolicyConfig(cfg); |
| |
| |
| setCameraBindings((prev) => { |
| const next: Record<string, number | null> = {}; |
| for (const name of Object.keys(cfg.image_features)) { |
| next[name] = prev[name] ?? null; |
| } |
| return next; |
| }); |
| }) |
| .catch((e) => { |
| if (cancelled) return; |
| setPolicyConfig(null); |
| setPolicyConfigError(e instanceof Error ? e.message : String(e)); |
| }) |
| .finally(() => { |
| if (!cancelled) setPolicyConfigLoading(false); |
| }); |
| return () => { |
| cancelled = true; |
| }; |
| }, [open, baseUrl, fetchWithHeaders, jobId, selectedStep]); |
|
|
| |
| |
| |
| useEffect(() => { |
| if (!policyConfig) return; |
| const robotCams = robot?.cameras ?? []; |
| if (robotCams.length === 0 || availableCameras.length === 0) return; |
| setCameraBindings((prev) => { |
| let changed = false; |
| const next = { ...prev }; |
| for (const policyName of Object.keys(policyConfig.image_features)) { |
| if (next[policyName] != null) continue; |
| const robotCam = robotCams.find( |
| (c) => c.name.toLowerCase() === policyName.toLowerCase(), |
| ); |
| if (!robotCam) continue; |
| const live = |
| (robotCam.device_id && |
| availableCameras.find((c) => c.deviceId === robotCam.device_id)) || |
| availableCameras.find((c) => c.index === robotCam.camera_index); |
| if (live) { |
| next[policyName] = live.index; |
| changed = true; |
| } |
| } |
| return changed ? next : prev; |
| }); |
| }, [policyConfig, robot, availableCameras]); |
|
|
| const selectedRef = |
| selectedStep != null |
| ? checkpoints.find((c) => c.step === selectedStep)?.ref ?? null |
| : null; |
|
|
| const expectedCameraNames = policyConfig |
| ? Object.keys(policyConfig.image_features) |
| : []; |
| const allCamerasBound = expectedCameraNames.every( |
| (name) => cameraBindings[name] != null, |
| ); |
|
|
| const canStart = |
| !!robot && |
| robot.is_clean && |
| selectedRef != null && |
| !!policyConfig && |
| allCamerasBound && |
| !submitting; |
|
|
| const handleStart = async () => { |
| if (!robot || selectedRef == null || !policyConfig) return; |
| |
| |
| |
| setSubmitting(true); |
| await new Promise((r) => setTimeout(r, 300)); |
| const cameraDict: Record<string, { |
| type: string; camera_index?: number; width: number; height: number; fps?: number; |
| }> = {}; |
| for (const [name, dims] of Object.entries(policyConfig.image_features)) { |
| const idx = cameraBindings[name]; |
| if (idx == null) continue; |
| cameraDict[name] = { |
| type: "opencv", |
| camera_index: idx, |
| width: dims.width, |
| height: dims.height, |
| fps: DEFAULT_FPS, |
| }; |
| } |
| try { |
| await startInference(baseUrl, fetchWithHeaders, { |
| follower_port: robot.follower_port, |
| follower_config: robot.follower_config, |
| policy_ref: selectedRef, |
| task, |
| cameras: cameraDict, |
| duration_s: durationS, |
| }); |
| onOpenChange(false); |
| navigate("/inference"); |
| } catch (e) { |
| toast({ |
| title: "Couldn't start inference", |
| description: e instanceof Error ? e.message : String(e), |
| variant: "destructive", |
| }); |
| |
| setSubmitting(false); |
| } |
| }; |
|
|
| const onCameraBindingChange = (name: string, value: string) => { |
| const idx = Number(value); |
| setCameraBindings((prev) => ({ ...prev, [name]: idx })); |
| }; |
|
|
| return ( |
| <Dialog open={open} onOpenChange={onOpenChange}> |
| <DialogContent className="bg-gray-900 border-gray-800 text-white sm:max-w-[600px] p-8 max-h-[90vh] overflow-y-auto"> |
| <DialogHeader> |
| <div className="flex justify-center items-center mb-4"> |
| <div className="w-8 h-8 bg-green-500 rounded-full flex items-center justify-center"> |
| <Play className="w-4 h-4 text-white" /> |
| </div> |
| </div> |
| <DialogTitle className="text-white text-center text-2xl font-bold"> |
| Configure Inference |
| </DialogTitle> |
| </DialogHeader> |
| |
| <div className="space-y-6 py-4"> |
| <DialogDescription className="text-gray-400 text-base leading-relaxed text-center"> |
| Pick a checkpoint and confirm hardware. The selected policy will |
| drive the follower autonomously for the configured duration. |
| </DialogDescription> |
| |
| <div className="space-y-4"> |
| <h3 className="text-lg font-semibold text-white border-b border-gray-700 pb-2"> |
| Robot Configuration |
| </h3> |
| {!robot ? ( |
| <Alert className="bg-amber-900/40 border-amber-700 text-amber-100"> |
| <AlertTriangle className="h-4 w-4" /> |
| <AlertDescription> |
| Select and configure a robot on the Landing page first. |
| </AlertDescription> |
| </Alert> |
| ) : !robot.is_clean ? ( |
| <Alert className="bg-amber-900/40 border-amber-700 text-amber-100"> |
| <AlertTriangle className="h-4 w-4" /> |
| <AlertDescription> |
| <strong>{robot.name}</strong> is missing a calibration. |
| Configure it before running inference. |
| </AlertDescription> |
| </Alert> |
| ) : ( |
| <div className="flex items-center gap-2 text-sm"> |
| <CheckCircle className="w-4 h-4 text-green-400" /> |
| <span className="text-slate-200"> |
| Running on <strong>{robot.name}</strong> |
| </span> |
| </div> |
| )} |
| </div> |
| |
| <div className="space-y-4"> |
| <h3 className="text-lg font-semibold text-white border-b border-gray-700 pb-2"> |
| Checkpoint |
| </h3> |
| {checkpoints.length === 0 ? ( |
| <Alert className="bg-amber-900/40 border-amber-700 text-amber-100"> |
| <AlertTriangle className="h-4 w-4" /> |
| <AlertDescription> |
| No checkpoints available for this job yet. |
| </AlertDescription> |
| </Alert> |
| ) : ( |
| <CheckpointDropdown |
| checkpoints={checkpoints} |
| selectedStep={selectedStep} |
| onChange={setSelectedStep} |
| /> |
| )} |
| </div> |
| |
| <div className="space-y-4"> |
| <h3 className="text-lg font-semibold text-white border-b border-gray-700 pb-2"> |
| Run parameters |
| </h3> |
| {policyConfig?.requires_task ? ( |
| <div className="space-y-2"> |
| <Label htmlFor="task" className="text-sm font-medium text-gray-300"> |
| Task description |
| </Label> |
| <Input |
| id="task" |
| value={task} |
| onChange={(e) => setTask(e.target.value)} |
| placeholder="e.g., pick up the red block" |
| className="bg-gray-800 border-gray-700 text-white" |
| /> |
| <p className="text-xs text-gray-500"> |
| This policy is language-conditioned ({policyConfig.policy_type}). |
| </p> |
| </div> |
| ) : null} |
| <div className="space-y-2"> |
| <Label htmlFor="durationS" className="text-sm font-medium text-gray-300"> |
| Max duration (seconds) |
| </Label> |
| <Input |
| id="durationS" |
| type="number" |
| min={1} |
| value={durationS} |
| onChange={(e) => setDurationS(parseInt(e.target.value || "0"))} |
| className="bg-gray-800 border-gray-700 text-white" |
| /> |
| </div> |
| </div> |
| |
| <div className="space-y-4"> |
| <h3 className="text-lg font-semibold text-white border-b border-gray-700 pb-2"> |
| Cameras |
| </h3> |
| {policyConfigLoading ? ( |
| <div className="flex items-center gap-2 text-sm text-slate-400"> |
| <Loader2 className="w-4 h-4 animate-spin" /> |
| Reading policy config… |
| </div> |
| ) : policyConfigError ? ( |
| <Alert className="bg-red-900/40 border-red-700 text-red-100"> |
| <AlertTriangle className="h-4 w-4" /> |
| <AlertDescription> |
| Couldn't load policy config: {policyConfigError} |
| </AlertDescription> |
| </Alert> |
| ) : !policyConfig ? null : expectedCameraNames.length === 0 ? ( |
| <p className="text-xs text-gray-500"> |
| This policy doesn't use cameras. |
| </p> |
| ) : ( |
| <div className="space-y-3"> |
| <p className="text-xs text-gray-500"> |
| Bind a physical camera to each name the policy was trained |
| with. Resolution comes from the checkpoint. |
| </p> |
| {expectedCameraNames.map((name) => { |
| const dims = policyConfig.image_features[name]; |
| const value = cameraBindings[name]; |
| const bound = |
| value != null |
| ? availableCameras.find((c) => c.index === value) |
| : undefined; |
| return ( |
| <div key={name} className="flex items-center gap-3"> |
| <div className="flex-1"> |
| <Label className="text-sm font-medium text-gray-200"> |
| {name} |
| </Label> |
| <p className="text-xs text-gray-500"> |
| {dims.width}×{dims.height} |
| </p> |
| </div> |
| <Select |
| value={value != null ? String(value) : undefined} |
| onValueChange={(v) => onCameraBindingChange(name, v)} |
| > |
| <SelectTrigger className="bg-gray-800 border-gray-700 text-white w-56"> |
| <SelectValue placeholder="Select a camera" /> |
| </SelectTrigger> |
| <SelectContent className="bg-gray-900 border-gray-700 text-white"> |
| {availableCameras.length === 0 ? ( |
| <div className="px-2 py-1.5 text-xs text-gray-500"> |
| No cameras detected |
| </div> |
| ) : ( |
| availableCameras.map((cam) => ( |
| <SelectItem |
| key={cam.index} |
| value={String(cam.index)} |
| > |
| #{cam.index} — {cam.name} |
| </SelectItem> |
| )) |
| )} |
| </SelectContent> |
| </Select> |
| <CameraPreview deviceId={bound?.deviceId ?? ""} paused={submitting} /> |
| </div> |
| ); |
| })} |
| </div> |
| )} |
| </div> |
| |
| <div className="flex flex-col sm:flex-row gap-4 justify-center pt-4"> |
| <Button |
| onClick={handleStart} |
| disabled={!canStart} |
| className="w-full sm:w-auto bg-green-500 hover:bg-green-600 text-white px-10 py-6 text-lg disabled:opacity-40 disabled:cursor-not-allowed" |
| > |
| <Play className="w-5 h-5 mr-2" /> |
| {submitting ? "Starting…" : "Start Inference"} |
| </Button> |
| <Button |
| onClick={() => onOpenChange(false)} |
| variant="outline" |
| className="w-full sm:w-auto border-gray-500 hover:border-gray-200 px-10 py-6 text-lg text-zinc-500 bg-zinc-900 hover:bg-zinc-800" |
| > |
| Cancel |
| </Button> |
| </div> |
| </div> |
| </DialogContent> |
| </Dialog> |
| ); |
| }; |
|
|
| export default InferenceModal; |
|
|