|
|
|
|
|
|
|
|
|
|
|
|
|
|
export default function Diagram() { |
|
|
|
|
|
const Card = ({ title, children }) => ( |
|
|
<div style={{ border: '1px solid #e5e7eb', borderRadius: 16, background: 'white', boxShadow: '0 1px 2px rgba(0,0,0,0.04)' }}> |
|
|
<div style={{ padding: '12px 16px', borderBottom: '1px solid #f1f5f9' }}> |
|
|
<div style={{ fontSize: 16, fontWeight: 600 }}>{title}</div> |
|
|
</div> |
|
|
<div style={{ padding: 16, color: '#374151', fontSize: 14 }}>{children}</div> |
|
|
</div> |
|
|
); |
|
|
|
|
|
const Arrow = () => ( |
|
|
<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'center' }} aria-hidden> |
|
|
<span style={{ fontSize: 20 }}>➜</span> |
|
|
</div> |
|
|
); |
|
|
|
|
|
|
|
|
/**
 * External URLs tracked by the "Dev Tests" self-check at the bottom of the page.
 *
 * NOTE(review): the page also links to
 * https://huggingface.co/docs/datasets/labelstudio and https://wandb.ai/site,
 * which are not listed here — confirm whether the list is meant to be exhaustive.
 */
const requiredLinks = [
  'https://huggingface.co/',
  'https://gradio.app/',
  'https://streamlit.io/',
  'https://huggingface.co/docs/datasets',
  'https://labelstud.io/',
  'https://cvat.org/',
  'https://roboflow.com/',
  'https://voxel51.com/',
  'https://cleanlab.ai/',
  'https://aws.amazon.com/s3/',
  'https://cloud.google.com/storage',
  'https://min.io/',
  'https://huggingface.co/spaces',
  'https://www.runpod.io/',
  'https://huggingface.co/docs/transformers',
  'https://huggingface.co/docs/accelerate',
  'https://huggingface.co/docs/evaluate',
  'https://huggingface.co/inference-api',
  'https://huggingface.co/docs/transformers/quicktour',

  'https://fastapi.tiangolo.com/',
  'https://onnxruntime.ai/',
  'https://developer.nvidia.com/tensorrt',
  'https://www.intel.com/openvino',
  'https://www.ros.org/',
  'https://www.docker.com/',
];

/**
 * Reports whether `links` contains at least `minimum` different URLs.
 * Duplicates are collapsed through a Set, so repeating one URL cannot
 * satisfy the threshold.
 *
 * @param {string[]} links - URLs to inspect.
 * @param {number} minimum - Smallest acceptable number of unique URLs.
 * @returns {boolean} True when the unique count reaches `minimum`.
 */
const hasDistinctLinks = (links, minimum) => new Set(links).size >= minimum;

/**
 * Rows shown in the "Dev Tests" list. The first three are hand-maintained
 * static assertions (always `true`); only the link row is computed.
 */
const tests = [
  { name: 'Has Stage Definitions section', pass: true },
  { name: 'Has Tool Comparison table', pass: true },
  { name: 'Includes Model Lifecycle section', pass: true },
  // Counts unique URLs, matching the word "distinct" in the label.
  { name: 'Has ≥ 10 distinct external links', pass: hasDistinctLinks(requiredLinks, 10) },
];
|
|
|
|
|
const link = (href, text) => ( |
|
|
<a href={href} target="_blank" rel="noreferrer noopener" style={{ color: '#2563eb', textDecoration: 'underline' }}>{text}</a> |
|
|
); |
|
|
|
|
|
return ( |
|
|
<div style={{ padding: '24px', maxWidth: 1100, margin: '0 auto' }}> |
|
|
<header style={{ marginBottom: 16 }}> |
|
|
<h1 style={{ fontSize: 28, fontWeight: 700, margin: 0 }}>Hugging Face–Centric Minimal Data Stack</h1> |
|
|
<p style={{ color: '#6b7280', marginTop: 6 }}>Single-backbone workflow for robotics datasets (manipulation, perception, reasoning, HRI) with minimal tools and frictionless integration.</p> |
|
|
</header> |
|
|
|
|
|
{/* Stage definitions */} |
|
|
<section style={{ display: 'grid', gap: 12, marginBottom: 24 }}> |
|
|
<h2 style={{ fontSize: 20, fontWeight: 600, margin: 0 }}>Stage Definitions & Examples</h2> |
|
|
<ul style={{ margin: 0, paddingLeft: 18, color: '#374151' }}> |
|
|
<li><strong>Data Collection:</strong> Raw recordings from robots or simulations. Example: RGB-D video, audio, and joint states captured during human-robot interaction.</li> |
|
|
<li><strong>Annotation:</strong> Assign labels or semantics to collected data. Example: gesture type, emotion, manipulated object, speech act.</li> |
|
|
<li><strong>Curation:</strong> Filter, validate, and organize annotated data into usable splits (train/val/test). Example: remove bad frames, balance human/robot perspectives.</li> |
|
|
<li><strong>Publishing (Hub):</strong> Versioned dataset hosting on {link('https://huggingface.co/','Hugging Face Hub')}, with metadata and documentation. Example: pushing curated subsets for manipulation learning.</li> |
|
|
<li><strong>Visualization (Spaces):</strong> Interactive dashboards or viewers built in {link('https://gradio.app/','Gradio')} or {link('https://streamlit.io/','Streamlit')} for exploration or validation. Example: playback of synchronized gaze, pose, and audio segments.</li> |
|
|
<li><strong>Reuse & Training:</strong> Loading datasets directly via {link('https://huggingface.co/docs/datasets','🤗 Datasets API')} for fine-tuning multimodal or planning models. Example: training z<sub>social</sub> encoders or expressive decoders.</li> |
|
|
</ul> |
|
|
</section> |
|
|
|
|
|
{/* Main flow diagram */} |
|
|
<section style={{ display: 'grid', gridTemplateColumns: '1fr 40px 1fr 40px 1fr 40px 1fr 40px 1fr', gap: 12, alignItems: 'stretch', marginBottom: 24 }}> |
|
|
<Card title="Data Sources"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>Robot logs (RGB-D, audio, pose)</li> |
|
|
<li>Sim runs & demos</li> |
|
|
<li>Interaction clips</li> |
|
|
<li>Planning/intent traces</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Arrow/> |
|
|
<Card title="Annotation (min one)"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>{link('https://labelstud.io/','Label Studio')} (self-host or cloud)</li> |
|
|
<li>{link('https://cvat.org/','CVAT')} / {link('https://roboflow.com/','Roboflow')} (export)</li> |
|
|
<li>Exports: COCO, JSON, CSV</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Arrow/> |
|
|
<Card title="Curation (optional)"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>{link('https://voxel51.com/','FiftyOne')}: filter, QA, splits</li> |
|
|
<li>{link('https://cleanlab.ai/','Cleanlab')} / Pandas checks</li> |
|
|
<li>Embed search for edge cases</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Arrow/> |
|
|
<Card title="HF Hub (Backbone)"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>{link('https://huggingface.co/','Datasets & models')} in repos</li> |
|
|
<li>Git + LFS versioning</li> |
|
|
<li>Private org, permissions</li> |
|
|
<li>Tags, README, cards</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Arrow/> |
|
|
<Card title="HF Spaces (Viz)"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>{link('https://huggingface.co/spaces','Gradio/Streamlit viewers')}</li> |
|
|
<li>Clip browser, 3D previews</li> |
|
|
<li>Eval dashboards & demos</li> |
|
|
</ul> |
|
|
</Card> |
|
|
</section> |
|
|
|
|
|
{/* Tool comparison */} |
|
|
<section style={{ marginBottom: 24 }}> |
|
|
<h2 style={{ fontSize: 20, fontWeight: 600, margin: '0 0 8px 0' }}>Comparison: Annotation & Curation Tools</h2> |
|
|
<div style={{ overflowX: 'auto' }}> |
|
|
<table style={{ width: '100%', fontSize: 14, borderCollapse: 'collapse' }}> |
|
|
<thead> |
|
|
<tr style={{ background: '#f3f4f6', color: '#374151' }}> |
|
|
<th style={{ padding: 8, textAlign: 'left' }}>Tool</th> |
|
|
<th style={{ padding: 8, textAlign: 'left' }}>Strengths</th> |
|
|
<th style={{ padding: 8, textAlign: 'left' }}>Limitations</th> |
|
|
<th style={{ padding: 8, textAlign: 'left' }}>Integration with HF</th> |
|
|
</tr> |
|
|
</thead> |
|
|
<tbody> |
|
|
<tr> |
|
|
<td style={{ padding: 8, fontWeight: 600 }}>{link('https://labelstud.io/','Label Studio')}</td> |
|
|
<td style={{ padding: 8 }}>Open source, multi-modal (image, audio, text, video). Very flexible schema; plugin ecosystem.</td> |
|
|
<td style={{ padding: 8 }}>Requires setup for teams; interface slower with 100k+ samples.</td> |
|
|
<td style={{ padding: 8 }}>Native {link('https://huggingface.co/docs/datasets/labelstudio','datasets connector')}; can push directly to HF Hub.</td> |
|
|
</tr> |
|
|
<tr> |
|
|
<td style={{ padding: 8, fontWeight: 600 }}>{link('https://cvat.org/','CVAT')}</td> |
|
|
<td style={{ padding: 8 }}>Great for video and dense bounding-box/pose annotations; powerful auto-annotation tools.</td> |
|
|
<td style={{ padding: 8 }}>Primarily vision-focused; heavier deployment (Docker).</td> |
|
|
<td style={{ padding: 8 }}>Exports in COCO/VOC formats easily loadable with <code>datasets.load_dataset</code>.</td> |
|
|
</tr> |
|
|
<tr> |
|
|
<td style={{ padding: 8, fontWeight: 600 }}>{link('https://roboflow.com/','Roboflow')}</td> |
|
|
<td style={{ padding: 8 }}>Cloud-based; fast web UI and built-in preprocessing and augmentation.</td> |
|
|
<td style={{ padding: 8 }}>Closed-source, limited free tier; less flexible schemas.</td> |
|
|
<td style={{ padding: 8 }}>Exports compatible with HF datasets; no native connector but simple upload via API.</td> |
|
|
</tr> |
|
|
<tr> |
|
|
<td style={{ padding: 8, fontWeight: 600 }}>{link('https://voxel51.com/','FiftyOne')}</td> |
|
|
<td style={{ padding: 8 }}>Advanced filtering, visualization, embedding-based analysis.</td> |
|
|
<td style={{ padding: 8 }}>Not for annotation itself; local-first.</td> |
|
|
<td style={{ padding: 8 }}>Direct push/export to HF Hub for curated dataset versions.</td> |
|
|
</tr> |
|
|
</tbody> |
|
|
</table> |
|
|
</div> |
|
|
</section> |
|
|
|
|
|
{/* Output / training */} |
|
|
<section style={{ display: 'grid', gridTemplateColumns: '1fr 1fr 1fr', gap: 12, marginBottom: 24 }}> |
|
|
<Card title="Train & Reuse"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>Load via {link('https://huggingface.co/docs/datasets','datasets streaming')}</li> |
|
|
<li>Fine-tune VL/VLA/ASR models</li> |
|
|
<li>Push checkpoints to HF</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Card title="Raw Storage (optional)"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>{link('https://aws.amazon.com/s3/','AWS S3')} / {link('https://cloud.google.com/storage','GCS')} / {link('https://min.io/','MinIO')} for TB+ raw</li> |
|
|
<li>Keep curated subsets on HF</li> |
|
|
<li>Link via metadata/URIs</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Card title="Governance (lite)"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>Repo permissions & reviews</li> |
|
|
<li>Semantic tags & licenses</li> |
|
|
<li>Changelogs & model cards</li> |
|
|
</ul> |
|
|
</Card> |
|
|
</section> |
|
|
|
|
|
{/* Notes */} |
|
|
<section style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 12 }}> |
|
|
<Card title="Operating Principles"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>Keep the workflow lean: Hugging Face Hub as the single backbone.</li> |
|
|
<li>One annotation tool ({link('https://labelstud.io/','Label Studio')}, {link('https://cvat.org/','CVAT')}, or {link('https://roboflow.com/','Roboflow')}).</li> |
|
|
<li>Optional curation with {link('https://voxel51.com/','FiftyOne')} before each release.</li> |
|
|
<li>Push each validated dataset as a new HF Hub version.</li> |
|
|
<li>Provide {link('https://huggingface.co/spaces','Spaces')} for exploration, demo, and review.</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Card title="Typical Repo Layout (HF)"> |
|
|
<pre style={{ margin: 0, fontFamily: 'ui-monospace, SFMono-Regular, Menlo, monospace', fontSize: 12, whiteSpace: 'pre-wrap' }}> |
|
|
{`datasets/ |
|
|
eurecat/haru-social-vla/ |
|
|
README.md # dataset card with tags + license |
|
|
data/ # small/curated samples or manifests |
|
|
annotations/ |
|
|
splits/ # train/val/test lists |
|
|
scripts/ # loading + eval utils |
|
|
models/ |
|
|
eurecat/haru-expressive-decoder/ |
|
|
README.md # model card (training data, metrics) |
|
|
config/ |
|
|
checkpoints/`} |
|
|
</pre> |
|
|
</Card> |
|
|
</section> |
|
|
|
|
|
<footer style={{ fontSize: 12, color: '#6b7280', marginTop: 12 }}> |
|
|
Tip: enforce tagging conventions (task=manipulation | hri | planning; modality=rgbd | audio | pose; license; privacy). Automate checks in CI before merging a dataset release. |
|
|
</footer> |
|
|
|
|
|
{/* ============================= */} |
|
|
{/* MODEL TRAINING & REUSE STACK */} |
|
|
{/* ============================= */} |
|
|
|
|
|
<section style={{ marginTop: 48 }}> |
|
|
<header style={{ marginBottom: 12 }}> |
|
|
<h2 style={{ fontSize: 24, fontWeight: 700, margin: 0 }}>Hugging Face–Centric Model Lifecycle Stack</h2> |
|
|
<p style={{ color: '#6b7280', marginTop: 6 }}>Unified workflow for model training, evaluation, storage, deployment, and reuse — using the fewest possible tools while supporting robotics and multimodal tasks.</p> |
|
|
</header> |
|
|
|
|
|
{/* Stage definitions */} |
|
|
<section style={{ marginBottom: 16 }}> |
|
|
<h3 style={{ fontSize: 18, fontWeight: 600, margin: 0 }}>Stage Definitions & Examples</h3> |
|
|
<ul style={{ margin: '8px 0 0 0', paddingLeft: 18, color: '#374151' }}> |
|
|
<li><strong>Training:</strong> Model optimization using GPUs (local or {link('https://www.runpod.io/','RunPod')} cloud). Example: fine-tuning a multimodal encoder on robot-social datasets.</li> |
|
|
<li><strong>Evaluation:</strong> Measure metrics, visualize results. Example: compute CCC for valence/arousal or success rate for manipulation plans.</li> |
|
|
<li><strong>Storage & Versioning:</strong> Upload model checkpoints and configs to {link('https://huggingface.co/','Hugging Face Hub')} for long-term reproducibility.</li> |
|
|
<li><strong>Deployment:</strong> Serve models for inference in {link('https://huggingface.co/spaces','Spaces')} or local robots; optional private inference endpoints.</li> |
|
|
<li><strong>Local Inference (On‑Prem/Edge):</strong> Package models with {link('https://www.docker.com/','Docker')} + {link('https://fastapi.tiangolo.com/','FastAPI')} for REST/gRPC; optimize with {link('https://onnxruntime.ai/','ONNX Runtime')}, {link('https://developer.nvidia.com/tensorrt','TensorRT')} (NVIDIA), or {link('https://www.intel.com/openvino','OpenVINO')} (Intel). Integrate as a {link('https://www.ros.org/','ROS 2')} node on the robot.</li> |
|
|
<li><strong>Reuse / Continual Learning:</strong> Load models via <code>transformers</code> API; continue training or integrate into reasoning/interaction systems.</li> |
|
|
</ul> |
|
|
</section> |
|
|
|
|
|
{/* Model lifecycle flow (added Local Deployment step) */} |
|
|
<section style={{ display: 'grid', gridTemplateColumns: '1fr 40px 1fr 40px 1fr 40px 1fr 40px 1fr 40px 1fr', gap: 12, alignItems: 'stretch', marginBottom: 24 }}> |
|
|
<Card title="Training (GPU/RunPod)"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>Train locally or on {link('https://www.runpod.io/','RunPod')} cloud GPUs</li> |
|
|
<li>Use {link('https://huggingface.co/docs/transformers','Transformers')} + {link('https://huggingface.co/docs/accelerate','Accelerate')} for training</li> |
|
|
<li>Track metrics with {link('https://wandb.ai/site','Weights & Biases')} or built-in logs</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Arrow/> |
|
|
<Card title="Evaluation"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>Use {link('https://huggingface.co/docs/evaluate','Evaluate')} library for metrics</li> |
|
|
<li>Visualize predictions with FiftyOne or Spaces</li> |
|
|
<li>Generate benchmark reports</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Arrow/> |
|
|
<Card title="Model Storage (HF Hub)"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>Push models via <code>huggingface_hub</code> API</li> |
|
|
<li>Keep config, tokenizer, and weights</li> |
|
|
<li>Versioned releases, changelogs, model cards</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Arrow/> |
|
|
<Card title="Deployment & Inference (Cloud)"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>Serve via HF {link('https://huggingface.co/inference-api','Inference API')} or Spaces</li> |
|
|
<li>Integrate into robot planner / dialogue manager</li> |
|
|
<li>Public or private endpoints</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Arrow/> |
|
|
<Card title="Local Deployment (On‑Prem/Edge)"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>{link('https://www.docker.com/','Docker')} image + {link('https://fastapi.tiangolo.com/','FastAPI')} service</li> |
|
|
<li>Accelerate with {link('https://onnxruntime.ai/','ONNX Runtime')}, {link('https://developer.nvidia.com/tensorrt','TensorRT')}, {link('https://www.intel.com/openvino','OpenVINO')}</li> |
|
|
<li>Expose as {link('https://www.ros.org/','ROS 2')} node or local REST/gRPC</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Arrow/> |
|
|
<Card title="Reuse & Continual Learning"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>Load via {link('https://huggingface.co/docs/transformers/quicktour','Transformers.load_pretrained')}</li> |
|
|
<li>Adapt models for new domains or robot skills</li> |
|
|
<li>Fine-tune periodically with new curated data</li> |
|
|
</ul> |
|
|
</Card> |
|
|
</section> |
|
|
|
|
|
{/* Summary */} |
|
|
<section style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 12 }}> |
|
|
<Card title="Minimal Tool Stack"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li><strong>Training:</strong> RunPod + HF Accelerate</li> |
|
|
<li><strong>Evaluation:</strong> HF Evaluate + simple scripts</li> |
|
|
<li><strong>Storage:</strong> Hugging Face Hub</li> |
|
|
<li><strong>Deployment (Cloud):</strong> HF Spaces / Inference API</li> |
|
|
<li><strong>Deployment (Local Optional):</strong> FastAPI + Docker (+ ONNX/TensorRT/OpenVINO)</li> |
|
|
<li><strong>Reuse:</strong> Transformers API</li> |
|
|
</ul> |
|
|
</Card> |
|
|
<Card title="Best Practices"> |
|
|
<ul style={{ margin: 0, paddingLeft: 18 }}> |
|
|
<li>Keep one model repo per skill (e.g., gaze decoder, z<sub>social</sub> encoder)</li> |
|
|
<li>Tag model cards with dataset and evaluation metrics</li> |
|
|
<li>Use Spaces for lightweight demos or robot simulations</li> |
|
|
<li>Automate CI/CD: push training logs + model eval to Hub</li> |
|
|
<li>Export optimized runners (ONNX/TensorRT/OpenVINO) for edge deployment</li> |
|
|
<li>Provide ROS 2 wrappers for robot-side integration</li> |
|
|
</ul> |
|
|
</Card> |
|
|
</section> |
|
|
</section> |
|
|
|
|
|
{/* --- Dev self-checks (simple tests) --- */} |
|
|
<section style={{ marginTop: 32 }}> |
|
|
<details> |
|
|
<summary style={{ cursor: 'pointer', color: '#374151' }}>Dev Tests</summary> |
|
|
<ul style={{ marginTop: 8, paddingLeft: 18 }}> |
|
|
{tests.map((t) => ( |
|
|
<li key={t.name} style={{ color: t.pass ? '#16a34a' : '#dc2626' }}> |
|
|
{t.pass ? 'PASS' : 'FAIL'} — {t.name} |
|
|
</li> |
|
|
))} |
|
|
</ul> |
|
|
<div style={{ marginTop: 8, fontSize: 12, color: '#6b7280' }}>Links tracked: {requiredLinks.length}</div> |
|
|
</details> |
|
|
</section> |
|
|
</div> |
|
|
); |
|
|
} |
|
|
|