// Provenance: "Update src/App.js" — commit abf2c87 (verified), author magi-dalmau.
// NOTE: Standalone React component with ZERO external UI/icon/motion deps.
// Removed: shadcn/ui, lucide-react, framer-motion.
// Safe for generic CRA/Spaces builds.
export default function Diagram() {
// Minimal, dependency-free "Card"
const Card = ({ title, children }) => (
<div style={{ border: '1px solid #e5e7eb', borderRadius: 16, background: 'white', boxShadow: '0 1px 2px rgba(0,0,0,0.04)' }}>
<div style={{ padding: '12px 16px', borderBottom: '1px solid #f1f5f9' }}>
<div style={{ fontSize: 16, fontWeight: 600 }}>{title}</div>
</div>
<div style={{ padding: 16, color: '#374151', fontSize: 14 }}>{children}</div>
</div>
);
const Arrow = () => (
<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'center' }} aria-hidden>
<span style={{ fontSize: 20 }}></span>
</div>
);
// --- Simple test helpers (rendered at bottom) ---
const requiredLinks = [
'https://huggingface.co/',
'https://gradio.app/',
'https://streamlit.io/',
'https://huggingface.co/docs/datasets',
'https://labelstud.io/',
'https://cvat.org/',
'https://roboflow.com/',
'https://voxel51.com/',
'https://cleanlab.ai/',
'https://aws.amazon.com/s3/',
'https://cloud.google.com/storage',
'https://min.io/',
'https://huggingface.co/spaces',
'https://www.runpod.io/',
'https://huggingface.co/docs/transformers',
'https://huggingface.co/docs/accelerate',
'https://huggingface.co/docs/evaluate',
'https://huggingface.co/inference-api',
'https://huggingface.co/docs/transformers/quicktour',
// Added for local deployment
'https://fastapi.tiangolo.com/',
'https://onnxruntime.ai/',
'https://developer.nvidia.com/tensorrt',
'https://www.intel.com/openvino',
'https://www.ros.org/',
'https://www.docker.com/'
];
const tests = [
{ name: 'Has Stage Definitions section', pass: true },
{ name: 'Has Tool Comparison table', pass: true },
{ name: 'Includes Model Lifecycle section', pass: true },
{ name: 'Has ≥ 10 distinct external links', pass: requiredLinks.length >= 10 }
];
const link = (href, text) => (
<a href={href} target="_blank" rel="noreferrer noopener" style={{ color: '#2563eb', textDecoration: 'underline' }}>{text}</a>
);
return (
<div style={{ padding: '24px', maxWidth: 1100, margin: '0 auto' }}>
<header style={{ marginBottom: 16 }}>
<h1 style={{ fontSize: 28, fontWeight: 700, margin: 0 }}>Hugging Face–Centric Minimal Data Stack</h1>
<p style={{ color: '#6b7280', marginTop: 6 }}>Single-backbone workflow for robotics datasets (manipulation, perception, reasoning, HRI) with minimal tools and frictionless integration.</p>
</header>
{/* Stage definitions */}
<section style={{ display: 'grid', gap: 12, marginBottom: 24 }}>
<h2 style={{ fontSize: 20, fontWeight: 600, margin: 0 }}>Stage Definitions & Examples</h2>
<ul style={{ margin: 0, paddingLeft: 18, color: '#374151' }}>
<li><strong>Data Collection:</strong> Raw recordings from robots or simulations. Example: RGB-D video, audio, and joint states captured during human-robot interaction.</li>
<li><strong>Annotation:</strong> Assign labels or semantics to collected data. Example: gesture type, emotion, manipulated object, speech act.</li>
<li><strong>Curation:</strong> Filter, validate, and organize annotated data into usable splits (train/val/test). Example: remove bad frames, balance human/robot perspectives.</li>
<li><strong>Publishing (Hub):</strong> Versioned dataset hosting on {link('https://huggingface.co/','Hugging Face Hub')}, with metadata and documentation. Example: pushing curated subsets for manipulation learning.</li>
<li><strong>Visualization (Spaces):</strong> Interactive dashboards or viewers built in {link('https://gradio.app/','Gradio')} or {link('https://streamlit.io/','Streamlit')} for exploration or validation. Example: playback of synchronized gaze, pose, and audio segments.</li>
<li><strong>Reuse & Training:</strong> Loading datasets directly via {link('https://huggingface.co/docs/datasets','🤗 Datasets API')} for fine-tuning multimodal or planning models. Example: training z<sub>social</sub> encoders or expressive decoders.</li>
</ul>
</section>
{/* Main flow diagram */}
<section style={{ display: 'grid', gridTemplateColumns: '1fr 40px 1fr 40px 1fr 40px 1fr 40px 1fr', gap: 12, alignItems: 'stretch', marginBottom: 24 }}>
<Card title="Data Sources">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>Robot logs (RGB-D, audio, pose)</li>
<li>Sim runs & demos</li>
<li>Interaction clips</li>
<li>Planning/intent traces</li>
</ul>
</Card>
<Arrow/>
<Card title="Annotation (min one)">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>{link('https://labelstud.io/','Label Studio')} (self-host or cloud)</li>
<li>{link('https://cvat.org/','CVAT')} / {link('https://roboflow.com/','Roboflow')} (export)</li>
<li>Exports: COCO, JSON, CSV</li>
</ul>
</Card>
<Arrow/>
<Card title="Curation (optional)">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>{link('https://voxel51.com/','FiftyOne')}: filter, QA, splits</li>
<li>{link('https://cleanlab.ai/','Cleanlab')} / Pandas checks</li>
<li>Embed search for edge cases</li>
</ul>
</Card>
<Arrow/>
<Card title="HF Hub (Backbone)">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>{link('https://huggingface.co/','Datasets & models')} in repos</li>
<li>Git + LFS versioning</li>
<li>Private org, permissions</li>
<li>Tags, README, cards</li>
</ul>
</Card>
<Arrow/>
<Card title="HF Spaces (Viz)">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>{link('https://huggingface.co/spaces','Gradio/Streamlit viewers')}</li>
<li>Clip browser, 3D previews</li>
<li>Eval dashboards & demos</li>
</ul>
</Card>
</section>
{/* Tool comparison */}
<section style={{ marginBottom: 24 }}>
<h2 style={{ fontSize: 20, fontWeight: 600, margin: '0 0 8px 0' }}>Comparison: Annotation & Curation Tools</h2>
<div style={{ overflowX: 'auto' }}>
<table style={{ width: '100%', fontSize: 14, borderCollapse: 'collapse' }}>
<thead>
<tr style={{ background: '#f3f4f6', color: '#374151' }}>
<th style={{ padding: 8, textAlign: 'left' }}>Tool</th>
<th style={{ padding: 8, textAlign: 'left' }}>Strengths</th>
<th style={{ padding: 8, textAlign: 'left' }}>Limitations</th>
<th style={{ padding: 8, textAlign: 'left' }}>Integration with HF</th>
</tr>
</thead>
<tbody>
<tr>
<td style={{ padding: 8, fontWeight: 600 }}>{link('https://labelstud.io/','Label Studio')}</td>
<td style={{ padding: 8 }}>Open source, multi-modal (image, audio, text, video). Very flexible schema; plugin ecosystem.</td>
<td style={{ padding: 8 }}>Requires setup for teams; interface slower with 100k+ samples.</td>
<td style={{ padding: 8 }}>Native {link('https://huggingface.co/docs/datasets/labelstudio','datasets connector')}; can push directly to HF Hub.</td>
</tr>
<tr>
<td style={{ padding: 8, fontWeight: 600 }}>{link('https://cvat.org/','CVAT')}</td>
<td style={{ padding: 8 }}>Great for video and dense bounding-box/pose annotations; powerful auto-annotation tools.</td>
<td style={{ padding: 8 }}>Primarily vision-focused; heavier deployment (Docker).</td>
<td style={{ padding: 8 }}>Exports in COCO/VOC formats easily loadable with <code>datasets.load_dataset</code>.</td>
</tr>
<tr>
<td style={{ padding: 8, fontWeight: 600 }}>{link('https://roboflow.com/','Roboflow')}</td>
<td style={{ padding: 8 }}>Cloud-based; fast web UI and built-in preprocessing and augmentation.</td>
<td style={{ padding: 8 }}>Closed-source, limited free tier; less flexible schemas.</td>
<td style={{ padding: 8 }}>Exports compatible with HF datasets; no native connector but simple upload via API.</td>
</tr>
<tr>
<td style={{ padding: 8, fontWeight: 600 }}>{link('https://voxel51.com/','FiftyOne')}</td>
<td style={{ padding: 8 }}>Advanced filtering, visualization, embedding-based analysis.</td>
<td style={{ padding: 8 }}>Not for annotation itself; local-first.</td>
<td style={{ padding: 8 }}>Direct push/export to HF Hub for curated dataset versions.</td>
</tr>
</tbody>
</table>
</div>
</section>
{/* Output / training */}
<section style={{ display: 'grid', gridTemplateColumns: '1fr 1fr 1fr', gap: 12, marginBottom: 24 }}>
<Card title="Train & Reuse">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>Load via {link('https://huggingface.co/docs/datasets','datasets streaming')}</li>
<li>Fine-tune VL/VLA/ASR models</li>
<li>Push checkpoints to HF</li>
</ul>
</Card>
<Card title="Raw Storage (optional)">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>{link('https://aws.amazon.com/s3/','AWS S3')} / {link('https://cloud.google.com/storage','GCS')} / {link('https://min.io/','MinIO')} for TB+ raw</li>
<li>Keep curated subsets on HF</li>
<li>Link via metadata/URIs</li>
</ul>
</Card>
<Card title="Governance (lite)">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>Repo permissions & reviews</li>
<li>Semantic tags & licenses</li>
<li>Changelogs & model cards</li>
</ul>
</Card>
</section>
{/* Notes */}
<section style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 12 }}>
<Card title="Operating Principles">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>Keep the workflow lean: Hugging Face Hub as the single backbone.</li>
<li>One annotation tool ({link('https://labelstud.io/','Label Studio')}, {link('https://cvat.org/','CVAT')}, or {link('https://roboflow.com/','Roboflow')}).</li>
<li>Optional curation with {link('https://voxel51.com/','FiftyOne')} before each release.</li>
<li>Push each validated dataset as a new HF Hub version.</li>
<li>Provide {link('https://huggingface.co/spaces','Spaces')} for exploration, demo, and review.</li>
</ul>
</Card>
<Card title="Typical Repo Layout (HF)">
<pre style={{ margin: 0, fontFamily: 'ui-monospace, SFMono-Regular, Menlo, monospace', fontSize: 12, whiteSpace: 'pre-wrap' }}>
{`datasets/
eurecat/haru-social-vla/
README.md # dataset card with tags + license
data/ # small/curated samples or manifests
annotations/
splits/ # train/val/test lists
scripts/ # loading + eval utils
models/
eurecat/haru-expressive-decoder/
README.md # model card (training data, metrics)
config/
checkpoints/`}
</pre>
</Card>
</section>
<footer style={{ fontSize: 12, color: '#6b7280', marginTop: 12 }}>
Tip: enforce tagging conventions (task=manipulation | hri | planning; modality=rgbd | audio | pose; license; privacy). Automate checks in CI before merging a dataset release.
</footer>
{/* ============================= */}
{/* MODEL TRAINING & REUSE STACK */}
{/* ============================= */}
<section style={{ marginTop: 48 }}>
<header style={{ marginBottom: 12 }}>
<h2 style={{ fontSize: 24, fontWeight: 700, margin: 0 }}>Hugging Face–Centric Model Lifecycle Stack</h2>
<p style={{ color: '#6b7280', marginTop: 6 }}>Unified workflow for model training, evaluation, storage, deployment, and reuse — using the fewest possible tools while supporting robotics and multimodal tasks.</p>
</header>
{/* Stage definitions */}
<section style={{ marginBottom: 16 }}>
<h3 style={{ fontSize: 18, fontWeight: 600, margin: 0 }}>Stage Definitions & Examples</h3>
<ul style={{ margin: '8px 0 0 0', paddingLeft: 18, color: '#374151' }}>
<li><strong>Training:</strong> Model optimization using GPUs (local or {link('https://www.runpod.io/','RunPod')} cloud). Example: fine-tuning a multimodal encoder on robot-social datasets.</li>
<li><strong>Evaluation:</strong> Measure metrics, visualize results. Example: compute CCC for valence/arousal or success rate for manipulation plans.</li>
<li><strong>Storage & Versioning:</strong> Upload model checkpoints and configs to {link('https://huggingface.co/','Hugging Face Hub')} for long-term reproducibility.</li>
<li><strong>Deployment:</strong> Serve models for inference in {link('https://huggingface.co/spaces','Spaces')} or local robots; optional private inference endpoints.</li>
<li><strong>Local Inference (On‑Prem/Edge):</strong> Package models with {link('https://www.docker.com/','Docker')} + {link('https://fastapi.tiangolo.com/','FastAPI')} for REST/gRPC; optimize with {link('https://onnxruntime.ai/','ONNX Runtime')}, {link('https://developer.nvidia.com/tensorrt','TensorRT')} (NVIDIA), or {link('https://www.intel.com/openvino','OpenVINO')} (Intel). Integrate as a {link('https://www.ros.org/','ROS 2')} node on the robot.</li>
<li><strong>Reuse / Continual Learning:</strong> Load models via <code>transformers</code> API; continue training or integrate into reasoning/interaction systems.</li>
</ul>
</section>
{/* Model lifecycle flow (added Local Deployment step) */}
<section style={{ display: 'grid', gridTemplateColumns: '1fr 40px 1fr 40px 1fr 40px 1fr 40px 1fr 40px 1fr', gap: 12, alignItems: 'stretch', marginBottom: 24 }}>
<Card title="Training (GPU/RunPod)">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>Train locally or on {link('https://www.runpod.io/','RunPod')} cloud GPUs</li>
<li>Use {link('https://huggingface.co/docs/transformers','Transformers')} + {link('https://huggingface.co/docs/accelerate','Accelerate')} for training</li>
<li>Track metrics with {link('https://wandb.ai/site','Weights & Biases')} or built-in logs</li>
</ul>
</Card>
<Arrow/>
<Card title="Evaluation">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>Use {link('https://huggingface.co/docs/evaluate','Evaluate')} library for metrics</li>
<li>Visualize predictions with FiftyOne or Spaces</li>
<li>Generate benchmark reports</li>
</ul>
</Card>
<Arrow/>
<Card title="Model Storage (HF Hub)">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>Push models via <code>huggingface_hub</code> API</li>
<li>Keep config, tokenizer, and weights</li>
<li>Versioned releases, changelogs, model cards</li>
</ul>
</Card>
<Arrow/>
<Card title="Deployment & Inference (Cloud)">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>Serve via HF {link('https://huggingface.co/inference-api','Inference API')} or Spaces</li>
<li>Integrate into robot planner / dialogue manager</li>
<li>Public or private endpoints</li>
</ul>
</Card>
<Arrow/>
<Card title="Local Deployment (On‑Prem/Edge)">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>{link('https://www.docker.com/','Docker')} image + {link('https://fastapi.tiangolo.com/','FastAPI')} service</li>
<li>Accelerate with {link('https://onnxruntime.ai/','ONNX Runtime')}, {link('https://developer.nvidia.com/tensorrt','TensorRT')}, {link('https://www.intel.com/openvino','OpenVINO')}</li>
<li>Expose as {link('https://www.ros.org/','ROS 2')} node or local REST/gRPC</li>
</ul>
</Card>
<Arrow/>
<Card title="Reuse & Continual Learning">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>Load via {link('https://huggingface.co/docs/transformers/quicktour','Transformers.load_pretrained')}</li>
<li>Adapt models for new domains or robot skills</li>
<li>Fine-tune periodically with new curated data</li>
</ul>
</Card>
</section>
{/* Summary */}
<section style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 12 }}>
<Card title="Minimal Tool Stack">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li><strong>Training:</strong> RunPod + HF Accelerate</li>
<li><strong>Evaluation:</strong> HF Evaluate + simple scripts</li>
<li><strong>Storage:</strong> Hugging Face Hub</li>
<li><strong>Deployment (Cloud):</strong> HF Spaces / Inference API</li>
<li><strong>Deployment (Local Optional):</strong> FastAPI + Docker (+ ONNX/TensorRT/OpenVINO)</li>
<li><strong>Reuse:</strong> Transformers API</li>
</ul>
</Card>
<Card title="Best Practices">
<ul style={{ margin: 0, paddingLeft: 18 }}>
<li>Keep one model repo per skill (e.g., gaze decoder, z<sub>social</sub> encoder)</li>
<li>Tag model cards with dataset and evaluation metrics</li>
<li>Use Spaces for lightweight demos or robot simulations</li>
<li>Automate CI/CD: push training logs + model eval to Hub</li>
<li>Export optimized runners (ONNX/TensorRT/OpenVINO) for edge deployment</li>
<li>Provide ROS 2 wrappers for robot-side integration</li>
</ul>
</Card>
</section>
</section>
{/* --- Dev self-checks (simple tests) --- */}
<section style={{ marginTop: 32 }}>
<details>
<summary style={{ cursor: 'pointer', color: '#374151' }}>Dev Tests</summary>
<ul style={{ marginTop: 8, paddingLeft: 18 }}>
{tests.map((t) => (
<li key={t.name} style={{ color: t.pass ? '#16a34a' : '#dc2626' }}>
{t.pass ? 'PASS' : 'FAIL'} — {t.name}
</li>
))}
</ul>
<div style={{ marginTop: 8, fontSize: 12, color: '#6b7280' }}>Links tracked: {requiredLinks.length}</div>
</details>
</section>
</div>
);
}