Spaces:

eurecat-robotics
/

data-training-pipelines

Running

App Files Files Community

data-training-pipelines / src /App.js

magi-dalmau's picture

Update src/App.js

abf2c87 verified 2 months ago

history blame contribute delete

19.2 kB

	// NOTE: Standalone React component with ZERO external UI/icon/motion deps.
	// Removed: shadcn/ui, lucide-react, framer-motion.
	// Safe for generic CRA/Spaces builds.

	export default function Diagram() {
	// Minimal, dependency-free "Card"
	const Card = ({ title, children }) => (
	<div style={{ border: '1px solid #e5e7eb', borderRadius: 16, background: 'white', boxShadow: '0 1px 2px rgba(0,0,0,0.04)' }}>
	<div style={{ padding: '12px 16px', borderBottom: '1px solid #f1f5f9' }}>
	<div style={{ fontSize: 16, fontWeight: 600 }}>{title}</div>
	</div>
	<div style={{ padding: 16, color: '#374151', fontSize: 14 }}>{children}</div>
	</div>
	);

	const Arrow = () => (
	<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'center' }} aria-hidden>
	<span style={{ fontSize: 20 }}>➜</span>
	</div>
	);

	// --- Simple test helpers (rendered at bottom) ---
	// External URLs tracked by the "Dev Tests" panel. The panel shows the raw
	// length of this list and uses it for the link-count self-check below.
	// NOTE(review): the markup also links to https://wandb.ai/site and
	// https://huggingface.co/docs/datasets/labelstudio, which are not listed
	// here — confirm whether they should be tracked as well.
	const requiredLinks = [
		'https://huggingface.co/',
		'https://gradio.app/',
		'https://streamlit.io/',
		'https://huggingface.co/docs/datasets',
		'https://labelstud.io/',
		'https://cvat.org/',
		'https://roboflow.com/',
		'https://voxel51.com/',
		'https://cleanlab.ai/',
		'https://aws.amazon.com/s3/',
		'https://cloud.google.com/storage',
		'https://min.io/',
		'https://huggingface.co/spaces',
		'https://www.runpod.io/',
		'https://huggingface.co/docs/transformers',
		'https://huggingface.co/docs/accelerate',
		'https://huggingface.co/docs/evaluate',
		'https://huggingface.co/inference-api',
		'https://huggingface.co/docs/transformers/quicktour',
		// Added for local deployment
		'https://fastapi.tiangolo.com/',
		'https://onnxruntime.ai/',
		'https://developer.nvidia.com/tensorrt',
		'https://www.intel.com/openvino',
		'https://www.ros.org/',
		'https://www.docker.com/'
	];

	// The last self-check promises *distinct* links, so count unique URLs
	// rather than the array length (which would also count duplicates).
	const distinctLinkCount = new Set(requiredLinks).size;

	// Self-checks listed in the "Dev Tests" panel. The first three are
	// hard-coded to pass; only the link-count check is computed.
	const tests = [
		{ name: 'Has Stage Definitions section', pass: true },
		{ name: 'Has Tool Comparison table', pass: true },
		{ name: 'Includes Model Lifecycle section', pass: true },
		{ name: 'Has ≥ 10 distinct external links', pass: distinctLinkCount >= 10 }
	];

	const link = (href, text) => (
	<a href={href} target="_blank" rel="noreferrer noopener" style={{ color: '#2563eb', textDecoration: 'underline' }}>{text}</a>
	);

	return (
	<div style={{ padding: '24px', maxWidth: 1100, margin: '0 auto' }}>
	<header style={{ marginBottom: 16 }}>
	<h1 style={{ fontSize: 28, fontWeight: 700, margin: 0 }}>Hugging Face–Centric Minimal Data Stack</h1>
	<p style={{ color: '#6b7280', marginTop: 6 }}>Single-backbone workflow for robotics datasets (manipulation, perception, reasoning, HRI) with minimal tools and frictionless integration.</p>
	</header>

	{/* Stage definitions */}
	<section style={{ display: 'grid', gap: 12, marginBottom: 24 }}>
	<h2 style={{ fontSize: 20, fontWeight: 600, margin: 0 }}>Stage Definitions & Examples</h2>
	<ul style={{ margin: 0, paddingLeft: 18, color: '#374151' }}>
	<li><strong>Data Collection:</strong> Raw recordings from robots or simulations. Example: RGB-D video, audio, and joint states captured during human-robot interaction.</li>
	<li><strong>Annotation:</strong> Assign labels or semantics to collected data. Example: gesture type, emotion, manipulated object, speech act.</li>
	<li><strong>Curation:</strong> Filter, validate, and organize annotated data into usable splits (train/val/test). Example: remove bad frames, balance human/robot perspectives.</li>
	<li><strong>Publishing (Hub):</strong> Versioned dataset hosting on {link('https://huggingface.co/','Hugging Face Hub')}, with metadata and documentation. Example: pushing curated subsets for manipulation learning.</li>
	<li><strong>Visualization (Spaces):</strong> Interactive dashboards or viewers built in {link('https://gradio.app/','Gradio')} or {link('https://streamlit.io/','Streamlit')} for exploration or validation. Example: playback of synchronized gaze, pose, and audio segments.</li>
	<li><strong>Reuse & Training:</strong> Loading datasets directly via {link('https://huggingface.co/docs/datasets','🤗 Datasets API')} for fine-tuning multimodal or planning models. Example: training z<sub>social</sub> encoders or expressive decoders.</li>
	</ul>
	</section>

	{/* Main flow diagram */}
	<section style={{ display: 'grid', gridTemplateColumns: '1fr 40px 1fr 40px 1fr 40px 1fr 40px 1fr', gap: 12, alignItems: 'stretch', marginBottom: 24 }}>
	<Card title="Data Sources">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>Robot logs (RGB-D, audio, pose)</li>
	<li>Sim runs & demos</li>
	<li>Interaction clips</li>
	<li>Planning/intent traces</li>
	</ul>
	</Card>
	<Arrow/>
	<Card title="Annotation (min one)">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>{link('https://labelstud.io/','Label Studio')} (self-host or cloud)</li>
	<li>{link('https://cvat.org/','CVAT')} / {link('https://roboflow.com/','Roboflow')} (export)</li>
	<li>Exports: COCO, JSON, CSV</li>
	</ul>
	</Card>
	<Arrow/>
	<Card title="Curation (optional)">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>{link('https://voxel51.com/','FiftyOne')}: filter, QA, splits</li>
	<li>{link('https://cleanlab.ai/','Cleanlab')} / Pandas checks</li>
	<li>Embed search for edge cases</li>
	</ul>
	</Card>
	<Arrow/>
	<Card title="HF Hub (Backbone)">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>{link('https://huggingface.co/','Datasets & models')} in repos</li>
	<li>Git + LFS versioning</li>
	<li>Private org, permissions</li>
	<li>Tags, README, cards</li>
	</ul>
	</Card>
	<Arrow/>
	<Card title="HF Spaces (Viz)">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>{link('https://huggingface.co/spaces','Gradio/Streamlit viewers')}</li>
	<li>Clip browser, 3D previews</li>
	<li>Eval dashboards & demos</li>
	</ul>
	</Card>
	</section>

	{/* Tool comparison */}
	<section style={{ marginBottom: 24 }}>
	<h2 style={{ fontSize: 20, fontWeight: 600, margin: '0 0 8px 0' }}>Comparison: Annotation & Curation Tools</h2>
	<div style={{ overflowX: 'auto' }}>
	<table style={{ width: '100%', fontSize: 14, borderCollapse: 'collapse' }}>
	<thead>
	<tr style={{ background: '#f3f4f6', color: '#374151' }}>
	<th style={{ padding: 8, textAlign: 'left' }}>Tool</th>
	<th style={{ padding: 8, textAlign: 'left' }}>Strengths</th>
	<th style={{ padding: 8, textAlign: 'left' }}>Limitations</th>
	<th style={{ padding: 8, textAlign: 'left' }}>Integration with HF</th>
	</tr>
	</thead>
	<tbody>
	<tr>
	<td style={{ padding: 8, fontWeight: 600 }}>{link('https://labelstud.io/','Label Studio')}</td>
	<td style={{ padding: 8 }}>Open source, multi-modal (image, audio, text, video). Very flexible schema; plugin ecosystem.</td>
	<td style={{ padding: 8 }}>Requires setup for teams; interface slower with 100k+ samples.</td>
	<td style={{ padding: 8 }}>Native {link('https://huggingface.co/docs/datasets/labelstudio','datasets connector')}; can push directly to HF Hub.</td>
	</tr>
	<tr>
	<td style={{ padding: 8, fontWeight: 600 }}>{link('https://cvat.org/','CVAT')}</td>
	<td style={{ padding: 8 }}>Great for video and dense bounding-box/pose annotations; powerful auto-annotation tools.</td>
	<td style={{ padding: 8 }}>Primarily vision-focused; heavier deployment (Docker).</td>
	<td style={{ padding: 8 }}>Exports in COCO/VOC formats easily loadable with <code>datasets.load_dataset</code>.</td>
	</tr>
	<tr>
	<td style={{ padding: 8, fontWeight: 600 }}>{link('https://roboflow.com/','Roboflow')}</td>
	<td style={{ padding: 8 }}>Cloud-based; fast web UI and built-in preprocessing and augmentation.</td>
	<td style={{ padding: 8 }}>Closed-source, limited free tier; less flexible schemas.</td>
	<td style={{ padding: 8 }}>Exports compatible with HF datasets; no native connector but simple upload via API.</td>
	</tr>
	<tr>
	<td style={{ padding: 8, fontWeight: 600 }}>{link('https://voxel51.com/','FiftyOne')}</td>
	<td style={{ padding: 8 }}>Advanced filtering, visualization, embedding-based analysis.</td>
	<td style={{ padding: 8 }}>Not for annotation itself; local-first.</td>
	<td style={{ padding: 8 }}>Direct push/export to HF Hub for curated dataset versions.</td>
	</tr>
	</tbody>
	</table>
	</div>
	</section>

	{/* Output / training */}
	<section style={{ display: 'grid', gridTemplateColumns: '1fr 1fr 1fr', gap: 12, marginBottom: 24 }}>
	<Card title="Train & Reuse">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>Load via {link('https://huggingface.co/docs/datasets','datasets streaming')}</li>
	<li>Fine-tune VL/VLA/ASR models</li>
	<li>Push checkpoints to HF</li>
	</ul>
	</Card>
	<Card title="Raw Storage (optional)">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>{link('https://aws.amazon.com/s3/','AWS S3')} / {link('https://cloud.google.com/storage','GCS')} / {link('https://min.io/','MinIO')} for TB+ raw</li>
	<li>Keep curated subsets on HF</li>
	<li>Link via metadata/URIs</li>
	</ul>
	</Card>
	<Card title="Governance (lite)">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>Repo permissions & reviews</li>
	<li>Semantic tags & licenses</li>
	<li>Changelogs & model cards</li>
	</ul>
	</Card>
	</section>

	{/* Notes */}
	<section style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 12 }}>
	<Card title="Operating Principles">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>Keep the workflow lean: Hugging Face Hub as the single backbone.</li>
	<li>One annotation tool ({link('https://labelstud.io/','Label Studio')}, {link('https://cvat.org/','CVAT')}, or {link('https://roboflow.com/','Roboflow')}).</li>
	<li>Optional curation with {link('https://voxel51.com/','FiftyOne')} before each release.</li>
	<li>Push each validated dataset as a new HF Hub version.</li>
	<li>Provide {link('https://huggingface.co/spaces','Spaces')} for exploration, demo, and review.</li>
	</ul>
	</Card>
	<Card title="Typical Repo Layout (HF)">
	<pre style={{ margin: 0, fontFamily: 'ui-monospace, SFMono-Regular, Menlo, monospace', fontSize: 12, whiteSpace: 'pre-wrap' }}>
	{`datasets/
	eurecat/haru-social-vla/
	README.md # dataset card with tags + license
	data/ # small/curated samples or manifests
	annotations/
	splits/ # train/val/test lists
	scripts/ # loading + eval utils
	models/
	eurecat/haru-expressive-decoder/
	README.md # model card (training data, metrics)
	config/
	checkpoints/`}
	</pre>
	</Card>
	</section>

	<footer style={{ fontSize: 12, color: '#6b7280', marginTop: 12 }}>
	Tip: enforce tagging conventions (task=manipulation \| hri \| planning; modality=rgbd \| audio \| pose; license; privacy). Automate checks in CI before merging a dataset release.
	</footer>

	{/* ============================= */}
	{/* MODEL TRAINING & REUSE STACK */}
	{/* ============================= */}

	<section style={{ marginTop: 48 }}>
	<header style={{ marginBottom: 12 }}>
	<h2 style={{ fontSize: 24, fontWeight: 700, margin: 0 }}>Hugging Face–Centric Model Lifecycle Stack</h2>
	<p style={{ color: '#6b7280', marginTop: 6 }}>Unified workflow for model training, evaluation, storage, deployment, and reuse — using the fewest possible tools while supporting robotics and multimodal tasks.</p>
	</header>

	{/* Stage definitions */}
	<section style={{ marginBottom: 16 }}>
	<h3 style={{ fontSize: 18, fontWeight: 600, margin: 0 }}>Stage Definitions & Examples</h3>
	<ul style={{ margin: '8px 0 0 0', paddingLeft: 18, color: '#374151' }}>
	<li><strong>Training:</strong> Model optimization using GPUs (local or {link('https://www.runpod.io/','RunPod')} cloud). Example: fine-tuning a multimodal encoder on robot-social datasets.</li>
	<li><strong>Evaluation:</strong> Measure metrics, visualize results. Example: compute CCC for valence/arousal or success rate for manipulation plans.</li>
	<li><strong>Storage & Versioning:</strong> Upload model checkpoints and configs to {link('https://huggingface.co/','Hugging Face Hub')} for long-term reproducibility.</li>
	<li><strong>Deployment:</strong> Serve models for inference in {link('https://huggingface.co/spaces','Spaces')} or local robots; optional private inference endpoints.</li>
	<li><strong>Local Inference (On‑Prem/Edge):</strong> Package models with {link('https://www.docker.com/','Docker')} + {link('https://fastapi.tiangolo.com/','FastAPI')} for REST/gRPC; optimize with {link('https://onnxruntime.ai/','ONNX Runtime')}, {link('https://developer.nvidia.com/tensorrt','TensorRT')} (NVIDIA), or {link('https://www.intel.com/openvino','OpenVINO')} (Intel). Integrate as a {link('https://www.ros.org/','ROS 2')} node on the robot.</li>
	<li><strong>Reuse / Continual Learning:</strong> Load models via <code>transformers</code> API; continue training or integrate into reasoning/interaction systems.</li>
	</ul>
	</section>

	{/* Model lifecycle flow (added Local Deployment step) */}
	<section style={{ display: 'grid', gridTemplateColumns: '1fr 40px 1fr 40px 1fr 40px 1fr 40px 1fr 40px 1fr', gap: 12, alignItems: 'stretch', marginBottom: 24 }}>
	<Card title="Training (GPU/RunPod)">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>Train locally or on {link('https://www.runpod.io/','RunPod')} cloud GPUs</li>
	<li>Use {link('https://huggingface.co/docs/transformers','Transformers')} + {link('https://huggingface.co/docs/accelerate','Accelerate')} for training</li>
	<li>Track metrics with {link('https://wandb.ai/site','Weights & Biases')} or built-in logs</li>
	</ul>
	</Card>
	<Arrow/>
	<Card title="Evaluation">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>Use {link('https://huggingface.co/docs/evaluate','Evaluate')} library for metrics</li>
	<li>Visualize predictions with FiftyOne or Spaces</li>
	<li>Generate benchmark reports</li>
	</ul>
	</Card>
	<Arrow/>
	<Card title="Model Storage (HF Hub)">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>Push models via <code>huggingface_hub</code> API</li>
	<li>Keep config, tokenizer, and weights</li>
	<li>Versioned releases, changelogs, model cards</li>
	</ul>
	</Card>
	<Arrow/>
	<Card title="Deployment & Inference (Cloud)">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>Serve via HF {link('https://huggingface.co/inference-api','Inference API')} or Spaces</li>
	<li>Integrate into robot planner / dialogue manager</li>
	<li>Public or private endpoints</li>
	</ul>
	</Card>
	<Arrow/>
	<Card title="Local Deployment (On‑Prem/Edge)">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>{link('https://www.docker.com/','Docker')} image + {link('https://fastapi.tiangolo.com/','FastAPI')} service</li>
	<li>Accelerate with {link('https://onnxruntime.ai/','ONNX Runtime')}, {link('https://developer.nvidia.com/tensorrt','TensorRT')}, {link('https://www.intel.com/openvino','OpenVINO')}</li>
	<li>Expose as {link('https://www.ros.org/','ROS 2')} node or local REST/gRPC</li>
	</ul>
	</Card>
	<Arrow/>
	<Card title="Reuse & Continual Learning">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>Load via {link('https://huggingface.co/docs/transformers/quicktour','Transformers.load_pretrained')}</li>
	<li>Adapt models for new domains or robot skills</li>
	<li>Fine-tune periodically with new curated data</li>
	</ul>
	</Card>
	</section>

	{/* Summary */}
	<section style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 12 }}>
	<Card title="Minimal Tool Stack">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li><strong>Training:</strong> RunPod + HF Accelerate</li>
	<li><strong>Evaluation:</strong> HF Evaluate + simple scripts</li>
	<li><strong>Storage:</strong> Hugging Face Hub</li>
	<li><strong>Deployment (Cloud):</strong> HF Spaces / Inference API</li>
	<li><strong>Deployment (Local Optional):</strong> FastAPI + Docker (+ ONNX/TensorRT/OpenVINO)</li>
	<li><strong>Reuse:</strong> Transformers API</li>
	</ul>
	</Card>
	<Card title="Best Practices">
	<ul style={{ margin: 0, paddingLeft: 18 }}>
	<li>Keep one model repo per skill (e.g., gaze decoder, z<sub>social</sub> encoder)</li>
	<li>Tag model cards with dataset and evaluation metrics</li>
	<li>Use Spaces for lightweight demos or robot simulations</li>
	<li>Automate CI/CD: push training logs + model eval to Hub</li>
	<li>Export optimized runners (ONNX/TensorRT/OpenVINO) for edge deployment</li>
	<li>Provide ROS 2 wrappers for robot-side integration</li>
	</ul>
	</Card>
	</section>
	</section>

	{/* --- Dev self-checks (simple tests) --- */}
	<section style={{ marginTop: 32 }}>
	<details>
	<summary style={{ cursor: 'pointer', color: '#374151' }}>Dev Tests</summary>
	<ul style={{ marginTop: 8, paddingLeft: 18 }}>
	{tests.map((t) => (
	<li key={t.name} style={{ color: t.pass ? '#16a34a' : '#dc2626' }}>
	{t.pass ? 'PASS' : 'FAIL'} — {t.name}
	</li>
	))}
	</ul>
	<div style={{ marginTop: 8, fontSize: 12, color: '#6b7280' }}>Links tracked: {requiredLinks.length}</div>
	</details>
	</section>
	</div>
	);
	}