import { LayerDefinition, LayerType, GraphTemplate } from './types';
| export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = { | |
| // --- CORE --- | |
| [LayerType.INPUT]: { | |
| type: LayerType.INPUT, | |
| label: 'Input Data', | |
| description: 'Entry point for data tensors', | |
| category: 'Core', | |
| parameters: [ | |
| { name: 'name', type: 'string', label: 'Name', default: 'input_1', description: 'Variable name in forward()' }, | |
| { name: 'modality', type: 'select', label: 'Modality', default: 'Tensor', options: ['Tensor', 'Image', 'Text', 'Audio', 'Video', 'Latent', 'State', '3D Volume', 'Point Cloud', 'Radar', 'Lidar', 'Graph', 'Molecule'] }, | |
| { name: 'shape', type: 'string', label: 'Shape (e.g. 3,224,224)', default: '3, 224, 224' }, | |
| { name: 'batch_size', type: 'number', label: 'Batch Size', default: 32 }, | |
| { name: 'dtype', type: 'select', label: 'Data Type', default: 'float32', options: ['float32', 'int64', 'bool'] } | |
| ] | |
| }, | |
| [LayerType.LINEAR]: { | |
| type: LayerType.LINEAR, | |
| label: 'Linear (Dense)', | |
| description: 'Fully connected layer', | |
| category: 'Core', | |
| parameters: [ | |
| { name: 'in_features', type: 'number', label: 'In Features (Opt)', default: 0, description: "0 = Auto-infer" }, | |
| { name: 'out_features', type: 'number', label: 'Output Features', default: 128 }, | |
| { name: 'bias', type: 'boolean', label: 'Use Bias', default: true }, | |
| { name: 'activation', type: 'select', label: 'Fused Activation', default: 'None', options: ['None', 'ReLU', 'GELU', 'Sigmoid', 'Softplus'] } | |
| ] | |
| }, | |
| [LayerType.OUTPUT]: { | |
| type: LayerType.OUTPUT, | |
| label: 'Output Head', | |
| description: 'Final model output', | |
| category: 'Core', | |
| parameters: [ | |
| { name: 'num_classes', type: 'number', label: 'Classes', default: 10 }, | |
| { name: 'activation', type: 'select', label: 'Activation', default: 'Softmax', options: ['None', 'Softmax', 'Sigmoid', 'LogSoftmax'] } | |
| ] | |
| }, | |
| [LayerType.EMBEDDING]: { | |
| type: LayerType.EMBEDDING, | |
| label: 'Embedding', | |
| description: 'Lookup table for embeddings', | |
| category: 'Core', | |
| parameters: [ | |
| { name: 'num_embeddings', type: 'number', label: 'Vocab Size', default: 10000 }, | |
| { name: 'embedding_dim', type: 'number', label: 'Embed Dim', default: 256 }, | |
| { name: 'padding_idx', type: 'number', label: 'Padding Index', default: 0 }, | |
| { name: 'max_norm', type: 'number', label: 'Max Norm', default: 0, description: "0 = None" } | |
| ] | |
| }, | |
| [LayerType.POS_EMBED]: { | |
| type: LayerType.POS_EMBED, | |
| label: 'Positional Embed', | |
| description: 'Learnable positional embeddings', | |
| category: 'Core', | |
| parameters: [ | |
| { name: 'num_embeddings', type: 'number', label: 'Max Positions', default: 1024 }, | |
| { name: 'embedding_dim', type: 'number', label: 'Embed Dim', default: 256 } | |
| ] | |
| }, | |
| // --- PREPROCESSING (TF/KERAS STYLE) --- | |
| [LayerType.RESCALING]: { | |
| type: LayerType.RESCALING, | |
| label: 'Rescaling', | |
| description: 'Scale inputs (x * scale + offset)', | |
| category: 'Preprocessing', | |
| parameters: [ | |
| { name: 'scale', type: 'number', label: 'Scale', default: 0.00392, description: 'e.g. 1/255' }, | |
| { name: 'offset', type: 'number', label: 'Offset', default: 0.0 } | |
| ] | |
| }, | |
| [LayerType.RESIZING]: { | |
| type: LayerType.RESIZING, | |
| label: 'Resizing', | |
| description: 'Resize image batch', | |
| category: 'Preprocessing', | |
| parameters: [ | |
| { name: 'height', type: 'number', label: 'Height', default: 224 }, | |
| { name: 'width', type: 'number', label: 'Width', default: 224 }, | |
| { name: 'interpolation', type: 'select', label: 'Interpolation', default: 'bilinear', options: ['bilinear', 'nearest', 'bicubic'] } | |
| ] | |
| }, | |
| [LayerType.CENTER_CROP]: { | |
| type: LayerType.CENTER_CROP, | |
| label: 'Center Crop', | |
| description: 'Crops the central portion', | |
| category: 'Preprocessing', | |
| parameters: [ | |
| { name: 'height', type: 'number', label: 'Height', default: 224 }, | |
| { name: 'width', type: 'number', label: 'Width', default: 224 } | |
| ] | |
| }, | |
| [LayerType.NORMALIZATION_LAYER]: { | |
| type: LayerType.NORMALIZATION_LAYER, | |
| label: 'Normalization (Stat)', | |
| description: 'Normalize with mean/variance (Keras style)', | |
| category: 'Preprocessing', | |
| parameters: [ | |
| { name: 'mean', type: 'string', label: 'Mean', default: '0.0' }, | |
| { name: 'variance', type: 'string', label: 'Variance', default: '1.0' }, | |
| { name: 'axis', type: 'number', label: 'Axis', default: -1 } | |
| ] | |
| }, | |
| [LayerType.RANDOM_FLIP]: { | |
| type: LayerType.RANDOM_FLIP, | |
| label: 'Random Flip', | |
| description: 'Augmentation: Flips image', | |
| category: 'Preprocessing', | |
| parameters: [ | |
| { name: 'mode', type: 'select', label: 'Mode', default: 'horizontal', options: ['horizontal', 'vertical', 'horizontal_and_vertical'] }, | |
| { name: 'seed', type: 'number', label: 'Seed', default: 42 } | |
| ] | |
| }, | |
| [LayerType.RANDOM_ROTATION]: { | |
| type: LayerType.RANDOM_ROTATION, | |
| label: 'Random Rotation', | |
| description: 'Augmentation: Rotates image', | |
| category: 'Preprocessing', | |
| parameters: [ | |
| { name: 'factor', type: 'number', label: 'Factor (0-1)', default: 0.2 }, | |
| { name: 'fill_mode', type: 'select', label: 'Fill Mode', default: 'reflect', options: ['reflect', 'wrap', 'constant', 'nearest'] } | |
| ] | |
| }, | |
| [LayerType.RANDOM_ZOOM]: { | |
| type: LayerType.RANDOM_ZOOM, | |
| label: 'Random Zoom', | |
| description: 'Augmentation: Zooms image', | |
| category: 'Preprocessing', | |
| parameters: [ | |
| { name: 'height_factor', type: 'number', label: 'Height Factor', default: 0.2 }, | |
| { name: 'width_factor', type: 'number', label: 'Width Factor', default: 0.2 } | |
| ] | |
| }, | |
| [LayerType.RANDOM_CONTRAST]: { | |
| type: LayerType.RANDOM_CONTRAST, | |
| label: 'Random Contrast', | |
| description: 'Augmentation: Adjusts contrast', | |
| category: 'Preprocessing', | |
| parameters: [ | |
| { name: 'factor', type: 'number', label: 'Factor', default: 0.1 } | |
| ] | |
| }, | |
| [LayerType.TEXT_VECTORIZATION]: { | |
| type: LayerType.TEXT_VECTORIZATION, | |
| label: 'Text Vectorization', | |
| description: 'Map text to integer sequences', | |
| category: 'Preprocessing', | |
| parameters: [ | |
| { name: 'max_tokens', type: 'number', label: 'Max Tokens', default: 10000 }, | |
| { name: 'output_sequence_length', type: 'number', label: 'Seq Length', default: 256 }, | |
| { name: 'output_mode', type: 'select', label: 'Mode', default: 'int', options: ['int', 'binary', 'count', 'tf_idf'] } | |
| ] | |
| }, | |
| [LayerType.DISCRETIZATION]: { | |
| type: LayerType.DISCRETIZATION, | |
| label: 'Discretization', | |
| description: 'Bucketizes continuous features', | |
| category: 'Preprocessing', | |
| parameters: [ | |
| { name: 'num_bins', type: 'number', label: 'Num Bins', default: 10 } | |
| ] | |
| }, | |
| [LayerType.CATEGORY_ENCODING]: { | |
| type: LayerType.CATEGORY_ENCODING, | |
| label: 'Category Encoding', | |
| description: 'One-hot or Multi-hot encoding', | |
| category: 'Preprocessing', | |
| parameters: [ | |
| { name: 'num_tokens', type: 'number', label: 'Num Tokens', default: 10 }, | |
| { name: 'output_mode', type: 'select', label: 'Mode', default: 'one_hot', options: ['one_hot', 'multi_hot', 'count'] } | |
| ] | |
| }, | |
| // --- GRAPH NEURAL NETWORKS --- | |
| [LayerType.GCN_CONV]: { | |
| type: LayerType.GCN_CONV, | |
| label: 'GCN Conv', | |
| description: 'Graph Convolutional Network', | |
| category: 'Graph', | |
| parameters: [ | |
| { name: 'in_channels', type: 'number', label: 'In Channels', default: 16 }, | |
| { name: 'out_channels', type: 'number', label: 'Out Channels', default: 32 }, | |
| { name: 'improved', type: 'boolean', label: 'Improved GCN', default: false } | |
| ] | |
| }, | |
| [LayerType.GRAPH_SAGE]: { | |
| type: LayerType.GRAPH_SAGE, | |
| label: 'GraphSAGE', | |
| description: 'Inductive Graph Learning', | |
| category: 'Graph', | |
| parameters: [ | |
| { name: 'in_channels', type: 'number', label: 'In Channels', default: 16 }, | |
| { name: 'out_channels', type: 'number', label: 'Out Channels', default: 32 }, | |
| { name: 'aggr', type: 'select', label: 'Aggregator', default: 'mean', options: ['mean', 'max', 'lstm'] } | |
| ] | |
| }, | |
| [LayerType.GAT_CONV]: { | |
| type: LayerType.GAT_CONV, | |
| label: 'GAT Conv', | |
| description: 'Graph Attention Network', | |
| category: 'Graph', | |
| parameters: [ | |
| { name: 'in_channels', type: 'number', label: 'In Channels', default: 16 }, | |
| { name: 'out_channels', type: 'number', label: 'Out Channels', default: 8 }, | |
| { name: 'heads', type: 'number', label: 'Attention Heads', default: 4 }, | |
| { name: 'concat', type: 'boolean', label: 'Concat Heads', default: true } | |
| ] | |
| }, | |
| [LayerType.GIN_CONV]: { | |
| type: LayerType.GIN_CONV, | |
| label: 'GIN Conv', | |
| description: 'Graph Isomorphism Network', | |
| category: 'Graph', | |
| parameters: [ | |
| { name: 'eps', type: 'number', label: 'Epsilon', default: 0 }, | |
| { name: 'train_eps', type: 'boolean', label: 'Train Epsilon', default: true } | |
| ] | |
| }, | |
| // --- PHYSICS & SCIML --- | |
| [LayerType.NEURAL_ODE]: { | |
| type: LayerType.NEURAL_ODE, | |
| label: 'Neural ODE', | |
| description: 'Continuous Depth Model', | |
| category: 'Physics', | |
| parameters: [ | |
| { name: 'hidden_dim', type: 'number', label: 'Hidden Dim', default: 64 }, | |
| { name: 'solver', type: 'select', label: 'Solver', default: 'dopri5', options: ['dopri5', 'rk4', 'euler'] } | |
| ] | |
| }, | |
| [LayerType.PINN_LINEAR]: { | |
| type: LayerType.PINN_LINEAR, | |
| label: 'PINN Linear', | |
| description: 'Physics-Informed Linear Layer', | |
| category: 'Physics', | |
| parameters: [ | |
| { name: 'in_features', type: 'number', label: 'In Features', default: 32 }, | |
| { name: 'out_features', type: 'number', label: 'Out Features', default: 32 }, | |
| { name: 'constraint', type: 'text', label: 'Constraint Formula', default: 'grad(u, x) - f(x) = 0' } | |
| ] | |
| }, | |
| [LayerType.HAMILTONIAN_NN]: { | |
| type: LayerType.HAMILTONIAN_NN, | |
| label: 'Hamiltonian NN', | |
| description: 'Conserves Energy/Momentum', | |
| category: 'Physics', | |
| parameters: [ | |
| { name: 'dim', type: 'number', label: 'Dimension', default: 2 }, | |
| { name: 'hidden_dim', type: 'number', label: 'Hidden Dim', default: 64 } | |
| ] | |
| }, | |
| [LayerType.PROTEIN_FOLDING]: { | |
| type: LayerType.PROTEIN_FOLDING, | |
| label: 'Protein Folding', | |
| description: 'AlphaFold EvoFormer Block', | |
| category: 'Physics', | |
| parameters: [ | |
| { name: 'msa_dim', type: 'number', label: 'MSA Dim', default: 256 }, | |
| { name: 'pair_dim', type: 'number', label: 'Pair Dim', default: 128 }, | |
| { name: 'num_heads', type: 'number', label: 'Heads', default: 8 } | |
| ] | |
| }, | |
| // --- SPIKING & NEUROMORPHIC --- | |
| [LayerType.LIF_NEURON]: { | |
| type: LayerType.LIF_NEURON, | |
| label: 'LIF Neuron', | |
| description: 'Leaky Integrate-and-Fire', | |
| category: 'Spiking', | |
| parameters: [ | |
| { name: 'tau', type: 'number', label: 'Time Constant', default: 2.0 }, | |
| { name: 'v_threshold', type: 'number', label: 'Threshold', default: 1.0 }, | |
| { name: 'surrogate_grad', type: 'select', label: 'Surrogate Grad', default: 'fast_sigmoid', options: ['fast_sigmoid', 'arctan'] } | |
| ] | |
| }, | |
| [LayerType.SPIKING_LAYER]: { | |
| type: LayerType.SPIKING_LAYER, | |
| label: 'Spiking Dense', | |
| description: 'Fully Connected SNN Layer', | |
| category: 'Spiking', | |
| parameters: [ | |
| { name: 'in_features', type: 'number', label: 'In Features', default: 128 }, | |
| { name: 'out_features', type: 'number', label: 'Out Features', default: 128 }, | |
| { name: 'spike_mode', type: 'boolean', label: 'Output Spikes', default: true } | |
| ] | |
| }, | |
| // --- REINFORCEMENT LEARNING --- | |
| [LayerType.DUELING_HEAD]: { | |
| type: LayerType.DUELING_HEAD, | |
| label: 'Dueling Head', | |
| description: 'Separates Value and Advantage', | |
| category: 'RL', | |
| parameters: [ | |
| { name: 'input_dim', type: 'number', label: 'Input Dim', default: 512 }, | |
| { name: 'action_dim', type: 'number', label: 'Action Dim', default: 4 } | |
| ] | |
| }, | |
| [LayerType.PPO_HEAD]: { | |
| type: LayerType.PPO_HEAD, | |
| label: 'PPO Actor-Critic', | |
| description: 'Policy and Value Heads', | |
| category: 'RL', | |
| parameters: [ | |
| { name: 'hidden_dim', type: 'number', label: 'Hidden Dim', default: 256 }, | |
| { name: 'action_space', type: 'select', label: 'Action Space', default: 'Discrete', options: ['Discrete', 'Continuous'] } | |
| ] | |
| }, | |
| [LayerType.SAC_HEAD]: { | |
| type: LayerType.SAC_HEAD, | |
| label: 'SAC Head', | |
| description: 'Soft Actor-Critic Output', | |
| category: 'RL', | |
| parameters: [ | |
| { name: 'hidden_dim', type: 'number', label: 'Hidden Dim', default: 256 }, | |
| { name: 'log_std_min', type: 'number', label: 'Min Log Std', default: -20 }, | |
| { name: 'log_std_max', type: 'number', label: 'Max Log Std', default: 2 } | |
| ] | |
| }, | |
| // --- ADVANCED / NICHE --- | |
| [LayerType.CAPSULE]: { | |
| type: LayerType.CAPSULE, | |
| label: 'Capsule Layer', | |
| description: 'Preserves spatial hierarchy', | |
| category: 'Advanced', | |
| parameters: [ | |
| { name: 'num_capsules', type: 'number', label: 'Num Capsules', default: 10 }, | |
| { name: 'capsule_dim', type: 'number', label: 'Capsule Dim', default: 16 }, | |
| { name: 'routings', type: 'number', label: 'Routing Iters', default: 3 } | |
| ] | |
| }, | |
| [LayerType.HYPER_NET]: { | |
| type: LayerType.HYPER_NET, | |
| label: 'HyperNetwork', | |
| description: 'Generates weights for another NN', | |
| category: 'Advanced', | |
| parameters: [ | |
| { name: 'target_shape', type: 'string', label: 'Target Shape', default: '64,64' }, | |
| { name: 'embedding_dim', type: 'number', label: 'Z Dim', default: 32 } | |
| ] | |
| }, | |
| [LayerType.MAMBA_BLOCK]: { | |
| type: LayerType.MAMBA_BLOCK, | |
| label: 'Mamba (SSM)', | |
| description: 'Selective State Space Model', | |
| category: 'Advanced', | |
| parameters: [ | |
| { name: 'd_model', type: 'number', label: 'D Model', default: 512 }, | |
| { name: 'd_state', type: 'number', label: 'State Dim', default: 16 }, | |
| { name: 'expand', type: 'number', label: 'Expansion', default: 2 } | |
| ] | |
| }, | |
| [LayerType.RWKV_BLOCK]: { | |
| type: LayerType.RWKV_BLOCK, | |
| label: 'RWKV Block', | |
| description: 'RNN-Transformer Hybrid', | |
| category: 'Advanced', | |
| parameters: [ | |
| { name: 'n_embd', type: 'number', label: 'Embed Dim', default: 768 }, | |
| { name: 'n_layer', type: 'number', label: 'Layer Idx', default: 0 } | |
| ] | |
| }, | |
| [LayerType.HOPFIELD]: { | |
| type: LayerType.HOPFIELD, | |
| label: 'Hopfield Layer', | |
| description: 'Dense Associative Memory', | |
| category: 'Advanced', | |
| parameters: [ | |
| { name: 'in_features', type: 'number', label: 'Features', default: 64 }, | |
| { name: 'stored_patterns', type: 'number', label: 'Pattern Capacity', default: 10 } | |
| ] | |
| }, | |
| [LayerType.NORMALIZING_FLOW]: { | |
| type: LayerType.NORMALIZING_FLOW, | |
| label: 'Normalizing Flow', | |
| description: 'Invertible Generative Model', | |
| category: 'Advanced', | |
| parameters: [ | |
| { name: 'flow_type', type: 'select', label: 'Type', default: 'RealNVP', options: ['RealNVP', 'Glow', 'MAF'] }, | |
| { name: 'num_flows', type: 'number', label: 'Num Flows', default: 4 } | |
| ] | |
| }, | |
| [LayerType.DNC_MEMORY]: { | |
| type: LayerType.DNC_MEMORY, | |
| label: 'DNC Memory', | |
| description: 'Differentiable Neural Computer', | |
| category: 'Advanced', | |
| parameters: [ | |
| { name: 'memory_size', type: 'number', label: 'Mem Slots', default: 128 }, | |
| { name: 'word_size', type: 'number', label: 'Word Size', default: 20 }, | |
| { name: 'num_read_heads', type: 'number', label: 'Read Heads', default: 4 } | |
| ] | |
| }, | |
| [LayerType.ARCFACE]: { | |
| type: LayerType.ARCFACE, | |
| label: 'ArcFace', | |
| description: 'Metric Learning Loss', | |
| category: 'Advanced', | |
| parameters: [ | |
| { name: 's', type: 'number', label: 'Scale (s)', default: 64.0 }, | |
| { name: 'm', type: 'number', label: 'Margin (m)', default: 0.5 }, | |
| { name: 'num_classes', type: 'number', label: 'Classes', default: 1000 } | |
| ] | |
| }, | |
| [LayerType.ECHO_STATE]: { | |
| type: LayerType.ECHO_STATE, | |
| label: 'Reservoir (ESN)', | |
| description: 'Echo State Network', | |
| category: 'Advanced', | |
| parameters: [ | |
| { name: 'reservoir_size', type: 'number', label: 'Reservoir Size', default: 1000 }, | |
| { name: 'spectral_radius', type: 'number', label: 'Spectral Radius', default: 0.9 } | |
| ] | |
| }, | |
| // --- VIDEO / GENERATION --- | |
| [LayerType.VIDEO_DIFFUSION_BLOCK]: { | |
| type: LayerType.VIDEO_DIFFUSION_BLOCK, | |
| label: 'Video Diffusion', | |
| description: '3D UNet Block for Video Gen', | |
| category: 'Video', | |
| parameters: [ | |
| { name: 'in_channels', type: 'number', label: 'Channels', default: 128 }, | |
| { name: 'time_dim', type: 'number', label: 'Time Emb Dim', default: 512 }, | |
| { name: 'use_temporal_attn', type: 'boolean', label: 'Temp Attn', default: true } | |
| ] | |
| }, | |
| [LayerType.SPATIO_TEMPORAL_ATTN]: { | |
| type: LayerType.SPATIO_TEMPORAL_ATTN, | |
| label: 'Spatio-Temporal Attn', | |
| description: 'Attention over Space & Time', | |
| category: 'Video', | |
| parameters: [ | |
| { name: 'dim', type: 'number', label: 'Dimension', default: 512 }, | |
| { name: 'num_heads', type: 'number', label: 'Heads', default: 8 }, | |
| { name: 'frames', type: 'number', label: 'Max Frames', default: 16 } | |
| ] | |
| }, | |
| [LayerType.VIDEO_TOKENIZER]: { | |
| type: LayerType.VIDEO_TOKENIZER, | |
| label: 'Video Tokenizer', | |
| description: '3D VQ-VAE / Magvit style', | |
| category: 'Video', | |
| parameters: [ | |
| { name: 'patch_size_t', type: 'number', label: 'Time Patch', default: 2 }, | |
| { name: 'patch_size_hw', type: 'number', label: 'Spatial Patch', default: 16 }, | |
| { name: 'vocab_size', type: 'number', label: 'Codebook Size', default: 8192 } | |
| ] | |
| }, | |
| [LayerType.FRAME_INTERPOLATOR]: { | |
| type: LayerType.FRAME_INTERPOLATOR, | |
| label: 'Frame Interpolator', | |
| description: 'Upsamples video frame rate', | |
| category: 'Video', | |
| parameters: [ | |
| { name: 'scale_factor', type: 'number', label: 'Scale Factor', default: 2 }, | |
| { name: 'mode', type: 'select', label: 'Mode', default: 'bilinear', options: ['bilinear', 'optical_flow', 'pixel_shuffle'] } | |
| ] | |
| }, | |
| [LayerType.TEMPORAL_SHIFT]: { | |
| type: LayerType.TEMPORAL_SHIFT, | |
| label: 'Temporal Shift', | |
| description: 'TSM Module (Zero params)', | |
| category: 'Video', | |
| parameters: [ | |
| { name: 'n_segment', type: 'number', label: 'Segments', default: 8 }, | |
| { name: 'fold_div', type: 'number', label: 'Fold Divisor', default: 8 } | |
| ] | |
| }, | |
| [LayerType.NON_LOCAL_BLOCK]: { | |
| type: LayerType.NON_LOCAL_BLOCK, | |
| label: 'Non-Local Block', | |
| description: 'Global context block', | |
| category: 'Video', | |
| parameters: [ | |
| { name: 'in_channels', type: 'number', label: 'Channels', default: 64 }, | |
| { name: 'mode', type: 'select', label: 'Mode', default: 'embedded_gaussian', options: ['embedded_gaussian', 'gaussian', 'dot', 'concat'] } | |
| ] | |
| }, | |
| [LayerType.MULTIMODAL_FUSION]: { | |
| type: LayerType.MULTIMODAL_FUSION, | |
| label: 'Multimodal Fusion', | |
| description: 'Merge Video, Audio, Text', | |
| category: 'Video', | |
| parameters: [ | |
| { name: 'video_dim', type: 'number', label: 'Video Dim', default: 512 }, | |
| { name: 'audio_dim', type: 'number', label: 'Audio Dim', default: 256 }, | |
| { name: 'text_dim', type: 'number', label: 'Text Dim', default: 768 }, | |
| { name: 'out_dim', type: 'number', label: 'Fused Dim', default: 512 } | |
| ] | |
| }, | |
| // --- OCR (Text Recognition) --- | |
| [LayerType.TPS_TRANSFORM]: { | |
| type: LayerType.TPS_TRANSFORM, | |
| label: 'TPS Transform', | |
| description: 'Rectifies curved text (Thin Plate Spline)', | |
| category: 'OCR', | |
| parameters: [ | |
| { name: 'fiducial_points', type: 'number', label: 'Control Points', default: 20 }, | |
| { name: 'output_size', type: 'string', label: 'Out Size (HxW)', default: '32,100' } | |
| ] | |
| }, | |
| [LayerType.CRNN_BLOCK]: { | |
| type: LayerType.CRNN_BLOCK, | |
| label: 'CRNN Block', | |
| description: 'Conv + BiLSTM for Text Sequence', | |
| category: 'OCR', | |
| parameters: [ | |
| { name: 'img_h', type: 'number', label: 'Image Height', default: 32 }, | |
| { name: 'hidden_size', type: 'number', label: 'LSTM Hidden', default: 256 }, | |
| { name: 'num_classes', type: 'number', label: 'Num Chars', default: 37 } | |
| ] | |
| }, | |
| [LayerType.CTC_DECODER]: { | |
| type: LayerType.CTC_DECODER, | |
| label: 'CTC Decoder', | |
| description: 'Connectionist Temporal Classification', | |
| category: 'OCR', | |
| parameters: [ | |
| { name: 'blank_index', type: 'number', label: 'Blank Index', default: 0 }, | |
| { name: 'reduction', type: 'select', label: 'Reduction', default: 'mean', options: ['mean', 'sum', 'none'] } | |
| ] | |
| }, | |
| // --- ROBOTICS / MOTION / DEPTH --- | |
| [LayerType.DEPTH_DECODER]: { | |
| type: LayerType.DEPTH_DECODER, | |
| label: 'Depth Decoder', | |
| description: 'Estimates Monocular Depth Map', | |
| category: 'Robotics', | |
| parameters: [ | |
| { name: 'min_depth', type: 'number', label: 'Min Depth (m)', default: 0.1 }, | |
| { name: 'max_depth', type: 'number', label: 'Max Depth (m)', default: 100.0 }, | |
| { name: 'backbone_scale', type: 'number', label: 'Scale Factor', default: 1 } | |
| ] | |
| }, | |
| [LayerType.DISPARITY_HEAD]: { | |
| type: LayerType.DISPARITY_HEAD, | |
| label: 'Disparity Head', | |
| description: 'Stereo Vision Disparity Estimation', | |
| category: 'Robotics', | |
| parameters: [ | |
| { name: 'max_disp', type: 'number', label: 'Max Disparity', default: 192 }, | |
| { name: 'refine_iter', type: 'number', label: 'Refine Iters', default: 3 } | |
| ] | |
| }, | |
| [LayerType.OPTICAL_FLOW]: { | |
| type: LayerType.OPTICAL_FLOW, | |
| label: 'Optical Flow', | |
| description: 'Estimates pixel motion between frames', | |
| category: 'Robotics', | |
| parameters: [ | |
| { name: 'input_channels', type: 'number', label: 'In Channels', default: 6 }, | |
| { name: 'flow_dim', type: 'number', label: 'Flow Dim', default: 2 }, | |
| { name: 'corr_levels', type: 'number', label: 'Correlation Lvl', default: 4 } | |
| ] | |
| }, | |
| [LayerType.VELOCITY_HEAD]: { | |
| type: LayerType.VELOCITY_HEAD, | |
| label: 'Velocity Head', | |
| description: 'Predicts object speed/trajectory', | |
| category: 'Robotics', | |
| parameters: [ | |
| { name: 'input_dim', type: 'number', label: 'Input Dim', default: 512 }, | |
| { name: 'time_horizon', type: 'number', label: 'Horizon (sec)', default: 3 }, | |
| { name: 'mode', type: 'select', label: 'Mode', default: 'Linear', options: ['Linear', 'Angular', 'Full State'] } | |
| ] | |
| }, | |
| [LayerType.KALMAN_FILTER]: { | |
| type: LayerType.KALMAN_FILTER, | |
| label: 'Kalman Filter', | |
| description: 'Differentiable State Estimation', | |
| category: 'Robotics', | |
| parameters: [ | |
| { name: 'state_dim', type: 'number', label: 'State Dim', default: 4 }, | |
| { name: 'measure_dim', type: 'number', label: 'Measure Dim', default: 2 }, | |
| { name: 'learnable_process_noise', type: 'boolean', label: 'Learn Noise', default: true } | |
| ] | |
| }, | |
| [LayerType.BEV_TRANSFORM]: { | |
| type: LayerType.BEV_TRANSFORM, | |
| label: 'BEV Transformer', | |
| description: 'Perspective to Bird\'s Eye View', | |
| category: 'Robotics', | |
| parameters: [ | |
| { name: 'bev_h', type: 'number', label: 'BEV Height', default: 200 }, | |
| { name: 'bev_w', type: 'number', label: 'BEV Width', default: 200 }, | |
| { name: 'num_queries', type: 'number', label: 'Num Queries', default: 900 } | |
| ] | |
| }, | |
| [LayerType.RADAR_ENCODER]: { | |
| type: LayerType.RADAR_ENCODER, | |
| label: 'Radar Encoder', | |
| description: 'PointPillars style radar encoding', | |
| category: 'Robotics', | |
| parameters: [ | |
| { name: 'max_points', type: 'number', label: 'Max Points', default: 100 }, | |
| { name: 'num_features', type: 'number', label: 'Num Feats', default: 5 }, | |
| { name: 'voxel_size', type: 'string', label: 'Voxel Size', default: '0.2,0.2,4' } | |
| ] | |
| }, | |
| // --- DETECTION (YOLO) --- | |
| [LayerType.C2F_BLOCK]: { | |
| type: LayerType.C2F_BLOCK, | |
| label: 'C2f Block (YOLO)', | |
| description: 'CSP Bottleneck with 2 convolutions', | |
| category: 'Detection', | |
| parameters: [ | |
| { name: 'c1', type: 'number', label: 'In Channels', default: 64 }, | |
| { name: 'c2', type: 'number', label: 'Out Channels', default: 64 }, | |
| { name: 'n', type: 'number', label: 'Num Bottlenecks', default: 1 }, | |
| { name: 'shortcut', type: 'boolean', label: 'Shortcut', default: true } | |
| ] | |
| }, | |
| [LayerType.SPPF_BLOCK]: { | |
| type: LayerType.SPPF_BLOCK, | |
| label: 'SPPF Block', | |
| description: 'Spatial Pyramid Pooling - Fast', | |
| category: 'Detection', | |
| parameters: [ | |
| { name: 'c1', type: 'number', label: 'In Channels', default: 512 }, | |
| { name: 'c2', type: 'number', label: 'Out Channels', default: 512 }, | |
| { name: 'k', type: 'number', label: 'Kernel Size', default: 5 } | |
| ] | |
| }, | |
| [LayerType.DARKNET_BLOCK]: { | |
| type: LayerType.DARKNET_BLOCK, | |
| label: 'Darknet Block', | |
| description: 'Residual block used in Darknet', | |
| category: 'Detection', | |
| parameters: [ | |
| { name: 'channels', type: 'number', label: 'Channels', default: 64 }, | |
| { name: 'num_repeats', type: 'number', label: 'Repeats', default: 1 } | |
| ] | |
| }, | |
| [LayerType.DETECT_HEAD]: { | |
| type: LayerType.DETECT_HEAD, | |
| label: 'Detection Head', | |
| description: 'Predicts BBoxes and Classes', | |
| category: 'Detection', | |
| parameters: [ | |
| { name: 'nc', type: 'number', label: 'Num Classes', default: 80 }, | |
| { name: 'ch', type: 'text', label: 'Input Channels', default: '()', description: "List of channels from backbone levels" } | |
| ] | |
| }, | |
| [LayerType.ANCHOR_BOX]: { | |
| type: LayerType.ANCHOR_BOX, | |
| label: 'Anchor Boxes', | |
| description: 'Reference boxes for detection', | |
| category: 'Detection', | |
| parameters: [ | |
| { name: 'aspect_ratios', type: 'text', label: 'Ratios', default: '[0.5, 1.0, 2.0]' }, | |
| { name: 'scales', type: 'text', label: 'Scales', default: '[32, 64, 128]' } | |
| ] | |
| }, | |
| [LayerType.NMS]: { | |
| type: LayerType.NMS, | |
| label: 'NMS', | |
| description: 'Non-Maximum Suppression', | |
| category: 'Detection', | |
| parameters: [ | |
| { name: 'iou_threshold', type: 'number', label: 'IoU Thresh', default: 0.5 }, | |
| { name: 'score_threshold', type: 'number', label: 'Score Thresh', default: 0.25 } | |
| ] | |
| }, | |
| // --- AUDIO / SPEECH --- | |
| [LayerType.STFT]: { | |
| type: LayerType.STFT, | |
| label: 'STFT', | |
| description: 'Short-Time Fourier Transform', | |
| category: 'Audio', | |
| parameters: [ | |
| { name: 'n_fft', type: 'number', label: 'N_FFT', default: 1024 }, | |
| { name: 'hop_length', type: 'number', label: 'Hop Length', default: 256 }, | |
| { name: 'win_length', type: 'number', label: 'Window Length', default: 1024 } | |
| ] | |
| }, | |
| [LayerType.MEL_SPECTROGRAM]: { | |
| type: LayerType.MEL_SPECTROGRAM, | |
| label: 'MelSpectrogram', | |
| description: 'Raw Audio to Mel Spectrogram', | |
| category: 'Audio', | |
| parameters: [ | |
| { name: 'sample_rate', type: 'number', label: 'Sample Rate', default: 22050 }, | |
| { name: 'n_fft', type: 'number', label: 'N_FFT', default: 1024 }, | |
| { name: 'n_mels', type: 'number', label: 'Num Mels', default: 80 } | |
| ] | |
| }, | |
| [LayerType.SPEC_AUGMENT]: { | |
| type: LayerType.SPEC_AUGMENT, | |
| label: 'SpecAugment', | |
| description: 'Time/Freq masking for Audio', | |
| category: 'Audio', | |
| parameters: [ | |
| { name: 'freq_mask_param', type: 'number', label: 'Freq Mask', default: 27 }, | |
| { name: 'time_mask_param', type: 'number', label: 'Time Mask', default: 100 } | |
| ] | |
| }, | |
| [LayerType.CONFORMER_BLOCK]: { | |
| type: LayerType.CONFORMER_BLOCK, | |
| label: 'Conformer Block', | |
| description: 'Convolution + Transformer (ASR)', | |
| category: 'Audio', | |
| parameters: [ | |
| { name: 'd_model', type: 'number', label: 'D Model', default: 256 }, | |
| { name: 'nhead', type: 'number', label: 'Heads', default: 4 }, | |
| { name: 'kernel_size', type: 'number', label: 'Conv Kernel', default: 31 } | |
| ] | |
| }, | |
| [LayerType.WAVENET_BLOCK]: { | |
| type: LayerType.WAVENET_BLOCK, | |
| label: 'WaveNet Block', | |
| description: 'Dilated Causal Convolution', | |
| category: 'Audio', | |
| parameters: [ | |
| { name: 'channels', type: 'number', label: 'Channels', default: 64 }, | |
| { name: 'dilation', type: 'number', label: 'Dilation', default: 1 }, | |
| { name: 'kernel_size', type: 'number', label: 'Kernel', default: 3 } | |
| ] | |
| }, | |
| [LayerType.WAV2VEC2_ENC]: { | |
| type: LayerType.WAV2VEC2_ENC, | |
| label: 'Wav2Vec2 Encoder', | |
| description: 'Self-supervised Speech Encoder', | |
| category: 'Audio', | |
| parameters: [ | |
| { name: 'output_dim', type: 'number', label: 'Output Dim', default: 768 }, | |
| { name: 'extractor_mode', type: 'select', label: 'Mode', default: 'default', options: ['default', 'layer_norm'] } | |
| ] | |
| }, | |
| [LayerType.RVC_ENCODER]: { | |
| type: LayerType.RVC_ENCODER, | |
| label: 'RVC Hubert', | |
| description: 'Content Encoder for Voice Cloning', | |
| category: 'Audio', | |
| parameters: [ | |
| { name: 'model_type', type: 'select', label: 'Model', default: 'hubert-soft', options: ['hubert-soft', 'vec256', 'vec768'] }, | |
| { name: 'freeze', type: 'boolean', label: 'Freeze', default: true } | |
| ] | |
| }, | |
| [LayerType.VOCODER]: { | |
| type: LayerType.VOCODER, | |
| label: 'Vocoder', | |
| description: 'Mel Spectrogram to Waveform', | |
| category: 'Audio', | |
| parameters: [ | |
| { name: 'type', type: 'select', label: 'Type', default: 'HiFiGAN', options: ['HiFiGAN', 'WaveGlow', 'MelGAN'] }, | |
| { name: 'upsample_rates', type: 'text', label: 'Upsample Rates', default: '[8,8,2,2]' } | |
| ] | |
| }, | |
| [LayerType.AUDIO_EMBEDDING]: { | |
| type: LayerType.AUDIO_EMBEDDING, | |
| label: 'Audio Embedding', | |
| description: 'Embeddings for discrete audio tokens', | |
| category: 'Audio', | |
| parameters: [ | |
| { name: 'num_embeddings', type: 'number', label: 'Num Embeddings', default: 1024 }, | |
| { name: 'embedding_dim', type: 'number', label: 'Embedding Dim', default: 512 } | |
| ] | |
| }, | |
| [LayerType.SINC_CONV]: { | |
| type: LayerType.SINC_CONV, | |
| label: 'SincConv', | |
| description: 'Parametric Sinc Filters (Speech)', | |
| category: 'Audio', | |
| parameters: [ | |
| { name: 'out_channels', type: 'number', label: 'Filters', default: 80 }, | |
| { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 251 }, | |
| { name: 'min_low_hz', type: 'number', label: 'Min Hz', default: 50 } | |
| ] | |
| }, | |
  // --- 3D / VISION ---
  // Layers for 3D representations: radiance fields, point clouds, triplanes,
  // Gaussian splats, and meshes.
  [LayerType.NERF_BLOCK]: {
    type: LayerType.NERF_BLOCK,
    label: 'NeRF Block',
    description: 'MLP for Radiance Fields',
    category: '3D',
    parameters: [
      { name: 'hidden_dim', type: 'number', label: 'Hidden Dim', default: 256 },
      { name: 'num_layers', type: 'number', label: 'Num Layers', default: 8 },
      // JSON-style list of layer indices receiving an input skip connection
      // (default matches the original NeRF architecture).
      { name: 'skips', type: 'text', label: 'Skip Layers', default: '[4]' }
    ]
  },
  [LayerType.POINTNET_BLOCK]: {
    type: LayerType.POINTNET_BLOCK,
    label: 'PointNet Layer',
    description: 'Point cloud feature extraction',
    category: '3D',
    parameters: [
      // Default 3 = raw xyz coordinates per point.
      { name: 'in_channels', type: 'number', label: 'In Channels', default: 3 },
      { name: 'out_channels', type: 'number', label: 'Out Channels', default: 64 }
    ]
  },
  [LayerType.POINT_TRANSFORMER]: {
    type: LayerType.POINT_TRANSFORMER,
    label: 'PointTransformer',
    description: 'Self-Attention for Point Clouds',
    category: '3D',
    parameters: [
      { name: 'dim', type: 'number', label: 'Dim', default: 32 },
      // k-nearest-neighbor count for local attention neighborhoods.
      { name: 'num_neighbors', type: 'number', label: 'Neighbors (k)', default: 16 }
    ]
  },
  [LayerType.TRIPLANE_ENC]: {
    type: LayerType.TRIPLANE_ENC,
    label: 'Triplane Enc',
    description: 'Project 3D to 3x2D Planes',
    category: '3D',
    parameters: [
      { name: 'plane_res', type: 'number', label: 'Resolution', default: 256 },
      { name: 'channels', type: 'number', label: 'Channels', default: 32 }
    ]
  },
  [LayerType.GAUSSIAN_SPLAT]: {
    type: LayerType.GAUSSIAN_SPLAT,
    label: 'Gaussian Splat',
    description: '3D Gaussian Splatting Decoder',
    category: '3D',
    parameters: [
      { name: 'num_gaussians', type: 'number', label: 'Num Gaussians', default: 10000 },
      // Spherical-harmonics degree for view-dependent color (3 is the common default).
      { name: 'sh_degree', type: 'number', label: 'SH Degree', default: 3 }
    ]
  },
  [LayerType.MESH_CONV]: {
    type: LayerType.MESH_CONV,
    label: 'Mesh Conv',
    description: 'Convolution on 3D Meshes',
    category: '3D',
    parameters: [
      { name: 'in_channels', type: 'number', label: 'In Channels', default: 3 },
      { name: 'out_channels', type: 'number', label: 'Out Channels', default: 64 }
    ]
  },
  // --- MERGE ---
  // Combining nodes: these take multiple incoming edges. The element-wise ops
  // (Add/Subtract/Multiply/Average/Max/Min) are parameter-free.
  [LayerType.CONCAT]: {
    type: LayerType.CONCAT,
    label: 'Concatenate',
    description: 'Merge inputs along a dim',
    category: 'Merge',
    parameters: [
      // Default dim 1 = channel/feature axis under batch-first conventions.
      { name: 'dim', type: 'number', label: 'Dimension', default: 1 }
    ]
  },
  [LayerType.ADD]: {
    type: LayerType.ADD,
    label: 'Add (Sum)',
    description: 'Element-wise addition (Residual)',
    category: 'Merge',
    parameters: []
  },
  [LayerType.SUBTRACT]: {
    type: LayerType.SUBTRACT,
    label: 'Subtract',
    description: 'Element-wise subtraction',
    category: 'Merge',
    parameters: []
  },
  [LayerType.MULTIPLY]: {
    type: LayerType.MULTIPLY,
    label: 'Multiply',
    description: 'Element-wise multiplication',
    category: 'Merge',
    parameters: []
  },
  [LayerType.AVERAGE]: {
    type: LayerType.AVERAGE,
    label: 'Average',
    description: 'Average of inputs',
    category: 'Merge',
    parameters: []
  },
  [LayerType.MAXIMUM]: {
    type: LayerType.MAXIMUM,
    label: 'Maximum',
    description: 'Element-wise maximum',
    category: 'Merge',
    parameters: []
  },
  [LayerType.MINIMUM]: {
    type: LayerType.MINIMUM,
    label: 'Minimum',
    description: 'Element-wise minimum',
    category: 'Merge',
    parameters: []
  },
  [LayerType.DOT]: {
    type: LayerType.DOT,
    label: 'Dot Product',
    description: 'Dot product of two tensors',
    category: 'Merge',
    parameters: [
      // NOTE(review): param name 'axes' vs label 'Axis' — codegen likely keys on
      // 'axes', so the name is left as-is; confirm which axis convention applies.
      { name: 'axes', type: 'number', label: 'Axis', default: 1 }
    ]
  },
  // --- GENAI / ADVANCED ---
  // Building blocks common in modern generative / foundation models
  // (LLMs, ViTs, diffusion, agents, SAM).
  [LayerType.RMSNORM]: {
    type: LayerType.RMSNORM,
    label: 'RMSNorm',
    description: 'Root Mean Square Norm (LLMs)',
    category: 'GenAI',
    parameters: [
      { name: 'dim', type: 'number', label: 'Dimension', default: 512 },
      // Numerical-stability epsilon added inside the RMS denominator.
      { name: 'eps', type: 'number', label: 'Epsilon', default: 1e-6 }
    ]
  },
  [LayerType.ROPE]: {
    type: LayerType.ROPE,
    label: 'RoPE',
    description: 'Rotary Positional Embedding',
    category: 'GenAI',
    parameters: [
      // Rotations are applied per attention head, hence head dim (not model dim).
      { name: 'dim', type: 'number', label: 'Head Dim', default: 64 },
      { name: 'max_position', type: 'number', label: 'Max Pos', default: 2048 }
    ]
  },
  [LayerType.PATCH_EMBED]: {
    type: LayerType.PATCH_EMBED,
    label: 'Patch Embed (ViT)',
    description: 'Image to Sequence Patches',
    category: 'GenAI',
    parameters: [
      { name: 'img_size', type: 'number', label: 'Image Size', default: 224 },
      { name: 'patch_size', type: 'number', label: 'Patch Size', default: 16 },
      { name: 'in_chans', type: 'number', label: 'In Channels', default: 3 },
      // Defaults match ViT-Base (224px, 16x16 patches, 768-dim embeddings).
      { name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 768 }
    ]
  },
  [LayerType.MOE_BLOCK]: {
    type: LayerType.MOE_BLOCK,
    label: 'Sparse MoE Block',
    description: 'Mixture of Experts Layer',
    category: 'GenAI',
    parameters: [
      { name: 'num_experts', type: 'number', label: 'Num Experts', default: 8 },
      // top_k experts are routed per token; must be <= num_experts.
      { name: 'top_k', type: 'number', label: 'Top K (Active)', default: 2 },
      { name: 'hidden_dim', type: 'number', label: 'Hidden Dim', default: 512 },
      { name: 'expert_dim', type: 'number', label: 'Expert Dim', default: 2048 }
    ]
  },
  [LayerType.ACTION_HEAD]: {
    type: LayerType.ACTION_HEAD,
    label: 'Action Head',
    description: 'Decision output for Agents/LAMs',
    category: 'GenAI',
    parameters: [
      { name: 'input_dim', type: 'number', label: 'Input Dim', default: 512 },
      { name: 'num_actions', type: 'number', label: 'Num Actions', default: 50 },
      { name: 'action_type', type: 'select', label: 'Type', default: 'Discrete', options: ['Discrete', 'Continuous'] }
    ]
  },
  [LayerType.SE_BLOCK]: {
    type: LayerType.SE_BLOCK,
    label: 'SE Block',
    description: 'Squeeze & Excitation Attention',
    category: 'GenAI',
    parameters: [
      { name: 'channels', type: 'number', label: 'Channels', default: 64 },
      // Bottleneck ratio: squeeze MLP width = channels / reduction.
      { name: 'reduction', type: 'number', label: 'Reduction Ratio', default: 16 }
    ]
  },
  [LayerType.TIME_EMBEDDING]: {
    type: LayerType.TIME_EMBEDDING,
    label: 'Time Embedding',
    description: 'Sinusoidal Time Embed (Diffusion)',
    category: 'GenAI',
    parameters: [
      { name: 'dim', type: 'number', label: 'Dimension', default: 256 }
    ]
  },
  // SAM = Segment Anything Model; these two entries pair up (prompt encoder -> mask decoder).
  [LayerType.SAM_PROMPT_ENCODER]: {
    type: LayerType.SAM_PROMPT_ENCODER,
    label: 'SAM Prompt Enc',
    description: 'Encodes points/boxes (SAM)',
    category: 'GenAI',
    parameters: [
      { name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 256 }
    ]
  },
  [LayerType.SAM_MASK_DECODER]: {
    type: LayerType.SAM_MASK_DECODER,
    label: 'SAM Mask Dec',
    description: 'Decodes segmentation masks',
    category: 'GenAI',
    parameters: [
      { name: 'transformer_dim', type: 'number', label: 'Model Dim', default: 256 },
      { name: 'num_multimask_outputs', type: 'number', label: 'Num Masks', default: 3 }
    ]
  },
  // --- CONVOLUTION ---
  // Conv / pooling / resampling layers. Convention: where an 'in_channels'
  // parameter exists, default 0 means "auto-infer from the upstream node"
  // (see the '(Opt)' labels). NOTE(review): CONV3D/transpose/separable variants
  // omit in_channels entirely — presumably always inferred; confirm in codegen.
  [LayerType.CONV1D]: {
    type: LayerType.CONV1D,
    label: 'Conv1D',
    description: '1D Convolution (Audio/Text)',
    category: 'Convolution',
    parameters: [
      { name: 'in_channels', type: 'number', label: 'In Channels (Opt)', default: 0 },
      { name: 'out_channels', type: 'number', label: 'Filters', default: 32 },
      { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 },
      { name: 'stride', type: 'number', label: 'Stride', default: 1 },
      { name: 'padding', type: 'number', label: 'Padding', default: 1 }
    ]
  },
  // Fullest conv definition — exposes the whole nn.Conv2d-style surface plus a fused activation.
  [LayerType.CONV2D]: {
    type: LayerType.CONV2D,
    label: 'Conv2D',
    description: '2D Convolutional Layer',
    category: 'Convolution',
    parameters: [
      { name: 'in_channels', type: 'number', label: 'In Channels (Opt)', default: 0 },
      { name: 'out_channels', type: 'number', label: 'Filters', default: 64 },
      { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 },
      { name: 'stride', type: 'number', label: 'Stride', default: 1 },
      { name: 'padding', type: 'number', label: 'Padding', default: 1 },
      { name: 'padding_mode', type: 'select', label: 'Pad Mode', default: 'zeros', options: ['zeros', 'reflect', 'replicate', 'circular'] },
      { name: 'dilation', type: 'number', label: 'Dilation', default: 1 },
      { name: 'groups', type: 'number', label: 'Groups', default: 1, description: "For depthwise separable" },
      { name: 'bias', type: 'boolean', label: 'Bias', default: true },
      { name: 'activation', type: 'select', label: 'Fused Activation', default: 'None', options: ['None', 'ReLU', 'LeakyReLU', 'SiLU'] }
    ]
  },
  [LayerType.CONV3D]: {
    type: LayerType.CONV3D,
    label: 'Conv3D',
    description: '3D Convolution (Video/Volumetric)',
    category: 'Convolution',
    parameters: [
      { name: 'out_channels', type: 'number', label: 'Filters', default: 32 },
      { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 },
      { name: 'stride', type: 'number', label: 'Stride', default: 1 },
      { name: 'padding', type: 'number', label: 'Padding', default: 1 }
    ]
  },
  [LayerType.SEPARABLE_CONV2D]: {
    type: LayerType.SEPARABLE_CONV2D,
    label: 'Separable Conv2D',
    description: 'Depthwise Separable Conv (TF/Keras style)',
    category: 'Convolution',
    parameters: [
      { name: 'out_channels', type: 'number', label: 'Filters', default: 64 },
      { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 },
      // Output channels per input channel in the depthwise stage (Keras semantics).
      { name: 'depth_multiplier', type: 'number', label: 'Depth Mult', default: 1 },
      { name: 'stride', type: 'number', label: 'Stride', default: 1 }
    ]
  },
  [LayerType.DEPTHWISE_CONV2D]: {
    type: LayerType.DEPTHWISE_CONV2D,
    label: 'Depthwise Conv2D',
    description: 'Convolution per channel independently',
    category: 'Convolution',
    parameters: [
      { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 },
      { name: 'depth_multiplier', type: 'number', label: 'Depth Mult', default: 1 },
      { name: 'stride', type: 'number', label: 'Stride', default: 1 }
    ]
  },
  [LayerType.CONV_TRANSPOSE2D]: {
    type: LayerType.CONV_TRANSPOSE2D,
    label: 'ConvTranspose2D',
    description: 'Deconvolution (Upsampling)',
    category: 'Convolution',
    parameters: [
      { name: 'out_channels', type: 'number', label: 'Filters', default: 64 },
      // kernel 2 / stride 2 defaults give exact 2x spatial upsampling.
      { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 2 },
      { name: 'stride', type: 'number', label: 'Stride', default: 2 },
      { name: 'padding', type: 'number', label: 'Padding', default: 0 }
    ]
  },
  [LayerType.DEFORMABLE_CONV]: {
    type: LayerType.DEFORMABLE_CONV,
    label: 'Deformable Conv',
    description: 'Deformable Convolution v2',
    category: 'Convolution',
    parameters: [
      { name: 'out_channels', type: 'number', label: 'Filters', default: 64 },
      { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 },
      { name: 'offset_groups', type: 'number', label: 'Offset Groups', default: 1 }
    ]
  },
  [LayerType.MAXPOOL]: {
    type: LayerType.MAXPOOL,
    label: 'MaxPool2D',
    description: 'Max pooling operation',
    category: 'Convolution',
    parameters: [
      { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 2 },
      { name: 'stride', type: 'number', label: 'Stride', default: 2 }
    ]
  },
  [LayerType.MAXPOOL3D]: {
    type: LayerType.MAXPOOL3D,
    label: 'MaxPool3D',
    description: '3D Max pooling',
    category: 'Convolution',
    parameters: [
      { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 2 },
      { name: 'stride', type: 'number', label: 'Stride', default: 2 }
    ]
  },
  [LayerType.AVGPOOL]: {
    type: LayerType.AVGPOOL,
    label: 'AvgPool2D',
    description: 'Average pooling operation',
    category: 'Convolution',
    parameters: [
      { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 2 },
      { name: 'stride', type: 'number', label: 'Stride', default: 2 }
    ]
  },
  [LayerType.ADAPTIVEAVGPOOL]: {
    type: LayerType.ADAPTIVEAVGPOOL,
    label: 'AdaptAvgPool2D',
    description: 'Pools to specific output size',
    category: 'Convolution',
    parameters: [
      // Single number => square NxN output (default 1x1, i.e. global pooling).
      { name: 'output_size', type: 'number', label: 'Output Size (NxN)', default: 1 }
    ]
  },
  [LayerType.GLOBAL_AVG_POOL]: {
    type: LayerType.GLOBAL_AVG_POOL,
    label: 'Global Avg Pool',
    description: 'Reduces spatial dims to 1x1',
    category: 'Convolution',
    parameters: []
  },
  [LayerType.UPSAMPLE]: {
    type: LayerType.UPSAMPLE,
    label: 'Upsample',
    description: 'Increases spatial size',
    category: 'Convolution',
    parameters: [
      { name: 'scale_factor', type: 'number', label: 'Scale Factor', default: 2 },
      { name: 'mode', type: 'select', label: 'Mode', default: 'nearest', options: ['nearest', 'bilinear', 'bicubic', 'trilinear'] }
    ]
  },
  [LayerType.PIXEL_SHUFFLE]: {
    type: LayerType.PIXEL_SHUFFLE,
    label: 'Pixel Shuffle',
    description: 'Efficient Upscaling (Sub-pixel)',
    category: 'Convolution',
    parameters: [
      { name: 'upscale_factor', type: 'number', label: 'Upscale Factor', default: 2 }
    ]
  },
  // --- RECURRENT ---
  // LSTM and GRU deliberately expose identical parameter sets; keep them in sync.
  // input_size default 0 follows the file's "(Opt) = auto-infer" convention.
  [LayerType.LSTM]: {
    type: LayerType.LSTM,
    label: 'LSTM',
    description: 'Long Short-Term Memory',
    category: 'Recurrent',
    parameters: [
      { name: 'input_size', type: 'number', label: 'Input Size (Opt)', default: 0 },
      { name: 'hidden_size', type: 'number', label: 'Hidden Size', default: 128 },
      { name: 'num_layers', type: 'number', label: 'Num Layers', default: 1 },
      { name: 'bidirectional', type: 'boolean', label: 'Bidirectional', default: false },
      // Dropout between stacked layers; has no effect when num_layers == 1.
      { name: 'dropout', type: 'number', label: 'Dropout', default: 0.0 },
      { name: 'batch_first', type: 'boolean', label: 'Batch First', default: true }
    ]
  },
  [LayerType.GRU]: {
    type: LayerType.GRU,
    label: 'GRU',
    description: 'Gated Recurrent Unit',
    category: 'Recurrent',
    parameters: [
      { name: 'input_size', type: 'number', label: 'Input Size (Opt)', default: 0 },
      { name: 'hidden_size', type: 'number', label: 'Hidden Size', default: 128 },
      { name: 'num_layers', type: 'number', label: 'Num Layers', default: 1 },
      { name: 'bidirectional', type: 'boolean', label: 'Bidirectional', default: false },
      { name: 'dropout', type: 'number', label: 'Dropout', default: 0.0 },
      { name: 'batch_first', type: 'boolean', label: 'Batch First', default: true }
    ]
  },
  // --- UTILITY / ACTIVATION ---
  // Activations, regularization, shape ops, and escape hatches (Custom/Lambda).
  [LayerType.RELU]: {
    type: LayerType.RELU,
    label: 'ReLU',
    description: 'Rectified Linear Unit',
    category: 'Utility',
    parameters: []
  },
  [LayerType.PRELU]: {
    type: LayerType.PRELU,
    label: 'PReLU',
    description: 'Parametric ReLU',
    category: 'Utility',
    parameters: [
      // 1 = single shared slope; set to channel count for a per-channel slope.
      { name: 'num_parameters', type: 'number', label: 'Num Params', default: 1 }
    ]
  },
  [LayerType.SWIGLU]: {
    type: LayerType.SWIGLU,
    label: 'SwiGLU',
    description: 'Swish-Gated Linear Unit',
    category: 'Utility',
    parameters: [
      // 0 = auto-infer dimension (same '(Opt)' convention as Linear/Conv).
      { name: 'dim', type: 'number', label: 'Dim (Opt)', default: 0 }
    ]
  },
  [LayerType.LEAKYRELU]: {
    type: LayerType.LEAKYRELU,
    label: 'LeakyReLU',
    description: 'Leaky ReLU Activation',
    category: 'Utility',
    parameters: [
      { name: 'negative_slope', type: 'number', label: 'Negative Slope', default: 0.01 }
    ]
  },
  [LayerType.GELU]: {
    type: LayerType.GELU,
    label: 'GELU',
    description: 'Gaussian Error Linear Unit',
    category: 'Utility',
    parameters: []
  },
  [LayerType.SILU]: {
    type: LayerType.SILU,
    label: 'SiLU (Swish)',
    description: 'Sigmoid Linear Unit',
    category: 'Utility',
    parameters: []
  },
  [LayerType.SIGMOID]: {
    type: LayerType.SIGMOID,
    label: 'Sigmoid',
    description: 'Sigmoid Activation',
    category: 'Utility',
    parameters: []
  },
  [LayerType.TANH]: {
    type: LayerType.TANH,
    label: 'Tanh',
    description: 'Hyperbolic Tangent',
    category: 'Utility',
    parameters: []
  },
  [LayerType.SOFTPLUS]: {
    type: LayerType.SOFTPLUS,
    label: 'Softplus',
    description: 'Smooth approximation to ReLU',
    category: 'Utility',
    parameters: []
  },
  [LayerType.SOFTSIGN]: {
    type: LayerType.SOFTSIGN,
    label: 'Softsign',
    description: 'Softsign Activation',
    category: 'Utility',
    parameters: []
  },
  [LayerType.DROPOUT]: {
    type: LayerType.DROPOUT,
    label: 'Dropout',
    description: 'Random zeroing of elements',
    category: 'Utility',
    parameters: [
      { name: 'p', type: 'number', label: 'Probability', default: 0.5 }
    ]
  },
  [LayerType.SPATIAL_DROPOUT]: {
    type: LayerType.SPATIAL_DROPOUT,
    label: 'Spatial Dropout',
    description: 'Drops entire 2D feature maps',
    category: 'Utility',
    parameters: [
      { name: 'p', type: 'number', label: 'Probability', default: 0.2 }
    ]
  },
  [LayerType.DROPPATH]: {
    type: LayerType.DROPPATH,
    label: 'DropPath',
    description: 'Stochastic Depth (ResNets/ViTs)',
    category: 'Utility',
    parameters: [
      { name: 'drop_prob', type: 'number', label: 'Drop Probability', default: 0.1 }
    ]
  },
  [LayerType.FLATTEN]: {
    type: LayerType.FLATTEN,
    label: 'Flatten',
    description: 'Flattens input to 1D',
    category: 'Utility',
    parameters: []
  },
  [LayerType.RESHAPE]: {
    type: LayerType.RESHAPE,
    label: 'Reshape',
    description: 'Changes tensor dimensions',
    category: 'Utility',
    parameters: [
      // Comma-separated dims, same free-text format as Input's 'shape'.
      { name: 'shape', type: 'string', label: 'Target Shape', default: '-1, 256', description: 'Use -1 for inference' }
    ]
  },
  [LayerType.PERMUTE]: {
    type: LayerType.PERMUTE,
    label: 'Permute',
    description: 'Permutes tensor dimensions',
    category: 'Utility',
    parameters: [
      // Default swaps the last two axes of a 3D tensor (e.g. [B, L, C] -> [B, C, L]).
      { name: 'dims', type: 'string', label: 'Dimensions', default: '0, 2, 1' }
    ]
  },
  [LayerType.UNFLATTEN]: {
    type: LayerType.UNFLATTEN,
    label: 'Unflatten',
    description: 'Restores tensor dimensions',
    category: 'Utility',
    parameters: [
      { name: 'dim', type: 'number', label: 'Dimension', default: 1 },
      { name: 'unflattened_size', type: 'string', label: 'Target Sizes', default: '16, 16' }
    ]
  },
  // Escape hatch: inject an arbitrary PyTorch module into the generated code.
  // definition_code ('text' type) presumably feeds verbatim Python into codegen —
  // treat as trusted input only.
  [LayerType.CUSTOM]: {
    type: LayerType.CUSTOM,
    label: 'Custom / Code',
    description: 'Define or instantiate any PyTorch Module',
    category: 'Utility',
    parameters: [
      { name: 'class_name', type: 'string', label: 'Class Name', default: 'MyCustomLayer', description: "Name of the class to instantiate" },
      { name: 'args', type: 'string', label: 'Arguments', default: '', description: "Constructor args (e.g. dim=128)" },
      { name: 'imports', type: 'string', label: 'Imports', default: '', description: "Required imports (e.g. import torch.nn.functional as F)" },
      { name: 'definition_code', type: 'text', label: 'Python Code (Def)', default: '', description: "Full class definition if strictly custom." }
    ]
  },
  [LayerType.LAMBDA]: {
    type: LayerType.LAMBDA,
    label: 'Lambda (Expr)',
    description: 'Evaluate custom expression (e.g. x * 2)',
    category: 'Utility',
    parameters: [
      { name: 'expression', type: 'string', label: 'Expression', default: 'x', description: "Python expression using 'x'" }
    ]
  },
  [LayerType.IDENTITY]: {
    type: LayerType.IDENTITY,
    label: 'Identity',
    description: 'Passthrough layer',
    category: 'Utility',
    parameters: []
  },
  // --- NORMALIZATION ---
  [LayerType.BATCHNORM]: {
    type: LayerType.BATCHNORM,
    label: 'BatchNorm2D',
    description: 'Batch Normalization',
    category: 'Normalization',
    parameters: [
      { name: 'num_features', type: 'number', label: 'Num Features', default: 64 }
    ]
  },
  [LayerType.GROUPNORM]: {
    type: LayerType.GROUPNORM,
    label: 'GroupNorm',
    description: 'Group Normalization',
    category: 'Normalization',
    parameters: [
      // num_channels must be divisible by num_groups (not validated here).
      { name: 'num_groups', type: 'number', label: 'Num Groups', default: 32 },
      { name: 'num_channels', type: 'number', label: 'Num Channels', default: 64 }
    ]
  },
  [LayerType.LAYERNORM]: {
    type: LayerType.LAYERNORM,
    label: 'LayerNorm',
    description: 'Layer Normalization',
    category: 'Normalization',
    parameters: [
      // String so multi-dim shapes can be given (e.g. '64, 128'), unlike the
      // numeric 'num_features' of the other norms.
      { name: 'normalized_shape', type: 'string', label: 'Norm Shape', default: '128' }
    ]
  },
  [LayerType.INSTANCENORM]: {
    type: LayerType.INSTANCENORM,
    label: 'InstanceNorm2d',
    description: 'Instance Normalization',
    category: 'Normalization',
    parameters: [
      { name: 'num_features', type: 'number', label: 'Num Features', default: 64 },
      // affine=false mirrors the nn.InstanceNorm2d default (no learnable scale/shift).
      { name: 'affine', type: 'boolean', label: 'Learnable', default: false }
    ]
  },
  // --- TRANSFORMER ---
  // Attention variants and full encoder/decoder layers. embed_dim/d_model must
  // be divisible by the head count (not validated here).
  [LayerType.ATTENTION]: {
    type: LayerType.ATTENTION,
    label: 'Self Attention',
    description: 'Multi-Head Self Attention',
    category: 'Transformer',
    parameters: [
      { name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 512 },
      { name: 'num_heads', type: 'number', label: 'Num Heads', default: 8 },
      { name: 'dropout', type: 'number', label: 'Dropout', default: 0.1 },
      { name: 'batch_first', type: 'boolean', label: 'Batch First', default: true },
      // Enable for autoregressive/decoder-style masking.
      { name: 'causal', type: 'boolean', label: 'Causal Mask', default: false }
    ]
  },
  // Query comes from one input, key/value from the other (two incoming edges).
  [LayerType.CROSS_ATTENTION]: {
    type: LayerType.CROSS_ATTENTION,
    label: 'Cross Attention',
    description: 'Attention between two sequences',
    category: 'Transformer',
    parameters: [
      { name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 512 },
      { name: 'num_heads', type: 'number', label: 'Num Heads', default: 8 },
      { name: 'dropout', type: 'number', label: 'Dropout', default: 0.1 }
    ]
  },
  [LayerType.WINDOW_ATTENTION]: {
    type: LayerType.WINDOW_ATTENTION,
    label: 'Window Attention',
    description: 'Sliding/Windowed Attention',
    category: 'Transformer',
    parameters: [
      // Defaults (window 7, dim 96, 4 heads) match Swin-Tiny stage-1 settings.
      { name: 'window_size', type: 'number', label: 'Window Size', default: 7 },
      { name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 96 },
      { name: 'num_heads', type: 'number', label: 'Num Heads', default: 4 }
    ]
  },
  [LayerType.TRANSFORMER_ENCODER]: {
    type: LayerType.TRANSFORMER_ENCODER,
    label: 'Encoder Layer',
    description: 'Standard Transformer Encoder',
    category: 'Transformer',
    parameters: [
      { name: 'd_model', type: 'number', label: 'D Model', default: 512 },
      { name: 'nhead', type: 'number', label: 'Heads', default: 8 },
      { name: 'dim_feedforward', type: 'number', label: 'FF Dim', default: 2048 },
      { name: 'dropout', type: 'number', label: 'Dropout', default: 0.1 },
      // Lowercase option values match nn.TransformerEncoderLayer's 'activation' argument.
      { name: 'activation', type: 'select', label: 'Activation', default: 'relu', options: ['relu', 'gelu'] }
    ]
  },
  [LayerType.TRANSFORMER_DECODER]: {
    type: LayerType.TRANSFORMER_DECODER,
    label: 'Decoder Layer',
    description: 'Standard Transformer Decoder',
    category: 'Transformer',
    parameters: [
      { name: 'd_model', type: 'number', label: 'D Model', default: 512 },
      { name: 'nhead', type: 'number', label: 'Heads', default: 8 },
      { name: 'dim_feedforward', type: 'number', label: 'FF Dim', default: 2048 },
      { name: 'dropout', type: 'number', label: 'Dropout', default: 0.1 }
    ]
  },
  [LayerType.TRANSFORMER_BLOCK]: {
    type: LayerType.TRANSFORMER_BLOCK,
    label: 'Transformer Block',
    description: 'Generic Block',
    category: 'Transformer',
    parameters: [
      { name: 'd_model', type: 'number', label: 'D Model', default: 512 },
      { name: 'nhead', type: 'number', label: 'Heads', default: 8 },
      { name: 'dim_feedforward', type: 'number', label: 'FF Dim', default: 2048 }
    ]
  },
| }; | |
| export const INITIAL_NODES = [ | |
| { | |
| id: '1', | |
| type: 'custom', | |
| position: { x: 250, y: 50 }, | |
| data: { label: 'Input Data', type: LayerType.INPUT, params: LAYER_DEFINITIONS[LayerType.INPUT].parameters.reduce((acc, p) => ({...acc, [p.name]: p.default}), {}) } | |
| }, | |
| { | |
| id: '2', | |
| type: 'custom', | |
| position: { x: 250, y: 200 }, | |
| data: { label: 'Conv2D', type: LayerType.CONV2D, params: LAYER_DEFINITIONS[LayerType.CONV2D].parameters.reduce((acc, p) => ({...acc, [p.name]: p.default}), {}) } | |
| }, | |
| { | |
| id: '3', | |
| type: 'custom', | |
| position: { x: 250, y: 350 }, | |
| data: { label: 'ReLU', type: LayerType.RELU, params: {} } | |
| } | |
| ]; | |
| export const INITIAL_EDGES = [ | |
| { id: 'e1-2', source: '1', target: '2', animated: true, style: { stroke: '#94a3b8' } }, | |
| { id: 'e2-3', source: '2', target: '3', animated: true, style: { stroke: '#94a3b8' } } | |
| ]; | |
| export const TEMPLATES: Record<string, GraphTemplate> = { | |
| 'mobilenet_block': { | |
| id: 'mobilenet_block', | |
| name: 'MobileNet Block', | |
| description: 'Efficient Separable Conv (TF/Keras style).', | |
| nodes: [ | |
| { id: 'in', type: 'custom', position: {x: 250, y: 0}, data: {label: 'Input', type: LayerType.INPUT, params: {modality: 'Image'}} }, | |
| // Expansion | |
| { id: 'pw1', type: 'custom', position: {x: 250, y: 100}, data: {label: '1x1 Conv (Expand)', type: LayerType.CONV2D, params: {out_channels: 144, kernel_size: 1}} }, | |
| { id: 'bn1', type: 'custom', position: {x: 250, y: 200}, data: {label: 'BatchNorm', type: LayerType.BATCHNORM, params: {}} }, | |
| { id: 'rel1', type: 'custom', position: {x: 250, y: 300}, data: {label: 'ReLU6', type: LayerType.RELU, params: {}} }, // Typically ReLU6 | |
| // Depthwise | |
| { id: 'dw', type: 'custom', position: {x: 250, y: 400}, data: {label: 'Depthwise Conv', type: LayerType.DEPTHWISE_CONV2D, params: {kernel_size: 3, stride: 1}} }, | |
| { id: 'bn2', type: 'custom', position: {x: 250, y: 500}, data: {label: 'BatchNorm', type: LayerType.BATCHNORM, params: {}} }, | |
| { id: 'rel2', type: 'custom', position: {x: 250, y: 600}, data: {label: 'ReLU6', type: LayerType.RELU, params: {}} }, | |
| // Pointwise | |
| { id: 'pw2', type: 'custom', position: {x: 250, y: 700}, data: {label: '1x1 Conv (Project)', type: LayerType.CONV2D, params: {out_channels: 24, kernel_size: 1}} }, | |
| { id: 'bn3', type: 'custom', position: {x: 250, y: 800}, data: {label: 'BatchNorm', type: LayerType.BATCHNORM, params: {}} }, | |
| // Residual | |
| { id: 'add', type: 'custom', position: {x: 250, y: 900}, data: {label: 'Add Residual', type: LayerType.ADD, params: {}} }, | |
| ], | |
| edges: [ | |
| { id: '1', source: 'in', target: 'pw1' }, { id: '2', source: 'pw1', target: 'bn1' }, { id: '3', source: 'bn1', target: 'rel1' }, | |
| { id: '4', source: 'rel1', target: 'dw' }, { id: '5', source: 'dw', target: 'bn2' }, { id: '6', source: 'bn2', target: 'rel2' }, | |
| { id: '7', source: 'rel2', target: 'pw2' }, { id: '8', source: 'pw2', target: 'bn3' }, | |
| { id: '9', source: 'bn3', target: 'add' }, { id: '10', source: 'in', target: 'add' } | |
| ] | |
| }, | |
| 'rvc_voice': { | |
| id: 'rvc_voice', | |
| name: 'Voice Cloning (RVC)', | |
| description: 'Retrieval-based Voice Conversion backbone.', | |
| nodes: [ | |
| { id: 'audio', type: 'custom', position: {x: 200, y: 0}, data: {label: 'Source Audio', type: LayerType.INPUT, params: {modality: 'Audio'}} }, | |
| { id: 'hubert', type: 'custom', position: {x: 200, y: 100}, data: {label: 'HuBERT Soft', type: LayerType.RVC_ENCODER, params: {}} }, | |
| { id: 'f0', type: 'custom', position: {x: 450, y: 0}, data: {label: 'Pitch (F0)', type: LayerType.INPUT, params: {modality: 'Tensor'}} }, | |
| { id: 'emb', type: 'custom', position: {x: 450, y: 100}, data: {label: 'F0 Embed', type: LayerType.EMBEDDING, params: {num_embeddings: 256}} }, | |
| { id: 'cat', type: 'custom', position: {x: 325, y: 200}, data: {label: 'Merge Features', type: LayerType.CONCAT, params: {}} }, | |
| { id: 'wn', type: 'custom', position: {x: 325, y: 300}, data: {label: 'WaveNet Stack', type: LayerType.WAVENET_BLOCK, params: {channels: 256, dilation: 2}} }, | |
| { id: 'voc', type: 'custom', position: {x: 325, y: 400}, data: {label: 'HiFiGAN', type: LayerType.VOCODER, params: {}} }, | |
| { id: 'out', type: 'custom', position: {x: 325, y: 500}, data: {label: 'Cloned Audio', type: LayerType.OUTPUT, params: {}} }, | |
| ], | |
| edges: [ | |
| { id: '1', source: 'audio', target: 'hubert' }, { id: '2', source: 'f0', target: 'emb' }, | |
| { id: '3', source: 'hubert', target: 'cat' }, { id: '4', source: 'emb', target: 'cat' }, | |
| { id: '5', source: 'cat', target: 'wn' }, { id: '6', source: 'wn', target: 'voc' }, | |
| { id: '7', source: 'voc', target: 'out' } | |
| ] | |
| }, | |
| 'yolo_v8': { | |
| id: 'yolo_v8', | |
| name: 'YOLO (Detection)', | |
| description: 'C2f Backbone with Detection Head.', | |
| nodes: [ | |
| { id: 'in', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Image', type: LayerType.INPUT, params: {modality: 'Image'}} }, | |
| { id: 'c1', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Conv (Stem)', type: LayerType.CONV2D, params: {out_channels: 64, kernel_size: 3, stride: 2}} }, | |
| { id: 'c2f1', type: 'custom', position: {x: 300, y: 200}, data: {label: 'C2f Block 1', type: LayerType.C2F_BLOCK, params: {c1: 64, c2: 128, n: 3}} }, | |
| { id: 'c2f2', type: 'custom', position: {x: 300, y: 300}, data: {label: 'C2f Block 2', type: LayerType.C2F_BLOCK, params: {c1: 128, c2: 256, n: 6}} }, | |
| { id: 'sppf', type: 'custom', position: {x: 300, y: 400}, data: {label: 'SPPF', type: LayerType.SPPF_BLOCK, params: {c1: 256, c2: 256}} }, | |
| { id: 'head', type: 'custom', position: {x: 300, y: 500}, data: {label: 'YOLO Head', type: LayerType.DETECT_HEAD, params: {nc: 80}} }, | |
| ], | |
| edges: [ | |
| { id: '1', source: 'in', target: 'c1' }, { id: '2', source: 'c1', target: 'c2f1' }, | |
| { id: '3', source: 'c2f1', target: 'c2f2' }, { id: '4', source: 'c2f2', target: 'sppf' }, | |
| { id: '5', source: 'sppf', target: 'head' } | |
| ] | |
| }, | |
| 'whisper_stt': { | |
| id: 'whisper_stt', | |
| name: 'Whisper (STT)', | |
| description: 'Speech-to-Text Transformer.', | |
| nodes: [ | |
| { id: 'aud', type: 'custom', position: {x: 100, y: 0}, data: {label: 'Audio', type: LayerType.INPUT, params: {modality: 'Audio'}} }, | |
| { id: 'mel', type: 'custom', position: {x: 100, y: 100}, data: {label: 'MelSpectrogram', type: LayerType.MEL_SPECTROGRAM, params: {n_mels: 80}} }, | |
| { id: 'conv1', type: 'custom', position: {x: 100, y: 200}, data: {label: 'Conv1D', type: LayerType.CONV1D, params: {out_channels: 512, kernel_size: 3}} }, | |
| { id: 'enc', type: 'custom', position: {x: 100, y: 300}, data: {label: 'Transformer Enc', type: LayerType.TRANSFORMER_ENCODER, params: {d_model: 512}} }, | |
| { id: 'tok', type: 'custom', position: {x: 500, y: 0}, data: {label: 'Text Tokens', type: LayerType.INPUT, params: {modality: 'Text'}} }, | |
| { id: 'emb', type: 'custom', position: {x: 500, y: 100}, data: {label: 'Embedding', type: LayerType.EMBEDDING, params: {}} }, | |
| { id: 'dec', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Transformer Dec', type: LayerType.TRANSFORMER_DECODER, params: {d_model: 512}} }, | |
| { id: 'out', type: 'custom', position: {x: 300, y: 500}, data: {label: 'Next Token', type: LayerType.LINEAR, params: {}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'aud', target: 'mel' }, { id: '2', source: 'mel', target: 'conv1' }, | |
| { id: '3', source: 'conv1', target: 'enc' }, { id: '4', source: 'enc', target: 'dec' }, | |
| { id: '5', source: 'tok', target: 'emb' }, { id: '6', source: 'emb', target: 'dec' }, | |
| { id: '7', source: 'dec', target: 'out' } | |
| ] | |
| }, | |
| 'tacotron_tts': { | |
| id: 'tacotron_tts', | |
| name: 'Voice Cloning (TTS)', | |
| description: 'Text to Mel Spectrogram with Vocoder.', | |
| nodes: [ | |
| { id: 'txt', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Text', type: LayerType.INPUT, params: {modality: 'Text'}} }, | |
| { id: 'emb', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Char Embed', type: LayerType.EMBEDDING, params: {embedding_dim: 512}} }, | |
| { id: 'pre', type: 'custom', position: {x: 300, y: 200}, data: {label: 'PreNet (Linear)', type: LayerType.LINEAR, params: {out_features: 256}} }, | |
| { id: 'lstm', type: 'custom', position: {x: 300, y: 300}, data: {label: 'Decoder LSTM', type: LayerType.LSTM, params: {hidden_size: 1024}} }, | |
| { id: 'mel', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Mel Projection', type: LayerType.LINEAR, params: {out_features: 80}} }, | |
| { id: 'voc', type: 'custom', position: {x: 300, y: 500}, data: {label: 'HiFiGAN Vocoder', type: LayerType.VOCODER, params: {type: 'HiFiGAN'}} }, | |
| { id: 'out', type: 'custom', position: {x: 300, y: 600}, data: {label: 'Audio Waveform', type: LayerType.OUTPUT, params: {}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'txt', target: 'emb' }, { id: '2', source: 'emb', target: 'pre' }, | |
| { id: '3', source: 'pre', target: 'lstm' }, { id: '4', source: 'lstm', target: 'mel' }, | |
| { id: '5', source: 'mel', target: 'voc' }, { id: '6', source: 'voc', target: 'out' } | |
| ] | |
| }, | |
| 'nerf_3d': { | |
| id: 'nerf_3d', | |
| name: 'NeRF (3D Gen)', | |
| description: 'Neural Radiance Field MLP.', | |
| nodes: [ | |
| { id: 'pos', type: 'custom', position: {x: 200, y: 0}, data: {label: 'XYZ Coords', type: LayerType.INPUT, params: {shape: '3'}} }, | |
| { id: 'dir', type: 'custom', position: {x: 400, y: 0}, data: {label: 'View Dir', type: LayerType.INPUT, params: {shape: '3'}} }, | |
| { id: 'pe1', type: 'custom', position: {x: 200, y: 100}, data: {label: 'Pos Enc', type: LayerType.POS_EMBED, params: {}} }, | |
| { id: 'pe2', type: 'custom', position: {x: 400, y: 100}, data: {label: 'Dir Enc', type: LayerType.POS_EMBED, params: {}} }, | |
| { id: 'mlp1', type: 'custom', position: {x: 300, y: 200}, data: {label: 'Density MLP', type: LayerType.NERF_BLOCK, params: {hidden_dim: 256, num_layers: 8}} }, | |
| { id: 'cat', type: 'custom', position: {x: 300, y: 300}, data: {label: 'Concat View', type: LayerType.CONCAT, params: {}} }, | |
| { id: 'mlp2', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Color MLP', type: LayerType.LINEAR, params: {out_features: 128}} }, | |
| { id: 'rgb', type: 'custom', position: {x: 300, y: 500}, data: {label: 'RGB + Sigma', type: LayerType.OUTPUT, params: {}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'pos', target: 'pe1' }, { id: '2', source: 'dir', target: 'pe2' }, | |
| { id: '3', source: 'pe1', target: 'mlp1' }, { id: '4', source: 'mlp1', target: 'cat' }, | |
| { id: '5', source: 'pe2', target: 'cat' }, { id: '6', source: 'cat', target: 'mlp2' }, | |
| { id: '7', source: 'mlp2', target: 'rgb' } | |
| ] | |
| }, | |
| 'gpt_style': { | |
| id: 'gpt_style', | |
| name: 'LLM (GPT Style)', | |
| description: 'Decoder-only Transformer with RoPE & RMSNorm.', | |
| nodes: [ | |
| { id: 'in', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Token Input', type: LayerType.INPUT, params: {modality: 'Text', shape: '128', dtype: 'int64'}} }, | |
| { id: 'emb', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Token Embed', type: LayerType.EMBEDDING, params: {num_embeddings: 50257, embedding_dim: 768}} }, | |
| { id: 'rope', type: 'custom', position: {x: 300, y: 200}, data: {label: 'RoPE', type: LayerType.ROPE, params: {dim: 64}} }, | |
| { id: 'blk1', type: 'custom', position: {x: 300, y: 300}, data: {label: 'Decoder Block 1', type: LayerType.TRANSFORMER_DECODER, params: {d_model: 768, nhead: 12}} }, | |
| { id: 'blk2', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Decoder Block 2', type: LayerType.TRANSFORMER_DECODER, params: {d_model: 768, nhead: 12}} }, | |
| { id: 'ln', type: 'custom', position: {x: 300, y: 500}, data: {label: 'RMSNorm', type: LayerType.RMSNORM, params: {dim: 768}} }, | |
| { id: 'head', type: 'custom', position: {x: 300, y: 600}, data: {label: 'LM Head', type: LayerType.LINEAR, params: {out_features: 50257, bias: false}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'in', target: 'emb' }, { id: '2', source: 'emb', target: 'rope' }, | |
| { id: '3', source: 'rope', target: 'blk1' }, { id: '4', source: 'blk1', target: 'blk2' }, | |
| { id: '5', source: 'blk2', target: 'ln' }, { id: '6', source: 'ln', target: 'head' } | |
| ] | |
| }, | |
| 'bert_encoder': { | |
| id: 'bert_encoder', | |
| name: 'BERT (Encoder)', | |
| description: 'Bidirectional Transformer Encoder (NLP)', | |
| nodes: [ | |
| { id: 'in', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Token Input', type: LayerType.INPUT, params: {modality: 'Text'}} }, | |
| { id: 'emb', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Embedding', type: LayerType.EMBEDDING, params: {embedding_dim: 768}} }, | |
| { id: 'pos', type: 'custom', position: {x: 300, y: 200}, data: {label: 'Pos Embed', type: LayerType.POS_EMBED, params: {embedding_dim: 768}} }, | |
| { id: 'add', type: 'custom', position: {x: 300, y: 300}, data: {label: 'Combine', type: LayerType.ADD, params: {}} }, | |
| { id: 'enc1', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Encoder Layer 1', type: LayerType.TRANSFORMER_ENCODER, params: {d_model: 768, nhead: 12}} }, | |
| { id: 'enc2', type: 'custom', position: {x: 300, y: 500}, data: {label: 'Encoder Layer 2', type: LayerType.TRANSFORMER_ENCODER, params: {d_model: 768, nhead: 12}} }, | |
| { id: 'pool', type: 'custom', position: {x: 300, y: 600}, data: {label: 'Pooler', type: LayerType.LINEAR, params: {out_features: 768}} }, | |
| ], | |
| edges: [ | |
| { id: '1', source: 'in', target: 'emb' }, { id: '2', source: 'emb', target: 'add' }, | |
| { id: '3', source: 'pos', target: 'add' }, { id: '4', source: 'add', target: 'enc1' }, | |
| { id: '5', source: 'enc1', target: 'enc2' }, { id: '6', source: 'enc2', target: 'pool' } | |
| ] | |
| }, | |
| 'autoencoder_conv': { | |
| id: 'autoencoder_conv', | |
| name: 'Autoencoder (Conv)', | |
| description: 'Image compression and reconstruction.', | |
| nodes: [ | |
| { id: 'in', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Image', type: LayerType.INPUT, params: {modality: 'Image', shape: '1,28,28'}} }, | |
| // Encoder | |
| { id: 'enc1', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Conv 1', type: LayerType.CONV2D, params: {out_channels: 16, kernel_size: 3, stride: 2, padding: 1}} }, | |
| { id: 'act1', type: 'custom', position: {x: 300, y: 180}, data: {label: 'ReLU', type: LayerType.RELU, params: {}} }, | |
| { id: 'enc2', type: 'custom', position: {x: 300, y: 260}, data: {label: 'Conv 2', type: LayerType.CONV2D, params: {out_channels: 32, kernel_size: 3, stride: 2, padding: 1}} }, | |
| { id: 'act2', type: 'custom', position: {x: 300, y: 340}, data: {label: 'ReLU', type: LayerType.RELU, params: {}} }, | |
| // Decoder | |
| { id: 'dec1', type: 'custom', position: {x: 300, y: 420}, data: {label: 'Deconv 1', type: LayerType.CONV_TRANSPOSE2D, params: {out_channels: 16, kernel_size: 3, stride: 2, padding: 1}} }, // Needs output_padding often in code but usually inferred | |
| { id: 'act3', type: 'custom', position: {x: 300, y: 500}, data: {label: 'ReLU', type: LayerType.RELU, params: {}} }, | |
| { id: 'dec2', type: 'custom', position: {x: 300, y: 580}, data: {label: 'Deconv 2', type: LayerType.CONV_TRANSPOSE2D, params: {out_channels: 1, kernel_size: 3, stride: 2, padding: 1}} }, | |
| { id: 'sig', type: 'custom', position: {x: 300, y: 660}, data: {label: 'Sigmoid', type: LayerType.SIGMOID, params: {}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'in', target: 'enc1' }, { id: '2', source: 'enc1', target: 'act1' }, { id: '3', source: 'act1', target: 'enc2' }, | |
| { id: '4', source: 'enc2', target: 'act2' }, { id: '5', source: 'act2', target: 'dec1' }, { id: '6', source: 'dec1', target: 'act3' }, | |
| { id: '7', source: 'act3', target: 'dec2' }, { id: '8', source: 'dec2', target: 'sig' } | |
| ] | |
| }, | |
| 'video_3d_cnn': { | |
| id: 'video_3d_cnn', | |
| name: '3D CNN (Video)', | |
| description: 'Video Classification using 3D Convolutions', | |
| nodes: [ | |
| { id: 'in', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Video Input', type: LayerType.INPUT, params: {modality: '3D Volume', shape: '3,16,112,112'}} }, | |
| { id: 'c3d1', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Conv3D 1', type: LayerType.CONV3D, params: {out_channels: 64, kernel_size: 3}} }, | |
| { id: 'act1', type: 'custom', position: {x: 300, y: 200}, data: {label: 'ReLU', type: LayerType.RELU, params: {}} }, | |
| { id: 'pool1', type: 'custom', position: {x: 300, y: 300}, data: {label: 'MaxPool3D', type: LayerType.MAXPOOL3D, params: {kernel_size: 2, stride: 2}} }, | |
| { id: 'c3d2', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Conv3D 2', type: LayerType.CONV3D, params: {out_channels: 128, kernel_size: 3}} }, | |
| { id: 'gap', type: 'custom', position: {x: 300, y: 500}, data: {label: 'GlobalAvgPool', type: LayerType.GLOBAL_AVG_POOL, params: {}} }, | |
| { id: 'fc', type: 'custom', position: {x: 300, y: 600}, data: {label: 'Classifier', type: LayerType.LINEAR, params: {out_features: 400}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'in', target: 'c3d1' }, { id: '2', source: 'c3d1', target: 'act1' }, | |
| { id: '3', source: 'act1', target: 'pool1' }, { id: '4', source: 'pool1', target: 'c3d2' }, | |
| { id: '5', source: 'c3d2', target: 'gap' }, { id: '6', source: 'gap', target: 'fc' } | |
| ] | |
| }, | |
| 'super_res_gan': { | |
| id: 'super_res_gan', | |
| name: 'Super Res (ESPCN)', | |
| description: 'Efficient Sub-Pixel Convolutional Neural Network', | |
| nodes: [ | |
| { id: 'in', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Low Res Image', type: LayerType.INPUT, params: {modality: 'Image'}} }, | |
| { id: 'conv1', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Conv1 (Feature)', type: LayerType.CONV2D, params: {out_channels: 64, kernel_size: 5}} }, | |
| { id: 'tanh1', type: 'custom', position: {x: 300, y: 200}, data: {label: 'Tanh', type: LayerType.TANH, params: {}} }, | |
| { id: 'conv2', type: 'custom', position: {x: 300, y: 300}, data: {label: 'Conv2 (Map)', type: LayerType.CONV2D, params: {out_channels: 32, kernel_size: 3}} }, | |
| { id: 'tanh2', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Tanh', type: LayerType.TANH, params: {}} }, | |
| { id: 'conv3', type: 'custom', position: {x: 300, y: 500}, data: {label: 'Conv3 (Shuffle)', type: LayerType.CONV2D, params: {out_channels: 12, kernel_size: 3}} }, // 3*r^2 (r=2) = 12 | |
| { id: 'ps', type: 'custom', position: {x: 300, y: 600}, data: {label: 'PixelShuffle', type: LayerType.PIXEL_SHUFFLE, params: {upscale_factor: 2}} }, | |
| { id: 'out', type: 'custom', position: {x: 300, y: 700}, data: {label: 'High Res', type: LayerType.OUTPUT, params: {}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'in', target: 'conv1' }, { id: '2', source: 'conv1', target: 'tanh1' }, | |
| { id: '3', source: 'tanh1', target: 'conv2' }, { id: '4', source: 'conv2', target: 'tanh2' }, | |
| { id: '5', source: 'tanh2', target: 'conv3' }, { id: '6', source: 'conv3', target: 'ps' }, | |
| { id: '7', source: 'ps', target: 'out' } | |
| ] | |
| }, | |
| 'resnet_mini': { | |
| id: 'resnet_mini', | |
| name: 'ResNet Block (Mini)', | |
| description: 'CNN with Skip Connection.', | |
| nodes: [ | |
| { id: 'in', type: 'custom', position: {x: 200, y: 0}, data: {label: 'Input', type: LayerType.INPUT, params: {modality: 'Image'}} }, | |
| { id: 'conv1', type: 'custom', position: {x: 200, y: 100}, data: {label: 'Conv1', type: LayerType.CONV2D, params: {out_channels: 64}} }, | |
| { id: 'relu1', type: 'custom', position: {x: 200, y: 200}, data: {label: 'ReLU', type: LayerType.RELU, params: {}} }, | |
| { id: 'conv2', type: 'custom', position: {x: 200, y: 300}, data: {label: 'Conv2', type: LayerType.CONV2D, params: {out_channels: 64}} }, | |
| // Skip connection path handled by edges, but node layout implies it | |
| { id: 'add', type: 'custom', position: {x: 200, y: 450}, data: {label: 'Residual Add', type: LayerType.ADD, params: {}} }, | |
| { id: 'relu2', type: 'custom', position: {x: 200, y: 550}, data: {label: 'Final ReLU', type: LayerType.RELU, params: {}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'in', target: 'conv1' }, { id: '2', source: 'conv1', target: 'relu1' }, | |
| { id: '3', source: 'relu1', target: 'conv2' }, { id: '4', source: 'conv2', target: 'add' }, | |
| { id: '5', source: 'in', target: 'add' }, // Skip connection | |
| { id: '6', source: 'add', target: 'relu2' } | |
| ] | |
| }, | |
| 'moe_transformer': { | |
| id: 'moe_transformer', | |
| name: 'Mixture of Experts', | |
| description: 'Sparse MoE model with routing.', | |
| nodes: [ | |
| { id: 'in', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Input', type: LayerType.INPUT, params: {modality: 'Text'}} }, | |
| { id: 'emb', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Embedding', type: LayerType.EMBEDDING, params: {}} }, | |
| { id: 'att', type: 'custom', position: {x: 300, y: 200}, data: {label: 'Self Attention', type: LayerType.ATTENTION, params: {}} }, | |
| { id: 'moe', type: 'custom', position: {x: 300, y: 300}, data: {label: 'Sparse MoE Block', type: LayerType.MOE_BLOCK, params: {num_experts: 8, top_k: 2}} }, | |
| { id: 'norm', type: 'custom', position: {x: 300, y: 400}, data: {label: 'RMSNorm', type: LayerType.RMSNORM, params: {}} }, | |
| { id: 'out', type: 'custom', position: {x: 300, y: 500}, data: {label: 'Output', type: LayerType.LINEAR, params: {}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'in', target: 'emb' }, { id: '2', source: 'emb', target: 'att' }, | |
| { id: '3', source: 'att', target: 'moe' }, { id: '4', source: 'moe', target: 'norm' }, | |
| { id: '5', source: 'norm', target: 'out' } | |
| ] | |
| }, | |
| 'vlm_llava': { | |
| id: 'vlm_llava', | |
| name: 'VLM (LlaVA Style)', | |
| description: 'Visual Language Model connecting Vision Encoder to LLM.', | |
| nodes: [ | |
| // Vision Branch | |
| { id: 'img', type: 'custom', position: {x: 100, y: 0}, data: {label: 'Image Input', type: LayerType.INPUT, params: {modality: 'Image'}} }, | |
| { id: 'patch', type: 'custom', position: {x: 100, y: 100}, data: {label: 'Patch Embed', type: LayerType.PATCH_EMBED, params: {patch_size: 14}} }, | |
| { id: 'vit', type: 'custom', position: {x: 100, y: 200}, data: {label: 'ViT Encoder', type: LayerType.TRANSFORMER_ENCODER, params: {}} }, | |
| { id: 'proj', type: 'custom', position: {x: 100, y: 300}, data: {label: 'Projection', type: LayerType.LINEAR, params: {out_features: 4096}} }, | |
| // Text Branch | |
| { id: 'txt', type: 'custom', position: {x: 500, y: 0}, data: {label: 'Text Prompts', type: LayerType.INPUT, params: {modality: 'Text'}} }, | |
| { id: 'temb', type: 'custom', position: {x: 500, y: 200}, data: {label: 'Text Embed', type: LayerType.EMBEDDING, params: {embedding_dim: 4096}} }, | |
| // Merge | |
| { id: 'cat', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Concat Tokens', type: LayerType.CONCAT, params: {dim: 1}} }, | |
| { id: 'llm', type: 'custom', position: {x: 300, y: 500}, data: {label: 'LLM Decoder', type: LayerType.TRANSFORMER_DECODER, params: {d_model: 4096}} }, | |
| { id: 'out', type: 'custom', position: {x: 300, y: 600}, data: {label: 'Response', type: LayerType.OUTPUT, params: {}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'img', target: 'patch' }, { id: '2', source: 'patch', target: 'vit' }, { id: '3', source: 'vit', target: 'proj' }, | |
| { id: '4', source: 'txt', target: 'temb' }, | |
| { id: '5', source: 'proj', target: 'cat' }, { id: '6', source: 'temb', target: 'cat' }, | |
| { id: '7', source: 'cat', target: 'llm' }, { id: '8', source: 'llm', target: 'out' } | |
| ] | |
| }, | |
| 'sam_model': { | |
| id: 'sam_model', | |
| name: 'Segment Anything (SAM)', | |
| description: 'Image Encoder + Prompt Encoder + Mask Decoder.', | |
| nodes: [ | |
| { id: 'img', type: 'custom', position: {x: 100, y: 0}, data: {label: 'Image', type: LayerType.INPUT, params: {modality: 'Image'}} }, | |
| { id: 'enc', type: 'custom', position: {x: 100, y: 150}, data: {label: 'Image Encoder (ViT)', type: LayerType.TRANSFORMER_ENCODER, params: {}} }, | |
| { id: 'prm', type: 'custom', position: {x: 500, y: 0}, data: {label: 'Points/Boxes', type: LayerType.INPUT, params: {modality: 'Tensor'}} }, | |
| { id: 'penc', type: 'custom', position: {x: 500, y: 150}, data: {label: 'Prompt Enc', type: LayerType.SAM_PROMPT_ENCODER, params: {}} }, | |
| { id: 'dec', type: 'custom', position: {x: 300, y: 300}, data: {label: 'Mask Decoder', type: LayerType.SAM_MASK_DECODER, params: {}} }, | |
| { id: 'out', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Masks', type: LayerType.OUTPUT, params: {}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'img', target: 'enc' }, { id: '2', source: 'enc', target: 'dec' }, | |
| { id: '3', source: 'prm', target: 'penc' }, { id: '4', source: 'penc', target: 'dec' }, | |
| { id: '5', source: 'dec', target: 'out' } | |
| ] | |
| }, | |
| 'lam_agent': { | |
| id: 'lam_agent', | |
| name: 'Large Action Model (LAM)', | |
| description: 'LLM backbone with Action Head for agents.', | |
| nodes: [ | |
| { id: 'state', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Env State', type: LayerType.INPUT, params: {modality: 'State'}} }, | |
| { id: 'llm', type: 'custom', position: {x: 300, y: 150}, data: {label: 'LLM Backbone', type: LayerType.TRANSFORMER_DECODER, params: {d_model: 1024}} }, | |
| { id: 'head', type: 'custom', position: {x: 300, y: 300}, data: {label: 'Action Head', type: LayerType.ACTION_HEAD, params: {num_actions: 50, action_type: 'Discrete'}} }, | |
| { id: 'out', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Action Logits', type: LayerType.OUTPUT, params: {}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'state', target: 'llm' }, { id: '2', source: 'llm', target: 'head' }, { id: '3', source: 'head', target: 'out' } | |
| ] | |
| }, | |
| 'lcm_diff': { | |
| id: 'lcm_diff', | |
| name: 'Latent Consistency (LCM)', | |
| description: 'Diffusion backbone with Time Embeddings.', | |
| nodes: [ | |
| { id: 'lat', type: 'custom', position: {x: 200, y: 0}, data: {label: 'Latent Input', type: LayerType.INPUT, params: {modality: 'Latent', shape: '4,64,64'}} }, | |
| { id: 'time', type: 'custom', position: {x: 500, y: 0}, data: {label: 'Time Step', type: LayerType.INPUT, params: {modality: 'Tensor', shape: '1'}} }, | |
| { id: 'temb', type: 'custom', position: {x: 500, y: 100}, data: {label: 'Time Embed', type: LayerType.TIME_EMBEDDING, params: {}} }, | |
| { id: 'cat', type: 'custom', position: {x: 350, y: 250}, data: {label: 'Inject Time', type: LayerType.ADD, params: {}} }, | |
| { id: 'unet', type: 'custom', position: {x: 350, y: 350}, data: {label: 'UNet Block', type: LayerType.CONV2D, params: {}} }, | |
| { id: 'out', type: 'custom', position: {x: 350, y: 450}, data: {label: 'Denoised', type: LayerType.OUTPUT, params: {}} } | |
| ], | |
| edges: [ | |
| { id: '1', source: 'lat', target: 'cat' }, | |
| { id: '2', source: 'time', target: 'temb' }, { id: '3', source: 'temb', target: 'cat' }, | |
| { id: '4', source: 'cat', target: 'unet' }, { id: '5', source: 'unet', target: 'out' } | |
| ] | |
| } | |
| }; |