import { LayerDefinition, LayerType, GraphTemplate } from './types'; export const LAYER_DEFINITIONS: Record = { // --- CORE --- [LayerType.INPUT]: { type: LayerType.INPUT, label: 'Input Data', description: 'Entry point for data tensors', category: 'Core', parameters: [ { name: 'name', type: 'string', label: 'Name', default: 'input_1', description: 'Variable name in forward()' }, { name: 'modality', type: 'select', label: 'Modality', default: 'Tensor', options: ['Tensor', 'Image', 'Text', 'Audio', 'Video', 'Latent', 'State', '3D Volume', 'Point Cloud', 'Radar', 'Lidar', 'Graph', 'Molecule'] }, { name: 'shape', type: 'string', label: 'Shape (e.g. 3,224,224)', default: '3, 224, 224' }, { name: 'batch_size', type: 'number', label: 'Batch Size', default: 32 }, { name: 'dtype', type: 'select', label: 'Data Type', default: 'float32', options: ['float32', 'int64', 'bool'] } ] }, [LayerType.LINEAR]: { type: LayerType.LINEAR, label: 'Linear (Dense)', description: 'Fully connected layer', category: 'Core', parameters: [ { name: 'in_features', type: 'number', label: 'In Features (Opt)', default: 0, description: "0 = Auto-infer" }, { name: 'out_features', type: 'number', label: 'Output Features', default: 128 }, { name: 'bias', type: 'boolean', label: 'Use Bias', default: true }, { name: 'activation', type: 'select', label: 'Fused Activation', default: 'None', options: ['None', 'ReLU', 'GELU', 'Sigmoid', 'Softplus'] } ] }, [LayerType.OUTPUT]: { type: LayerType.OUTPUT, label: 'Output Head', description: 'Final model output', category: 'Core', parameters: [ { name: 'num_classes', type: 'number', label: 'Classes', default: 10 }, { name: 'activation', type: 'select', label: 'Activation', default: 'Softmax', options: ['None', 'Softmax', 'Sigmoid', 'LogSoftmax'] } ] }, [LayerType.EMBEDDING]: { type: LayerType.EMBEDDING, label: 'Embedding', description: 'Lookup table for embeddings', category: 'Core', parameters: [ { name: 'num_embeddings', type: 'number', label: 'Vocab Size', 
default: 10000 }, { name: 'embedding_dim', type: 'number', label: 'Embed Dim', default: 256 }, { name: 'padding_idx', type: 'number', label: 'Padding Index', default: 0 }, { name: 'max_norm', type: 'number', label: 'Max Norm', default: 0, description: "0 = None" } ] }, [LayerType.POS_EMBED]: { type: LayerType.POS_EMBED, label: 'Positional Embed', description: 'Learnable positional embeddings', category: 'Core', parameters: [ { name: 'num_embeddings', type: 'number', label: 'Max Positions', default: 1024 }, { name: 'embedding_dim', type: 'number', label: 'Embed Dim', default: 256 } ] }, // --- PREPROCESSING (TF/KERAS STYLE) --- [LayerType.RESCALING]: { type: LayerType.RESCALING, label: 'Rescaling', description: 'Scale inputs (x * scale + offset)', category: 'Preprocessing', parameters: [ { name: 'scale', type: 'number', label: 'Scale', default: 0.00392, description: 'e.g. 1/255' }, { name: 'offset', type: 'number', label: 'Offset', default: 0.0 } ] }, [LayerType.RESIZING]: { type: LayerType.RESIZING, label: 'Resizing', description: 'Resize image batch', category: 'Preprocessing', parameters: [ { name: 'height', type: 'number', label: 'Height', default: 224 }, { name: 'width', type: 'number', label: 'Width', default: 224 }, { name: 'interpolation', type: 'select', label: 'Interpolation', default: 'bilinear', options: ['bilinear', 'nearest', 'bicubic'] } ] }, [LayerType.CENTER_CROP]: { type: LayerType.CENTER_CROP, label: 'Center Crop', description: 'Crops the central portion', category: 'Preprocessing', parameters: [ { name: 'height', type: 'number', label: 'Height', default: 224 }, { name: 'width', type: 'number', label: 'Width', default: 224 } ] }, [LayerType.NORMALIZATION_LAYER]: { type: LayerType.NORMALIZATION_LAYER, label: 'Normalization (Stat)', description: 'Normalize with mean/variance (Keras style)', category: 'Preprocessing', parameters: [ { name: 'mean', type: 'string', label: 'Mean', default: '0.0' }, { name: 'variance', type: 'string', label: 'Variance', 
default: '1.0' }, { name: 'axis', type: 'number', label: 'Axis', default: -1 } ] }, [LayerType.RANDOM_FLIP]: { type: LayerType.RANDOM_FLIP, label: 'Random Flip', description: 'Augmentation: Flips image', category: 'Preprocessing', parameters: [ { name: 'mode', type: 'select', label: 'Mode', default: 'horizontal', options: ['horizontal', 'vertical', 'horizontal_and_vertical'] }, { name: 'seed', type: 'number', label: 'Seed', default: 42 } ] }, [LayerType.RANDOM_ROTATION]: { type: LayerType.RANDOM_ROTATION, label: 'Random Rotation', description: 'Augmentation: Rotates image', category: 'Preprocessing', parameters: [ { name: 'factor', type: 'number', label: 'Factor (0-1)', default: 0.2 }, { name: 'fill_mode', type: 'select', label: 'Fill Mode', default: 'reflect', options: ['reflect', 'wrap', 'constant', 'nearest'] } ] }, [LayerType.RANDOM_ZOOM]: { type: LayerType.RANDOM_ZOOM, label: 'Random Zoom', description: 'Augmentation: Zooms image', category: 'Preprocessing', parameters: [ { name: 'height_factor', type: 'number', label: 'Height Factor', default: 0.2 }, { name: 'width_factor', type: 'number', label: 'Width Factor', default: 0.2 } ] }, [LayerType.RANDOM_CONTRAST]: { type: LayerType.RANDOM_CONTRAST, label: 'Random Contrast', description: 'Augmentation: Adjusts contrast', category: 'Preprocessing', parameters: [ { name: 'factor', type: 'number', label: 'Factor', default: 0.1 } ] }, [LayerType.TEXT_VECTORIZATION]: { type: LayerType.TEXT_VECTORIZATION, label: 'Text Vectorization', description: 'Map text to integer sequences', category: 'Preprocessing', parameters: [ { name: 'max_tokens', type: 'number', label: 'Max Tokens', default: 10000 }, { name: 'output_sequence_length', type: 'number', label: 'Seq Length', default: 256 }, { name: 'output_mode', type: 'select', label: 'Mode', default: 'int', options: ['int', 'binary', 'count', 'tf_idf'] } ] }, [LayerType.DISCRETIZATION]: { type: LayerType.DISCRETIZATION, label: 'Discretization', description: 'Bucketizes 
continuous features', category: 'Preprocessing', parameters: [ { name: 'num_bins', type: 'number', label: 'Num Bins', default: 10 } ] }, [LayerType.CATEGORY_ENCODING]: { type: LayerType.CATEGORY_ENCODING, label: 'Category Encoding', description: 'One-hot or Multi-hot encoding', category: 'Preprocessing', parameters: [ { name: 'num_tokens', type: 'number', label: 'Num Tokens', default: 10 }, { name: 'output_mode', type: 'select', label: 'Mode', default: 'one_hot', options: ['one_hot', 'multi_hot', 'count'] } ] }, // --- GRAPH NEURAL NETWORKS --- [LayerType.GCN_CONV]: { type: LayerType.GCN_CONV, label: 'GCN Conv', description: 'Graph Convolutional Network', category: 'Graph', parameters: [ { name: 'in_channels', type: 'number', label: 'In Channels', default: 16 }, { name: 'out_channels', type: 'number', label: 'Out Channels', default: 32 }, { name: 'improved', type: 'boolean', label: 'Improved GCN', default: false } ] }, [LayerType.GRAPH_SAGE]: { type: LayerType.GRAPH_SAGE, label: 'GraphSAGE', description: 'Inductive Graph Learning', category: 'Graph', parameters: [ { name: 'in_channels', type: 'number', label: 'In Channels', default: 16 }, { name: 'out_channels', type: 'number', label: 'Out Channels', default: 32 }, { name: 'aggr', type: 'select', label: 'Aggregator', default: 'mean', options: ['mean', 'max', 'lstm'] } ] }, [LayerType.GAT_CONV]: { type: LayerType.GAT_CONV, label: 'GAT Conv', description: 'Graph Attention Network', category: 'Graph', parameters: [ { name: 'in_channels', type: 'number', label: 'In Channels', default: 16 }, { name: 'out_channels', type: 'number', label: 'Out Channels', default: 8 }, { name: 'heads', type: 'number', label: 'Attention Heads', default: 4 }, { name: 'concat', type: 'boolean', label: 'Concat Heads', default: true } ] }, [LayerType.GIN_CONV]: { type: LayerType.GIN_CONV, label: 'GIN Conv', description: 'Graph Isomorphism Network', category: 'Graph', parameters: [ { name: 'eps', type: 'number', label: 'Epsilon', default: 0 }, 
{ name: 'train_eps', type: 'boolean', label: 'Train Epsilon', default: true } ] }, // --- PHYSICS & SCIML --- [LayerType.NEURAL_ODE]: { type: LayerType.NEURAL_ODE, label: 'Neural ODE', description: 'Continuous Depth Model', category: 'Physics', parameters: [ { name: 'hidden_dim', type: 'number', label: 'Hidden Dim', default: 64 }, { name: 'solver', type: 'select', label: 'Solver', default: 'dopri5', options: ['dopri5', 'rk4', 'euler'] } ] }, [LayerType.PINN_LINEAR]: { type: LayerType.PINN_LINEAR, label: 'PINN Linear', description: 'Physics-Informed Linear Layer', category: 'Physics', parameters: [ { name: 'in_features', type: 'number', label: 'In Features', default: 32 }, { name: 'out_features', type: 'number', label: 'Out Features', default: 32 }, { name: 'constraint', type: 'text', label: 'Constraint Formula', default: 'grad(u, x) - f(x) = 0' } ] }, [LayerType.HAMILTONIAN_NN]: { type: LayerType.HAMILTONIAN_NN, label: 'Hamiltonian NN', description: 'Conserves Energy/Momentum', category: 'Physics', parameters: [ { name: 'dim', type: 'number', label: 'Dimension', default: 2 }, { name: 'hidden_dim', type: 'number', label: 'Hidden Dim', default: 64 } ] }, [LayerType.PROTEIN_FOLDING]: { type: LayerType.PROTEIN_FOLDING, label: 'Protein Folding', description: 'AlphaFold EvoFormer Block', category: 'Physics', parameters: [ { name: 'msa_dim', type: 'number', label: 'MSA Dim', default: 256 }, { name: 'pair_dim', type: 'number', label: 'Pair Dim', default: 128 }, { name: 'num_heads', type: 'number', label: 'Heads', default: 8 } ] }, // --- SPIKING & NEUROMORPHIC --- [LayerType.LIF_NEURON]: { type: LayerType.LIF_NEURON, label: 'LIF Neuron', description: 'Leaky Integrate-and-Fire', category: 'Spiking', parameters: [ { name: 'tau', type: 'number', label: 'Time Constant', default: 2.0 }, { name: 'v_threshold', type: 'number', label: 'Threshold', default: 1.0 }, { name: 'surrogate_grad', type: 'select', label: 'Surrogate Grad', default: 'fast_sigmoid', options: ['fast_sigmoid', 
'arctan'] } ] }, [LayerType.SPIKING_LAYER]: { type: LayerType.SPIKING_LAYER, label: 'Spiking Dense', description: 'Fully Connected SNN Layer', category: 'Spiking', parameters: [ { name: 'in_features', type: 'number', label: 'In Features', default: 128 }, { name: 'out_features', type: 'number', label: 'Out Features', default: 128 }, { name: 'spike_mode', type: 'boolean', label: 'Output Spikes', default: true } ] }, // --- REINFORCEMENT LEARNING --- [LayerType.DUELING_HEAD]: { type: LayerType.DUELING_HEAD, label: 'Dueling Head', description: 'Separates Value and Advantage', category: 'RL', parameters: [ { name: 'input_dim', type: 'number', label: 'Input Dim', default: 512 }, { name: 'action_dim', type: 'number', label: 'Action Dim', default: 4 } ] }, [LayerType.PPO_HEAD]: { type: LayerType.PPO_HEAD, label: 'PPO Actor-Critic', description: 'Policy and Value Heads', category: 'RL', parameters: [ { name: 'hidden_dim', type: 'number', label: 'Hidden Dim', default: 256 }, { name: 'action_space', type: 'select', label: 'Action Space', default: 'Discrete', options: ['Discrete', 'Continuous'] } ] }, [LayerType.SAC_HEAD]: { type: LayerType.SAC_HEAD, label: 'SAC Head', description: 'Soft Actor-Critic Output', category: 'RL', parameters: [ { name: 'hidden_dim', type: 'number', label: 'Hidden Dim', default: 256 }, { name: 'log_std_min', type: 'number', label: 'Min Log Std', default: -20 }, { name: 'log_std_max', type: 'number', label: 'Max Log Std', default: 2 } ] }, // --- ADVANCED / NICHE --- [LayerType.CAPSULE]: { type: LayerType.CAPSULE, label: 'Capsule Layer', description: 'Preserves spatial hierarchy', category: 'Advanced', parameters: [ { name: 'num_capsules', type: 'number', label: 'Num Capsules', default: 10 }, { name: 'capsule_dim', type: 'number', label: 'Capsule Dim', default: 16 }, { name: 'routings', type: 'number', label: 'Routing Iters', default: 3 } ] }, [LayerType.HYPER_NET]: { type: LayerType.HYPER_NET, label: 'HyperNetwork', description: 'Generates weights 
for another NN', category: 'Advanced', parameters: [ { name: 'target_shape', type: 'string', label: 'Target Shape', default: '64,64' }, { name: 'embedding_dim', type: 'number', label: 'Z Dim', default: 32 } ] }, [LayerType.MAMBA_BLOCK]: { type: LayerType.MAMBA_BLOCK, label: 'Mamba (SSM)', description: 'Selective State Space Model', category: 'Advanced', parameters: [ { name: 'd_model', type: 'number', label: 'D Model', default: 512 }, { name: 'd_state', type: 'number', label: 'State Dim', default: 16 }, { name: 'expand', type: 'number', label: 'Expansion', default: 2 } ] }, [LayerType.RWKV_BLOCK]: { type: LayerType.RWKV_BLOCK, label: 'RWKV Block', description: 'RNN-Transformer Hybrid', category: 'Advanced', parameters: [ { name: 'n_embd', type: 'number', label: 'Embed Dim', default: 768 }, { name: 'n_layer', type: 'number', label: 'Layer Idx', default: 0 } ] }, [LayerType.HOPFIELD]: { type: LayerType.HOPFIELD, label: 'Hopfield Layer', description: 'Dense Associative Memory', category: 'Advanced', parameters: [ { name: 'in_features', type: 'number', label: 'Features', default: 64 }, { name: 'stored_patterns', type: 'number', label: 'Pattern Capacity', default: 10 } ] }, [LayerType.NORMALIZING_FLOW]: { type: LayerType.NORMALIZING_FLOW, label: 'Normalizing Flow', description: 'Invertible Generative Model', category: 'Advanced', parameters: [ { name: 'flow_type', type: 'select', label: 'Type', default: 'RealNVP', options: ['RealNVP', 'Glow', 'MAF'] }, { name: 'num_flows', type: 'number', label: 'Num Flows', default: 4 } ] }, [LayerType.DNC_MEMORY]: { type: LayerType.DNC_MEMORY, label: 'DNC Memory', description: 'Differentiable Neural Computer', category: 'Advanced', parameters: [ { name: 'memory_size', type: 'number', label: 'Mem Slots', default: 128 }, { name: 'word_size', type: 'number', label: 'Word Size', default: 20 }, { name: 'num_read_heads', type: 'number', label: 'Read Heads', default: 4 } ] }, [LayerType.ARCFACE]: { type: LayerType.ARCFACE, label: 'ArcFace', 
description: 'Metric Learning Loss', category: 'Advanced', parameters: [ { name: 's', type: 'number', label: 'Scale (s)', default: 64.0 }, { name: 'm', type: 'number', label: 'Margin (m)', default: 0.5 }, { name: 'num_classes', type: 'number', label: 'Classes', default: 1000 } ] }, [LayerType.ECHO_STATE]: { type: LayerType.ECHO_STATE, label: 'Reservoir (ESN)', description: 'Echo State Network', category: 'Advanced', parameters: [ { name: 'reservoir_size', type: 'number', label: 'Reservoir Size', default: 1000 }, { name: 'spectral_radius', type: 'number', label: 'Spectral Radius', default: 0.9 } ] }, // --- VIDEO / GENERATION --- [LayerType.VIDEO_DIFFUSION_BLOCK]: { type: LayerType.VIDEO_DIFFUSION_BLOCK, label: 'Video Diffusion', description: '3D UNet Block for Video Gen', category: 'Video', parameters: [ { name: 'in_channels', type: 'number', label: 'Channels', default: 128 }, { name: 'time_dim', type: 'number', label: 'Time Emb Dim', default: 512 }, { name: 'use_temporal_attn', type: 'boolean', label: 'Temp Attn', default: true } ] }, [LayerType.SPATIO_TEMPORAL_ATTN]: { type: LayerType.SPATIO_TEMPORAL_ATTN, label: 'Spatio-Temporal Attn', description: 'Attention over Space & Time', category: 'Video', parameters: [ { name: 'dim', type: 'number', label: 'Dimension', default: 512 }, { name: 'num_heads', type: 'number', label: 'Heads', default: 8 }, { name: 'frames', type: 'number', label: 'Max Frames', default: 16 } ] }, [LayerType.VIDEO_TOKENIZER]: { type: LayerType.VIDEO_TOKENIZER, label: 'Video Tokenizer', description: '3D VQ-VAE / Magvit style', category: 'Video', parameters: [ { name: 'patch_size_t', type: 'number', label: 'Time Patch', default: 2 }, { name: 'patch_size_hw', type: 'number', label: 'Spatial Patch', default: 16 }, { name: 'vocab_size', type: 'number', label: 'Codebook Size', default: 8192 } ] }, [LayerType.FRAME_INTERPOLATOR]: { type: LayerType.FRAME_INTERPOLATOR, label: 'Frame Interpolator', description: 'Upsamples video frame rate', category: 
'Video', parameters: [ { name: 'scale_factor', type: 'number', label: 'Scale Factor', default: 2 }, { name: 'mode', type: 'select', label: 'Mode', default: 'bilinear', options: ['bilinear', 'optical_flow', 'pixel_shuffle'] } ] }, [LayerType.TEMPORAL_SHIFT]: { type: LayerType.TEMPORAL_SHIFT, label: 'Temporal Shift', description: 'TSM Module (Zero params)', category: 'Video', parameters: [ { name: 'n_segment', type: 'number', label: 'Segments', default: 8 }, { name: 'fold_div', type: 'number', label: 'Fold Divisor', default: 8 } ] }, [LayerType.NON_LOCAL_BLOCK]: { type: LayerType.NON_LOCAL_BLOCK, label: 'Non-Local Block', description: 'Global context block', category: 'Video', parameters: [ { name: 'in_channels', type: 'number', label: 'Channels', default: 64 }, { name: 'mode', type: 'select', label: 'Mode', default: 'embedded_gaussian', options: ['embedded_gaussian', 'gaussian', 'dot', 'concat'] } ] }, [LayerType.MULTIMODAL_FUSION]: { type: LayerType.MULTIMODAL_FUSION, label: 'Multimodal Fusion', description: 'Merge Video, Audio, Text', category: 'Video', parameters: [ { name: 'video_dim', type: 'number', label: 'Video Dim', default: 512 }, { name: 'audio_dim', type: 'number', label: 'Audio Dim', default: 256 }, { name: 'text_dim', type: 'number', label: 'Text Dim', default: 768 }, { name: 'out_dim', type: 'number', label: 'Fused Dim', default: 512 } ] }, // --- OCR (Text Recognition) --- [LayerType.TPS_TRANSFORM]: { type: LayerType.TPS_TRANSFORM, label: 'TPS Transform', description: 'Rectifies curved text (Thin Plate Spline)', category: 'OCR', parameters: [ { name: 'fiducial_points', type: 'number', label: 'Control Points', default: 20 }, { name: 'output_size', type: 'string', label: 'Out Size (HxW)', default: '32,100' } ] }, [LayerType.CRNN_BLOCK]: { type: LayerType.CRNN_BLOCK, label: 'CRNN Block', description: 'Conv + BiLSTM for Text Sequence', category: 'OCR', parameters: [ { name: 'img_h', type: 'number', label: 'Image Height', default: 32 }, { name: 
'hidden_size', type: 'number', label: 'LSTM Hidden', default: 256 }, { name: 'num_classes', type: 'number', label: 'Num Chars', default: 37 } ] }, [LayerType.CTC_DECODER]: { type: LayerType.CTC_DECODER, label: 'CTC Decoder', description: 'Connectionist Temporal Classification', category: 'OCR', parameters: [ { name: 'blank_index', type: 'number', label: 'Blank Index', default: 0 }, { name: 'reduction', type: 'select', label: 'Reduction', default: 'mean', options: ['mean', 'sum', 'none'] } ] }, // --- ROBOTICS / MOTION / DEPTH --- [LayerType.DEPTH_DECODER]: { type: LayerType.DEPTH_DECODER, label: 'Depth Decoder', description: 'Estimates Monocular Depth Map', category: 'Robotics', parameters: [ { name: 'min_depth', type: 'number', label: 'Min Depth (m)', default: 0.1 }, { name: 'max_depth', type: 'number', label: 'Max Depth (m)', default: 100.0 }, { name: 'backbone_scale', type: 'number', label: 'Scale Factor', default: 1 } ] }, [LayerType.DISPARITY_HEAD]: { type: LayerType.DISPARITY_HEAD, label: 'Disparity Head', description: 'Stereo Vision Disparity Estimation', category: 'Robotics', parameters: [ { name: 'max_disp', type: 'number', label: 'Max Disparity', default: 192 }, { name: 'refine_iter', type: 'number', label: 'Refine Iters', default: 3 } ] }, [LayerType.OPTICAL_FLOW]: { type: LayerType.OPTICAL_FLOW, label: 'Optical Flow', description: 'Estimates pixel motion between frames', category: 'Robotics', parameters: [ { name: 'input_channels', type: 'number', label: 'In Channels', default: 6 }, { name: 'flow_dim', type: 'number', label: 'Flow Dim', default: 2 }, { name: 'corr_levels', type: 'number', label: 'Correlation Lvl', default: 4 } ] }, [LayerType.VELOCITY_HEAD]: { type: LayerType.VELOCITY_HEAD, label: 'Velocity Head', description: 'Predicts object speed/trajectory', category: 'Robotics', parameters: [ { name: 'input_dim', type: 'number', label: 'Input Dim', default: 512 }, { name: 'time_horizon', type: 'number', label: 'Horizon (sec)', default: 3 }, { name: 
'mode', type: 'select', label: 'Mode', default: 'Linear', options: ['Linear', 'Angular', 'Full State'] } ] }, [LayerType.KALMAN_FILTER]: { type: LayerType.KALMAN_FILTER, label: 'Kalman Filter', description: 'Differentiable State Estimation', category: 'Robotics', parameters: [ { name: 'state_dim', type: 'number', label: 'State Dim', default: 4 }, { name: 'measure_dim', type: 'number', label: 'Measure Dim', default: 2 }, { name: 'learnable_process_noise', type: 'boolean', label: 'Learn Noise', default: true } ] }, [LayerType.BEV_TRANSFORM]: { type: LayerType.BEV_TRANSFORM, label: 'BEV Transformer', description: 'Perspective to Bird\'s Eye View', category: 'Robotics', parameters: [ { name: 'bev_h', type: 'number', label: 'BEV Height', default: 200 }, { name: 'bev_w', type: 'number', label: 'BEV Width', default: 200 }, { name: 'num_queries', type: 'number', label: 'Num Queries', default: 900 } ] }, [LayerType.RADAR_ENCODER]: { type: LayerType.RADAR_ENCODER, label: 'Radar Encoder', description: 'PointPillars style radar encoding', category: 'Robotics', parameters: [ { name: 'max_points', type: 'number', label: 'Max Points', default: 100 }, { name: 'num_features', type: 'number', label: 'Num Feats', default: 5 }, { name: 'voxel_size', type: 'string', label: 'Voxel Size', default: '0.2,0.2,4' } ] }, // --- DETECTION (YOLO) --- [LayerType.C2F_BLOCK]: { type: LayerType.C2F_BLOCK, label: 'C2f Block (YOLO)', description: 'CSP Bottleneck with 2 convolutions', category: 'Detection', parameters: [ { name: 'c1', type: 'number', label: 'In Channels', default: 64 }, { name: 'c2', type: 'number', label: 'Out Channels', default: 64 }, { name: 'n', type: 'number', label: 'Num Bottlenecks', default: 1 }, { name: 'shortcut', type: 'boolean', label: 'Shortcut', default: true } ] }, [LayerType.SPPF_BLOCK]: { type: LayerType.SPPF_BLOCK, label: 'SPPF Block', description: 'Spatial Pyramid Pooling - Fast', category: 'Detection', parameters: [ { name: 'c1', type: 'number', label: 'In 
Channels', default: 512 }, { name: 'c2', type: 'number', label: 'Out Channels', default: 512 }, { name: 'k', type: 'number', label: 'Kernel Size', default: 5 } ] }, [LayerType.DARKNET_BLOCK]: { type: LayerType.DARKNET_BLOCK, label: 'Darknet Block', description: 'Residual block used in Darknet', category: 'Detection', parameters: [ { name: 'channels', type: 'number', label: 'Channels', default: 64 }, { name: 'num_repeats', type: 'number', label: 'Repeats', default: 1 } ] }, [LayerType.DETECT_HEAD]: { type: LayerType.DETECT_HEAD, label: 'Detection Head', description: 'Predicts BBoxes and Classes', category: 'Detection', parameters: [ { name: 'nc', type: 'number', label: 'Num Classes', default: 80 }, { name: 'ch', type: 'text', label: 'Input Channels', default: '()', description: "List of channels from backbone levels" } ] }, [LayerType.ANCHOR_BOX]: { type: LayerType.ANCHOR_BOX, label: 'Anchor Boxes', description: 'Reference boxes for detection', category: 'Detection', parameters: [ { name: 'aspect_ratios', type: 'text', label: 'Ratios', default: '[0.5, 1.0, 2.0]' }, { name: 'scales', type: 'text', label: 'Scales', default: '[32, 64, 128]' } ] }, [LayerType.NMS]: { type: LayerType.NMS, label: 'NMS', description: 'Non-Maximum Suppression', category: 'Detection', parameters: [ { name: 'iou_threshold', type: 'number', label: 'IoU Thresh', default: 0.5 }, { name: 'score_threshold', type: 'number', label: 'Score Thresh', default: 0.25 } ] }, // --- AUDIO / SPEECH --- [LayerType.STFT]: { type: LayerType.STFT, label: 'STFT', description: 'Short-Time Fourier Transform', category: 'Audio', parameters: [ { name: 'n_fft', type: 'number', label: 'N_FFT', default: 1024 }, { name: 'hop_length', type: 'number', label: 'Hop Length', default: 256 }, { name: 'win_length', type: 'number', label: 'Window Length', default: 1024 } ] }, [LayerType.MEL_SPECTROGRAM]: { type: LayerType.MEL_SPECTROGRAM, label: 'MelSpectrogram', description: 'Raw Audio to Mel Spectrogram', category: 'Audio', 
parameters: [ { name: 'sample_rate', type: 'number', label: 'Sample Rate', default: 22050 }, { name: 'n_fft', type: 'number', label: 'N_FFT', default: 1024 }, { name: 'n_mels', type: 'number', label: 'Num Mels', default: 80 } ] }, [LayerType.SPEC_AUGMENT]: { type: LayerType.SPEC_AUGMENT, label: 'SpecAugment', description: 'Time/Freq masking for Audio', category: 'Audio', parameters: [ { name: 'freq_mask_param', type: 'number', label: 'Freq Mask', default: 27 }, { name: 'time_mask_param', type: 'number', label: 'Time Mask', default: 100 } ] }, [LayerType.CONFORMER_BLOCK]: { type: LayerType.CONFORMER_BLOCK, label: 'Conformer Block', description: 'Convolution + Transformer (ASR)', category: 'Audio', parameters: [ { name: 'd_model', type: 'number', label: 'D Model', default: 256 }, { name: 'nhead', type: 'number', label: 'Heads', default: 4 }, { name: 'kernel_size', type: 'number', label: 'Conv Kernel', default: 31 } ] }, [LayerType.WAVENET_BLOCK]: { type: LayerType.WAVENET_BLOCK, label: 'WaveNet Block', description: 'Dilated Causal Convolution', category: 'Audio', parameters: [ { name: 'channels', type: 'number', label: 'Channels', default: 64 }, { name: 'dilation', type: 'number', label: 'Dilation', default: 1 }, { name: 'kernel_size', type: 'number', label: 'Kernel', default: 3 } ] }, [LayerType.WAV2VEC2_ENC]: { type: LayerType.WAV2VEC2_ENC, label: 'Wav2Vec2 Encoder', description: 'Self-supervised Speech Encoder', category: 'Audio', parameters: [ { name: 'output_dim', type: 'number', label: 'Output Dim', default: 768 }, { name: 'extractor_mode', type: 'select', label: 'Mode', default: 'default', options: ['default', 'layer_norm'] } ] }, [LayerType.RVC_ENCODER]: { type: LayerType.RVC_ENCODER, label: 'RVC Hubert', description: 'Content Encoder for Voice Cloning', category: 'Audio', parameters: [ { name: 'model_type', type: 'select', label: 'Model', default: 'hubert-soft', options: ['hubert-soft', 'vec256', 'vec768'] }, { name: 'freeze', type: 'boolean', label: 
'Freeze', default: true } ] }, [LayerType.VOCODER]: { type: LayerType.VOCODER, label: 'Vocoder', description: 'Mel Spectrogram to Waveform', category: 'Audio', parameters: [ { name: 'type', type: 'select', label: 'Type', default: 'HiFiGAN', options: ['HiFiGAN', 'WaveGlow', 'MelGAN'] }, { name: 'upsample_rates', type: 'text', label: 'Upsample Rates', default: '[8,8,2,2]' } ] }, [LayerType.AUDIO_EMBEDDING]: { type: LayerType.AUDIO_EMBEDDING, label: 'Audio Embedding', description: 'Embeddings for discrete audio tokens', category: 'Audio', parameters: [ { name: 'num_embeddings', type: 'number', label: 'Num Embeddings', default: 1024 }, { name: 'embedding_dim', type: 'number', label: 'Embedding Dim', default: 512 } ] }, [LayerType.SINC_CONV]: { type: LayerType.SINC_CONV, label: 'SincConv', description: 'Parametric Sinc Filters (Speech)', category: 'Audio', parameters: [ { name: 'out_channels', type: 'number', label: 'Filters', default: 80 }, { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 251 }, { name: 'min_low_hz', type: 'number', label: 'Min Hz', default: 50 } ] }, // --- 3D / VISION --- [LayerType.NERF_BLOCK]: { type: LayerType.NERF_BLOCK, label: 'NeRF Block', description: 'MLP for Radiance Fields', category: '3D', parameters: [ { name: 'hidden_dim', type: 'number', label: 'Hidden Dim', default: 256 }, { name: 'num_layers', type: 'number', label: 'Num Layers', default: 8 }, { name: 'skips', type: 'text', label: 'Skip Layers', default: '[4]' } ] }, [LayerType.POINTNET_BLOCK]: { type: LayerType.POINTNET_BLOCK, label: 'PointNet Layer', description: 'Point cloud feature extraction', category: '3D', parameters: [ { name: 'in_channels', type: 'number', label: 'In Channels', default: 3 }, { name: 'out_channels', type: 'number', label: 'Out Channels', default: 64 } ] }, [LayerType.POINT_TRANSFORMER]: { type: LayerType.POINT_TRANSFORMER, label: 'PointTransformer', description: 'Self-Attention for Point Clouds', category: '3D', parameters: [ { name: 
'dim', type: 'number', label: 'Dim', default: 32 }, { name: 'num_neighbors', type: 'number', label: 'Neighbors (k)', default: 16 } ] }, [LayerType.TRIPLANE_ENC]: { type: LayerType.TRIPLANE_ENC, label: 'Triplane Enc', description: 'Project 3D to 3x2D Planes', category: '3D', parameters: [ { name: 'plane_res', type: 'number', label: 'Resolution', default: 256 }, { name: 'channels', type: 'number', label: 'Channels', default: 32 } ] }, [LayerType.GAUSSIAN_SPLAT]: { type: LayerType.GAUSSIAN_SPLAT, label: 'Gaussian Splat', description: '3D Gaussian Splatting Decoder', category: '3D', parameters: [ { name: 'num_gaussians', type: 'number', label: 'Num Gaussians', default: 10000 }, { name: 'sh_degree', type: 'number', label: 'SH Degree', default: 3 } ] }, [LayerType.MESH_CONV]: { type: LayerType.MESH_CONV, label: 'Mesh Conv', description: 'Convolution on 3D Meshes', category: '3D', parameters: [ { name: 'in_channels', type: 'number', label: 'In Channels', default: 3 }, { name: 'out_channels', type: 'number', label: 'Out Channels', default: 64 } ] }, // --- MERGE --- [LayerType.CONCAT]: { type: LayerType.CONCAT, label: 'Concatenate', description: 'Merge inputs along a dim', category: 'Merge', parameters: [ { name: 'dim', type: 'number', label: 'Dimension', default: 1 } ] }, [LayerType.ADD]: { type: LayerType.ADD, label: 'Add (Sum)', description: 'Element-wise addition (Residual)', category: 'Merge', parameters: [] }, [LayerType.SUBTRACT]: { type: LayerType.SUBTRACT, label: 'Subtract', description: 'Element-wise subtraction', category: 'Merge', parameters: [] }, [LayerType.MULTIPLY]: { type: LayerType.MULTIPLY, label: 'Multiply', description: 'Element-wise multiplication', category: 'Merge', parameters: [] }, [LayerType.AVERAGE]: { type: LayerType.AVERAGE, label: 'Average', description: 'Average of inputs', category: 'Merge', parameters: [] }, [LayerType.MAXIMUM]: { type: LayerType.MAXIMUM, label: 'Maximum', description: 'Element-wise maximum', category: 'Merge', parameters: 
[] }, [LayerType.MINIMUM]: { type: LayerType.MINIMUM, label: 'Minimum', description: 'Element-wise minimum', category: 'Merge', parameters: [] }, [LayerType.DOT]: { type: LayerType.DOT, label: 'Dot Product', description: 'Dot product of two tensors', category: 'Merge', parameters: [ { name: 'axes', type: 'number', label: 'Axis', default: 1 } ] }, // --- GENAI / ADVANCED --- [LayerType.RMSNORM]: { type: LayerType.RMSNORM, label: 'RMSNorm', description: 'Root Mean Square Norm (LLMs)', category: 'GenAI', parameters: [ { name: 'dim', type: 'number', label: 'Dimension', default: 512 }, { name: 'eps', type: 'number', label: 'Epsilon', default: 1e-6 } ] }, [LayerType.ROPE]: { type: LayerType.ROPE, label: 'RoPE', description: 'Rotary Positional Embedding', category: 'GenAI', parameters: [ { name: 'dim', type: 'number', label: 'Head Dim', default: 64 }, { name: 'max_position', type: 'number', label: 'Max Pos', default: 2048 } ] }, [LayerType.PATCH_EMBED]: { type: LayerType.PATCH_EMBED, label: 'Patch Embed (ViT)', description: 'Image to Sequence Patches', category: 'GenAI', parameters: [ { name: 'img_size', type: 'number', label: 'Image Size', default: 224 }, { name: 'patch_size', type: 'number', label: 'Patch Size', default: 16 }, { name: 'in_chans', type: 'number', label: 'In Channels', default: 3 }, { name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 768 } ] }, [LayerType.MOE_BLOCK]: { type: LayerType.MOE_BLOCK, label: 'Sparse MoE Block', description: 'Mixture of Experts Layer', category: 'GenAI', parameters: [ { name: 'num_experts', type: 'number', label: 'Num Experts', default: 8 }, { name: 'top_k', type: 'number', label: 'Top K (Active)', default: 2 }, { name: 'hidden_dim', type: 'number', label: 'Hidden Dim', default: 512 }, { name: 'expert_dim', type: 'number', label: 'Expert Dim', default: 2048 } ] }, [LayerType.ACTION_HEAD]: { type: LayerType.ACTION_HEAD, label: 'Action Head', description: 'Decision output for Agents/LAMs', category: 'GenAI', 
parameters: [ { name: 'input_dim', type: 'number', label: 'Input Dim', default: 512 }, { name: 'num_actions', type: 'number', label: 'Num Actions', default: 50 }, { name: 'action_type', type: 'select', label: 'Type', default: 'Discrete', options: ['Discrete', 'Continuous'] } ] }, [LayerType.SE_BLOCK]: { type: LayerType.SE_BLOCK, label: 'SE Block', description: 'Squeeze & Excitation Attention', category: 'GenAI', parameters: [ { name: 'channels', type: 'number', label: 'Channels', default: 64 }, { name: 'reduction', type: 'number', label: 'Reduction Ratio', default: 16 } ] }, [LayerType.TIME_EMBEDDING]: { type: LayerType.TIME_EMBEDDING, label: 'Time Embedding', description: 'Sinusoidal Time Embed (Diffusion)', category: 'GenAI', parameters: [ { name: 'dim', type: 'number', label: 'Dimension', default: 256 } ] }, [LayerType.SAM_PROMPT_ENCODER]: { type: LayerType.SAM_PROMPT_ENCODER, label: 'SAM Prompt Enc', description: 'Encodes points/boxes (SAM)', category: 'GenAI', parameters: [ { name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 256 } ] }, [LayerType.SAM_MASK_DECODER]: { type: LayerType.SAM_MASK_DECODER, label: 'SAM Mask Dec', description: 'Decodes segmentation masks', category: 'GenAI', parameters: [ { name: 'transformer_dim', type: 'number', label: 'Model Dim', default: 256 }, { name: 'num_multimask_outputs', type: 'number', label: 'Num Masks', default: 3 } ] }, // --- CONVOLUTION --- [LayerType.CONV1D]: { type: LayerType.CONV1D, label: 'Conv1D', description: '1D Convolution (Audio/Text)', category: 'Convolution', parameters: [ { name: 'in_channels', type: 'number', label: 'In Channels (Opt)', default: 0 }, { name: 'out_channels', type: 'number', label: 'Filters', default: 32 }, { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 }, { name: 'stride', type: 'number', label: 'Stride', default: 1 }, { name: 'padding', type: 'number', label: 'Padding', default: 1 } ] }, [LayerType.CONV2D]: { type: LayerType.CONV2D, label: 'Conv2D', 
description: '2D Convolutional Layer', category: 'Convolution', parameters: [ { name: 'in_channels', type: 'number', label: 'In Channels (Opt)', default: 0 }, { name: 'out_channels', type: 'number', label: 'Filters', default: 64 }, { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 }, { name: 'stride', type: 'number', label: 'Stride', default: 1 }, { name: 'padding', type: 'number', label: 'Padding', default: 1 }, { name: 'padding_mode', type: 'select', label: 'Pad Mode', default: 'zeros', options: ['zeros', 'reflect', 'replicate', 'circular'] }, { name: 'dilation', type: 'number', label: 'Dilation', default: 1 }, { name: 'groups', type: 'number', label: 'Groups', default: 1, description: "For depthwise separable" }, { name: 'bias', type: 'boolean', label: 'Bias', default: true }, { name: 'activation', type: 'select', label: 'Fused Activation', default: 'None', options: ['None', 'ReLU', 'LeakyReLU', 'SiLU'] } ] }, [LayerType.CONV3D]: { type: LayerType.CONV3D, label: 'Conv3D', description: '3D Convolution (Video/Volumetric)', category: 'Convolution', parameters: [ { name: 'out_channels', type: 'number', label: 'Filters', default: 32 }, { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 }, { name: 'stride', type: 'number', label: 'Stride', default: 1 }, { name: 'padding', type: 'number', label: 'Padding', default: 1 } ] }, [LayerType.SEPARABLE_CONV2D]: { type: LayerType.SEPARABLE_CONV2D, label: 'Separable Conv2D', description: 'Depthwise Separable Conv (TF/Keras style)', category: 'Convolution', parameters: [ { name: 'out_channels', type: 'number', label: 'Filters', default: 64 }, { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 }, { name: 'depth_multiplier', type: 'number', label: 'Depth Mult', default: 1 }, { name: 'stride', type: 'number', label: 'Stride', default: 1 } ] }, [LayerType.DEPTHWISE_CONV2D]: { type: LayerType.DEPTHWISE_CONV2D, label: 'Depthwise Conv2D', description: 'Convolution per 
channel independently', category: 'Convolution', parameters: [ { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 }, { name: 'depth_multiplier', type: 'number', label: 'Depth Mult', default: 1 }, { name: 'stride', type: 'number', label: 'Stride', default: 1 } ] }, [LayerType.CONV_TRANSPOSE2D]: { type: LayerType.CONV_TRANSPOSE2D, label: 'ConvTranspose2D', description: 'Deconvolution (Upsampling)', category: 'Convolution', parameters: [ { name: 'out_channels', type: 'number', label: 'Filters', default: 64 }, { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 2 }, { name: 'stride', type: 'number', label: 'Stride', default: 2 }, { name: 'padding', type: 'number', label: 'Padding', default: 0 } ] }, [LayerType.DEFORMABLE_CONV]: { type: LayerType.DEFORMABLE_CONV, label: 'Deformable Conv', description: 'Deformable Convolution v2', category: 'Convolution', parameters: [ { name: 'out_channels', type: 'number', label: 'Filters', default: 64 }, { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 }, { name: 'offset_groups', type: 'number', label: 'Offset Groups', default: 1 } ] }, [LayerType.MAXPOOL]: { type: LayerType.MAXPOOL, label: 'MaxPool2D', description: 'Max pooling operation', category: 'Convolution', parameters: [ { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 2 }, { name: 'stride', type: 'number', label: 'Stride', default: 2 } ] }, [LayerType.MAXPOOL3D]: { type: LayerType.MAXPOOL3D, label: 'MaxPool3D', description: '3D Max pooling', category: 'Convolution', parameters: [ { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 2 }, { name: 'stride', type: 'number', label: 'Stride', default: 2 } ] }, [LayerType.AVGPOOL]: { type: LayerType.AVGPOOL, label: 'AvgPool2D', description: 'Average pooling operation', category: 'Convolution', parameters: [ { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 2 }, { name: 'stride', type: 'number', label: 'Stride', 
default: 2 } ] }, [LayerType.ADAPTIVEAVGPOOL]: { type: LayerType.ADAPTIVEAVGPOOL, label: 'AdaptAvgPool2D', description: 'Pools to specific output size', category: 'Convolution', parameters: [ { name: 'output_size', type: 'number', label: 'Output Size (NxN)', default: 1 } ] }, [LayerType.GLOBAL_AVG_POOL]: { type: LayerType.GLOBAL_AVG_POOL, label: 'Global Avg Pool', description: 'Reduces spatial dims to 1x1', category: 'Convolution', parameters: [] }, [LayerType.UPSAMPLE]: { type: LayerType.UPSAMPLE, label: 'Upsample', description: 'Increases spatial size', category: 'Convolution', parameters: [ { name: 'scale_factor', type: 'number', label: 'Scale Factor', default: 2 }, { name: 'mode', type: 'select', label: 'Mode', default: 'nearest', options: ['nearest', 'bilinear', 'bicubic', 'trilinear'] } ] }, [LayerType.PIXEL_SHUFFLE]: { type: LayerType.PIXEL_SHUFFLE, label: 'Pixel Shuffle', description: 'Efficient Upscaling (Sub-pixel)', category: 'Convolution', parameters: [ { name: 'upscale_factor', type: 'number', label: 'Upscale Factor', default: 2 } ] }, // --- RECURRENT --- [LayerType.LSTM]: { type: LayerType.LSTM, label: 'LSTM', description: 'Long Short-Term Memory', category: 'Recurrent', parameters: [ { name: 'input_size', type: 'number', label: 'Input Size (Opt)', default: 0 }, { name: 'hidden_size', type: 'number', label: 'Hidden Size', default: 128 }, { name: 'num_layers', type: 'number', label: 'Num Layers', default: 1 }, { name: 'bidirectional', type: 'boolean', label: 'Bidirectional', default: false }, { name: 'dropout', type: 'number', label: 'Dropout', default: 0.0 }, { name: 'batch_first', type: 'boolean', label: 'Batch First', default: true } ] }, [LayerType.GRU]: { type: LayerType.GRU, label: 'GRU', description: 'Gated Recurrent Unit', category: 'Recurrent', parameters: [ { name: 'input_size', type: 'number', label: 'Input Size (Opt)', default: 0 }, { name: 'hidden_size', type: 'number', label: 'Hidden Size', default: 128 }, { name: 'num_layers', type: 
'number', label: 'Num Layers', default: 1 }, { name: 'bidirectional', type: 'boolean', label: 'Bidirectional', default: false }, { name: 'dropout', type: 'number', label: 'Dropout', default: 0.0 }, { name: 'batch_first', type: 'boolean', label: 'Batch First', default: true } ] }, // --- UTILITY / ACTIVATION --- [LayerType.RELU]: { type: LayerType.RELU, label: 'ReLU', description: 'Rectified Linear Unit', category: 'Utility', parameters: [] }, [LayerType.PRELU]: { type: LayerType.PRELU, label: 'PReLU', description: 'Parametric ReLU', category: 'Utility', parameters: [ { name: 'num_parameters', type: 'number', label: 'Num Params', default: 1 } ] }, [LayerType.SWIGLU]: { type: LayerType.SWIGLU, label: 'SwiGLU', description: 'Swish-Gated Linear Unit', category: 'Utility', parameters: [ { name: 'dim', type: 'number', label: 'Dim (Opt)', default: 0 } ] }, [LayerType.LEAKYRELU]: { type: LayerType.LEAKYRELU, label: 'LeakyReLU', description: 'Leaky ReLU Activation', category: 'Utility', parameters: [ { name: 'negative_slope', type: 'number', label: 'Negative Slope', default: 0.01 } ] }, [LayerType.GELU]: { type: LayerType.GELU, label: 'GELU', description: 'Gaussian Error Linear Unit', category: 'Utility', parameters: [] }, [LayerType.SILU]: { type: LayerType.SILU, label: 'SiLU (Swish)', description: 'Sigmoid Linear Unit', category: 'Utility', parameters: [] }, [LayerType.SIGMOID]: { type: LayerType.SIGMOID, label: 'Sigmoid', description: 'Sigmoid Activation', category: 'Utility', parameters: [] }, [LayerType.TANH]: { type: LayerType.TANH, label: 'Tanh', description: 'Hyperbolic Tangent', category: 'Utility', parameters: [] }, [LayerType.SOFTPLUS]: { type: LayerType.SOFTPLUS, label: 'Softplus', description: 'Smooth approximation to ReLU', category: 'Utility', parameters: [] }, [LayerType.SOFTSIGN]: { type: LayerType.SOFTSIGN, label: 'Softsign', description: 'Softsign Activation', category: 'Utility', parameters: [] }, [LayerType.DROPOUT]: { type: LayerType.DROPOUT, label: 
'Dropout', description: 'Random zeroing of elements', category: 'Utility', parameters: [ { name: 'p', type: 'number', label: 'Probability', default: 0.5 } ] }, [LayerType.SPATIAL_DROPOUT]: { type: LayerType.SPATIAL_DROPOUT, label: 'Spatial Dropout', description: 'Drops entire 2D feature maps', category: 'Utility', parameters: [ { name: 'p', type: 'number', label: 'Probability', default: 0.2 } ] }, [LayerType.DROPPATH]: { type: LayerType.DROPPATH, label: 'DropPath', description: 'Stochastic Depth (ResNets/ViTs)', category: 'Utility', parameters: [ { name: 'drop_prob', type: 'number', label: 'Drop Probability', default: 0.1 } ] }, [LayerType.FLATTEN]: { type: LayerType.FLATTEN, label: 'Flatten', description: 'Flattens input to 1D', category: 'Utility', parameters: [] }, [LayerType.RESHAPE]: { type: LayerType.RESHAPE, label: 'Reshape', description: 'Changes tensor dimensions', category: 'Utility', parameters: [ { name: 'shape', type: 'string', label: 'Target Shape', default: '-1, 256', description: 'Use -1 for inference' } ] }, [LayerType.PERMUTE]: { type: LayerType.PERMUTE, label: 'Permute', description: 'Permutes tensor dimensions', category: 'Utility', parameters: [ { name: 'dims', type: 'string', label: 'Dimensions', default: '0, 2, 1' } ] }, [LayerType.UNFLATTEN]: { type: LayerType.UNFLATTEN, label: 'Unflatten', description: 'Restores tensor dimensions', category: 'Utility', parameters: [ { name: 'dim', type: 'number', label: 'Dimension', default: 1 }, { name: 'unflattened_size', type: 'string', label: 'Target Sizes', default: '16, 16' } ] }, [LayerType.CUSTOM]: { type: LayerType.CUSTOM, label: 'Custom / Code', description: 'Define or instantiate any PyTorch Module', category: 'Utility', parameters: [ { name: 'class_name', type: 'string', label: 'Class Name', default: 'MyCustomLayer', description: "Name of the class to instantiate" }, { name: 'args', type: 'string', label: 'Arguments', default: '', description: "Constructor args (e.g. 
dim=128)" }, { name: 'imports', type: 'string', label: 'Imports', default: '', description: "Required imports (e.g. import torch.nn.functional as F)" }, { name: 'definition_code', type: 'text', label: 'Python Code (Def)', default: '', description: "Full class definition if strictly custom." } ] }, [LayerType.LAMBDA]: { type: LayerType.LAMBDA, label: 'Lambda (Expr)', description: 'Evaluate custom expression (e.g. x * 2)', category: 'Utility', parameters: [ { name: 'expression', type: 'string', label: 'Expression', default: 'x', description: "Python expression using 'x'" } ] }, [LayerType.IDENTITY]: { type: LayerType.IDENTITY, label: 'Identity', description: 'Passthrough layer', category: 'Utility', parameters: [] }, // --- NORMALIZATION --- [LayerType.BATCHNORM]: { type: LayerType.BATCHNORM, label: 'BatchNorm2D', description: 'Batch Normalization', category: 'Normalization', parameters: [ { name: 'num_features', type: 'number', label: 'Num Features', default: 64 } ] }, [LayerType.GROUPNORM]: { type: LayerType.GROUPNORM, label: 'GroupNorm', description: 'Group Normalization', category: 'Normalization', parameters: [ { name: 'num_groups', type: 'number', label: 'Num Groups', default: 32 }, { name: 'num_channels', type: 'number', label: 'Num Channels', default: 64 } ] }, [LayerType.LAYERNORM]: { type: LayerType.LAYERNORM, label: 'LayerNorm', description: 'Layer Normalization', category: 'Normalization', parameters: [ { name: 'normalized_shape', type: 'string', label: 'Norm Shape', default: '128' } ] }, [LayerType.INSTANCENORM]: { type: LayerType.INSTANCENORM, label: 'InstanceNorm2d', description: 'Instance Normalization', category: 'Normalization', parameters: [ { name: 'num_features', type: 'number', label: 'Num Features', default: 64 }, { name: 'affine', type: 'boolean', label: 'Learnable', default: false } ] }, // --- TRANSFORMER --- [LayerType.ATTENTION]: { type: LayerType.ATTENTION, label: 'Self Attention', description: 'Multi-Head Self Attention', category: 
'Transformer', parameters: [ { name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 512 }, { name: 'num_heads', type: 'number', label: 'Num Heads', default: 8 }, { name: 'dropout', type: 'number', label: 'Dropout', default: 0.1 }, { name: 'batch_first', type: 'boolean', label: 'Batch First', default: true }, { name: 'causal', type: 'boolean', label: 'Causal Mask', default: false } ] }, [LayerType.CROSS_ATTENTION]: { type: LayerType.CROSS_ATTENTION, label: 'Cross Attention', description: 'Attention between two sequences', category: 'Transformer', parameters: [ { name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 512 }, { name: 'num_heads', type: 'number', label: 'Num Heads', default: 8 }, { name: 'dropout', type: 'number', label: 'Dropout', default: 0.1 } ] }, [LayerType.WINDOW_ATTENTION]: { type: LayerType.WINDOW_ATTENTION, label: 'Window Attention', description: 'Sliding/Windowed Attention', category: 'Transformer', parameters: [ { name: 'window_size', type: 'number', label: 'Window Size', default: 7 }, { name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 96 }, { name: 'num_heads', type: 'number', label: 'Num Heads', default: 4 } ] }, [LayerType.TRANSFORMER_ENCODER]: { type: LayerType.TRANSFORMER_ENCODER, label: 'Encoder Layer', description: 'Standard Transformer Encoder', category: 'Transformer', parameters: [ { name: 'd_model', type: 'number', label: 'D Model', default: 512 }, { name: 'nhead', type: 'number', label: 'Heads', default: 8 }, { name: 'dim_feedforward', type: 'number', label: 'FF Dim', default: 2048 }, { name: 'dropout', type: 'number', label: 'Dropout', default: 0.1 }, { name: 'activation', type: 'select', label: 'Activation', default: 'relu', options: ['relu', 'gelu'] } ] }, [LayerType.TRANSFORMER_DECODER]: { type: LayerType.TRANSFORMER_DECODER, label: 'Decoder Layer', description: 'Standard Transformer Decoder', category: 'Transformer', parameters: [ { name: 'd_model', type: 'number', label: 'D Model', default: 
512 }, { name: 'nhead', type: 'number', label: 'Heads', default: 8 }, { name: 'dim_feedforward', type: 'number', label: 'FF Dim', default: 2048 }, { name: 'dropout', type: 'number', label: 'Dropout', default: 0.1 } ] }, [LayerType.TRANSFORMER_BLOCK]: { type: LayerType.TRANSFORMER_BLOCK, label: 'Transformer Block', description: 'Generic Block', category: 'Transformer', parameters: [ { name: 'd_model', type: 'number', label: 'D Model', default: 512 }, { name: 'nhead', type: 'number', label: 'Heads', default: 8 }, { name: 'dim_feedforward', type: 'number', label: 'FF Dim', default: 2048 } ] }, };

/**
 * Builds a `{ parameterName: defaultValue }` map from the parameter list that
 * LAYER_DEFINITIONS declares for `layer`.
 *
 * A new object is created on every call, so two nodes never share the same
 * params object. If two parameters share a name, the later one wins. A layer
 * with an empty parameter list yields `{}`.
 *
 * @param layer Key of the LAYER_DEFINITIONS entry to read defaults from.
 * @returns A fresh map of parameter names to their declared defaults.
 */
function defaultParamsFor(layer: keyof typeof LAYER_DEFINITIONS) {
  const { parameters } = LAYER_DEFINITIONS[layer];
  // Value type is derived from the definitions so it tracks whatever
  // `default` is declared as in './types'.
  const params: Record<string, (typeof parameters)[number]['default']> = {};
  for (const { name, default: fallback } of parameters) {
    params[name] = fallback;
  }
  return params;
}

/**
 * Initial node list: Input Data, Conv2D and ReLU, stacked vertically at
 * x = 250. Each node's `params` starts from the defaults declared for its
 * layer type in LAYER_DEFINITIONS.
 * NOTE(review): presumably used to seed the graph editor; confirm against the consumer.
 */
export const INITIAL_NODES = [
  {
    id: '1',
    type: 'custom',
    position: { x: 250, y: 50 },
    data: {
      label: 'Input Data',
      type: LayerType.INPUT,
      params: defaultParamsFor(LayerType.INPUT),
    },
  },
  {
    id: '2',
    type: 'custom',
    position: { x: 250, y: 200 },
    data: {
      label: 'Conv2D',
      type: LayerType.CONV2D,
      params: defaultParamsFor(LayerType.CONV2D),
    },
  },
  {
    id: '3',
    type: 'custom',
    position: { x: 250, y: 350 },
    data: {
      label: 'ReLU',
      type: LayerType.RELU,
      // ReLU declares no parameters, so this evaluates to {}.
      params: defaultParamsFor(LayerType.RELU),
    },
  },
];

/**
 * Initial edge list chaining INITIAL_NODES in order (1 -> 2 -> 3).
 * Each edge carries its own style object.
 */
export const INITIAL_EDGES = [
  {
    id: 'e1-2',
    source: '1',
    target: '2',
    animated: true,
    style: { stroke: '#94a3b8' },
  },
  {
    id: 'e2-3',
    source: '2',
    target: '3',
    animated: true,
    style: { stroke: '#94a3b8' },
  },
];

export const TEMPLATES: Record = { 'mobilenet_block': { id: 'mobilenet_block', name: 'MobileNet Block', description: 'Efficient Separable Conv (TF/Keras style).', nodes: [ { id: 'in', type: 'custom', position: {x: 250, y: 0}, data: {label: 'Input', type: LayerType.INPUT, params: {modality: 'Image'}} }, // Expansion { id: 'pw1', type: 'custom', position: {x: 250, y: 100}, data: {label: '1x1 Conv (Expand)', type: LayerType.CONV2D, params: {out_channels: 144, kernel_size: 1}} }, { id: 'bn1', type: 'custom', position: {x: 250, y: 200}, data: {label: 'BatchNorm', type: LayerType.BATCHNORM, 
params: {}} }, { id: 'rel1', type: 'custom', position: {x: 250, y: 300}, data: {label: 'ReLU6', type: LayerType.RELU, params: {}} }, // Typically ReLU6 // Depthwise { id: 'dw', type: 'custom', position: {x: 250, y: 400}, data: {label: 'Depthwise Conv', type: LayerType.DEPTHWISE_CONV2D, params: {kernel_size: 3, stride: 1}} }, { id: 'bn2', type: 'custom', position: {x: 250, y: 500}, data: {label: 'BatchNorm', type: LayerType.BATCHNORM, params: {}} }, { id: 'rel2', type: 'custom', position: {x: 250, y: 600}, data: {label: 'ReLU6', type: LayerType.RELU, params: {}} }, // Pointwise { id: 'pw2', type: 'custom', position: {x: 250, y: 700}, data: {label: '1x1 Conv (Project)', type: LayerType.CONV2D, params: {out_channels: 24, kernel_size: 1}} }, { id: 'bn3', type: 'custom', position: {x: 250, y: 800}, data: {label: 'BatchNorm', type: LayerType.BATCHNORM, params: {}} }, // Residual { id: 'add', type: 'custom', position: {x: 250, y: 900}, data: {label: 'Add Residual', type: LayerType.ADD, params: {}} }, ], edges: [ { id: '1', source: 'in', target: 'pw1' }, { id: '2', source: 'pw1', target: 'bn1' }, { id: '3', source: 'bn1', target: 'rel1' }, { id: '4', source: 'rel1', target: 'dw' }, { id: '5', source: 'dw', target: 'bn2' }, { id: '6', source: 'bn2', target: 'rel2' }, { id: '7', source: 'rel2', target: 'pw2' }, { id: '8', source: 'pw2', target: 'bn3' }, { id: '9', source: 'bn3', target: 'add' }, { id: '10', source: 'in', target: 'add' } ] }, 'rvc_voice': { id: 'rvc_voice', name: 'Voice Cloning (RVC)', description: 'Retrieval-based Voice Conversion backbone.', nodes: [ { id: 'audio', type: 'custom', position: {x: 200, y: 0}, data: {label: 'Source Audio', type: LayerType.INPUT, params: {modality: 'Audio'}} }, { id: 'hubert', type: 'custom', position: {x: 200, y: 100}, data: {label: 'HuBERT Soft', type: LayerType.RVC_ENCODER, params: {}} }, { id: 'f0', type: 'custom', position: {x: 450, y: 0}, data: {label: 'Pitch (F0)', type: LayerType.INPUT, params: {modality: 'Tensor'}} }, { 
id: 'emb', type: 'custom', position: {x: 450, y: 100}, data: {label: 'F0 Embed', type: LayerType.EMBEDDING, params: {num_embeddings: 256}} }, { id: 'cat', type: 'custom', position: {x: 325, y: 200}, data: {label: 'Merge Features', type: LayerType.CONCAT, params: {}} }, { id: 'wn', type: 'custom', position: {x: 325, y: 300}, data: {label: 'WaveNet Stack', type: LayerType.WAVENET_BLOCK, params: {channels: 256, dilation: 2}} }, { id: 'voc', type: 'custom', position: {x: 325, y: 400}, data: {label: 'HiFiGAN', type: LayerType.VOCODER, params: {}} }, { id: 'out', type: 'custom', position: {x: 325, y: 500}, data: {label: 'Cloned Audio', type: LayerType.OUTPUT, params: {}} }, ], edges: [ { id: '1', source: 'audio', target: 'hubert' }, { id: '2', source: 'f0', target: 'emb' }, { id: '3', source: 'hubert', target: 'cat' }, { id: '4', source: 'emb', target: 'cat' }, { id: '5', source: 'cat', target: 'wn' }, { id: '6', source: 'wn', target: 'voc' }, { id: '7', source: 'voc', target: 'out' } ] }, 'yolo_v8': { id: 'yolo_v8', name: 'YOLO (Detection)', description: 'C2f Backbone with Detection Head.', nodes: [ { id: 'in', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Image', type: LayerType.INPUT, params: {modality: 'Image'}} }, { id: 'c1', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Conv (Stem)', type: LayerType.CONV2D, params: {out_channels: 64, kernel_size: 3, stride: 2}} }, { id: 'c2f1', type: 'custom', position: {x: 300, y: 200}, data: {label: 'C2f Block 1', type: LayerType.C2F_BLOCK, params: {c1: 64, c2: 128, n: 3}} }, { id: 'c2f2', type: 'custom', position: {x: 300, y: 300}, data: {label: 'C2f Block 2', type: LayerType.C2F_BLOCK, params: {c1: 128, c2: 256, n: 6}} }, { id: 'sppf', type: 'custom', position: {x: 300, y: 400}, data: {label: 'SPPF', type: LayerType.SPPF_BLOCK, params: {c1: 256, c2: 256}} }, { id: 'head', type: 'custom', position: {x: 300, y: 500}, data: {label: 'YOLO Head', type: LayerType.DETECT_HEAD, params: {nc: 80}} }, ], edges: [ { 
id: '1', source: 'in', target: 'c1' }, { id: '2', source: 'c1', target: 'c2f1' }, { id: '3', source: 'c2f1', target: 'c2f2' }, { id: '4', source: 'c2f2', target: 'sppf' }, { id: '5', source: 'sppf', target: 'head' } ] }, 'whisper_stt': { id: 'whisper_stt', name: 'Whisper (STT)', description: 'Speech-to-Text Transformer.', nodes: [ { id: 'aud', type: 'custom', position: {x: 100, y: 0}, data: {label: 'Audio', type: LayerType.INPUT, params: {modality: 'Audio'}} }, { id: 'mel', type: 'custom', position: {x: 100, y: 100}, data: {label: 'MelSpectrogram', type: LayerType.MEL_SPECTROGRAM, params: {n_mels: 80}} }, { id: 'conv1', type: 'custom', position: {x: 100, y: 200}, data: {label: 'Conv1D', type: LayerType.CONV1D, params: {out_channels: 512, kernel_size: 3}} }, { id: 'enc', type: 'custom', position: {x: 100, y: 300}, data: {label: 'Transformer Enc', type: LayerType.TRANSFORMER_ENCODER, params: {d_model: 512}} }, { id: 'tok', type: 'custom', position: {x: 500, y: 0}, data: {label: 'Text Tokens', type: LayerType.INPUT, params: {modality: 'Text'}} }, { id: 'emb', type: 'custom', position: {x: 500, y: 100}, data: {label: 'Embedding', type: LayerType.EMBEDDING, params: {}} }, { id: 'dec', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Transformer Dec', type: LayerType.TRANSFORMER_DECODER, params: {d_model: 512}} }, { id: 'out', type: 'custom', position: {x: 300, y: 500}, data: {label: 'Next Token', type: LayerType.LINEAR, params: {}} } ], edges: [ { id: '1', source: 'aud', target: 'mel' }, { id: '2', source: 'mel', target: 'conv1' }, { id: '3', source: 'conv1', target: 'enc' }, { id: '4', source: 'enc', target: 'dec' }, { id: '5', source: 'tok', target: 'emb' }, { id: '6', source: 'emb', target: 'dec' }, { id: '7', source: 'dec', target: 'out' } ] }, 'tacotron_tts': { id: 'tacotron_tts', name: 'Voice Cloning (TTS)', description: 'Text to Mel Spectrogram with Vocoder.', nodes: [ { id: 'txt', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Text', type: 
LayerType.INPUT, params: {modality: 'Text'}} }, { id: 'emb', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Char Embed', type: LayerType.EMBEDDING, params: {embedding_dim: 512}} }, { id: 'pre', type: 'custom', position: {x: 300, y: 200}, data: {label: 'PreNet (Linear)', type: LayerType.LINEAR, params: {out_features: 256}} }, { id: 'lstm', type: 'custom', position: {x: 300, y: 300}, data: {label: 'Decoder LSTM', type: LayerType.LSTM, params: {hidden_size: 1024}} }, { id: 'mel', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Mel Projection', type: LayerType.LINEAR, params: {out_features: 80}} }, { id: 'voc', type: 'custom', position: {x: 300, y: 500}, data: {label: 'HiFiGAN Vocoder', type: LayerType.VOCODER, params: {type: 'HiFiGAN'}} }, { id: 'out', type: 'custom', position: {x: 300, y: 600}, data: {label: 'Audio Waveform', type: LayerType.OUTPUT, params: {}} } ], edges: [ { id: '1', source: 'txt', target: 'emb' }, { id: '2', source: 'emb', target: 'pre' }, { id: '3', source: 'pre', target: 'lstm' }, { id: '4', source: 'lstm', target: 'mel' }, { id: '5', source: 'mel', target: 'voc' }, { id: '6', source: 'voc', target: 'out' } ] }, 'nerf_3d': { id: 'nerf_3d', name: 'NeRF (3D Gen)', description: 'Neural Radiance Field MLP.', nodes: [ { id: 'pos', type: 'custom', position: {x: 200, y: 0}, data: {label: 'XYZ Coords', type: LayerType.INPUT, params: {shape: '3'}} }, { id: 'dir', type: 'custom', position: {x: 400, y: 0}, data: {label: 'View Dir', type: LayerType.INPUT, params: {shape: '3'}} }, { id: 'pe1', type: 'custom', position: {x: 200, y: 100}, data: {label: 'Pos Enc', type: LayerType.POS_EMBED, params: {}} }, { id: 'pe2', type: 'custom', position: {x: 400, y: 100}, data: {label: 'Dir Enc', type: LayerType.POS_EMBED, params: {}} }, { id: 'mlp1', type: 'custom', position: {x: 300, y: 200}, data: {label: 'Density MLP', type: LayerType.NERF_BLOCK, params: {hidden_dim: 256, num_layers: 8}} }, { id: 'cat', type: 'custom', position: {x: 300, y: 
300}, data: {label: 'Concat View', type: LayerType.CONCAT, params: {}} }, { id: 'mlp2', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Color MLP', type: LayerType.LINEAR, params: {out_features: 128}} }, { id: 'rgb', type: 'custom', position: {x: 300, y: 500}, data: {label: 'RGB + Sigma', type: LayerType.OUTPUT, params: {}} } ], edges: [ { id: '1', source: 'pos', target: 'pe1' }, { id: '2', source: 'dir', target: 'pe2' }, { id: '3', source: 'pe1', target: 'mlp1' }, { id: '4', source: 'mlp1', target: 'cat' }, { id: '5', source: 'pe2', target: 'cat' }, { id: '6', source: 'cat', target: 'mlp2' }, { id: '7', source: 'mlp2', target: 'rgb' } ] }, 'gpt_style': { id: 'gpt_style', name: 'LLM (GPT Style)', description: 'Decoder-only Transformer with RoPE & RMSNorm.', nodes: [ { id: 'in', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Token Input', type: LayerType.INPUT, params: {modality: 'Text', shape: '128', dtype: 'int64'}} }, { id: 'emb', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Token Embed', type: LayerType.EMBEDDING, params: {num_embeddings: 50257, embedding_dim: 768}} }, { id: 'rope', type: 'custom', position: {x: 300, y: 200}, data: {label: 'RoPE', type: LayerType.ROPE, params: {dim: 64}} }, { id: 'blk1', type: 'custom', position: {x: 300, y: 300}, data: {label: 'Decoder Block 1', type: LayerType.TRANSFORMER_DECODER, params: {d_model: 768, nhead: 12}} }, { id: 'blk2', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Decoder Block 2', type: LayerType.TRANSFORMER_DECODER, params: {d_model: 768, nhead: 12}} }, { id: 'ln', type: 'custom', position: {x: 300, y: 500}, data: {label: 'RMSNorm', type: LayerType.RMSNORM, params: {dim: 768}} }, { id: 'head', type: 'custom', position: {x: 300, y: 600}, data: {label: 'LM Head', type: LayerType.LINEAR, params: {out_features: 50257, bias: false}} } ], edges: [ { id: '1', source: 'in', target: 'emb' }, { id: '2', source: 'emb', target: 'rope' }, { id: '3', source: 'rope', target: 
'blk1' }, { id: '4', source: 'blk1', target: 'blk2' }, { id: '5', source: 'blk2', target: 'ln' }, { id: '6', source: 'ln', target: 'head' } ] }, 'bert_encoder': { id: 'bert_encoder', name: 'BERT (Encoder)', description: 'Bidirectional Transformer Encoder (NLP)', nodes: [ { id: 'in', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Token Input', type: LayerType.INPUT, params: {modality: 'Text'}} }, { id: 'emb', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Embedding', type: LayerType.EMBEDDING, params: {embedding_dim: 768}} }, { id: 'pos', type: 'custom', position: {x: 300, y: 200}, data: {label: 'Pos Embed', type: LayerType.POS_EMBED, params: {embedding_dim: 768}} }, { id: 'add', type: 'custom', position: {x: 300, y: 300}, data: {label: 'Combine', type: LayerType.ADD, params: {}} }, { id: 'enc1', type: 'custom', position: {x: 300, y: 400}, data: {label: 'Encoder Layer 1', type: LayerType.TRANSFORMER_ENCODER, params: {d_model: 768, nhead: 12}} }, { id: 'enc2', type: 'custom', position: {x: 300, y: 500}, data: {label: 'Encoder Layer 2', type: LayerType.TRANSFORMER_ENCODER, params: {d_model: 768, nhead: 12}} }, { id: 'pool', type: 'custom', position: {x: 300, y: 600}, data: {label: 'Pooler', type: LayerType.LINEAR, params: {out_features: 768}} }, ], edges: [ { id: '1', source: 'in', target: 'emb' }, { id: '2', source: 'emb', target: 'add' }, { id: '3', source: 'pos', target: 'add' }, { id: '4', source: 'add', target: 'enc1' }, { id: '5', source: 'enc1', target: 'enc2' }, { id: '6', source: 'enc2', target: 'pool' } ] }, 'autoencoder_conv': { id: 'autoencoder_conv', name: 'Autoencoder (Conv)', description: 'Image compression and reconstruction.', nodes: [ { id: 'in', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Image', type: LayerType.INPUT, params: {modality: 'Image', shape: '1,28,28'}} }, // Encoder { id: 'enc1', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Conv 1', type: LayerType.CONV2D, params: {out_channels: 16, 
kernel_size: 3, stride: 2, padding: 1}} }, { id: 'act1', type: 'custom', position: {x: 300, y: 180}, data: {label: 'ReLU', type: LayerType.RELU, params: {}} }, { id: 'enc2', type: 'custom', position: {x: 300, y: 260}, data: {label: 'Conv 2', type: LayerType.CONV2D, params: {out_channels: 32, kernel_size: 3, stride: 2, padding: 1}} }, { id: 'act2', type: 'custom', position: {x: 300, y: 340}, data: {label: 'ReLU', type: LayerType.RELU, params: {}} }, // Decoder { id: 'dec1', type: 'custom', position: {x: 300, y: 420}, data: {label: 'Deconv 1', type: LayerType.CONV_TRANSPOSE2D, params: {out_channels: 16, kernel_size: 3, stride: 2, padding: 1}} }, // Needs output_padding often in code but usually inferred { id: 'act3', type: 'custom', position: {x: 300, y: 500}, data: {label: 'ReLU', type: LayerType.RELU, params: {}} }, { id: 'dec2', type: 'custom', position: {x: 300, y: 580}, data: {label: 'Deconv 2', type: LayerType.CONV_TRANSPOSE2D, params: {out_channels: 1, kernel_size: 3, stride: 2, padding: 1}} }, { id: 'sig', type: 'custom', position: {x: 300, y: 660}, data: {label: 'Sigmoid', type: LayerType.SIGMOID, params: {}} } ], edges: [ { id: '1', source: 'in', target: 'enc1' }, { id: '2', source: 'enc1', target: 'act1' }, { id: '3', source: 'act1', target: 'enc2' }, { id: '4', source: 'enc2', target: 'act2' }, { id: '5', source: 'act2', target: 'dec1' }, { id: '6', source: 'dec1', target: 'act3' }, { id: '7', source: 'act3', target: 'dec2' }, { id: '8', source: 'dec2', target: 'sig' } ] }, 'video_3d_cnn': { id: 'video_3d_cnn', name: '3D CNN (Video)', description: 'Video Classification using 3D Convolutions', nodes: [ { id: 'in', type: 'custom', position: {x: 300, y: 0}, data: {label: 'Video Input', type: LayerType.INPUT, params: {modality: '3D Volume', shape: '3,16,112,112'}} }, { id: 'c3d1', type: 'custom', position: {x: 300, y: 100}, data: {label: 'Conv3D 1', type: LayerType.CONV3D, params: {out_channels: 64, kernel_size: 3}} }, { id: 'act1', type: 'custom', position: 
{ x: 300, y: 200 },
      data: { label: 'ReLU', type: LayerType.RELU, params: {} },
    },
    {
      id: 'pool1',
      type: 'custom',
      position: { x: 300, y: 300 },
      data: { label: 'MaxPool3D', type: LayerType.MAXPOOL3D, params: { kernel_size: 2, stride: 2 } },
    },
    {
      id: 'c3d2',
      type: 'custom',
      position: { x: 300, y: 400 },
      data: { label: 'Conv3D 2', type: LayerType.CONV3D, params: { out_channels: 128, kernel_size: 3 } },
    },
    {
      id: 'gap',
      type: 'custom',
      position: { x: 300, y: 500 },
      data: { label: 'GlobalAvgPool', type: LayerType.GLOBAL_AVG_POOL, params: {} },
    },
    {
      id: 'fc',
      type: 'custom',
      position: { x: 300, y: 600 },
      data: { label: 'Classifier', type: LayerType.LINEAR, params: { out_features: 400 } },
    },
  ],
  // Straight chain: in -> c3d1 -> act1 -> pool1 -> c3d2 -> gap -> fc.
  edges: [
    { id: '1', source: 'in', target: 'c3d1' },
    { id: '2', source: 'c3d1', target: 'act1' },
    { id: '3', source: 'act1', target: 'pool1' },
    { id: '4', source: 'pool1', target: 'c3d2' },
    { id: '5', source: 'c3d2', target: 'gap' },
    { id: '6', source: 'gap', target: 'fc' },
  ],
},

// Super-resolution template: three Conv2D stages (Tanh after the first two)
// followed by a PixelShuffle and an output node, wired as a single chain.
'super_res_gan': {
  id: 'super_res_gan',
  name: 'Super Res (ESPCN)',
  description: 'Efficient Sub-Pixel Convolutional Neural Network',
  nodes: [
    {
      id: 'in',
      type: 'custom',
      position: { x: 300, y: 0 },
      data: { label: 'Low Res Image', type: LayerType.INPUT, params: { modality: 'Image' } },
    },
    {
      id: 'conv1',
      type: 'custom',
      position: { x: 300, y: 100 },
      data: { label: 'Conv1 (Feature)', type: LayerType.CONV2D, params: { out_channels: 64, kernel_size: 5 } },
    },
    {
      id: 'tanh1',
      type: 'custom',
      position: { x: 300, y: 200 },
      data: { label: 'Tanh', type: LayerType.TANH, params: {} },
    },
    {
      id: 'conv2',
      type: 'custom',
      position: { x: 300, y: 300 },
      data: { label: 'Conv2 (Map)', type: LayerType.CONV2D, params: { out_channels: 32, kernel_size: 3 } },
    },
    {
      id: 'tanh2',
      type: 'custom',
      position: { x: 300, y: 400 },
      data: { label: 'Tanh', type: LayerType.TANH, params: {} },
    },
    {
      id: 'conv3',
      type: 'custom',
      position: { x: 300, y: 500 },
      // 3*r^2 (r=2) = 12; r is the upscale_factor set on the PixelShuffle node below.
      data: { label: 'Conv3 (Shuffle)', type: LayerType.CONV2D, params: { out_channels: 12, kernel_size: 3 } },
    },
    {
      id: 'ps',
      type: 'custom',
      position: { x: 300, y: 600 },
      data: { label: 'PixelShuffle', type: LayerType.PIXEL_SHUFFLE, params: { upscale_factor: 2 } },
    },
    {
      id: 'out',
      type: 'custom',
      position: { x: 300, y: 700 },
      data: { label: 'High Res', type: LayerType.OUTPUT, params: {} },
    },
  ],
  edges: [
    { id: '1', source: 'in', target: 'conv1' },
    { id: '2', source: 'conv1', target: 'tanh1' },
    { id: '3', source: 'tanh1', target: 'conv2' },
    { id: '4', source: 'conv2', target: 'tanh2' },
    { id: '5', source: 'tanh2', target: 'conv3' },
    { id: '6', source: 'conv3', target: 'ps' },
    { id: '7', source: 'ps', target: 'out' },
  ],
},

// Residual-block template: conv1 -> relu1 -> conv2 on the main path, with a
// second edge from the input straight into the ADD node.
'resnet_mini': {
  id: 'resnet_mini',
  name: 'ResNet Block (Mini)',
  description: 'CNN with Skip Connection.',
  nodes: [
    {
      id: 'in',
      type: 'custom',
      position: { x: 200, y: 0 },
      data: { label: 'Input', type: LayerType.INPUT, params: { modality: 'Image' } },
    },
    {
      id: 'conv1',
      type: 'custom',
      position: { x: 200, y: 100 },
      data: { label: 'Conv1', type: LayerType.CONV2D, params: { out_channels: 64 } },
    },
    {
      id: 'relu1',
      type: 'custom',
      position: { x: 200, y: 200 },
      data: { label: 'ReLU', type: LayerType.RELU, params: {} },
    },
    {
      id: 'conv2',
      type: 'custom',
      position: { x: 200, y: 300 },
      data: { label: 'Conv2', type: LayerType.CONV2D, params: { out_channels: 64 } },
    },
    // Skip connection path handled by edges, but node layout implies it
    {
      id: 'add',
      type: 'custom',
      position: { x: 200, y: 450 },
      data: { label: 'Residual Add', type: LayerType.ADD, params: {} },
    },
    {
      id: 'relu2',
      type: 'custom',
      position: { x: 200, y: 550 },
      data: { label: 'Final ReLU', type: LayerType.RELU, params: {} },
    },
  ],
  edges: [
    { id: '1', source: 'in', target: 'conv1' },
    { id: '2', source: 'conv1', target: 'relu1' },
    { id: '3', source: 'relu1', target: 'conv2' },
    { id: '4', source: 'conv2', target: 'add' },
    // Skip connection
    // NOTE(review): this adds the raw 'in' node to conv2's output (out_channels: 64);
    // confirm both ADD operands end up with compatible shapes.
    { id: '5', source: 'in', target: 'add' },
    { id: '6', source: 'add', target: 'relu2' },
  ],
},

// Mixture-of-experts template: embedding -> attention -> MoE block -> RMSNorm
// -> linear output, wired as a single chain.
'moe_transformer': {
  id: 'moe_transformer',
  name: 'Mixture of Experts',
  description: 'Sparse MoE model with routing.',
  nodes: [
    {
      id: 'in',
      type: 'custom',
      position: { x: 300, y: 0 },
      data: { label: 'Input', type: LayerType.INPUT, params: { modality: 'Text' } },
    },
    {
      id: 'emb',
      type: 'custom',
      position: { x: 300, y: 100 },
      data: { label: 'Embedding', type: LayerType.EMBEDDING, params: {} },
    },
    {
      id: 'att',
      type: 'custom',
      position: { x: 300, y: 200 },
      data: { label: 'Self Attention', type: LayerType.ATTENTION, params: {} },
    },
    {
      id: 'moe',
      type: 'custom',
      position: { x: 300, y: 300 },
      data: { label: 'Sparse MoE Block', type: LayerType.MOE_BLOCK, params: { num_experts: 8, top_k: 2 } },
    },
    {
      id: 'norm',
      type: 'custom',
      position: { x: 300, y: 400 },
      data: { label: 'RMSNorm', type: LayerType.RMSNORM, params: {} },
    },
    {
      id: 'out',
      type: 'custom',
      position: { x: 300, y: 500 },
      data: { label: 'Output', type: LayerType.LINEAR, params: {} },
    },
  ],
  edges: [
    { id: '1', source: 'in', target: 'emb' },
    { id: '2', source: 'emb', target: 'att' },
    { id: '3', source: 'att', target: 'moe' },
    { id: '4', source: 'moe', target: 'norm' },
    { id: '5', source: 'norm', target: 'out' },
  ],
},

// Vision-language template: an image branch (patch embed -> encoder -> linear
// projection) and a text branch (embedding) are concatenated, then passed
// through a transformer decoder to the output node.
'vlm_llava': {
  id: 'vlm_llava',
  name: 'VLM (LlaVA Style)',
  description: 'Visual Language Model connecting Vision Encoder to LLM.',
  nodes: [
    // Vision Branch
    {
      id: 'img',
      type: 'custom',
      position: { x: 100, y: 0 },
      data: { label: 'Image Input', type: LayerType.INPUT, params: { modality: 'Image' } },
    },
    {
      id: 'patch',
      type: 'custom',
      position: { x: 100, y: 100 },
      data: { label: 'Patch Embed', type: LayerType.PATCH_EMBED, params: { patch_size: 14 } },
    },
    {
      id: 'vit',
      type: 'custom',
      position: { x: 100, y: 200 },
      data: { label: 'ViT Encoder', type: LayerType.TRANSFORMER_ENCODER, params: {} },
    },
    {
      id: 'proj',
      type: 'custom',
      position: { x: 100, y: 300 },
      data: { label: 'Projection', type: LayerType.LINEAR, params: { out_features: 4096 } },
    },
    // Text Branch
    {
      id: 'txt',
      type: 'custom',
      position: { x: 500, y: 0 },
      data: { label: 'Text Prompts', type: LayerType.INPUT, params: { modality: 'Text' } },
    },
    {
      id: 'temb',
      type: 'custom',
      position: { x: 500, y: 200 },
      data: { label: 'Text Embed', type: LayerType.EMBEDDING, params: { embedding_dim: 4096 } },
    },
    // Merge
    {
      id: 'cat',
      type: 'custom',
      position: { x: 300, y: 400 },
      data: { label: 'Concat Tokens', type: LayerType.CONCAT, params: { dim: 1 } },
    },
    {
      id: 'llm',
      type: 'custom',
      position: { x: 300, y: 500 },
      data: { label: 'LLM Decoder', type: LayerType.TRANSFORMER_DECODER, params: { d_model: 4096 } },
    },
    {
      id: 'out',
      type: 'custom',
      position: { x: 300, y: 600 },
      data: { label: 'Response', type: LayerType.OUTPUT, params: {} },
    },
  ],
  edges: [
    { id: '1', source: 'img', target: 'patch' },
    { id: '2', source: 'patch', target: 'vit' },
    { id: '3', source: 'vit', target: 'proj' },
    { id: '4', source: 'txt', target: 'temb' },
    { id: '5', source: 'proj', target: 'cat' },
    { id: '6', source: 'temb', target: 'cat' },
    { id: '7', source: 'cat', target: 'llm' },
    { id: '8', source: 'llm', target: 'out' },
  ],
},

// Segmentation template: the image encoder and the prompt encoder both feed
// the mask decoder, which feeds the output node.
'sam_model': {
  id: 'sam_model',
  name: 'Segment Anything (SAM)',
  description: 'Image Encoder + Prompt Encoder + Mask Decoder.',
  nodes: [
    {
      id: 'img',
      type: 'custom',
      position: { x: 100, y: 0 },
      data: { label: 'Image', type: LayerType.INPUT, params: { modality: 'Image' } },
    },
    {
      id: 'enc',
      type: 'custom',
      position: { x: 100, y: 150 },
      data: { label: 'Image Encoder (ViT)', type: LayerType.TRANSFORMER_ENCODER, params: {} },
    },
    {
      id: 'prm',
      type: 'custom',
      position: { x: 500, y: 0 },
      data: { label: 'Points/Boxes', type: LayerType.INPUT, params: { modality: 'Tensor' } },
    },
    {
      id: 'penc',
      type: 'custom',
      position: { x: 500, y: 150 },
      data: { label: 'Prompt Enc', type: LayerType.SAM_PROMPT_ENCODER, params: {} },
    },
    {
      id: 'dec',
      type: 'custom',
      position: { x: 300, y: 300 },
      data: { label: 'Mask Decoder', type: LayerType.SAM_MASK_DECODER, params: {} },
    },
    {
      id: 'out',
      type: 'custom',
      position: { x: 300, y: 400 },
      data: { label: 'Masks', type: LayerType.OUTPUT, params: {} },
    },
  ],
  edges: [
    { id: '1', source: 'img', target: 'enc' },
    { id: '2', source: 'enc', target: 'dec' },
    { id: '3', source: 'prm', target: 'penc' },
    { id: '4', source: 'penc', target: 'dec' },
    { id: '5', source: 'dec', target: 'out' },
  ],
},
// Agent template: environment state -> transformer decoder backbone ->
// action head -> output node, wired as a single chain.
'lam_agent': {
  id: 'lam_agent',
  name: 'Large Action Model (LAM)',
  description: 'LLM backbone with Action Head for agents.',
  nodes: [
    {
      id: 'state',
      type: 'custom',
      position: { x: 300, y: 0 },
      data: { label: 'Env State', type: LayerType.INPUT, params: { modality: 'State' } },
    },
    {
      id: 'llm',
      type: 'custom',
      position: { x: 300, y: 150 },
      data: { label: 'LLM Backbone', type: LayerType.TRANSFORMER_DECODER, params: { d_model: 1024 } },
    },
    {
      id: 'head',
      type: 'custom',
      position: { x: 300, y: 300 },
      data: { label: 'Action Head', type: LayerType.ACTION_HEAD, params: { num_actions: 50, action_type: 'Discrete' } },
    },
    {
      id: 'out',
      type: 'custom',
      position: { x: 300, y: 400 },
      data: { label: 'Action Logits', type: LayerType.OUTPUT, params: {} },
    },
  ],
  edges: [
    { id: '1', source: 'state', target: 'llm' },
    { id: '2', source: 'llm', target: 'head' },
    { id: '3', source: 'head', target: 'out' },
  ],
},

// Latent-diffusion template: the latent input and the embedded time step are
// combined by an ADD node, then passed through a Conv2D node to the output.
'lcm_diff': {
  id: 'lcm_diff',
  name: 'Latent Consistency (LCM)',
  description: 'Diffusion backbone with Time Embeddings.',
  nodes: [
    {
      id: 'lat',
      type: 'custom',
      position: { x: 200, y: 0 },
      data: { label: 'Latent Input', type: LayerType.INPUT, params: { modality: 'Latent', shape: '4,64,64' } },
    },
    {
      id: 'time',
      type: 'custom',
      position: { x: 500, y: 0 },
      data: { label: 'Time Step', type: LayerType.INPUT, params: { modality: 'Tensor', shape: '1' } },
    },
    {
      id: 'temb',
      type: 'custom',
      position: { x: 500, y: 100 },
      data: { label: 'Time Embed', type: LayerType.TIME_EMBEDDING, params: {} },
    },
    {
      // The node id is 'cat', but the layer type is ADD (not CONCAT).
      id: 'cat',
      type: 'custom',
      position: { x: 350, y: 250 },
      data: { label: 'Inject Time', type: LayerType.ADD, params: {} },
    },
    {
      // Labelled 'UNet Block' but declared as a single CONV2D layer.
      id: 'unet',
      type: 'custom',
      position: { x: 350, y: 350 },
      data: { label: 'UNet Block', type: LayerType.CONV2D, params: {} },
    },
    {
      id: 'out',
      type: 'custom',
      position: { x: 350, y: 450 },
      data: { label: 'Denoised', type: LayerType.OUTPUT, params: {} },
    },
  ],
  edges: [
    { id: '1', source: 'lat', target: 'cat' },
    { id: '2', source: 'time', target: 'temb' },
    { id: '3', source: 'temb', target: 'cat' },
    { id: '4', source: 'cat', target: 'unet' },
    { id: '5', source: 'unet', target: 'out' },
  ],
}
};