Spaces:
Running
Running
Update constants.ts
Browse files
- constants.ts: +191 -2
constants.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
|
|
|
|
| 2 |
import { LayerDefinition, LayerType, GraphTemplate } from './types';
|
| 3 |
|
| 4 |
export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
@@ -10,7 +11,7 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
| 10 |
category: 'Core',
|
| 11 |
parameters: [
|
| 12 |
{ name: 'name', type: 'string', label: 'Name', default: 'input_1', description: 'Variable name in forward()' },
|
| 13 |
-
{ name: 'modality', type: 'select', label: 'Modality', default: 'Tensor', options: ['Tensor', 'Image', 'Text', 'Audio', 'Video', 'Latent', 'State', '3D Volume', 'Point Cloud'] },
|
| 14 |
{ name: 'shape', type: 'string', label: 'Shape (e.g. 3,224,224)', default: '3, 224, 224' },
|
| 15 |
{ name: 'batch_size', type: 'number', label: 'Batch Size', default: 32 },
|
| 16 |
{ name: 'dtype', type: 'select', label: 'Data Type', default: 'float32', options: ['float32', 'int64', 'bool'] }
|
|
@@ -61,6 +62,194 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
| 61 |
]
|
| 62 |
},
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
// --- DETECTION (YOLO) ---
|
| 65 |
[LayerType.C2F_BLOCK]: {
|
| 66 |
type: LayerType.C2F_BLOCK,
|
|
@@ -1122,4 +1311,4 @@ export const TEMPLATES: Record<string, GraphTemplate> = {
|
|
| 1122 |
{ id: '4', source: 'cat', target: 'unet' }, { id: '5', source: 'unet', target: 'out' }
|
| 1123 |
]
|
| 1124 |
}
|
| 1125 |
-
};
|
|
|
|
| 1 |
|
| 2 |
+
|
| 3 |
import { LayerDefinition, LayerType, GraphTemplate } from './types';
|
| 4 |
|
| 5 |
export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
|
|
| 11 |
category: 'Core',
|
| 12 |
parameters: [
|
| 13 |
{ name: 'name', type: 'string', label: 'Name', default: 'input_1', description: 'Variable name in forward()' },
|
| 14 |
+
{ name: 'modality', type: 'select', label: 'Modality', default: 'Tensor', options: ['Tensor', 'Image', 'Text', 'Audio', 'Video', 'Latent', 'State', '3D Volume', 'Point Cloud', 'Radar', 'Lidar'] },
|
| 15 |
{ name: 'shape', type: 'string', label: 'Shape (e.g. 3,224,224)', default: '3, 224, 224' },
|
| 16 |
{ name: 'batch_size', type: 'number', label: 'Batch Size', default: 32 },
|
| 17 |
{ name: 'dtype', type: 'select', label: 'Data Type', default: 'float32', options: ['float32', 'int64', 'bool'] }
|
|
|
|
| 62 |
]
|
| 63 |
},
|
| 64 |
|
| 65 |
+
// --- VIDEO / GENERATION ---
|
| 66 |
+
[LayerType.VIDEO_DIFFUSION_BLOCK]: {
|
| 67 |
+
type: LayerType.VIDEO_DIFFUSION_BLOCK,
|
| 68 |
+
label: 'Video Diffusion',
|
| 69 |
+
description: '3D UNet Block for Video Gen',
|
| 70 |
+
category: 'Video',
|
| 71 |
+
parameters: [
|
| 72 |
+
{ name: 'in_channels', type: 'number', label: 'Channels', default: 128 },
|
| 73 |
+
{ name: 'time_dim', type: 'number', label: 'Time Emb Dim', default: 512 },
|
| 74 |
+
{ name: 'use_temporal_attn', type: 'boolean', label: 'Temp Attn', default: true }
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
[LayerType.SPATIO_TEMPORAL_ATTN]: {
|
| 78 |
+
type: LayerType.SPATIO_TEMPORAL_ATTN,
|
| 79 |
+
label: 'Spatio-Temporal Attn',
|
| 80 |
+
description: 'Attention over Space & Time',
|
| 81 |
+
category: 'Video',
|
| 82 |
+
parameters: [
|
| 83 |
+
{ name: 'dim', type: 'number', label: 'Dimension', default: 512 },
|
| 84 |
+
{ name: 'num_heads', type: 'number', label: 'Heads', default: 8 },
|
| 85 |
+
{ name: 'frames', type: 'number', label: 'Max Frames', default: 16 }
|
| 86 |
+
]
|
| 87 |
+
},
|
| 88 |
+
[LayerType.VIDEO_TOKENIZER]: {
|
| 89 |
+
type: LayerType.VIDEO_TOKENIZER,
|
| 90 |
+
label: 'Video Tokenizer',
|
| 91 |
+
description: '3D VQ-VAE / Magvit style',
|
| 92 |
+
category: 'Video',
|
| 93 |
+
parameters: [
|
| 94 |
+
{ name: 'patch_size_t', type: 'number', label: 'Time Patch', default: 2 },
|
| 95 |
+
{ name: 'patch_size_hw', type: 'number', label: 'Spatial Patch', default: 16 },
|
| 96 |
+
{ name: 'vocab_size', type: 'number', label: 'Codebook Size', default: 8192 }
|
| 97 |
+
]
|
| 98 |
+
},
|
| 99 |
+
[LayerType.FRAME_INTERPOLATOR]: {
|
| 100 |
+
type: LayerType.FRAME_INTERPOLATOR,
|
| 101 |
+
label: 'Frame Interpolator',
|
| 102 |
+
description: 'Upsamples video frame rate',
|
| 103 |
+
category: 'Video',
|
| 104 |
+
parameters: [
|
| 105 |
+
{ name: 'scale_factor', type: 'number', label: 'Scale Factor', default: 2 },
|
| 106 |
+
{ name: 'mode', type: 'select', label: 'Mode', default: 'bilinear', options: ['bilinear', 'optical_flow', 'pixel_shuffle'] }
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
[LayerType.TEMPORAL_SHIFT]: {
|
| 110 |
+
type: LayerType.TEMPORAL_SHIFT,
|
| 111 |
+
label: 'Temporal Shift',
|
| 112 |
+
description: 'TSM Module (Zero params)',
|
| 113 |
+
category: 'Video',
|
| 114 |
+
parameters: [
|
| 115 |
+
{ name: 'n_segment', type: 'number', label: 'Segments', default: 8 },
|
| 116 |
+
{ name: 'fold_div', type: 'number', label: 'Fold Divisor', default: 8 }
|
| 117 |
+
]
|
| 118 |
+
},
|
| 119 |
+
[LayerType.NON_LOCAL_BLOCK]: {
|
| 120 |
+
type: LayerType.NON_LOCAL_BLOCK,
|
| 121 |
+
label: 'Non-Local Block',
|
| 122 |
+
description: 'Global context block',
|
| 123 |
+
category: 'Video',
|
| 124 |
+
parameters: [
|
| 125 |
+
{ name: 'in_channels', type: 'number', label: 'Channels', default: 64 },
|
| 126 |
+
{ name: 'mode', type: 'select', label: 'Mode', default: 'embedded_gaussian', options: ['embedded_gaussian', 'gaussian', 'dot', 'concat'] }
|
| 127 |
+
]
|
| 128 |
+
},
|
| 129 |
+
[LayerType.MULTIMODAL_FUSION]: {
|
| 130 |
+
type: LayerType.MULTIMODAL_FUSION,
|
| 131 |
+
label: 'Multimodal Fusion',
|
| 132 |
+
description: 'Merge Video, Audio, Text',
|
| 133 |
+
category: 'Video',
|
| 134 |
+
parameters: [
|
| 135 |
+
{ name: 'video_dim', type: 'number', label: 'Video Dim', default: 512 },
|
| 136 |
+
{ name: 'audio_dim', type: 'number', label: 'Audio Dim', default: 256 },
|
| 137 |
+
{ name: 'text_dim', type: 'number', label: 'Text Dim', default: 768 },
|
| 138 |
+
{ name: 'out_dim', type: 'number', label: 'Fused Dim', default: 512 }
|
| 139 |
+
]
|
| 140 |
+
},
|
| 141 |
+
|
| 142 |
+
// --- OCR (Text Recognition) ---
|
| 143 |
+
[LayerType.TPS_TRANSFORM]: {
|
| 144 |
+
type: LayerType.TPS_TRANSFORM,
|
| 145 |
+
label: 'TPS Transform',
|
| 146 |
+
description: 'Rectifies curved text (Thin Plate Spline)',
|
| 147 |
+
category: 'OCR',
|
| 148 |
+
parameters: [
|
| 149 |
+
{ name: 'fiducial_points', type: 'number', label: 'Control Points', default: 20 },
|
| 150 |
+
{ name: 'output_size', type: 'string', label: 'Out Size (HxW)', default: '32,100' }
|
| 151 |
+
]
|
| 152 |
+
},
|
| 153 |
+
[LayerType.CRNN_BLOCK]: {
|
| 154 |
+
type: LayerType.CRNN_BLOCK,
|
| 155 |
+
label: 'CRNN Block',
|
| 156 |
+
description: 'Conv + BiLSTM for Text Sequence',
|
| 157 |
+
category: 'OCR',
|
| 158 |
+
parameters: [
|
| 159 |
+
{ name: 'img_h', type: 'number', label: 'Image Height', default: 32 },
|
| 160 |
+
{ name: 'hidden_size', type: 'number', label: 'LSTM Hidden', default: 256 },
|
| 161 |
+
{ name: 'num_classes', type: 'number', label: 'Num Chars', default: 37 }
|
| 162 |
+
]
|
| 163 |
+
},
|
| 164 |
+
[LayerType.CTC_DECODER]: {
|
| 165 |
+
type: LayerType.CTC_DECODER,
|
| 166 |
+
label: 'CTC Decoder',
|
| 167 |
+
description: 'Connectionist Temporal Classification',
|
| 168 |
+
category: 'OCR',
|
| 169 |
+
parameters: [
|
| 170 |
+
{ name: 'blank_index', type: 'number', label: 'Blank Index', default: 0 },
|
| 171 |
+
{ name: 'reduction', type: 'select', label: 'Reduction', default: 'mean', options: ['mean', 'sum', 'none'] }
|
| 172 |
+
]
|
| 173 |
+
},
|
| 174 |
+
|
| 175 |
+
// --- ROBOTICS / MOTION / DEPTH ---
|
| 176 |
+
[LayerType.DEPTH_DECODER]: {
|
| 177 |
+
type: LayerType.DEPTH_DECODER,
|
| 178 |
+
label: 'Depth Decoder',
|
| 179 |
+
description: 'Estimates Monocular Depth Map',
|
| 180 |
+
category: 'Robotics',
|
| 181 |
+
parameters: [
|
| 182 |
+
{ name: 'min_depth', type: 'number', label: 'Min Depth (m)', default: 0.1 },
|
| 183 |
+
{ name: 'max_depth', type: 'number', label: 'Max Depth (m)', default: 100.0 },
|
| 184 |
+
{ name: 'backbone_scale', type: 'number', label: 'Scale Factor', default: 1 }
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
[LayerType.DISPARITY_HEAD]: {
|
| 188 |
+
type: LayerType.DISPARITY_HEAD,
|
| 189 |
+
label: 'Disparity Head',
|
| 190 |
+
description: 'Stereo Vision Disparity Estimation',
|
| 191 |
+
category: 'Robotics',
|
| 192 |
+
parameters: [
|
| 193 |
+
{ name: 'max_disp', type: 'number', label: 'Max Disparity', default: 192 },
|
| 194 |
+
{ name: 'refine_iter', type: 'number', label: 'Refine Iters', default: 3 }
|
| 195 |
+
]
|
| 196 |
+
},
|
| 197 |
+
[LayerType.OPTICAL_FLOW]: {
|
| 198 |
+
type: LayerType.OPTICAL_FLOW,
|
| 199 |
+
label: 'Optical Flow',
|
| 200 |
+
description: 'Estimates pixel motion between frames',
|
| 201 |
+
category: 'Robotics',
|
| 202 |
+
parameters: [
|
| 203 |
+
{ name: 'input_channels', type: 'number', label: 'In Channels', default: 6 },
|
| 204 |
+
{ name: 'flow_dim', type: 'number', label: 'Flow Dim', default: 2 },
|
| 205 |
+
{ name: 'corr_levels', type: 'number', label: 'Correlation Lvl', default: 4 }
|
| 206 |
+
]
|
| 207 |
+
},
|
| 208 |
+
[LayerType.VELOCITY_HEAD]: {
|
| 209 |
+
type: LayerType.VELOCITY_HEAD,
|
| 210 |
+
label: 'Velocity Head',
|
| 211 |
+
description: 'Predicts object speed/trajectory',
|
| 212 |
+
category: 'Robotics',
|
| 213 |
+
parameters: [
|
| 214 |
+
{ name: 'input_dim', type: 'number', label: 'Input Dim', default: 512 },
|
| 215 |
+
{ name: 'time_horizon', type: 'number', label: 'Horizon (sec)', default: 3 },
|
| 216 |
+
{ name: 'mode', type: 'select', label: 'Mode', default: 'Linear', options: ['Linear', 'Angular', 'Full State'] }
|
| 217 |
+
]
|
| 218 |
+
},
|
| 219 |
+
[LayerType.KALMAN_FILTER]: {
|
| 220 |
+
type: LayerType.KALMAN_FILTER,
|
| 221 |
+
label: 'Kalman Filter',
|
| 222 |
+
description: 'Differentiable State Estimation',
|
| 223 |
+
category: 'Robotics',
|
| 224 |
+
parameters: [
|
| 225 |
+
{ name: 'state_dim', type: 'number', label: 'State Dim', default: 4 },
|
| 226 |
+
{ name: 'measure_dim', type: 'number', label: 'Measure Dim', default: 2 },
|
| 227 |
+
{ name: 'learnable_process_noise', type: 'boolean', label: 'Learn Noise', default: true }
|
| 228 |
+
]
|
| 229 |
+
},
|
| 230 |
+
[LayerType.BEV_TRANSFORM]: {
|
| 231 |
+
type: LayerType.BEV_TRANSFORM,
|
| 232 |
+
label: 'BEV Transformer',
|
| 233 |
+
description: 'Perspective to Bird\'s Eye View',
|
| 234 |
+
category: 'Robotics',
|
| 235 |
+
parameters: [
|
| 236 |
+
{ name: 'bev_h', type: 'number', label: 'BEV Height', default: 200 },
|
| 237 |
+
{ name: 'bev_w', type: 'number', label: 'BEV Width', default: 200 },
|
| 238 |
+
{ name: 'num_queries', type: 'number', label: 'Num Queries', default: 900 }
|
| 239 |
+
]
|
| 240 |
+
},
|
| 241 |
+
[LayerType.RADAR_ENCODER]: {
|
| 242 |
+
type: LayerType.RADAR_ENCODER,
|
| 243 |
+
label: 'Radar Encoder',
|
| 244 |
+
description: 'PointPillars style radar encoding',
|
| 245 |
+
category: 'Robotics',
|
| 246 |
+
parameters: [
|
| 247 |
+
{ name: 'max_points', type: 'number', label: 'Max Points', default: 100 },
|
| 248 |
+
{ name: 'num_features', type: 'number', label: 'Num Feats', default: 5 },
|
| 249 |
+
{ name: 'voxel_size', type: 'string', label: 'Voxel Size', default: '0.2,0.2,4' }
|
| 250 |
+
]
|
| 251 |
+
},
|
| 252 |
+
|
| 253 |
// --- DETECTION (YOLO) ---
|
| 254 |
[LayerType.C2F_BLOCK]: {
|
| 255 |
type: LayerType.C2F_BLOCK,
|
|
|
|
| 1311 |
{ id: '4', source: 'cat', target: 'unet' }, { id: '5', source: 'unet', target: 'out' }
|
| 1312 |
]
|
| 1313 |
}
|
| 1314 |
+
};
|