wuhp committed on
Commit
d422478
·
verified ·
1 Parent(s): 96385b4

Update constants.ts

Browse files
Files changed (1) hide show
  1. constants.ts +191 -2
constants.ts CHANGED
@@ -1,4 +1,5 @@
1
 
 
2
  import { LayerDefinition, LayerType, GraphTemplate } from './types';
3
 
4
  export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
@@ -10,7 +11,7 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
10
  category: 'Core',
11
  parameters: [
12
  { name: 'name', type: 'string', label: 'Name', default: 'input_1', description: 'Variable name in forward()' },
13
- { name: 'modality', type: 'select', label: 'Modality', default: 'Tensor', options: ['Tensor', 'Image', 'Text', 'Audio', 'Video', 'Latent', 'State', '3D Volume', 'Point Cloud'] },
14
  { name: 'shape', type: 'string', label: 'Shape (e.g. 3,224,224)', default: '3, 224, 224' },
15
  { name: 'batch_size', type: 'number', label: 'Batch Size', default: 32 },
16
  { name: 'dtype', type: 'select', label: 'Data Type', default: 'float32', options: ['float32', 'int64', 'bool'] }
@@ -61,6 +62,194 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
61
  ]
62
  },
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  // --- DETECTION (YOLO) ---
65
  [LayerType.C2F_BLOCK]: {
66
  type: LayerType.C2F_BLOCK,
@@ -1122,4 +1311,4 @@ export const TEMPLATES: Record<string, GraphTemplate> = {
1122
  { id: '4', source: 'cat', target: 'unet' }, { id: '5', source: 'unet', target: 'out' }
1123
  ]
1124
  }
1125
- };
 
1
 
2
+
3
  import { LayerDefinition, LayerType, GraphTemplate } from './types';
4
 
5
  export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
 
11
  category: 'Core',
12
  parameters: [
13
  { name: 'name', type: 'string', label: 'Name', default: 'input_1', description: 'Variable name in forward()' },
14
+ { name: 'modality', type: 'select', label: 'Modality', default: 'Tensor', options: ['Tensor', 'Image', 'Text', 'Audio', 'Video', 'Latent', 'State', '3D Volume', 'Point Cloud', 'Radar', 'Lidar'] },
15
  { name: 'shape', type: 'string', label: 'Shape (e.g. 3,224,224)', default: '3, 224, 224' },
16
  { name: 'batch_size', type: 'number', label: 'Batch Size', default: 32 },
17
  { name: 'dtype', type: 'select', label: 'Data Type', default: 'float32', options: ['float32', 'int64', 'bool'] }
 
62
  ]
63
  },
64
 
65
+ // --- VIDEO / GENERATION ---
66
+ [LayerType.VIDEO_DIFFUSION_BLOCK]: {
67
+ type: LayerType.VIDEO_DIFFUSION_BLOCK,
68
+ label: 'Video Diffusion',
69
+ description: '3D UNet Block for Video Gen',
70
+ category: 'Video',
71
+ parameters: [
72
+ { name: 'in_channels', type: 'number', label: 'Channels', default: 128 },
73
+ { name: 'time_dim', type: 'number', label: 'Time Emb Dim', default: 512 },
74
+ { name: 'use_temporal_attn', type: 'boolean', label: 'Temp Attn', default: true }
75
+ ]
76
+ },
77
+ [LayerType.SPATIO_TEMPORAL_ATTN]: {
78
+ type: LayerType.SPATIO_TEMPORAL_ATTN,
79
+ label: 'Spatio-Temporal Attn',
80
+ description: 'Attention over Space & Time',
81
+ category: 'Video',
82
+ parameters: [
83
+ { name: 'dim', type: 'number', label: 'Dimension', default: 512 },
84
+ { name: 'num_heads', type: 'number', label: 'Heads', default: 8 },
85
+ { name: 'frames', type: 'number', label: 'Max Frames', default: 16 }
86
+ ]
87
+ },
88
+ [LayerType.VIDEO_TOKENIZER]: {
89
+ type: LayerType.VIDEO_TOKENIZER,
90
+ label: 'Video Tokenizer',
91
+ description: '3D VQ-VAE / Magvit style',
92
+ category: 'Video',
93
+ parameters: [
94
+ { name: 'patch_size_t', type: 'number', label: 'Time Patch', default: 2 },
95
+ { name: 'patch_size_hw', type: 'number', label: 'Spatial Patch', default: 16 },
96
+ { name: 'vocab_size', type: 'number', label: 'Codebook Size', default: 8192 }
97
+ ]
98
+ },
99
+ [LayerType.FRAME_INTERPOLATOR]: {
100
+ type: LayerType.FRAME_INTERPOLATOR,
101
+ label: 'Frame Interpolator',
102
+ description: 'Upsamples video frame rate',
103
+ category: 'Video',
104
+ parameters: [
105
+ { name: 'scale_factor', type: 'number', label: 'Scale Factor', default: 2 },
106
+ { name: 'mode', type: 'select', label: 'Mode', default: 'bilinear', options: ['bilinear', 'optical_flow', 'pixel_shuffle'] }
107
+ ]
108
+ },
109
+ [LayerType.TEMPORAL_SHIFT]: {
110
+ type: LayerType.TEMPORAL_SHIFT,
111
+ label: 'Temporal Shift',
112
+ description: 'TSM Module (Zero params)',
113
+ category: 'Video',
114
+ parameters: [
115
+ { name: 'n_segment', type: 'number', label: 'Segments', default: 8 },
116
+ { name: 'fold_div', type: 'number', label: 'Fold Divisor', default: 8 }
117
+ ]
118
+ },
119
+ [LayerType.NON_LOCAL_BLOCK]: {
120
+ type: LayerType.NON_LOCAL_BLOCK,
121
+ label: 'Non-Local Block',
122
+ description: 'Global context block',
123
+ category: 'Video',
124
+ parameters: [
125
+ { name: 'in_channels', type: 'number', label: 'Channels', default: 64 },
126
+ { name: 'mode', type: 'select', label: 'Mode', default: 'embedded_gaussian', options: ['embedded_gaussian', 'gaussian', 'dot', 'concat'] }
127
+ ]
128
+ },
129
+ [LayerType.MULTIMODAL_FUSION]: {
130
+ type: LayerType.MULTIMODAL_FUSION,
131
+ label: 'Multimodal Fusion',
132
+ description: 'Merge Video, Audio, Text',
133
+ category: 'Video',
134
+ parameters: [
135
+ { name: 'video_dim', type: 'number', label: 'Video Dim', default: 512 },
136
+ { name: 'audio_dim', type: 'number', label: 'Audio Dim', default: 256 },
137
+ { name: 'text_dim', type: 'number', label: 'Text Dim', default: 768 },
138
+ { name: 'out_dim', type: 'number', label: 'Fused Dim', default: 512 }
139
+ ]
140
+ },
141
+
142
+ // --- OCR (Text Recognition) ---
143
+ [LayerType.TPS_TRANSFORM]: {
144
+ type: LayerType.TPS_TRANSFORM,
145
+ label: 'TPS Transform',
146
+ description: 'Rectifies curved text (Thin Plate Spline)',
147
+ category: 'OCR',
148
+ parameters: [
149
+ { name: 'fiducial_points', type: 'number', label: 'Control Points', default: 20 },
150
+ { name: 'output_size', type: 'string', label: 'Out Size (HxW)', default: '32,100' }
151
+ ]
152
+ },
153
+ [LayerType.CRNN_BLOCK]: {
154
+ type: LayerType.CRNN_BLOCK,
155
+ label: 'CRNN Block',
156
+ description: 'Conv + BiLSTM for Text Sequence',
157
+ category: 'OCR',
158
+ parameters: [
159
+ { name: 'img_h', type: 'number', label: 'Image Height', default: 32 },
160
+ { name: 'hidden_size', type: 'number', label: 'LSTM Hidden', default: 256 },
161
+ { name: 'num_classes', type: 'number', label: 'Num Chars', default: 37 }
162
+ ]
163
+ },
164
+ [LayerType.CTC_DECODER]: {
165
+ type: LayerType.CTC_DECODER,
166
+ label: 'CTC Decoder',
167
+ description: 'Connectionist Temporal Classification',
168
+ category: 'OCR',
169
+ parameters: [
170
+ { name: 'blank_index', type: 'number', label: 'Blank Index', default: 0 },
171
+ { name: 'reduction', type: 'select', label: 'Reduction', default: 'mean', options: ['mean', 'sum', 'none'] }
172
+ ]
173
+ },
174
+
175
+ // --- ROBOTICS / MOTION / DEPTH ---
176
+ [LayerType.DEPTH_DECODER]: {
177
+ type: LayerType.DEPTH_DECODER,
178
+ label: 'Depth Decoder',
179
+ description: 'Estimates Monocular Depth Map',
180
+ category: 'Robotics',
181
+ parameters: [
182
+ { name: 'min_depth', type: 'number', label: 'Min Depth (m)', default: 0.1 },
183
+ { name: 'max_depth', type: 'number', label: 'Max Depth (m)', default: 100.0 },
184
+ { name: 'backbone_scale', type: 'number', label: 'Scale Factor', default: 1 }
185
+ ]
186
+ },
187
+ [LayerType.DISPARITY_HEAD]: {
188
+ type: LayerType.DISPARITY_HEAD,
189
+ label: 'Disparity Head',
190
+ description: 'Stereo Vision Disparity Estimation',
191
+ category: 'Robotics',
192
+ parameters: [
193
+ { name: 'max_disp', type: 'number', label: 'Max Disparity', default: 192 },
194
+ { name: 'refine_iter', type: 'number', label: 'Refine Iters', default: 3 }
195
+ ]
196
+ },
197
+ [LayerType.OPTICAL_FLOW]: {
198
+ type: LayerType.OPTICAL_FLOW,
199
+ label: 'Optical Flow',
200
+ description: 'Estimates pixel motion between frames',
201
+ category: 'Robotics',
202
+ parameters: [
203
+ { name: 'input_channels', type: 'number', label: 'In Channels', default: 6 },
204
+ { name: 'flow_dim', type: 'number', label: 'Flow Dim', default: 2 },
205
+ { name: 'corr_levels', type: 'number', label: 'Correlation Lvl', default: 4 }
206
+ ]
207
+ },
208
+ [LayerType.VELOCITY_HEAD]: {
209
+ type: LayerType.VELOCITY_HEAD,
210
+ label: 'Velocity Head',
211
+ description: 'Predicts object speed/trajectory',
212
+ category: 'Robotics',
213
+ parameters: [
214
+ { name: 'input_dim', type: 'number', label: 'Input Dim', default: 512 },
215
+ { name: 'time_horizon', type: 'number', label: 'Horizon (sec)', default: 3 },
216
+ { name: 'mode', type: 'select', label: 'Mode', default: 'Linear', options: ['Linear', 'Angular', 'Full State'] }
217
+ ]
218
+ },
219
+ [LayerType.KALMAN_FILTER]: {
220
+ type: LayerType.KALMAN_FILTER,
221
+ label: 'Kalman Filter',
222
+ description: 'Differentiable State Estimation',
223
+ category: 'Robotics',
224
+ parameters: [
225
+ { name: 'state_dim', type: 'number', label: 'State Dim', default: 4 },
226
+ { name: 'measure_dim', type: 'number', label: 'Measure Dim', default: 2 },
227
+ { name: 'learnable_process_noise', type: 'boolean', label: 'Learn Noise', default: true }
228
+ ]
229
+ },
230
+ [LayerType.BEV_TRANSFORM]: {
231
+ type: LayerType.BEV_TRANSFORM,
232
+ label: 'BEV Transformer',
233
+ description: 'Perspective to Bird\'s Eye View',
234
+ category: 'Robotics',
235
+ parameters: [
236
+ { name: 'bev_h', type: 'number', label: 'BEV Height', default: 200 },
237
+ { name: 'bev_w', type: 'number', label: 'BEV Width', default: 200 },
238
+ { name: 'num_queries', type: 'number', label: 'Num Queries', default: 900 }
239
+ ]
240
+ },
241
+ [LayerType.RADAR_ENCODER]: {
242
+ type: LayerType.RADAR_ENCODER,
243
+ label: 'Radar Encoder',
244
+ description: 'PointPillars style radar encoding',
245
+ category: 'Robotics',
246
+ parameters: [
247
+ { name: 'max_points', type: 'number', label: 'Max Points', default: 100 },
248
+ { name: 'num_features', type: 'number', label: 'Num Feats', default: 5 },
249
+ { name: 'voxel_size', type: 'string', label: 'Voxel Size', default: '0.2,0.2,4' }
250
+ ]
251
+ },
252
+
253
  // --- DETECTION (YOLO) ---
254
  [LayerType.C2F_BLOCK]: {
255
  type: LayerType.C2F_BLOCK,
 
1311
  { id: '4', source: 'cat', target: 'unet' }, { id: '5', source: 'unet', target: 'out' }
1312
  ]
1313
  }
1314
+ };