wuhp committed on
Commit
7e69a53
·
verified ·
1 Parent(s): 0393444

Update constants.ts

Browse files
Files changed (1) hide show
  1. constants.ts +121 -3
constants.ts CHANGED
@@ -9,6 +9,7 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
9
  description: 'Entry point for data tensors',
10
  category: 'Core',
11
  parameters: [
 
12
  { name: 'modality', type: 'select', label: 'Modality', default: 'Tensor', options: ['Tensor', 'Image', 'Text', 'Audio', 'Video', 'Latent', 'State', '3D Volume', 'Point Cloud'] },
13
  { name: 'shape', type: 'string', label: 'Shape (e.g. 3,224,224)', default: '3, 224, 224' },
14
  { name: 'batch_size', type: 'number', label: 'Batch Size', default: 32 },
@@ -23,7 +24,8 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
23
  parameters: [
24
  { name: 'in_features', type: 'number', label: 'In Features (Opt)', default: 0, description: "0 = Auto-infer" },
25
  { name: 'out_features', type: 'number', label: 'Output Features', default: 128 },
26
- { name: 'bias', type: 'boolean', label: 'Use Bias', default: true }
 
27
  ]
28
  },
29
  [LayerType.OUTPUT]: {
@@ -83,6 +85,16 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
83
  { name: 'k', type: 'number', label: 'Kernel Size', default: 5 }
84
  ]
85
  },
 
 
 
 
 
 
 
 
 
 
86
  [LayerType.DETECT_HEAD]: {
87
  type: LayerType.DETECT_HEAD,
88
  label: 'Detection Head',
@@ -103,8 +115,29 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
103
  { name: 'scales', type: 'text', label: 'Scales', default: '[32, 64, 128]' }
104
  ]
105
  },
 
 
 
 
 
 
 
 
 
 
106
 
107
  // --- AUDIO / SPEECH ---
 
 
 
 
 
 
 
 
 
 
 
108
  [LayerType.MEL_SPECTROGRAM]: {
109
  type: LayerType.MEL_SPECTROGRAM,
110
  label: 'MelSpectrogram',
@@ -116,6 +149,16 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
116
  { name: 'n_mels', type: 'number', label: 'Num Mels', default: 80 }
117
  ]
118
  },
 
 
 
 
 
 
 
 
 
 
119
  [LayerType.CONFORMER_BLOCK]: {
120
  type: LayerType.CONFORMER_BLOCK,
121
  label: 'Conformer Block',
@@ -138,6 +181,26 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
138
  { name: 'kernel_size', type: 'number', label: 'Kernel', default: 3 }
139
  ]
140
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  [LayerType.VOCODER]: {
142
  type: LayerType.VOCODER,
143
  label: 'Vocoder',
@@ -181,6 +244,26 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
181
  { name: 'out_channels', type: 'number', label: 'Out Channels', default: 64 }
182
  ]
183
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  [LayerType.GAUSSIAN_SPLAT]: {
185
  type: LayerType.GAUSSIAN_SPLAT,
186
  label: 'Gaussian Splat',
@@ -330,9 +413,11 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
330
  { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 },
331
  { name: 'stride', type: 'number', label: 'Stride', default: 1 },
332
  { name: 'padding', type: 'number', label: 'Padding', default: 1 },
 
333
  { name: 'dilation', type: 'number', label: 'Dilation', default: 1 },
334
  { name: 'groups', type: 'number', label: 'Groups', default: 1, description: "For depthwise separable" },
335
- { name: 'bias', type: 'boolean', label: 'Bias', default: true }
 
336
  ]
337
  },
338
  [LayerType.CONV3D]: {
@@ -359,6 +444,17 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
359
  { name: 'padding', type: 'number', label: 'Padding', default: 0 }
360
  ]
361
  },
 
 
 
 
 
 
 
 
 
 
 
362
  [LayerType.MAXPOOL]: {
363
  type: LayerType.MAXPOOL,
364
  label: 'MaxPool2D',
@@ -613,7 +709,8 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
613
  { name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 512 },
614
  { name: 'num_heads', type: 'number', label: 'Num Heads', default: 8 },
615
  { name: 'dropout', type: 'number', label: 'Dropout', default: 0.1 },
616
- { name: 'batch_first', type: 'boolean', label: 'Batch First', default: true }
 
617
  ]
618
  },
619
  [LayerType.CROSS_ATTENTION]: {
@@ -703,6 +800,27 @@ export const INITIAL_EDGES = [
703
  ];
704
 
705
  export const TEMPLATES: Record<string, GraphTemplate> = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
706
  'yolo_v8': {
707
  id: 'yolo_v8',
708
  name: 'YOLO (Detection)',
 
9
  description: 'Entry point for data tensors',
10
  category: 'Core',
11
  parameters: [
12
+ { name: 'name', type: 'string', label: 'Name', default: 'input_1', description: 'Variable name in forward()' },
13
  { name: 'modality', type: 'select', label: 'Modality', default: 'Tensor', options: ['Tensor', 'Image', 'Text', 'Audio', 'Video', 'Latent', 'State', '3D Volume', 'Point Cloud'] },
14
  { name: 'shape', type: 'string', label: 'Shape (e.g. 3,224,224)', default: '3, 224, 224' },
15
  { name: 'batch_size', type: 'number', label: 'Batch Size', default: 32 },
 
24
  parameters: [
25
  { name: 'in_features', type: 'number', label: 'In Features (Opt)', default: 0, description: "0 = Auto-infer" },
26
  { name: 'out_features', type: 'number', label: 'Output Features', default: 128 },
27
+ { name: 'bias', type: 'boolean', label: 'Use Bias', default: true },
28
+ { name: 'activation', type: 'select', label: 'Fused Activation', default: 'None', options: ['None', 'ReLU', 'GELU', 'Sigmoid'] }
29
  ]
30
  },
31
  [LayerType.OUTPUT]: {
 
85
  { name: 'k', type: 'number', label: 'Kernel Size', default: 5 }
86
  ]
87
  },
88
+ [LayerType.DARKNET_BLOCK]: {
89
+ type: LayerType.DARKNET_BLOCK,
90
+ label: 'Darknet Block',
91
+ description: 'Residual block used in Darknet',
92
+ category: 'Detection',
93
+ parameters: [
94
+ { name: 'channels', type: 'number', label: 'Channels', default: 64 },
95
+ { name: 'num_repeats', type: 'number', label: 'Repeats', default: 1 }
96
+ ]
97
+ },
98
  [LayerType.DETECT_HEAD]: {
99
  type: LayerType.DETECT_HEAD,
100
  label: 'Detection Head',
 
115
  { name: 'scales', type: 'text', label: 'Scales', default: '[32, 64, 128]' }
116
  ]
117
  },
118
+ [LayerType.NMS]: {
119
+ type: LayerType.NMS,
120
+ label: 'NMS',
121
+ description: 'Non-Maximum Suppression',
122
+ category: 'Detection',
123
+ parameters: [
124
+ { name: 'iou_threshold', type: 'number', label: 'IoU Thresh', default: 0.5 },
125
+ { name: 'score_threshold', type: 'number', label: 'Score Thresh', default: 0.25 }
126
+ ]
127
+ },
128
 
129
  // --- AUDIO / SPEECH ---
130
+ [LayerType.STFT]: {
131
+ type: LayerType.STFT,
132
+ label: 'STFT',
133
+ description: 'Short-Time Fourier Transform',
134
+ category: 'Audio',
135
+ parameters: [
136
+ { name: 'n_fft', type: 'number', label: 'N_FFT', default: 1024 },
137
+ { name: 'hop_length', type: 'number', label: 'Hop Length', default: 256 },
138
+ { name: 'win_length', type: 'number', label: 'Window Length', default: 1024 }
139
+ ]
140
+ },
141
  [LayerType.MEL_SPECTROGRAM]: {
142
  type: LayerType.MEL_SPECTROGRAM,
143
  label: 'MelSpectrogram',
 
149
  { name: 'n_mels', type: 'number', label: 'Num Mels', default: 80 }
150
  ]
151
  },
152
+ [LayerType.SPEC_AUGMENT]: {
153
+ type: LayerType.SPEC_AUGMENT,
154
+ label: 'SpecAugment',
155
+ description: 'Time/Freq masking for Audio',
156
+ category: 'Audio',
157
+ parameters: [
158
+ { name: 'freq_mask_param', type: 'number', label: 'Freq Mask', default: 27 },
159
+ { name: 'time_mask_param', type: 'number', label: 'Time Mask', default: 100 }
160
+ ]
161
+ },
162
  [LayerType.CONFORMER_BLOCK]: {
163
  type: LayerType.CONFORMER_BLOCK,
164
  label: 'Conformer Block',
 
181
  { name: 'kernel_size', type: 'number', label: 'Kernel', default: 3 }
182
  ]
183
  },
184
+ [LayerType.WAV2VEC2_ENC]: {
185
+ type: LayerType.WAV2VEC2_ENC,
186
+ label: 'Wav2Vec2 Encoder',
187
+ description: 'Self-supervised Speech Encoder',
188
+ category: 'Audio',
189
+ parameters: [
190
+ { name: 'output_dim', type: 'number', label: 'Output Dim', default: 768 },
191
+ { name: 'extractor_mode', type: 'select', label: 'Mode', default: 'default', options: ['default', 'layer_norm'] }
192
+ ]
193
+ },
194
+ [LayerType.RVC_ENCODER]: {
195
+ type: LayerType.RVC_ENCODER,
196
+ label: 'RVC Hubert',
197
+ description: 'Content Encoder for Voice Cloning',
198
+ category: 'Audio',
199
+ parameters: [
200
+ { name: 'model_type', type: 'select', label: 'Model', default: 'hubert-soft', options: ['hubert-soft', 'vec256', 'vec768'] },
201
+ { name: 'freeze', type: 'boolean', label: 'Freeze', default: true }
202
+ ]
203
+ },
204
  [LayerType.VOCODER]: {
205
  type: LayerType.VOCODER,
206
  label: 'Vocoder',
 
244
  { name: 'out_channels', type: 'number', label: 'Out Channels', default: 64 }
245
  ]
246
  },
247
+ [LayerType.POINT_TRANSFORMER]: {
248
+ type: LayerType.POINT_TRANSFORMER,
249
+ label: 'PointTransformer',
250
+ description: 'Self-Attention for Point Clouds',
251
+ category: '3D',
252
+ parameters: [
253
+ { name: 'dim', type: 'number', label: 'Dim', default: 32 },
254
+ { name: 'num_neighbors', type: 'number', label: 'Neighbors (k)', default: 16 }
255
+ ]
256
+ },
257
+ [LayerType.TRIPLANE_ENC]: {
258
+ type: LayerType.TRIPLANE_ENC,
259
+ label: 'Triplane Enc',
260
+ description: 'Project 3D to 3x2D Planes',
261
+ category: '3D',
262
+ parameters: [
263
+ { name: 'plane_res', type: 'number', label: 'Resolution', default: 256 },
264
+ { name: 'channels', type: 'number', label: 'Channels', default: 32 }
265
+ ]
266
+ },
267
  [LayerType.GAUSSIAN_SPLAT]: {
268
  type: LayerType.GAUSSIAN_SPLAT,
269
  label: 'Gaussian Splat',
 
413
  { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 },
414
  { name: 'stride', type: 'number', label: 'Stride', default: 1 },
415
  { name: 'padding', type: 'number', label: 'Padding', default: 1 },
416
+ { name: 'padding_mode', type: 'select', label: 'Pad Mode', default: 'zeros', options: ['zeros', 'reflect', 'replicate', 'circular'] },
417
  { name: 'dilation', type: 'number', label: 'Dilation', default: 1 },
418
  { name: 'groups', type: 'number', label: 'Groups', default: 1, description: "For depthwise separable" },
419
+ { name: 'bias', type: 'boolean', label: 'Bias', default: true },
420
+ { name: 'activation', type: 'select', label: 'Fused Activation', default: 'None', options: ['None', 'ReLU', 'LeakyReLU', 'SiLU'] }
421
  ]
422
  },
423
  [LayerType.CONV3D]: {
 
444
  { name: 'padding', type: 'number', label: 'Padding', default: 0 }
445
  ]
446
  },
447
+ [LayerType.DEFORMABLE_CONV]: {
448
+ type: LayerType.DEFORMABLE_CONV,
449
+ label: 'Deformable Conv',
450
+ description: 'Deformable Convolution v2',
451
+ category: 'Convolution',
452
+ parameters: [
453
+ { name: 'out_channels', type: 'number', label: 'Filters', default: 64 },
454
+ { name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 },
455
+ { name: 'offset_groups', type: 'number', label: 'Offset Groups', default: 1 }
456
+ ]
457
+ },
458
  [LayerType.MAXPOOL]: {
459
  type: LayerType.MAXPOOL,
460
  label: 'MaxPool2D',
 
709
  { name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 512 },
710
  { name: 'num_heads', type: 'number', label: 'Num Heads', default: 8 },
711
  { name: 'dropout', type: 'number', label: 'Dropout', default: 0.1 },
712
+ { name: 'batch_first', type: 'boolean', label: 'Batch First', default: true },
713
+ { name: 'causal', type: 'boolean', label: 'Causal Mask', default: false }
714
  ]
715
  },
716
  [LayerType.CROSS_ATTENTION]: {
 
800
  ];
801
 
802
  export const TEMPLATES: Record<string, GraphTemplate> = {
803
+ 'rvc_voice': {
804
+ id: 'rvc_voice',
805
+ name: 'Voice Cloning (RVC)',
806
+ description: 'Retrieval-based Voice Conversion backbone.',
807
+ nodes: [
808
+ { id: 'audio', type: 'custom', position: {x: 200, y: 0}, data: {label: 'Source Audio', type: LayerType.INPUT, params: {modality: 'Audio'}} },
809
+ { id: 'hubert', type: 'custom', position: {x: 200, y: 100}, data: {label: 'HuBERT Soft', type: LayerType.RVC_ENCODER, params: {}} },
810
+ { id: 'f0', type: 'custom', position: {x: 450, y: 0}, data: {label: 'Pitch (F0)', type: LayerType.INPUT, params: {modality: 'Tensor'}} },
811
+ { id: 'emb', type: 'custom', position: {x: 450, y: 100}, data: {label: 'F0 Embed', type: LayerType.EMBEDDING, params: {num_embeddings: 256}} },
812
+ { id: 'cat', type: 'custom', position: {x: 325, y: 200}, data: {label: 'Merge Features', type: LayerType.CONCAT, params: {}} },
813
+ { id: 'wn', type: 'custom', position: {x: 325, y: 300}, data: {label: 'WaveNet Stack', type: LayerType.WAVENET_BLOCK, params: {channels: 256, dilation: 2}} },
814
+ { id: 'voc', type: 'custom', position: {x: 325, y: 400}, data: {label: 'HiFiGAN', type: LayerType.VOCODER, params: {}} },
815
+ { id: 'out', type: 'custom', position: {x: 325, y: 500}, data: {label: 'Cloned Audio', type: LayerType.OUTPUT, params: {}} },
816
+ ],
817
+ edges: [
818
+ { id: '1', source: 'audio', target: 'hubert' }, { id: '2', source: 'f0', target: 'emb' },
819
+ { id: '3', source: 'hubert', target: 'cat' }, { id: '4', source: 'emb', target: 'cat' },
820
+ { id: '5', source: 'cat', target: 'wn' }, { id: '6', source: 'wn', target: 'voc' },
821
+ { id: '7', source: 'voc', target: 'out' }
822
+ ]
823
+ },
824
  'yolo_v8': {
825
  id: 'yolo_v8',
826
  name: 'YOLO (Detection)',