Spaces:
Running
Running
Update constants.ts
Browse files- constants.ts +121 -3
constants.ts
CHANGED
|
@@ -9,6 +9,7 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
| 9 |
description: 'Entry point for data tensors',
|
| 10 |
category: 'Core',
|
| 11 |
parameters: [
|
|
|
|
| 12 |
{ name: 'modality', type: 'select', label: 'Modality', default: 'Tensor', options: ['Tensor', 'Image', 'Text', 'Audio', 'Video', 'Latent', 'State', '3D Volume', 'Point Cloud'] },
|
| 13 |
{ name: 'shape', type: 'string', label: 'Shape (e.g. 3,224,224)', default: '3, 224, 224' },
|
| 14 |
{ name: 'batch_size', type: 'number', label: 'Batch Size', default: 32 },
|
|
@@ -23,7 +24,8 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
| 23 |
parameters: [
|
| 24 |
{ name: 'in_features', type: 'number', label: 'In Features (Opt)', default: 0, description: "0 = Auto-infer" },
|
| 25 |
{ name: 'out_features', type: 'number', label: 'Output Features', default: 128 },
|
| 26 |
-
{ name: 'bias', type: 'boolean', label: 'Use Bias', default: true }
|
|
|
|
| 27 |
]
|
| 28 |
},
|
| 29 |
[LayerType.OUTPUT]: {
|
|
@@ -83,6 +85,16 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
| 83 |
{ name: 'k', type: 'number', label: 'Kernel Size', default: 5 }
|
| 84 |
]
|
| 85 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
[LayerType.DETECT_HEAD]: {
|
| 87 |
type: LayerType.DETECT_HEAD,
|
| 88 |
label: 'Detection Head',
|
|
@@ -103,8 +115,29 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
| 103 |
{ name: 'scales', type: 'text', label: 'Scales', default: '[32, 64, 128]' }
|
| 104 |
]
|
| 105 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
// --- AUDIO / SPEECH ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
[LayerType.MEL_SPECTROGRAM]: {
|
| 109 |
type: LayerType.MEL_SPECTROGRAM,
|
| 110 |
label: 'MelSpectrogram',
|
|
@@ -116,6 +149,16 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
| 116 |
{ name: 'n_mels', type: 'number', label: 'Num Mels', default: 80 }
|
| 117 |
]
|
| 118 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
[LayerType.CONFORMER_BLOCK]: {
|
| 120 |
type: LayerType.CONFORMER_BLOCK,
|
| 121 |
label: 'Conformer Block',
|
|
@@ -138,6 +181,26 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
| 138 |
{ name: 'kernel_size', type: 'number', label: 'Kernel', default: 3 }
|
| 139 |
]
|
| 140 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
[LayerType.VOCODER]: {
|
| 142 |
type: LayerType.VOCODER,
|
| 143 |
label: 'Vocoder',
|
|
@@ -181,6 +244,26 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
| 181 |
{ name: 'out_channels', type: 'number', label: 'Out Channels', default: 64 }
|
| 182 |
]
|
| 183 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
[LayerType.GAUSSIAN_SPLAT]: {
|
| 185 |
type: LayerType.GAUSSIAN_SPLAT,
|
| 186 |
label: 'Gaussian Splat',
|
|
@@ -330,9 +413,11 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
| 330 |
{ name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 },
|
| 331 |
{ name: 'stride', type: 'number', label: 'Stride', default: 1 },
|
| 332 |
{ name: 'padding', type: 'number', label: 'Padding', default: 1 },
|
|
|
|
| 333 |
{ name: 'dilation', type: 'number', label: 'Dilation', default: 1 },
|
| 334 |
{ name: 'groups', type: 'number', label: 'Groups', default: 1, description: "For depthwise separable" },
|
| 335 |
-
{ name: 'bias', type: 'boolean', label: 'Bias', default: true }
|
|
|
|
| 336 |
]
|
| 337 |
},
|
| 338 |
[LayerType.CONV3D]: {
|
|
@@ -359,6 +444,17 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
| 359 |
{ name: 'padding', type: 'number', label: 'Padding', default: 0 }
|
| 360 |
]
|
| 361 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
[LayerType.MAXPOOL]: {
|
| 363 |
type: LayerType.MAXPOOL,
|
| 364 |
label: 'MaxPool2D',
|
|
@@ -613,7 +709,8 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
|
|
| 613 |
{ name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 512 },
|
| 614 |
{ name: 'num_heads', type: 'number', label: 'Num Heads', default: 8 },
|
| 615 |
{ name: 'dropout', type: 'number', label: 'Dropout', default: 0.1 },
|
| 616 |
-
{ name: 'batch_first', type: 'boolean', label: 'Batch First', default: true }
|
|
|
|
| 617 |
]
|
| 618 |
},
|
| 619 |
[LayerType.CROSS_ATTENTION]: {
|
|
@@ -703,6 +800,27 @@ export const INITIAL_EDGES = [
|
|
| 703 |
];
|
| 704 |
|
| 705 |
export const TEMPLATES: Record<string, GraphTemplate> = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 706 |
'yolo_v8': {
|
| 707 |
id: 'yolo_v8',
|
| 708 |
name: 'YOLO (Detection)',
|
|
|
|
| 9 |
description: 'Entry point for data tensors',
|
| 10 |
category: 'Core',
|
| 11 |
parameters: [
|
| 12 |
+
{ name: 'name', type: 'string', label: 'Name', default: 'input_1', description: 'Variable name in forward()' },
|
| 13 |
{ name: 'modality', type: 'select', label: 'Modality', default: 'Tensor', options: ['Tensor', 'Image', 'Text', 'Audio', 'Video', 'Latent', 'State', '3D Volume', 'Point Cloud'] },
|
| 14 |
{ name: 'shape', type: 'string', label: 'Shape (e.g. 3,224,224)', default: '3, 224, 224' },
|
| 15 |
{ name: 'batch_size', type: 'number', label: 'Batch Size', default: 32 },
|
|
|
|
| 24 |
parameters: [
|
| 25 |
{ name: 'in_features', type: 'number', label: 'In Features (Opt)', default: 0, description: "0 = Auto-infer" },
|
| 26 |
{ name: 'out_features', type: 'number', label: 'Output Features', default: 128 },
|
| 27 |
+
{ name: 'bias', type: 'boolean', label: 'Use Bias', default: true },
|
| 28 |
+
{ name: 'activation', type: 'select', label: 'Fused Activation', default: 'None', options: ['None', 'ReLU', 'GELU', 'Sigmoid'] }
|
| 29 |
]
|
| 30 |
},
|
| 31 |
[LayerType.OUTPUT]: {
|
|
|
|
| 85 |
{ name: 'k', type: 'number', label: 'Kernel Size', default: 5 }
|
| 86 |
]
|
| 87 |
},
|
| 88 |
+
[LayerType.DARKNET_BLOCK]: {
|
| 89 |
+
type: LayerType.DARKNET_BLOCK,
|
| 90 |
+
label: 'Darknet Block',
|
| 91 |
+
description: 'Residual block used in Darknet',
|
| 92 |
+
category: 'Detection',
|
| 93 |
+
parameters: [
|
| 94 |
+
{ name: 'channels', type: 'number', label: 'Channels', default: 64 },
|
| 95 |
+
{ name: 'num_repeats', type: 'number', label: 'Repeats', default: 1 }
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
[LayerType.DETECT_HEAD]: {
|
| 99 |
type: LayerType.DETECT_HEAD,
|
| 100 |
label: 'Detection Head',
|
|
|
|
| 115 |
{ name: 'scales', type: 'text', label: 'Scales', default: '[32, 64, 128]' }
|
| 116 |
]
|
| 117 |
},
|
| 118 |
+
[LayerType.NMS]: {
|
| 119 |
+
type: LayerType.NMS,
|
| 120 |
+
label: 'NMS',
|
| 121 |
+
description: 'Non-Maximum Suppression',
|
| 122 |
+
category: 'Detection',
|
| 123 |
+
parameters: [
|
| 124 |
+
{ name: 'iou_threshold', type: 'number', label: 'IoU Thresh', default: 0.5 },
|
| 125 |
+
{ name: 'score_threshold', type: 'number', label: 'Score Thresh', default: 0.25 }
|
| 126 |
+
]
|
| 127 |
+
},
|
| 128 |
|
| 129 |
// --- AUDIO / SPEECH ---
|
| 130 |
+
[LayerType.STFT]: {
|
| 131 |
+
type: LayerType.STFT,
|
| 132 |
+
label: 'STFT',
|
| 133 |
+
description: 'Short-Time Fourier Transform',
|
| 134 |
+
category: 'Audio',
|
| 135 |
+
parameters: [
|
| 136 |
+
{ name: 'n_fft', type: 'number', label: 'N_FFT', default: 1024 },
|
| 137 |
+
{ name: 'hop_length', type: 'number', label: 'Hop Length', default: 256 },
|
| 138 |
+
{ name: 'win_length', type: 'number', label: 'Window Length', default: 1024 }
|
| 139 |
+
]
|
| 140 |
+
},
|
| 141 |
[LayerType.MEL_SPECTROGRAM]: {
|
| 142 |
type: LayerType.MEL_SPECTROGRAM,
|
| 143 |
label: 'MelSpectrogram',
|
|
|
|
| 149 |
{ name: 'n_mels', type: 'number', label: 'Num Mels', default: 80 }
|
| 150 |
]
|
| 151 |
},
|
| 152 |
+
[LayerType.SPEC_AUGMENT]: {
|
| 153 |
+
type: LayerType.SPEC_AUGMENT,
|
| 154 |
+
label: 'SpecAugment',
|
| 155 |
+
description: 'Time/Freq masking for Audio',
|
| 156 |
+
category: 'Audio',
|
| 157 |
+
parameters: [
|
| 158 |
+
{ name: 'freq_mask_param', type: 'number', label: 'Freq Mask', default: 27 },
|
| 159 |
+
{ name: 'time_mask_param', type: 'number', label: 'Time Mask', default: 100 }
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
[LayerType.CONFORMER_BLOCK]: {
|
| 163 |
type: LayerType.CONFORMER_BLOCK,
|
| 164 |
label: 'Conformer Block',
|
|
|
|
| 181 |
{ name: 'kernel_size', type: 'number', label: 'Kernel', default: 3 }
|
| 182 |
]
|
| 183 |
},
|
| 184 |
+
[LayerType.WAV2VEC2_ENC]: {
|
| 185 |
+
type: LayerType.WAV2VEC2_ENC,
|
| 186 |
+
label: 'Wav2Vec2 Encoder',
|
| 187 |
+
description: 'Self-supervised Speech Encoder',
|
| 188 |
+
category: 'Audio',
|
| 189 |
+
parameters: [
|
| 190 |
+
{ name: 'output_dim', type: 'number', label: 'Output Dim', default: 768 },
|
| 191 |
+
{ name: 'extractor_mode', type: 'select', label: 'Mode', default: 'default', options: ['default', 'layer_norm'] }
|
| 192 |
+
]
|
| 193 |
+
},
|
| 194 |
+
[LayerType.RVC_ENCODER]: {
|
| 195 |
+
type: LayerType.RVC_ENCODER,
|
| 196 |
+
label: 'RVC Hubert',
|
| 197 |
+
description: 'Content Encoder for Voice Cloning',
|
| 198 |
+
category: 'Audio',
|
| 199 |
+
parameters: [
|
| 200 |
+
{ name: 'model_type', type: 'select', label: 'Model', default: 'hubert-soft', options: ['hubert-soft', 'vec256', 'vec768'] },
|
| 201 |
+
{ name: 'freeze', type: 'boolean', label: 'Freeze', default: true }
|
| 202 |
+
]
|
| 203 |
+
},
|
| 204 |
[LayerType.VOCODER]: {
|
| 205 |
type: LayerType.VOCODER,
|
| 206 |
label: 'Vocoder',
|
|
|
|
| 244 |
{ name: 'out_channels', type: 'number', label: 'Out Channels', default: 64 }
|
| 245 |
]
|
| 246 |
},
|
| 247 |
+
[LayerType.POINT_TRANSFORMER]: {
|
| 248 |
+
type: LayerType.POINT_TRANSFORMER,
|
| 249 |
+
label: 'PointTransformer',
|
| 250 |
+
description: 'Self-Attention for Point Clouds',
|
| 251 |
+
category: '3D',
|
| 252 |
+
parameters: [
|
| 253 |
+
{ name: 'dim', type: 'number', label: 'Dim', default: 32 },
|
| 254 |
+
{ name: 'num_neighbors', type: 'number', label: 'Neighbors (k)', default: 16 }
|
| 255 |
+
]
|
| 256 |
+
},
|
| 257 |
+
[LayerType.TRIPLANE_ENC]: {
|
| 258 |
+
type: LayerType.TRIPLANE_ENC,
|
| 259 |
+
label: 'Triplane Enc',
|
| 260 |
+
description: 'Project 3D to 3x2D Planes',
|
| 261 |
+
category: '3D',
|
| 262 |
+
parameters: [
|
| 263 |
+
{ name: 'plane_res', type: 'number', label: 'Resolution', default: 256 },
|
| 264 |
+
{ name: 'channels', type: 'number', label: 'Channels', default: 32 }
|
| 265 |
+
]
|
| 266 |
+
},
|
| 267 |
[LayerType.GAUSSIAN_SPLAT]: {
|
| 268 |
type: LayerType.GAUSSIAN_SPLAT,
|
| 269 |
label: 'Gaussian Splat',
|
|
|
|
| 413 |
{ name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 },
|
| 414 |
{ name: 'stride', type: 'number', label: 'Stride', default: 1 },
|
| 415 |
{ name: 'padding', type: 'number', label: 'Padding', default: 1 },
|
| 416 |
+
{ name: 'padding_mode', type: 'select', label: 'Pad Mode', default: 'zeros', options: ['zeros', 'reflect', 'replicate', 'circular'] },
|
| 417 |
{ name: 'dilation', type: 'number', label: 'Dilation', default: 1 },
|
| 418 |
{ name: 'groups', type: 'number', label: 'Groups', default: 1, description: "For depthwise separable" },
|
| 419 |
+
{ name: 'bias', type: 'boolean', label: 'Bias', default: true },
|
| 420 |
+
{ name: 'activation', type: 'select', label: 'Fused Activation', default: 'None', options: ['None', 'ReLU', 'LeakyReLU', 'SiLU'] }
|
| 421 |
]
|
| 422 |
},
|
| 423 |
[LayerType.CONV3D]: {
|
|
|
|
| 444 |
{ name: 'padding', type: 'number', label: 'Padding', default: 0 }
|
| 445 |
]
|
| 446 |
},
|
| 447 |
+
[LayerType.DEFORMABLE_CONV]: {
|
| 448 |
+
type: LayerType.DEFORMABLE_CONV,
|
| 449 |
+
label: 'Deformable Conv',
|
| 450 |
+
description: 'Deformable Convolution v2',
|
| 451 |
+
category: 'Convolution',
|
| 452 |
+
parameters: [
|
| 453 |
+
{ name: 'out_channels', type: 'number', label: 'Filters', default: 64 },
|
| 454 |
+
{ name: 'kernel_size', type: 'number', label: 'Kernel Size', default: 3 },
|
| 455 |
+
{ name: 'offset_groups', type: 'number', label: 'Offset Groups', default: 1 }
|
| 456 |
+
]
|
| 457 |
+
},
|
| 458 |
[LayerType.MAXPOOL]: {
|
| 459 |
type: LayerType.MAXPOOL,
|
| 460 |
label: 'MaxPool2D',
|
|
|
|
| 709 |
{ name: 'embed_dim', type: 'number', label: 'Embed Dim', default: 512 },
|
| 710 |
{ name: 'num_heads', type: 'number', label: 'Num Heads', default: 8 },
|
| 711 |
{ name: 'dropout', type: 'number', label: 'Dropout', default: 0.1 },
|
| 712 |
+
{ name: 'batch_first', type: 'boolean', label: 'Batch First', default: true },
|
| 713 |
+
{ name: 'causal', type: 'boolean', label: 'Causal Mask', default: false }
|
| 714 |
]
|
| 715 |
},
|
| 716 |
[LayerType.CROSS_ATTENTION]: {
|
|
|
|
| 800 |
];
|
| 801 |
|
| 802 |
export const TEMPLATES: Record<string, GraphTemplate> = {
|
| 803 |
+
'rvc_voice': {
|
| 804 |
+
id: 'rvc_voice',
|
| 805 |
+
name: 'Voice Cloning (RVC)',
|
| 806 |
+
description: 'Retrieval-based Voice Conversion backbone.',
|
| 807 |
+
nodes: [
|
| 808 |
+
{ id: 'audio', type: 'custom', position: {x: 200, y: 0}, data: {label: 'Source Audio', type: LayerType.INPUT, params: {modality: 'Audio'}} },
|
| 809 |
+
{ id: 'hubert', type: 'custom', position: {x: 200, y: 100}, data: {label: 'HuBERT Soft', type: LayerType.RVC_ENCODER, params: {}} },
|
| 810 |
+
{ id: 'f0', type: 'custom', position: {x: 450, y: 0}, data: {label: 'Pitch (F0)', type: LayerType.INPUT, params: {modality: 'Tensor'}} },
|
| 811 |
+
{ id: 'emb', type: 'custom', position: {x: 450, y: 100}, data: {label: 'F0 Embed', type: LayerType.EMBEDDING, params: {num_embeddings: 256}} },
|
| 812 |
+
{ id: 'cat', type: 'custom', position: {x: 325, y: 200}, data: {label: 'Merge Features', type: LayerType.CONCAT, params: {}} },
|
| 813 |
+
{ id: 'wn', type: 'custom', position: {x: 325, y: 300}, data: {label: 'WaveNet Stack', type: LayerType.WAVENET_BLOCK, params: {channels: 256, dilation: 2}} },
|
| 814 |
+
{ id: 'voc', type: 'custom', position: {x: 325, y: 400}, data: {label: 'HiFiGAN', type: LayerType.VOCODER, params: {}} },
|
| 815 |
+
{ id: 'out', type: 'custom', position: {x: 325, y: 500}, data: {label: 'Cloned Audio', type: LayerType.OUTPUT, params: {}} },
|
| 816 |
+
],
|
| 817 |
+
edges: [
|
| 818 |
+
{ id: '1', source: 'audio', target: 'hubert' }, { id: '2', source: 'f0', target: 'emb' },
|
| 819 |
+
{ id: '3', source: 'hubert', target: 'cat' }, { id: '4', source: 'emb', target: 'cat' },
|
| 820 |
+
{ id: '5', source: 'cat', target: 'wn' }, { id: '6', source: 'wn', target: 'voc' },
|
| 821 |
+
{ id: '7', source: 'voc', target: 'out' }
|
| 822 |
+
]
|
| 823 |
+
},
|
| 824 |
'yolo_v8': {
|
| 825 |
id: 'yolo_v8',
|
| 826 |
name: 'YOLO (Detection)',
|