Spaces:

wuhp
/

testarcbuilder

Running

App Files Files Community

wuhp committed on Dec 6, 2025

Commit

d422478

verified ·

1 Parent(s): 96385b4

Update constants.ts

Browse files

Files changed (1) hide show

constants.ts +191 -2

constants.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { LayerDefinition, LayerType, GraphTemplate } from './types';
 export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
@@ -10,7 +11,7 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
     category: 'Core',
     parameters: [
       { name: 'name', type: 'string', label: 'Name', default: 'input_1', description: 'Variable name in forward()' },
-      { name: 'modality', type: 'select', label: 'Modality', default: 'Tensor', options: ['Tensor', 'Image', 'Text', 'Audio', 'Video', 'Latent', 'State', '3D Volume', 'Point Cloud'] },
       { name: 'shape', type: 'string', label: 'Shape (e.g. 3,224,224)', default: '3, 224, 224' },
       { name: 'batch_size', type: 'number', label: 'Batch Size', default: 32 },
       { name: 'dtype', type: 'select', label: 'Data Type', default: 'float32', options: ['float32', 'int64', 'bool'] }
@@ -61,6 +62,194 @@ export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
     ]
   },
   // --- DETECTION (YOLO) ---
   [LayerType.C2F_BLOCK]: {
     type: LayerType.C2F_BLOCK,
@@ -1122,4 +1311,4 @@ export const TEMPLATES: Record<string, GraphTemplate> = {
       { id: '4', source: 'cat', target: 'unet' }, { id: '5', source: 'unet', target: 'out' }
     ]
   }
-};

 import { LayerDefinition, LayerType, GraphTemplate } from './types';
 export const LAYER_DEFINITIONS: Record<LayerType, LayerDefinition> = {
     category: 'Core',
     parameters: [
       { name: 'name', type: 'string', label: 'Name', default: 'input_1', description: 'Variable name in forward()' },
+      { name: 'modality', type: 'select', label: 'Modality', default: 'Tensor', options: ['Tensor', 'Image', 'Text', 'Audio', 'Video', 'Latent', 'State', '3D Volume', 'Point Cloud', 'Radar', 'Lidar'] },
       { name: 'shape', type: 'string', label: 'Shape (e.g. 3,224,224)', default: '3, 224, 224' },
       { name: 'batch_size', type: 'number', label: 'Batch Size', default: 32 },
       { name: 'dtype', type: 'select', label: 'Data Type', default: 'float32', options: ['float32', 'int64', 'bool'] }
     ]
   },
+  // --- VIDEO / GENERATION ---
+  [LayerType.VIDEO_DIFFUSION_BLOCK]: {
+    type: LayerType.VIDEO_DIFFUSION_BLOCK,
+    label: 'Video Diffusion',
+    description: '3D UNet Block for Video Gen',
+    category: 'Video',
+    parameters: [
+      { name: 'in_channels', type: 'number', label: 'Channels', default: 128 },
+      { name: 'time_dim', type: 'number', label: 'Time Emb Dim', default: 512 },
+      { name: 'use_temporal_attn', type: 'boolean', label: 'Temp Attn', default: true }
+    ]
+  },
+  [LayerType.SPATIO_TEMPORAL_ATTN]: {
+    type: LayerType.SPATIO_TEMPORAL_ATTN,
+    label: 'Spatio-Temporal Attn',
+    description: 'Attention over Space & Time',
+    category: 'Video',
+    parameters: [
+      { name: 'dim', type: 'number', label: 'Dimension', default: 512 },
+      { name: 'num_heads', type: 'number', label: 'Heads', default: 8 },
+      { name: 'frames', type: 'number', label: 'Max Frames', default: 16 }
+    ]
+  },
+  [LayerType.VIDEO_TOKENIZER]: {
+    type: LayerType.VIDEO_TOKENIZER,
+    label: 'Video Tokenizer',
+    description: '3D VQ-VAE / Magvit style',
+    category: 'Video',
+    parameters: [
+      { name: 'patch_size_t', type: 'number', label: 'Time Patch', default: 2 },
+      { name: 'patch_size_hw', type: 'number', label: 'Spatial Patch', default: 16 },
+      { name: 'vocab_size', type: 'number', label: 'Codebook Size', default: 8192 }
+    ]
+  },
+  [LayerType.FRAME_INTERPOLATOR]: {
+    type: LayerType.FRAME_INTERPOLATOR,
+    label: 'Frame Interpolator',
+    description: 'Upsamples video frame rate',
+    category: 'Video',
+    parameters: [
+      { name: 'scale_factor', type: 'number', label: 'Scale Factor', default: 2 },
+      { name: 'mode', type: 'select', label: 'Mode', default: 'bilinear', options: ['bilinear', 'optical_flow', 'pixel_shuffle'] }
+    ]
+  },
+  [LayerType.TEMPORAL_SHIFT]: {
+    type: LayerType.TEMPORAL_SHIFT,
+    label: 'Temporal Shift',
+    description: 'TSM Module (Zero params)',
+    category: 'Video',
+    parameters: [
+      { name: 'n_segment', type: 'number', label: 'Segments', default: 8 },
+      { name: 'fold_div', type: 'number', label: 'Fold Divisor', default: 8 }
+    ]
+  },
+  [LayerType.NON_LOCAL_BLOCK]: {
+    type: LayerType.NON_LOCAL_BLOCK,
+    label: 'Non-Local Block',
+    description: 'Global context block',
+    category: 'Video',
+    parameters: [
+      { name: 'in_channels', type: 'number', label: 'Channels', default: 64 },
+      { name: 'mode', type: 'select', label: 'Mode', default: 'embedded_gaussian', options: ['embedded_gaussian', 'gaussian', 'dot', 'concat'] }
+    ]
+  },
+  [LayerType.MULTIMODAL_FUSION]: {
+    type: LayerType.MULTIMODAL_FUSION,
+    label: 'Multimodal Fusion',
+    description: 'Merge Video, Audio, Text',
+    category: 'Video',
+    parameters: [
+      { name: 'video_dim', type: 'number', label: 'Video Dim', default: 512 },
+      { name: 'audio_dim', type: 'number', label: 'Audio Dim', default: 256 },
+      { name: 'text_dim', type: 'number', label: 'Text Dim', default: 768 },
+      { name: 'out_dim', type: 'number', label: 'Fused Dim', default: 512 }
+    ]
+  },
+  // --- OCR (Text Recognition) ---
+  [LayerType.TPS_TRANSFORM]: {
+    type: LayerType.TPS_TRANSFORM,
+    label: 'TPS Transform',
+    description: 'Rectifies curved text (Thin Plate Spline)',
+    category: 'OCR',
+    parameters: [
+      { name: 'fiducial_points', type: 'number', label: 'Control Points', default: 20 },
+      { name: 'output_size', type: 'string', label: 'Out Size (HxW)', default: '32,100' }
+    ]
+  },
+  [LayerType.CRNN_BLOCK]: {
+    type: LayerType.CRNN_BLOCK,
+    label: 'CRNN Block',
+    description: 'Conv + BiLSTM for Text Sequence',
+    category: 'OCR',
+    parameters: [
+      { name: 'img_h', type: 'number', label: 'Image Height', default: 32 },
+      { name: 'hidden_size', type: 'number', label: 'LSTM Hidden', default: 256 },
+      { name: 'num_classes', type: 'number', label: 'Num Chars', default: 37 }
+    ]
+  },
+  [LayerType.CTC_DECODER]: {
+    type: LayerType.CTC_DECODER,
+    label: 'CTC Decoder',
+    description: 'Connectionist Temporal Classification',
+    category: 'OCR',
+    parameters: [
+      { name: 'blank_index', type: 'number', label: 'Blank Index', default: 0 },
+      { name: 'reduction', type: 'select', label: 'Reduction', default: 'mean', options: ['mean', 'sum', 'none'] }
+    ]
+  },
+  // --- ROBOTICS / MOTION / DEPTH ---
+  [LayerType.DEPTH_DECODER]: {
+    type: LayerType.DEPTH_DECODER,
+    label: 'Depth Decoder',
+    description: 'Estimates Monocular Depth Map',
+    category: 'Robotics',
+    parameters: [
+      { name: 'min_depth', type: 'number', label: 'Min Depth (m)', default: 0.1 },
+      { name: 'max_depth', type: 'number', label: 'Max Depth (m)', default: 100.0 },
+      { name: 'backbone_scale', type: 'number', label: 'Scale Factor', default: 1 }
+    ]
+  },
+  [LayerType.DISPARITY_HEAD]: {
+    type: LayerType.DISPARITY_HEAD,
+    label: 'Disparity Head',
+    description: 'Stereo Vision Disparity Estimation',
+    category: 'Robotics',
+    parameters: [
+      { name: 'max_disp', type: 'number', label: 'Max Disparity', default: 192 },
+      { name: 'refine_iter', type: 'number', label: 'Refine Iters', default: 3 }
+    ]
+  },
+  [LayerType.OPTICAL_FLOW]: {
+    type: LayerType.OPTICAL_FLOW,
+    label: 'Optical Flow',
+    description: 'Estimates pixel motion between frames',
+    category: 'Robotics',
+    parameters: [
+      { name: 'input_channels', type: 'number', label: 'In Channels', default: 6 },
+      { name: 'flow_dim', type: 'number', label: 'Flow Dim', default: 2 },
+      { name: 'corr_levels', type: 'number', label: 'Correlation Lvl', default: 4 }
+    ]
+  },
+  [LayerType.VELOCITY_HEAD]: {
+    type: LayerType.VELOCITY_HEAD,
+    label: 'Velocity Head',
+    description: 'Predicts object speed/trajectory',
+    category: 'Robotics',
+    parameters: [
+      { name: 'input_dim', type: 'number', label: 'Input Dim', default: 512 },
+      { name: 'time_horizon', type: 'number', label: 'Horizon (sec)', default: 3 },
+      { name: 'mode', type: 'select', label: 'Mode', default: 'Linear', options: ['Linear', 'Angular', 'Full State'] }
+    ]
+  },
+  [LayerType.KALMAN_FILTER]: {
+    type: LayerType.KALMAN_FILTER,
+    label: 'Kalman Filter',
+    description: 'Differentiable State Estimation',
+    category: 'Robotics',
+    parameters: [
+      { name: 'state_dim', type: 'number', label: 'State Dim', default: 4 },
+      { name: 'measure_dim', type: 'number', label: 'Measure Dim', default: 2 },
+      { name: 'learnable_process_noise', type: 'boolean', label: 'Learn Noise', default: true }
+    ]
+  },
+  [LayerType.BEV_TRANSFORM]: {
+    type: LayerType.BEV_TRANSFORM,
+    label: 'BEV Transformer',
+    description: 'Perspective to Bird\'s Eye View',
+    category: 'Robotics',
+    parameters: [
+      { name: 'bev_h', type: 'number', label: 'BEV Height', default: 200 },
+      { name: 'bev_w', type: 'number', label: 'BEV Width', default: 200 },
+      { name: 'num_queries', type: 'number', label: 'Num Queries', default: 900 }
+    ]
+  },
+  [LayerType.RADAR_ENCODER]: {
+    type: LayerType.RADAR_ENCODER,
+    label: 'Radar Encoder',
+    description: 'PointPillars style radar encoding',
+    category: 'Robotics',
+    parameters: [
+      { name: 'max_points', type: 'number', label: 'Max Points', default: 100 },
+      { name: 'num_features', type: 'number', label: 'Num Feats', default: 5 },
+      { name: 'voxel_size', type: 'string', label: 'Voxel Size', default: '0.2,0.2,4' }
+    ]
+  },
   // --- DETECTION (YOLO) ---
   [LayerType.C2F_BLOCK]: {
     type: LayerType.C2F_BLOCK,
       { id: '4', source: 'cat', target: 'unet' }, { id: '5', source: 'unet', target: 'out' }
     ]
   }
+};