chaitjo
/

gRNAde

+# Training configurations for gRNAde_drop3d@0.75_maxlen@500.h5
+# Misc configurations
+# Runtime settings: compute device, GPU index, random seed, checkpoint saving.
+# Every entry in this file is a mapping with a `value` and a human-readable `desc`.
+device:
+  # NOTE(review): 'gpu' is not one of the options listed in desc (cpu/cuda/xpu);
+  # confirm which spelling the consuming code expects before changing either.
+  value: 'gpu'
+  desc: Device to run on (cpu/cuda/xpu)
+gpu:
+  value: 0
+  desc: GPU ID
+seed:
+  value: 0
+  desc: Random seed for reproducibility
+save:
+  value: true  # canonical lowercase boolean (was `True`)
+  desc: Whether to save current and best model checkpoint
+# Data configurations
+# Dataset location, graph featurisation sizes, training-time augmentation and
+# batch size limits.
+data_path:
+  value: "./data/"
+  desc: Data directory (preprocessed and raw)
+radius:
+  value: 4.5  # per desc, this setting is currently not used
+  desc: Radius for determining local neighborhoods in Angstrom (currently not used)
+top_k:
+  value: 32
+  desc: Number of k-nearest neighbors in 3D and sequence space
+num_rbf:
+  value: 32
+  desc: Number of radial basis functions to featurise distances
+num_posenc:
+  value: 32
+  desc: Number of positional encodings to featurise edges
+max_num_conformers:
+  value: 1
+  desc: Maximum number of conformations sampled per sequence
+noise_scale:
+  value: 0.1
+  desc: Std of gaussian noise added to node coordinates during training
+drop_prob_3d:
+  value: 0.75  # same number as the "drop3d@0.75" tag in the file header comment
+  desc: Dropout probability of 3D coordinates during training
+random_order:
+  value: true  # canonical lowercase boolean (was `True`)
+  desc: Whether to train with random permutation or sequential order
+max_nodes_batch:
+  value: 3000
+  desc: Maximum number of nodes in batch
+max_nodes_sample:
+  value: 500  # same number as the "maxlen@500" tag in the file header comment
+  desc: Maximum number of nodes in batches with single samples (ie. maximum RNA length)
+# Splitting configurations
+split:  # names the data split to use; the options are listed in desc
+  value: 'das'
+  desc: Type of data split (das/structsim_v2)
+# Model configurations
+model:  # NOTE(review): 'ARv1' is not literally one of the desc options (AR/NAR) — confirm accepted names
+  value: 'ARv1'
+  desc: Model architecture (AR/NAR)
+node_in_dim:  # 15 = 3 x 5 and 4 = 2 + (3 - 1), i.e. the values are consistent with num_bb_atoms = 3
+  value: [15, 4]  # (num_bb_atoms x 5, 2 + (num_bb_atoms - 1))
+  desc: Input dimensions for node features (scalar channels, vector channels)
+node_h_dim:
+  value: [128, 16]
+  desc: Hidden dimensions for node features (scalar channels, vector channels)
+edge_in_dim:  # NOTE(review): with num_rbf = num_posenc = 32 (this file) and num_bb_atoms = 3, the formula below cannot give 132 — verify
+  value: [132, 3]  # (num_bb_atoms x num_edge_type + num_rbf + num_posenc, num_bb_atoms)
+  desc: Input dimensions for edge features (scalar channels, vector channels)
+edge_h_dim:
+  value: [64, 4]
+  desc: Hidden dimensions for edge features (scalar channels, vector channels)
+num_layers:  # per desc, applies to both encoder and decoder
+  value: 4
+  desc: Number of layers for encoder/decoder
+drop_rate:
+  value: 0.5
+  desc: Dropout rate
+out_dim:  # one output per RNA base, per desc
+  value: 4
+  desc: Output dimension (4 bases for RNA)
+# Training configurations
+epochs:
+  value: 100
+  desc: Number of training epochs
+lr:
+  value: 0.0001
+  desc: Learning rate
+label_smoothing:  # per desc, applied to the cross entropy loss
+  value: 0.05
+  desc: Label smoothing for cross entropy loss
+batch_size:  # per desc, currently unused; presumably max_nodes_batch bounds batches instead — TODO confirm
+  value: 8
+  desc: Batch size for dataloaders (currently not used)
+num_workers:
+  value: 16
+  desc: Number of workers for dataloaders
+val_every:  # measured in training epochs, per desc
+  value: 10
+  desc: Interval of training epochs after which validation is performed
+# Evaluation configurations
+# Checkpoint to load, train/evaluate switch, and sampling settings used when
+# measuring recovery.
+model_path:
+  # Empty string by default; presumably means "no checkpoint to load" — confirm
+  # against the code that reads this key.
+  value: ''
+  desc: Path to model checkpoint for evaluation or reloading
+evaluate:
+  value: false  # canonical lowercase boolean (was `False`)
+  desc: Whether to run evaluation (or training)
+n_samples:
+  value: 16
+  desc: Number of samples for evaluating recovery
+temperature:
+  value: 0.1
+  desc: Sampling temperature for evaluating recovery