chaitjo committed on
Commit
f3c09c7
·
verified ·
1 Parent(s): 3f08f3e

Create config.yaml

Browse files
Files changed (1) hide show
  1. config.yaml +115 -0
config.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training configurations for gRNAde_drop3d@0.75_maxlen@500.h5
2
+
3
+ # Misc configurations
4
+ device:
5
+ value: 'gpu'
6
+ desc: Device to run on (cpu/gpu/xpu)
7
+ gpu:
8
+ value: 0
9
+ desc: GPU ID
10
+ seed:
11
+ value: 0
12
+ desc: Random seed for reproducibility
13
+ save:
14
+ value: True
15
+ desc: Whether to save current and best model checkpoint
16
+
17
+ # Data configurations
18
+ data_path:
19
+ value: "./data/"
20
+ desc: Data directory (preprocessed and raw)
21
+ radius:
22
+ value: 4.5
23
+ desc: Radius for determining local neighborhoods in Angstrom (currently not used)
24
+ top_k:
25
+ value: 32
26
+ desc: Number of k-nearest neighbors in 3D and sequence space
27
+ num_rbf:
28
+ value: 32
29
+ desc: Number of radial basis functions to featurise distances
30
+ num_posenc:
31
+ value: 32
32
+ desc: Number of positional encodings to featurise edges
33
+ max_num_conformers:
34
+ value: 1
35
+ desc: Maximum number of conformations sampled per sequence
36
+ noise_scale:
37
+ value: 0.1
38
+ desc: Standard deviation of Gaussian noise added to node coordinates during training
39
+ drop_prob_3d:
40
+ value: 0.75
41
+ desc: Dropout probability of 3D coordinates during training
42
+ random_order:
43
+ value: True
44
+ desc: Whether to train with random permutation or sequential order
45
+ max_nodes_batch:
46
+ value: 3000
47
+ desc: Maximum number of nodes in batch
48
+ max_nodes_sample:
49
+ value: 500
50
+ desc: Maximum number of nodes in batches with single samples (i.e. maximum RNA length)
51
+
52
+ # Splitting configurations
53
+ split:
54
+ value: 'das'
55
+ desc: Type of data split (das/structsim_v2)
56
+
57
+ # Model configurations
58
+ model:
59
+ value: 'ARv1'
60
+ desc: Model architecture (AR/NAR)
61
+ node_in_dim:
62
+ value: [15, 4] # (num_bb_atoms x 5, 2 + (num_bb_atoms - 1))
63
+ desc: Input dimensions for node features (scalar channels, vector channels)
64
+ node_h_dim:
65
+ value: [128, 16]
66
+ desc: Hidden dimensions for node features (scalar channels, vector channels)
67
+ edge_in_dim:
68
+ value: [132, 3] # (num_bb_atoms x num_edge_type + num_rbf + num_posenc, num_bb_atoms)
69
+ desc: Input dimensions for edge features (scalar channels, vector channels)
70
+ edge_h_dim:
71
+ value: [64, 4]
72
+ desc: Hidden dimensions for edge features (scalar channels, vector channels)
73
+ num_layers:
74
+ value: 4
75
+ desc: Number of layers for encoder/decoder
76
+ drop_rate:
77
+ value: 0.5
78
+ desc: Dropout rate
79
+ out_dim:
80
+ value: 4
81
+ desc: Output dimension (4 bases for RNA)
82
+
83
+ # Training configurations
84
+ epochs:
85
+ value: 100
86
+ desc: Number of training epochs
87
+ lr:
88
+ value: 0.0001
89
+ desc: Learning rate
90
+ label_smoothing:
91
+ value: 0.05
92
+ desc: Label smoothing for cross entropy loss
93
+ batch_size:
94
+ value: 8
95
+ desc: Batch size for dataloaders (currently not used)
96
+ num_workers:
97
+ value: 16
98
+ desc: Number of workers for dataloaders
99
+ val_every:
100
+ value: 10
101
+ desc: Interval of training epochs after which validation is performed
102
+
103
+ # Evaluation configurations
104
+ model_path:
105
+ value: ''
106
+ desc: Path to model checkpoint for evaluation or reloading
107
+ evaluate:
108
+ value: False
109
+ desc: Whether to run evaluation (or training)
110
+ n_samples:
111
+ value: 16
112
+ desc: Number of samples for evaluating recovery
113
+ temperature:
114
+ value: 0.1
115
+ desc: Sampling temperature for evaluating recovery