chq1155 Claude Opus 4.8 (1M context) committed on
Commit
8d43f08
·
1 Parent(s): 1b56a0a

Reorganize root modules into models/ training/ mcts/ packages

Browse files

Move library modules out of the cluttered root into role-based folders:
- models/: diffusion.py, roformer.py, noise_schedule.py
- training/: finetune_utils.py, distributed_utils.py
- mcts/: peptide_mcts.py

Entry points (inference.py, finetune_multi_target.py) stay at root.
Update all imports across td3b/, baselines/, and entry scripts.
Fix pre-existing broken import (finetune_distributed_utils -> training.distributed_utils).
Update README Code Structure to match. Add .gitignore for __pycache__.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__/
2
+ *.pyc
README.md CHANGED
@@ -46,12 +46,16 @@ TD3B/
46
  TD3B/
47
  ├── inference.py # Generate binders (main inference entry point)
48
  ├── finetune_multi_target.py # Multi-target TD3B training
49
- ├── finetune_utils.py # Training utilities
50
  ├── launch_multi_target.sh # Training launcher script
51
- ├── diffusion.py # MDLM backbone (TR2-D2)
52
- ├── roformer.py # RoFormer wrapper
53
- ├── noise_schedule.py # Noise schedules
54
- ├── peptide_mcts.py # MCTS tree search
 
 
 
 
 
55
  ├── td3b/
56
  │ ├── direction_oracle.py # Direction Oracle (f_φ)
57
  │ ├── td3b_scoring.py # Gated reward R = g_ψ · σ(d*·(f_φ−0.5)/τ)
 
46
  TD3B/
47
  ├── inference.py # Generate binders (main inference entry point)
48
  ├── finetune_multi_target.py # Multi-target TD3B training
 
49
  ├── launch_multi_target.sh # Training launcher script
50
+ ├── models/
51
+ │ ├── diffusion.py # MDLM backbone (TR2-D2)
52
+ │ ├── roformer.py # RoFormer wrapper
53
+ │ └── noise_schedule.py # Noise schedules
54
+ ├── training/
55
+ │ ├── finetune_utils.py # Training utilities
56
+ │ └── distributed_utils.py # Distributed training helpers
57
+ ├── mcts/
58
+ │ └── peptide_mcts.py # MCTS tree search
59
  ├── td3b/
60
  │ ├── direction_oracle.py # Direction Oracle (f_φ)
61
  │ ├── td3b_scoring.py # Gated reward R = g_ψ · σ(d*·(f_φ−0.5)/τ)
baselines/baselines.py CHANGED
@@ -316,7 +316,7 @@ class PepTuneSampler:
316
  pareto_max_size: Optional[int],
317
  eps: float,
318
  ):
319
- from peptide_mcts import Node, updateParetoFront
320
  from utils.app import PeptideAnalyzer
321
 
322
  self.base_model = base_model
 
316
  pareto_max_size: Optional[int],
317
  eps: float,
318
  ):
319
+ from mcts.peptide_mcts import Node, updateParetoFront
320
  from utils.app import PeptideAnalyzer
321
 
322
  self.base_model = base_model
baselines/run_mcts_tr2d2.py CHANGED
@@ -14,7 +14,7 @@ ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
14
  if ROOT_DIR not in sys.path:
15
  sys.path.insert(0, ROOT_DIR)
16
 
17
- from diffusion import Diffusion
18
  from configs.finetune_config import (
19
  DiffusionConfig,
20
  RoFormerConfig,
@@ -25,8 +25,8 @@ from configs.finetune_config import (
25
  OptimConfig,
26
  MCTSConfig,
27
  )
28
- from finetune_utils import load_tokenizer
29
- from finetune_distributed_utils import setup_distributed, cleanup_distributed, is_main_process
30
  from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
31
  from td3b.direction_oracle import DirectionalOracle
32
  from finetune_multi_target_tr2d2_ddp import TR2D2GatedReward, TargetDataset, create_tr2d2_mcts
 
14
  if ROOT_DIR not in sys.path:
15
  sys.path.insert(0, ROOT_DIR)
16
 
17
+ from models.diffusion import Diffusion
18
  from configs.finetune_config import (
19
  DiffusionConfig,
20
  RoFormerConfig,
 
25
  OptimConfig,
26
  MCTSConfig,
27
  )
28
+ from training.finetune_utils import load_tokenizer
29
+ from training.distributed_utils import setup_distributed, cleanup_distributed, is_main_process
30
  from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
31
  from td3b.direction_oracle import DirectionalOracle
32
  from finetune_multi_target_tr2d2_ddp import TR2D2GatedReward, TargetDataset, create_tr2d2_mcts
baselines/run_validation_td3b.py CHANGED
@@ -14,7 +14,7 @@ ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
14
  if ROOT_DIR not in sys.path:
15
  sys.path.insert(0, ROOT_DIR)
16
 
17
- from diffusion import Diffusion
18
  from configs.finetune_config import (
19
  DiffusionConfig,
20
  RoFormerConfig,
@@ -25,9 +25,9 @@ from configs.finetune_config import (
25
  OptimConfig,
26
  MCTSConfig,
27
  )
28
- from finetune_utils import load_tokenizer, create_reward_function
29
  from finetune_multi_target import TargetDataset
30
- from distributed_utils import setup_distributed, cleanup_distributed, is_main_process
31
  from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
32
  from td3b.direction_oracle import DirectionalOracle
33
  from utils.app import PeptideAnalyzer
 
14
  if ROOT_DIR not in sys.path:
15
  sys.path.insert(0, ROOT_DIR)
16
 
17
+ from models.diffusion import Diffusion
18
  from configs.finetune_config import (
19
  DiffusionConfig,
20
  RoFormerConfig,
 
25
  OptimConfig,
26
  MCTSConfig,
27
  )
28
+ from training.finetune_utils import load_tokenizer, create_reward_function
29
  from finetune_multi_target import TargetDataset
30
+ from training.distributed_utils import setup_distributed, cleanup_distributed, is_main_process
31
  from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
32
  from td3b.direction_oracle import DirectionalOracle
33
  from utils.app import PeptideAnalyzer
baselines/sampling_setup.py CHANGED
@@ -14,7 +14,7 @@ import torch
14
  from hydra import compose, initialize_config_dir
15
  from hydra.core.global_hydra import GlobalHydra
16
 
17
- from diffusion import Diffusion
18
  from scoring.scoring_functions import ScoringFunctions
19
  from scoring.functions.binding import MultiTargetBindingAffinity
20
  from td3b.direction_oracle import DirectionalOracle, resolve_device
 
14
  from hydra import compose, initialize_config_dir
15
  from hydra.core.global_hydra import GlobalHydra
16
 
17
+ from models.diffusion import Diffusion
18
  from scoring.scoring_functions import ScoringFunctions
19
  from scoring.functions.binding import MultiTargetBindingAffinity
20
  from td3b.direction_oracle import DirectionalOracle, resolve_device
finetune_multi_target.py CHANGED
@@ -33,7 +33,7 @@ from tqdm import tqdm
33
  # Add project root to path
34
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
35
 
36
- from diffusion import Diffusion
37
  from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
38
  from utils.app import PeptideAnalyzer
39
  from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
@@ -60,7 +60,7 @@ from configs.finetune_config import (
60
  )
61
 
62
  # Import shared utilities
63
- from finetune_utils import (
64
  load_tokenizer,
65
  initialize_device,
66
  create_output_directory,
@@ -728,7 +728,7 @@ def main():
728
  )
729
 
730
  # WDCE loss
731
- from finetune_utils import loss_wdce
732
 
733
  logger.info("\n[4/6] Setting up training...")
734
  policy_model.train()
 
33
  # Add project root to path
34
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
35
 
36
+ from models.diffusion import Diffusion
37
  from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
38
  from utils.app import PeptideAnalyzer
39
  from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
 
60
  )
61
 
62
  # Import shared utilities
63
+ from training.finetune_utils import (
64
  load_tokenizer,
65
  initialize_device,
66
  create_output_directory,
 
728
  )
729
 
730
  # WDCE loss
731
+ from training.finetune_utils import loss_wdce
732
 
733
  logger.info("\n[4/6] Setting up training...")
734
  policy_model.train()
inference.py CHANGED
@@ -24,12 +24,12 @@ ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
24
  if ROOT_DIR not in sys.path:
25
  sys.path.insert(0, ROOT_DIR)
26
 
27
- from diffusion import Diffusion
28
  from configs.finetune_config import (
29
  DiffusionConfig, RoFormerConfig, NoiseConfig,
30
  TrainingConfig, SamplingConfig, EvalConfig, OptimConfig, MCTSConfig,
31
  )
32
- from finetune_utils import load_tokenizer, create_reward_function
33
  from td3b.direction_oracle import DirectionalOracle
34
  from td3b.td3b_scoring import create_td3b_reward_function
35
  from utils.app import PeptideAnalyzer
 
24
  if ROOT_DIR not in sys.path:
25
  sys.path.insert(0, ROOT_DIR)
26
 
27
+ from models.diffusion import Diffusion
28
  from configs.finetune_config import (
29
  DiffusionConfig, RoFormerConfig, NoiseConfig,
30
  TrainingConfig, SamplingConfig, EvalConfig, OptimConfig, MCTSConfig,
31
  )
32
+ from training.finetune_utils import load_tokenizer, create_reward_function
33
  from td3b.direction_oracle import DirectionalOracle
34
  from td3b.td3b_scoring import create_td3b_reward_function
35
  from utils.app import PeptideAnalyzer
peptide_mcts.py → mcts/peptide_mcts.py RENAMED
@@ -7,7 +7,7 @@ from utils.app import PeptideAnalyzer
7
  from utils.timer import StepTimer
8
  from scoring.scoring_functions import ScoringFunctions
9
 
10
- import noise_schedule
11
 
12
  ### for peptide multi-objective ###
13
  def dominates(a, b):
 
7
  from utils.timer import StepTimer
8
  from scoring.scoring_functions import ScoringFunctions
9
 
10
+ from models import noise_schedule
11
 
12
  ### for peptide multi-objective ###
13
  def dominates(a, b):
diffusion.py → models/diffusion.py RENAMED
@@ -15,9 +15,9 @@ import gc
15
  import utils.utils as utils
16
 
17
  from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
18
- import noise_schedule
19
  from torch.optim.lr_scheduler import _LRScheduler
20
- import roformer as roformer
21
  from utils.app import PeptideAnalyzer
22
  import pandas as pd
23
 
 
15
  import utils.utils as utils
16
 
17
  from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
18
+ from models import noise_schedule
19
  from torch.optim.lr_scheduler import _LRScheduler
20
+ from models import roformer
21
  from utils.app import PeptideAnalyzer
22
  import pandas as pd
23
 
noise_schedule.py → models/noise_schedule.py RENAMED
File without changes
roformer.py → models/roformer.py RENAMED
File without changes
td3b/direction_oracle.py CHANGED
@@ -20,7 +20,7 @@ if PROJECT_ROOT not in sys.path:
20
  sys.path.insert(0, PROJECT_ROOT)
21
 
22
  from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
23
- from roformer import Roformer
24
 
25
  logger = logging.getLogger(__name__)
26
 
 
20
  sys.path.insert(0, PROJECT_ROOT)
21
 
22
  from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
23
+ from models.roformer import Roformer
24
 
25
  logger = logging.getLogger(__name__)
26
 
td3b/td3b_finetune.py CHANGED
@@ -7,7 +7,7 @@ import numpy as np
7
  import torch
8
  import wandb
9
  import os
10
- from finetune_utils import loss_wdce
11
  from .td3b_losses import TD3BTotalLoss, extract_embeddings_from_mdlm
12
  from tqdm import tqdm
13
  import pandas as pd
@@ -66,7 +66,7 @@ def td3b_finetune(
66
  print("[TD3B] Creating reference model for KL regularization...")
67
 
68
  # Import Diffusion class
69
- from diffusion import Diffusion
70
 
71
  # Create new instance with same config
72
  reference_model = Diffusion(
 
7
  import torch
8
  import wandb
9
  import os
10
+ from training.finetune_utils import loss_wdce
11
  from .td3b_losses import TD3BTotalLoss, extract_embeddings_from_mdlm
12
  from tqdm import tqdm
13
  import pandas as pd
 
66
  print("[TD3B] Creating reference model for KL regularization...")
67
 
68
  # Import Diffusion class
69
+ from models.diffusion import Diffusion
70
 
71
  # Create new instance with same config
72
  reference_model = Diffusion(
td3b/td3b_mcts.py CHANGED
@@ -5,7 +5,7 @@ Extends the base MCTS to support directional rewards and confidence weighting.
5
 
6
  import numpy as np
7
  import torch
8
- from peptide_mcts import MCTS as BaseMCTS
9
  from .td3b_scoring import TD3BRewardFunction, TD3BConfidenceWeighting
10
 
11
 
@@ -144,7 +144,7 @@ class TD3B_MCTS(BaseMCTS):
144
  }
145
 
146
  # Pareto dominance filtering (same as base class)
147
- from peptide_mcts import dominated_by, dominates
148
 
149
  if any(dominated_by(sv, bi["score_vector"]) for bi in self.buffer):
150
  self._debug_buffer_decision(sv, "rejected_dominated")
 
5
 
6
  import numpy as np
7
  import torch
8
+ from mcts.peptide_mcts import MCTS as BaseMCTS
9
  from .td3b_scoring import TD3BRewardFunction, TD3BConfidenceWeighting
10
 
11
 
 
144
  }
145
 
146
  # Pareto dominance filtering (same as base class)
147
+ from mcts.peptide_mcts import dominated_by, dominates
148
 
149
  if any(dominated_by(sv, bi["score_vector"]) for bi in self.buffer):
150
  self._debug_buffer_decision(sv, "rejected_dominated")
distributed_utils.py → training/distributed_utils.py RENAMED
File without changes
finetune_utils.py → training/finetune_utils.py RENAMED
@@ -15,7 +15,7 @@ import wandb
15
  from torch.utils.data import DataLoader, TensorDataset
16
  from tqdm import tqdm
17
 
18
- from diffusion import Diffusion
19
  from td3b.td3b_mcts import create_td3b_mcts
20
  from td3b.td3b_scoring import TD3BRewardFunction
21
  from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
 
15
  from torch.utils.data import DataLoader, TensorDataset
16
  from tqdm import tqdm
17
 
18
+ from models.diffusion import Diffusion
19
  from td3b.td3b_mcts import create_td3b_mcts
20
  from td3b.td3b_scoring import TD3BRewardFunction
21
  from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer