chq1155 Claude Opus 4.8 (1M context) committed on
Commit ·
8d43f08
1
Parent(s): 1b56a0a
Reorganize root modules into models/ training/ mcts/ packages
Browse files
Move library modules out of the cluttered root into role-based folders:
- models/: diffusion.py, roformer.py, noise_schedule.py
- training/: finetune_utils.py, distributed_utils.py
- mcts/: peptide_mcts.py
Entry points (inference.py, finetune_multi_target.py) stay at root.
Update all imports across td3b/, baselines/, and entry scripts.
Fix pre-existing broken import (finetune_distributed_utils -> training.distributed_utils).
Update README Code Structure to match. Add .gitignore for __pycache__.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
- .gitignore +2 -0
- README.md +9 -5
- baselines/baselines.py +1 -1
- baselines/run_mcts_tr2d2.py +3 -3
- baselines/run_validation_td3b.py +3 -3
- baselines/sampling_setup.py +1 -1
- finetune_multi_target.py +3 -3
- inference.py +2 -2
- peptide_mcts.py → mcts/peptide_mcts.py +1 -1
- diffusion.py → models/diffusion.py +2 -2
- noise_schedule.py → models/noise_schedule.py +0 -0
- roformer.py → models/roformer.py +0 -0
- td3b/direction_oracle.py +1 -1
- td3b/td3b_finetune.py +2 -2
- td3b/td3b_mcts.py +2 -2
- distributed_utils.py → training/distributed_utils.py +0 -0
- finetune_utils.py → training/finetune_utils.py +1 -1
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
README.md
CHANGED
|
@@ -46,12 +46,16 @@ TD3B/
|
|
| 46 |
TD3B/
|
| 47 |
├── inference.py # Generate binders (main inference entry point)
|
| 48 |
├── finetune_multi_target.py # Multi-target TD3B training
|
| 49 |
-
├── finetune_utils.py # Training utilities
|
| 50 |
├── launch_multi_target.sh # Training launcher script
|
| 51 |
-
├──
|
| 52 |
-
├──
|
| 53 |
-
├──
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
├── td3b/
|
| 56 |
│ ├── direction_oracle.py # Direction Oracle (f_φ)
|
| 57 |
│ ├── td3b_scoring.py # Gated reward R = g_ψ · σ(d*·(f_φ−0.5)/τ)
|
|
|
|
| 46 |
TD3B/
|
| 47 |
├── inference.py # Generate binders (main inference entry point)
|
| 48 |
├── finetune_multi_target.py # Multi-target TD3B training
|
|
|
|
| 49 |
├── launch_multi_target.sh # Training launcher script
|
| 50 |
+
├── models/
|
| 51 |
+
│ ├── diffusion.py # MDLM backbone (TR2-D2)
|
| 52 |
+
│ ├── roformer.py # RoFormer wrapper
|
| 53 |
+
│ └── noise_schedule.py # Noise schedules
|
| 54 |
+
├── training/
|
| 55 |
+
│ ├── finetune_utils.py # Training utilities
|
| 56 |
+
│ └── distributed_utils.py # Distributed training helpers
|
| 57 |
+
├── mcts/
|
| 58 |
+
│ └── peptide_mcts.py # MCTS tree search
|
| 59 |
├── td3b/
|
| 60 |
│ ├── direction_oracle.py # Direction Oracle (f_φ)
|
| 61 |
│ ├── td3b_scoring.py # Gated reward R = g_ψ · σ(d*·(f_φ−0.5)/τ)
|
baselines/baselines.py
CHANGED
|
@@ -316,7 +316,7 @@ class PepTuneSampler:
|
|
| 316 |
pareto_max_size: Optional[int],
|
| 317 |
eps: float,
|
| 318 |
):
|
| 319 |
-
from peptide_mcts import Node, updateParetoFront
|
| 320 |
from utils.app import PeptideAnalyzer
|
| 321 |
|
| 322 |
self.base_model = base_model
|
|
|
|
| 316 |
pareto_max_size: Optional[int],
|
| 317 |
eps: float,
|
| 318 |
):
|
| 319 |
+
from mcts.peptide_mcts import Node, updateParetoFront
|
| 320 |
from utils.app import PeptideAnalyzer
|
| 321 |
|
| 322 |
self.base_model = base_model
|
baselines/run_mcts_tr2d2.py
CHANGED
|
@@ -14,7 +14,7 @@ ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
|
| 14 |
if ROOT_DIR not in sys.path:
|
| 15 |
sys.path.insert(0, ROOT_DIR)
|
| 16 |
|
| 17 |
-
from diffusion import Diffusion
|
| 18 |
from configs.finetune_config import (
|
| 19 |
DiffusionConfig,
|
| 20 |
RoFormerConfig,
|
|
@@ -25,8 +25,8 @@ from configs.finetune_config import (
|
|
| 25 |
OptimConfig,
|
| 26 |
MCTSConfig,
|
| 27 |
)
|
| 28 |
-
from finetune_utils import load_tokenizer
|
| 29 |
-
from finetune_distributed_utils import setup_distributed, cleanup_distributed, is_main_process
|
| 30 |
from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
|
| 31 |
from td3b.direction_oracle import DirectionalOracle
|
| 32 |
from finetune_multi_target_tr2d2_ddp import TR2D2GatedReward, TargetDataset, create_tr2d2_mcts
|
|
|
|
| 14 |
if ROOT_DIR not in sys.path:
|
| 15 |
sys.path.insert(0, ROOT_DIR)
|
| 16 |
|
| 17 |
+
from models.diffusion import Diffusion
|
| 18 |
from configs.finetune_config import (
|
| 19 |
DiffusionConfig,
|
| 20 |
RoFormerConfig,
|
|
|
|
| 25 |
OptimConfig,
|
| 26 |
MCTSConfig,
|
| 27 |
)
|
| 28 |
+
from training.finetune_utils import load_tokenizer
|
| 29 |
+
from training.distributed_utils import setup_distributed, cleanup_distributed, is_main_process
|
| 30 |
from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
|
| 31 |
from td3b.direction_oracle import DirectionalOracle
|
| 32 |
from finetune_multi_target_tr2d2_ddp import TR2D2GatedReward, TargetDataset, create_tr2d2_mcts
|
baselines/run_validation_td3b.py
CHANGED
|
@@ -14,7 +14,7 @@ ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
|
| 14 |
if ROOT_DIR not in sys.path:
|
| 15 |
sys.path.insert(0, ROOT_DIR)
|
| 16 |
|
| 17 |
-
from diffusion import Diffusion
|
| 18 |
from configs.finetune_config import (
|
| 19 |
DiffusionConfig,
|
| 20 |
RoFormerConfig,
|
|
@@ -25,9 +25,9 @@ from configs.finetune_config import (
|
|
| 25 |
OptimConfig,
|
| 26 |
MCTSConfig,
|
| 27 |
)
|
| 28 |
-
from finetune_utils import load_tokenizer, create_reward_function
|
| 29 |
from finetune_multi_target import TargetDataset
|
| 30 |
-
from distributed_utils import setup_distributed, cleanup_distributed, is_main_process
|
| 31 |
from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
|
| 32 |
from td3b.direction_oracle import DirectionalOracle
|
| 33 |
from utils.app import PeptideAnalyzer
|
|
|
|
| 14 |
if ROOT_DIR not in sys.path:
|
| 15 |
sys.path.insert(0, ROOT_DIR)
|
| 16 |
|
| 17 |
+
from models.diffusion import Diffusion
|
| 18 |
from configs.finetune_config import (
|
| 19 |
DiffusionConfig,
|
| 20 |
RoFormerConfig,
|
|
|
|
| 25 |
OptimConfig,
|
| 26 |
MCTSConfig,
|
| 27 |
)
|
| 28 |
+
from training.finetune_utils import load_tokenizer, create_reward_function
|
| 29 |
from finetune_multi_target import TargetDataset
|
| 30 |
+
from training.distributed_utils import setup_distributed, cleanup_distributed, is_main_process
|
| 31 |
from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
|
| 32 |
from td3b.direction_oracle import DirectionalOracle
|
| 33 |
from utils.app import PeptideAnalyzer
|
baselines/sampling_setup.py
CHANGED
|
@@ -14,7 +14,7 @@ import torch
|
|
| 14 |
from hydra import compose, initialize_config_dir
|
| 15 |
from hydra.core.global_hydra import GlobalHydra
|
| 16 |
|
| 17 |
-
from diffusion import Diffusion
|
| 18 |
from scoring.scoring_functions import ScoringFunctions
|
| 19 |
from scoring.functions.binding import MultiTargetBindingAffinity
|
| 20 |
from td3b.direction_oracle import DirectionalOracle, resolve_device
|
|
|
|
| 14 |
from hydra import compose, initialize_config_dir
|
| 15 |
from hydra.core.global_hydra import GlobalHydra
|
| 16 |
|
| 17 |
+
from models.diffusion import Diffusion
|
| 18 |
from scoring.scoring_functions import ScoringFunctions
|
| 19 |
from scoring.functions.binding import MultiTargetBindingAffinity
|
| 20 |
from td3b.direction_oracle import DirectionalOracle, resolve_device
|
finetune_multi_target.py
CHANGED
|
@@ -33,7 +33,7 @@ from tqdm import tqdm
|
|
| 33 |
# Add project root to path
|
| 34 |
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 35 |
|
| 36 |
-
from diffusion import Diffusion
|
| 37 |
from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
|
| 38 |
from utils.app import PeptideAnalyzer
|
| 39 |
from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
|
|
@@ -60,7 +60,7 @@ from configs.finetune_config import (
|
|
| 60 |
)
|
| 61 |
|
| 62 |
# Import shared utilities
|
| 63 |
-
from finetune_utils import (
|
| 64 |
load_tokenizer,
|
| 65 |
initialize_device,
|
| 66 |
create_output_directory,
|
|
@@ -728,7 +728,7 @@ def main():
|
|
| 728 |
)
|
| 729 |
|
| 730 |
# WDCE loss
|
| 731 |
-
from finetune_utils import loss_wdce
|
| 732 |
|
| 733 |
logger.info("\n[4/6] Setting up training...")
|
| 734 |
policy_model.train()
|
|
|
|
| 33 |
# Add project root to path
|
| 34 |
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 35 |
|
| 36 |
+
from models.diffusion import Diffusion
|
| 37 |
from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
|
| 38 |
from utils.app import PeptideAnalyzer
|
| 39 |
from scoring.functions.binding import MultiTargetBindingAffinity, TargetSpecificBindingAffinity
|
|
|
|
| 60 |
)
|
| 61 |
|
| 62 |
# Import shared utilities
|
| 63 |
+
from training.finetune_utils import (
|
| 64 |
load_tokenizer,
|
| 65 |
initialize_device,
|
| 66 |
create_output_directory,
|
|
|
|
| 728 |
)
|
| 729 |
|
| 730 |
# WDCE loss
|
| 731 |
+
from training.finetune_utils import loss_wdce
|
| 732 |
|
| 733 |
logger.info("\n[4/6] Setting up training...")
|
| 734 |
policy_model.train()
|
inference.py
CHANGED
|
@@ -24,12 +24,12 @@ ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
| 24 |
if ROOT_DIR not in sys.path:
|
| 25 |
sys.path.insert(0, ROOT_DIR)
|
| 26 |
|
| 27 |
-
from diffusion import Diffusion
|
| 28 |
from configs.finetune_config import (
|
| 29 |
DiffusionConfig, RoFormerConfig, NoiseConfig,
|
| 30 |
TrainingConfig, SamplingConfig, EvalConfig, OptimConfig, MCTSConfig,
|
| 31 |
)
|
| 32 |
-
from finetune_utils import load_tokenizer, create_reward_function
|
| 33 |
from td3b.direction_oracle import DirectionalOracle
|
| 34 |
from td3b.td3b_scoring import create_td3b_reward_function
|
| 35 |
from utils.app import PeptideAnalyzer
|
|
|
|
| 24 |
if ROOT_DIR not in sys.path:
|
| 25 |
sys.path.insert(0, ROOT_DIR)
|
| 26 |
|
| 27 |
+
from models.diffusion import Diffusion
|
| 28 |
from configs.finetune_config import (
|
| 29 |
DiffusionConfig, RoFormerConfig, NoiseConfig,
|
| 30 |
TrainingConfig, SamplingConfig, EvalConfig, OptimConfig, MCTSConfig,
|
| 31 |
)
|
| 32 |
+
from training.finetune_utils import load_tokenizer, create_reward_function
|
| 33 |
from td3b.direction_oracle import DirectionalOracle
|
| 34 |
from td3b.td3b_scoring import create_td3b_reward_function
|
| 35 |
from utils.app import PeptideAnalyzer
|
peptide_mcts.py → mcts/peptide_mcts.py
RENAMED
|
@@ -7,7 +7,7 @@ from utils.app import PeptideAnalyzer
|
|
| 7 |
from utils.timer import StepTimer
|
| 8 |
from scoring.scoring_functions import ScoringFunctions
|
| 9 |
|
| 10 |
-
import noise_schedule
|
| 11 |
|
| 12 |
### for peptide multi-objective ###
|
| 13 |
def dominates(a, b):
|
|
|
|
| 7 |
from utils.timer import StepTimer
|
| 8 |
from scoring.scoring_functions import ScoringFunctions
|
| 9 |
|
| 10 |
+
from models import noise_schedule
|
| 11 |
|
| 12 |
### for peptide multi-objective ###
|
| 13 |
def dominates(a, b):
|
diffusion.py → models/diffusion.py
RENAMED
|
@@ -15,9 +15,9 @@ import gc
|
|
| 15 |
import utils.utils as utils
|
| 16 |
|
| 17 |
from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
|
| 18 |
-
import noise_schedule
|
| 19 |
from torch.optim.lr_scheduler import _LRScheduler
|
| 20 |
-
import roformer
|
| 21 |
from utils.app import PeptideAnalyzer
|
| 22 |
import pandas as pd
|
| 23 |
|
|
|
|
| 15 |
import utils.utils as utils
|
| 16 |
|
| 17 |
from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
|
| 18 |
+
from models import noise_schedule
|
| 19 |
from torch.optim.lr_scheduler import _LRScheduler
|
| 20 |
+
from models import roformer
|
| 21 |
from utils.app import PeptideAnalyzer
|
| 22 |
import pandas as pd
|
| 23 |
|
noise_schedule.py → models/noise_schedule.py
RENAMED
|
File without changes
|
roformer.py → models/roformer.py
RENAMED
|
File without changes
|
td3b/direction_oracle.py
CHANGED
|
@@ -20,7 +20,7 @@ if PROJECT_ROOT not in sys.path:
|
|
| 20 |
sys.path.insert(0, PROJECT_ROOT)
|
| 21 |
|
| 22 |
from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
|
| 23 |
-
from roformer import Roformer
|
| 24 |
|
| 25 |
logger = logging.getLogger(__name__)
|
| 26 |
|
|
|
|
| 20 |
sys.path.insert(0, PROJECT_ROOT)
|
| 21 |
|
| 22 |
from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
|
| 23 |
+
from models.roformer import Roformer
|
| 24 |
|
| 25 |
logger = logging.getLogger(__name__)
|
| 26 |
|
td3b/td3b_finetune.py
CHANGED
|
@@ -7,7 +7,7 @@ import numpy as np
|
|
| 7 |
import torch
|
| 8 |
import wandb
|
| 9 |
import os
|
| 10 |
-
from finetune_utils import loss_wdce
|
| 11 |
from .td3b_losses import TD3BTotalLoss, extract_embeddings_from_mdlm
|
| 12 |
from tqdm import tqdm
|
| 13 |
import pandas as pd
|
|
@@ -66,7 +66,7 @@ def td3b_finetune(
|
|
| 66 |
print("[TD3B] Creating reference model for KL regularization...")
|
| 67 |
|
| 68 |
# Import Diffusion class
|
| 69 |
-
from diffusion import Diffusion
|
| 70 |
|
| 71 |
# Create new instance with same config
|
| 72 |
reference_model = Diffusion(
|
|
|
|
| 7 |
import torch
|
| 8 |
import wandb
|
| 9 |
import os
|
| 10 |
+
from training.finetune_utils import loss_wdce
|
| 11 |
from .td3b_losses import TD3BTotalLoss, extract_embeddings_from_mdlm
|
| 12 |
from tqdm import tqdm
|
| 13 |
import pandas as pd
|
|
|
|
| 66 |
print("[TD3B] Creating reference model for KL regularization...")
|
| 67 |
|
| 68 |
# Import Diffusion class
|
| 69 |
+
from models.diffusion import Diffusion
|
| 70 |
|
| 71 |
# Create new instance with same config
|
| 72 |
reference_model = Diffusion(
|
td3b/td3b_mcts.py
CHANGED
|
@@ -5,7 +5,7 @@ Extends the base MCTS to support directional rewards and confidence weighting.
|
|
| 5 |
|
| 6 |
import numpy as np
|
| 7 |
import torch
|
| 8 |
-
from peptide_mcts import MCTS as BaseMCTS
|
| 9 |
from .td3b_scoring import TD3BRewardFunction, TD3BConfidenceWeighting
|
| 10 |
|
| 11 |
|
|
@@ -144,7 +144,7 @@ class TD3B_MCTS(BaseMCTS):
|
|
| 144 |
}
|
| 145 |
|
| 146 |
# Pareto dominance filtering (same as base class)
|
| 147 |
-
from peptide_mcts import dominated_by, dominates
|
| 148 |
|
| 149 |
if any(dominated_by(sv, bi["score_vector"]) for bi in self.buffer):
|
| 150 |
self._debug_buffer_decision(sv, "rejected_dominated")
|
|
|
|
| 5 |
|
| 6 |
import numpy as np
|
| 7 |
import torch
|
| 8 |
+
from mcts.peptide_mcts import MCTS as BaseMCTS
|
| 9 |
from .td3b_scoring import TD3BRewardFunction, TD3BConfidenceWeighting
|
| 10 |
|
| 11 |
|
|
|
|
| 144 |
}
|
| 145 |
|
| 146 |
# Pareto dominance filtering (same as base class)
|
| 147 |
+
from mcts.peptide_mcts import dominated_by, dominates
|
| 148 |
|
| 149 |
if any(dominated_by(sv, bi["score_vector"]) for bi in self.buffer):
|
| 150 |
self._debug_buffer_decision(sv, "rejected_dominated")
|
distributed_utils.py → training/distributed_utils.py
RENAMED
|
File without changes
|
finetune_utils.py → training/finetune_utils.py
RENAMED
|
@@ -15,7 +15,7 @@ import wandb
|
|
| 15 |
from torch.utils.data import DataLoader, TensorDataset
|
| 16 |
from tqdm import tqdm
|
| 17 |
|
| 18 |
-
from diffusion import Diffusion
|
| 19 |
from td3b.td3b_mcts import create_td3b_mcts
|
| 20 |
from td3b.td3b_scoring import TD3BRewardFunction
|
| 21 |
from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
|
|
|
|
| 15 |
from torch.utils.data import DataLoader, TensorDataset
|
| 16 |
from tqdm import tqdm
|
| 17 |
|
| 18 |
+
from models.diffusion import Diffusion
|
| 19 |
from td3b.td3b_mcts import create_td3b_mcts
|
| 20 |
from td3b.td3b_scoring import TD3BRewardFunction
|
| 21 |
from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
|