fondress
/

PDeepPP_ACE

Text Classification

protein language model

Model card Files Files and versions

fondress committed on Apr 14, 2025

Commit

5522f60

·

verified ·

1 Parent(s): a959f5c

Update README.md

Files changed (1) hide show

README.md +28 -4

README.md CHANGED Viewed

@@ -47,10 +47,27 @@ Here is an example of how to use PDeepPP to process protein sequences and obtain
 ```python
 import torch
 from transformers import AutoModel
 from processing_pdeeppp import PDeepPPProcessor
-# Load `PDeepPP` model
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using {device} device")
 model = AutoModel.from_pretrained("fondress/PDeepPP_ACE", trust_remote_code=True)
@@ -59,11 +76,18 @@ model.to(device)
 # Example protein sequences
 protein_sequences = ["MKVSTYSTQ", "MSRSTYV"]
-# Preprocess sequences
 processor = PDeepPPProcessor(pad_char="X", target_length=33)
-inputs = processor(sequences=protein_sequences, ptm_mode=True, return_tensors="pt")  # Set ptm_mode=True for PTM processing
-# Make predictions
 model.eval()
 outputs = model(**inputs)
 print(outputs["logits"])

 ```python
+import os
 import torch
+import numpy as np
 from transformers import AutoModel
 from processing_pdeeppp import PDeepPPProcessor
+# 加载预训练的特征表示
+train_representations_path = "./pretrained_weights/Hydroxyproline_P/train_combined_representations.npy"  # 替换为你的路径
+test_representations_path = "./pretrained_weights/Hydroxyproline_P/test_combined_representations.npy"  # 替换为你的路径
+# 检查文件是否存在
+assert os.path.exists(train_representations_path), "预训练的 train_combined_representations.npy 文件不存在！"
+assert os.path.exists(test_representations_path), "预训练的 test_combined_representations.npy 文件不存在！"
+# 加载预训练特征
+train_representations = np.load(train_representations_path)
+test_representations = np.load(test_representations_path)
+# 转换为 PyTorch 张量
+train_representations_tensor = torch.tensor(train_representations)
+test_representations_tensor = torch.tensor(test_representations)
+# 加载 `PDeepPP` 模型
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using {device} device")
 model = AutoModel.from_pretrained("fondress/PDeepPP_ACE", trust_remote_code=True)
 # Example protein sequences
 protein_sequences = ["MKVSTYSTQ", "MSRSTYV"]
+# 初始化 PDeepPPProcessor
 processor = PDeepPPProcessor(pad_char="X", target_length=33)
+# 预处理序列
+inputs = processor(sequences=protein_sequences, ptm_mode=True, return_tensors="pt")  # 设置 ptm_mode=True 处理 PTM 数据
+# 替换模型输入的嵌入表示为预训练特征
+# 假设 inputs["input_embeds"] 是需要被替换的嵌入
+# 在此处选择测试集中的预训练特征作为示例
+inputs["input_embeds"] = test_representations_tensor[:len(protein_sequences)].to(device)
+# 进行预测
 model.eval()
 outputs = model(**inputs)
 print(outputs["logits"])