# %% Imports
from Bio import SeqIO
from DeepPD.data_helper import Data2EqlTensor,Seqs2EqlTensor

# %% Load the test peptides
# Every FASTA record becomes an (id, sequence-string) pair.
file_path = './homo_test.fa'
data = [(entry.id, str(entry.seq)) for entry in SeqIO.parse(file_path, 'fasta')]

# Peek at the first sequence together with its length.
data[0][1], len(data[0][1])

# %% Encode the records as a single tensor
# The second argument (40) matches the width of the tensor in the saved output,
# torch.Size([6, 40]); the helper also printed "default_padding_value: 1".
# NOTE(review): presumably 40 is the padded sequence length - confirm in DeepPD.data_helper.
seqs, ids = Data2EqlTensor(data, 40)
seqs.shape
# %% Inspect the encoded tensor
seqs

# %% Predictor imports
from DeepPD.predictor import predict
import torch

# %% Classifier entry point
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


def homo_classifier(file, threshold, max_len=40, weight_path='./weight-Homo/4.pth'):
    """Score every peptide in a FASTA file with the Homo model weights.

    Args:
        file: FASTA source handed to ``SeqIO.parse`` (a path in this notebook).
        threshold: Forwarded unchanged to ``predict``. In the saved run with
            0.5, scores above it were labelled 'Peptide' and the rest
            'Non-Peptide'.
        max_len: Second argument of ``Data2EqlTensor``. Defaults to 40, the
            value that used to be hard-coded here.
            NOTE(review): presumably the padded sequence length - confirm
            against DeepPD.data_helper.
        weight_path: Checkpoint path forwarded to ``predict``. Defaults to the
            previously hard-coded './weight-Homo/4.pth'.

    Returns:
        Whatever ``predict`` returns; in the saved run this was a list of
        ``[id, sequence, score, label]`` rows. An empty list is returned,
        without calling the model, when the file contains no records.

    Note:
        Reads the module-level ``device`` defined in this cell.
    """
    data = [(record.id, str(record.seq)) for record in SeqIO.parse(file, 'fasta')]

    # Nothing to score: skip tensor building and model loading entirely.
    if not data:
        return []

    # Data2EqlTensor also returns a second value (bound to `ids` elsewhere in
    # the notebook); it is not needed here because `data` already carries the ids.
    seqs, _ = Data2EqlTensor(data, max_len)
    return predict(seqs, data, weight_path, threshold, device)
# %% Classify the test peptides
# A threshold of 0.5 is used; in the saved output the rows scoring above it
# (0.809, 0.827, 0.868) are labelled 'Peptide' and the others 'Non-Peptide'.
out = homo_classifier(file=file_path, threshold=0.5)
out