Mirror of github.com/kouroshSA/ppiBTEP
Browse files- .gitattributes +1 -0
- .gitignore +48 -0
- LICENSE +28 -0
- MED4-PPIs-low-confidence_ppiTEPM_prompts.csv +0 -0
- MED4_PRS_100.csv +101 -0
- MED4_RRS_100.csv +101 -0
- README.md +171 -0
- assets/ppiBTEP.png +3 -0
- inference_ppiBTPE_2GPU.py +164 -0
- requirements.txt +7 -0
- roc_analysis_color_threshold_F1e.py +210 -0
- train_ppiBTPE3b.py +223 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
assets/ppiBTEP.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Model checkpoints
|
| 2 |
+
*.pth
|
| 3 |
+
*.pt
|
| 4 |
+
out/
|
| 5 |
+
out_*/
|
| 6 |
+
|
| 7 |
+
# Data files
|
| 8 |
+
*.csv
|
| 9 |
+
*.bin
|
| 10 |
+
*.pkl
|
| 11 |
+
!requirements.txt
|
| 12 |
+
# Reference / inference CSVs that ship with the repo
|
| 13 |
+
!MED4_PRS_100.csv
|
| 14 |
+
!MED4_RRS_100.csv
|
| 15 |
+
!MED4-PPIs-low-confidence_ppiTEPM_prompts.csv
|
| 16 |
+
|
| 17 |
+
# Python
|
| 18 |
+
__pycache__/
|
| 19 |
+
*.pyc
|
| 20 |
+
*.pyo
|
| 21 |
+
*.egg-info/
|
| 22 |
+
dist/
|
| 23 |
+
build/
|
| 24 |
+
.eggs/
|
| 25 |
+
|
| 26 |
+
# Environment
|
| 27 |
+
.env
|
| 28 |
+
*.env
|
| 29 |
+
.venv/
|
| 30 |
+
venv/
|
| 31 |
+
|
| 32 |
+
# IDE
|
| 33 |
+
.vscode/
|
| 34 |
+
.idea/
|
| 35 |
+
*.swp
|
| 36 |
+
*.swo
|
| 37 |
+
*~
|
| 38 |
+
|
| 39 |
+
# OS
|
| 40 |
+
.DS_Store
|
| 41 |
+
Thumbs.db
|
| 42 |
+
|
| 43 |
+
# Jupyter
|
| 44 |
+
.ipynb_checkpoints/
|
| 45 |
+
|
| 46 |
+
# Logs
|
| 47 |
+
*.log
|
| 48 |
+
wandb/
|
LICENSE
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2026 Kourosh Salehi-Ashtiani
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
ppiBTEP (also designated SiameseBTPE) adapts the ESM-1b protein language
|
| 26 |
+
model (Rives et al., 2021) for binary protein-protein interaction
|
| 27 |
+
classification via a Siamese (twin-branch) architecture.
|
| 28 |
+
The ESM framework is by Meta AI Research.
|
MED4-PPIs-low-confidence_ppiTEPM_prompts.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
MED4_PRS_100.csv
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MNTRKKNPKRGVGKTETNTEWLDKVINQLINKDFSQYL,MKLRLFEFYFIKDYLRPWFGLIYSLFFLFFLGAIGYRITEGWDWGDCLWMVLITITTIGFGEVQTLSPEGRIITVLIIVGGLIFIQFTFQKAVRLFESGYFQRVNELRFKRLLRKMENHVILCGYGRVGQEISNQIKTQNIPIIVVESDEDRKKIAEDNGLEVLCADATLDETLKLAGLDKCKSLVVTLPNDAANLYVVLSAKGIRSSIRVIARAGTEEAASKLRLAGASIVVSPYIAAGRAMASMALRPIAIDFLDLLAGSECEIEEFELSNDISLFETAEKITLLELGIGKKSGAKILAIKEDEKLITNPGGDFLLQPGQVLIAFGSKEQLTTLNRLLGNLVVSVELLK
|
| 2 |
+
LVESTQSQDSNLGTRLQQDLKNDLIAGLLVVIPLATTIWLSSIVSKFVLTLVTSVPKQLNPFITLNPLLQDLINLTLGLTVPLLAILLIGLMARNFVGRWLLEFGEGTLSKIPVAGAVYKTLKQLLETFLSNKSNRFRRVVLVEYPREGLFSVGFVTGDVGPSLQSELDEKLLSVFIPTAPNPTTGWYTLVPESSVKDLAISVEDAFRTIISVGIVNPDEKDSSSNPTFSKLFSQLRASTNTSST,LVESTQSQDSNLGTRLQQDLKNDLIAGLLVVIPLATTIWLSSIVSKFVLTLVTSVPKQLNPFITLNPLLQDLINLTLGLTVPLLAILLIGLMARNFVGRWLLEFGEGTLSKIPVAGAVYKTLKQLLETFLSNKSNRFRRVVLVEYPREGLFSVGFVTGDVGPSLQSELDEKLLSVFIPTAPNPTTGWYTLVPESSVKDLAISVEDAFRTIISVGIVNPDEKDSSSNPTFSKLFSQLRASTNTSST
|
| 3 |
+
LMHNRSLSRELSLLSLGLIKDTADLELNKIQIDEIFESALDSLINHCRDELDNCEADLENVSQHILDSELKEGSNSSFANVRDELKKAFYKMESVMNSLSVTLDFPKLIVSSNQIDIREDVNHRILSIINNLKSIDSEIDEVMDRWRLKRLPRVDRDILRLAYVDIHFLDTPVAVACDEAVNLANKYCDTQGRKMINGVLRRLQRVKVN,LMHNRSLSRELSLLSLGLIKDTADLELNKIQIDEIFESALDSLINHCRDELDNCEADLENVSQHILDSELKEGSNSSFANVRDELKKAFYKMESVMNSLSVTLDFPKLIVSSNQIDIREDVNHRILSIINNLKSIDSEIDEVMDRWRLKRLPRVDRDILRLAYVDIHFLDTPVAVACDEAVNLANKYCDTQGRKMINGVLRRLQRVKVN
|
| 4 |
+
MNQFFSRRSFILIPIMSILKFILQPKKVLAAFAASDDDWNLSKEDWKNKLSPESYYILREEGTERAFSSQLNNEKRKGIFYCAGCNQPLFTSDTKFDSGTGWPSFWDPIQGSVETKVDFKLIVPRTEYHCSRCGGHQGHVFNDGPLPTGKRYCNNGLALKFIAE,MNQFFSRRSFILIPIMSILKFILQPKKVLAAFAASDDDWNLSKEDWKNKLSPESYYILREEGTERAFSSQLNNEKRKGIFYCAGCNQPLFTSDTKFDSGTGWPSFWDPIQGSVETKVDFKLIVPRTEYHCSRCGGHQGHVFNDGPLPTGKRYCNNGLALKFIAE
|
| 5 |
+
LVKPKSPDNKISNHLQQDVVKIAGKTIFINPFLYWRRFDENTNRWLREPGQMSEEQIQPNRNRFYPEIDWADLSQNQKLVKDASVEMFLKTLELISTFHPQLNSGQLLEVERKMAITKKLPFEKWVTKSFAKKARAEEYEKRKFKRDRFIRSWKEWLSLENTQQALLPIIVVVFVSAFIGWSSGVSKNSCNPYFEQNLDQSI,MKGHKKIRFIFPLVAMYVPLLLLAPKAIAGSFGAEIFCTMRDGGNDHESSWQAAYSYIKKQKGGIFKTSPKQAAGQIIETVVRERDKFSYCVEFLDQLHPDRKLQLENDRKEKRRKKEELLQDKENEDYSKETFDRYSY
|
| 6 |
+
MDKPKNKNFANTASRISAIASSVMDLHVRIALQEVDREKRRLISGGVFIAMGGILLLLVLISIHVIFYLTLSKLNNWATEYNLLLIIFVDLFLAGLSLKLGGKLAKGPYLPQTLEGLGKTTKAVLGKK,MDKPKNKNFANTASRISAIASSVMDLHVRIALQEVDREKRRLISGGVFIAMGGILLLLVLISIHVIFYLTLSKLNNWATEYNLLLIIFVDLFLAGLSLKLGGKLAKGPYLPQTLEGLGKTTKAVLGKK
|
| 7 |
+
MQSKTKELDPILEVNNLFASIENLPILKGVTISVNPGEIHAIMGRNGCGKSTLSKIIAGHPSYKITKGEIKFTGNDIQSLEPEERAQSGIFLGFQYPIEIPGVSNLEFLRVATNARRKFLNKEELDTFDFEDLVKEKLDLVKMDSAFLSRSINQGFSGGEKKRNEILQMALLEPKIAILDETDSGLDIDALRIVASGIKKISNEETGIILITHYQRLLDEIQPDYVHVMSDGQIIKTGESDLALELEKHGYEWTDNFIKEQ,MQSKTKELDPILEVNNLFASIENLPILKGVTISVNPGEIHAIMGRNGCGKSTLSKIIAGHPSYKITKGEIKFTGNDIQSLEPEERAQSGIFLGFQYPIEIPGVSNLEFLRVATNARRKFLNKEELDTFDFEDLVKEKLDLVKMDSAFLSRSINQGFSGGEKKRNEILQMALLEPKIAILDETDSGLDIDALRIVASGIKKISNEETGIILITHYQRLLDEIQPDYVHVMSDGQIIKTGESDLALELEKHGYEWTDNFIKEQ
|
| 8 |
+
MSKVEIYTWRFCPFCIRAKSLLEKKNITFTEHKIDGDDNARELMMERANGKRTVPQIFIDDKSIGGCDELYELEKEDKLDLLLN,MSKVEIYTWRFCPFCIRAKSLLEKKNITFTEHKIDGDDNARELMMERANGKRTVPQIFIDDKSIGGCDELYELEKEDKLDLLLN
|
| 9 |
+
VQFIDQANIILKAGKGGNGIVSFRREKFVPAGGPSGGNGGKGGSIIIIADNNLQTLLDFKFNREIFAKDGFKGGPNKRSGASGENTILKVPCGTEIRDFNTGIILGDLTEDKQSLTIAHGGRGGHGNAYYLSNQNRAPESFTEGKEGEIWEVQLELKLLAEVGIIGLPNAGKSTLISVLSSARPKIANYPFTTLIPNLGVVRKADGNGCLFADIPGLISGAAEGVGLGHDFLRHIQRTKILIHLIDSIAENPIRDFEIIEKELKRYGSGLLNKERIVVLNKMELVDENYLQTITKKLENLSKKKVLVISSSLRKGLSPLLSEVWKRI,VPNNQNRDNFIDKAFTVIAESIVKIMPIADKEKKAYIYYRDGLAAQNNGDYSEALDYYNESLLLEENKIDRGETLKNMAIIYMSNGEEDRSIETYQKALEENPKQPSCLKNIGLIYEKRGRFAEQNGDLDQRDMWFDKAAQVWSKAVRLYPGGYLDIENWLKTSGRSSIDIYL
|
| 10 |
+
MTTIQQQRTSLLKGWPQFCEWVTSTNNRIYVGWFGVLMIPCLLAAAACFIVAFIAAPPVDIDGIREPVAGSFLYGNNIISGAVVPSSNAIGLHFYPIWEAATVDEWLYNGGPYQLVIFHFLIGISAYMGRQWELSYRLGMRPWICVAYSAPVSAAFAVFLVYPFGQGSFSDGMPLGISGTFNFMFVFQAEHNILMHPFHMAGVAGMFGGSLFSAMHGSLVTSSLIRETTETESQNYGYKFGQEEETYNIVAAHGYFGRLIFQYASFNNSRSLHFFLAVFPVVCVWLTSMGICTMAFNLNGFNFNQSVVDANGKIVPTWGDVLNRANLGMEVMHERNAHNFPLDLAAAESTTVALTAPAIG,MTTIQQQRTSLLKGWPQFCEWVTSTNNRIYVGWFGVLMIPCLLAAAACFIVAFIAAPPVDIDGIREPVAGSFLYGNNIISGAVVPSSNAIGLHFYPIWEAATVDEWLYNGGPYQLVIFHFLIGISAYMGRQWELSYRLGMRPWICVAYSAPVSAAFAVFLVYPFGQGSFSDGMPLGISGTFNFMFVFQAEHNILMHPFHMAGVAGMFGGSLFSAMHGSLVTSSLIRETTETESQNYGYKFGQEEETYNIVAAHGYFGRLIFQYASFNNSRSLHFFLAVFPVVCVWLTSMGICTMAFNLNGFNFNQSVVDANGKIVPTWGDVLNRANLGMEVMHERNAHNFPLDLAAAESTTVALTAPAIG
|
| 11 |
+
MSKVELISLTPEAEKTMAYIARVSNPSNQANDKFAGLLRYCIKHEHWSVFEQSCMTLKIETNRGIAAQILRHRSFTFQEFSQRYAETSLLGNEIPIPNLRRQDQKNRQNSIDDIPDELKIKFSEKISKHFQEANKLYEEMLNEGIAKECARFIMPLATPTRIYMTGSCRSWIHYIQLRSKEGTQKEHMEIAEDCKKVFIKYFPSVSEALNWE,MSKVELISLTPEAEKTMAYIARVSNPSNQANDKFAGLLRYCIKHEHWSVFEQSCMTLKIETNRGIAAQILRHRSFTFQEFSQRYAETSLLGNEIPIPNLRRQDQKNRQNSIDDIPDELKIKFSEKISKHFQEANKLYEEMLNEGIAKECARFIMPLATPTRIYMTGSCRSWIHYIQLRSKEGTQKEHMEIAEDCKKVFIKYFPSVSEALNWE
|
| 12 |
+
VSGWLFIIFLLLLGGLISTLGDLLGSKIGKARFSILKLRPKKTAILITILTGSLISASSLFLMILVNRQLRVGLFRLGDLQKKLQESKQVLIPLEKEREKLENKIKAKETEFKQLERNIIALRSGKFVIRSGQSLIISEISSSNLKDIKSKIEKIIINANRYTHKIVKPKNKEVKNLLLLRKNHIEEMQNIILKGGNWVINIKSVRNVLTGENFVYAFPEITENKIIVRKGEKITKIDFKQEDFNKKDFGDKVNFLLSSSLAEIKRRGSLVNEIKLRGDSIKELRDFLNKNDKTNFELEAVSLFNSKTAQPVIVELNVNYPES,VSGWLFIIFLLLLGGLISTLGDLLGSKIGKARFSILKLRPKKTAILITILTGSLISASSLFLMILVNRQLRVGLFRLGDLQKKLQESKQVLIPLEKEREKLENKIKAKETEFKQLERNIIALRSGKFVIRSGQSLIISEISSSNLKDIKSKIEKIIINANRYTHKIVKPKNKEVKNLLLLRKNHIEEMQNIILKGGNWVINIKSVRNVLTGENFVYAFPEITENKIIVRKGEKITKIDFKQEDFNKKDFGDKVNFLLSSSLAEIKRRGSLVNEIKLRGDSIKELRDFLNKNDKTNFELEAVSLFNSKTAQPVIVELNVNYPES
|
| 13 |
+
MNDRIIEFEPLIEGILIKRYKRFLADIQIENGEIVTAHCANTGPMKGLLNEGANVRISFSSSTKRKLPWTWEQVKVIGRDNKEVWVGINTLFANKLIRKVIEQNLFKDKLGEIAKIKSEVPYGKDKKSRIDFLLTPKSSNPDNRNIYVEVKNTTWTKNNVALFPDTETKRGQKHLIELKGLIPESKSVLVPCITRKDIDYFAPGDESDPLYGELFRESISAGMLLIPCCFEFHSDHVAWKGFKPLKLN,MNDRIIEFEPLIEGILIKRYKRFLADIQIENGEIVTAHCANTGPMKGLLNEGANVRISFSSSTKRKLPWTWEQVKVIGRDNKEVWVGINTLFANKLIRKVIEQNLFKDKLGEIAKIKSEVPYGKDKKSRIDFLLTPKSSNPDNRNIYVEVKNTTWTKNNVALFPDTETKRGQKHLIELKGLIPESKSVLVPCITRKDIDYFAPGDESDPLYGELFRESISAGMLLIPCCFEFHSDHVAWKGFKPLKLN
|
| 14 |
+
MRRSLRDSIVGFSLLGGLLVFTFFSFWLRGVKLSSKNWYLFAEFNNASGLSKKSPVTYRGILVGSIEDILFTNESIKAKIVLNNPEIILPKPAFARVVTNSFLGGDVQVALETSEKTIPKNTPKAISDKCDSKLIICQGDTITGKQLSSLSNITNRINQLLKESNQENLIENVVNSIDQFDKTQENLDELIYLSKQEIIRVKPLIKEVTIAAGHLNNILSTINDEETLKDIKLTIEAAESISGKFDNMSDDFEQLMKDKELTKSIRDLTIGLSKFLNEIYP,MRRSLRDSIVGFSLLGGLLVFTFFSFWLRGVKLSSKNWYLFAEFNNASGLSKKSPVTYRGILVGSIEDILFTNESIKAKIVLNNPEIILPKPAFARVVTNSFLGGDVQVALETSEKTIPKNTPKAISDKCDSKLIICQGDTITGKQLSSLSNITNRINQLLKESNQENLIENVVNSIDQFDKTQENLDELIYLSKQEIIRVKPLIKEVTIAAGHLNNILSTINDEETLKDIKLTIEAAESISGKFDNMSDDFEQLMKDKELTKSIRDLTIGLSKFLNEIYP
|
| 15 |
+
MASSETYEFLFVKPGDHVVIKNEKPPGNTQNGRQEYWIGQIISCIGGARNPNSWTLFQVADIDNGEIIIINADTVERILKTAEN,MASSETYEFLFVKPGDHVVIKNEKPPGNTQNGRQEYWIGQIISCIGGARNPNSWTLFQVADIDNGEIIIINADTVERILKTAEN
|
| 16 |
+
LSVDRELLKEVTQELWNTVKKLRPEIDRETRLQLVLKALLTIGDLPDQLQAAMVVGVCAEMDKSDFENADGNSNTKEESNSTSVDTSTGRKVFRRSSAK,LSVDRELLKEVTQELWNTVKKLRPEIDRETRLQLVLKALLTIGDLPDQLQAAMVVGVCAEMDKSDFENADGNSNTKEESNSTSVDTSTGRKVFRRSSAK
|
| 17 |
+
LQIGDKVPQFSLLDQNGTKRSNNGLKTPLVLFFYPKDDTPGCTIEVCGFRDKYDLFKVLGAQVWGVSNGSSSSHLAFANKNKLQYPLLCDKNDSLRKAFKVPKVLGLLDGRVTYVIDRNGFVKHIFRDLLNGPEHIKEAIRVLKEIQNQ,LQIGDKVPQFSLLDQNGTKRSNNGLKTPLVLFFYPKDDTPGCTIEVCGFRDKYDLFKVLGAQVWGVSNGSSSSHLAFANKNKLQYPLLCDKNDSLRKAFKVPKVLGLLDGRVTYVIDRNGFVKHIFRDLLNGPEHIKEAIRVLKEIQNQ
|
| 18 |
+
MFSINKSNFMKKIGMQAVDEAIENGIDLDGTPIPSKMLELYNRIMSEENKRERSGVKKSMRNRCVKTGSKHFDKETLDQLLIDSGWEGLKEKEILFFYS,MFSINKSNFMKKIGMQAVDEAIENGIDLDGTPIPSKMLELYNRIMSEENKRERSGVKKSMRNRCVKTGSKHFDKETLDQLLIDSGWEGLKEKEILFFYS
|
| 19 |
+
VNSNSSNQVGKNIRRTGFLIVLSYLLIVLIMKVLEANNFFGYSLSSFSNDIFAPPSLKHLCGTDRLGRDVCLRTLQGSSIAIEVVFLAIFFALILGLPLGLLSGYFGGILDKCLSLVMDTIFSIPVILLAVVVAFVLGKGIINASIALCIVYSPQYFRLIRNQTMLIKSETYVEAARVSGADVKTIIFKYILPNVITPLPILLTLNAADAVLVLGSLGFLGLGVPANVPEWGSDLNLALAAIPTGIWWTALFPGLAMFFLVLGLSFIGEELENIFEN,LEGINLNQIGVSFKGSGSYVPNQILTNQEISKKVETSDEWIKSRTGISQRRISGLSENVSEMGYKAALGAIEMARWDIETIDLIILATSTPNDLFGSAPEIQSKLGAINAVAFDLTAACSGFLFAAITATQFLKAGSYKRAVVIGSDQLSSYVDWNDRRSCILFGDGAGAIAIEGTNELDNLLGFSMRTDGQRGSFLNLPSQNNQDLIINDINFSSGGFSSIKMNGQEVYKFAVREVPLIIDNLFKKTNFNSEKINWLLLHQANQRILDSVGERLNVSTEKILSNLSNYGNTSAATIPLMLDEAIRNKKIKENDIIATSGFGAGLSWGAALIRWG
|
| 20 |
+
MSNTESLTGKVALITGASRGIGKEIALELSNLGAKVIINYSSSDEKAEEVVNLIKESGGKVHKLKFDVSKEESVSKAFEEIIKINGAIDILVNNAGITRDGLLMRMKSEQWDDVLNTNLKGVFLCTKYASKFMIKKRSGKIINISSIVGIIGNPGQANYSAAKAGVIGFTKTCAKEFASRGINVNAIAPGFIETEMTEKLNNEEIIKAIPLGKLGSCSQIANLVSFLVSSNAGSYITGQTISIDGGMSI,MSNTESLTGKVALITGASRGIGKEIALELSNLGAKVIINYSSSDEKAEEVVNLIKESGGKVHKLKFDVSKEESVSKAFEEIIKINGAIDILVNNAGITRDGLLMRMKSEQWDDVLNTNLKGVFLCTKYASKFMIKKRSGKIINISSIVGIIGNPGQANYSAAKAGVIGFTKTCAKEFASRGINVNAIAPGFIETEMTEKLNNEEIIKAIPLGKLGSCSQIANLVSFLVSSNAGSYITGQTISIDGGMSI
|
| 21 |
+
MHKVLAIETSCDETSVSIVSNSGDIYKIHSNIVASQIEDHSKWGGVVPELAARKHLELLPFVLEQALEESKIRIEKIDVIASTVTPGLVGCLRVGSITARSLCTLYSKPFLGIHHLEGHLSSILFSKNYPKPPFLTLLVSGGHTELIKVGERRKMQRLGRSYDDAAGEAFDKVGRLLGLSYPGGPAIAKIAKKGNASKFNLPKCKISDKEGGFLKYDFSFSGLKTAVLRLVEKINLNGDEIPIPDIAASFERVVAEVLVERTIKCANDYGLDNIVVVGGVAANDTLRKMMISEACKKSIKVHLAPINLCTDNAAMIGAAALYRLKFKAYESSLKLGISGRLPIDQANTLYENKPPF,LVKPKSPDNKISNHLQQDVVKIAGKTIFINPFLYWRRFDENTNRWLREPGQMSEEQIQPNRNRFYPEIDWADLSQNQKLVKDASVEMFLKTLELISTFHPQLNSGQLLEVERKMAITKKLPFEKWVTKSFAKKARAEEYEKRKFKRDRFIRSWKEWLSLENTQQALLPIIVVVFVSAFIGWSSGVSKNSCNPYFEQNLDQSI
|
| 22 |
+
MHKVLAIETSCDETSVSIVSNSGDIYKIHSNIVASQIEDHSKWGGVVPELAARKHLELLPFVLEQALEESKIRIEKIDVIASTVTPGLVGCLRVGSITARSLCTLYSKPFLGIHHLEGHLSSILFSKNYPKPPFLTLLVSGGHTELIKVGERRKMQRLGRSYDDAAGEAFDKVGRLLGLSYPGGPAIAKIAKKGNASKFNLPKCKISDKEGGFLKYDFSFSGLKTAVLRLVEKINLNGDEIPIPDIAASFERVVAEVLVERTIKCANDYGLDNIVVVGGVAANDTLRKMMISEACKKSIKVHLAPINLCTDNAAMIGAAALYRLKFKAYESSLKLGISGRLPIDQANTLYENKPPF,VNSNSSNQVGKNIRRTGFLIVLSYLLIVLIMKVLEANNFFGYSLSSFSNDIFAPPSLKHLCGTDRLGRDVCLRTLQGSSIAIEVVFLAIFFALILGLPLGLLSGYFGGILDKCLSLVMDTIFSIPVILLAVVVAFVLGKGIINASIALCIVYSPQYFRLIRNQTMLIKSETYVEAARVSGADVKTIIFKYILPNVITPLPILLTLNAADAVLVLGSLGFLGLGVPANVPEWGSDLNLALAAIPTGIWWTALFPGLAMFFLVLGLSFIGEELENIFEN
|
| 23 |
+
MHKVLAIETSCDETSVSIVSNSGDIYKIHSNIVASQIEDHSKWGGVVPELAARKHLELLPFVLEQALEESKIRIEKIDVIASTVTPGLVGCLRVGSITARSLCTLYSKPFLGIHHLEGHLSSILFSKNYPKPPFLTLLVSGGHTELIKVGERRKMQRLGRSYDDAAGEAFDKVGRLLGLSYPGGPAIAKIAKKGNASKFNLPKCKISDKEGGFLKYDFSFSGLKTAVLRLVEKINLNGDEIPIPDIAASFERVVAEVLVERTIKCANDYGLDNIVVVGGVAANDTLRKMMISEACKKSIKVHLAPINLCTDNAAMIGAAALYRLKFKAYESSLKLGISGRLPIDQANTLYENKPPF,MHKVLAIETSCDETSVSIVSNSGDIYKIHSNIVASQIEDHSKWGGVVPELAARKHLELLPFVLEQALEESKIRIEKIDVIASTVTPGLVGCLRVGSITARSLCTLYSKPFLGIHHLEGHLSSILFSKNYPKPPFLTLLVSGGHTELIKVGERRKMQRLGRSYDDAAGEAFDKVGRLLGLSYPGGPAIAKIAKKGNASKFNLPKCKISDKEGGFLKYDFSFSGLKTAVLRLVEKINLNGDEIPIPDIAASFERVVAEVLVERTIKCANDYGLDNIVVVGGVAANDTLRKMMISEACKKSIKVHLAPINLCTDNAAMIGAAALYRLKFKAYESSLKLGISGRLPIDQANTLYENKPPF
|
| 24 |
+
MIKNPIQEVTNKLQYRAIGIVKGIYKPNNIDQLNRGTLTDKEGKIIETVILGKAIALIKKYINLEKDYFWIVYPRNKNINNLHLQVAGIWDPYQLNQFDKNNSEKDPNQLLEELNLNNNYFSIRGELVYVNIKKKEIVIKICSSPPSKRSKYLTFKIIIEGEIPLQFLNNFVSLDVIRDGNTLRMAKYEIIEKIKPEKV,MIKNPIQEVTNKLQYRAIGIVKGIYKPNNIDQLNRGTLTDKEGKIIETVILGKAIALIKKYINLEKDYFWIVYPRNKNINNLHLQVAGIWDPYQLNQFDKNNSEKDPNQLLEELNLNNNYFSIRGELVYVNIKKKEIVIKICSSPPSKRSKYLTFKIIIEGEIPLQFLNNFVSLDVIRDGNTLRMAKYEIIEKIKPEKV
|
| 25 |
+
MTKFKLKIASRRSKLAMVQTLWVKEQLEKNIPDLEVSIEAMATQGDKILDVALAKIGDKGLFTKELEAQMLVGHADIAVHSLKDLPTNLPDGLTLGCITKREDPSDALVVNKKNKIYQLESLPPGSIVGTSSLRRLAQLRYKFPHLDFKDIRGNVITRIEKLDSGEFDCIILAAAGLKRLGFESRVHQIIPNEISLHAVGQGALGIECKSDDKEVLKIISVLEDKVSSQRCLAERSFLRELEGGCQVPIGVNSSIQNDEIALIGMVASIDGKRLIKNESIGNIKYPEEVGKKLAEKLKLQGADKILSEIFEQFRDK,VPNNQNRDNFIDKAFTVIAESIVKIMPIADKEKKAYIYYRDGLAAQNNGDYSEALDYYNESLLLEENKIDRGETLKNMAIIYMSNGEEDRSIETYQKALEENPKQPSCLKNIGLIYEKRGRFAEQNGDLDQRDMWFDKAAQVWSKAVRLYPGGYLDIENWLKTSGRSSIDIYL
|
| 26 |
+
MTKFKLKIASRRSKLAMVQTLWVKEQLEKNIPDLEVSIEAMATQGDKILDVALAKIGDKGLFTKELEAQMLVGHADIAVHSLKDLPTNLPDGLTLGCITKREDPSDALVVNKKNKIYQLESLPPGSIVGTSSLRRLAQLRYKFPHLDFKDIRGNVITRIEKLDSGEFDCIILAAAGLKRLGFESRVHQIIPNEISLHAVGQGALGIECKSDDKEVLKIISVLEDKVSSQRCLAERSFLRELEGGCQVPIGVNSSIQNDEIALIGMVASIDGKRLIKNESIGNIKYPEEVGKKLAEKLKLQGADKILSEIFEQFRDK,VNSNSSNQVGKNIRRTGFLIVLSYLLIVLIMKVLEANNFFGYSLSSFSNDIFAPPSLKHLCGTDRLGRDVCLRTLQGSSIAIEVVFLAIFFALILGLPLGLLSGYFGGILDKCLSLVMDTIFSIPVILLAVVVAFVLGKGIINASIALCIVYSPQYFRLIRNQTMLIKSETYVEAARVSGADVKTIIFKYILPNVITPLPILLTLNAADAVLVLGSLGFLGLGVPANVPEWGSDLNLALAAIPTGIWWTALFPGLAMFFLVLGLSFIGEELENIFEN
|
| 27 |
+
MTKFKLKIASRRSKLAMVQTLWVKEQLEKNIPDLEVSIEAMATQGDKILDVALAKIGDKGLFTKELEAQMLVGHADIAVHSLKDLPTNLPDGLTLGCITKREDPSDALVVNKKNKIYQLESLPPGSIVGTSSLRRLAQLRYKFPHLDFKDIRGNVITRIEKLDSGEFDCIILAAAGLKRLGFESRVHQIIPNEISLHAVGQGALGIECKSDDKEVLKIISVLEDKVSSQRCLAERSFLRELEGGCQVPIGVNSSIQNDEIALIGMVASIDGKRLIKNESIGNIKYPEEVGKKLAEKLKLQGADKILSEIFEQFRDK,MTKFKLKIASRRSKLAMVQTLWVKEQLEKNIPDLEVSIEAMATQGDKILDVALAKIGDKGLFTKELEAQMLVGHADIAVHSLKDLPTNLPDGLTLGCITKREDPSDALVVNKKNKIYQLESLPPGSIVGTSSLRRLAQLRYKFPHLDFKDIRGNVITRIEKLDSGEFDCIILAAAGLKRLGFESRVHQIIPNEISLHAVGQGALGIECKSDDKEVLKIISVLEDKVSSQRCLAERSFLRELEGGCQVPIGVNSSIQNDEIALIGMVASIDGKRLIKNESIGNIKYPEEVGKKLAEKLKLQGADKILSEIFEQFRDK
|
| 28 |
+
MLVKKMTELFSRFFVKAISFAICISVFFTLFNSPSYAAKTSMTGDYAKDTISVVKTLQIAVETPKDSPDKDKVRDESLALITDYISRYRNRGMVNKTQSFTTMQTALNAMAGHYKNFATRPLPDKLKERLTKEFTLAEKMVLRES,MLVKKMTELFSRFFVKAISFAICISVFFTLFNSPSYAAKTSMTGDYAKDTISVVKTLQIAVETPKDSPDKDKVRDESLALITDYISRYRNRGMVNKTQSFTTMQTALNAMAGHYKNFATRPLPDKLKERLTKEFTLAEKMVLRES
|
| 29 |
+
LKKITSILIIFFLIVLYPIRTYSAEILQINNSSSILVGDQNRDLPIKLFCVEINNEDDEKIALNLLKKEFPRGSKVKIKPIGFKENILTARVFNINETKEMSDLLISKNLSKETCQN,LKKITSILIIFFLIVLYPIRTYSAEILQINNSSSILVGDQNRDLPIKLFCVEINNEDDEKIALNLLKKEFPRGSKVKIKPIGFKENILTARVFNINETKEMSDLLISKNLSKETCQN
|
| 30 |
+
MESIFNNSFATLVAYVGIVSIYLLVIPLILFYWMNNRWNVMGKFERLIVYGLVFLFFPGLILFSPFLNLRLRGDSKG,MESIFNNSFATLVAYVGIVSIYLLVIPLILFYWMNNRWNVMGKFERLIVYGLVFLFFPGLILFSPFLNLRLRGDSKG
|
| 31 |
+
LTKGKVVQIGLLISLLGLLSYKLAPQLGIDNFTASTISNFVLIVIVISWVTSYVLRVLNGKMTFMEQRKRYRKEYEKIVNDKLETKFNLLPKEEQEKLMEDLEKNP,LTKGKVVQIGLLISLLGLLSYKLAPQLGIDNFTASTISNFVLIVIVISWVTSYVLRVLNGKMTFMEQRKRYRKEYEKIVNDKLETKFNLLPKEEQEKLMEDLEKNP
|
| 32 |
+
MNDSYYKDKEKIYDAEVLESSSLDENIIIKILIRAGRTIAKPALEVLEMALDPFTPTQVRVSLMAALAYLIMPFDLFPDFMPLVGYSDDFVALTAVLSIWSRYMTPAIRARAERKLNKLFPFVK,MNDSYYKDKEKIYDAEVLESSSLDENIIIKILIRAGRTIAKPALEVLEMALDPFTPTQVRVSLMAALAYLIMPFDLFPDFMPLVGYSDDFVALTAVLSIWSRYMTPAIRARAERKLNKLFPFVK
|
| 33 |
+
MLRSIFAGFFAIVLTLGLGISSVSAKTVEVKLGTDAGMLAFEPSSVTISTGDTVKFINNKLAPHNAVFDGHEELSHADLAFAPGESWEETFDTAGTFDYYCEPHRGAGMVGKVIVE,MKGHKKIRFIFPLVAMYVPLLLLAPKAIAGSFGAEIFCTMRDGGNDHESSWQAAYSYIKKQKGGIFKTSPKQAAGQIIETVVRERDKFSYCVEFLDQLHPDRKLQLENDRKEKRRKKEELLQDKENEDYSKETFDRYSY
|
| 34 |
+
MLRSIFAGFFAIVLTLGLGISSVSAKTVEVKLGTDAGMLAFEPSSVTISTGDTVKFINNKLAPHNAVFDGHEELSHADLAFAPGESWEETFDTAGTFDYYCEPHRGAGMVGKVIVE,VQFIDQANIILKAGKGGNGIVSFRREKFVPAGGPSGGNGGKGGSIIIIADNNLQTLLDFKFNREIFAKDGFKGGPNKRSGASGENTILKVPCGTEIRDFNTGIILGDLTEDKQSLTIAHGGRGGHGNAYYLSNQNRAPESFTEGKEGEIWEVQLELKLLAEVGIIGLPNAGKSTLISVLSSARPKIANYPFTTLIPNLGVVRKADGNGCLFADIPGLISGAAEGVGLGHDFLRHIQRTKILIHLIDSIAENPIRDFEIIEKELKRYGSGLLNKERIVVLNKMELVDENYLQTITKKLENLSKKKVLVISSSLRKGLSPLLSEVWKRI
|
| 35 |
+
MTNKKRILSGVQPTGDLHIGNWLGAINNWVELQEKHETFLCVVDLHAITTEYDTKQLSKNTLSTAALYIACGINPKICSIFVQSQISAHSELCWILNCMTPINWMERMIQFKEKSIQQGNNVSIGLFDYPILMAADILLYDADYVPVGEDQKQHLELAKDIAQQRINAKFGKEENILKIPQPIIMKKGSKIMSLNDGSKKMSKSDINEGSRINLLDTPEIITKKIKRAKSDSYMGMEFNNPERPESRNLLMIYSLLSGKEVSELENDLSQTGWGTFKKIFTEQIIESLKPIQERYQVLINDPHELNKILIQGKEKAEVVANKTLSRVKSELGFFEIEK,MTNKKRILSGVQPTGDLHIGNWLGAINNWVELQEKHETFLCVVDLHAITTEYDTKQLSKNTLSTAALYIACGINPKICSIFVQSQISAHSELCWILNCMTPINWMERMIQFKEKSIQQGNNVSIGLFDYPILMAADILLYDADYVPVGEDQKQHLELAKDIAQQRINAKFGKEENILKIPQPIIMKKGSKIMSLNDGSKKMSKSDINEGSRINLLDTPEIITKKIKRAKSDSYMGMEFNNPERPESRNLLMIYSLLSGKEVSELENDLSQTGWGTFKKIFTEQIIESLKPIQERYQVLINDPHELNKILIQGKEKAEVVANKTLSRVKSELGFFEIEK
|
| 36 |
+
LGRSRKTNQLIREFLSELKEVLTKDGSITLRSLIFQENFHSLEGALKETEIKFIYPSDLKRFKDKSLNVLDICFGLGYNSASLFNNVIRQNSLINWYALEIDKKPLEYSLGNKSFQKLWHPKVFKILKALLKNSKYKDQFFDCDILWGDAREKIKNIPANIKFDLIYLDGFSPQKCPQVWSVEFLSKVTQKLNPQGYLITYSCSAAIRSTLKDFGLNIFNNKPNLVSKNLWSYGTIAVKNIDEKVLQNNLYLKKLSWMEEEHLLTKASIPYRDPTLNSNPKDIIKKRVQEQFLSNLKTSKKWRDKWGMTK,LGRSRKTNQLIREFLSELKEVLTKDGSITLRSLIFQENFHSLEGALKETEIKFIYPSDLKRFKDKSLNVLDICFGLGYNSASLFNNVIRQNSLINWYALEIDKKPLEYSLGNKSFQKLWHPKVFKILKALLKNSKYKDQFFDCDILWGDAREKIKNIPANIKFDLIYLDGFSPQKCPQVWSVEFLSKVTQKLNPQGYLITYSCSAAIRSTLKDFGLNIFNNKPNLVSKNLWSYGTIAVKNIDEKVLQNNLYLKKLSWMEEEHLLTKASIPYRDPTLNSNPKDIIKKRVQEQFLSNLKTSKKWRDKWGMTK
|
| 37 |
+
LKNGADSIQVFSDLELLKKTAKKWDGNKRLMLAERGGKTIDGFDLGNSPLSVTKETVQGKRLFMSTTNGTKSLKKVQNVENLFAMSLPNRKAVAERIISLNKKNVLILGSGWEGSYSLEDSLAAGALAIYIKENFNSEVNILNDELQASLALWNVWKNDILKCLKTATHGKRLTSLGDYEDDFKCCSELDCLDIVPTQVERGVIRAS,LKNGADSIQVFSDLELLKKTAKKWDGNKRLMLAERGGKTIDGFDLGNSPLSVTKETVQGKRLFMSTTNGTKSLKKVQNVENLFAMSLPNRKAVAERIISLNKKNVLILGSGWEGSYSLEDSLAAGALAIYIKENFNSEVNILNDELQASLALWNVWKNDILKCLKTATHGKRLTSLGDYEDDFKCCSELDCLDIVPTQVERGVIRAS
|
| 38 |
+
LISEIKELCLKANAIILAHYYQAPEIQDIADFIGDSLDLSRKAANNDADTIVFCGVHFMAETAKILSPNKTVLLPDIDAGCSLADDCPAEEFQKFREENPDHYVVSYINCTAEVKAQSDLICTSSNAVSLVEKIPKDKKIIFAPDKNLGRWVQKNSGRKLKLWPGSCIVHETFSEEALLKLKYKHPDAKVIAHPECSQNLLVLSDFIGSTSKLLDFVSNDYSDTYMVLTEPGIIHQMKKKEPNKNFIEVPDIDGCKCNECPYMKLNTLEKILDCLKNNSPSIELDPEIIKKAYKPIKRMLDMSI,LISEIKELCLKANAIILAHYYQAPEIQDIADFIGDSLDLSRKAANNDADTIVFCGVHFMAETAKILSPNKTVLLPDIDAGCSLADDCPAEEFQKFREENPDHYVVSYINCTAEVKAQSDLICTSSNAVSLVEKIPKDKKIIFAPDKNLGRWVQKNSGRKLKLWPGSCIVHETFSEEALLKLKYKHPDAKVIAHPECSQNLLVLSDFIGSTSKLLDFVSNDYSDTYMVLTEPGIIHQMKKKEPNKNFIEVPDIDGCKCNECPYMKLNTLEKILDCLKNNSPSIELDPEIIKKAYKPIKRMLDMSI
|
| 39 |
+
MNRKSNNSNPTGNLDYDKILEEEIINSYENKFEANSNINNKNKRFYRLKRTPLEVINRLFFFFFVGSFIFSFFLAYSENKVWFIIYLISAFSCIFYTPNRKALKELIAAWPNIEDLIKGRSLWRKDNK,MNRKSNNSNPTGNLDYDKILEEEIINSYENKFEANSNINNKNKRFYRLKRTPLEVINRLFFFFFVGSFIFSFFLAYSENKVWFIIYLISAFSCIFYTPNRKALKELIAAWPNIEDLIKGRSLWRKDNK
|
| 40 |
+
MKLRLFEFYFIKDYLRPWFGLIYSLFFLFFLGAIGYRITEGWDWGDCLWMVLITITTIGFGEVQTLSPEGRIITVLIIVGGLIFIQFTFQKAVRLFESGYFQRVNELRFKRLLRKMENHVILCGYGRVGQEISNQIKTQNIPIIVVESDEDRKKIAEDNGLEVLCADATLDETLKLAGLDKCKSLVVTLPNDAANLYVVLSAKGIRSSIRVIARAGTEEAASKLRLAGASIVVSPYIAAGRAMASMALRPIAIDFLDLLAGSECEIEEFELSNDISLFETAEKITLLELGIGKKSGAKILAIKEDEKLITNPGGDFLLQPGQVLIAFGSKEQLTTLNRLLGNLVVSVELLK,VPNNQNRDNFIDKAFTVIAESIVKIMPIADKEKKAYIYYRDGLAAQNNGDYSEALDYYNESLLLEENKIDRGETLKNMAIIYMSNGEEDRSIETYQKALEENPKQPSCLKNIGLIYEKRGRFAEQNGDLDQRDMWFDKAAQVWSKAVRLYPGGYLDIENWLKTSGRSSIDIYL
|
| 41 |
+
MKLRLFEFYFIKDYLRPWFGLIYSLFFLFFLGAIGYRITEGWDWGDCLWMVLITITTIGFGEVQTLSPEGRIITVLIIVGGLIFIQFTFQKAVRLFESGYFQRVNELRFKRLLRKMENHVILCGYGRVGQEISNQIKTQNIPIIVVESDEDRKKIAEDNGLEVLCADATLDETLKLAGLDKCKSLVVTLPNDAANLYVVLSAKGIRSSIRVIARAGTEEAASKLRLAGASIVVSPYIAAGRAMASMALRPIAIDFLDLLAGSECEIEEFELSNDISLFETAEKITLLELGIGKKSGAKILAIKEDEKLITNPGGDFLLQPGQVLIAFGSKEQLTTLNRLLGNLVVSVELLK,VSGWLFIIFLLLLGGLISTLGDLLGSKIGKARFSILKLRPKKTAILITILTGSLISASSLFLMILVNRQLRVGLFRLGDLQKKLQESKQVLIPLEKEREKLENKIKAKETEFKQLERNIIALRSGKFVIRSGQSLIISEISSSNLKDIKSKIEKIIINANRYTHKIVKPKNKEVKNLLLLRKNHIEEMQNIILKGGNWVINIKSVRNVLTGENFVYAFPEITENKIIVRKGEKITKIDFKQEDFNKKDFGDKVNFLLSSSLAEIKRRGSLVNEIKLRGDSIKELRDFLNKNDKTNFELEAVSLFNSKTAQPVIVELNVNYPES
|
| 42 |
+
MKLRLFEFYFIKDYLRPWFGLIYSLFFLFFLGAIGYRITEGWDWGDCLWMVLITITTIGFGEVQTLSPEGRIITVLIIVGGLIFIQFTFQKAVRLFESGYFQRVNELRFKRLLRKMENHVILCGYGRVGQEISNQIKTQNIPIIVVESDEDRKKIAEDNGLEVLCADATLDETLKLAGLDKCKSLVVTLPNDAANLYVVLSAKGIRSSIRVIARAGTEEAASKLRLAGASIVVSPYIAAGRAMASMALRPIAIDFLDLLAGSECEIEEFELSNDISLFETAEKITLLELGIGKKSGAKILAIKEDEKLITNPGGDFLLQPGQVLIAFGSKEQLTTLNRLLGNLVVSVELLK,LGRSRKTNQLIREFLSELKEVLTKDGSITLRSLIFQENFHSLEGALKETEIKFIYPSDLKRFKDKSLNVLDICFGLGYNSASLFNNVIRQNSLINWYALEIDKKPLEYSLGNKSFQKLWHPKVFKILKALLKNSKYKDQFFDCDILWGDAREKIKNIPANIKFDLIYLDGFSPQKCPQVWSVEFLSKVTQKLNPQGYLITYSCSAAIRSTLKDFGLNIFNNKPNLVSKNLWSYGTIAVKNIDEKVLQNNLYLKKLSWMEEEHLLTKASIPYRDPTLNSNPKDIIKKRVQEQFLSNLKTSKKWRDKWGMTK
|
| 43 |
+
MSIAKKALLFTSALALIAGPSVTASTRLSGAGASFPAKIYTRWFSDLAKSGGPRVNYQAVGSGSGRKAFIDQTVNFGASDDPMKDKDIAKVTRGLVQIPMVGGTIAFGYNYDCDLKLTQEQAVQVAMGMVKNWKELGCKAGKLTWAHRSDGSGTTKAFTNSMEAFSPTWTLGTGKSVKWPAGVGAKGNSGVAGVIQNTPGAIGYVNQSYIKGNVKAAALQNLSGEFLKPSVEAGAKALNGITLDENLAGKNPNPTAKGAYPIASLTWILAYEKGNGRNTKAIKKSLSTLLSDEYQDKAPTLGFVPLKGDILEKSRAAVKKIGR,MSIAKKALLFTSALALIAGPSVTASTRLSGAGASFPAKIYTRWFSDLAKSGGPRVNYQAVGSGSGRKAFIDQTVNFGASDDPMKDKDIAKVTRGLVQIPMVGGTIAFGYNYDCDLKLTQEQAVQVAMGMVKNWKELGCKAGKLTWAHRSDGSGTTKAFTNSMEAFSPTWTLGTGKSVKWPAGVGAKGNSGVAGVIQNTPGAIGYVNQSYIKGNVKAAALQNLSGEFLKPSVEAGAKALNGITLDENLAGKNPNPTAKGAYPIASLTWILAYEKGNGRNTKAIKKSLSTLLSDEYQDKAPTLGFVPLKGDILEKSRAAVKKIGR
|
| 44 |
+
MKIGINGFGRIGRLVFRALWDRADTEITHINEMAGDSNAAAHLLEFDSVHGRWVKDIKVKEEEIIIDGKKLAYTSFKNYLDVPWEKSSVDIILECTGKNKKPDKLNPYFDSLGMKRVIVACPVKGIVAEAESLNIVYGINQNLYDPSKHKLVTAASCTTNCLAPIVKVINENFSIKHGAITTIHDVTNTQVPVDFYKSDLRRARGCMQSLIPTTTGSAKAIAEIFPELKGKLNGHAVRVPLLNGSLTDAVFELNNAVTEEQVNNEFKKASETYLEGILGYEERPLVSADYVNDSRSSIVDSLSTMVVNSNLLKIYAWYDNEWGYSCRLADLTEYVIKKEI,MKIGINGFGRIGRLVFRALWDRADTEITHINEMAGDSNAAAHLLEFDSVHGRWVKDIKVKEEEIIIDGKKLAYTSFKNYLDVPWEKSSVDIILECTGKNKKPDKLNPYFDSLGMKRVIVACPVKGIVAEAESLNIVYGINQNLYDPSKHKLVTAASCTTNCLAPIVKVINENFSIKHGAITTIHDVTNTQVPVDFYKSDLRRARGCMQSLIPTTTGSAKAIAEIFPELKGKLNGHAVRVPLLNGSLTDAVFELNNAVTEEQVNNEFKKASETYLEGILGYEERPLVSADYVNDSRSSIVDSLSTMVVNSNLLKIYAWYDNEWGYSCRLADLTEYVIKKEI
|
| 45 |
+
MKNSIKITQLFLLLIFLTSCKATANKQELIIDSEEQESQQTKLSKSKMEVRYSCGEDGISDFLNDGWIISKQYTEEKICTWKSFPATKDCDMEKDKGCKITTPDKIGEEKVYLLEK,MKNSIKITQLFLLLIFLTSCKATANKQELIIDSEEQESQQTKLSKSKMEVRYSCGEDGISDFLNDGWIISKQYTEEKICTWKSFPATKDCDMEKDKGCKITTPDKIGEEKVYLLEK
|
| 46 |
+
MNFEIKNVFLTIEGKSIVNDVSIKVCPGEIVGLMGPNGAGKTSTFNLAVGNLRPDKGDILINSKSIKNLPLPIRAKLGLGYLTQEASIFRDLTVKENIDLALENSFSSRAIVRNKREKIINEFNLNKVVDNYGYQLSGGERRRCEIARALSVGRQGPKYLLLDEPFAGIDPLAVNDLKKLIIKLRDNGMGILITDHNVRETLLITSKSYVLSEGKILAHGSSDELANNQIVKKFYLGVDFQL,MAAKEHKSLQGSKILLIEDDKSIRLTVTESLISEGFEVSNFKDGSSALDFILGEGIKDFDLILLDLMLPGLNGLELCRKIRNEELYTPILILSAKGNESDRVLGLEVGADDYLTKPFGISELIARCRALLRRSKRGKEKKQKIETIIEYKNIKMFTEECRVTNFNQEIILSPKEFKLLELFIKNPKRVWSRDLILEKIWAIDFIGDTKTVDVHVRWLREKLEENPSAPKIIKTVRGFGYRFG
|
| 47 |
+
MNFEIKNVFLTIEGKSIVNDVSIKVCPGEIVGLMGPNGAGKTSTFNLAVGNLRPDKGDILINSKSIKNLPLPIRAKLGLGYLTQEASIFRDLTVKENIDLALENSFSSRAIVRNKREKIINEFNLNKVVDNYGYQLSGGERRRCEIARALSVGRQGPKYLLLDEPFAGIDPLAVNDLKKLIIKLRDNGMGILITDHNVRETLLITSKSYVLSEGKILAHGSSDELANNQIVKKFYLGVDFQL,MNFEIKNVFLTIEGKSIVNDVSIKVCPGEIVGLMGPNGAGKTSTFNLAVGNLRPDKGDILINSKSIKNLPLPIRAKLGLGYLTQEASIFRDLTVKENIDLALENSFSSRAIVRNKREKIINEFNLNKVVDNYGYQLSGGERRRCEIARALSVGRQGPKYLLLDEPFAGIDPLAVNDLKKLIIKLRDNGMGILITDHNVRETLLITSKSYVLSEGKILAHGSSDELANNQIVKKFYLGVDFQL
|
| 48 |
+
MLLSKLVDLIKSGESKFIKANIFENIDIENAASIDIALKNQISFLEENNILKDNLGKTSASAIITSNNNEILGLLESLNISNIVVENPRIAFAEVLNFLYEEINFNPGIDDSAVIKSSAKVGKNCYVGPNVYIGENSIIGDNNKIFPGTTILGNVRLGNNNVIHPNCVIYENTSIENNCVINSNTVIGSEGFGFIPQDGKWIKMPQKGCVIIKSFVEIGTNCCIDRPSVGNTFIDEGTKMDNLVQIGHGVKIGKNCAFAAQVGIAGGAVIGNSVILAGQVGVNNRVKVGNNVIASSKCGIHCDIEDGEVVSGFPAMKNKSWLRSSSVFKKLPELAKKLRQLDKK,MLLSKLVDLIKSGESKFIKANIFENIDIENAASIDIALKNQISFLEENNILKDNLGKTSASAIITSNNNEILGLLESLNISNIVVENPRIAFAEVLNFLYEEINFNPGIDDSAVIKSSAKVGKNCYVGPNVYIGENSIIGDNNKIFPGTTILGNVRLGNNNVIHPNCVIYENTSIENNCVINSNTVIGSEGFGFIPQDGKWIKMPQKGCVIIKSFVEIGTNCCIDRPSVGNTFIDEGTKMDNLVQIGHGVKIGKNCAFAAQVGIAGGAVIGNSVILAGQVGVNNRVKVGNNVIASSKCGIHCDIEDGEVVSGFPAMKNKSWLRSSSVFKKLPELAKKLRQLDKK
|
| 49 |
+
MSSNFKNLYTSNNPPLEMILMRGSKLESIHKVHAVISDKKGRVLMCAGNPEYKSFIRSALKPFQAIPFVSSGASSKIKNSSKSIALSCGSHSGSKLHAREAFKILWEYNIDIHNLKCPIKKTSPLEHNCSGKHAAFLATCKKLNWPLETYLKGDHPLQVEIFRIISELLEIPLEQIYAERDDCGAPTLYMKILEMAKLYSLLSSSDNAELEQISRAMTINPTMISDHNRFDTEVIQASHGHVISKGGAEGIQCFCKVNEGMGLALKVEDGSKRAKQSVGLHILKQLEWISELRIQDIEDKIIKLPEGVQIEVKGQLKFQES,VPNNQNRDNFIDKAFTVIAESIVKIMPIADKEKKAYIYYRDGLAAQNNGDYSEALDYYNESLLLEENKIDRGETLKNMAIIYMSNGEEDRSIETYQKALEENPKQPSCLKNIGLIYEKRGRFAEQNGDLDQRDMWFDKAAQVWSKAVRLYPGGYLDIENWLKTSGRSSIDIYL
|
| 50 |
+
VRIIFWGTPEYSVKSLEVLKKSDHDIVAVITQPDKKRSRGNKLISSPVKEYATKENIPVFTPETIKENIQFISILNDLSCDLFIVIAYGKILPKAILDIPKYKSWNAHASLLPRWRGAAPIQWSILEGDKITGVGIMRMEEGLDTGDVLVEKQIKIENNDNLKTLTKKLSDLSSELFLRAISDIEQNKNRDINLLLKKQTDFKRELKYARMINKLDYIINWENSATDIYRKINALYPRANTTYKRKNLKIIKIKILTTHEIHNKNYKILSNVFKPGLIIGLIKNVGIIITTKTDPILLLEAKLEGKKVSSQNQLIQQLNPVIGENFSD,VRIIFWGTPEYSVKSLEVLKKSDHDIVAVITQPDKKRSRGNKLISSPVKEYATKENIPVFTPETIKENIQFISILNDLSCDLFIVIAYGKILPKAILDIPKYKSWNAHASLLPRWRGAAPIQWSILEGDKITGVGIMRMEEGLDTGDVLVEKQIKIENNDNLKTLTKKLSDLSSELFLRAISDIEQNKNRDINLLLKKQTDFKRELKYARMINKLDYIINWENSATDIYRKINALYPRANTTYKRKNLKIIKIKILTTHEIHNKNYKILSNVFKPGLIIGLIKNVGIIITTKTDPILLLEAKLEGKKVSSQNQLIQQLNPVIGENFSD
|
| 51 |
+
MRNEKYWVKALDQTHLSITNNGLFPLKTTVVTREYYNKNDFIIRELDTSRFTKKNNYGPNQNPFNPWDKILEVDKVGTNHQLILNKYPVQKGHILLITNTWRPQDGWLDINDWIAIQMVNEDTSGLWFFNSSPIAGASQPHRHFQLLRRDHGEIICPREKWFLDFENNNDQDSKLKKNTVVSKFNFLNNSINIYNLYLELSNKIGLGNPIDDEKPRFPYNILITNNWIAIIKRKYDHVHGFSVNGLGFAGYLLVTEKSNINYLKKYGPEKLLENFV,MRNEKYWVKALDQTHLSITNNGLFPLKTTVVTREYYNKNDFIIRELDTSRFTKKNNYGPNQNPFNPWDKILEVDKVGTNHQLILNKYPVQKGHILLITNTWRPQDGWLDINDWIAIQMVNEDTSGLWFFNSSPIAGASQPHRHFQLLRRDHGEIICPREKWFLDFENNNDQDSKLKKNTVVSKFNFLNNSINIYNLYLELSNKIGLGNPIDDEKPRFPYNILITNNWIAIIKRKYDHVHGFSVNGLGFAGYLLVTEKSNINYLKKYGPEKLLENFV
|
| 52 |
+
MTTSSKKDYLSILGLSSKFDDIELKKAFRREARKWHPDLNKNDINAEDRFKLINEAYEFLRDPVRRVKSIDSNSSNEEIYNKYSTGFPEFKDYLNSLFGFEYESELDNESYDQTSDFYEDEKPNAIFNEEEFNSYDYPARSPEEPPPVKLHQDIETIIELTPDEALSGASILIELEDQTVVEVDTPPFAGDGWRLRLENIAKGGKDHYLQLKVQTENGLRIDGLRVLYKLELFPPDALLGCAVEVPTLDGNVTLQVPPKSSTGRLLRLKGRGLSFGDNIGDQFVEILVVIPADINDEEIALYTRLQELSLSDE,MTTSSKKDYLSILGLSSKFDDIELKKAFRREARKWHPDLNKNDINAEDRFKLINEAYEFLRDPVRRVKSIDSNSSNEEIYNKYSTGFPEFKDYLNSLFGFEYESELDNESYDQTSDFYEDEKPNAIFNEEEFNSYDYPARSPEEPPPVKLHQDIETIIELTPDEALSGASILIELEDQTVVEVDTPPFAGDGWRLRLENIAKGGKDHYLQLKVQTENGLRIDGLRVLYKLELFPPDALLGCAVEVPTLDGNVTLQVPPKSSTGRLLRLKGRGLSFGDNIGDQFVEILVVIPADINDEEIALYTRLQELSLSDE
|
| 53 |
+
LKKTFKVTITNKETGKIYQENISDQEYILKEFEKKGLRLPFSCRNGCCTSCAVKIISGKLDQPEAMGVSQDLKDKGYALLCVAKVIEDIEVETTYYDEVYDLQFGQYFGKGKTRKAPPWEFEED,MKGHKKIRFIFPLVAMYVPLLLLAPKAIAGSFGAEIFCTMRDGGNDHESSWQAAYSYIKKQKGGIFKTSPKQAAGQIIETVVRERDKFSYCVEFLDQLHPDRKLQLENDRKEKRRKKEELLQDKENEDYSKETFDRYSY
|
| 54 |
+
MDFKTYQKQARLTAQYPNLGSNNIYPTLGLVGEAGEVAEKVKKVIRDKKGIFDEESKKGIKKELGDVLWYISNLCNEFNFELEEVALQNLEKLKLRAAKGKISGSGDDR,MDFKTYQKQARLTAQYPNLGSNNIYPTLGLVGEAGEVAEKVKKVIRDKKGIFDEESKKGIKKELGDVLWYISNLCNEFNFELEEVALQNLEKLKLRAAKGKISGSGDDR
|
| 55 |
+
MSFFQGKILLNFIIDLLNKPAINWSNFELNSSLQLNDFVDLLLEPLNTSQYSYNIKLGLHEALINAVTHGNKLDPNKSIRVRRIITPNWCVWQIQDQGNGLEIKKRLYKLPKKFTSFNGRGLYIINECFDDIRWSNKGNRLQLALKR,VPNNQNRDNFIDKAFTVIAESIVKIMPIADKEKKAYIYYRDGLAAQNNGDYSEALDYYNESLLLEENKIDRGETLKNMAIIYMSNGEEDRSIETYQKALEENPKQPSCLKNIGLIYEKRGRFAEQNGDLDQRDMWFDKAAQVWSKAVRLYPGGYLDIENWLKTSGRSSIDIYL
|
| 56 |
+
MSFFQGKILLNFIIDLLNKPAINWSNFELNSSLQLNDFVDLLLEPLNTSQYSYNIKLGLHEALINAVTHGNKLDPNKSIRVRRIITPNWCVWQIQDQGNGLEIKKRLYKLPKKFTSFNGRGLYIINECFDDIRWSNKGNRLQLALKR,MSFFQGKILLNFIIDLLNKPAINWSNFELNSSLQLNDFVDLLLEPLNTSQYSYNIKLGLHEALINAVTHGNKLDPNKSIRVRRIITPNWCVWQIQDQGNGLEIKKRLYKLPKKFTSFNGRGLYIINECFDDIRWSNKGNRLQLALKR
|
| 57 |
+
VASTLLFTALKEAIDEEMANDVNVCIMGEDVGQYGGSYKVTKDLYEKYGELRVLDTPIAENSFTGMAVGAAMTGLRPIVEGMNMGFLLLAFNQISNNMGMLRYTSGGNYKIPAVVRGPGGVGRQLGAEHSQRLEAYFHAVPGIKIVACSTPTNAKGLMKAAIRDNNPVLFFEHVLLYNLSEELPEGDYICSLDQADLVKEGKDITILTYSRMRHHCLKAVEELDKKNIDVELIDLISLKPFDMKTISKSIKKTNNVIIVEECMKTGGIGAELIALITEECFDDLDTRPIRLSSQDIPTPYNGNLENLTIIQPHQIVEKVEEVINGSI,VASTLLFTALKEAIDEEMANDVNVCIMGEDVGQYGGSYKVTKDLYEKYGELRVLDTPIAENSFTGMAVGAAMTGLRPIVEGMNMGFLLLAFNQISNNMGMLRYTSGGNYKIPAVVRGPGGVGRQLGAEHSQRLEAYFHAVPGIKIVACSTPTNAKGLMKAAIRDNNPVLFFEHVLLYNLSEELPEGDYICSLDQADLVKEGKDITILTYSRMRHHCLKAVEELDKKNIDVELIDLISLKPFDMKTISKSIKKTNNVIIVEECMKTGGIGAELIALITEECFDDLDTRPIRLSSQDIPTPYNGNLENLTIIQPHQIVEKVEEVINGSI
|
| 58 |
+
MSKLSTTKICVKSPAKINLHLEIIGKRKDGYHELAMIMQNIDLSDYIEFENNQIGEIKLKSNSKDLSLDEDNLIIKAANYIKDMSKNKELGANIFLKKNIPIGAGLAGGSSNAAATLVGLNKLWDLDLDYETIFILSAKLGSDVPFFIEGGCQFCFGRGEILEKYSSNFDFGVILLKNPNISISTVDTYKKYSQEFCPKYFTETEKTNKIRNDLRVNGFNDFKLSEQRINVKNDLQVIVERENNSVKKALYLLSNLQNCLSYSMSGSGPTCFALFKDINIANEVFEQNYKMFNNNGFEAWVCKLINSGITLL,LVKPKSPDNKISNHLQQDVVKIAGKTIFINPFLYWRRFDENTNRWLREPGQMSEEQIQPNRNRFYPEIDWADLSQNQKLVKDASVEMFLKTLELISTFHPQLNSGQLLEVERKMAITKKLPFEKWVTKSFAKKARAEEYEKRKFKRDRFIRSWKEWLSLENTQQALLPIIVVVFVSAFIGWSSGVSKNSCNPYFEQNLDQSI
|
| 59 |
+
MSKLSTTKICVKSPAKINLHLEIIGKRKDGYHELAMIMQNIDLSDYIEFENNQIGEIKLKSNSKDLSLDEDNLIIKAANYIKDMSKNKELGANIFLKKNIPIGAGLAGGSSNAAATLVGLNKLWDLDLDYETIFILSAKLGSDVPFFIEGGCQFCFGRGEILEKYSSNFDFGVILLKNPNISISTVDTYKKYSQEFCPKYFTETEKTNKIRNDLRVNGFNDFKLSEQRINVKNDLQVIVERENNSVKKALYLLSNLQNCLSYSMSGSGPTCFALFKDINIANEVFEQNYKMFNNNGFEAWVCKLINSGITLL,VPNNQNRDNFIDKAFTVIAESIVKIMPIADKEKKAYIYYRDGLAAQNNGDYSEALDYYNESLLLEENKIDRGETLKNMAIIYMSNGEEDRSIETYQKALEENPKQPSCLKNIGLIYEKRGRFAEQNGDLDQRDMWFDKAAQVWSKAVRLYPGGYLDIENWLKTSGRSSIDIYL
|
| 60 |
+
MKNLKLKVIFKYLKPYKKEFLYGGIALLVVNILSILIPLEVKNIIDQLKDGFSSSFVISKSLFLMFLATCMGLIRLFSRQIVFGIGRKVEVNLRQKLFDHLLIQDPDWIQKKGSGDIISRATSDVENIRRLLGFTVLSLCNIVLAYSLTIPSMLSINKTLTVAALMIFPMILVIVSLFGGRMVSQRKIQQESLSKLSDLIQEDLSGISAIKIYAQEEAEKKQFNNYNKVYRNSAIKLARTASTLFPLLQGISSISLLILLGLGTSQLENGFITIGGLVALILFVERLVFPTALLGFTLNTFQLGQVSLDRVEEIFQNNPKITDKPKAKFIKKKVKGTIEAKNLKIKYEGAKFNSLNRLNFKINPGELIAIVGPVGCGKTTLAKSLGRTIEIPDGQLFLDDIDITNIKLRDLRKHIAIVPQEAFLFTSTISENLKFGDPKASRNVVKNSAVNAGLIDDINSFPDGFKTIVGERGITLSGGQRQRTALGRALLVDASVVVLDDALASVDNKTAAKIIEEMRANKSKTILMISHQLSVAATCDRVLVMDQGKIVQEGIHKDLITTNGLYKNLWEREIATNKIVS,MKNLKLKVIFKYLKPYKKEFLYGGIALLVVNILSILIPLEVKNIIDQLKDGFSSSFVISKSLFLMFLATCMGLIRLFSRQIVFGIGRKVEVNLRQKLFDHLLIQDPDWIQKKGSGDIISRATSDVENIRRLLGFTVLSLCNIVLAYSLTIPSMLSINKTLTVAALMIFPMILVIVSLFGGRMVSQRKIQQESLSKLSDLIQEDLSGISAIKIYAQEEAEKKQFNNYNKVYRNSAIKLARTASTLFPLLQGISSISLLILLGLGTSQLENGFITIGGLVALILFVERLVFPTALLGFTLNTFQLGQVSLDRVEEIFQNNPKITDKPKAKFIKKKVKGTIEAKNLKIKYEGAKFNSLNRLNFKINPGELIAIVGPVGCGKTTLAKSLGRTIEIPDGQLFLDDIDITNIKLRDLRKHIAIVPQEAFLFTSTISENLKFGDPKASRNVVKNSAVNAGLIDDINSFPDGFKTIVGERGITLSGGQRQRTALGRALLVDASVVVLDDALASVDNKTAAKIIEEMRANKSKTILMISHQLSVAATCDRVLVMDQGKIVQEGIHKDLITTNGLYKNLWEREIATNKIVS
|
| 61 |
+
MAKSSWEGNCFLNFFNNKSSSGKDDKTIFKSKFTSPYKLLKCSYDQEGRCILPILHTAGGLVGGDLLEFEANIGINSKVLLTTSSAQKVYGSVGRSKINPEGTFSSQKTKISILDNSHLEYLPQETIVFANGLYSQEFNIKISDNSSFLFTDLIRLGRSSAGESIESGVFRSKLEIMRNGNLCDDWEFVDQIELTKFSFEAKSGMDFKPVFGSLIWICEKEFPITKISYLKEKIKIIFKENNNYLSLGTLENGLSIRFLGTSSQDARKCFFSIWTQIRTVCGFCKPEYQGVWPLQDL,MAKSSWEGNCFLNFFNNKSSSGKDDKTIFKSKFTSPYKLLKCSYDQEGRCILPILHTAGGLVGGDLLEFEANIGINSKVLLTTSSAQKVYGSVGRSKINPEGTFSSQKTKISILDNSHLEYLPQETIVFANGLYSQEFNIKISDNSSFLFTDLIRLGRSSAGESIESGVFRSKLEIMRNGNLCDDWEFVDQIELTKFSFEAKSGMDFKPVFGSLIWICEKEFPITKISYLKEKIKIIFKENNNYLSLGTLENGLSIRFLGTSSQDARKCFFSIWTQIRTVCGFCKPEYQGVWPLQDL
|
| 62 |
+
MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,VNSNSSNQVGKNIRRTGFLIVLSYLLIVLIMKVLEANNFFGYSLSSFSNDIFAPPSLKHLCGTDRLGRDVCLRTLQGSSIAIEVVFLAIFFALILGLPLGLLSGYFGGILDKCLSLVMDTIFSIPVILLAVVVAFVLGKGIINASIALCIVYSPQYFRLIRNQTMLIKSETYVEAARVSGADVKTIIFKYILPNVITPLPILLTLNAADAVLVLGSLGFLGLGVPANVPEWGSDLNLALAAIPTGIWWTALFPGLAMFFLVLGLSFIGEELENIFEN
|
| 63 |
+
MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,MTKFKLKIASRRSKLAMVQTLWVKEQLEKNIPDLEVSIEAMATQGDKILDVALAKIGDKGLFTKELEAQMLVGHADIAVHSLKDLPTNLPDGLTLGCITKREDPSDALVVNKKNKIYQLESLPPGSIVGTSSLRRLAQLRYKFPHLDFKDIRGNVITRIEKLDSGEFDCIILAAAGLKRLGFESRVHQIIPNEISLHAVGQGALGIECKSDDKEVLKIISVLEDKVSSQRCLAERSFLRELEGGCQVPIGVNSSIQNDEIALIGMVASIDGKRLIKNESIGNIKYPEEVGKKLAEKLKLQGADKILSEIFEQFRDK
|
| 64 |
+
MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,LGRSRKTNQLIREFLSELKEVLTKDGSITLRSLIFQENFHSLEGALKETEIKFIYPSDLKRFKDKSLNVLDICFGLGYNSASLFNNVIRQNSLINWYALEIDKKPLEYSLGNKSFQKLWHPKVFKILKALLKNSKYKDQFFDCDILWGDAREKIKNIPANIKFDLIYLDGFSPQKCPQVWSVEFLSKVTQKLNPQGYLITYSCSAAIRSTLKDFGLNIFNNKPNLVSKNLWSYGTIAVKNIDEKVLQNNLYLKKLSWMEEEHLLTKASIPYRDPTLNSNPKDIIKKRVQEQFLSNLKTSKKWRDKWGMTK
|
| 65 |
+
MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,MIDSFPLIKKEHIETLQINIGLKCNQACKHCHVNSSPLRSEKMSYEIISLIPKVIEKYKIKTLDITGGAPEMHPEFRNLITTLSDKNIDIIDRCNLTIFFEDGFEDLPQFLAKNNVIVTASLPCYEKDNVELQRGYGVFDKSINALKILNDLGYGKQKDGLQLNLVYNPVNPILPPSQVILKEDYKRILFEKYNISFNNLYTITNMPINRYADSLNSENKLDSYYKLLKENFNKNNLEKLMCKKTISVNWQGQIYDCDFNQQINLKGNKGPKTLSDLMSKSFKFDYGVAVKEHCFACTAGAGSSCGGTLT
|
| 66 |
+
MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,MKLRLFEFYFIKDYLRPWFGLIYSLFFLFFLGAIGYRITEGWDWGDCLWMVLITITTIGFGEVQTLSPEGRIITVLIIVGGLIFIQFTFQKAVRLFESGYFQRVNELRFKRLLRKMENHVILCGYGRVGQEISNQIKTQNIPIIVVESDEDRKKIAEDNGLEVLCADATLDETLKLAGLDKCKSLVVTLPNDAANLYVVLSAKGIRSSIRVIARAGTEEAASKLRLAGASIVVSPYIAAGRAMASMALRPIAIDFLDLLAGSECEIEEFELSNDISLFETAEKITLLELGIGKKSGAKILAIKEDEKLITNPGGDFLLQPGQVLIAFGSKEQLTTLNRLLGNLVVSVELLK
|
| 67 |
+
MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,MQKTKFSKINDQFNNLLFGFLSSSWKSKSINVISVLTGYFLFANFATKFISEGKNELIMVPIIILIIELIIRIRPPAGSSFFNLWSIIDKARIGATYAVILEAFKLGS
|
| 68 |
+
MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,MSKLSTTKICVKSPAKINLHLEIIGKRKDGYHELAMIMQNIDLSDYIEFENNQIGEIKLKSNSKDLSLDEDNLIIKAANYIKDMSKNKELGANIFLKKNIPIGAGLAGGSSNAAATLVGLNKLWDLDLDYETIFILSAKLGSDVPFFIEGGCQFCFGRGEILEKYSSNFDFGVILLKNPNISISTVDTYKKYSQEFCPKYFTETEKTNKIRNDLRVNGFNDFKLSEQRINVKNDLQVIVERENNSVKKALYLLSNLQNCLSYSMSGSGPTCFALFKDINIANEVFEQNYKMFNNNGFEAWVCKLINSGITLL
|
| 69 |
+
LSRLLISFIFFAIVFLSPLSTFASHTSDPTVSLLQSRISKNFSKKFCNAIQNGLSKDEAMTSAIVKTENIVSFSYNPQKKWIEKEDLANQISIKVINDCGWSFGLIGKEGIDYFNSYFLEIYDKTTPDKKLSS,LSRLLISFIFFAIVFLSPLSTFASHTSDPTVSLLQSRISKNFSKKFCNAIQNGLSKDEAMTSAIVKTENIVSFSYNPQKKWIEKEDLANQISIKVINDCGWSFGLIGKEGIDYFNSYFLEIYDKTTPDKKLSS
|
| 70 |
+
MTFEAKYLGSNGWLIKFDKTNLIIDPWLTGDLIFPPGEWFFKGSLDNEILIEEDINIILLTQGLPDHCHVPSLKKFKKDIDIICSNSAKGILEKLGFTSIKVLKPKEKIMQKELEIEATAGAPVPQIENGYIVKDYKGKGFYIEPHGYLDENVNSQELDAVITPIINLELPLVGSFVKGADVLPKLIKTFNPKYILSSTAGGEAKYTGLLNKFISVQEYAEEVKCNLVNLKTMDSVKI,MTFEAKYLGSNGWLIKFDKTNLIIDPWLTGDLIFPPGEWFFKGSLDNEILIEEDINIILLTQGLPDHCHVPSLKKFKKDIDIICSNSAKGILEKLGFTSIKVLKPKEKIMQKELEIEATAGAPVPQIENGYIVKDYKGKGFYIEPHGYLDENVNSQELDAVITPIINLELPLVGSFVKGADVLPKLIKTFNPKYILSSTAGGEAKYTGLLNKFISVQEYAEEVKCNLVNLKTMDSVKI
|
| 71 |
+
MPIVFAWSLCLSVVVVLLSTIPLTLGRIKAGYSVENMSAPRALFDKLPDFGKRAVWCHQNCWESISIHAPACILCLITLPDSNLSLIAAWMHPLLRFLYIGAYVLNIPIARGLIWASGIFTTLVLYKEGISQFM,MPIVFAWSLCLSVVVVLLSTIPLTLGRIKAGYSVENMSAPRALFDKLPDFGKRAVWCHQNCWESISIHAPACILCLITLPDSNLSLIAAWMHPLLRFLYIGAYVLNIPIARGLIWASGIFTTLVLYKEGISQFM
|
| 72 |
+
MTDIEEIKKKIYQIAAITDRGQRLNKLIAPMYQEKLKEMGNLIDILESFNTEVSEEKLSGEWELIYSTVELFRSSPFFLAIEKALNDEFKSNLFFKLHQLQVGSFGLSTIGRIAQNIDFDKKEFLSTFDTTIFGLTIIPILGWFKLLPTFGGRVITLADDLILEDKVLKMNLKKTKVSKVDGLNKIPLFSTLLMERWYPVKEVWEKLPWNKESPSCEVSVIYLDEEVRVMKDIYGSTFVYIRPTISLLNSK,MTDIEEIKKKIYQIAAITDRGQRLNKLIAPMYQEKLKEMGNLIDILESFNTEVSEEKLSGEWELIYSTVELFRSSPFFLAIEKALNDEFKSNLFFKLHQLQVGSFGLSTIGRIAQNIDFDKKEFLSTFDTTIFGLTIIPILGWFKLLPTFGGRVITLADDLILEDKVLKMNLKKTKVSKVDGLNKIPLFSTLLMERWYPVKEVWEKLPWNKESPSCEVSVIYLDEEVRVMKDIYGSTFVYIRPTISLLNSK
|
| 73 |
+
MLLSRVAESLYWINRYLERAENISRFVEVSEAMSLDCPPGSAEPWLPLIDASSDRETFDSRFPEKKQDDVINFLIRDRINPNSIISCIQLARENARQIRDVMTSEMWEQINILYWNLQEGESIWDLPRQEQLSEIRRGCQLFYGITDATLSKDLACQFSILGRLIERADKTSRILDVKYYLLLPSLDELGGVLDELQWIALLRSAGAYQMFRKAEQNSIQPNSVARFLLLDNNFPRSVRYCLDGISNTLKMIDTSPSSDNPSKLECMRGLLKAKWSYIRIEDIINDGLHEAIDSLQIDLNKLHNLIEDKYFINKEFDQ,LVKPKSPDNKISNHLQQDVVKIAGKTIFINPFLYWRRFDENTNRWLREPGQMSEEQIQPNRNRFYPEIDWADLSQNQKLVKDASVEMFLKTLELISTFHPQLNSGQLLEVERKMAITKKLPFEKWVTKSFAKKARAEEYEKRKFKRDRFIRSWKEWLSLENTQQALLPIIVVVFVSAFIGWSSGVSKNSCNPYFEQNLDQSI
|
| 74 |
+
MLLSRVAESLYWINRYLERAENISRFVEVSEAMSLDCPPGSAEPWLPLIDASSDRETFDSRFPEKKQDDVINFLIRDRINPNSIISCIQLARENARQIRDVMTSEMWEQINILYWNLQEGESIWDLPRQEQLSEIRRGCQLFYGITDATLSKDLACQFSILGRLIERADKTSRILDVKYYLLLPSLDELGGVLDELQWIALLRSAGAYQMFRKAEQNSIQPNSVARFLLLDNNFPRSVRYCLDGISNTLKMIDTSPSSDNPSKLECMRGLLKAKWSYIRIEDIINDGLHEAIDSLQIDLNKLHNLIEDKYFINKEFDQ,MKLRLFEFYFIKDYLRPWFGLIYSLFFLFFLGAIGYRITEGWDWGDCLWMVLITITTIGFGEVQTLSPEGRIITVLIIVGGLIFIQFTFQKAVRLFESGYFQRVNELRFKRLLRKMENHVILCGYGRVGQEISNQIKTQNIPIIVVESDEDRKKIAEDNGLEVLCADATLDETLKLAGLDKCKSLVVTLPNDAANLYVVLSAKGIRSSIRVIARAGTEEAASKLRLAGASIVVSPYIAAGRAMASMALRPIAIDFLDLLAGSECEIEEFELSNDISLFETAEKITLLELGIGKKSGAKILAIKEDEKLITNPGGDFLLQPGQVLIAFGSKEQLTTLNRLLGNLVVSVELLK
|
| 75 |
+
MLLSRVAESLYWINRYLERAENISRFVEVSEAMSLDCPPGSAEPWLPLIDASSDRETFDSRFPEKKQDDVINFLIRDRINPNSIISCIQLARENARQIRDVMTSEMWEQINILYWNLQEGESIWDLPRQEQLSEIRRGCQLFYGITDATLSKDLACQFSILGRLIERADKTSRILDVKYYLLLPSLDELGGVLDELQWIALLRSAGAYQMFRKAEQNSIQPNSVARFLLLDNNFPRSVRYCLDGISNTLKMIDTSPSSDNPSKLECMRGLLKAKWSYIRIEDIINDGLHEAIDSLQIDLNKLHNLIEDKYFINKEFDQ,MSKLSTTKICVKSPAKINLHLEIIGKRKDGYHELAMIMQNIDLSDYIEFENNQIGEIKLKSNSKDLSLDEDNLIIKAANYIKDMSKNKELGANIFLKKNIPIGAGLAGGSSNAAATLVGLNKLWDLDLDYETIFILSAKLGSDVPFFIEGGCQFCFGRGEILEKYSSNFDFGVILLKNPNISISTVDTYKKYSQEFCPKYFTETEKTNKIRNDLRVNGFNDFKLSEQRINVKNDLQVIVERENNSVKKALYLLSNLQNCLSYSMSGSGPTCFALFKDINIANEVFEQNYKMFNNNGFEAWVCKLINSGITLL
|
| 76 |
+
MTEVINNIPDFEKYLTDTKKVVEEALDFSLGPENPEILRESMRYSLLAGGKRIRPILCLASCSLAGGEPSLAVPTAVAIEMIHTMSLIHDDLPAMDNDGFRRGRPTNHKVYGDAIAILAGDALLTRAFEMVSLRSPGVDSNRLLNVVGELSLVAGAPGLVGGQVVDLECEGKEVDLETLEYIHLHKTGALLKASVRTGAMIAGANEELLNALTTYAEGIGLAFQIIDDILDLTSSSEKLGKTAGKDLLADKTTYPKLLGMEESKKKAFDLVDQAKKAIEPWGLNAKYLISLADFITNRDR,MTEVINNIPDFEKYLTDTKKVVEEALDFSLGPENPEILRESMRYSLLAGGKRIRPILCLASCSLAGGEPSLAVPTAVAIEMIHTMSLIHDDLPAMDNDGFRRGRPTNHKVYGDAIAILAGDALLTRAFEMVSLRSPGVDSNRLLNVVGELSLVAGAPGLVGGQVVDLECEGKEVDLETLEYIHLHKTGALLKASVRTGAMIAGANEELLNALTTYAEGIGLAFQIIDDILDLTSSSEKLGKTAGKDLLADKTTYPKLLGMEESKKKAFDLVDQAKKAIEPWGLNAKYLISLADFITNRDR
|
| 77 |
+
VNFWGFINLKFLLDVLFALGFGLLLFSRVKEQRTLWLLRGYLLLVSFAWFIQRYAYLPLTSKLIDAVVLACSLSLAILWQGELRRLMELLGTGRLAVLLGNPPKEFRATSTTVNQLVDAAGKLSQNRKGALIVVDLGSDLRPEDFLYSGIKIEAKLSTDLLINLFATDTPLHDGAVLVKGNKIISAGVILPLSRQGISRYGTRHLAALGITERFDRCICIVVSEETGTLSLANQGKLERPITSSRLQELLIKLVGNQNTSGTPKSSSNKTNSYQKTNTNDTITVEKKLDKQNTIQD,VNFWGFINLKFLLDVLFALGFGLLLFSRVKEQRTLWLLRGYLLLVSFAWFIQRYAYLPLTSKLIDAVVLACSLSLAILWQGELRRLMELLGTGRLAVLLGNPPKEFRATSTTVNQLVDAAGKLSQNRKGALIVVDLGSDLRPEDFLYSGIKIEAKLSTDLLINLFATDTPLHDGAVLVKGNKIISAGVILPLSRQGISRYGTRHLAALGITERFDRCICIVVSEETGTLSLANQGKLERPITSSRLQELLIKLVGNQNTSGTPKSSSNKTNSYQKTNTNDTITVEKKLDKQNTIQD
|
| 78 |
+
MENPTKNKIQNLIDLNPVMVFMKGTKLMPQCGFSNNVVQILNSLGVTFNTFDVLSDFEIREGIKEYSEWPTIPQVYLKGEFLGGSDILIEMYNAGTLKEKIEIALAS,VNSNSSNQVGKNIRRTGFLIVLSYLLIVLIMKVLEANNFFGYSLSSFSNDIFAPPSLKHLCGTDRLGRDVCLRTLQGSSIAIEVVFLAIFFALILGLPLGLLSGYFGGILDKCLSLVMDTIFSIPVILLAVVVAFVLGKGIINASIALCIVYSPQYFRLIRNQTMLIKSETYVEAARVSGADVKTIIFKYILPNVITPLPILLTLNAADAVLVLGSLGFLGLGVPANVPEWGSDLNLALAAIPTGIWWTALFPGLAMFFLVLGLSFIGEELENIFEN
|
| 79 |
+
MENPTKNKIQNLIDLNPVMVFMKGTKLMPQCGFSNNVVQILNSLGVTFNTFDVLSDFEIREGIKEYSEWPTIPQVYLKGEFLGGSDILIEMYNAGTLKEKIEIALAS,MVMNVSIVIPTYNRKPILEKCLKALEKQNLNENISNYEVIVVDDGSTDGTTYWIKDNYEVLPHVVLYEQEHGGPALGRNLGVMKSKYEIIIFIDSDLIVLDDFIACHVNKLLFSWSKNTKKCFTYGSVINTSNFSNPESERYKLTDFSFAYFATGNVAISKELLLSVGLFDNSFSLYGWEDLELGERLKKLGTKLIKCPEAVGFHWHPPFDCGQIESLVSQEKERARMALIFYKKHSNLRVRFMIQLTPIHILLWQIICLGGLISIKRLLPLLRFLIDSGRNRIALEIVRIPLNLIYVKELRRLI
|
| 80 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MKVNKKYKGLVTKKFNEFYLVELDKYETSVANKKFLCKIKKSVNFRNQFVFVGDEVIVYQIDLQSKRATIESLVKRNNLLERPSVANISNIYVICSVEEPKLNLSQVNKFLISSEQLGVEVSLVLTKCDLITEEKRLLLIEKFHQWGYQAITLNLNNPENLRTLLIELKKKKCSIFMGPSGVGKTTLLNMIIPNLDNKTAPVSSKIKRGKNTTRNVELFSLSSKSYIVDTPGFNIQTLEIDIRELSNLYPEIYKQVVNEGIHCKFRNCLHVNDEGCKLNKNFERYTFYKEMVESSKSHYCLIQED
|
| 81 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MAGFGLPNFGQLTEAFKKAKEIQQNAQKLQDELESMEIEGKSDDEMIKVWISGNQLPLRVEVNENISTANKEEIEKNILEAIKKAHESSTTTMKERMNDLTGGLNLNLPGLDNNDS
|
| 82 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MASQISYRGNKNPIKKKLSFFEGGHQLEKLEFALAVAQTKGDEQKSLVLMKKIIELGGNVEEPGT
|
| 83 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MITLYQFRHSAFCLKTRMALHAKKLQYRVEEVTPGLGQFEIFKISGQKQVPIIVDDNDQIISDSTIICEYINKKNDNNPLFPKDPLLFAQCKLIEDWADTTMASTCRKALIKSAIENPQLRTALLPDEIPSSVKGLVDKLPFKNLSKISNVVFSTKDNLELQKILEALSKALINKKYLIGDNLSIADIAISAQLSLLKFPKSSGPILSGEGCQEYINNPYLENIFIWRNNIEEYLFSANSQ
|
| 84 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,VLIIYRSNSLTAKEASIFCNKTLKERNIKSKRIESDFDNNQLENYFYNLAALPDLVIVLGGDGTVLKSANALVNYDIPILSFNIGGNLGFLTQEKDFLFDQSFIKILEKEEFIIDFRNRLHCDVYSNEKNRERKILKSYDALNDFYFKSVEEDISPTNQIQIEIDNEKVNEYKGDGLIISSSTGSTAYSMAAGGPIVHPSINAFVINPICPMSLASRPIIIPDTSKVVIRVVQKNKREIKLWKDGSKCMTIKENDYCEINKVTKPCKMIKFNKSISYYITLIKKLDWKGDLSLKNNQNN
|
| 85 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MSKKRKRISRRRLAGQRVMAHVPIYHIETGKHKPVTAARRFIAENALSAPSVFNVRRNEHTTDRFFWGQKGLFSAQYAEENHFLFPSLKVVVEGIGEEKIFEGLELTADDWEEIEEYEYAFV
|
| 86 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MSKIKENKEQFWLEKFDCFSVTGKDSKRFLNGITTGNIVDLNNKVLKSCWLSPNGILKSLLEINCSEKELKVIVLVGNTSEIRKYFNDIIFPSDDVSLSDSFSINRLQQVDDMNSWRITQPIFLKNEDKKYDFYKNNPNSMNTNDLQLWKINQAIPSLNSEINGKNNPLELGLTDLIDFNKGCYLGQETMSKIKNVSSLKQEIRVWTAKDKDVNLESVNKILFNNQNKEKSVGYITSIYVLESRIIKGLAMIKRKYLDKGNPFFSDNFGQISLEKSVGSTFL
|
| 87 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,VENSINISILIPLIPMGMALLILSLLVSFNRTINRLTKPVSALAVFSLLSSALISAFLYFKKIEGEIFLSDYLKLFGSTNLILHLNSLTEKIVIFFAVIIAIVIGVLFYKLPRRKGYVSLIIGISLISSSIMFAVFFLDFSFLI
|
| 88 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MKRKEDSKNNNYDSMSFTDHLEELRQRLLNSIYSILICIFFSFLIIKPLISFLEIPASDIHLLQLAPGEFLFVAIKVAGYSGIIVSIPYIFYQLILFISPGLTKKEKNLILPAVFGSGLLFFLGLIFSWWILVPAAINFFINFGADIVEPTWSIERYFDFVLLLMSSTAIAFQLPVLQFILGSLGIITTEKMLSNWKIVVISSAILSAVITPSTDPLTMSLLSISIIFLFFVGAGLTYISESLKSKTLSSSH
|
| 89 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MTINEKIISDKELKISDWELDFYSRPIIETNGKKRWELIISSSKSFKTEKIFLWNKVCPANEVNSIWLTKALNEALNDAEIEGWAKPLKIRFWRASMKSIIKKSIENIGIEALVSRRTYELFDRIEFLEREIYPLEQGYVRGVLAPTFTSNILNDPKPLPEAVRGDALTISEISIEELKLAKNWPIEFGDIFPIQSSIKNDNLVPGLRLFSKDRSLALAAWFSSLEPVKLLIKQNQLILEASEDDKWLVTDLQEKDAKVLNDKFTQSKKDSYGYQFISIQATPFIEKFAGFWILKDVELIS
|
| 90 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MESIFNNSFATLVAYVGIVSIYLLVIPLILFYWMNNRWNVMGKFERLIVYGLVFLFFPGLILFSPFLNLRLRGDSKG
|
| 91 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MTNKKRILSGVQPTGDLHIGNWLGAINNWVELQEKHETFLCVVDLHAITTEYDTKQLSKNTLSTAALYIACGINPKICSIFVQSQISAHSELCWILNCMTPINWMERMIQFKEKSIQQGNNVSIGLFDYPILMAADILLYDADYVPVGEDQKQHLELAKDIAQQRINAKFGKEENILKIPQPIIMKKGSKIMSLNDGSKKMSKSDINEGSRINLLDTPEIITKKIKRAKSDSYMGMEFNNPERPESRNLLMIYSLLSGKEVSELENDLSQTGWGTFKKIFTEQIIESLKPIQERYQVLINDPHELNKILIQGKEKAEVVANKTLSRVKSELGFFEIEK
|
| 92 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,LALIIPSNYHKISDVEKNHISWIEPDLAERQDIRPLRIGILNIMPLGKQYEFNLLHPLGLSPLQIEPVWIKLKTHSYKTWDLNHLNNLYTTWEEANDPEPLDGVIITGAPVEHLAFEEVKYWDEFVNITNEARNSCASTLGLCWAGFALAYLAGVNKTVFDKKLFGVFPLKSLAPGHPLMGTQDDEFICPQSRFAGLPDLEMEEAQKEGKLNLLAYGKDVGYTIFETKDQKQLMHLGHPEYTVHRIISEINRDKEKGDVPPPENFDINSSNTSWRSHRNLLFQQWLWFCYQQVSLS
|
| 93 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MHSKINYFLGIFLSIVILIFNEPSFAINNPNLLPEEKTPVIDLAKTLSPNQKKSLEENLNNLEKESGWKIKYLSQFESVPGIAIKDYWDLDETSLLVIADPRGGNLLNFNVGEAYFAFMPRLFWVELQTRFGNQYYVKDHGEDGAVLDAINSVKICLDRGGCQVVPGLPKEQYIWTLCTSILGGLVAGFAAAPRKEGQIISIGFLALLSPLWGMLFGIFGLAPIISRTSEVLPLFKNGLAFAAAAIAGYLLSQTVFSRYEKPKKS
|
| 94 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,LTNYTHYTTVVVHLYYLLMTLGGANVWSNFSYGSRVDSPNGWILNPQGSFLILFENCKKSARNNINVYTHLLFTNHLGEPAGLKNTRLHDLDSAFETWNELIAGGWTEVTNQFQESA
|
| 95 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,LKEDSSYLIKYTSSGLYCELADTWIDPIKPVKRALITHAHMDHFTFGCDEYISTYETAVIIKERIGKEINIKTYDYEKEFKINGIKISFHPSGHILGSSQIKFSLAEEIWLITGDFKRQKDETCKEYEIVKTDYLISESTFGLPIFKWDEPQKTASDITKWVNSSQEKTSILFCYSLGKAQRLLNEISKTNFINNIYTHSSIYRMNNCYKKLGIDIIETTKLEQTKNNSDLKGSLIILPPALNKSSSLKNFKDIQTGFASGWMSIRALRKRSGYDKGFSISDHADWIAILKTIKESKAKNVFFHHGESEALNKYLKEKNSINVLEFEFKK
|
| 96 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MNFKNHHQKKRFGQHWLVNNLILEKIKEVAELEEKDFILEIGPGRGALTSKLLDSKISRLHAIELDEDLIDLLNNKFRNDKNFSLQQGDILSTNLDSINKKITKVIANIPYNITGPILDIFVGRLGIISKNNYNKIIFLMQKDVVDRILAKDGNTNAGAMSVRMQLISNIRRICDVPPSSFDPPPKVFSTLVVFEPLRPEMRLDIKLEKYLDKLLRISFNSRRKMIRNTLNSILSAEEIEKLSESSQICFNSRPQDISINKWIKLAEACIKITNKNQ
|
| 97 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,LSRLLISFIFFAIVFLSPLSTFASHTSDPTVSLLQSRISKNFSKKFCNAIQNGLSKDEAMTSAIVKTENIVSFSYNPQKKWIEKEDLANQISIKVINDCGWSFGLIGKEGIDYFNSYFLEIYDKTTPDKKLSS
|
| 98 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,VNKKKLYLANPYGFSKQTKNLLPEFIKIFQNLNVEVYEPFERTKHLITNKNNWAYDLAKANFNDLKSCDCIFAIVNGNPPDEGVMVELGISIALNKEIFLFRDDFRNCSDSDQYPLNLMLFVGLSKESWSKNYFESIEDILNPKKSFLNWAKRI
|
| 99 |
+
VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF
|
| 100 |
+
MANSQVTTESGGRQNMFPSETRPYIDESVSYDSYPKNAEKVNGRWAMIGFVALLGAYVTTGQIIPGIF,VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF
|
| 101 |
+
MLNLIKKNLNIKSGIALIVLATIFVFLSNSFKKNKSKDISNFVVSVEKGILSESINTSGEVKATRTSNIGPRKQGILEEIKVEEGDLVEKGQILATLDDEDFIYKLEELELNLKKQKSEYLRREFLFKEGAVSKEDYESYKNKYNTSEAKFSDAKAEKDFYSIRAPYPGKITAKYAEIGSYVTPSSNLSSNSKAKNFIFELSEGLEIIAKVPESDIGRIKTGQEASVRIEAYPSNKYRAIVKKIAERAVKDNNVTSFEVTLKFKEISEEIKIGMTADLEFKVKSSEEKILVPTVSIVTEKGEKGVLKVDKNNTPKFEKIEIGISSGNKTSIIEGLRPGEQIFIDIPPWANKRK,VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF
|
MED4_RRS_100.csv
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
LSKRNPIILIHGLWNTADIFSSITSKLDEIGIEYFSPTLKHEYGMTSIVELTNLLNYLILEKYGYEKELDILGFSMGGIIGRYWIKKLNGYKRTRRFITIGSPHNGTLSSQLIPKYPFKGISEMKINSPLLRELSRSDYLLSGIDCISFFTYWDLMVFPGWRACLNSGEKISLKIYKHKNLVRNPDAVDKIIEKLLN,VRNSPFLPNRPLKVAVLGSSGAVGSELLKILEERDFPISELVLLSSQRSEGKIVKWKGEEIITKKASKEEFLNVDLVLASAGGSISKQWLSTVKDQNAVLIDNSSAFRLENDVPLVVPEVNACEALKHNGVIANPNCTTILLTLVLAPLNKISPIKRVIVSTYQSVSGAGQLAMEELQFLTKKYLQGDPKESEVLPYSLAFNLFLHNSPMLSNNYCEEEMKMTNETRKILNITDLKLSSTCVRVPVLRAHSESVNVEFDDVIKPSYAINQLKKAKGLEIIEDYEKNRFPMPNDVMGRDNIAVGRIRTDISNSNGLELWLCGDQIRKGAALNAVQIAELLIAKK
|
| 2 |
+
LTNYTHYTTVVVHLYYLLMTLGGANVWSNFSYGSRVDSPNGWILNPQGSFLILFENCKKSARNNINVYTHLLFTNHLGEPAGLKNTRLHDLDSAFETWNELIAGGWTEVTNQFQESA,MLRKLIHPILILPFCLYINSQEALLSKANNSIEEILHENENQIFLNYSDIDNITLKNNRELKALESLVNSTMFTLSSKIAKRYPSLDLQASGLPKYTSGKNYNSSSSTTKTSQFSANPSLNIKLDLIDPLRGSEIKIARNNYAIAKNNYEIKKKDLIKEAKSRYHKLQKSYQDIKNKTLSLDLSITSLKDAQSKFDAGIGTKFEVLEAEAQLSRDMQSLNEKKIQNQINKIELKEILNINGDFEINQKQKLIGFWNHKLNKNITEGLANSLSLKNINLKKSIKENQAKNYLNVYKPNVYISNNFTSSFSKGDSLSVKIDPEKSGSSYTNTVSLNFSWNIFDGGQNKNLYKSSKADVKSEDYSYKNIENVLKTNISKAYLNLKLNEEKILSSLKEISSTEESLRLARLRYDIGISTLKDVLVRQKELSNANSKKIDSIYNYNLNLDELVRLTFLEISNICNEENNLIKNEIQSICNI
|
| 3 |
+
MNQFFSRRSFILIPIMSILKFILQPKKVLAAFAASDDDWNLSKEDWKNKLSPESYYILREEGTERAFSSQLNNEKRKGIFYCAGCNQPLFTSDTKFDSGTGWPSFWDPIQGSVETKVDFKLIVPRTEYHCSRCGGHQGHVFNDGPLPTGKRYCNNGLALKFIAE,MTNTKVSNNNPDKESIINKSITKAKDNEIIKNKTIQNKKVNSVSKEPNKSVDDISNELFSELISKKISLVQEIKDLETKKNELEKDIESNFKGQSDNIAKRVKGFQEYLTGSLQNLSQNVEKLELVSQPIVVKPSPLDEKKEASNKNELLTVPALSETFKPDEQLIRSCFSNFIEQPDFYSEPWKLRRSLDSSDIEVMDDWFFNMGGRGSLESRGSRQKNALLSAGFIAILGELYGDQFQTLILASQPERLGEWRRVLQDSLGLTRDDFGPNSGIVLFERPEGVIERADRLEANEELPFIIVDAAETSVEIPILQFPLWLAFAGSNDEIYDDLELN
|
| 4 |
+
MVKEDPVRLELSITPSYGKNPVIVGIVESLDLVARRDREGRMPRDLQGTWDWTVRHGKVSTGGWNPMLKEALQTMFETGLPSIIYEELTGDEYKPVDGIRHVR,MSEYRDSSSNNFLSLISGAFIGAAGLAWWLISEADKRKEEKKQKAMMYSSRIQDGSEAIDTNENIKDVEGDKLEQKVEELNSAIADVRRQLEELGQ
|
| 5 |
+
MSLTQSKEVNSLSRRYSTYIERRITRTVMVGDIAIGSDYPVRVQSMINEDTMDVDNSYLAIKRLHEVGCEIVRLTVPSLAHAKAVGDIKEKLIKNNIDTPLVADVHHNGMKIAMEVAKHVDKVRINPGLFVFEKSDPTRTEYTDTEFETIKKTILKRFTPLVEVLKSENKALRIGVNHGSLSERMLFTYGDTPLGMTESAMEFVKICDELDFHNIIISMKASRAPVMLAAYRMIADRLDAEGYNYPLHLGVTEAGDGDYGRIKSTAGIGTLLAEGLGDTIRVSLTEAPEKEIPVCYSILQSLGLRKTMVEYISCPSCGRTLFNLEEVVDKVRKATSHLTGLDIAIMGCIVNGPGEMADADYGYVGKGKGTIALYRRKEEIKRVPEDEGVDALIRLIKDDGKWIDP,MSKVELISLTPEAEKTMAYIARVSNPSNQANDKFAGLLRYCIKHEHWSVFEQSCMTLKIETNRGIAAQILRHRSFTFQEFSQRYAETSLLGNEIPIPNLRRQDQKNRQNSIDDIPDELKIKFSEKISKHFQEANKLYEEMLNEGIAKECARFIMPLATPTRIYMTGSCRSWIHYIQLRSKEGTQKEHMEIAEDCKKVFIKYFPSVSEALNWE
|
| 6 |
+
MKKLFLLSLLISLISPIKTSAGFPEGEKGYDLKKIEDSFKLPCDEIGNDECIARAFGVGACTWVFGIKNGKDSKEALRIADGVLIALLKGNNLDINSIFEKDGSIKETIQKESVYRINFCKDATKLAIPKLIKKLPEGVELDDERIENLADVFPLQYLTMFEQMRKRN,MKAAILVNQKKKLIVDELDLPTSLKVGQVLVKLEYSGICGTQIGEIDGVKGEDKFLPHLLGHEGSGIVEKVGPGVKTVREGDSVVLHWRQGNGIQSEPPKYNWNGKTVNAGWVTTFNTKAIISENRCTKIPANISKEDAALFGCAVTTGFGVIENNAKLKMGESIVVFGAGGIGLNIIQAARLTSAWPIIAVDLFDNRLDLAKKLGATHSVNSSNKSYLDEIENILKDRELDVFIDNTGNTSIIEMGYNLISDQGRLILVGVPKTGENINIFSLPLHFGKKITGSFGGECNPAKDIPRFIKMMQNGLWDLKGLITESYDLENINEAIFSMRTGKTSGRVIIKL
|
| 7 |
+
MNNVVQNKSKIFYQLQKLRRLAQPFFLPIDQCNGFQFIWLLISLLFCVGGIVLVALTGIISFFESIQPIFLDKYFGGVVNTVNTIWSGWWGLLFSGLFLIGSGSFFSLRRQLKNRRWVHWLFLAVIVLMLLAVNGINAGIGFIARDLTNALVEKQQDGFYRILGIYACCFAVALPIRVSQIFFTYKLGIIWRDWLSKSLVKDYMTNKAYYQLNPNDEEQTDVDNPDQRITDDTRAFTGQSLSFTLGVFDALLTFSLNILILWSISTTLTFSLFGYAAFATAILLIAGKNLVKIDFDQLRYEADFRYGLVHIRDNAESIAFYSGEKPEKSETERRLGEVVRNFNLLIIWRVIIDVMRRSINYAGNFFPYLIMAIPYFRGDIDYGRFIQASFAFGMVEGSLFFIVNQIEELAKFTAGIGRLEGFQSKVESISQTKPIDNQNIISDYSSILINNADLFPPGSDKAIIKNLNLSIETNQSLLVVGPSGCGKTSLLRMISGLWEPNQGSIKKPKTGDLLFIPQKPYMLLGSLREQLCYPTEVDKFSDDHLISVLNEVNLNSIVDRYPNLDVKQDWPRILSLGEQQRLAFARLLLNSPRFAVLDEATSALDIKTEKRLYNLLRDRELSLISVGHRPSLKDFHENILELNGQGGWKLFTTDKYNFKN,MNRWVLLEHKILSSKFIDIHYDFLVEDQLDCLTWKFHEIPSLNKGVIKIVKQPNHRLVWLSRVEYQLSKNRGLVKRIDHGIFSNIPHNQDSQKLKIILNGKLLNGLFIIDGNFCQLTKNN
|
| 8 |
+
MKYLILGSGSFAGQLIFSEYLERNYDVYGFNRSRVKDHYQWPWIKKYKNDLGNRWFEYNLTNDVEEMISHINRLKPNFIIDFMGQGMVAPSWLKPEVWYTTNIAIKSRLMNALIDSSFLQKYIRIGTPEVFGSNENFLKEDECFNPSTPYAVSHAAIDFNLRCLYKQYNFPYLIGRFANFYGVGQQLYRIIPRLFLSCRSERNFILDGKGESRRSFIFSKDIVSAIDSMIKFDGIGQEFNFSSNEEISIMSLVNKICNLTNVDKSRILKFGPERPGKDRYYRLDIKKSKNVLNWEPEVSLDEGLNIINIWISENIENLSNKSWTYEYKD,LSLIFINLLTSIPEYISKAVETNSTIAYLTICLAMFLENIIPPIPSEIIMPLGGFFVYQQKLNFYILVFWGVFGTILGSMPWYYLGKLVNEKRLSNFLDKRGKYIGITSNDLIKSRRWFDKYGVSLVFWGRLVPGIRTLISVPAGMELMPLRKFLIWTSLGSLIWVTLLTYAGFVFGENYPIIETYLNQIKFIVKPILILIFVYFLIKFFIRLYKKKIT
|
| 9 |
+
MGENLPLLLSAALGKKVNRPPVWMMRQAGRYMKIYRDLRERYPSFRERSENPELSYEISMQPFLAFKPDGVILFSDILTPLPGMGINFEIIESKGPIIEDPIRNIRQVEKLKELIPNESLSFVGEVLSSLKKDVKNEATVLGFVGAPWTLAAYVVEGKSSKNYSLIKSMAFKEPDLLHKLLDHFAKSIGEYLKYQIKSGAQVVQIFDSWAGQLSPQDYDIFAGPYQKKVVDIVKEEFPDTPIILYISGSAGVLERMAKTGVDIISLDWTVDIEEACKRIPTGIGIQGNVDPGILFGNKDSIKERIDNTFNKVKERKYILNLGHGILPGTPEENAKTFFEHGKKLTY,MRGSIKRSNESYQDSYSPNGIIGEKDACGVGFIANIDGKESNWILKQSLKGLNCMEHRGGCGGDSDSGDGAGILCSIPWEFLDRELNLNTESYEKRGLGMIFMPNNELKVKESKLICDEEAKELNFKQSFWRNVPIKNETLGILAKANAPFINQWIVCLEKDDSRDIEMLLFQLRKRIEKRIRDNTKNAIGECEFYFASLSSKTVVYKGMVRSEVLSEFYEDLKKEDFKVSFSVYHRRFSTNTLPKWPLAQPMRFLGHNGEINTLLGNINWAKASEIHIDDYWGELSRDIKPIVDKNKSDSSNLDATLEINIRSGKPITDSLLKLVPEAFRDQPELESREDIKAFYEYSATLQEAWDGPALLVFADGNYVGATLDRNGLRPARYSITNDGFVIMGSETGVVDIEENRVIEKGRLGPGQMLAVDLSQNKILRNWEVKAEAAKRKNYKKLIQKRTIKLKNNEWSNTCNLKDFELLQQQTAFGFSSEDNDLILDSMASLSKEPTYCMGDDIPLAVLSSKPHILYDYFKQRFAQVTNPPIDPLREKLVMSLEMHLGERCSPFEFNGIKPFIHLKSPIINEKELISLKESEIKSKTISSLFDIEERIKGFEAKLDDICKVSEKAIKEGCSLIIISDKGVSSKQSFIPPLLAVGAIHHYLLKKEIRLKASLIIETGQCWSTHHLACLIGYGVSAVCPWLTLESGRHWLQHPKTQKLIATKKINPLSIDDVQENIKKALEDGLRKILSKIGISLLSSYHGAQIFEAVGLGSDLIKIAFDGTTSRIAGITLKELANESLLIHTKAFPEIDLKKLEFLGFVQFRNNGEYHSNNPEMSKVLHSALKQGPGYDHFETYKTLIRNRPVTSLRDLLSINSTRKSIPIDEVESVESICKRFCTGGMSLGALSREAHEVLAVAMNRIGGKSNSGEGGEDPARFNVLNDIDENTQSAILPSIKGLENGDTACSAIKQIASGRFGVTPEYLRSGKQLEIKMAQGAKPGEGGQLPGPKVDSYIAKLRNSKPGVALISPPPHHDIYSIEDLAQLIHDLHQVHPRAKVSVKLVSEIGIGTIAAGVSKANADVIQISGHDGGTGASPLSSIKHAGLPWELGVAEVHKSLMENNLRGRVLLRTDGGLKTGWDVVIAAILGAEEFGFGSVAMIAEGCIMARVCHTNKCPVGVATQKEELRKRFKGLPENVVNFFLYIAEEIRQIMSSIGVSNMEELIGNQEFLTARDIKLPKTANIDLSSLIKKGTQYKDRSWLKHSKTAHTNGYVLEDQFLSDNEFMNSIKNHGKVIKEIEIKNTDRSVCAKISGEIAGLYGNNGFNGELNLNFKGYAGQSFGAFLLKGMHIQLIGEANDYVCKGMNGGVLTIVPPQVDEKSSEQVILGNTCLYGATGGKLFALGKSGERFAVRNSGATAVTEGSGDHCCEYMTGGKIVILGSTGRNIGAGMTGGIAYILDENNDLENKVNKEIVSIHKITSLKQEEILLGILGEYLEKTKSLKASKIINNWSNFKGIFKIVVPPSEEETLGI
|
| 10 |
+
MIEKKGDNIRSENFYPDSNYYLDQDNTPEETTLPEDQIFNTKKFEWPNSYWFIAERTNGRLAMIGFMAVIINYTLFGWIAYPIL,VHKNKILVPLSNNSYEVIIKQGLINNIGEELIRIGINSNRKILIVSNKEISTLFGRKLLNNLKKNNFNAEIFNIKAGESHKNFASLSEIFNAAFEVGLDRNSLLIALGGGIVGDVTGFAAATWLRGIEYIQIPTTLLSMVDSSVGGKTAVNHPKGKNLIGAFYQPKAVFIDPETLITLPTREFKAGMAEVIKYGVIKDKSLFEYLENEKNRDKILNLENESLIKIINKSIKTKACIVSEDEKENGIRAILNYGHSFGHVIENLCGYGEYLHGEAISIGMKIAGDIATEKNLWSKEHSLRQDHLIESYGLPIQTPKIKKNDVMKILMGDKKVRNGKMRFILPIELGEVDIFNDINESQFLKYFN
|
| 11 |
+
MLKNDLWINQKASKGMINPFQSNLVRHLDPNNKKNAVLSYGCSSYGYDLRLSSKEFLIFKHVPGTVMNPKKFNPDNLEKTILHEDKDGEFFILPAHSYGLGVALEKMKVPENITVICIGKSTYARLGIIVNTTPAEAGWEGHLTLEFSNSSGADCRIYANEGICQLLFFEGDPCSTTYEDRKGKYQNQPEKVTLAKI,MKKTKVICIGEALIDRIKNKSNQEFTDFLGGAPANVACALRKLQIDSVFIGRIGSDEFGKKFINQFKELEVNINFLQLDDCLPTRIVKVNRDNSGDRYFSGFDTSLNTFFADEAFDKNEIKKDLKSLENLFSKTKYLVCGTIILSSSISADTINFLLSLANKFDVKIIIDLNWREVFWDFATSSSETNKKERVDLIRNLLNKAHILKLAKEEAILFFENKNPLEISERLLNRPDVIITDGANPICWLINGVQGTTEVSKSLKIIDTTGAGDAFLAGLISQLLSFDYPSNESEIQNCVKFASICGLLTCLGEGAIEQQPDYSKVNKFFGSQIL
|
| 12 |
+
MVSVPFSNNGSNKNFKKDFNNENAGLVPPQNIQAEEAVLGGILLDPDAIGRIADLIKPEAFYINAHQEIYKTALMLHTQGKPTDLTSMSAWLADNGSLEKIGGNSKLVELVENVSSTASIEQVANLISDKFIRRQLIRSGNEVVQLGFDQTQETNEVLDKAEQKIFEISQEKPTKGLTQAAEILTSTFNEIESRSLGTSVAGIPVNFYDLDAMTQGFQRSDLIIVAGRPSMGKTSMVLNLAKNVAQSQDLPVCVFSLEMSKEQLTYRLLSMEVGIESGRLRTGRLQQEEWPLLGEGINSLGQLPIFIDDKPNLSVLEMRSLCRRLIAEQKKELGLIVIDYLQLMEGTTPDNRVQELSRITRGLKSMARELKVPVVALSQLSRGVESRTNKRPMLSDLRESGSIEQDADLVLMIYRDEYYNPETEDRGITEIIVTKHRNGPVGTVKLLFEPQFTRFRNLAN,VELMGQFFSNVARYPKYLISIIAGGLVALLEPLFKNRSNPLTLVGLISSVISAFITFYFVLKAMTNPINL
|
| 13 |
+
MSFSKLVEIKNNFKFDANNSCKNLYKGACVKIKNSQKTFQVVGINPQSKVCWIREWPFALEVNKTFSLELNQITLQTYCSDTFNEK,MAENFSFDVVSDFDRQELVNALDQVKREISQRYDLKGTDTSLDLEKDNIFITTNSELTLNSVIDIIRQKAIKRKLSIKIFDFNSIEVVSGNKVKQTITLKKGLNQEIAKKISKNIRDEIKKINVSINGETLRVMSKSKNDLQLAIKLLENLEETYKIPLQTNNYR
|
| 14 |
+
MNKRHSLQRKTTLKWNSNGDLSEIDMLRILDRISAYELNQCELTCDLDE,MASQDYLIAIALIEQNNIRAMPLGGKEIKEKLEEEGNLIKLGEEVILNLLLRVFQRSDEGALKRVSEDKGLLLVHMHPKRMQKELPFIKSEWIRDGDTTQFLKYLGNLSKEIWTASLIKYKGLELVSIAKNEDI
|
| 15 |
+
MVEKFKTLFFVKSSLISLYLALTCPIPFISSEKLKIFSIITFFFGLLLIINITNDYVDTCDKKISYKTSFISKIFGKKNWEIFWKDIKLIKSLPTSQGSNIHYFISNKNESFLVPQRVENFERFVSIIEEKTKLNIDKLSYISPLWTYKLLTYMSILMIIGELIAFII,MSSNKICLNCGSSDLVSDRSLGGRMVCFKCGSSSFKNNSFSRIQNKKIIYLLIVLVILLIVVL
|
| 16 |
+
LTNSIKGRNSKEVTIQLKRAETQKNILIKNIYKEYETYFDIVRKSMLISAKKGIAGIYSDFSISDKALHSKELNIFLNKNISLLINSKLPFITIEQLKLGDISYPTKQLVNASVLKELVKRKEYQTVHIDHENEKTANESIEFHCDNNLNTYEYYESLSEDEISSVNLDESCYLNSFSKEISIENIEEGKRLVNAFLELIEETSDNKLIDYEKINDQAPDVFISSDNLNTFEFIDKSFSNFLLNLSYNINLELFKIELIKKIITEETFKCLSNNNSIIKHPYPFVIRYDLYPDNLYPRKNKSSDVYLFNITNVELELYNLDLSICRNNINDLKNRFKLLNKKQRYWKNKELASNSSK,MTDILVLILFVLSGAASGWLGVDLLPIDILKQVSNVEGFRIVLAIIGFFIGLAAGFVFLQLRKTFLDQIRTMPTDLLISRAVGLILGLLVANLLLAPILLIPFPREVFFAKPLAAILSNFFFGALGYKLADTHGRTLLRLFNPTNTDAYLVNEGIIPAASPKILDTSVIIDGRINGLLSCGLLEGQLIVAQSVIDELQTLADSSSNEKRGKGRRGLKLLKELRELYGRRLVINPTKYEGNGVDEKLLRITEDMAGTLITADYNLSQIAEVKELKVMNLSDLVIALRPEVQPGESLNIKIVREGKEKLQGIGYLDDGTMVVIDDAKKFVGERLDIVITGALQSPTGRMVFGKLINNPESNKSFKSPATQG
|
| 17 |
+
VHLDKSSSEIINKFKLSPHPEGGWFREIIRSKNHVTRNDGQKRNNITSIYYLLCKSERSKWHRVNSSDEIWIYLQGAPLNLYFLDDNKELRNIRLDLNNPIEMIPSGYWQAASSTGEFTLTSCCVGPGFDFNDFQMLRNIDPSLRPAKAIKELI,MFFWYRVLSLWPLRRRIINLERFRGLHDDYSKSSTSLNAIRELNDTCNVNLLCTPYVAYIPNSDYWRPNQARDLYKLHLKKSSKKLKIKFIDGSTVIDTKDIKNYAPLGPHLSKLGYQKFAELLSSHLSKKK
|
| 18 |
+
MSKVEIYTWRFCPFCIRAKSLLEKKNITFTEHKIDGDDNARELMMERANGKRTVPQIFIDDKSIGGCDELYELEKEDKLDLLLN,LTIYLGFLYLFFGIIFLLMPLIYIELGRPRDFIKGGLNLVIGMLLIYKQNIFNTLNYLIFSVITTLLTFYIVEIFSIRWNQLTNQEKNNLLTLEELKKNLSIFLKAISLARQDFLNSNNIFKFGRKNENLNKKKWVRNDENDNIVNSNKNNLLTLEMPKKATNKSTKDTINEGK
|
| 19 |
+
MKLSLLSAVLFLFTEISFAQEKLNYTVTSDSQIQSIKGNFEAIGNVIIKSTNNNFEASSNKLTYDKDAKTLKLVGNVFVKNLESEGLSIQKSYGDELTIFTDSGLFKFNSENKNRVKTKLKF,MKNLKSNKRKIHRKVAAISSIPLLITLISGTIYSFLQPLGVDAFWLIKWHTGNFGIINLQPFYSIFLGIASIISVISGIRLLQKNS
|
| 20 |
+
MVCVSNNKSYLKSQHLKIIGQKTLRGKVKISGAKNSALVLLAASLLTDEKIILDNVPLLTDIEKMGNILKNLGVKLHNKDHQLIIDSKNISIQELPYELVNGLRASFFCIGALLTRFGEASIPLPGGCNIGERPINEHINGLRALGAEIIIDRDVVKAKLVKKKTKLFGANIRLNCPSVGATETLIMAASLAEGRTVIENAAREPEIQDLCQMLNKMGAKIYDSGKEKIIIDGVHKLHGCTHKVIPDRIEAGTFLIAAAATSSSITVSPVIPNHLEAVLNKLEESGSKIIIKGNSISIKGNNIKAVDIKTAPFPGFPTDLQAPFMALMTIAKGRSKITETIFENRMNHVDLLNQMGSSITLKNNIAHINGVKKLRGMTLVGSDLRSSAALIIAALTSKSVSYVYGLEHLDRGYENFEQKLSKLGIEIKRQITKQTINKSKNRSSNSKLKEVSEIRAA,LELVDNINPGLVNNLLKMKSKIKQTFKLILFIFLTNTHFLQAHNLFNGGCKNHCKESVKPLIMNKELNNSSYKNQIEDDDSCLIKSLCRG
|
| 21 |
+
LIFIMIQFASFAIGGFVPSAAIAGVLVLIGLGAFFYLGLKGPTDY,MYSLEISLRYSPFPLSIQKKEYEDIKRIYDEIKDSMNSDNQNSPLIELSCEKVQDKLITVLAKEVISVQIYEKSAVAGGSKRPGFSLDI
|
| 22 |
+
MSGIKTKNNTQKLSFRLAPYLFIAVAIFTAFGTNGGTWV,MRNSWIQPRIGQKNITQMNFAKNGHITEEMNYVAKKENLPPSLIMEEVARGRLIIPANVNHVNLEPMAIGIASKCKVNANIGASPNASDINEEVEKLKLAVKYGADTVMDLSTGGVNLDEVRQAIIKESSVPIGTVPVYQALESAHGSIERLTEDDFLHIIEKHCQQGVDYQTIHAGLLIEHLPKVKGRITGIVSRGGGILAQWMLHHFKQNPLYTRFDDICEIFKKYDCTFSLGDSLRPGCLHDASDDAQLAELKTLGELTRRAWTHNVQVMVEGPGHVPMDQIEFNVRKQMEECSEAPFYVLGPLVTDISPGYDHISSAIGAAMAGWYGTAMLCYVTPKEHLGLPNAEDVREGLIAYKIAAHAADIARHRAGARDRDDELSHARYNFDWNKQFELSLDPERAKQYHDETLPEEIFKKAEFCSMCGPNHCPMNSKISDETLDELNNKLTKCDTSV
|
| 23 |
+
MKSPVLKNPNQDWHPNIWPPFTQIINSKPQLEVTHGKNALIYTKNPKQELIDGISSWWVTLHGHSNDYIADAIYHQAKTLEQVIFADFLHPQAQILSERLSGLTKLERLFFSDNGSTAVEVALKIAYQSWQNQGETRNQIIAFDGAYHGDTFGAMALGERNIFNENFDNLMFPVKRAPWPSTWINDEEVERKENNAIQILTKLLKKPTVAVILEPLVQGAGGMNMVRPEFIRRVSEVVKNNNSLLIADEVLTGFGRCGSLFAFQKANIIPDLISISKGLTGGFLPMGITLAKETIFQSFISDSPKKTFWHGHSFTANPLGCAAANASLDLLEKDPIKYLSFEEKHLSHLKKIKKLPFVKNIRVTGTIAAFDIEIGKNEGYLNNVGKRIKALSIKKGLFIRPLGNVIYLLPPLCITDRQLEKSYRIIFEILSDL,MKSLLNTHTKFEIMHIVLRNFKFLIFLFLLSLNLSGYSNAHMRGTFLSEEDARNRSLELGCEGIHKNQDKWMPCKNEKELHKFLNKKGSSRGNNLASSLAWIFILSSSFGILWLSIVKIKRK
|
| 24 |
+
MNNAKNLKIKQIDKKNISFKELSLIKNIIFWVDIIPGDQTQKNAIFARPFHDKNAIPQKLTGDNFYIKSNFHGYGGKSYQCIEVNDHIYLIWVDQLSKAMWLKIFKVQEKVLKNDNQYLLCDVEPRQLTESIKTNFDTSFVISKNNLLFGLCEIKHRDYLFSLNLKKTKQDIRIIKKFDNFAGNLSSNISADLFSWIEWNAGSMPWERNELFFAMIDNDGEIQNIKNFSNKFVNEEKNVSFFQPYWMSDTTLVCSEDSTGWWNLLFLDLTDIKNIILKKRIIKPLTEYGSPQWVSGISFFSGNIKNLFCVAKKDNSWVLEHYQNCECIKELKLPFCSIGDLDVCDQKLVIRGCSFGCFEELFECDFGEKSHTKLLNEISLESINEYSRPESFWFKGFNNQPTHSFIYKPLFERFIKSPLIVKAHSGPTACFDGSLNSEVQHWTSKGFTVAEVNYGGSSGFGREYRERLNYKWGILDSYDCKALVLDLIRLNLVDRTKVAILGNSAGGLTAINALCEGDLFKVAICKYPVLDLNDMHQHTHRFEKGYLNSLIGRYSKFHNEYKLRSPIYKINHLKKPVLLFHGKKDLVISCKKTLQIKEKLLKNNKNSEVIIFENEGHGFKNTNNKKQVLIKTQEFLEKTLNI,LSRILLLSNGHGEDLSGSLLAKYFVKKGDLVDALPIVGDGENYKKENIRIIGKTKKFRTGGIGYNSFSGRIFEIFGGQIIYFFKKLYLSYKLKNKYDFYLVIGDIVPVFFAWFAKKDFFTYLVAYSSHYEGKLKLPWPCKFFLISKYAKKIYARDFLTADDLSQQLRKKVSFLGNPFMDKFSFFENKPKIVPFNIGLFPGSRFPELLDNLKLILEVLETMSKLQYFENIAFKFAIVKALSMEEIRQILNQRKWIYIEKKGKNDGLEFTFGFITINLNWNLFEEILFESNFVISMAGTASEQAIGLAKPVIQIEGNGPQFTKSFAEAQRRLLGRYVFCSTNYINKKDQINQTINLILKVIYLIKLDKKFLVSCLDNANLRIGESNSCLKIINDIKGFHEK
|
| 25 |
+
MMIPISLQKNQNRHIIKSVRKFIDRFFKIKKNQNLNMTEQRKIEKFGVGNLYPDIHPPEFSFFKEKCIDVALGYDDGFTFTPKFGNFKETEDIFDYLKQYLEDKELEKLDIRFDTLKTCIYQINPETLELGELLECEGSDVEYFEWNKKTKSIDEVDSNSLSDEEEEYFH,MIKIFALTFSEIGIGKLEIFVIGIVSLLFPILFIIASRNLDAKGVFDWMMEKPNDWIGKK
|
| 26 |
+
VFDISKENFFKNLIKFPKKNIFMILLFLGFGEWFLSDLINFAGGSIGFFILCFGGYFYLKSEKPKFNEPKDLDGWIKLCNEDLDFFEEIELHNNLEKQNINRKKALELILNREKKEEIYCIGQKNFDSNATLFKNYFKEDKFKLNFMERLPKYNSSEIVPEVILNSDAILYFLKLPLSANDFLWLEKLPKNMPIWLVASFTKGLSFNNEIEEVKAQISGEYANRIIKFDKTKNSFANIPFSLRKFFISSNNNIENTKKRLLKRLHTNWQSEIEGIRRMQLNDLQRRNQIIVATSVFFSPIPSIDVLSMTVLNSLMIKEIKSIWGCNWSPEILDKVSKQIIKTAIAQGVIEWSGQTLIGLTKLHGPNWLVTGAFQAISAAYLTRVVSSSLADFMALTKGVSEPDLEFIKENSDKIVERAFENEKINWKSLIPELNIPLTRLT,MIKNTKKSQKNKILTLEDVSISYGTFEAVRNVFCNFKSGDITSLIGPSGCGKSTVLRALNRMNDLIPNCSLRGTVLFDGTNIYDKRVDPVEVRRRIGMVFQQPNPFPKSIYENIAFGARINGFVGDMDELVESSLRKAAVWSECKDKLNDSGYSLSGGQQQRLCIARTIAIEPEIILMDEPCSALDPISTLKIEETMHELKKNYTIIIVTHNMQQALRVSDMTAFFNAVEYEEGDGGKVGYLAEFDSTKKIFSSPKEKTTQEYISGKFG
|
| 27 |
+
MWIRFHEKNGSERLNFTAFYEALLEAKGVNLGDTGVAGIGKGGRKLSYIATVQGNGNLLIGKAYTALLDLKAGDEFEIKLGRKQIRLLPSE,MTISDKIRVYELSRDLKLENKDILDAAQKLSISVKSHSSSISLEDAKKIKNLINKNSSKKILSVSKSAIKAKNENPKNNDNKNNKNFSNPSHPEKLSKEGLNKKPLLIKPTNKVVNSLVSSNIKNPNPPTIVSNLKSQALSKNQNKTNTSVITTPNLKDKKNPSALQDKKPLKNSSGSPAKTTARPPIQLIEKPKNLANSNRNINANKINNSVNQKAQSLNRADNNKLSRADNNNFPKKNLNSPNVKSTPELVGAPIRREDPKINTNRPNSNSRQPSSNTQISANRPGGQNRQGVPNREGGPYRQGSPNRPGTPYRQGAPNRPGGQNRQGVPNREGGGPYRQGSPNRPGTPNRPGTPYRQGAPNRPGGQNRQGVPNREGGGPYRQGSPNRPGTPYRQGASGIRKPVAPNELMQLQKTNASNKEKPNISNVNKQKIEGANQKTKAPNSRLNTSPSPTAKKPARSFASNTKKPGRTDWDDSAKLEALRNKNPQKQRQKVHIIGENDDSLTSETSGYSGEKVSILSASLARPKKEKSEEIKSQKPSKQFKKKKKETTRQRQKRRAMELRAAKDAKQVRPEMIIIPEDNLTVQELADKLSLESSEIIKSLFFKGITATVTQSLDLATIETVAEEFGVPVLQDDVQEAAKKTVDMIETDDIESLIKRPPVITVMGHVDHGKTSLLDSIRESRVASGEAGGITQHIGAYQVEFEHESKKKKLTFLDTPGHEAFTAMRARGTKVTDVAVLVVAADDGCRPQTLEAISHARAAKVPIVVAINKIDKEGASPDRVKQELSEKDLIAEDWGGDVVMVPVSAIKKQNIDKLLEMILLVSEVEDLQANPERLAKGTVIEAHLDKAKGPVATLLVQNGTLKAGDVLAAGSVLGKIRAMVDEHGNRIKEAGPSCPVEALGFSEVPTAGDEFEVYRDEKSARAIVGDRATDARATKLAQQMASRRVSLSSLSTQANDGELKELNLILKADVQGSVEAILGSLEQLPKNEVQVRVLLSAPGEITETDIDLAAASGSVIIGFNTSLASGAKRAADANDVDIREYEVIYKLLEDIQSAMEGLLEPDLVEESLGQAEVRATFAVGKGAIAGCYIQSGKLQRNCSLRVLRSDKVIFEGNLDSLKRSKDDVKEVNTGFECGVGCDKFSTWSEGDIISAFKFVTKKRTLNK
|
| 28 |
+
MNKTITPSIETIERNWFLVDAKDKTLGRLSTEIAAVLRGKNKPTFTPHLDTGDFVIVVNAEKVEVTGKKASQKLYRRHSGRPGGMKVEKFESLQERIPERIIEQAVKGMLPHNSLGRQQFKKLKVYKGSDHPHAAQNPVLLNS,MKIILLVFFFIMFSFIFLKFYKYKKAFKKDKSIKFNKSNLYNWMNLTKKERFDLSKKESNSYLKKRKTLLEEIRKEYKIISKND
|
| 29 |
+
MTELNQKNSGKNIKWHNLTIDRNKLEKMRGHKGMVIWFTGLSGSGKSTLANAVNEVLHLDGFSTYVLDGDNIRHGLCKDLGFSDEDREENIRRIGEVANLFMNAGIITITAFVSPFISDRDKVRKIIGSKDFIEVHCAADIEVCESRDTKGLYKKARLGEIKEFTGISSPYEAPVNPEIVVDTGSLGLNDSVEKVINHLREQNLLERS,MESDNLFSNTYRIESNAPLADKLRPKNLDDFFGQESILGHDSLLRNAILNDKVGNIIFSGPPGVGKTTLIEIISSNTRSSLIKLNAVLSSIKELRTEIANAKERLRSSNRKTILFIDEVHRFTSVQQDALLPSIENGTITFIGATTENPFFAVNKALISRARIFSLLPLNKNDLKKIIDKVIKYYSCLKDSKVVEIKEEAINHLIKFSGGDARNLINALELGISITKENKENLVVIDLAIAEDSIQKKNIVYDKNGQNHFDVISAFIKSIRGSDPDATLYWLANMVEAGEDPNFIFRRLLISACEDIGLADPNAIVVVQSCCDAFDRVGFPEGLFFLSQASLYLAISPKSNSTKSIFKALEAIKATNVSLVPNHLKNNASNYLNPHNYQGKWLQQEYLPTDLQGIKFWKPKDSGWEKNKYEDLPKKQKS
|
| 30 |
+
MSASKREEVSSHLRYIRLELREMHQMLIRDDLLPDLSEAKEVHAQLDALYELLSDKRKKKVKNEFENF,MEAFHPPKEVKETIDDSGLPKEEGISEKWLREKIDSLIPLIQEKWPNIAQQTLETAKGSIDDLVGVIASHTGSSASGIKNQLFQIIDSIQENNWEIADKIEPIESQLEELLDELNSTLRPKIETPIRKKPILSIAIAAGIGLFIGSLINSRNK
|
| 31 |
+
LNYWIQNLAPDGSPDEIGVIQLAWLGDSVWELHQRLRHIHIPLKSRDLHLSVVNEVKAQAQSKALDEIEHLLNSFEINLIRRARNKTKRFPKSSDPAIYSRATGFEALVGWLFLKDPKRLSKFFEYLECK,VTQIDSKKKFDRLRLCKLLETIYKEHTTEELNLICNQLLQILDNFSEKSRYEEISEDKKWDESFAVLITYADGVYKKGETTLVTLRELLSKNFGSLSKVVHILPFLKSTSDGGFAVSSHTSLEEKFGSWEDLKSISNKHYLMADLVLNHVSSSHPWVQQFIKCQEPGLSNVFSPSQDLDWKNVIRPRSSSLFSQINTDDGQKQVWTTFGPDQIDLNWLNPKMTIEFLNLIITYLSNGIKWLRLDAVGFIWKEPGTTCLHLSKAHSIVKILRILLNDLLKDGVLITETNVPQKENLSYLLPEDEADMAYNFPLPPLLLEAIISSRADILNAWICDWPELPKTTTLFNFTASHDGVGLRALEGLMNEQRIKDLLINCEKRGGLVSHRRLSNGEDKPYELNISWWSAMEDPGRDSNRYQYERFLLTQLLVMSLKGVPAFYLPALLASENDIKSFSMTGQRRDLNREKFKSEKLAAVFNNPESNANKNLKYLRHAMDVRAKLPQFHPQSHMECLSKNRADIVALKRGIGSKAVFTIHNMTENKINYRFIDYEFNKLIKNDLNMQDYLTSNKYNSNNIELDPFQVIWLGF
|
| 32 |
+
MTSSKPKKSSRVRKTTKNSKKNHNTMMPLLPKTPPSFKNKVVDKKALKNLVSWAYKTHGTAVTAAMADNLKDLGFKYATQAAVSISVNDLKVPEAKQDLIGQAEAQITATEECYRLGEITEVERHTKVIDTWTETNERLVDAVKNNFNQNDPLNSVWMMANSGARGNMSQVRQLVGMRGLMANPQGEIIDLPIRTNFREGLTVTEYVISSYGARKGLVDTALRTADSGYLTRRLVDVAQDVIVREEDCGTERSIVINSEDGKFGSRLIGRLSAEDILDSEGNLIVPKNTAIDPSLSKTLETSLISKVNIRSPLTCEANRSVCRKCYGWALAHNHLVDLGEAVGIIAAQSIGEPGTQLTMRTFHTGGVSTAESGVVRSKIKGKVEFGSKAKIRGYRTPHGVEAKQAEVDFLLKIIPTGSITNKAQKIEVTSGSLLFVEDGQDIDSDITVAQITSGAVKKSVEKATKDVICDLAGEVRYDKVIQPKEVTDRQGNITLKAQRLGRLWVLAGDVYNLPPNAKPVVSTETKVEQGTVLAEASQSSEFGGEVRLRESVGDSREVQIVTTSMLLSNFKLIEESTHSGELFHLESNDGTIYRLNTSPGSKISSGEVIADLADERFRTKTGGLVKYAPGLSVKKARSSKNGFEVSQGGTLLWIPQETHEINKDISLLMTEDMEWIEAGTEVVKDIFSQTSGIVTVTQKNDILREITVRNGSFHECEDEEILSRFTEEGKLVNPGEKIIDGVDNDEILFVQKLETSKGKGLLLRTVEEYTIPNEAELPELSHVKQEKGPSLALKAIQRLSYKDGELIKSVEGVELLKTNLSIESFDATPQMTIDVETIQDKSDKSINRLNLVILESILVRRDTISDSSHGSTHTELQINNNQLVKAGDVIATTQILCKERGVLQLPDSVEGEPIRRLIVERNEDKIKINIKDKAVVKTGDRVVDGDLISKGVKSTSCGEIEEVSSEYVILRIGRPYMVSPDSVLHVKDGDLVLRGDGLALLVFERQKTGDIVQGLPRIEELLEARRPRDSSILCKKSGVVQIKEGTDEESVSLSVIERDDSISEYQLLMGQNIMVSDGQQVTGGELLTDGPINPHDLLDCLFTDLKDQKPLMEAAQESISKLQRKMVNEVQNVYKSQGVAISDKHIEVIVRQMTSKVRIEDAGDTTLLPGELIELRQVEDTNQAMSITGGAPAEFTPVLLGITKASLNTDSFISAASFQETTRVLTEAAIEGKSDWLRGLKENVIIGRLIPAGTGFSGFVEELASEAGPHPDILAEESGGYRRTQNLRPDYTVDMPQTPIVSSTAILDDPSDEDLETTRNRHGIDPTSSNFAAFARPNAENQFSEDQLPDPAALEGLQEEGLLSDG,MQNPAEKKSSILKDFKNLFIWIIIALIIRWQVIEPRWIPSGSMLPTLQIQDKILVEKLTPKITSKSNLSKLKNKIIVFNVPEQLIDAGYESDIALIKRVIGVPGDKVEVKEGNLYLNDIAQNNYISDKNINYSTGPYYVPEKSLWVMGDNRNNSMDSHIWGFLPYEKVIGKAIFRYWPLNNIGPIRFPSLNNLG
|
| 33 |
+
MEDWQEWEYFDYHGELRSKRTKICITCTHFRYSTTDQCVTILTCPFHQKLIPQGDHLVKGCTYWRKDSRIFAPEAA,VRFHIQQEIDIPASTQLYNQICFAIAARYYPPGHRLPSTRQLAMQTGLHRNTISKVYRQLETDGVVEAIAGSGIYVRDNLKKSFNSKNNLNTTPALETKKAVDKLIKLGCTLQETRNLLTNEIDWRIKCGSRIIVSTPREDIGASMLIAEDLSPNINVPVEVIPMEELEKVLCNSNNGTIVTSRYFLQPLEKLAKQYRVRAIAVDLSDFQKELKIIKELKPGSCVGIVSISPGLLRAAEIIIHSMRGSDIVIMTTISDNSNRLLALLKASNHIVCDGPSLSVIENTLLKNRSQLMRVPQIICAKNYLSIKTINHLKTEIGVIN
|
| 34 |
+
MKVIVIDDDPTGSQTVNNCLLLLKWDYSTLIKGFQSKSNLFFILANTRSLSENDAKLRLVEICNALKKVISKESYKEEFIFVSRGDSTLRGHNFLEPKIMNDCLGPFDATFHIPAFIEGKRKTIDGEHFVDNVPVSQTIFAKDKIFGYKTSNVKQLLFQKCKSQIKFNDIQNLKISELKVLESKEKNIVFNKIRNLKENSHVIVDIENYSQLQKFSLSIKKLSKQKKFLFRTAASFISSISAVKDNPKEPFFYSLIRRKNREKKFLPGFLVIGSYIELTTMQLKEFLEISDCIPIELDVFEFLKISKLKSNQDQLEVFKNKLLAQIRSILKQENTPVLFTSRKEVSLARNDEQVNFNNSLAHFISELVSDLKNEIGYLVSKGGITSNVILSNGFKANYVYLQGQIITGVSLVTFKLENDENLPIVTFPGNIGNQDSLVKVWRILENKNNSSN,MRILHTMLRVGDLDKSIDFYVNILGMNLIRRKDYPHGEFTLAFVGYGSEKDNAVIELTHNWSKKSEDYELGNKYGHIAIGVKDIYDICQGLEDNGCNVTTKPKTMKNSTTVLAFVEDPDGYKIELIERD
|
| 35 |
+
MANNFYQWWKNHRRVVTFGGFLILLGLYVSPVIKEAKYKNMCIKLSEKGALNKLNGDNIGETLLKDTGLSIEELAKIEGYRNCF,MNDLNIEFPLDDFEELISQIGWSSLDEWFIFWNLKKEMLSINNFWDDNVKDDWIWGLALPLLSQAYKLNKKSPDRKIIGISALPGTGKTTLGKWLESISLKLKFKLSVISIDDFYLPSEEMEFAIKNNPWNVSRGFPGSHSIDLMKEKLLKWKTDGQLNVPVFDKSLRKGLGDRAHWREESPDLLIIEGWFLGVKPLSIDLDNSEKFSPPLSVFESSYRNKIQNNLDQYLDIWNMIDQIWHLKPLKFEYLNEWKSNQEKSMHFKSGSSLKGDNLSNFLRMLNVSIPHKSFDDINSDVLLMINQERKLVRVGLNQQISK
|
| 36 |
+
MKLNQFLKWHNIVSSGGEAKILINSGQIKVNGEIEKKRGRKLVKGDKVMFLKSELIFE,MKPQLTLQTPLELPHQEISNYLNQLWISEDEESVGANTFTLMVWQPAWLEQCLVKSGLISGPITGTLSPEIIKVAKKLIIDKGLSHTTSIHSEELLTLLKENLLNKDYEDLRGQFFESSISTLNPRRLITLAPTLNKESEIKTFVSAYCPLSDNTITQPICGDLVVIRGDSNSINNKGLKIIDDLSIKDLPIWLWWNGSLDESQEIFNFFTDQGIRLIIDSANGSPKRCLKILYQSIKSNKAINDLNWVRLKSWRESLAMIFDPPSRRPILEHISDIDIDIAEGNFLQALLLISWISDKLEWVFSKINKHGDLIKIEFKRKNGENILTCINPVPLGNPSIHSGQVIGLRLISKISEVRKNNTCVILGCESVECMRLEAGGMADMQLIEQVVPNSFSSSESDVSKLLGSSRGNTSPLFENAIKVAVQIFNGFNK
|
| 37 |
+
MKDKQEKIRMFLPFSWVICAVISFAYINSHLINT,MEFNIQDKVKLKNPLSYLKTSDNMPMLRPPDLVAIDEVGEIIAIKSPDTVEIKFRRGSFLIDTDKIEKTQI
|
| 38 |
+
MKYLILGSGSFAGQLIFSEYLERNYDVYGFNRSRVKDHYQWPWIKKYKNDLGNRWFEYNLTNDVEEMISHINRLKPNFIIDFMGQGMVAPSWLKPEVWYTTNIAIKSRLMNALIDSSFLQKYIRIGTPEVFGSNENFLKEDECFNPSTPYAVSHAAIDFNLRCLYKQYNFPYLIGRFANFYGVGQQLYRIIPRLFLSCRSERNFILDGKGESRRSFIFSKDIVSAIDSMIKFDGIGQEFNFSSNEEISIMSLVNKICNLTNVDKSRILKFGPERPGKDRYYRLDIKKSKNVLNWEPEVSLDEGLNIINIWISENIENLSNKSWTYEYKD,VTQRHFFVTTSSSSAAEKTLKTKIWKRVFIVCMILLISGSFFYFNHEENNTYILKTLELNGSVKEGDTLFKMNCVGCHGITARGLVGPDLQSITMRLNDAEIIKQVIEGVTPPMPSFEIDPQNMSNLLTYLHSL
|
| 39 |
+
VENDEIKYKKDIPIEWLKMPSSAKEAEKLNIKKYFIGDKRFMRELEDRDEYNAA,VILKTIKISNKLCLIGIIVFCLFQNHSVSASREPLIRVLISKNRNLRIRSDKSIPLIIKGQKFSNKKIKGLTVKKENNTTSLFFDKNKQKIYDLKNKVKLVVKSSDGRGIWVGQKRYSGILNLLVLESEILVINILGIEKYLSSVVGSEMPAKWPLEALKAQAIASRTYALKQKGNQIYDIDSTQKNQVYNGLESRTYKTIRAVRSTRSLVLTYKNKLINALFHSSSGGMTENSQDVWKNEYPYLSSVRDFDRNNPKLQWKKKFSSGELQKLFPEIGGIKKIEILNITNTGRVKNVQIFGKYGSDQISGVDIRKRMNLKSTFMRFKFIEDKKYISDNDNSNNPIEKTLIVFGRGSGHGVGMSQWGARYMASKGQKADRILKHFYKGVGIKPFSKNYL
|
| 40 |
+
MSIETTVLDFKLSNTFEEYQAHMNAPEQQAMFKEMGVKTFYIGKSLEDPKRATVMFQGPVNTCYDIFVNPETKPIVEASGHIYEGTIINRWIS,MMFNKQKKLILNLKILKLLFFSPLLISIPFYLGNSDAKAGLEFQWDQDSGYRRLKWFQKENKKRFRNTIFFFLRPSDRQANLLKITLNIPKTFDSTLKDKVSFCKVKIGGFEGRTKCIEDIPADVDINEDNSSLDIYPYSPIPSNKDSYAIVFKKISNPKKSGLKQFHSYGQYAQKNTSSRYLGSWTIVID
|
| 41 |
+
MNFITNSRQFHKSLAPWVFLPLFISALTGTFYRICKDLLGYSRDEVHWLMSLHEGEWLGDNGELIYVILNSLGLIWMLITGFQMFSKKISFPKKVTKGESKG,MASFTVGIVVFPGSNCDRDVSWALEGCLDIKTKFLWHESSDLNDVDSIVLPGGFSYGDYLRCGAIARFSPLINSLHDFIKSGRRVLGICNGFQILTESGFLPGALVANKNLNFICDDVDLNVITSKGGWFQKLNENQNIKLPIAHGEGCYHCDQDTLKRLVDNDLIALKYKTNPNGSTSDIAGITNEKGNVLGLMPHPERACDESIGGIDGLYTLRSLITQ
|
| 42 |
+
MNNSQRSVTHSQNGDYRTIEQTMEKLSGGTRRLAAQLTTSATFNSLWNVLTDYDRLNLYIPNLLSSRKIYKNNNNVHLKQVGAQDFLGMKFSAEVTIDLFEEKELGLLKFSLIKGDFRRFEGSWKIKKIKDTSKNSLIYDLTVQGCQWMPIGMIEKRLKKDLSENLIAVDKQAKASIK,VILNPELQEKGEIKDLMKSRGSFRAFPLAAITGHSLLKLSLLLAAVDPSLGGVIIAGGRGTGKSILARGLHTLLPPIEVLDNESILEKLTMSNSNTSLRPIGRNLDPDKAEEWDISTNKLLEEVIGSDYLNQIEEIPKKVREAPFIQVPIGITEDRLVGSIDVAASLSSGEQVFQPGVLAEAHRGVLYVDDINLLDDGIVNLILEATGREQNNIERDGLSLSHPCRSLLIATYNPEEGALRDHVLDRFAIVLSADQSIDNNQRVEITKSVLSHAENNIKFSEKWSEESDNLSTQLILARQWLKDVKITKEQITYLVNEALRGGVEGHRSELFAVKVAKANAALRGDENVNSDDLKVAVRLVILPRATQIPPQDDDIQPPPPQDQSPPPPQSNNEDSEPESNEKEDNQEEEQDNSDGEEDSTPDIPEEFILDPESCMVDPDLLLFSSAKSKAGNSGSRSVILSQSRGRYVRPLIPRGKVKRIAVDATLRAAAPYQKSRRLKNPNKTIIIEENDFRAKLLQKQAGALVIFLVDASGSMALNRMQSAKGAVIRLLTEAYENRDEVALIPFRGNQAEVLLPPTRSITAAKRRLETMPCGGGSPLAHGLTQSAKVAKNALSTGDIGQVIVVGITDGRGNVPLGTSLGQAEVNENENVDLKQEVLDIAAKYPMLGIKLLIIDTERKFIASGFGKELAEAAQGKYVQLPKATDKTIAAMALNAINEF
|
| 43 |
+
MSKDFKSGKVKRLPINNLNLPNFVNNSLRNNTKVNTVEGTNVIRVPFGKRFPKKQRPDKNQNIATLILPINTFINPTPPPHVA,MPSLSDGDFYYYRNFYIDQPPQMAQVFYESLHVISFSLKIIFYFLERLSDVN
|
| 44 |
+
MRHQLRVPLLSKPADQRKALLRALTTQLIREGRITTTKARAKALRNEAERMISLAKEGTLSARRRALGYIYDKKLVHSLFEKAQERYGERNGGYTRIVRTVARKGDNAQMAIIELV,MKDIFLVLDSYQYQMESNYQETSSLTNLFTENKFIGWLGLFIVFFSIFAIIIFQFLEWESNDKNKE
|
| 45 |
+
MNERNKSLWKQAIKWPLYSVAILPVFISGAYTLNSFKNVKIYNLIAFTIAAILILIWENLTNDLFDSETGIDEFKFHSIVNLVRSKTIVSITAYTSLLIGLVVIAIISISTSINVMLLVGACCFLGYLYQGPPFRLGYQGLGEPLCWLAFGPFAYAAALIALNPSDIYMISIPWKESLLLGSGSSLATTLVLFCSHFHQIKEDKEHGKNSPLVLLGAKKGAKIIPWIVFIIYVFQLFLIINGFIPILCVLFLISFPQSLKLINLLKYSYNKPEAIKNCKFIAIKFQTLNGIGLIAGFIINYLIYK,MKIMDNFDDDLSLKQKEFVEPIDKATNKDLFEKKDEFKEATPKVLHLNSLITKNIYLFTKDPNYKLFAWLMVQLFIFSLFVLVATLMKNNLVPYINSL
|
| 46 |
+
LLLMKRLLLAAVLFLLSEISFAKEKLNYTITSDSQVQNGKGNFEAISNVVIKSINNNF,MDEDSRKVTEEVWLICPNSTEVRRFTKNKNNKDKFFEYMFVDSGIIIGVLGAKPPLMKTRKEIKIEAARKEYQQLIISGWQVTIPKW
|
| 47 |
+
LDQFEVKVFIRLRPSVLDPAGEAIKSASSKLGVAGIKSLRIGKLIEVKIESNEEDIKEKIELLCDRLFANTVIEDYEYSINKL,MNLKQITQKDQLDLKKIYFDSIISIDQKIYTSEQKRAWASQAWDNKYFNLTLKEGKGWLINEREKIIAFASRYPNNRISLLYCRGDSQRKGYGTKLLKKIEKEAIKEGLPCLTTEASLISYKLFLKNSWKIIRKEKIIIKNITFERYKMIKNF
|
| 48 |
+
MEQGLNNPGPLTIFLVFTAGLLTSLGPCSLSLLPITIAYVGGTKNNKFKLISFSGGVIFSLITLGALSGFLGKIYGQLPSYYASLVALIAIIMGLNLLGILKFQLPNGPDLQFMEDKVPSIITPFVVGGAFGLASSPCITPVLATLLAWVSQAKNPTISIIFLFFFGLGQVTPLILAGATTENLKQFLELRKYSQVIPTLSGVFLVSLGILNLISNWI,MKENITELWFSWFYKNWEKNAPGNLIDKGLSPSQIAERFVNENHKEFLEIANEFDEDNYQALNEFMKLSESELHILKYFLKLIKLKNS
|
| 49 |
+
MGTANLHDSTNKPLYGERIIEESNIICFENPNKKRIYEISIELPEFTCKCPFSGYPDFAKLNIYYQPNMKVYELKSLKLYINKFRDLKISHEEVVNRIMDDLLKAAVPHWIHLNADFNPRGNVSMKLDIYSGQKRN,VRIIFWGTPEYSVKSLEVLKKSDHDIVAVITQPDKKRSRGNKLISSPVKEYATKENIPVFTPETIKENIQFISILNDLSCDLFIVIAYGKILPKAILDIPKYKSWNAHASLLPRWRGAAPIQWSILEGDKITGVGIMRMEEGLDTGDVLVEKQIKIENNDNLKTLTKKLSDLSSELFLRAISDIEQNKNRDINLLLKKQTDFKRELKYARMINKLDYIINWENSATDIYRKINALYPRANTTYKRKNLKIIKIKILTTHEIHNKNYKILSNVFKPGLIIGLIKNVGIIITTKTDPILLLEAKLEGKKVSSQNQLIQQLNPVIGENFSD
|
| 50 |
+
MAQLETRTEPMVVNFGPHHPSMHGVLRLVVTLDGENVIDCEPVIGYLHRGMEKIAENRTNVMYVPYVSRMDYAAGMFYEAIVVNAPERLANIVVPKRASYIRVLMLELNRIANHLLWLGPFLADVGAQTPFFYIFREREMIYDLWEAATGQRLINNNFFRIGGVACDLPYGWLEKCIDFCDWFAPKIDEYEKLITNNPIFKKRIEGLGTIERDQAINWSLSGPMLRASGVSWDLRKVDSYECYDDFEWEIASEKEGDCYARYRVRVQEMRQSLKIIRQACEMIPGGPTENLEAKRMATEDKKSEIFGMDYQYVAKKVAPTFKIPNGELYTRLESGKGEIGVFIQGNNEVTPWRFKIRAADLNNLQILPHILKGAKIADIMAILGSIDVIMGSVDR,MVRKISFIGVGPGDPDLLTIKALKKIESADVIFWADSLIPEKIINFSLKGSEKIKTSTLTLEKITSIMIERFNEGKTVIRLHDGDPCLYGAVKEQLEILRQENIETEVIPGVSAFQVAAAYHQAELTIPDITQTIILTRAGGRTGMPEKESLKDLAKHKSSLCLYLSARHIKSSQKTLLEFYPPETKVIVGYRVSWDDGWTSLIELKDMEKFTLEKELIRTTIYIVSPAINTIANRSNLYNPSYKHLFRGK
|
| 51 |
+
MLRPPFSQESISIDKWDVIVIGAGAAGLMTCLELPENLNVLLLNRNTSKRSSSRWAQGGIASVVRPEDSFALHVEDTLKAGDDLCDLSAVEMLVKDAPGCVDRLQNLGMIFDQSSDQLSTTLEAAHSCRRVLHVKDRTGRALVEVLEDHIENKENILHCRGVRVTELLIEKEVCKGVQVLDGSNLYWITSKAVVLATGGGGHLFTNTTNPAQSAGEGIALSWKAGVAIEDLEFIQFHPTALKFYGSPCFLISEALRGEGAVLVDKNGESPVKHLENGDLATRDQVSRAIMNNMQENDVDHVGLDLRFIDPEKIVERFPMIISRCQDYGVNPLNEVIPVAPAAHYWMGGVHTDLNASSTMKGLYAVGEVASTGVHGANRLASNSLMECLVFARKMSCIELNAPYNLRRLDRYTTEIFMDNPKEDFILGVSDKIDSLRKLCWSNLGVSRNKKNMNKLLKTLQDEIDQLQKNPLLECLNKIEIDQKLKLSEPNRRGLNLLLDLHNRQITTLLLLKACLFREESRGGHYRDDFPIKETTWKCHTRQQLNQEIIKRFIKN,MRTILISGANSGIGLNIAHKELKAGNRISIGLRDLESVKGSVIDPNNWTNEKILLNKYDALDKFSAKKWVENTVSKFGGFDTLINCSGVLSKVPFLYKDGDEEEILNTFNINFLAIWHLCRISWKHLSQSNNGRIIVLVSMSGKRSKGDLAAYSSSKFALMSLCQTMKNKGWEENIRVTAICPSWVNTKMAEKISSIEKSKMTQPGDIAEICSTILKLPMQSVPFEIALNCNYEI
|
| 52 |
+
MSRKSELLKGEETKNFSEFSQLADFSLMNSLNADPHSTKDGNDHRARSVNSGHYVPVTPTPIPEPIYVSHSKTLFKELGLSSDLTKDKNFCRFFSGDIEVAEYPMRPFGWATGYALSIYGTEYTQQCPFGTGNGYGDGRAISVFEGLFNGKRMEMQLKGGGPTPYCRGADGRAVLRSSVREFLAQELMHALGIPTSRSLTLYVSGTEIVRRPWYTEGSRYFEPDIMVDNHAAITTRVAPSFLRVGQLELFARRVRSNSHDDAFNELKIIVQHLIDRNYRDEIDPSYSFNEKVIRLANLYRGRLISLVTNWMRVGYCQGNFNSDNCAAGGFTLDYGPFGFCELFDPRFQPWTGGGEHFSFFNQPFAAEINFKMFCSSLLPLLLENKEDIEKLEKIKNDFSKFMSKEMQLMWAKKLGLEKYDETLTNELFNLMVNSKVDFSIFFRKLSHIPDNISFLKDSFYLPSSEELDKEWFIWLKKWQDCINKQGDLKEISKSMKQVNPKFTWREWMIVPAYQEAEEGNYNKIKELQTIFKNPYEEESLEIEQKYNRLRPREFFNKGGVSHYSCSS,MNYIQIKDLSKSYSDIKALKNLSMEINAGTLFGILGPNGAGKSTLIKILATLVEPDGGEVFVNNINLIKNPRKIRELIGYVAQDIALDKILTGRELLDFQSDLYHMNKKEKYERIKLLINQLEMNDWIDRKCGTYSGGMKRRIDLAAGLLHLPKVLILDEPTVGLDIESRNIIWQLLKDLKNDGMTIILSSHYLDEIDKLADSLVIIDDGKVIAQGTPAQLKNKLGGDRITLKVREFSNHEESKKISEILSSINGISQIIINKAQGYAINFVVDKEKDLLTKLKVELAFSKFEIFSLAQSQPSLDDVYLQATGKTLLDAEISMTGKRDLKKESKQSMR
|
| 53 |
+
MFLISAEKFSLWKKKQLSKGGDNHSLNLLLESLGGLSNIELNLLKINLEKNLNFKVNLDLIESFWDKHLNTSIPIQYLSGISFWRNLKLEVSNRVLIPRPETELIIDIISGIFKNKEEKITFVDLGTGSGAISIALALENPNWNGIATDIDKNAIKIASRNFATYSNQSNLKFYNGNWWDPLKNFKGEIDFAVSNPPYIPQDTYEVLPIEVKNFEPKLALLGGQEGLDHINQIVQNAPLYLKNKGWLLIENHFDQGEKVKKLFLENRFTSVKVLKDFSGIGRFTIGRYK,LFDKENLKYFLIWPMSVLLAIFFKYYGFLKPDFLLINNYLVLLLVCGPALVVTIILVFNKI
|
| 54 |
+
MKLQTQFTVPKKEFRDLDYVNKVKVLEETLKKECMDYPTKEDCLVCCN,MSKLKGPDGRIPDRLPDGRPAVAWERRWTEGTLPLWLVATAGGIAVIFVLGIFFYGSYQGVGAG
|
| 55 |
+
MNNKRIFHDPIHKEIIIDSDKPEELMIMQLIDTLAFQRLRRIKQLGAASLLFHGAESSRFTHSIGVFCVARKIYRKLVEINPDFSQNKFILFGAALLHDLGHGPLSHTSEVIFAHDHELWSKNLVKNYSPISSILKNFGTELPNQIGDLFKTKNLFSRPLKTLISSEIDCDRLDYLLRDSYNTGTKYGLVDLERIISALTFSPDGNIAIKPKGVIAIEHFLVLRNMMYRTIYNHRINEISTWILEKIIQIIKKDSVKKDLWIDESMRRWIFFPNQLEVKDFLANDDIVFYFHLMKWKEESFEPLKTLCKMFIDRKLLKASDISFLTKLKRLEILAFARKKCKLNNYDSEIFCGIKERSFKGFKSDNSLKIWDGTYQNLLENQSDLINTLMSSKDTSLIIYPGEFRKEIEDQIAIERANV,MSLKSVLKNKSLGILVHPSSLPGGSYCGTFGEGAKDWIKKLCKYKINHWQFLPLTPTDSTGSPYSSPSSFALNPWFLDINKLIEENFIISLNKKDLQSINQNEDHFDFDYANNLSKKLGEYLLFDWESQSEMRKTDFYLWNKKNTWVEDYSIFMVLREKFNMLPWWEWPLEFKQKENEFIKTWIKDKKNEILKTKLIQWHLDKQWKEIKVFAKTNGITLIGDLPFYVSRDSVDVWSNKSLFSISQNGDLLFQSGVPPDYFSSTGQLWGTPTYYWAKHKSTAFRWWRKRFKRQFELVDILRLDHFRALAGYWRVDGNAQNAINGSWINSPGKELLNLLKKDLKSDYLPIIAEDLGVITKDVEILRDNYELPGMKILQFAFDGNDNNPYLPKNIEKENWVVYTGTHDNATSTSWWDCLDITIKTHIKDKYKYSIDPSWNLMEIGMSTKANLFISPIQDILSLDDSSRLNTPGTITNNWRWKLNQTLDEIDMNLKKYSDLGNNYGRLSN
|
| 56 |
+
MSIETKKYNNLISISTELRKRIIKTSYEAKIPHIGSCLSCIELLVFLYWKELNIDPSNSEAINRDRFILSKGHGAPALFQVLGLKGFFPIERLNSFGKPGSVFHEHPPKPGYIPGIEAATGSLGHGFPMAVGMSLAKRINNLQYRTYSILSDGECNEGSIWEAAMFAGAQKLDDLTIFIDFNKWQATGRSKEVLALDPLKEKWQSFGWDVYEIDGHKFNQIDKSIELAKTNKNKPSAIIAHTIKGKGVSFMEDNNNWHYKTPNEEEFKKAFEELKN,MSKFSSQEIESQYNLIKTLLSDPEKYNDALDAIKKDIAHMPLELKKKLEEENITF
|
| 57 |
+
MQKKSFSISWGDTSLEMLPSKALLLPQTNELLICDVHLGKAEYFQQNGIPLTNNSDEQNLLSIKKIVENHKPYKLIILGDLFHSKYSISKSIKSKVENLSESLNIKIELIVGNHDIGCKVKNISFLEYKRSSNFIFSHEPIGKFENKILNICGHYHPKTFLKNSKDKLSFKCFAMDEKNNTLYLPAFGDLTGGYPCKNSFKKWAIISEKEIIAV,MKRLDLIFSERELDAIINTLEKANVPGYTVMKHATGRGPERVVTEDMEFTGLGSNAHVIVFCEQELIDQMRDNIKSDLSYYGGVAYISEATPL
|
| 58 |
+
LEELITKKLEVNDNLKSRFHNGFNIVKSTFLSSPIALRLWSSFFVILPIFVQAPWVRFAPISALCATFFILAAAFLLSRKEGDKWFIVGSLLLGVTGSWLGGCLFWGWLSAYPILHIPVEAVALPLAIVGLGTKWKIGSSFYISSLFGTAITDLTIFLTGIMDQWKEVIIADSDNAPLILQKTSENLIQFKSLSIIILAALILWFISKEIFNYATSNSINGKAFLVSSYVIQTTLIVDGIFIMLAIIQPTLSGLV,LNRSFYFKFSVVIISFLLVWTLRDFILLIICSLVISNVVSNLCYQIQTILKLPRFVSLLIVLVGISFMIFAISIIVLPPFIREFNEILIDIPNGLSRVNELVNSNLNKFNDLIYGKESERIVNIFDLVNDVVPIPDGATIAKAIQESFINIINLAGNLGSGFIRVIFVLVVSFMISIEPKAYKEGVLFMIPKVYRNKFRIILDKCNIALTNWTFSIVISSISVGLLSLIVLSILDVKYVVSNAIIAMILNIIPNIGPVLSGIFPISIALLDNFWKPVAVFGAYIVIQNIESYIIMPSILKKKTNLLPGLTLISQFGFTFIFGPLGLVLSLPIVVVTQVLIKELINDN
|
| 59 |
+
MSQIFTWIWVSSGILLILLVLLHSPKGDGMGGIAASGSSMFTSASSAEASLNKITWTILIIFLSLAIILSAGWI,MSKLKGPDGRIPDRLPDGRPAVAWERRWTEGTLPLWLVATAGGIAVIFVLGIFFYGSYQGVGAG
|
| 60 |
+
MGEAKRREELGLPPREKKEAKKDSKSNLNQILNKYPFAPYILGISLLTILIIDLVNYYK,MASNKDNQLVEKNDDNLGVENISNNPSIQSEQKLEVTEDEISFKEEDLDNGFACFGFNKLILNSLESKGYKTPTPIQKAAIPELMLGRDLLGQAQTGTGKTAAFALPLIEKLENNKESNAKVLVMTPTRELATQVADSFKSYSAESTNLRTLAIYGGTDFRNQISSLKRKTDIVVGTPGRIMDHIRQGTFKINNISCLVLDEADEMLKMGFLEDIEWIIDKLPENKQMVLFSATMPNEIRNIAKKYLNEPAEILIKSVKQETQLITQKYINVQRHHKLDALKRILEITNEGVIIFVRTKLLTTSIAEALENSGHSVAVLNGDIPQNQRENTVDRLKKGFIDILVATDVAARGLDVERIKLVINYDFPFDKETYTHRIGRTGRAGRSGEAILFVNQREKHFLRNLENSTRNKIEEIEIPNNKIINEKRMGKLITNLNESSLDQENNEEKKALMIDILDTLREKHSMEDSNIAMAAINLAIGNKSFFINEDESWLYRQNNSDRNRSNRNGNNRMRNTNRRNNYQNDSFETYKFNFGKMDRVRVANIISSICTSTNINGRLIGKIQIFNEYSLVDLPRDLHGEVKNKLKNLRIRN
|
| 61 |
+
MNSKLKFIYEGKAKKIFAYEDSDKVIIEFKDDATAFNALKKAKFEGKGELNCLISSKIFEFLIKNNIPTHYIGLKNNNSMIAQKIKIIPLEVVLRNTAYGSLCKQTTIKPGTVLESPLIDFYLKNDTLNDPLLTKDRINLLKIVDEEELDFISNMTLKINKLLKKFFYNIKLDLVDFKLEFGYNSNGQIVLGDEISPDNCRLWDLNQKNGMIVSLDKDRFRNDLGGFIEAYSEINKRINNFI,MGFVPLHNHSDYSLLDGASQVSKIVDRACELGMDSIALTDHGVMYGVLDLVKKCKSKGIKPIIGNEMYIINGSIDDPQPKKEKRYHLVVLAKNHTGYKNLVKLTTISHLNGMRGRGIFSRPCIDKSLLEKYNDGLIISTACLGGEIPQAILKGRIDVAENTAVWYKRIFGDDFYLEIQDHGSIEDRIVNVELIRIGKEHQIKVIATNDAHYISNMDVEAHDALLCVLTGKLISDEKRLRYTGTEYIKSEDEMLRLFNDHIDKESIKEAINNTVEVSQKIEEFELFGTYRMPKFPLKEETDSLSFLTKITKQGLLSRLNKNNLDEIDEIYKKRLTSELKIIDDMGFPDYFLVVWDYIKFARDSSIPVGPGRGSAAGSLVAYALQITNIDPVKHGLLFERFLNPARKSMPDIDTDFCIDRRNEVIDYVTNRYGEDKVAQIITFNKMTSKAVLKDVARVLDIPYGESDKLAKLIPVVRGKPYKLNEMIDKKSPSPEFRDKYLKDIKVKKWIDLALRIEGTNKTYGVHAAGVVIASDPLDMLVPLQRNNEGQIITQYSMDDIESLGLLKMDFLGLKNLTMIDKTISLIESSTGQKINIDKLPPKDNKTFDLIGRGDLEGVFQLESSGMKQVVKDFKPNSLEDISSILALYRPGPLDAGLIPKFINRKNGSEKIDFPHPFIESILTETYGIMVYQEQIMKIAQDLAGYSLGDADLLRRAMGKKKVSEMVKHRNIFIEGSCKKGVDKKIANDLFDQMVLFAEYCFNKSHSTAYGAVTYQTAFLKAHYPVAYMASLLSVNAGSSDKMQRYISNCYSMGIEVISPSINLSGIDFTIKKDQILFGLSAIKNLGDSAIRNIIDNRNKLGVFKSFSDLCDRLPSNILNKRNLESLIHCGALDEFSENNNRAQLFSDLEYVMEWASSRNRDRISGQGNLFDSISKNDTKEFSLSQGSKVEDYSLIEKLKLEKQLLGFYLSDHPLKHLAKPAKLVSPISISQLENSHDRTKVSLVGMIPELKQITTRKGDRMAIVQLEDLSGSCEAIVFPKTYCRLSEFLLTDTRLLVWGTIDKKSDKTQLIIDDCREIDNLKLLVINLDSSQASDIRIQNTIRDCLVKFKPDRDKCGIKIPVLAAVRNNDSITYVKFGDQFCVGDILGVSKLLSDKSFQVNLKSMIA
|
| 62 |
+
MNIKQPSSHKNPEPESSVLYIVGTPIGNLSDLSSRAINILKNVSLIACEDTRQTKKIMNKFEFTNNLISFNKHNSLKKIPRIINDLNSGKSVALVSDAGMPSICDPGEDLVKNVRSNGSNIICIPGPCAALTALVSSGLPSSKFIFEGFLPKKKSQREKILFEISKNEKTTIIYESPHRLKKLLNELKIYCGGEREIQVSRELTKKFEEHIGNDINNVIKTFQEKEVIGELTIVIKGIKKESNLLINKSDLKKELNELIKAGLSLSAASKYLAKKHGIKKSETYNLN,MSFLNNWWLIPLIITIFSGILCPAMGTVLITHRRLLQVNLISHCVLPGLALALALGIHPSIGGVISGLVGAIIAESLTNKKSENYEAVMNTILAGMLGFGVLLIPLLGIRIDLEAVLFGDLLTANLGDLLRTIIAFLTFILLVTFGYEKVVYVGLDPEGASASGINVSLLNLALSFTTALVIVSSMSAVGVILVIALLSTPTLLGLDKAQSLRIAMMRSSFFGLCISLLGFILSIVFNLSPGPAISVICVASLIIPKIGNKF
|
| 63 |
+
MAAKEHKSLQGSKILLIEDDKSIRLTVTESLISEGFEVSNFKDGSSALDFILGEGIKDFDLILLDLMLPGLNGLELCRKIRNEELYTPILILSAKGNESDRVLGLEVGADDYLTKPFGISELIARCRALLRRSKRGKEKKQKIETIIEYKNIKMFTEECRVTNFNQEIILSPKEFKLLELFIKNPKRVWSRDLILEKIWAIDFIGDTKTVDVHVRWLREKLEENPSAPKIIKTVRGFGYRFG,MRTILISGANSGIGLNIAHKELKAGNRISIGLRDLESVKGSVIDPNNWTNEKILLNKYDALDKFSAKKWVENTVSKFGGFDTLINCSGVLSKVPFLYKDGDEEEILNTFNINFLAIWHLCRISWKHLSQSNNGRIIVLVSMSGKRSKGDLAAYSSSKFALMSLCQTMKNKGWEENIRVTAICPSWVNTKMAEKISSIEKSKMTQPGDIAEICSTILKLPMQSVPFEIALNCNYEI
|
| 64 |
+
VARIAGIDIPREKRVEIALTYIYGVGLTRSKLILSNTGVNPDIRVKDLSDSDVQKLRGATEDFTVEGDLRRKEGMAMKRLQDIGCVRGRRHRMSLPVRGQRTRTNARTRRGSRKTVAGRKK,MTLSSYRMHRIYLAATMGYGLGSDDPEEVAYYKKLRKEMDEMKKDVVKKGIPLTWDIPDGMDK
|
| 65 |
+
MENSKPNYWQNAERTNGRMAMMGFFALVVNYGLFGWIIPGIF,MQILIIPIGFILWYFAYESKPINNDEVTSLWEKENYVKRTKLLNILKESF
|
| 66 |
+
MDLCFLSTNITSFVADPLSHEFMRKALLMSSLVAAVCGFLSSYLTLKGWALMGDAVSHSVMPGVVVAYALGLPFSLGAFIFGVGSVALIGFVKQKSRVKEDTVIGLVFTGFFALGIVLVSKIKSNIDLHSILFGSPLGISLSDVKQTVFISLLVVILLSVFRKDLILYCFDPRHAKTVGINVLFLHYLLLTCLSLAAVVGLQSVGIVLVVAMLITPGATAYLLTDKFDNMTIISVISAIISSVFGIYFSFWFDLETGGSIVLVQTFIFLFAFLFAPRYGIFKFKKLFSSY,MKDMPTWIDEYHKGSRFGLNGKVLLKKNSKYQEILIIETDFYGKALMLDGCWMTSVRDEKYYHECLVHPALSSIDKKSHILIIGGGDGGTARECLKYSQVSKIDLVEIDEEVIKVSKTFLKEIGGGAWSDKRLAIHIDDGVKWVETTKDNSYDVIFIDCSDPSEFSNLLFTDSFYKECKRILTKKGILATQSESPESFENIHIHILKSLNKIFKLSETMYSFVPIYPSGIWSWTFASDEELNLSKVNYKEVMEIENNCDVWNLNFQNAAFKMMPNKIVKKLNS
|
| 67 |
+
LQISNNNYPWPDDWGRKTSIMGIINLTPDSFSDGGDFCSIEKVLNQVNYFVSNGVDVIDLGAQSTRPGAIEIGAKNESKRLIPYLKKIRSEYPNILISIDTFNSEVAHEALSNGANWINDVTGGRRDEEILDVVSEFNCPFVITHSRGNSITMNNLTNYDDFLVDIIHSLESLTKKALNKNVSKDKIIWDPGIGFSKDTKQNIEILRNVPLLKNFEFPLLIGASRKRFIGEILNQPNPKERDIGTLAISCLCSQQKIHLVRVHNVKINYQVLKVADHIFR,VIPSDTPINQHSLQSLELWLKDLGATKDIDNPSKWYLLLSNWNATIIFEQEDLSVVWESGGKLTKRLFSYCINREDIENAILQGP
|
| 68 |
+
MKAKPETTAHVSVKEYCFTKKEVKGVVEASDFKWTFTWSFGKGVLFVTPPLGRALIQDSLLRFFLKKDYELEAGNEYKFIISAKF,MDICLLNIDNNSNKSLNPTSVIGMLWLQTHFEDTQWEALSNNQVIISKENSKLLVKDAISAGLKIKSFSGVSMLDVFQKKN
|
| 69 |
+
MKNKVFPFIKKYPMSILLAIIAINLFSIASSLRTEAYLNREKNLCIKYLKHQIDRDTLIKKLRIVKQANPSSICDSVLKS,MNKFEFFKTDAIQSSYGGQFSYKVIGPCCRLYDREELPWPCSRLAWRSKEPSWRRIGARFVADMASRKCPSYSVQILEPGSKPVETVITLFSKKFSSEIQEWWYSKKPGSKEPGNVLPESI
|
| 70 |
+
MDYKTSGVDIKAGREFVSEIKQSVESTYSSNVLEGIGGFGGLFKIPLEGLKKPVLVSGTDGVGTKLELAQIKNFHFEVGIDLVAMCMNDIITTGAKPLFFLDYIATGKLEKNQLLEVINGIAHSCRENKCSILGGETAEMPGFYSKNKYDLAGFCVGIADEEKLINGKKICENDLIIALQSNGMHSNGFSLVRKIIENNNQIDKQFEKKYNLDFYDELLKPTKIYFKIVNQILSQNIQIKGMSHITGGGIPENLPRCMSSDFIPYIDKKSWKIPVLFEFLKDVGQIPEKDFWNTFNLGVGFCLIIDKKYKDKILNICNAFDISSWVLGKVLKKNNSKENNFLPEIII,MYFQDIIQNLNKFWSEEGCLIMQPYDTEKGAGTMNPHTFLRAIGPEPWSVAYAEPCRRPTDGRFGDNPNRAQHYFQYQVIIKPSPDEIQEKYLTSLEFLGINPKDHDIRFVEDNWESPTLGAWGVGWEVWLDGMEVTQFTYFQQCGGIDCNPIPIEITYGLERIAMFLQDKESIWDLNWNKDINYSDIWLQFEKNQCSFNFSNSNPENMRKLFAIYQEEANSLIEKDLTYPALDFVLKCSHCFNLLDARGVISVTDRAQYIEKIRKLAREVATSWIKERELMNFPLVKK
|
| 71 |
+
MKNFTKNNYSTKRNDTENRRSQSKNNFKKGNDLNTRDDSNRRDNSNRRDNSNRRDNSNKRDNSNRRDDLNRRDDFNRRDNFKRRDDSKRRDNFKSRDDLNRRYDFNRRDNFKRRDDSNRRDDFKRRDDYERKGAIKSNEYSYLKSKEKPRNSFNQSQTRFSSNAQQTENYSENSSKKFQLSPNERNYEDWIWGKHSVFAALNSERPINRIWCTSEIFSSEKFYLLLKDLKSKGVLIEEVPWSRLSQLTSGAVHQGVALQHASTESISLEKLIDISKSKSSNPIIVALDGVTDPHNFGAIIRSAEAFDCKGIIVPQRRSAGLTGTVAKVAAGALEHIPVSRVVNLNRAIDELKKKGFIIIGLSGDGQVPISEFKEKAPVVVIVGAENKGISLLVQKKCDYLLKIPLKGKTSSLNASVAAAISLCYLSNN,MELPCRRFGRTNLKMPVLSLGGMRFQKSWDELKFSEISRKEQNKVENILNLANKFGFNHIETAKYYGTSEIQLGMGFKSIEKKPKIIQTKIPPNRDPKLFEAELLKSFEKLQVKKIDLLAIHGINTPEHLHQAVKDGGCIDILKKFQQENLIGYIGFSTHGELSLIEKAITTNLFDYINLHWYFINQTNSKLIELAHKYDLGVFIISPTDKGGHLHTPSTKILELCSPLHPIVFNDLFCLRNKYVHTISVGIAKEQDFNLHLEAVSLLSESDHYIPKILNRLKEESINSLGIEWYKSWDKNLPNWKNTPGGINIPVLLWLANLIDSFDLEEFAKSRYQLLGNGSHWFPGNNANLLDVNVCESQLLKVLERHIKPKKVIKKLRVLKDKFGDKSLKRLSKN
|
| 72 |
+
MSESKSPLDRIYRLIASHAWMTENEAKVLLVMMYASGTKSLGLEGKGLNKFMERSLEKMCSDNKENLQEYLLKIKDKFPNNELLSED,MEPTSSLNRGDRKKGSSLVTGSEVQSQSNGASCFITTDSEKSLVSRQASQVEQIELRTYVFLDSLQPQLAAYMGTVSRGFLPIPGDSCLWMEVSPGMAVHRVTDIALKASNVRLGQMIVERAFGSLALYHKDQSTVLHSGDVVLDAIGSEVRKRTKPSTSWTEVICAITPDHAVLINRQNRSGSMIQSGMSMFILETEPAGYVLKAANEAEKSANITIIDVKAVGAFGRLTLAGKEGDVEEAAAAAIRAIDQISNY
|
| 73 |
+
MTMNNLKTKKLVNLGPSGRAVAQPMDVSLLDNFYEHLTMERYANVQYFSIYLWFQERDLDGFASHFLSESQGEMEHAYKFANYFIARGQTVKLKELPAPIQTWDSIEDIISYSFNMEADLTSSLQQLYSISERISDTRTSVFLDPIVDAQTKSEDEFAHILGKVKFAANQPSAILLIDSDLKKK,MFLKDHLKDTYQKASFDNNHLMLENIINIWAHRFGPESLNELFVKDQDQDQLKLIEENQAEASQNQINLELIEDHQSEANQNQTNLELIEEHQSEVNQNQINLELLKNLQYEEKIEFKPKETKKSNNTEIINKDIYGSYKNESEFKDKEELPLPNIKNLRKWINNEKKAS
|
| 74 |
+
MIILHIGLFENSFSNIMKSVIFQETANLKKPVPAEKVIELSDKLLEPSSHSKRYPPRLHKTWGTIFFMIAIHLLSLLALQPQFWSMPAVTALFFFYWLTACLGVTLGYHRLLSHRSFVVPKWLERFFATCGAISCQHGPIDWVGLHRHHHSFSDTEVDHHNSKRGFWWSHMGWMFKDVEALKAVPKLSADLIKDPYYRFLNKYFLFLQIPIGLCLYAIGQKLGVGGWALVLWGIPLRLVVVYHITWLVNSATHCWGKAPFESGDGSKNNAWVAALTFGEGWHNNHHAFPNSARQGLFRGQIDLTWEHIKILAKLGFAKKVKLPSRSYY,LNKKLGHKDHFHFIGIGGIGMSAIAMALIKKGYSVSGSDLIQNKETKSLKTLGAIIFDSQIKKNIDFVISKFQDHTLNCVISSAIKDENEELCFCKKNNLSIKHRSEILAMIMNSYTSLSIAGSHGKTSTSTFLSTLLELCTHDSSSITGGIIPIYDSNAHIENTKYLVTEIDESDGTIKNYNSDIGIINNIDFDHCDHYSNIDEVLSSFKKFASNCQKLLINYDCKFTKNNFTSKNQWSIKESNNIAYSLIPNIINKDKTVGKYYEHGKFIDIINIPVPGLHNLSNITAAIAACRMVGVSFKEIKKNTESLKLPKKRFEFRGEINQRIIYDDYAHHPNEIKATIDLARLFIKDKNSSDREEKGRLIAIFQPHRFTRVKQFIHEFVKELSKADVIYVTNIFGAGEKNIDNIDSQLIANLIYKNNKNVTCLKDNYEINEKFFKLTKKNDFIINMGAGDCHNLWSILKNKNTLNN
|
| 75 |
+
MDINWASTQIVKNLDRHEKRDLLAWILTQSERTFQRAFEAGQYSSAIGSLKLIWEMTIKDSKEKDSRYHGNYKH,MSKLHLKRFLKKSYEFSLVLFQFFIIILHFIHLEFIPKKEIMQVNFFFSFVGFLLIIISTIVMLISIKDLGRNLSPFPRPTVNGNLTTSGIYSFIRHPMYYSLILISFGFFITKLSFYHLFLTISLALIIKLKIILEEKYLNKKFKNYFIYTDKVKY
|
| 76 |
+
VHKNKILVPLSNNSYEVIIKQGLINNIGEELIRIGINSNRKILIVSNKEISTLFGRKLLNNLKKNNFNAEIFNIKAGESHKNFASLSEIFNAAFEVGLDRNSLLIALGGGIVGDVTGFAAATWLRGIEYIQIPTTLLSMVDSSVGGKTAVNHPKGKNLIGAFYQPKAVFIDPETLITLPTREFKAGMAEVIKYGVIKDKSLFEYLENEKNRDKILNLENESLIKIINKSIKTKACIVSEDEKENGIRAILNYGHSFGHVIENLCGYGEYLHGEAISIGMKIAGDIATEKNLWSKEHSLRQDHLIESYGLPIQTPKIKKNDVMKILMGDKKVRNGKMRFILPIELGEVDIFNDINESQFLKYFN,MKKIWKIEKLVLPQHSDHAGVMWHGTYFDWLEEGRINALSKAGLNYVDLTKNGFDLPLIDTSIKYISPLFLGDTVTIETIFEISKSPKIKIHSKFINKSKTILTIAKVNLVLINKKSFSIIRKRPDFISKAFLKLNG
|
| 77 |
+
MNDLNIEFPLDDFEELISQIGWSSLDEWFIFWNLKKEMLSINNFWDDNVKDDWIWGLALPLLSQAYKLNKKSPDRKIIGISALPGTGKTTLGKWLESISLKLKFKLSVISIDDFYLPSEEMEFAIKNNPWNVSRGFPGSHSIDLMKEKLLKWKTDGQLNVPVFDKSLRKGLGDRAHWREESPDLLIIEGWFLGVKPLSIDLDNSEKFSPPLSVFESSYRNKIQNNLDQYLDIWNMIDQIWHLKPLKFEYLNEWKSNQEKSMHFKSGSSLKGDNLSNFLRMLNVSIPHKSFDDINSDVLLMINQERKLVRVGLNQQISK,MLTTKITYALSDWIREWRKCRKENPSLDDCIKFTEWKIENYELTDSDRMIIESILLYETEET
|
| 78 |
+
VNITFLGTSSGVPTLTRNVSSLALKLSQTAEVWLFDCGEGTQHQLMKSNIKSSQIKKIFITHMHGDHIYGLPGLLATLGLSGNSNGIEIYGPSELKSFVTSALESSFCKLSFPLRFRAVEDFASLNKILFENDKLKVHCACLKHRLPAYGYRVSEKDKPGVFDIKKAEDSNIPPGPIYSELQAGKTVQLKDGRSFNGQDFCGPPRKGESFVYCTDTVFSKSAVNLSKNADLLVHESTFSKEDEKMAYEKLHSTTIMAAKTALLSNVKKLIITHLSPRYTQRSSIKPSDLLKEAQKIFPNTYLAKDFLTAEIK,MKLSKKFEELIIKQLESFGCSMGVTHLVMYLASTEQGTKASFEMIGQWPQIDRLLVSVEDDPSLKVSSPNRRWYPLQENDILLGVLRVETDLKEGNWPVSLDSRLKALSLSLAKCVSIELERQNKNEEINYLKSQVNVIIHQLRNPLAALRTYAKLLIKRLGSDVDSIEIVERMIIEQKQINNYMDSFAQLNSPIQLPLDIGEERLLLPPNLDNKKLITVQSLLRPILERGQANANLENRDWTEPSLWPDWTLSPLKAKYAVIAEIVANLLENAFKYAHKDAEIGVAIMSKGLCIFDDGKKITKIENEKIFQKGFRGSAAKKKDGTGVGLFLARKLAKQIGGELRLLENSSINDVEELKSFKKKNIFYLELPIKELHS
|
| 79 |
+
MIENPSQIVKEISDEKEIENSTIEENTSDTPKEEDLSFDHKDIPSADSSSSRRNNDLDTAGFTQEEFASLLGKYDYNFKPGDLVKGTVFALEPKGAMIDIGAKTAAFMPMQEVSINRVEGLSDVLQPSESREFFIMSEENEDGQLALSIRRIEYQRAWERVRQLQKEDATIYSEVFATNRGGALVRVEGLRGFIPGSHISARKIKEDLEGEYLPLKFLEVDEERNRLVLSHRRALVEKKMNRLEVGEVVIGSVKGIKPYGAFIDIGGVSGLLHISEISHEHIETPHNVLNVNDQMKVMIIDLDSERGRISLSTKALEPEPGDMLTDPQKVFNKAEEMAAKYKQMLLEQTDENEEQTVEIAESV,LSRSLDLPSTEGVDTLAQELAKLQDNGKRRIAFLGSRHVPVVDIHLIELIARSLAEEGHTILTSGSQGVNAAVIRAVLGINPSLLTVLLPQSLDKQLPEIKNQLESVIHLVEKSENDELPLPMASSLCNQEIINRCDQLICFAFHDSETLLNSCRCAEEMGKVVSLLFFD
|
| 80 |
+
MKKKLAAVSFSALLAIVASSTTSGFASWNTKYWTNEKNFNRISSFNVSENLPEGSKSTTKTSSEVVTASEDGKTLMYTDSDLGVVGLVDISDPAKPKALGIVELEAEPTGIAALGNNIYIGSNTSESYTNPSGALVQYNLDKRRAVKECDLGGQPDSVFVSPDGSFLAVAIENERDEEYKDGQIPQLDEDGKQINPAGYVSLVKLNKKGKIQCNSIKKVDLTGLASIAPSDPEPEFVAINDLGETVVSIQENNHLAVIDKEGKVISHFTAGIVKQMAGMDTKKDGAHKFKKKLKNVRREPDGLTWIDNDHFATANEGDYKHKAPGQAKRGGSRSWTIFKKDGTVVYEDANRLERSIAQIGHFQDGRAGKKGVEPESVTFGKIDGTPYLFVGAERAGIVAVYDITELSQPVLTQLLPSGIGPEGFVAIPDRGLIASANEKDYNKKEPGLSSHVTIYQLQDAPASYPHLTNENGLEFVSWGAISGMVSGEDGKIYAVNDGTFKTQPRIYVIDPSSSPALLERAIDIKLDGKTALFMDQEGITTDGRGGFYISTEGIKKKLTEHPPAIYHVSSEGDILEKITPPPSYLNYAKNPGFEGITRNGNILYIAQQKPWGDDTFNTTKILSYNLISKQWGAVNYQLDRIKKGGVGISELTYHDGALYVIERDSFYGKKAKLKAIYKVDLDGVVFEGLQTTMPPRLYPLVEKELVTDLKPVMKSTGGFILEKVEGLAINNDGQAWISTDNDGTGKKSTGETLFLNIGKI,MKNLKSNKRKIHRKVAAISSIPLLITLISGTIYSFLQPLGVDAFWLIKWHTGNFGIINLQPFYSIFLGIASIISVISGIRLLQKNS
|
| 81 |
+
MGFIKNKLFIFIILILLQSCSGGRIGNFFESSFKNIEETKIKEDVKNNLKNKIVIKSGGIVEKNKNIEETKIKEDVKNNLKNKIVIKSGGIVEKNKNIEETKIKEDVKNNLKNKVLKMSEKKSKNNKKISDKNISPKKIIFQPKSYKIIFILKDVDPKDPTEDLRAILRNSDVNFEIEKIERYFDTKNKTIKSN,MKWIIQEEKEEDHLQILNKDSEIGIDEVGRGSVFGPVFSVAVVLSKKSGLTLKKLGVNDSKKLTPKKRKDFFPKIIALSSDYALGQSSVREIDLLGIRHATELSMIRAVKKLKHMPSELLIDGPLTLRLWEGNQRNIISGDSKFISIATASIIAKVMRDSLMERLESKYPGYFIFKNKGYGTKQHFSSLKKHGLTNLHRKSFLNKLNLI
|
| 82 |
+
MELPCRRFGRTNLKMPVLSLGGMRFQKSWDELKFSEISRKEQNKVENILNLANKFGFNHIETAKYYGTSEIQLGMGFKSIEKKPKIIQTKIPPNRDPKLFEAELLKSFEKLQVKKIDLLAIHGINTPEHLHQAVKDGGCIDILKKFQQENLIGYIGFSTHGELSLIEKAITTNLFDYINLHWYFINQTNSKLIELAHKYDLGVFIISPTDKGGHLHTPSTKILELCSPLHPIVFNDLFCLRNKYVHTISVGIAKEQDFNLHLEAVSLLSESDHYIPKILNRLKEESINSLGIEWYKSWDKNLPNWKNTPGGINIPVLLWLANLIDSFDLEEFAKSRYQLLGNGSHWFPGNNANLLDVNVCESQLLKVLERHIKPKKVIKKLRVLKDKFGDKSLKRLSKN,MIFRNKRSSIKKTNILSQDELIKHYGINSYEFTHQEKKEIFVCSKVKEFDLIELDQLLQTVGWSRRPIRRVKRALEFSILVVGLWRHDEKFPRLVGFARCTGDGIIEATIWDVAINPVYQGLGLGKELMKYILQELKKIGISKVTLFADAEVVSFYKRQGWELEPKGSKCAFWYAN
|
| 83 |
+
MIIIEGFHIFNHKQNCKTKAEWMEQSGMTYDRESEVN,MQIKILVKLFSHLIKVIFKPLLGFAKFFITTYGVFLKFFLQLNGGYWGKIGIGQYSKIERKRFFCILPFYILLALLFGILSLIYWYFVVLFIPFWIERYLTDTAQWNNIFSSIMAFALICGWLLLLSKTK
|
| 84 |
+
VEGKNTSITFDGREIRLTTGLYAPQAGGAVMIECGDTSLLVTATKTTKKQAADFLPLICDYEEKLYAAGRIPGGFMRREGRPPERATLIARLIDRPMRPLFPSWMRDEIQIVASCLSLDERVPADVLGVTGASIATLLAEIPFYGPMAAVRVGLIGDDFILNPSYREIEKGDLDIVVAGSPEGIVMIEAGANQLSEQDTIEAIDFGYEAVSELIKAQENLLKDLGIKQVKPLEPEEDKALATYLEKNCTKPIDLILKKFDQSKEERDLELDKIELEVQTKIDSLKDDNQLKVLTSENEKLIHSDFKKLTKKLMRSQIINEGKRVDGRDLDEVRKISASAGILPKRVHGSALFQRGLTQVLSTTTLGTPSDAQEMDDLNPSTEKTYLHHYNFPPYSVGETRPMRTPGRREIGHGALAERAITPVLPGKETFPYVLRVVSEVLSSNGSTSMGSVCGSTLSLLDAGVPLKAPVSGTAMGLIKEGKEVRILTDIQGIEDFLGDMDFKVAGTEKGITALQMDMKITGLPVSVISDAIKKARPARLHILEKMQEAIDKPQESLSPHAPRLLSFRIDPELIGTVIGPGGRTIKGITERTNTKIDIEDGGIVTIASHDGAAAEEAQKIIEGLTRKVHEGEIFPGVVTRIIPIGAFVEILPGKEGMVHISQLSEARVERVEDVVRQGDEVTVRVREIDSRGRINLTLRGVAQNGGMSYPEPTPTPVAPLN,MPKQLSFSNESREALEKGINTVANAVKVTIGPKAKNVVIERKFGSPDIVRDGSTVAKEINLDNPISNLGAKLIEQVASKTKESAGDGTTTATILTQIMVQEGLKNIAAGASPIELKKGMEKGLNFVLEKLRSKSIKINGSDIKKVATVSAGGDEDIGSIISKAMDIVTSDGVITVEESQSLETELDITEGMSFDRGYSSPYFVTDQERQICELENPKILITDQKISTLTNLVPILEEVQKSASPFLILAEDIEGEALTTLVLNKNSGVLNVSAVRAPSFGERRKAALEDIAILTGAKLISEDQSMKLEEVTLNDLGKAKKITISKDKTTIVAFDDTKDLVQERVEKLKREVEITESEYDKDKINERIAKLAGGVALIKVGAATETEMKYKKLRIEDSLNATKAAIEEGVVSGGGQTLIEISNELSNSRKEISDDLTTGIDIITNALLEPTKQIAKNAGFNGDVVIADIKRLGKGFNANNGEYENLNESGILDPTKVIRLALQDSVSIAAMIITTEVAVADIPEPEAAPGGPGADPMGGMGGMGGMGGMGGMGMPGMGGMGMPGMGGMGMPGMGGMGMPGMM
|
| 85 |
+
MILSLLLSTFITIFIAELGDKTQLATLTMSGTSNKPLAVFLGSSSALVLASLVGALAGGSISNFLPEIILKSIASITFFIIGIRLFVNSFTSKENDNNQ,LKNLLGCSVKDLEKIALNYGQAAFRGRQIYNWLYNYKNRSKSIDEINVLPLKFRDQLKNEAFLFGELTLKEKYLATDGTLKLLLNTRDNESVECVGIPTEKRLTACLSSQVGCPMDCKFCATGKEGLKRSLKVSEILDQILFIENQMNQKVSNIVFMGMGEPLLNIDELLLSIRSINEDFAISQRKITVSTVAIPKMISKLSELSFQVLGKCQFTLAISLHASNQKIREAIIPSAKNYHIKNIIDDCREYVRETGRRVSFEYLMLHGVNDKLEHADELSNLIKGFQCHVNLIQYNHIEEVEFKQTPIKNAQLFQTRLSNSGINVSFRKSRGSDRNAACGQLRQNDKIK
|
| 86 |
+
MYSLEISLRYSPFPLSIQKKEYEDIKRIYDEIKDSMNSDNQNSPLIELSCEKVQDKLITVLAKEVISVQIYEKSAVAGGSKRPGFSLDI,LARDFPLERVRNIGIAAHIDAGKTTTTERILFYSGVVHKIGEVHDGAAVTDWMAQERERGITITAAAISTSWQDHRINIIDTPGHVDFTIEVERSMRVLDGVIAVFCAVGGVQPQSETVWRQADRYSVPRMVFVNKMDRTGADFLKVNQQIKDRLKANAFPIQLPIGAEGDLSGIIDLVSNKAYLYKNDLGTDIEEAPIPDEMKDEALEWRSKLMESVAENDEELIEIFLDKGELTEDQLKKGIREGVLKHGLVPVLCGSAFKNKGVQLVLDAVVDYLPAPIDVKPIQGVLPNGKEDVRPSDDNAPFSALAFKVMSDPYGKLTFVRMYSGVLSKGSYVMNSTKDAKERISRLVILKADEREEVDELRAGDLGAVLGLKNTTTGDTLCNTDDPIVLETLFIPEPVISVAVEPKTKGDMEKLSKALQALSEEDPTFRVSTDQETNQTVIAGMGELHLEILVDRMLREFKVEANIGAPQVSYRETIRSSSKGEGKYARQTGGKGQYGHVVIEMEPAEVGKGFEFVNKIVGGTVPKEYIGPASNGMKETCESGVLAGYPLIDVKVTLVDGSFHDVDSSEMAFKIAGSMAFKDGVKKCNPVLLEPMMKVEVESPDDFLGSVIGDLSSRRGQVEGQSVDDGLSKVQAKVPLAEMFGYATQLRSMTQGRGIFSMEFANYEEVPRNVAEAIISKNQGNS
|
| 87 |
+
VKKSLFKPSRKFTLFSAFVTLLNDRLSESILLPILPSFVLLFDSKASTYGLLSCTYQLAQFTASPFIGLMSDRYGRRPVTLFCITGSIIGISILSFTVLFDWSTSLATIPLFLLFIARLIDGLSGGTAATATTILADISSPEKRAKTFGLIGVAFGLSFFLGNIFVVIFAKNTNNNFIIPVIIASIIPIINFILVFFYLPETKPQNELNKSTQILKNPLKQLFKVFKEEKIRKLSLAFFIYFIAFTGLTNILIFFLQESLNWTTKASSGTLVVVGVIAIIVQGGLIGPLVKKFGEMRLTLIGSGFILLACFLLITTPQKNAIVNIYSAVSFLAVGAGLITPTLRALISKKLDGDNQGSILSNLQGLQSLGGVLGIGMAGKVYDDFGPKAPFIAGSIILLFMIYLIAEGKNNNISYN,MTDIFEVPTPDNELLEKAKQLRLASIKTSQTNNDDRIRALNLMADYLEKNSKEIIEANIEDYKKAEIKGISKSLLSRLKLSKEKLNLGIEGVRQVGNLIDPVGQIQIKRELSKGLILERKTVPIGVLGVIFESRPDAVMQISSLAIRSGNGVMLKGGSEANLTNLAIVSALKEGLQDSNLDENAICLLTSRKDSMAMLNLEKYINLIIPRGSNELVKFIQENTEIPVLGHADGICHLYIDNEVNLDMALKVALDSKIQYPAACNAVETLLIHKDTASEFLNKAIPMFNSNDVKLIGDKKSFQLGVAFEANYEDWQTEYLDLILSIKIVNDLEEAIAHIQKFSSKHTDGIITENINNANKFMSEIDSSGVFHNCSTRFADGFRYGFGAEVGISTQTLPPRGPVGLEGLVTYKYFLRGEGHIVDDFSSGKLIYSHKDV
|
| 88 |
+
MQNITFKGNVNFDNQKEELNENELFSLKITDSLYKKDIGKFLEILSSHFIP,MRVVIAGAGLAGLSCAKYLVDNGHIPIVLEARDVLGGKVAAWKDEDGDWYETGLHIFFGAYPNMLQLFKELDIEDRLQWKSHSMIFNQPSEPGTYSRFDFPDIPAPANGVTAILSNNDMLSWNEKILFGLGLVPAMLRGQKYLDKCDSKSWTEWLKEHNIPERVNDEVFIAMSKALNFIGPDEISSTVLLTALNRFLQEKNGSKMAFLDGAPPERLCQPMVDYITERGGEVHMNSPLRKIDLNEDSTVKSFTIAPLDSDEKKKVITADAYVSAMPVDLFKLIIPDQWKGINAFSKLDGLIGVPVINIHLWFDKKLTDIDHLLFSRSPLLSVYADMSITCKEYEDPNRSMLELVFAPAKEWINRSDQDIVDATMEELKKLFPTHFIGDDKTKLRKFKVVKTPRSVYKAVPGCQEFRPSQRSPIKNFFLAGDYTMQKYLASMEGAVLSGKLCAETINKEYSKTSNIVSRETSKIN
|
| 89 |
+
MLENIWHPSYSAAEYLGITEIKLSHLRENGYFKPGIHWKSSPLGQKKPWNPEVLYNSILCRKIMDEFYSEEKNDQYAA,MRNLIKENIKKTGNNSSRSIKKLLKQRSFVVFISILLTGLGASITSISFKTGIYFINNWRLELLNHFPSVAVLPLFGAVGGAIAGFLIKNFAPAAKGSGVSQIMGFLRHKKVPMNLKVGLVKLISGIIAIGSGFPLGPEGPSVQMGGSVAWQMARWLKAPLAFRRVIVAAGGGAGIAAVFSAPLGGFIYAIEELLNSARPVILLLVVITTFIADSSADIIQALGLDPKAGGFDFNLGFLIQKEYDPSVFFLPIDFIYLVLLGIIIGLFAELYSKYVLFMQKLGKKWYKNKFVLKMSICGLLLGSIYSFLPSSFHNLDELQKIIVEKNTNIEIAFLAVFILFITTGLAAASGAPGGLFYPMLTLGGAIGLIMGTWVEIATGHAPSTYIFAGMGAFVAGCSRTPITAMFLAFALTKNLLIMKPVLISCIASFLVARAFNEESIYERQIQIELED
|
| 90 |
+
MKKKSITYTDLSKKQLQHLKELYIQKKVECMSHKELKEFVLEIISHQINDTIGKEEEMEAWMEMSKFYGDQFEIIILEIQQKFANNENLQNFEEDSKEHRLELLEKNNIEQNKQDMWDD,MKEIGWPTIDSKHLVVYSKQMLDLENEIFSQGMPQEALMEKVGIQLSKWLLKRKSLLKKGVIVFLGPGHNGGDGAVIAKELFLKGYLVKLWCPFPLKKTITINYVNYLTSLGVEILGDSPNPEGKDLWIDAIFGNNQKRKVDEELIELFNKKFEKRSGKVVSIDVPTGLCPNSGKPFLKNAVKADFNLVVGLNKIGLLQDTALPYVGELHHIDIGICRSQLCKLESKILKISYQDLRTIKLPLLPKNSSKYKRGRTLVIAGSEKYPGAAYLAIKGAISSGAGFVSAIIPNLVSNSIWQVEPEVVVTGSLSSDKNGNSILFNALKNVDFSAYDSIVIGPGIGLNEEDWEKSTQYLLDLKGLLILDADALNRISKSNLGPKFFLERKSKTWITPHNKEFMRLFPEIDCTNKVELAKKAAKAFDISILLKGANSVIANNENAWQLFGTDAETSRAGLGDLLSGFIGGCSSIELSSRDYTKTESLAKYVFLHSFAASKCKKGSNASLIGAQLSKLMRKTKTRLMS
|
| 91 |
+
MRQHVNPLSKNFFEIDPIPPLNQVFENPKLPLHLDIGCASGEFLFELSLKNKNWNYIGIEIREKLVLNANLKMKSRENKNLYFSFGNANNIFNQTNNKSIINLITSISFNFPDPWFKKKHHKRRVIQPKLLNLLSNSMKKGSLIFIKTDVRDLFDHMELTISESIKFKKIPYQDVDFCESFNPNRIQTNREKYVILNQLKIYESIYKKI,MTDDINPIESDFNAALSRYQDGQELIPIAQDFQKIIQQIPNHFAAWTCLSWLQLLLKNNEEALAAAREAVRLNQQDPQARMNLSLALLATNNKGVRDHVELIKKMAMMMPDVKTELKESVEDGFNRYPNWPELTKINKWLEF
|
| 92 |
+
VLNDTLSSKFICFHLINISNKLNSTLKIELANPNKSEMFELKSYEKFRDTEDVRFFDISINNSNFRDLVIHNGPAVSPPNDKELGNWQFYIHHKQEDNLLAISGGRTFYLVNLGWEYPFYKVRLESCGLILKIPRGTFHRSVSDENGSVVLNQAIRDKGGSVESEFKVTNSKDNKKLHDCITNLQPKFKIYSVK,LIMSLIPLLPVFHKFNRQFFDQSLTTNREPLVKVRWSDNRLKTTAGFYKRKQLKGVIDSEIILSKPILSKLSCNEIHSTLCHEMIHAWVDRILNINEIHGPNFLSKMNEINKAENNFQISIRHNFPVERKALKYTGKCLNCGEKYMYRKRIKNIACKKCCNLFFNGSWNKKCLILFD
|
| 93 |
+
MIKDHPIFLESIRFIKSNLIENNFNYLENRVLERLVHTSGDFNIQKLLEFSEGACEKGVKSLKAGAPILTDTDMAAAAIKSMAKNTNGNLVVSAKHWFDDRDLSELTKTAYGIEKGWIELSANNSGNQSPIIVIGSSPTALVNLLEIIQNSQQIPSLIIGMPVGFIGVRQSKNKLLNTNYPRIVMNSTRGGAAMAAAAVNALLRESI,MREEDIKSFEDAFFDALNLFNNQKWYEAHDAFEDIWNTLEGDERQIIQGIIQVSVSQFHLSKGNLNGATILMGEGLGRIKNRTNIDLGVDLVSFCKCLDELLRKLQYKEELTKNDKPYLLIKEQNEF
|
| 94 |
+
MRVKLEPETAFIGKKFAYIFLGIIFALNSIVFIWYFFFSNLTWS,LFQSLFSTKYNYFYNIYIVFHIRTSILLLSGLVLGLWTSWPGIVIPNNWKCFKDMIEKSSKE
|
| 95 |
+
VSENIQPSSEENQIVEDLTNKESPEKLPEFKDKELITNLEQNRFECRSCGYIYDPIEGNKKLNIPKNTPFSAIDGNTFACPVCRAGKNLYKDIGPREKPSGFEENLTYGFGFNSLPPGQKNILIFGGLAFAAACFLSLYSLH,LIGIFSAFGAAISWTYACFIWRSQTEKYKSIDINLVKNIIAFLIFLPAFINLSVLNNLKSIITLLFSGVIGIGLGDTFYIKSLQLIGTRRTLSIETLSPILAALSGEIFINENLAFRSYQGILIISISLFILLRQRTNLIVNNLTNITERNNLSVYVFPFLSVLCAVSGGLLSRKVFLESNLSPFQATEIRLLGAIIFLIIIKKFRINFFLKKLDFNDQKRFLLSILLGTNLGILLQQIVFKTLPLGIGWTLLSTSPVISLFFATKEEGQITKGIIFFTTLLFLGLCLIII
|
| 96 |
+
MKILLSVFFLFAFIPPSKGVTTKMFKVLDTCARYRLGEIDAKQAIEKLKLKSVNSSEIDLKNIVSNYCSVFTPNENIKF,MPKKHPTRRQFLNFGKLSLLFFLNSCSNSLKKIKIGFQSSTYPKSFRDTFPAIWQKENINFSKLKLEKNKIKFSKSDFILINDGWLKSINFANFQNINNLFLNDLLDNRSRDYLKSFKEYQRNKLFPIGVVPYAVIIKNNKDLIYEASNNWDFLLDEKLKGKIIFPQSPRILISISKRINVKNSLSKLKEQAMLFDDKNSINWLINSDASVAIIPFSLCEKYLRVDSRLSMVFPNKGVPLMWNFLLTKSKINNIVLFDWIKSLEKRSTIDELANQGWYLPFKNEYSQDKYNIKTENSNYGPSENCWENSWSFSSLNYEEKVNLENLWNQS
|
| 97 |
+
MRFKVSLKKDGKEFDEVVIANNKKDAIEVALKNNPEAEVLNSDWTFKL,MNDHNSKDNYEAQTLILNDSNGNELFCYLEQIVKVEEKEYALLTPVDTPVSLFKINENDEPELIEKIEKNEQVLKNADAVLQEHDLKLIRSAVTLTVSGELEEPIYDELEEDGIEEESETYELLVSFNLLEQEYGLYIPLDPFFIVGKLINQGALLIEDDEFDKVQPLIESELEKSSF
|
| 98 |
+
MENSKPNYWQNAERTNGRMAMMGFFALVVNYGLFGWIIPGIF,LMNKYDVIIIGSGIGGLCCGSLLALAGKKVLIAEAHSQPGGVAHSFNMRGYKFESGPSLWSGIGKWPTTNPLGQILRLLDEKVELIKYQGWHVNVPEGEFNLEVGQEPFKERIRLLRGEKSVKEWDSFVSGIRPLSQIVSEIPLLSSSPETINFLEIIKLASKFLPNIKSLPKLNGGFGDIVDSHLNDPFLRNWVDLLSFLISGMPMHDTNSAAMATLFDEWFKPASYLEYPKGGSESIVKALVDSFKKNGGELILSSKVEAVNFSKNIASGVTLENGSNFISNFVVMNTDAWTSRKLIPQEFQKKWSPKAKDINKCGSFLHIHLGFDASGLQNLPIHAIHVDNWERGITAERNVAVFSIPSVLDKSMAPKGKHVLHGYTPANEPWEIWKNLKSNELAYKELKEERCSIFLKSLRKIIPDIDNRIEIKLLGTPLTHKKYTNTYCGSYGPALSAAQGLFPGCKTSVRNLLTCGASTFPGIGIPAVSASGAYAAEKIMGKKEYKKLLKTIDL
|
| 99 |
+
VFFKSNFSYSDSNKSYSDLLLELDSGNIQSIYFYPRKREIDVLYKNGNKEKIPILYNDQLILEKASENNVDLTINNSRKESSAANSFASVGLFLIFIIAIVLILKSTSKLASKALGFGKNKSKFVTIDDVETRFDDVAGVPEAAEELKEVIKFLNEPKKFTDLGAKVPKGVLLIGPPGTGKTLLAKAIAGESGVPFLSIAASEFVELFVGVGASRVRDLFEKAKEKSPCIIFIDEIDSIGRQRGSGIGGGNDEREQTLNQLLTELDGFADNSGIIVIAATNRPDILDSALLRPGRFDRKIEVMLPDLDGRKKILSVHSLSKPLAKDVDLSYWATRTVGFSGADLANLMNESAIHCAREDSKLITYSHIENALDKVTLGLRTSIISSQNMKKIIAYNEVGRAIVSAVKNGVDSVDKITILPRSGYLGGYTKINPDEDIVSSGLISKKLLLSKIEIALAGRAAEIIVYGKNEITQCSFNDISYATSIIREMVTKYGFSIIGPLSLEDGGEMSIGDGFVRNKSTIADNTYSRIDNEIINISKISLNNAIKIISNNRILLEKLVELLLIKETVENNTFKKITFDLLKV,MRRKIFFEVFNIKKLSILVLGFTLGVIAIWPGIISRNSRKCFFNIIKDGSDGNIQIKTILLVNPNYLLRIKNAKNDYWKVLLVGDACFRKF
|
| 100 |
+
MVTSQKKGPDSSASDNELSPDQTLGLVSLSLMQKLSQKDPSFSWLGEMKPDQLNLKNLRDRLELTELAIKTGAPLTTSEVSILMGAKPGKSKIERGGILAIKVARNVWKLSKLGQGSSYYRN,MILKVLEFEFDLIVLGAGSGGLAAAKRAASYGAKVAIIEVNKIGGTCVIRGCVPKKLMVYAANNRRNMLSSEGYGLISKEITFESNILLKNVREEVSRLSVLHSNSLKKLNVKVFEGLGRFLNQNTVEVVCPKTKNILRKVSAKSILISVGGKPKKLNIPGTDFAWTSDDIFELKDFPKKLLIVGGGYIACEFASIFKNLGTEVTQLIRGENLLNGFDKDLSECLEKSMTSLGINLKFKNQLKSIKKINDGLESTLESGSKLLTDNILVATGREPSLKRLNLDTLNLKMDGIYLEVNELNKTSISNIFAIGDIVKRPNLTPVAIEQGRVFADNYFAALKRKVNYENIPKAVFTIPEISTVGLSEEKANEIYSEVNVQVFKCNFTPMSNTFKKNKSKCMLKLVVNKKNDKVLGCHMFGEAASEIIQMVAVSLNTGITKKDFDTTMALHPTISEEFVTMYG
|
| 101 |
+
MFFLSIPQAWHLAGTWSEQLPNDSNLIGMSQTELMMTLHSIFVPLLLVISYFLFLKISKNESKKVKG,MTSTLPNDNIKNIDEKISNKLISEIIRDRIKSKGTRFSANDNIADFINPGELKVLEKEVASRIKDLLKSLVIDVDNDHNTQETAERVSKMYLNEVFKGRYHEQPKVTSFPNDKNLDEIYTVGPITVRSACSHHLVPILGECWIGIKPGSKVIGLSKFARVADWVFSRPHIQEEAVMILADEIEKLCEPKGLGIIVKAQHYCMKWRGVKEPNTSMINSVVRGDFRHDISLKQEFFELVRQQSSNNNY
|
README.md
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ppiBTEP
|
| 2 |
+
|
| 3 |
+
A Siamese (twin-branch) protein-protein interaction classifier built on ESM-1b ([Rives et al., 2021](https://doi.org/10.1073/pnas.2016239118)). Also designated SiameseBTPE (BERT-Twin Protein Encoder).
|
| 4 |
+
|
| 5 |
+

|
| 6 |
+
|
| 7 |
+
## Overview
|
| 8 |
+
|
| 9 |
+
ppiBTEP processes each protein independently through a shared ESM-1b encoder -- no cross-sequence attention is used between the two proteins. Each branch extracts the `[CLS]` token embedding from the final transformer layer, the two embeddings are concatenated, and a dropout + linear classification head produces binary interaction predictions with softmax probabilities.
|
| 10 |
+
|
| 11 |
+
Unlike the cross-encoding approach (see [ppiDCE](https://github.com/kouroshSA/ppiDCE)), ppiBTEP must capture interaction-predictive features entirely from each protein's own sequence context. This makes it faster per pair and allows protein representations to be precomputed and reused, at the cost of not modeling direct inter-protein residue dependencies.
|
| 12 |
+
|
| 13 |
+
The model was developed for the *Prochlorococcus marinus* MED4 interactome, where it serves as one component of a tri-model consensus framework (alongside [ppiGPLM](https://github.com/kouroshSA/ppiGPLM) and [ppiDCE](https://github.com/kouroshSA/ppiDCE)) for computational PPI screening.
|
| 14 |
+
|
| 15 |
+
## Architecture
|
| 16 |
+
|
| 17 |
+
| Parameter | Value |
|
| 18 |
+
|-----------|-------|
|
| 19 |
+
| Foundation | ESM-1b (facebook/esm1b_t33_650M_UR50S) |
|
| 20 |
+
| Strategy | Siamese / twin-branch |
|
| 21 |
+
| Layers | 12 default; 6, 8, 12, 16, or 18 selectable via --num_layers |
|
| 22 |
+
| Classification | Concat [CLS_A, CLS_B] -> Dropout(0.1) -> Linear -> 2 |
|
| 23 |
+
| Max sequence length | 1,024 tokens |
|
| 24 |
+
| Optimizer | AdamW (lr = 1 x 10^-5) |
|
| 25 |
+
| Loss | Cross-Entropy |
|
| 26 |
+
|
| 27 |
+
### Siamese vs Cross-Encoder
|
| 28 |
+
|
| 29 |
+
| | ppiDCE (Cross-Encoder) | ppiBTEP (Siamese) |
|
| 30 |
+
|---|---|---|
|
| 31 |
+
| Input | `[CLS] Seq_A [SEP] Seq_B` (joint) | `[CLS] Seq_A` and `[CLS] Seq_B` (separate) |
|
| 32 |
+
| Cross-attention | Full bidirectional at every layer | None |
|
| 33 |
+
| Classification | Single [CLS] -> Linear | Concat [CLS_A, CLS_B] -> Linear |
|
| 34 |
+
| Complexity | O((n+m)^2) | O(n^2) + O(m^2) |
|
| 35 |
+
| Speed | Slower (joint encoding) | Faster (independent, reusable) |
|
| 36 |
+
|
| 37 |
+
## Installation
|
| 38 |
+
|
| 39 |
+
### Prerequisites
|
| 40 |
+
|
| 41 |
+
- Python 3.10+
|
| 42 |
+
- CUDA-capable GPU (recommended)
|
| 43 |
+
- conda (recommended) or pip
|
| 44 |
+
|
| 45 |
+
### Setup
|
| 46 |
+
|
| 47 |
+
```bash
|
| 48 |
+
# Clone the repository
|
| 49 |
+
git clone https://github.com/kouroshSA/ppiBTEP.git
|
| 50 |
+
cd ppiBTEP
|
| 51 |
+
|
| 52 |
+
# Create a conda environment
|
| 53 |
+
conda create -n esm python=3.10
|
| 54 |
+
conda activate esm
|
| 55 |
+
pip install -r requirements.txt
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
## Repository Structure
|
| 59 |
+
|
| 60 |
+
```
|
| 61 |
+
ppiBTEP/
|
| 62 |
+
|-- train_ppiBTPE3b.py # Training script
|
| 63 |
+
|-- inference_ppiBTPE_2GPU.py # Batch inference script (multi-GPU)
|
| 64 |
+
|-- roc_analysis_color_threshold_F1e.py # ROC curve analysis with F1 optimization
|
| 65 |
+
|-- assets/
|
| 66 |
+
| +-- ppiBTEP.png # ASCII workflow diagram
|
| 67 |
+
|-- requirements.txt
|
| 68 |
+
|-- LICENSE
|
| 69 |
+
+-- README.md
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
## Usage
|
| 73 |
+
|
| 74 |
+
### Data Format
|
| 75 |
+
|
| 76 |
+
Training and inference use CSV files with columns: `seq1, seq2, label`
|
| 77 |
+
|
| 78 |
+
- `seq1`, `seq2`: Amino acid sequences
|
| 79 |
+
- `label`: `0` or `enemies` (non-interacting), `1` or `friends` (interacting)
|
| 80 |
+
|
| 81 |
+
For inference-only input, only the first two columns are required.
|
| 82 |
+
|
| 83 |
+
### Training
|
| 84 |
+
|
| 85 |
+
```bash
|
| 86 |
+
# Train from scratch with 12 layers
|
| 87 |
+
python train_ppiBTPE3b.py \
|
| 88 |
+
--train_file train.csv \
|
| 89 |
+
--val_file val.csv \
|
| 90 |
+
--model_config facebook/esm1b_t33_650M_UR50S \
|
| 91 |
+
--num_layers 12 \
|
| 92 |
+
--freeze_layers 0 \
|
| 93 |
+
--epochs 20 \
|
| 94 |
+
--batch_size 2 \
|
| 95 |
+
--learning_rate 1e-5 \
|
| 96 |
+
--max_length 1024 \
|
| 97 |
+
--output_dir ./out \
|
| 98 |
+
--device cuda
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
#### Key training options
|
| 102 |
+
|
| 103 |
+
- `--num_layers N`: Total transformer layers (6, 8, 12, 16, or 18)
|
| 104 |
+
- `--freeze_layers N`: Freeze bottom N layers (use 0 for training from scratch)
|
| 105 |
+
- `--checkpoint path.pth`: Resume from a saved checkpoint
|
| 106 |
+
- `--model_config`: ESM model config (default: `facebook/esm1b_t33_650M_UR50S`)
|
| 107 |
+
|
| 108 |
+
**Important:** When training from scratch, use `--freeze_layers 0` to ensure all layers (including embeddings) remain trainable. The default is 20, which would freeze most layers.
|
| 109 |
+
|
| 110 |
+
### Inference
|
| 111 |
+
|
| 112 |
+
```bash
|
| 113 |
+
python inference_ppiBTPE_2GPU.py \
|
| 114 |
+
--model_path out/ppiBTPE_epoch_17.pth \
|
| 115 |
+
--model_config facebook/esm1b_t33_650M_UR50S \
|
| 116 |
+
--num_layers 12 \
|
| 117 |
+
--input_file test_pairs.csv \
|
| 118 |
+
--output_file predictions.csv \
|
| 119 |
+
--batch_size 4 \
|
| 120 |
+
--max_length 1024 \
|
| 121 |
+
--device cuda
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
Multi-GPU inference:
|
| 125 |
+
```bash
|
| 126 |
+
python inference_ppiBTPE_2GPU.py \
|
| 127 |
+
--model_path out/ppiBTPE_final.pth \
|
| 128 |
+
--model_config facebook/esm1b_t33_650M_UR50S \
|
| 129 |
+
--num_layers 12 \
|
| 130 |
+
--input_file test_pairs.csv \
|
| 131 |
+
--output_file predictions.csv \
|
| 132 |
+
--device cuda:0,1
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
Output CSV columns: `seq1, seq2, Prediction, Probability_Friends, Probability_Enemies`
|
| 136 |
+
|
| 137 |
+
### ROC Analysis
|
| 138 |
+
|
| 139 |
+
Evaluate model predictions using ROC curve analysis with threshold-colored visualization and F1 optimization:
|
| 140 |
+
|
| 141 |
+
```bash
|
| 142 |
+
python roc_analysis_color_threshold_F1e.py \
|
| 143 |
+
--input_csv probabilities.csv \
|
| 144 |
+
--output_file roc_curve.png
|
| 145 |
+
```
|
| 146 |
+
|
| 147 |
+
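The input CSV should have a header row (which is skipped) followed by two columns of probability values: PRS (positive) in the first column and RRS (random/negative) in the second. The two columns may have different lengths; leave the missing cells empty.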
|
| 148 |
+
|
| 149 |
+
## Architecture Diagram
|
| 150 |
+
|
| 151 |
+
The ASCII workflow diagram (`assets/ppiBTEP.png`) covers:
|
| 152 |
+
- **A.** Siamese input strategy (independent per-protein encoding)
|
| 153 |
+
- **B.** Model architecture (twin ESM-1b branches + concat classification head)
|
| 154 |
+
- **C.** Training pipeline
|
| 155 |
+
- **D.** Inference pipeline (multi-GPU)
|
| 156 |
+
|
| 157 |
+
> Note: the diagram shows Softmax in the classification head for clarity, but
|
| 158 |
+
> the implementation returns raw logits — softmax is applied implicitly by
|
| 159 |
+
> CrossEntropyLoss during training and explicitly during inference.
|
| 160 |
+
|
| 161 |
+
## Citation
|
| 162 |
+
|
| 163 |
+
If you use this software, please cite:
|
| 164 |
+
|
| 165 |
+
```
|
| 166 |
+
Daakour, S. et al. (2026).
|
| 167 |
+
```
|
| 168 |
+
|
| 169 |
+
## License
|
| 170 |
+
|
| 171 |
+
This project is licensed under the MIT License. See [LICENSE](LICENSE) for details.
|
assets/ppiBTEP.png
ADDED
|
Git LFS Details
|
inference_ppiBTPE_2GPU.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
inference_ppiBTPE_2GPU.py — Batch inference for ppiBTEP / SiameseBTPE,
|
| 4 |
+
supporting single-GPU, multi-GPU (DataParallel), and CPU execution.
|
| 5 |
+
|
| 6 |
+
Inputs
|
| 7 |
+
------
|
| 8 |
+
CSV with at least 2 columns: seq1, seq2 (label column, if present, is ignored).
|
| 9 |
+
|
| 10 |
+
Outputs
|
| 11 |
+
-------
|
| 12 |
+
CSV with columns: seq1, seq2, Prediction, Probability_Friends, Probability_Enemies
|
| 13 |
+
|
| 14 |
+
Example (single GPU)
|
| 15 |
+
--------------------
|
| 16 |
+
python inference_ppiBTPE_2GPU.py \\
|
| 17 |
+
--model_path out/ppiBTPE_epoch_17.pth \\
|
| 18 |
+
--model_config facebook/esm1b_t33_650M_UR50S \\
|
| 19 |
+
--num_layers 12 \\
|
| 20 |
+
--input_file test_pairs.csv \\
|
| 21 |
+
--output_file predictions.csv \\
|
| 22 |
+
--batch_size 4 \\
|
| 23 |
+
--max_length 1024 \\
|
| 24 |
+
--device cuda
|
| 25 |
+
|
| 26 |
+
Example (multi-GPU)
|
| 27 |
+
-------------------
|
| 28 |
+
python inference_ppiBTPE_2GPU.py \\
|
| 29 |
+
--model_path out/ppiBTPE_final.pth \\
|
| 30 |
+
--model_config facebook/esm1b_t33_650M_UR50S \\
|
| 31 |
+
--num_layers 12 \\
|
| 32 |
+
--input_file test_pairs.csv \\
|
| 33 |
+
--output_file predictions.csv \\
|
| 34 |
+
--device cuda:0,1
|
| 35 |
+
"""
|
| 36 |
+
import argparse
|
| 37 |
+
import os
|
| 38 |
+
import torch
|
| 39 |
+
import torch.nn as nn
|
| 40 |
+
from transformers import EsmModel, EsmTokenizer, EsmConfig
|
| 41 |
+
import pandas as pd
|
| 42 |
+
from tqdm import tqdm
|
| 43 |
+
|
| 44 |
+
# Command-line arguments
|
| 45 |
+
# Build the CLI from a declarative table so that every option's type,
# default and help text can be read (and extended) in one place.
parser = argparse.ArgumentParser(
    description='Inference using the trained ppiBTPE Siamese model for PPI prediction.',
)
_CLI_OPTIONS = (
    ('--model_path', dict(type=str, required=True,
                          help='Path to the trained ppiBTPE checkpoint (.pth).')),
    ('--model_config', dict(type=str, default='facebook/esm1b_t33_650M_UR50S',
                            help='ESM config name or path used during training.')),
    ('--num_layers', dict(type=int, required=True,
                          help='Number of transformer layers used during training (for config override).')),
    ('--num_labels', dict(type=int, default=2,
                          help='Number of output labels.')),
    ('--input_file', dict(type=str, required=True,
                          help='CSV with protein pairs (seq1, seq2).')),
    ('--output_file', dict(type=str, required=True,
                           help='Path to write predictions CSV.')),
    ('--batch_size', dict(type=int, default=4,
                          help='Batch size for inference.')),
    ('--max_length', dict(type=int, default=1024,
                          help='Max token length.')),
    ('--device', dict(type=str, default='cuda',
                      help='Device: cpu or cuda or cuda:0,1')),
)
# Registration order is preserved so that --help lists the options as before.
for _flag, _spec in _CLI_OPTIONS:
    parser.add_argument(_flag, **_spec)
args = parser.parse_args()
|
| 60 |
+
|
| 61 |
+
# Device setup
|
| 62 |
+
def get_device(device_str):
    """Turn the ``--device`` string into a primary device plus GPU ids.

    Args:
        device_str: ``'cpu'``, a single torch device string such as
            ``'cuda'`` or ``'cuda:1'``, or a comma-separated list such as
            ``'cuda:0,1'`` / ``'cuda:0,cuda:1'``.

    Returns:
        A ``(device, device_ids)`` tuple. ``device`` is the ``torch.device``
        built from the first (or only) entry. ``device_ids`` is the list of
        integer GPU indices when a comma-separated list was given, and
        ``None`` otherwise.
    """
    if device_str == 'cpu':
        return torch.device('cpu'), None
    if ',' not in device_str:
        return torch.device(device_str), None

    entries = [chunk.strip() for chunk in device_str.split(',')]
    primary = torch.device(entries[0])
    gpu_ids = []
    for entry in entries:
        # Accept both 'cuda:N' and a bare 'N': keep what follows the last ':'.
        gpu_ids.append(int(entry.split(':')[-1]))
    return primary, gpu_ids
|
| 72 |
+
|
| 73 |
+
device, device_ids = get_device(args.device)
|
| 74 |
+
|
| 75 |
+
# Dataset
|
| 76 |
+
class PPIDatasetInference(torch.utils.data.Dataset):
    """Dataset of protein pairs read from a CSV file, tokenized on access.

    The first two CSV columns are taken as the two sequences of each pair;
    any further columns are not read by this class.
    """

    def __init__(self, csv_file, tokenizer, max_length):
        """Load ``csv_file`` with pandas and remember the tokenizer settings.

        Args:
            csv_file: Path to the input CSV.
            tokenizer: Callable used to tokenize one sequence at a time.
            max_length: Length every sequence is truncated/padded to.
        """
        self.data = pd.read_csv(csv_file)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of rows (pairs) in the CSV."""
        return len(self.data)

    def _encode(self, sequence):
        """Tokenize one sequence, truncated and padded to ``max_length``."""
        return self.tokenizer(
            sequence,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )

    def __getitem__(self, idx):
        """Return the tokenized pair at row ``idx``.

        Returns:
            Dict with ``input_ids1``, ``attention_mask1``, ``input_ids2`` and
            ``attention_mask2``; each tensor has its leading dimension
            removed with ``squeeze(0)``.
        """
        item = {}
        # Column 0 feeds the '...1' keys, column 1 feeds the '...2' keys.
        for column, suffix in enumerate(('1', '2')):
            encoded = self._encode(self.data.iloc[idx, column])
            item['input_ids' + suffix] = encoded.input_ids.squeeze(0)
            item['attention_mask' + suffix] = encoded.attention_mask.squeeze(0)
        return item
|
| 98 |
+
|
| 99 |
+
# Model definition matching training
|
| 100 |
+
class SiameseBTPE(nn.Module):
    """Twin-branch classifier: one shared ESM encoder applied to each protein.

    Both sequences go through the same ``self.esm`` encoder. The hidden
    state at position 0 of each branch is concatenated and passed through
    dropout and a linear layer that outputs ``num_labels`` raw logits.
    """

    def __init__(self, config, num_labels=2):
        """Create the shared encoder, dropout and classification head.

        Args:
            config: ESM configuration; ``config.hidden_size`` sets the width
                of the linear head (twice that size, one half per protein).
            num_labels: Number of output logits.
        """
        super().__init__()
        self.esm = EsmModel(config)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(config.hidden_size * 2, num_labels)

    def _first_token_state(self, input_ids, attention_mask):
        """Encode one sequence batch and return the position-0 hidden state."""
        encoded = self.esm(input_ids=input_ids, attention_mask=attention_mask)
        return encoded.last_hidden_state[:, 0, :]

    def forward(self, input_ids1, attention_mask1, input_ids2, attention_mask2):
        """Return raw (un-normalized) logits for a batch of protein pairs."""
        first = self._first_token_state(input_ids1, attention_mask1)
        second = self._first_token_state(input_ids2, attention_mask2)
        pair_features = torch.cat((first, second), dim=1)
        return self.classifier(self.dropout(pair_features))
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def main():
    """Run batch inference over the input CSV and write a predictions CSV.

    Reads its settings from the module-level ``args`` and the module-level
    ``device`` / ``device_ids``. The output is the input CSV with three
    added columns: ``Prediction``, ``Probability_Friends`` and
    ``Probability_Enemies``.
    """
    # Tokenizer & config
    tokenizer = EsmTokenizer.from_pretrained(args.model_config)
    config = EsmConfig.from_pretrained(args.model_config)
    # The encoder depth must match the checkpoint, so override the config.
    config.num_hidden_layers = args.num_layers
    print(f'Overriding config to {args.num_layers} transformer layers.')

    # Dataset & loader (shuffle=False keeps predictions aligned with rows)
    ds = PPIDatasetInference(args.input_file, tokenizer, args.max_length)
    loader = torch.utils.data.DataLoader(ds, batch_size=args.batch_size, shuffle=False)

    # Model init & load (weights are loaded on CPU, then moved to the device)
    model = SiameseBTPE(config, num_labels=args.num_labels)
    ckpt = torch.load(args.model_path, map_location='cpu')
    model.load_state_dict(ckpt)

    # DataParallel if needed
    if device_ids:
        model = nn.DataParallel(model, device_ids=device_ids)
    model.to(device)
    model.eval()

    all_preds, all_probs = [], []
    with torch.no_grad():
        for batch in tqdm(loader, desc='Inference'):
            ids1 = batch['input_ids1'].to(device)
            mask1 = batch['attention_mask1'].to(device)
            ids2 = batch['input_ids2'].to(device)
            mask2 = batch['attention_mask2'].to(device)
            logits = model(ids1, mask1, ids2, mask2)
            # The model returns raw logits; softmax turns them into probabilities.
            probs = nn.functional.softmax(logits, dim=1)
            preds = probs.argmax(dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_probs.extend(probs.cpu().tolist())

    # Map & save
    label_map = {0: 'enemies', 1: 'friends'}
    df = pd.read_csv(args.input_file)
    df['Prediction'] = [label_map[p] for p in all_preds]
    df['Probability_Friends'] = [p[1] for p in all_probs]
    df['Probability_Enemies'] = [p[0] for p in all_probs]
    # A bare file name such as 'predictions.csv' has an empty dirname, and
    # os.makedirs('') raises FileNotFoundError; only create a directory when
    # the output path actually names one.
    out_dir = os.path.dirname(args.output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df.to_csv(args.output_file, index=False)
    print(f'Saved predictions to {args.output_file}')
|
| 162 |
+
|
| 163 |
+
if __name__ == '__main__':
|
| 164 |
+
main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch>=2.0
|
| 2 |
+
transformers>=4.30
|
| 3 |
+
pandas
|
| 4 |
+
tqdm
|
| 5 |
+
numpy
|
| 6 |
+
matplotlib
|
| 7 |
+
scikit-learn
|
roc_analysis_color_threshold_F1e.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
|
| 3 |
+
Notes:
|
| 4 |
+
|
| 5 |
+
The script handles variable numbers of PRS and RRS probability values.
|
| 6 |
+
It combines the probabilities and labels, computes the ROC curve, calculates the AUC (Area Under the Curve), and plots the ROC curve using Matplotlib.
|
| 7 |
+
You can customize the script to read multiple PRS and RRS files by modifying the read_probabilities function and how you handle the input arguments.
|
| 8 |
+
Dependencies:
|
| 9 |
+
|
| 10 |
+
Python 3
|
| 11 |
+
numpy
|
| 12 |
+
matplotlib
|
| 13 |
+
scikit-learn
|
| 14 |
+
|
| 15 |
+
Explanation of the Area Under the Curve (AUC):
|
| 16 |
+
|
| 17 |
+
The Area Under the Curve (AUC) refers to the area under the Receiver Operating Characteristic (ROC) curve. The ROC curve is a graphical representation of a classifier's performance across all classification thresholds. It plots the True Positive Rate (TPR) against the False Positive Rate (FPR) at various threshold settings.
|
| 18 |
+
|
| 19 |
+
True Positive Rate (TPR), also known as Sensitivity or Recall, is the proportion of actual positives that are correctly identified.
|
| 20 |
+
False Positive Rate (FPR) is the proportion of actual negatives that are incorrectly identified as positives.
|
| 21 |
+
The AUC provides a single scalar value that summarizes the performance of the classifier:
|
| 22 |
+
|
| 23 |
+
An AUC of 1.0 indicates a perfect classifier.
|
| 24 |
+
An AUC of 0.5 suggests no discriminative ability (equivalent to random guessing).
|
| 25 |
+
An AUC between 0.5 and 1.0 indicates the degree to which the classifier can distinguish between the positive and negative classes.
|
| 26 |
+
Why is AUC important?
|
| 27 |
+
|
| 28 |
+
More on interpreting the ROC Curve:
|
| 29 |
+
|
| 30 |
+
The ROC curve plots the TPR against the FPR at various threshold levels.
|
| 31 |
+
The closer the curve follows the left-hand border and then the top border of the ROC space, the better the classifier.
|
| 32 |
+
The diagonal line represents the performance of a classifier that makes random guesses.
|
| 33 |
+
Understanding AUC Values:
|
| 34 |
+
|
| 35 |
+
AUC = 0.90-1.00: Excellent
|
| 36 |
+
AUC = 0.80-0.90: Good
|
| 37 |
+
AUC = 0.70-0.80: Fair
|
| 38 |
+
AUC = 0.60-0.70: Poor
|
| 39 |
+
AUC = 0.50-0.60: Fail
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
Threshold-Independent: AUC measures the classifier's ability to rank predictions without being dependent on a specific threshold.
|
| 44 |
+
Performance Metric: It provides a comprehensive measure of performance across all possible classification thresholds.
|
| 45 |
+
In summary, the AUC quantifies the overall ability of the model to discriminate between positive and negative classes. A higher AUC indicates better model performance.
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
F1 = 2 * ( (precision * recall) / (precision + recall) )
|
| 50 |
+
|
| 51 |
+
precision = TP / (TP + FP)
|
| 52 |
+
|
| 53 |
+
recall = TP / (TP + FN)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
You can adjust the decimal precision by changing ".6f" to the desired value in f'Best F1 Threshold: {best_thresh:.6f}'
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
"""
|
| 60 |
+
#
|
| 61 |
+
|
| 62 |
+
# pip install numpy matplotlib scikit-learn
|
| 63 |
+
|
| 64 |
+
# python roc_analysis_color_threshold_F1e.py --input_csv probabilities.csv --output_file roc_curve.png
|
| 65 |
+
|
| 66 |
+
#!/usr/bin/env python
|
| 67 |
+
|
| 68 |
+
#!/usr/bin/env python
|
| 69 |
+
import argparse
|
| 70 |
+
import numpy as np
|
| 71 |
+
import matplotlib.pyplot as plt
|
| 72 |
+
from sklearn.metrics import roc_curve, auc, f1_score
|
| 73 |
+
import csv
|
| 74 |
+
|
| 75 |
+
def read_probabilities_from_csv(filename):
    """Read PRS and RRS probability values from a two-column CSV file.

    The first row is treated as a header and skipped.  In every following
    row, column 0 holds a PRS value and column 1 holds an RRS value.  Either
    cell may be empty or missing, so the two columns do not need to contain
    the same number of values.

    Args:
        filename: Path to the CSV file.

    Returns:
        A ``(prs_probs, rrs_probs)`` tuple of lists of floats, in file order.

    Raises:
        ValueError: If a non-empty cell cannot be parsed as a float.
    """
    prs_probs = []
    rrs_probs = []
    # newline='' is what the csv module documents for files given to a reader.
    with open(filename, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            # A row may be blank or carry only the PRS column, so index
            # defensively instead of discarding short rows.
            prs_value = row[0].strip() if len(row) > 0 else ''
            rrs_value = row[1].strip() if len(row) > 1 else ''
            if prs_value:
                prs_probs.append(float(prs_value))
            if rrs_value:
                rrs_probs.append(float(rrs_value))
    return prs_probs, rrs_probs
|
| 94 |
+
|
| 95 |
+
def main():
    """Plot a threshold-coloured ROC curve and report AUC and the best F1 score.

    Command-line arguments:
        --input_csv:   CSV file whose first column holds PRS values (labelled 1)
                       and whose second column holds RRS values (labelled 0).
        --output_file: PNG path for the plot (default ``roc_curve.png``).

    The ROC curve is drawn segment by segment, coloured by the decision
    threshold, and a subset of thresholds is annotated on the curve.  The
    threshold that maximises F1 is reported in the legend and on stdout.
    Exits with an argparse error when the input cannot yield a ROC curve.
    """
    parser = argparse.ArgumentParser(description='Compute ROC curve, best F1 score, and annotate thresholds.')
    parser.add_argument('--input_csv', required=True, help='CSV file containing PRS and RRS probability values')
    parser.add_argument('--output_file', default='roc_curve.png', help='Output file name for ROC curve plot')

    args = parser.parse_args()

    # Read probability values from CSV file
    prs_probs, rrs_probs = read_probabilities_from_csv(args.input_csv)

    # ROC, AUC and F1 are only defined when both classes are present.
    if not prs_probs or not rrs_probs:
        parser.error(
            f'{args.input_csv} must contain at least one PRS and one RRS value '
            f'(found {len(prs_probs)} PRS, {len(rrs_probs)} RRS).'
        )

    # Assign labels: PRS entries are positives, RRS entries are negatives.
    probs = np.array(prs_probs + rrs_probs)
    labels = np.array([1] * len(prs_probs) + [0] * len(rrs_probs))

    # Compute ROC curve and AUC (AUC uses the full, unfiltered curve).
    fpr, tpr, thresholds = roc_curve(labels, probs)
    roc_auc = auc(fpr, tpr)

    # Keep only finite thresholds inside [0, 1].  This drops the artificial
    # first threshold roc_curve prepends (inf or max(score) + 1).
    keep = np.isfinite(thresholds) & (thresholds >= 0) & (thresholds <= 1)
    fpr = fpr[keep]
    tpr = tpr[keep]
    thresholds = thresholds[keep]
    if thresholds.size == 0:
        parser.error('No ROC thresholds fall within [0, 1]; the input values '
                     'do not look like probabilities.')

    # Compute best F1 score across thresholds (first maximum wins on ties).
    best_f1 = -1.0
    best_thresh = None
    for thresh in thresholds:
        predicted_labels = (probs >= thresh).astype(int)
        current_f1 = f1_score(labels, predicted_labels)
        if current_f1 > best_f1:
            best_f1 = current_f1
            best_thresh = thresh

    # Set global font
    plt.rcParams['font.family'] = 'Arial'

    # Create figure and colormap
    fig, ax = plt.subplots(figsize=(10, 8))
    norm = plt.Normalize(vmin=thresholds.min(), vmax=thresholds.max())
    cmap = plt.cm.viridis

    # Plot the ROC curve in segments, color-coded by the threshold at the
    # start of each segment.
    for i in range(len(fpr) - 1):
        ax.plot(fpr[i:i + 2], tpr[i:i + 2], color=cmap(norm(thresholds[i])), lw=2.5)

    # Diagonal line (random-guess reference)
    ax.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')

    # Add a colorbar for thresholds
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    cbar = fig.colorbar(sm, ax=ax)
    cbar.set_label('Threshold', fontsize=16)
    cbar.ax.tick_params(labelsize=14)

    # Annotate a subset of thresholds on the ROC curve.  np.unique removes the
    # repeated indices that appear when there are fewer thresholds than labels
    # requested, so no point is annotated twice.
    num_thresholds_to_annotate = 10
    idxs = np.unique(
        np.linspace(0, len(thresholds) - 1, num_thresholds_to_annotate).astype(int)
    )
    for idx in idxs:
        ax.annotate(f'{thresholds[idx]:.2f}', xy=(fpr[idx], tpr[idx]),
                    textcoords='offset points', xytext=(0, 10),
                    ha='center', fontsize=12, color='blue')

    # The best-F1 operating point is intentionally not marked on the curve;
    # it is reported in the legend instead.

    # Set axis limits and labels
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate', fontsize=16)
    ax.set_ylabel('True Positive Rate', fontsize=16)
    ax.set_title('Receiver Operating Characteristic (ROC)', fontsize=18)
    ax.tick_params(axis='both', which='major', labelsize=14)

    # Add gridlines
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)

    # Add legend with 3 decimal places
    legend_text = (f'ROC curve (AUC = {roc_auc:.3f}, '
                   f'Best F1 = {best_f1:.3f}, '
                   f'Best F1 Threshold = {best_thresh:.3f})')
    ax.legend([legend_text], loc="lower right", fontsize=12)

    # Adjust layout
    plt.tight_layout()

    # Save and show the figure
    plt.savefig(args.output_file, dpi=300, format='png')
    plt.show()

    print(f"ROC curve saved to {args.output_file}")
    print(f"Best F1 Score: {best_f1:.3f} at threshold {best_thresh:.3f}")


if __name__ == '__main__':
    main()
|
train_ppiBTPE3b.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
train_ppiBTPE3b.py — Train or fine-tune the ppiBTEP / SiameseBTPE model for
|
| 4 |
+
protein-protein interaction (PPI) classification.
|
| 5 |
+
|
| 6 |
+
Architecture
|
| 7 |
+
------------
|
| 8 |
+
SiameseBTPE: two branches with shared weights, each an ESM-1b encoder
|
| 9 |
+
(facebook/esm1b_t33_650M_UR50S by default). Each branch produces a [CLS]
|
| 10 |
+
embedding (last_hidden_state[:, 0, :], dim=1280). The two [CLS] embeddings
|
| 11 |
+
are concatenated (dim=2560), passed through Dropout(0.1) and a Linear layer
|
| 12 |
+
to 2 logits (CrossEntropyLoss; softmax applied at inference).
|
| 13 |
+
|
| 14 |
+
Modes
|
| 15 |
+
-----
|
| 16 |
+
- From scratch: --num_layers N --freeze_layers 0
|
| 17 |
+
- Fine-tuning ESM-1b: omit --num_layers, set --freeze_layers >= 1
|
| 18 |
+
- Resume: --checkpoint <path/to/ppiBTPE_epoch_K.pth>
|
| 19 |
+
|
| 20 |
+
Important
|
| 21 |
+
---------
|
| 22 |
+
When training from scratch, pass --freeze_layers 0 explicitly. The default is
|
| 23 |
+
20, which would freeze most of the model.
|
| 24 |
+
|
| 25 |
+
Data format
|
| 26 |
+
-----------
|
| 27 |
+
CSV with columns: seq1, seq2, label
|
| 28 |
+
- label = 1 or 'friends' → interacting
|
| 29 |
+
- label = 0 or 'enemies' → non-interacting
|
| 30 |
+
|
| 31 |
+
Example
|
| 32 |
+
-------
|
| 33 |
+
python train_ppiBTPE3b.py \\
|
| 34 |
+
--train_file train.csv \\
|
| 35 |
+
--val_file val.csv \\
|
| 36 |
+
--model_config facebook/esm1b_t33_650M_UR50S \\
|
| 37 |
+
--num_layers 12 \\
|
| 38 |
+
--freeze_layers 0 \\
|
| 39 |
+
--epochs 20 \\
|
| 40 |
+
--batch_size 2 \\
|
| 41 |
+
--learning_rate 1e-5 \\
|
| 42 |
+
--max_length 1024 \\
|
| 43 |
+
--output_dir ./out \\
|
| 44 |
+
--device cuda
|
| 45 |
+
"""
|
| 46 |
+
import argparse
|
| 47 |
+
import os
|
| 48 |
+
import torch
|
| 49 |
+
import torch.nn as nn
|
| 50 |
+
import pandas as pd
|
| 51 |
+
from torch.utils.data import Dataset, DataLoader
|
| 52 |
+
from transformers import EsmModel, EsmTokenizer, EsmConfig
|
| 53 |
+
from tqdm import tqdm
|
| 54 |
+
|
| 55 |
+
# Command-line arguments
|
| 56 |
+
parser = argparse.ArgumentParser(
    description='Train or fine-tune ppiBTPE Siamese model for PPI prediction.'
)

# Every command-line option, in the order it appears in --help.
_CLI_OPTIONS = (
    # Input files
    ('--train_file', dict(type=str, required=True,
                          help='Path to the training CSV file.')),
    ('--val_file', dict(type=str, required=True,
                        help='Path to the validation CSV file.')),
    ('--checkpoint', dict(type=str, default=None,
                          help='(Optional) Path to a .pth checkpoint to load before training/fine-tuning.')),
    # Model parameters
    ('--model_config', dict(type=str, default='facebook/esm1b_t33_650M_UR50S',
                            help='ESM config name or path for architecture.')),
    ('--num_labels', dict(type=int, default=2,
                          help='Number of output labels (e.g., 2 for binary classification).')),
    ('--num_layers', dict(type=int, default=None,
                          help='Total transformer layers to initialize (scratch).')),
    ('--freeze_layers', dict(type=int, default=20,
                             help='Number of bottom layers to freeze during fine-tuning.')),
    # Training hyperparameters
    ('--epochs', dict(type=int, default=3, help='Number of training epochs.')),
    ('--batch_size', dict(type=int, default=4, help='Batch size.')),
    ('--learning_rate', dict(type=float, default=1e-5, help='Learning rate.')),
    ('--max_length', dict(type=int, default=1024,
                          help='Maximum sequence length for tokenization.')),
    # Misc
    ('--output_dir', dict(type=str, default='./',
                          help='Directory to save checkpoints and final model.')),
    ('--device', dict(type=str, default='cuda', choices=['cpu', 'cuda'],
                      help='Device to run training on.')),
)
for _flag, _spec in _CLI_OPTIONS:
    parser.add_argument(_flag, **_spec)
args = parser.parse_args()

# Device setup: CUDA is used only when it is both requested and available;
# otherwise training runs on the CPU and n_gpu is 0.
_use_cuda = torch.cuda.is_available() and args.device.startswith('cuda')
device = torch.device(args.device if _use_cuda else 'cpu')
if _use_cuda:
    n_gpu = torch.cuda.device_count()
    print(f"GPUs available: {n_gpu}")
else:
    n_gpu = 0
    print("Using CPU.")
|
| 102 |
+
|
| 103 |
+
# Dataset definition
|
| 104 |
+
class SiamesePPIDataset(Dataset):
    """Dataset of sequence pairs with a class label, read from a CSV file.

    Columns are read by position: columns 0 and 1 hold the two sequences and
    column 2 holds the label.  Each item is a dict with the tokenized form of
    both sequences and the label as a ``torch.long`` tensor.

    Args:
        csv_file: Path to the CSV file (loaded with ``pandas.read_csv``).
        tokenizer: Callable tokenizer; its result must expose ``input_ids``
            and ``attention_mask`` tensors with a leading batch dimension.
        max_length: Length every sequence is truncated/padded to.
    """

    # Textual label names accepted in the label column (case-insensitive).
    _LABEL_NAMES = {'friends': 1, 'enemies': 0}

    def __init__(self, csv_file, tokenizer, max_length):
        self.data = pd.read_csv(csv_file)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    @classmethod
    def _encode_label(cls, lbl):
        """Convert a raw label cell to an integer class index.

        Strings are stripped and lower-cased; 'friends' maps to 1, 'enemies'
        to 0, and integer strings such as '1' to their value.  Non-string
        values are passed through ``int``.

        Raises:
            ValueError: If a string label is neither a known name nor an
                integer.  Previously such labels were silently treated as 0.
        """
        if not isinstance(lbl, str):
            return int(lbl)
        text = lbl.strip().lower()
        if text in cls._LABEL_NAMES:
            return cls._LABEL_NAMES[text]
        try:
            return int(text)
        except ValueError:
            raise ValueError(
                f"Unrecognized label {lbl!r}; expected 'friends', 'enemies' "
                f"or an integer."
            ) from None

    def _tokenize(self, seq):
        """Tokenize one sequence to a fixed length of ``max_length`` tokens."""
        return self.tokenizer(seq, truncation=True, padding='max_length',
                              max_length=self.max_length, return_tensors='pt')

    def __getitem__(self, idx):
        seq1 = self.data.iloc[idx, 0]
        seq2 = self.data.iloc[idx, 1]
        label = self._encode_label(self.data.iloc[idx, 2])
        enc1 = self._tokenize(seq1)
        enc2 = self._tokenize(seq2)
        # squeeze(0) drops the batch dimension added by return_tensors='pt';
        # the DataLoader re-adds it when collating.
        return {
            'input_ids1': enc1.input_ids.squeeze(0),
            'attention_mask1': enc1.attention_mask.squeeze(0),
            'input_ids2': enc2.input_ids.squeeze(0),
            'attention_mask2': enc2.attention_mask.squeeze(0),
            'labels': torch.tensor(label, dtype=torch.long)
        }
|
| 131 |
+
|
| 132 |
+
# Model definition
|
| 133 |
+
class SiameseBTPE(nn.Module):
    """Siamese pair classifier built on a single, shared ESM encoder.

    Both inputs are encoded by the same ``EsmModel`` instance.  The hidden
    state at position 0 of each encoding is taken, the two vectors are
    concatenated, passed through dropout, and projected to ``num_labels``
    logits.

    Args:
        config: ESM configuration; ``config.hidden_size`` sets the width of
            each branch's embedding.
        num_labels: Number of output logits.
    """

    def __init__(self, config, num_labels=2):
        super().__init__()
        # Attribute names double as state-dict key prefixes; keep them stable
        # so saved checkpoints stay loadable.
        self.esm = EsmModel(config)
        self.dropout = nn.Dropout(0.1)
        pair_width = 2 * config.hidden_size
        self.classifier = nn.Linear(pair_width, num_labels)

    def forward(self, id1, mask1, id2, mask2):
        """Return classification logits for a batch of sequence pairs.

        Args:
            id1, mask1: Token ids and attention mask of the first sequences.
            id2, mask2: Token ids and attention mask of the second sequences.
        """
        # Encode the first branch, then the second, with the shared encoder
        # and keep only the position-0 hidden state of each.
        first_tokens = [
            self.esm(input_ids=ids, attention_mask=mask).last_hidden_state[:, 0, :]
            for ids, mask in ((id1, mask1), (id2, mask2))
        ]
        pair = torch.cat(first_tokens, dim=1)
        return self.classifier(self.dropout(pair))
|
| 147 |
+
|
| 148 |
+
# Main training loop
|
| 149 |
+
|
| 150 |
+
def main():
    """Train or fine-tune the Siamese model, saving a checkpoint after every epoch.

    All settings come from the module-level ``args``; the model runs on the
    module-level ``device`` and is wrapped in ``nn.DataParallel`` when more
    than one GPU is available.

    Weight initialisation:
        * ``--checkpoint`` given: the checkpoint is loaded (non-strictly) and
          any key mismatches are reported.
        * no checkpoint, no ``--num_layers`` override and layers are to be
          frozen: the pretrained encoder weights of ``--model_config`` are
          loaded, because freezing randomly initialised layers cannot be
          fine-tuning.  If the weights cannot be loaded, a warning is printed
          and training continues from random initialisation.
        * otherwise: random initialisation (training from scratch).

    Writes ``ppiBTPE_epoch_<k>.pth`` per epoch and ``ppiBTPE_final.pth`` to
    ``args.output_dir``.
    """
    tokenizer = EsmTokenizer.from_pretrained(args.model_config)
    config = EsmConfig.from_pretrained(args.model_config)
    if args.num_layers is not None:
        config.num_hidden_layers = args.num_layers
        print(f"Using {args.num_layers} layers (override)")

    # datasets + loaders
    train_ds = SiamesePPIDataset(args.train_file, tokenizer, args.max_length)
    val_ds = SiamesePPIDataset(args.val_file, tokenizer, args.max_length)
    train_loader = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=args.batch_size, shuffle=False)

    model = SiameseBTPE(config, num_labels=args.num_labels)

    total_layers = len(model.esm.encoder.layer)
    # Clamp to [0, total_layers]: a negative count would otherwise act as a
    # negative slice bound below and freeze almost every layer.
    to_freeze = max(0, min(args.freeze_layers, total_layers))

    if args.checkpoint:
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        state = torch.load(args.checkpoint, map_location='cpu')
        # strict=False tolerates mismatching keys, so report them instead of
        # silently training a partially loaded model.
        result = model.load_state_dict(state, strict=False)
        print(f"Loaded checkpoint: {args.checkpoint}")
        if result.missing_keys:
            print(f"Warning: {len(result.missing_keys)} parameter(s) missing from checkpoint, "
                  f"e.g. {result.missing_keys[:5]}")
        if result.unexpected_keys:
            print(f"Warning: {len(result.unexpected_keys)} unexpected key(s) in checkpoint ignored, "
                  f"e.g. {result.unexpected_keys[:5]}")
    elif to_freeze > 0:
        if args.num_layers is None:
            # Fine-tuning mode without a checkpoint: start from the pretrained
            # encoder rather than freezing random weights.
            try:
                model.esm = EsmModel.from_pretrained(args.model_config, config=config)
                print(f"Loaded pretrained encoder weights: {args.model_config}")
            except OSError as err:
                print(f"Warning: could not load pretrained weights from {args.model_config} "
                      f"({err}); frozen layers keep their random initialization.")
        else:
            print("Warning: freezing randomly initialized layers; "
                  "pass --freeze_layers 0 when training from scratch.")

    # freeze layers
    # NOTE(review): embeddings are frozen unconditionally, including when
    # training from scratch with --freeze_layers 0 -- confirm this is intended.
    for param in model.esm.embeddings.parameters():
        param.requires_grad = False
    for layer in model.esm.encoder.layer[:to_freeze]:
        for p in layer.parameters():
            p.requires_grad = False
    print(f"Frozen {to_freeze}/{total_layers} layers")

    # device
    model.to(device)
    # Keep a handle on the bare module so checkpoints never carry the
    # "module." prefix that DataParallel adds to parameter names.
    core_model = model
    if n_gpu > 1:
        model = nn.DataParallel(model)

    # Only parameters left trainable are handed to the optimizer.
    optimizer = torch.optim.AdamW(
        (p for p in model.parameters() if p.requires_grad),
        lr=args.learning_rate,
    )
    crit = nn.CrossEntropyLoss()
    os.makedirs(args.output_dir, exist_ok=True)

    for e in range(args.epochs):
        print(f"Epoch {e+1}/{args.epochs}")
        model.train()
        train_loss = 0.0
        for b in tqdm(train_loader, desc='Train'):
            optimizer.zero_grad()
            logits = model(b['input_ids1'].to(device), b['attention_mask1'].to(device),
                           b['input_ids2'].to(device), b['attention_mask2'].to(device))
            loss = crit(logits, b['labels'].to(device))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        # max(..., 1) keeps an empty loader from dividing by zero.
        print(f"Train loss: {train_loss / max(len(train_loader), 1):.4f}")

        model.eval()
        val_loss, correct, seen = 0.0, 0, 0
        with torch.no_grad():
            for b in tqdm(val_loader, desc='Val'):
                targets = b['labels'].to(device)
                logits = model(b['input_ids1'].to(device), b['attention_mask1'].to(device),
                               b['input_ids2'].to(device), b['attention_mask2'].to(device))
                val_loss += crit(logits, targets).item()
                preds = logits.argmax(dim=1)
                correct += (preds == targets).sum().item()
                seen += len(preds)
        print(f"Val loss: {val_loss / max(len(val_loader), 1):.4f}, Acc: {correct / max(seen, 1):.4f}")

        # save
        path = os.path.join(args.output_dir, f"ppiBTPE_epoch_{e+1}.pth")
        torch.save(core_model.state_dict(), path)
        print(f"Saved {path}")

    final = os.path.join(args.output_dir, 'ppiBTPE_final.pth')
    torch.save(core_model.state_dict(), final)
    print(f"Saved final model: {final}")


if __name__ == '__main__':
    main()
|