kouroshSA committed on
Commit
23b71a0
·
verified ·
1 Parent(s): 8e8386d

Mirror of github.com/kouroshSA/ppiBTEP

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/ppiBTEP.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model checkpoints
2
+ *.pth
3
+ *.pt
4
+ out/
5
+ out_*/
6
+
7
+ # Data files
8
+ *.csv
9
+ *.bin
10
+ *.pkl
11
+ !requirements.txt
12
+ # Reference / inference CSVs that ship with the repo
13
+ !MED4_PRS_100.csv
14
+ !MED4_RRS_100.csv
15
+ !MED4-PPIs-low-confidence_ppiTEPM_prompts.csv
16
+
17
+ # Python
18
+ __pycache__/
19
+ *.pyc
20
+ *.pyo
21
+ *.egg-info/
22
+ dist/
23
+ build/
24
+ .eggs/
25
+
26
+ # Environment
27
+ .env
28
+ *.env
29
+ .venv/
30
+ venv/
31
+
32
+ # IDE
33
+ .vscode/
34
+ .idea/
35
+ *.swp
36
+ *.swo
37
+ *~
38
+
39
+ # OS
40
+ .DS_Store
41
+ Thumbs.db
42
+
43
+ # Jupyter
44
+ .ipynb_checkpoints/
45
+
46
+ # Logs
47
+ *.log
48
+ wandb/
LICENSE ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Kourosh Salehi-Ashtiani
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+ ---
24
+
25
+ ppiBTEP (also designated SiameseBTPE) adapts the ESM-1b protein language
26
+ model (Rives et al., 2021) for binary protein-protein interaction
27
+ classification via a Siamese (twin-branch) architecture.
28
+ The ESM framework is by Meta AI Research.
MED4-PPIs-low-confidence_ppiTEPM_prompts.csv ADDED
The diff for this file is too large to render. See raw diff
 
MED4_PRS_100.csv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MNTRKKNPKRGVGKTETNTEWLDKVINQLINKDFSQYL,MKLRLFEFYFIKDYLRPWFGLIYSLFFLFFLGAIGYRITEGWDWGDCLWMVLITITTIGFGEVQTLSPEGRIITVLIIVGGLIFIQFTFQKAVRLFESGYFQRVNELRFKRLLRKMENHVILCGYGRVGQEISNQIKTQNIPIIVVESDEDRKKIAEDNGLEVLCADATLDETLKLAGLDKCKSLVVTLPNDAANLYVVLSAKGIRSSIRVIARAGTEEAASKLRLAGASIVVSPYIAAGRAMASMALRPIAIDFLDLLAGSECEIEEFELSNDISLFETAEKITLLELGIGKKSGAKILAIKEDEKLITNPGGDFLLQPGQVLIAFGSKEQLTTLNRLLGNLVVSVELLK
2
+ LVESTQSQDSNLGTRLQQDLKNDLIAGLLVVIPLATTIWLSSIVSKFVLTLVTSVPKQLNPFITLNPLLQDLINLTLGLTVPLLAILLIGLMARNFVGRWLLEFGEGTLSKIPVAGAVYKTLKQLLETFLSNKSNRFRRVVLVEYPREGLFSVGFVTGDVGPSLQSELDEKLLSVFIPTAPNPTTGWYTLVPESSVKDLAISVEDAFRTIISVGIVNPDEKDSSSNPTFSKLFSQLRASTNTSST,LVESTQSQDSNLGTRLQQDLKNDLIAGLLVVIPLATTIWLSSIVSKFVLTLVTSVPKQLNPFITLNPLLQDLINLTLGLTVPLLAILLIGLMARNFVGRWLLEFGEGTLSKIPVAGAVYKTLKQLLETFLSNKSNRFRRVVLVEYPREGLFSVGFVTGDVGPSLQSELDEKLLSVFIPTAPNPTTGWYTLVPESSVKDLAISVEDAFRTIISVGIVNPDEKDSSSNPTFSKLFSQLRASTNTSST
3
+ LMHNRSLSRELSLLSLGLIKDTADLELNKIQIDEIFESALDSLINHCRDELDNCEADLENVSQHILDSELKEGSNSSFANVRDELKKAFYKMESVMNSLSVTLDFPKLIVSSNQIDIREDVNHRILSIINNLKSIDSEIDEVMDRWRLKRLPRVDRDILRLAYVDIHFLDTPVAVACDEAVNLANKYCDTQGRKMINGVLRRLQRVKVN,LMHNRSLSRELSLLSLGLIKDTADLELNKIQIDEIFESALDSLINHCRDELDNCEADLENVSQHILDSELKEGSNSSFANVRDELKKAFYKMESVMNSLSVTLDFPKLIVSSNQIDIREDVNHRILSIINNLKSIDSEIDEVMDRWRLKRLPRVDRDILRLAYVDIHFLDTPVAVACDEAVNLANKYCDTQGRKMINGVLRRLQRVKVN
4
+ MNQFFSRRSFILIPIMSILKFILQPKKVLAAFAASDDDWNLSKEDWKNKLSPESYYILREEGTERAFSSQLNNEKRKGIFYCAGCNQPLFTSDTKFDSGTGWPSFWDPIQGSVETKVDFKLIVPRTEYHCSRCGGHQGHVFNDGPLPTGKRYCNNGLALKFIAE,MNQFFSRRSFILIPIMSILKFILQPKKVLAAFAASDDDWNLSKEDWKNKLSPESYYILREEGTERAFSSQLNNEKRKGIFYCAGCNQPLFTSDTKFDSGTGWPSFWDPIQGSVETKVDFKLIVPRTEYHCSRCGGHQGHVFNDGPLPTGKRYCNNGLALKFIAE
5
+ LVKPKSPDNKISNHLQQDVVKIAGKTIFINPFLYWRRFDENTNRWLREPGQMSEEQIQPNRNRFYPEIDWADLSQNQKLVKDASVEMFLKTLELISTFHPQLNSGQLLEVERKMAITKKLPFEKWVTKSFAKKARAEEYEKRKFKRDRFIRSWKEWLSLENTQQALLPIIVVVFVSAFIGWSSGVSKNSCNPYFEQNLDQSI,MKGHKKIRFIFPLVAMYVPLLLLAPKAIAGSFGAEIFCTMRDGGNDHESSWQAAYSYIKKQKGGIFKTSPKQAAGQIIETVVRERDKFSYCVEFLDQLHPDRKLQLENDRKEKRRKKEELLQDKENEDYSKETFDRYSY
6
+ MDKPKNKNFANTASRISAIASSVMDLHVRIALQEVDREKRRLISGGVFIAMGGILLLLVLISIHVIFYLTLSKLNNWATEYNLLLIIFVDLFLAGLSLKLGGKLAKGPYLPQTLEGLGKTTKAVLGKK,MDKPKNKNFANTASRISAIASSVMDLHVRIALQEVDREKRRLISGGVFIAMGGILLLLVLISIHVIFYLTLSKLNNWATEYNLLLIIFVDLFLAGLSLKLGGKLAKGPYLPQTLEGLGKTTKAVLGKK
7
+ MQSKTKELDPILEVNNLFASIENLPILKGVTISVNPGEIHAIMGRNGCGKSTLSKIIAGHPSYKITKGEIKFTGNDIQSLEPEERAQSGIFLGFQYPIEIPGVSNLEFLRVATNARRKFLNKEELDTFDFEDLVKEKLDLVKMDSAFLSRSINQGFSGGEKKRNEILQMALLEPKIAILDETDSGLDIDALRIVASGIKKISNEETGIILITHYQRLLDEIQPDYVHVMSDGQIIKTGESDLALELEKHGYEWTDNFIKEQ,MQSKTKELDPILEVNNLFASIENLPILKGVTISVNPGEIHAIMGRNGCGKSTLSKIIAGHPSYKITKGEIKFTGNDIQSLEPEERAQSGIFLGFQYPIEIPGVSNLEFLRVATNARRKFLNKEELDTFDFEDLVKEKLDLVKMDSAFLSRSINQGFSGGEKKRNEILQMALLEPKIAILDETDSGLDIDALRIVASGIKKISNEETGIILITHYQRLLDEIQPDYVHVMSDGQIIKTGESDLALELEKHGYEWTDNFIKEQ
8
+ MSKVEIYTWRFCPFCIRAKSLLEKKNITFTEHKIDGDDNARELMMERANGKRTVPQIFIDDKSIGGCDELYELEKEDKLDLLLN,MSKVEIYTWRFCPFCIRAKSLLEKKNITFTEHKIDGDDNARELMMERANGKRTVPQIFIDDKSIGGCDELYELEKEDKLDLLLN
9
+ VQFIDQANIILKAGKGGNGIVSFRREKFVPAGGPSGGNGGKGGSIIIIADNNLQTLLDFKFNREIFAKDGFKGGPNKRSGASGENTILKVPCGTEIRDFNTGIILGDLTEDKQSLTIAHGGRGGHGNAYYLSNQNRAPESFTEGKEGEIWEVQLELKLLAEVGIIGLPNAGKSTLISVLSSARPKIANYPFTTLIPNLGVVRKADGNGCLFADIPGLISGAAEGVGLGHDFLRHIQRTKILIHLIDSIAENPIRDFEIIEKELKRYGSGLLNKERIVVLNKMELVDENYLQTITKKLENLSKKKVLVISSSLRKGLSPLLSEVWKRI,VPNNQNRDNFIDKAFTVIAESIVKIMPIADKEKKAYIYYRDGLAAQNNGDYSEALDYYNESLLLEENKIDRGETLKNMAIIYMSNGEEDRSIETYQKALEENPKQPSCLKNIGLIYEKRGRFAEQNGDLDQRDMWFDKAAQVWSKAVRLYPGGYLDIENWLKTSGRSSIDIYL
10
+ MTTIQQQRTSLLKGWPQFCEWVTSTNNRIYVGWFGVLMIPCLLAAAACFIVAFIAAPPVDIDGIREPVAGSFLYGNNIISGAVVPSSNAIGLHFYPIWEAATVDEWLYNGGPYQLVIFHFLIGISAYMGRQWELSYRLGMRPWICVAYSAPVSAAFAVFLVYPFGQGSFSDGMPLGISGTFNFMFVFQAEHNILMHPFHMAGVAGMFGGSLFSAMHGSLVTSSLIRETTETESQNYGYKFGQEEETYNIVAAHGYFGRLIFQYASFNNSRSLHFFLAVFPVVCVWLTSMGICTMAFNLNGFNFNQSVVDANGKIVPTWGDVLNRANLGMEVMHERNAHNFPLDLAAAESTTVALTAPAIG,MTTIQQQRTSLLKGWPQFCEWVTSTNNRIYVGWFGVLMIPCLLAAAACFIVAFIAAPPVDIDGIREPVAGSFLYGNNIISGAVVPSSNAIGLHFYPIWEAATVDEWLYNGGPYQLVIFHFLIGISAYMGRQWELSYRLGMRPWICVAYSAPVSAAFAVFLVYPFGQGSFSDGMPLGISGTFNFMFVFQAEHNILMHPFHMAGVAGMFGGSLFSAMHGSLVTSSLIRETTETESQNYGYKFGQEEETYNIVAAHGYFGRLIFQYASFNNSRSLHFFLAVFPVVCVWLTSMGICTMAFNLNGFNFNQSVVDANGKIVPTWGDVLNRANLGMEVMHERNAHNFPLDLAAAESTTVALTAPAIG
11
+ MSKVELISLTPEAEKTMAYIARVSNPSNQANDKFAGLLRYCIKHEHWSVFEQSCMTLKIETNRGIAAQILRHRSFTFQEFSQRYAETSLLGNEIPIPNLRRQDQKNRQNSIDDIPDELKIKFSEKISKHFQEANKLYEEMLNEGIAKECARFIMPLATPTRIYMTGSCRSWIHYIQLRSKEGTQKEHMEIAEDCKKVFIKYFPSVSEALNWE,MSKVELISLTPEAEKTMAYIARVSNPSNQANDKFAGLLRYCIKHEHWSVFEQSCMTLKIETNRGIAAQILRHRSFTFQEFSQRYAETSLLGNEIPIPNLRRQDQKNRQNSIDDIPDELKIKFSEKISKHFQEANKLYEEMLNEGIAKECARFIMPLATPTRIYMTGSCRSWIHYIQLRSKEGTQKEHMEIAEDCKKVFIKYFPSVSEALNWE
12
+ VSGWLFIIFLLLLGGLISTLGDLLGSKIGKARFSILKLRPKKTAILITILTGSLISASSLFLMILVNRQLRVGLFRLGDLQKKLQESKQVLIPLEKEREKLENKIKAKETEFKQLERNIIALRSGKFVIRSGQSLIISEISSSNLKDIKSKIEKIIINANRYTHKIVKPKNKEVKNLLLLRKNHIEEMQNIILKGGNWVINIKSVRNVLTGENFVYAFPEITENKIIVRKGEKITKIDFKQEDFNKKDFGDKVNFLLSSSLAEIKRRGSLVNEIKLRGDSIKELRDFLNKNDKTNFELEAVSLFNSKTAQPVIVELNVNYPES,VSGWLFIIFLLLLGGLISTLGDLLGSKIGKARFSILKLRPKKTAILITILTGSLISASSLFLMILVNRQLRVGLFRLGDLQKKLQESKQVLIPLEKEREKLENKIKAKETEFKQLERNIIALRSGKFVIRSGQSLIISEISSSNLKDIKSKIEKIIINANRYTHKIVKPKNKEVKNLLLLRKNHIEEMQNIILKGGNWVINIKSVRNVLTGENFVYAFPEITENKIIVRKGEKITKIDFKQEDFNKKDFGDKVNFLLSSSLAEIKRRGSLVNEIKLRGDSIKELRDFLNKNDKTNFELEAVSLFNSKTAQPVIVELNVNYPES
13
+ MNDRIIEFEPLIEGILIKRYKRFLADIQIENGEIVTAHCANTGPMKGLLNEGANVRISFSSSTKRKLPWTWEQVKVIGRDNKEVWVGINTLFANKLIRKVIEQNLFKDKLGEIAKIKSEVPYGKDKKSRIDFLLTPKSSNPDNRNIYVEVKNTTWTKNNVALFPDTETKRGQKHLIELKGLIPESKSVLVPCITRKDIDYFAPGDESDPLYGELFRESISAGMLLIPCCFEFHSDHVAWKGFKPLKLN,MNDRIIEFEPLIEGILIKRYKRFLADIQIENGEIVTAHCANTGPMKGLLNEGANVRISFSSSTKRKLPWTWEQVKVIGRDNKEVWVGINTLFANKLIRKVIEQNLFKDKLGEIAKIKSEVPYGKDKKSRIDFLLTPKSSNPDNRNIYVEVKNTTWTKNNVALFPDTETKRGQKHLIELKGLIPESKSVLVPCITRKDIDYFAPGDESDPLYGELFRESISAGMLLIPCCFEFHSDHVAWKGFKPLKLN
14
+ MRRSLRDSIVGFSLLGGLLVFTFFSFWLRGVKLSSKNWYLFAEFNNASGLSKKSPVTYRGILVGSIEDILFTNESIKAKIVLNNPEIILPKPAFARVVTNSFLGGDVQVALETSEKTIPKNTPKAISDKCDSKLIICQGDTITGKQLSSLSNITNRINQLLKESNQENLIENVVNSIDQFDKTQENLDELIYLSKQEIIRVKPLIKEVTIAAGHLNNILSTINDEETLKDIKLTIEAAESISGKFDNMSDDFEQLMKDKELTKSIRDLTIGLSKFLNEIYP,MRRSLRDSIVGFSLLGGLLVFTFFSFWLRGVKLSSKNWYLFAEFNNASGLSKKSPVTYRGILVGSIEDILFTNESIKAKIVLNNPEIILPKPAFARVVTNSFLGGDVQVALETSEKTIPKNTPKAISDKCDSKLIICQGDTITGKQLSSLSNITNRINQLLKESNQENLIENVVNSIDQFDKTQENLDELIYLSKQEIIRVKPLIKEVTIAAGHLNNILSTINDEETLKDIKLTIEAAESISGKFDNMSDDFEQLMKDKELTKSIRDLTIGLSKFLNEIYP
15
+ MASSETYEFLFVKPGDHVVIKNEKPPGNTQNGRQEYWIGQIISCIGGARNPNSWTLFQVADIDNGEIIIINADTVERILKTAEN,MASSETYEFLFVKPGDHVVIKNEKPPGNTQNGRQEYWIGQIISCIGGARNPNSWTLFQVADIDNGEIIIINADTVERILKTAEN
16
+ LSVDRELLKEVTQELWNTVKKLRPEIDRETRLQLVLKALLTIGDLPDQLQAAMVVGVCAEMDKSDFENADGNSNTKEESNSTSVDTSTGRKVFRRSSAK,LSVDRELLKEVTQELWNTVKKLRPEIDRETRLQLVLKALLTIGDLPDQLQAAMVVGVCAEMDKSDFENADGNSNTKEESNSTSVDTSTGRKVFRRSSAK
17
+ LQIGDKVPQFSLLDQNGTKRSNNGLKTPLVLFFYPKDDTPGCTIEVCGFRDKYDLFKVLGAQVWGVSNGSSSSHLAFANKNKLQYPLLCDKNDSLRKAFKVPKVLGLLDGRVTYVIDRNGFVKHIFRDLLNGPEHIKEAIRVLKEIQNQ,LQIGDKVPQFSLLDQNGTKRSNNGLKTPLVLFFYPKDDTPGCTIEVCGFRDKYDLFKVLGAQVWGVSNGSSSSHLAFANKNKLQYPLLCDKNDSLRKAFKVPKVLGLLDGRVTYVIDRNGFVKHIFRDLLNGPEHIKEAIRVLKEIQNQ
18
+ MFSINKSNFMKKIGMQAVDEAIENGIDLDGTPIPSKMLELYNRIMSEENKRERSGVKKSMRNRCVKTGSKHFDKETLDQLLIDSGWEGLKEKEILFFYS,MFSINKSNFMKKIGMQAVDEAIENGIDLDGTPIPSKMLELYNRIMSEENKRERSGVKKSMRNRCVKTGSKHFDKETLDQLLIDSGWEGLKEKEILFFYS
19
+ VNSNSSNQVGKNIRRTGFLIVLSYLLIVLIMKVLEANNFFGYSLSSFSNDIFAPPSLKHLCGTDRLGRDVCLRTLQGSSIAIEVVFLAIFFALILGLPLGLLSGYFGGILDKCLSLVMDTIFSIPVILLAVVVAFVLGKGIINASIALCIVYSPQYFRLIRNQTMLIKSETYVEAARVSGADVKTIIFKYILPNVITPLPILLTLNAADAVLVLGSLGFLGLGVPANVPEWGSDLNLALAAIPTGIWWTALFPGLAMFFLVLGLSFIGEELENIFEN,LEGINLNQIGVSFKGSGSYVPNQILTNQEISKKVETSDEWIKSRTGISQRRISGLSENVSEMGYKAALGAIEMARWDIETIDLIILATSTPNDLFGSAPEIQSKLGAINAVAFDLTAACSGFLFAAITATQFLKAGSYKRAVVIGSDQLSSYVDWNDRRSCILFGDGAGAIAIEGTNELDNLLGFSMRTDGQRGSFLNLPSQNNQDLIINDINFSSGGFSSIKMNGQEVYKFAVREVPLIIDNLFKKTNFNSEKINWLLLHQANQRILDSVGERLNVSTEKILSNLSNYGNTSAATIPLMLDEAIRNKKIKENDIIATSGFGAGLSWGAALIRWG
20
+ MSNTESLTGKVALITGASRGIGKEIALELSNLGAKVIINYSSSDEKAEEVVNLIKESGGKVHKLKFDVSKEESVSKAFEEIIKINGAIDILVNNAGITRDGLLMRMKSEQWDDVLNTNLKGVFLCTKYASKFMIKKRSGKIINISSIVGIIGNPGQANYSAAKAGVIGFTKTCAKEFASRGINVNAIAPGFIETEMTEKLNNEEIIKAIPLGKLGSCSQIANLVSFLVSSNAGSYITGQTISIDGGMSI,MSNTESLTGKVALITGASRGIGKEIALELSNLGAKVIINYSSSDEKAEEVVNLIKESGGKVHKLKFDVSKEESVSKAFEEIIKINGAIDILVNNAGITRDGLLMRMKSEQWDDVLNTNLKGVFLCTKYASKFMIKKRSGKIINISSIVGIIGNPGQANYSAAKAGVIGFTKTCAKEFASRGINVNAIAPGFIETEMTEKLNNEEIIKAIPLGKLGSCSQIANLVSFLVSSNAGSYITGQTISIDGGMSI
21
+ MHKVLAIETSCDETSVSIVSNSGDIYKIHSNIVASQIEDHSKWGGVVPELAARKHLELLPFVLEQALEESKIRIEKIDVIASTVTPGLVGCLRVGSITARSLCTLYSKPFLGIHHLEGHLSSILFSKNYPKPPFLTLLVSGGHTELIKVGERRKMQRLGRSYDDAAGEAFDKVGRLLGLSYPGGPAIAKIAKKGNASKFNLPKCKISDKEGGFLKYDFSFSGLKTAVLRLVEKINLNGDEIPIPDIAASFERVVAEVLVERTIKCANDYGLDNIVVVGGVAANDTLRKMMISEACKKSIKVHLAPINLCTDNAAMIGAAALYRLKFKAYESSLKLGISGRLPIDQANTLYENKPPF,LVKPKSPDNKISNHLQQDVVKIAGKTIFINPFLYWRRFDENTNRWLREPGQMSEEQIQPNRNRFYPEIDWADLSQNQKLVKDASVEMFLKTLELISTFHPQLNSGQLLEVERKMAITKKLPFEKWVTKSFAKKARAEEYEKRKFKRDRFIRSWKEWLSLENTQQALLPIIVVVFVSAFIGWSSGVSKNSCNPYFEQNLDQSI
22
+ MHKVLAIETSCDETSVSIVSNSGDIYKIHSNIVASQIEDHSKWGGVVPELAARKHLELLPFVLEQALEESKIRIEKIDVIASTVTPGLVGCLRVGSITARSLCTLYSKPFLGIHHLEGHLSSILFSKNYPKPPFLTLLVSGGHTELIKVGERRKMQRLGRSYDDAAGEAFDKVGRLLGLSYPGGPAIAKIAKKGNASKFNLPKCKISDKEGGFLKYDFSFSGLKTAVLRLVEKINLNGDEIPIPDIAASFERVVAEVLVERTIKCANDYGLDNIVVVGGVAANDTLRKMMISEACKKSIKVHLAPINLCTDNAAMIGAAALYRLKFKAYESSLKLGISGRLPIDQANTLYENKPPF,VNSNSSNQVGKNIRRTGFLIVLSYLLIVLIMKVLEANNFFGYSLSSFSNDIFAPPSLKHLCGTDRLGRDVCLRTLQGSSIAIEVVFLAIFFALILGLPLGLLSGYFGGILDKCLSLVMDTIFSIPVILLAVVVAFVLGKGIINASIALCIVYSPQYFRLIRNQTMLIKSETYVEAARVSGADVKTIIFKYILPNVITPLPILLTLNAADAVLVLGSLGFLGLGVPANVPEWGSDLNLALAAIPTGIWWTALFPGLAMFFLVLGLSFIGEELENIFEN
23
+ MHKVLAIETSCDETSVSIVSNSGDIYKIHSNIVASQIEDHSKWGGVVPELAARKHLELLPFVLEQALEESKIRIEKIDVIASTVTPGLVGCLRVGSITARSLCTLYSKPFLGIHHLEGHLSSILFSKNYPKPPFLTLLVSGGHTELIKVGERRKMQRLGRSYDDAAGEAFDKVGRLLGLSYPGGPAIAKIAKKGNASKFNLPKCKISDKEGGFLKYDFSFSGLKTAVLRLVEKINLNGDEIPIPDIAASFERVVAEVLVERTIKCANDYGLDNIVVVGGVAANDTLRKMMISEACKKSIKVHLAPINLCTDNAAMIGAAALYRLKFKAYESSLKLGISGRLPIDQANTLYENKPPF,MHKVLAIETSCDETSVSIVSNSGDIYKIHSNIVASQIEDHSKWGGVVPELAARKHLELLPFVLEQALEESKIRIEKIDVIASTVTPGLVGCLRVGSITARSLCTLYSKPFLGIHHLEGHLSSILFSKNYPKPPFLTLLVSGGHTELIKVGERRKMQRLGRSYDDAAGEAFDKVGRLLGLSYPGGPAIAKIAKKGNASKFNLPKCKISDKEGGFLKYDFSFSGLKTAVLRLVEKINLNGDEIPIPDIAASFERVVAEVLVERTIKCANDYGLDNIVVVGGVAANDTLRKMMISEACKKSIKVHLAPINLCTDNAAMIGAAALYRLKFKAYESSLKLGISGRLPIDQANTLYENKPPF
24
+ MIKNPIQEVTNKLQYRAIGIVKGIYKPNNIDQLNRGTLTDKEGKIIETVILGKAIALIKKYINLEKDYFWIVYPRNKNINNLHLQVAGIWDPYQLNQFDKNNSEKDPNQLLEELNLNNNYFSIRGELVYVNIKKKEIVIKICSSPPSKRSKYLTFKIIIEGEIPLQFLNNFVSLDVIRDGNTLRMAKYEIIEKIKPEKV,MIKNPIQEVTNKLQYRAIGIVKGIYKPNNIDQLNRGTLTDKEGKIIETVILGKAIALIKKYINLEKDYFWIVYPRNKNINNLHLQVAGIWDPYQLNQFDKNNSEKDPNQLLEELNLNNNYFSIRGELVYVNIKKKEIVIKICSSPPSKRSKYLTFKIIIEGEIPLQFLNNFVSLDVIRDGNTLRMAKYEIIEKIKPEKV
25
+ MTKFKLKIASRRSKLAMVQTLWVKEQLEKNIPDLEVSIEAMATQGDKILDVALAKIGDKGLFTKELEAQMLVGHADIAVHSLKDLPTNLPDGLTLGCITKREDPSDALVVNKKNKIYQLESLPPGSIVGTSSLRRLAQLRYKFPHLDFKDIRGNVITRIEKLDSGEFDCIILAAAGLKRLGFESRVHQIIPNEISLHAVGQGALGIECKSDDKEVLKIISVLEDKVSSQRCLAERSFLRELEGGCQVPIGVNSSIQNDEIALIGMVASIDGKRLIKNESIGNIKYPEEVGKKLAEKLKLQGADKILSEIFEQFRDK,VPNNQNRDNFIDKAFTVIAESIVKIMPIADKEKKAYIYYRDGLAAQNNGDYSEALDYYNESLLLEENKIDRGETLKNMAIIYMSNGEEDRSIETYQKALEENPKQPSCLKNIGLIYEKRGRFAEQNGDLDQRDMWFDKAAQVWSKAVRLYPGGYLDIENWLKTSGRSSIDIYL
26
+ MTKFKLKIASRRSKLAMVQTLWVKEQLEKNIPDLEVSIEAMATQGDKILDVALAKIGDKGLFTKELEAQMLVGHADIAVHSLKDLPTNLPDGLTLGCITKREDPSDALVVNKKNKIYQLESLPPGSIVGTSSLRRLAQLRYKFPHLDFKDIRGNVITRIEKLDSGEFDCIILAAAGLKRLGFESRVHQIIPNEISLHAVGQGALGIECKSDDKEVLKIISVLEDKVSSQRCLAERSFLRELEGGCQVPIGVNSSIQNDEIALIGMVASIDGKRLIKNESIGNIKYPEEVGKKLAEKLKLQGADKILSEIFEQFRDK,VNSNSSNQVGKNIRRTGFLIVLSYLLIVLIMKVLEANNFFGYSLSSFSNDIFAPPSLKHLCGTDRLGRDVCLRTLQGSSIAIEVVFLAIFFALILGLPLGLLSGYFGGILDKCLSLVMDTIFSIPVILLAVVVAFVLGKGIINASIALCIVYSPQYFRLIRNQTMLIKSETYVEAARVSGADVKTIIFKYILPNVITPLPILLTLNAADAVLVLGSLGFLGLGVPANVPEWGSDLNLALAAIPTGIWWTALFPGLAMFFLVLGLSFIGEELENIFEN
27
+ MTKFKLKIASRRSKLAMVQTLWVKEQLEKNIPDLEVSIEAMATQGDKILDVALAKIGDKGLFTKELEAQMLVGHADIAVHSLKDLPTNLPDGLTLGCITKREDPSDALVVNKKNKIYQLESLPPGSIVGTSSLRRLAQLRYKFPHLDFKDIRGNVITRIEKLDSGEFDCIILAAAGLKRLGFESRVHQIIPNEISLHAVGQGALGIECKSDDKEVLKIISVLEDKVSSQRCLAERSFLRELEGGCQVPIGVNSSIQNDEIALIGMVASIDGKRLIKNESIGNIKYPEEVGKKLAEKLKLQGADKILSEIFEQFRDK,MTKFKLKIASRRSKLAMVQTLWVKEQLEKNIPDLEVSIEAMATQGDKILDVALAKIGDKGLFTKELEAQMLVGHADIAVHSLKDLPTNLPDGLTLGCITKREDPSDALVVNKKNKIYQLESLPPGSIVGTSSLRRLAQLRYKFPHLDFKDIRGNVITRIEKLDSGEFDCIILAAAGLKRLGFESRVHQIIPNEISLHAVGQGALGIECKSDDKEVLKIISVLEDKVSSQRCLAERSFLRELEGGCQVPIGVNSSIQNDEIALIGMVASIDGKRLIKNESIGNIKYPEEVGKKLAEKLKLQGADKILSEIFEQFRDK
28
+ MLVKKMTELFSRFFVKAISFAICISVFFTLFNSPSYAAKTSMTGDYAKDTISVVKTLQIAVETPKDSPDKDKVRDESLALITDYISRYRNRGMVNKTQSFTTMQTALNAMAGHYKNFATRPLPDKLKERLTKEFTLAEKMVLRES,MLVKKMTELFSRFFVKAISFAICISVFFTLFNSPSYAAKTSMTGDYAKDTISVVKTLQIAVETPKDSPDKDKVRDESLALITDYISRYRNRGMVNKTQSFTTMQTALNAMAGHYKNFATRPLPDKLKERLTKEFTLAEKMVLRES
29
+ LKKITSILIIFFLIVLYPIRTYSAEILQINNSSSILVGDQNRDLPIKLFCVEINNEDDEKIALNLLKKEFPRGSKVKIKPIGFKENILTARVFNINETKEMSDLLISKNLSKETCQN,LKKITSILIIFFLIVLYPIRTYSAEILQINNSSSILVGDQNRDLPIKLFCVEINNEDDEKIALNLLKKEFPRGSKVKIKPIGFKENILTARVFNINETKEMSDLLISKNLSKETCQN
30
+ MESIFNNSFATLVAYVGIVSIYLLVIPLILFYWMNNRWNVMGKFERLIVYGLVFLFFPGLILFSPFLNLRLRGDSKG,MESIFNNSFATLVAYVGIVSIYLLVIPLILFYWMNNRWNVMGKFERLIVYGLVFLFFPGLILFSPFLNLRLRGDSKG
31
+ LTKGKVVQIGLLISLLGLLSYKLAPQLGIDNFTASTISNFVLIVIVISWVTSYVLRVLNGKMTFMEQRKRYRKEYEKIVNDKLETKFNLLPKEEQEKLMEDLEKNP,LTKGKVVQIGLLISLLGLLSYKLAPQLGIDNFTASTISNFVLIVIVISWVTSYVLRVLNGKMTFMEQRKRYRKEYEKIVNDKLETKFNLLPKEEQEKLMEDLEKNP
32
+ MNDSYYKDKEKIYDAEVLESSSLDENIIIKILIRAGRTIAKPALEVLEMALDPFTPTQVRVSLMAALAYLIMPFDLFPDFMPLVGYSDDFVALTAVLSIWSRYMTPAIRARAERKLNKLFPFVK,MNDSYYKDKEKIYDAEVLESSSLDENIIIKILIRAGRTIAKPALEVLEMALDPFTPTQVRVSLMAALAYLIMPFDLFPDFMPLVGYSDDFVALTAVLSIWSRYMTPAIRARAERKLNKLFPFVK
33
+ MLRSIFAGFFAIVLTLGLGISSVSAKTVEVKLGTDAGMLAFEPSSVTISTGDTVKFINNKLAPHNAVFDGHEELSHADLAFAPGESWEETFDTAGTFDYYCEPHRGAGMVGKVIVE,MKGHKKIRFIFPLVAMYVPLLLLAPKAIAGSFGAEIFCTMRDGGNDHESSWQAAYSYIKKQKGGIFKTSPKQAAGQIIETVVRERDKFSYCVEFLDQLHPDRKLQLENDRKEKRRKKEELLQDKENEDYSKETFDRYSY
34
+ MLRSIFAGFFAIVLTLGLGISSVSAKTVEVKLGTDAGMLAFEPSSVTISTGDTVKFINNKLAPHNAVFDGHEELSHADLAFAPGESWEETFDTAGTFDYYCEPHRGAGMVGKVIVE,VQFIDQANIILKAGKGGNGIVSFRREKFVPAGGPSGGNGGKGGSIIIIADNNLQTLLDFKFNREIFAKDGFKGGPNKRSGASGENTILKVPCGTEIRDFNTGIILGDLTEDKQSLTIAHGGRGGHGNAYYLSNQNRAPESFTEGKEGEIWEVQLELKLLAEVGIIGLPNAGKSTLISVLSSARPKIANYPFTTLIPNLGVVRKADGNGCLFADIPGLISGAAEGVGLGHDFLRHIQRTKILIHLIDSIAENPIRDFEIIEKELKRYGSGLLNKERIVVLNKMELVDENYLQTITKKLENLSKKKVLVISSSLRKGLSPLLSEVWKRI
35
+ MTNKKRILSGVQPTGDLHIGNWLGAINNWVELQEKHETFLCVVDLHAITTEYDTKQLSKNTLSTAALYIACGINPKICSIFVQSQISAHSELCWILNCMTPINWMERMIQFKEKSIQQGNNVSIGLFDYPILMAADILLYDADYVPVGEDQKQHLELAKDIAQQRINAKFGKEENILKIPQPIIMKKGSKIMSLNDGSKKMSKSDINEGSRINLLDTPEIITKKIKRAKSDSYMGMEFNNPERPESRNLLMIYSLLSGKEVSELENDLSQTGWGTFKKIFTEQIIESLKPIQERYQVLINDPHELNKILIQGKEKAEVVANKTLSRVKSELGFFEIEK,MTNKKRILSGVQPTGDLHIGNWLGAINNWVELQEKHETFLCVVDLHAITTEYDTKQLSKNTLSTAALYIACGINPKICSIFVQSQISAHSELCWILNCMTPINWMERMIQFKEKSIQQGNNVSIGLFDYPILMAADILLYDADYVPVGEDQKQHLELAKDIAQQRINAKFGKEENILKIPQPIIMKKGSKIMSLNDGSKKMSKSDINEGSRINLLDTPEIITKKIKRAKSDSYMGMEFNNPERPESRNLLMIYSLLSGKEVSELENDLSQTGWGTFKKIFTEQIIESLKPIQERYQVLINDPHELNKILIQGKEKAEVVANKTLSRVKSELGFFEIEK
36
+ LGRSRKTNQLIREFLSELKEVLTKDGSITLRSLIFQENFHSLEGALKETEIKFIYPSDLKRFKDKSLNVLDICFGLGYNSASLFNNVIRQNSLINWYALEIDKKPLEYSLGNKSFQKLWHPKVFKILKALLKNSKYKDQFFDCDILWGDAREKIKNIPANIKFDLIYLDGFSPQKCPQVWSVEFLSKVTQKLNPQGYLITYSCSAAIRSTLKDFGLNIFNNKPNLVSKNLWSYGTIAVKNIDEKVLQNNLYLKKLSWMEEEHLLTKASIPYRDPTLNSNPKDIIKKRVQEQFLSNLKTSKKWRDKWGMTK,LGRSRKTNQLIREFLSELKEVLTKDGSITLRSLIFQENFHSLEGALKETEIKFIYPSDLKRFKDKSLNVLDICFGLGYNSASLFNNVIRQNSLINWYALEIDKKPLEYSLGNKSFQKLWHPKVFKILKALLKNSKYKDQFFDCDILWGDAREKIKNIPANIKFDLIYLDGFSPQKCPQVWSVEFLSKVTQKLNPQGYLITYSCSAAIRSTLKDFGLNIFNNKPNLVSKNLWSYGTIAVKNIDEKVLQNNLYLKKLSWMEEEHLLTKASIPYRDPTLNSNPKDIIKKRVQEQFLSNLKTSKKWRDKWGMTK
37
+ LKNGADSIQVFSDLELLKKTAKKWDGNKRLMLAERGGKTIDGFDLGNSPLSVTKETVQGKRLFMSTTNGTKSLKKVQNVENLFAMSLPNRKAVAERIISLNKKNVLILGSGWEGSYSLEDSLAAGALAIYIKENFNSEVNILNDELQASLALWNVWKNDILKCLKTATHGKRLTSLGDYEDDFKCCSELDCLDIVPTQVERGVIRAS,LKNGADSIQVFSDLELLKKTAKKWDGNKRLMLAERGGKTIDGFDLGNSPLSVTKETVQGKRLFMSTTNGTKSLKKVQNVENLFAMSLPNRKAVAERIISLNKKNVLILGSGWEGSYSLEDSLAAGALAIYIKENFNSEVNILNDELQASLALWNVWKNDILKCLKTATHGKRLTSLGDYEDDFKCCSELDCLDIVPTQVERGVIRAS
38
+ LISEIKELCLKANAIILAHYYQAPEIQDIADFIGDSLDLSRKAANNDADTIVFCGVHFMAETAKILSPNKTVLLPDIDAGCSLADDCPAEEFQKFREENPDHYVVSYINCTAEVKAQSDLICTSSNAVSLVEKIPKDKKIIFAPDKNLGRWVQKNSGRKLKLWPGSCIVHETFSEEALLKLKYKHPDAKVIAHPECSQNLLVLSDFIGSTSKLLDFVSNDYSDTYMVLTEPGIIHQMKKKEPNKNFIEVPDIDGCKCNECPYMKLNTLEKILDCLKNNSPSIELDPEIIKKAYKPIKRMLDMSI,LISEIKELCLKANAIILAHYYQAPEIQDIADFIGDSLDLSRKAANNDADTIVFCGVHFMAETAKILSPNKTVLLPDIDAGCSLADDCPAEEFQKFREENPDHYVVSYINCTAEVKAQSDLICTSSNAVSLVEKIPKDKKIIFAPDKNLGRWVQKNSGRKLKLWPGSCIVHETFSEEALLKLKYKHPDAKVIAHPECSQNLLVLSDFIGSTSKLLDFVSNDYSDTYMVLTEPGIIHQMKKKEPNKNFIEVPDIDGCKCNECPYMKLNTLEKILDCLKNNSPSIELDPEIIKKAYKPIKRMLDMSI
39
+ MNRKSNNSNPTGNLDYDKILEEEIINSYENKFEANSNINNKNKRFYRLKRTPLEVINRLFFFFFVGSFIFSFFLAYSENKVWFIIYLISAFSCIFYTPNRKALKELIAAWPNIEDLIKGRSLWRKDNK,MNRKSNNSNPTGNLDYDKILEEEIINSYENKFEANSNINNKNKRFYRLKRTPLEVINRLFFFFFVGSFIFSFFLAYSENKVWFIIYLISAFSCIFYTPNRKALKELIAAWPNIEDLIKGRSLWRKDNK
40
+ MKLRLFEFYFIKDYLRPWFGLIYSLFFLFFLGAIGYRITEGWDWGDCLWMVLITITTIGFGEVQTLSPEGRIITVLIIVGGLIFIQFTFQKAVRLFESGYFQRVNELRFKRLLRKMENHVILCGYGRVGQEISNQIKTQNIPIIVVESDEDRKKIAEDNGLEVLCADATLDETLKLAGLDKCKSLVVTLPNDAANLYVVLSAKGIRSSIRVIARAGTEEAASKLRLAGASIVVSPYIAAGRAMASMALRPIAIDFLDLLAGSECEIEEFELSNDISLFETAEKITLLELGIGKKSGAKILAIKEDEKLITNPGGDFLLQPGQVLIAFGSKEQLTTLNRLLGNLVVSVELLK,VPNNQNRDNFIDKAFTVIAESIVKIMPIADKEKKAYIYYRDGLAAQNNGDYSEALDYYNESLLLEENKIDRGETLKNMAIIYMSNGEEDRSIETYQKALEENPKQPSCLKNIGLIYEKRGRFAEQNGDLDQRDMWFDKAAQVWSKAVRLYPGGYLDIENWLKTSGRSSIDIYL
41
+ MKLRLFEFYFIKDYLRPWFGLIYSLFFLFFLGAIGYRITEGWDWGDCLWMVLITITTIGFGEVQTLSPEGRIITVLIIVGGLIFIQFTFQKAVRLFESGYFQRVNELRFKRLLRKMENHVILCGYGRVGQEISNQIKTQNIPIIVVESDEDRKKIAEDNGLEVLCADATLDETLKLAGLDKCKSLVVTLPNDAANLYVVLSAKGIRSSIRVIARAGTEEAASKLRLAGASIVVSPYIAAGRAMASMALRPIAIDFLDLLAGSECEIEEFELSNDISLFETAEKITLLELGIGKKSGAKILAIKEDEKLITNPGGDFLLQPGQVLIAFGSKEQLTTLNRLLGNLVVSVELLK,VSGWLFIIFLLLLGGLISTLGDLLGSKIGKARFSILKLRPKKTAILITILTGSLISASSLFLMILVNRQLRVGLFRLGDLQKKLQESKQVLIPLEKEREKLENKIKAKETEFKQLERNIIALRSGKFVIRSGQSLIISEISSSNLKDIKSKIEKIIINANRYTHKIVKPKNKEVKNLLLLRKNHIEEMQNIILKGGNWVINIKSVRNVLTGENFVYAFPEITENKIIVRKGEKITKIDFKQEDFNKKDFGDKVNFLLSSSLAEIKRRGSLVNEIKLRGDSIKELRDFLNKNDKTNFELEAVSLFNSKTAQPVIVELNVNYPES
42
+ MKLRLFEFYFIKDYLRPWFGLIYSLFFLFFLGAIGYRITEGWDWGDCLWMVLITITTIGFGEVQTLSPEGRIITVLIIVGGLIFIQFTFQKAVRLFESGYFQRVNELRFKRLLRKMENHVILCGYGRVGQEISNQIKTQNIPIIVVESDEDRKKIAEDNGLEVLCADATLDETLKLAGLDKCKSLVVTLPNDAANLYVVLSAKGIRSSIRVIARAGTEEAASKLRLAGASIVVSPYIAAGRAMASMALRPIAIDFLDLLAGSECEIEEFELSNDISLFETAEKITLLELGIGKKSGAKILAIKEDEKLITNPGGDFLLQPGQVLIAFGSKEQLTTLNRLLGNLVVSVELLK,LGRSRKTNQLIREFLSELKEVLTKDGSITLRSLIFQENFHSLEGALKETEIKFIYPSDLKRFKDKSLNVLDICFGLGYNSASLFNNVIRQNSLINWYALEIDKKPLEYSLGNKSFQKLWHPKVFKILKALLKNSKYKDQFFDCDILWGDAREKIKNIPANIKFDLIYLDGFSPQKCPQVWSVEFLSKVTQKLNPQGYLITYSCSAAIRSTLKDFGLNIFNNKPNLVSKNLWSYGTIAVKNIDEKVLQNNLYLKKLSWMEEEHLLTKASIPYRDPTLNSNPKDIIKKRVQEQFLSNLKTSKKWRDKWGMTK
43
+ MSIAKKALLFTSALALIAGPSVTASTRLSGAGASFPAKIYTRWFSDLAKSGGPRVNYQAVGSGSGRKAFIDQTVNFGASDDPMKDKDIAKVTRGLVQIPMVGGTIAFGYNYDCDLKLTQEQAVQVAMGMVKNWKELGCKAGKLTWAHRSDGSGTTKAFTNSMEAFSPTWTLGTGKSVKWPAGVGAKGNSGVAGVIQNTPGAIGYVNQSYIKGNVKAAALQNLSGEFLKPSVEAGAKALNGITLDENLAGKNPNPTAKGAYPIASLTWILAYEKGNGRNTKAIKKSLSTLLSDEYQDKAPTLGFVPLKGDILEKSRAAVKKIGR,MSIAKKALLFTSALALIAGPSVTASTRLSGAGASFPAKIYTRWFSDLAKSGGPRVNYQAVGSGSGRKAFIDQTVNFGASDDPMKDKDIAKVTRGLVQIPMVGGTIAFGYNYDCDLKLTQEQAVQVAMGMVKNWKELGCKAGKLTWAHRSDGSGTTKAFTNSMEAFSPTWTLGTGKSVKWPAGVGAKGNSGVAGVIQNTPGAIGYVNQSYIKGNVKAAALQNLSGEFLKPSVEAGAKALNGITLDENLAGKNPNPTAKGAYPIASLTWILAYEKGNGRNTKAIKKSLSTLLSDEYQDKAPTLGFVPLKGDILEKSRAAVKKIGR
44
+ MKIGINGFGRIGRLVFRALWDRADTEITHINEMAGDSNAAAHLLEFDSVHGRWVKDIKVKEEEIIIDGKKLAYTSFKNYLDVPWEKSSVDIILECTGKNKKPDKLNPYFDSLGMKRVIVACPVKGIVAEAESLNIVYGINQNLYDPSKHKLVTAASCTTNCLAPIVKVINENFSIKHGAITTIHDVTNTQVPVDFYKSDLRRARGCMQSLIPTTTGSAKAIAEIFPELKGKLNGHAVRVPLLNGSLTDAVFELNNAVTEEQVNNEFKKASETYLEGILGYEERPLVSADYVNDSRSSIVDSLSTMVVNSNLLKIYAWYDNEWGYSCRLADLTEYVIKKEI,MKIGINGFGRIGRLVFRALWDRADTEITHINEMAGDSNAAAHLLEFDSVHGRWVKDIKVKEEEIIIDGKKLAYTSFKNYLDVPWEKSSVDIILECTGKNKKPDKLNPYFDSLGMKRVIVACPVKGIVAEAESLNIVYGINQNLYDPSKHKLVTAASCTTNCLAPIVKVINENFSIKHGAITTIHDVTNTQVPVDFYKSDLRRARGCMQSLIPTTTGSAKAIAEIFPELKGKLNGHAVRVPLLNGSLTDAVFELNNAVTEEQVNNEFKKASETYLEGILGYEERPLVSADYVNDSRSSIVDSLSTMVVNSNLLKIYAWYDNEWGYSCRLADLTEYVIKKEI
45
+ MKNSIKITQLFLLLIFLTSCKATANKQELIIDSEEQESQQTKLSKSKMEVRYSCGEDGISDFLNDGWIISKQYTEEKICTWKSFPATKDCDMEKDKGCKITTPDKIGEEKVYLLEK,MKNSIKITQLFLLLIFLTSCKATANKQELIIDSEEQESQQTKLSKSKMEVRYSCGEDGISDFLNDGWIISKQYTEEKICTWKSFPATKDCDMEKDKGCKITTPDKIGEEKVYLLEK
46
+ MNFEIKNVFLTIEGKSIVNDVSIKVCPGEIVGLMGPNGAGKTSTFNLAVGNLRPDKGDILINSKSIKNLPLPIRAKLGLGYLTQEASIFRDLTVKENIDLALENSFSSRAIVRNKREKIINEFNLNKVVDNYGYQLSGGERRRCEIARALSVGRQGPKYLLLDEPFAGIDPLAVNDLKKLIIKLRDNGMGILITDHNVRETLLITSKSYVLSEGKILAHGSSDELANNQIVKKFYLGVDFQL,MAAKEHKSLQGSKILLIEDDKSIRLTVTESLISEGFEVSNFKDGSSALDFILGEGIKDFDLILLDLMLPGLNGLELCRKIRNEELYTPILILSAKGNESDRVLGLEVGADDYLTKPFGISELIARCRALLRRSKRGKEKKQKIETIIEYKNIKMFTEECRVTNFNQEIILSPKEFKLLELFIKNPKRVWSRDLILEKIWAIDFIGDTKTVDVHVRWLREKLEENPSAPKIIKTVRGFGYRFG
47
+ MNFEIKNVFLTIEGKSIVNDVSIKVCPGEIVGLMGPNGAGKTSTFNLAVGNLRPDKGDILINSKSIKNLPLPIRAKLGLGYLTQEASIFRDLTVKENIDLALENSFSSRAIVRNKREKIINEFNLNKVVDNYGYQLSGGERRRCEIARALSVGRQGPKYLLLDEPFAGIDPLAVNDLKKLIIKLRDNGMGILITDHNVRETLLITSKSYVLSEGKILAHGSSDELANNQIVKKFYLGVDFQL,MNFEIKNVFLTIEGKSIVNDVSIKVCPGEIVGLMGPNGAGKTSTFNLAVGNLRPDKGDILINSKSIKNLPLPIRAKLGLGYLTQEASIFRDLTVKENIDLALENSFSSRAIVRNKREKIINEFNLNKVVDNYGYQLSGGERRRCEIARALSVGRQGPKYLLLDEPFAGIDPLAVNDLKKLIIKLRDNGMGILITDHNVRETLLITSKSYVLSEGKILAHGSSDELANNQIVKKFYLGVDFQL
48
+ MLLSKLVDLIKSGESKFIKANIFENIDIENAASIDIALKNQISFLEENNILKDNLGKTSASAIITSNNNEILGLLESLNISNIVVENPRIAFAEVLNFLYEEINFNPGIDDSAVIKSSAKVGKNCYVGPNVYIGENSIIGDNNKIFPGTTILGNVRLGNNNVIHPNCVIYENTSIENNCVINSNTVIGSEGFGFIPQDGKWIKMPQKGCVIIKSFVEIGTNCCIDRPSVGNTFIDEGTKMDNLVQIGHGVKIGKNCAFAAQVGIAGGAVIGNSVILAGQVGVNNRVKVGNNVIASSKCGIHCDIEDGEVVSGFPAMKNKSWLRSSSVFKKLPELAKKLRQLDKK,MLLSKLVDLIKSGESKFIKANIFENIDIENAASIDIALKNQISFLEENNILKDNLGKTSASAIITSNNNEILGLLESLNISNIVVENPRIAFAEVLNFLYEEINFNPGIDDSAVIKSSAKVGKNCYVGPNVYIGENSIIGDNNKIFPGTTILGNVRLGNNNVIHPNCVIYENTSIENNCVINSNTVIGSEGFGFIPQDGKWIKMPQKGCVIIKSFVEIGTNCCIDRPSVGNTFIDEGTKMDNLVQIGHGVKIGKNCAFAAQVGIAGGAVIGNSVILAGQVGVNNRVKVGNNVIASSKCGIHCDIEDGEVVSGFPAMKNKSWLRSSSVFKKLPELAKKLRQLDKK
49
+ MSSNFKNLYTSNNPPLEMILMRGSKLESIHKVHAVISDKKGRVLMCAGNPEYKSFIRSALKPFQAIPFVSSGASSKIKNSSKSIALSCGSHSGSKLHAREAFKILWEYNIDIHNLKCPIKKTSPLEHNCSGKHAAFLATCKKLNWPLETYLKGDHPLQVEIFRIISELLEIPLEQIYAERDDCGAPTLYMKILEMAKLYSLLSSSDNAELEQISRAMTINPTMISDHNRFDTEVIQASHGHVISKGGAEGIQCFCKVNEGMGLALKVEDGSKRAKQSVGLHILKQLEWISELRIQDIEDKIIKLPEGVQIEVKGQLKFQES,VPNNQNRDNFIDKAFTVIAESIVKIMPIADKEKKAYIYYRDGLAAQNNGDYSEALDYYNESLLLEENKIDRGETLKNMAIIYMSNGEEDRSIETYQKALEENPKQPSCLKNIGLIYEKRGRFAEQNGDLDQRDMWFDKAAQVWSKAVRLYPGGYLDIENWLKTSGRSSIDIYL
50
+ VRIIFWGTPEYSVKSLEVLKKSDHDIVAVITQPDKKRSRGNKLISSPVKEYATKENIPVFTPETIKENIQFISILNDLSCDLFIVIAYGKILPKAILDIPKYKSWNAHASLLPRWRGAAPIQWSILEGDKITGVGIMRMEEGLDTGDVLVEKQIKIENNDNLKTLTKKLSDLSSELFLRAISDIEQNKNRDINLLLKKQTDFKRELKYARMINKLDYIINWENSATDIYRKINALYPRANTTYKRKNLKIIKIKILTTHEIHNKNYKILSNVFKPGLIIGLIKNVGIIITTKTDPILLLEAKLEGKKVSSQNQLIQQLNPVIGENFSD,VRIIFWGTPEYSVKSLEVLKKSDHDIVAVITQPDKKRSRGNKLISSPVKEYATKENIPVFTPETIKENIQFISILNDLSCDLFIVIAYGKILPKAILDIPKYKSWNAHASLLPRWRGAAPIQWSILEGDKITGVGIMRMEEGLDTGDVLVEKQIKIENNDNLKTLTKKLSDLSSELFLRAISDIEQNKNRDINLLLKKQTDFKRELKYARMINKLDYIINWENSATDIYRKINALYPRANTTYKRKNLKIIKIKILTTHEIHNKNYKILSNVFKPGLIIGLIKNVGIIITTKTDPILLLEAKLEGKKVSSQNQLIQQLNPVIGENFSD
51
+ MRNEKYWVKALDQTHLSITNNGLFPLKTTVVTREYYNKNDFIIRELDTSRFTKKNNYGPNQNPFNPWDKILEVDKVGTNHQLILNKYPVQKGHILLITNTWRPQDGWLDINDWIAIQMVNEDTSGLWFFNSSPIAGASQPHRHFQLLRRDHGEIICPREKWFLDFENNNDQDSKLKKNTVVSKFNFLNNSINIYNLYLELSNKIGLGNPIDDEKPRFPYNILITNNWIAIIKRKYDHVHGFSVNGLGFAGYLLVTEKSNINYLKKYGPEKLLENFV,MRNEKYWVKALDQTHLSITNNGLFPLKTTVVTREYYNKNDFIIRELDTSRFTKKNNYGPNQNPFNPWDKILEVDKVGTNHQLILNKYPVQKGHILLITNTWRPQDGWLDINDWIAIQMVNEDTSGLWFFNSSPIAGASQPHRHFQLLRRDHGEIICPREKWFLDFENNNDQDSKLKKNTVVSKFNFLNNSINIYNLYLELSNKIGLGNPIDDEKPRFPYNILITNNWIAIIKRKYDHVHGFSVNGLGFAGYLLVTEKSNINYLKKYGPEKLLENFV
52
+ MTTSSKKDYLSILGLSSKFDDIELKKAFRREARKWHPDLNKNDINAEDRFKLINEAYEFLRDPVRRVKSIDSNSSNEEIYNKYSTGFPEFKDYLNSLFGFEYESELDNESYDQTSDFYEDEKPNAIFNEEEFNSYDYPARSPEEPPPVKLHQDIETIIELTPDEALSGASILIELEDQTVVEVDTPPFAGDGWRLRLENIAKGGKDHYLQLKVQTENGLRIDGLRVLYKLELFPPDALLGCAVEVPTLDGNVTLQVPPKSSTGRLLRLKGRGLSFGDNIGDQFVEILVVIPADINDEEIALYTRLQELSLSDE,MTTSSKKDYLSILGLSSKFDDIELKKAFRREARKWHPDLNKNDINAEDRFKLINEAYEFLRDPVRRVKSIDSNSSNEEIYNKYSTGFPEFKDYLNSLFGFEYESELDNESYDQTSDFYEDEKPNAIFNEEEFNSYDYPARSPEEPPPVKLHQDIETIIELTPDEALSGASILIELEDQTVVEVDTPPFAGDGWRLRLENIAKGGKDHYLQLKVQTENGLRIDGLRVLYKLELFPPDALLGCAVEVPTLDGNVTLQVPPKSSTGRLLRLKGRGLSFGDNIGDQFVEILVVIPADINDEEIALYTRLQELSLSDE
53
+ LKKTFKVTITNKETGKIYQENISDQEYILKEFEKKGLRLPFSCRNGCCTSCAVKIISGKLDQPEAMGVSQDLKDKGYALLCVAKVIEDIEVETTYYDEVYDLQFGQYFGKGKTRKAPPWEFEED,MKGHKKIRFIFPLVAMYVPLLLLAPKAIAGSFGAEIFCTMRDGGNDHESSWQAAYSYIKKQKGGIFKTSPKQAAGQIIETVVRERDKFSYCVEFLDQLHPDRKLQLENDRKEKRRKKEELLQDKENEDYSKETFDRYSY
54
+ MDFKTYQKQARLTAQYPNLGSNNIYPTLGLVGEAGEVAEKVKKVIRDKKGIFDEESKKGIKKELGDVLWYISNLCNEFNFELEEVALQNLEKLKLRAAKGKISGSGDDR,MDFKTYQKQARLTAQYPNLGSNNIYPTLGLVGEAGEVAEKVKKVIRDKKGIFDEESKKGIKKELGDVLWYISNLCNEFNFELEEVALQNLEKLKLRAAKGKISGSGDDR
55
+ MSFFQGKILLNFIIDLLNKPAINWSNFELNSSLQLNDFVDLLLEPLNTSQYSYNIKLGLHEALINAVTHGNKLDPNKSIRVRRIITPNWCVWQIQDQGNGLEIKKRLYKLPKKFTSFNGRGLYIINECFDDIRWSNKGNRLQLALKR,VPNNQNRDNFIDKAFTVIAESIVKIMPIADKEKKAYIYYRDGLAAQNNGDYSEALDYYNESLLLEENKIDRGETLKNMAIIYMSNGEEDRSIETYQKALEENPKQPSCLKNIGLIYEKRGRFAEQNGDLDQRDMWFDKAAQVWSKAVRLYPGGYLDIENWLKTSGRSSIDIYL
56
+ MSFFQGKILLNFIIDLLNKPAINWSNFELNSSLQLNDFVDLLLEPLNTSQYSYNIKLGLHEALINAVTHGNKLDPNKSIRVRRIITPNWCVWQIQDQGNGLEIKKRLYKLPKKFTSFNGRGLYIINECFDDIRWSNKGNRLQLALKR,MSFFQGKILLNFIIDLLNKPAINWSNFELNSSLQLNDFVDLLLEPLNTSQYSYNIKLGLHEALINAVTHGNKLDPNKSIRVRRIITPNWCVWQIQDQGNGLEIKKRLYKLPKKFTSFNGRGLYIINECFDDIRWSNKGNRLQLALKR
57
+ VASTLLFTALKEAIDEEMANDVNVCIMGEDVGQYGGSYKVTKDLYEKYGELRVLDTPIAENSFTGMAVGAAMTGLRPIVEGMNMGFLLLAFNQISNNMGMLRYTSGGNYKIPAVVRGPGGVGRQLGAEHSQRLEAYFHAVPGIKIVACSTPTNAKGLMKAAIRDNNPVLFFEHVLLYNLSEELPEGDYICSLDQADLVKEGKDITILTYSRMRHHCLKAVEELDKKNIDVELIDLISLKPFDMKTISKSIKKTNNVIIVEECMKTGGIGAELIALITEECFDDLDTRPIRLSSQDIPTPYNGNLENLTIIQPHQIVEKVEEVINGSI,VASTLLFTALKEAIDEEMANDVNVCIMGEDVGQYGGSYKVTKDLYEKYGELRVLDTPIAENSFTGMAVGAAMTGLRPIVEGMNMGFLLLAFNQISNNMGMLRYTSGGNYKIPAVVRGPGGVGRQLGAEHSQRLEAYFHAVPGIKIVACSTPTNAKGLMKAAIRDNNPVLFFEHVLLYNLSEELPEGDYICSLDQADLVKEGKDITILTYSRMRHHCLKAVEELDKKNIDVELIDLISLKPFDMKTISKSIKKTNNVIIVEECMKTGGIGAELIALITEECFDDLDTRPIRLSSQDIPTPYNGNLENLTIIQPHQIVEKVEEVINGSI
58
+ MSKLSTTKICVKSPAKINLHLEIIGKRKDGYHELAMIMQNIDLSDYIEFENNQIGEIKLKSNSKDLSLDEDNLIIKAANYIKDMSKNKELGANIFLKKNIPIGAGLAGGSSNAAATLVGLNKLWDLDLDYETIFILSAKLGSDVPFFIEGGCQFCFGRGEILEKYSSNFDFGVILLKNPNISISTVDTYKKYSQEFCPKYFTETEKTNKIRNDLRVNGFNDFKLSEQRINVKNDLQVIVERENNSVKKALYLLSNLQNCLSYSMSGSGPTCFALFKDINIANEVFEQNYKMFNNNGFEAWVCKLINSGITLL,LVKPKSPDNKISNHLQQDVVKIAGKTIFINPFLYWRRFDENTNRWLREPGQMSEEQIQPNRNRFYPEIDWADLSQNQKLVKDASVEMFLKTLELISTFHPQLNSGQLLEVERKMAITKKLPFEKWVTKSFAKKARAEEYEKRKFKRDRFIRSWKEWLSLENTQQALLPIIVVVFVSAFIGWSSGVSKNSCNPYFEQNLDQSI
59
+ MSKLSTTKICVKSPAKINLHLEIIGKRKDGYHELAMIMQNIDLSDYIEFENNQIGEIKLKSNSKDLSLDEDNLIIKAANYIKDMSKNKELGANIFLKKNIPIGAGLAGGSSNAAATLVGLNKLWDLDLDYETIFILSAKLGSDVPFFIEGGCQFCFGRGEILEKYSSNFDFGVILLKNPNISISTVDTYKKYSQEFCPKYFTETEKTNKIRNDLRVNGFNDFKLSEQRINVKNDLQVIVERENNSVKKALYLLSNLQNCLSYSMSGSGPTCFALFKDINIANEVFEQNYKMFNNNGFEAWVCKLINSGITLL,VPNNQNRDNFIDKAFTVIAESIVKIMPIADKEKKAYIYYRDGLAAQNNGDYSEALDYYNESLLLEENKIDRGETLKNMAIIYMSNGEEDRSIETYQKALEENPKQPSCLKNIGLIYEKRGRFAEQNGDLDQRDMWFDKAAQVWSKAVRLYPGGYLDIENWLKTSGRSSIDIYL
60
+ MKNLKLKVIFKYLKPYKKEFLYGGIALLVVNILSILIPLEVKNIIDQLKDGFSSSFVISKSLFLMFLATCMGLIRLFSRQIVFGIGRKVEVNLRQKLFDHLLIQDPDWIQKKGSGDIISRATSDVENIRRLLGFTVLSLCNIVLAYSLTIPSMLSINKTLTVAALMIFPMILVIVSLFGGRMVSQRKIQQESLSKLSDLIQEDLSGISAIKIYAQEEAEKKQFNNYNKVYRNSAIKLARTASTLFPLLQGISSISLLILLGLGTSQLENGFITIGGLVALILFVERLVFPTALLGFTLNTFQLGQVSLDRVEEIFQNNPKITDKPKAKFIKKKVKGTIEAKNLKIKYEGAKFNSLNRLNFKINPGELIAIVGPVGCGKTTLAKSLGRTIEIPDGQLFLDDIDITNIKLRDLRKHIAIVPQEAFLFTSTISENLKFGDPKASRNVVKNSAVNAGLIDDINSFPDGFKTIVGERGITLSGGQRQRTALGRALLVDASVVVLDDALASVDNKTAAKIIEEMRANKSKTILMISHQLSVAATCDRVLVMDQGKIVQEGIHKDLITTNGLYKNLWEREIATNKIVS,MKNLKLKVIFKYLKPYKKEFLYGGIALLVVNILSILIPLEVKNIIDQLKDGFSSSFVISKSLFLMFLATCMGLIRLFSRQIVFGIGRKVEVNLRQKLFDHLLIQDPDWIQKKGSGDIISRATSDVENIRRLLGFTVLSLCNIVLAYSLTIPSMLSINKTLTVAALMIFPMILVIVSLFGGRMVSQRKIQQESLSKLSDLIQEDLSGISAIKIYAQEEAEKKQFNNYNKVYRNSAIKLARTASTLFPLLQGISSISLLILLGLGTSQLENGFITIGGLVALILFVERLVFPTALLGFTLNTFQLGQVSLDRVEEIFQNNPKITDKPKAKFIKKKVKGTIEAKNLKIKYEGAKFNSLNRLNFKINPGELIAIVGPVGCGKTTLAKSLGRTIEIPDGQLFLDDIDITNIKLRDLRKHIAIVPQEAFLFTSTISENLKFGDPKASRNVVKNSAVNAGLIDDINSFPDGFKTIVGERGITLSGGQRQRTALGRALLVDASVVVLDDALASVDNKTAAKIIEEMRANKSKTILMISHQLSVAATCDRVLVMDQGKIVQEGIHKDLITTNGLYKNLWEREIATNKIVS
61
+ MAKSSWEGNCFLNFFNNKSSSGKDDKTIFKSKFTSPYKLLKCSYDQEGRCILPILHTAGGLVGGDLLEFEANIGINSKVLLTTSSAQKVYGSVGRSKINPEGTFSSQKTKISILDNSHLEYLPQETIVFANGLYSQEFNIKISDNSSFLFTDLIRLGRSSAGESIESGVFRSKLEIMRNGNLCDDWEFVDQIELTKFSFEAKSGMDFKPVFGSLIWICEKEFPITKISYLKEKIKIIFKENNNYLSLGTLENGLSIRFLGTSSQDARKCFFSIWTQIRTVCGFCKPEYQGVWPLQDL,MAKSSWEGNCFLNFFNNKSSSGKDDKTIFKSKFTSPYKLLKCSYDQEGRCILPILHTAGGLVGGDLLEFEANIGINSKVLLTTSSAQKVYGSVGRSKINPEGTFSSQKTKISILDNSHLEYLPQETIVFANGLYSQEFNIKISDNSSFLFTDLIRLGRSSAGESIESGVFRSKLEIMRNGNLCDDWEFVDQIELTKFSFEAKSGMDFKPVFGSLIWICEKEFPITKISYLKEKIKIIFKENNNYLSLGTLENGLSIRFLGTSSQDARKCFFSIWTQIRTVCGFCKPEYQGVWPLQDL
62
+ MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,VNSNSSNQVGKNIRRTGFLIVLSYLLIVLIMKVLEANNFFGYSLSSFSNDIFAPPSLKHLCGTDRLGRDVCLRTLQGSSIAIEVVFLAIFFALILGLPLGLLSGYFGGILDKCLSLVMDTIFSIPVILLAVVVAFVLGKGIINASIALCIVYSPQYFRLIRNQTMLIKSETYVEAARVSGADVKTIIFKYILPNVITPLPILLTLNAADAVLVLGSLGFLGLGVPANVPEWGSDLNLALAAIPTGIWWTALFPGLAMFFLVLGLSFIGEELENIFEN
63
+ MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,MTKFKLKIASRRSKLAMVQTLWVKEQLEKNIPDLEVSIEAMATQGDKILDVALAKIGDKGLFTKELEAQMLVGHADIAVHSLKDLPTNLPDGLTLGCITKREDPSDALVVNKKNKIYQLESLPPGSIVGTSSLRRLAQLRYKFPHLDFKDIRGNVITRIEKLDSGEFDCIILAAAGLKRLGFESRVHQIIPNEISLHAVGQGALGIECKSDDKEVLKIISVLEDKVSSQRCLAERSFLRELEGGCQVPIGVNSSIQNDEIALIGMVASIDGKRLIKNESIGNIKYPEEVGKKLAEKLKLQGADKILSEIFEQFRDK
64
+ MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,LGRSRKTNQLIREFLSELKEVLTKDGSITLRSLIFQENFHSLEGALKETEIKFIYPSDLKRFKDKSLNVLDICFGLGYNSASLFNNVIRQNSLINWYALEIDKKPLEYSLGNKSFQKLWHPKVFKILKALLKNSKYKDQFFDCDILWGDAREKIKNIPANIKFDLIYLDGFSPQKCPQVWSVEFLSKVTQKLNPQGYLITYSCSAAIRSTLKDFGLNIFNNKPNLVSKNLWSYGTIAVKNIDEKVLQNNLYLKKLSWMEEEHLLTKASIPYRDPTLNSNPKDIIKKRVQEQFLSNLKTSKKWRDKWGMTK
65
+ MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,MIDSFPLIKKEHIETLQINIGLKCNQACKHCHVNSSPLRSEKMSYEIISLIPKVIEKYKIKTLDITGGAPEMHPEFRNLITTLSDKNIDIIDRCNLTIFFEDGFEDLPQFLAKNNVIVTASLPCYEKDNVELQRGYGVFDKSINALKILNDLGYGKQKDGLQLNLVYNPVNPILPPSQVILKEDYKRILFEKYNISFNNLYTITNMPINRYADSLNSENKLDSYYKLLKENFNKNNLEKLMCKKTISVNWQGQIYDCDFNQQINLKGNKGPKTLSDLMSKSFKFDYGVAVKEHCFACTAGAGSSCGGTLT
66
+ MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,MKLRLFEFYFIKDYLRPWFGLIYSLFFLFFLGAIGYRITEGWDWGDCLWMVLITITTIGFGEVQTLSPEGRIITVLIIVGGLIFIQFTFQKAVRLFESGYFQRVNELRFKRLLRKMENHVILCGYGRVGQEISNQIKTQNIPIIVVESDEDRKKIAEDNGLEVLCADATLDETLKLAGLDKCKSLVVTLPNDAANLYVVLSAKGIRSSIRVIARAGTEEAASKLRLAGASIVVSPYIAAGRAMASMALRPIAIDFLDLLAGSECEIEEFELSNDISLFETAEKITLLELGIGKKSGAKILAIKEDEKLITNPGGDFLLQPGQVLIAFGSKEQLTTLNRLLGNLVVSVELLK
67
+ MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,MQKTKFSKINDQFNNLLFGFLSSSWKSKSINVISVLTGYFLFANFATKFISEGKNELIMVPIIILIIELIIRIRPPAGSSFFNLWSIIDKARIGATYAVILEAFKLGS
68
+ MSSKLRVGVAGPVGSGKTALVETLCIALKKRYKIAVVTNDIYTKEDANFLIKKKILEEGRIVGVETGGCPHTAIREDCSLNKNAVMDLENKYDPLDFIFVESGGDNLAASFSPELVDLSIYVIDVSAGDKIPRKGGPGITRSDLLLINKIDLADMVGANLNIMQNDTNMMRDGKPWFFTNLSSGSGVDNVIKYLVAQIPNI,MSKLSTTKICVKSPAKINLHLEIIGKRKDGYHELAMIMQNIDLSDYIEFENNQIGEIKLKSNSKDLSLDEDNLIIKAANYIKDMSKNKELGANIFLKKNIPIGAGLAGGSSNAAATLVGLNKLWDLDLDYETIFILSAKLGSDVPFFIEGGCQFCFGRGEILEKYSSNFDFGVILLKNPNISISTVDTYKKYSQEFCPKYFTETEKTNKIRNDLRVNGFNDFKLSEQRINVKNDLQVIVERENNSVKKALYLLSNLQNCLSYSMSGSGPTCFALFKDINIANEVFEQNYKMFNNNGFEAWVCKLINSGITLL
69
+ LSRLLISFIFFAIVFLSPLSTFASHTSDPTVSLLQSRISKNFSKKFCNAIQNGLSKDEAMTSAIVKTENIVSFSYNPQKKWIEKEDLANQISIKVINDCGWSFGLIGKEGIDYFNSYFLEIYDKTTPDKKLSS,LSRLLISFIFFAIVFLSPLSTFASHTSDPTVSLLQSRISKNFSKKFCNAIQNGLSKDEAMTSAIVKTENIVSFSYNPQKKWIEKEDLANQISIKVINDCGWSFGLIGKEGIDYFNSYFLEIYDKTTPDKKLSS
70
+ MTFEAKYLGSNGWLIKFDKTNLIIDPWLTGDLIFPPGEWFFKGSLDNEILIEEDINIILLTQGLPDHCHVPSLKKFKKDIDIICSNSAKGILEKLGFTSIKVLKPKEKIMQKELEIEATAGAPVPQIENGYIVKDYKGKGFYIEPHGYLDENVNSQELDAVITPIINLELPLVGSFVKGADVLPKLIKTFNPKYILSSTAGGEAKYTGLLNKFISVQEYAEEVKCNLVNLKTMDSVKI,MTFEAKYLGSNGWLIKFDKTNLIIDPWLTGDLIFPPGEWFFKGSLDNEILIEEDINIILLTQGLPDHCHVPSLKKFKKDIDIICSNSAKGILEKLGFTSIKVLKPKEKIMQKELEIEATAGAPVPQIENGYIVKDYKGKGFYIEPHGYLDENVNSQELDAVITPIINLELPLVGSFVKGADVLPKLIKTFNPKYILSSTAGGEAKYTGLLNKFISVQEYAEEVKCNLVNLKTMDSVKI
71
+ MPIVFAWSLCLSVVVVLLSTIPLTLGRIKAGYSVENMSAPRALFDKLPDFGKRAVWCHQNCWESISIHAPACILCLITLPDSNLSLIAAWMHPLLRFLYIGAYVLNIPIARGLIWASGIFTTLVLYKEGISQFM,MPIVFAWSLCLSVVVVLLSTIPLTLGRIKAGYSVENMSAPRALFDKLPDFGKRAVWCHQNCWESISIHAPACILCLITLPDSNLSLIAAWMHPLLRFLYIGAYVLNIPIARGLIWASGIFTTLVLYKEGISQFM
72
+ MTDIEEIKKKIYQIAAITDRGQRLNKLIAPMYQEKLKEMGNLIDILESFNTEVSEEKLSGEWELIYSTVELFRSSPFFLAIEKALNDEFKSNLFFKLHQLQVGSFGLSTIGRIAQNIDFDKKEFLSTFDTTIFGLTIIPILGWFKLLPTFGGRVITLADDLILEDKVLKMNLKKTKVSKVDGLNKIPLFSTLLMERWYPVKEVWEKLPWNKESPSCEVSVIYLDEEVRVMKDIYGSTFVYIRPTISLLNSK,MTDIEEIKKKIYQIAAITDRGQRLNKLIAPMYQEKLKEMGNLIDILESFNTEVSEEKLSGEWELIYSTVELFRSSPFFLAIEKALNDEFKSNLFFKLHQLQVGSFGLSTIGRIAQNIDFDKKEFLSTFDTTIFGLTIIPILGWFKLLPTFGGRVITLADDLILEDKVLKMNLKKTKVSKVDGLNKIPLFSTLLMERWYPVKEVWEKLPWNKESPSCEVSVIYLDEEVRVMKDIYGSTFVYIRPTISLLNSK
73
+ MLLSRVAESLYWINRYLERAENISRFVEVSEAMSLDCPPGSAEPWLPLIDASSDRETFDSRFPEKKQDDVINFLIRDRINPNSIISCIQLARENARQIRDVMTSEMWEQINILYWNLQEGESIWDLPRQEQLSEIRRGCQLFYGITDATLSKDLACQFSILGRLIERADKTSRILDVKYYLLLPSLDELGGVLDELQWIALLRSAGAYQMFRKAEQNSIQPNSVARFLLLDNNFPRSVRYCLDGISNTLKMIDTSPSSDNPSKLECMRGLLKAKWSYIRIEDIINDGLHEAIDSLQIDLNKLHNLIEDKYFINKEFDQ,LVKPKSPDNKISNHLQQDVVKIAGKTIFINPFLYWRRFDENTNRWLREPGQMSEEQIQPNRNRFYPEIDWADLSQNQKLVKDASVEMFLKTLELISTFHPQLNSGQLLEVERKMAITKKLPFEKWVTKSFAKKARAEEYEKRKFKRDRFIRSWKEWLSLENTQQALLPIIVVVFVSAFIGWSSGVSKNSCNPYFEQNLDQSI
74
+ MLLSRVAESLYWINRYLERAENISRFVEVSEAMSLDCPPGSAEPWLPLIDASSDRETFDSRFPEKKQDDVINFLIRDRINPNSIISCIQLARENARQIRDVMTSEMWEQINILYWNLQEGESIWDLPRQEQLSEIRRGCQLFYGITDATLSKDLACQFSILGRLIERADKTSRILDVKYYLLLPSLDELGGVLDELQWIALLRSAGAYQMFRKAEQNSIQPNSVARFLLLDNNFPRSVRYCLDGISNTLKMIDTSPSSDNPSKLECMRGLLKAKWSYIRIEDIINDGLHEAIDSLQIDLNKLHNLIEDKYFINKEFDQ,MKLRLFEFYFIKDYLRPWFGLIYSLFFLFFLGAIGYRITEGWDWGDCLWMVLITITTIGFGEVQTLSPEGRIITVLIIVGGLIFIQFTFQKAVRLFESGYFQRVNELRFKRLLRKMENHVILCGYGRVGQEISNQIKTQNIPIIVVESDEDRKKIAEDNGLEVLCADATLDETLKLAGLDKCKSLVVTLPNDAANLYVVLSAKGIRSSIRVIARAGTEEAASKLRLAGASIVVSPYIAAGRAMASMALRPIAIDFLDLLAGSECEIEEFELSNDISLFETAEKITLLELGIGKKSGAKILAIKEDEKLITNPGGDFLLQPGQVLIAFGSKEQLTTLNRLLGNLVVSVELLK
75
+ MLLSRVAESLYWINRYLERAENISRFVEVSEAMSLDCPPGSAEPWLPLIDASSDRETFDSRFPEKKQDDVINFLIRDRINPNSIISCIQLARENARQIRDVMTSEMWEQINILYWNLQEGESIWDLPRQEQLSEIRRGCQLFYGITDATLSKDLACQFSILGRLIERADKTSRILDVKYYLLLPSLDELGGVLDELQWIALLRSAGAYQMFRKAEQNSIQPNSVARFLLLDNNFPRSVRYCLDGISNTLKMIDTSPSSDNPSKLECMRGLLKAKWSYIRIEDIINDGLHEAIDSLQIDLNKLHNLIEDKYFINKEFDQ,MSKLSTTKICVKSPAKINLHLEIIGKRKDGYHELAMIMQNIDLSDYIEFENNQIGEIKLKSNSKDLSLDEDNLIIKAANYIKDMSKNKELGANIFLKKNIPIGAGLAGGSSNAAATLVGLNKLWDLDLDYETIFILSAKLGSDVPFFIEGGCQFCFGRGEILEKYSSNFDFGVILLKNPNISISTVDTYKKYSQEFCPKYFTETEKTNKIRNDLRVNGFNDFKLSEQRINVKNDLQVIVERENNSVKKALYLLSNLQNCLSYSMSGSGPTCFALFKDINIANEVFEQNYKMFNNNGFEAWVCKLINSGITLL
76
+ MTEVINNIPDFEKYLTDTKKVVEEALDFSLGPENPEILRESMRYSLLAGGKRIRPILCLASCSLAGGEPSLAVPTAVAIEMIHTMSLIHDDLPAMDNDGFRRGRPTNHKVYGDAIAILAGDALLTRAFEMVSLRSPGVDSNRLLNVVGELSLVAGAPGLVGGQVVDLECEGKEVDLETLEYIHLHKTGALLKASVRTGAMIAGANEELLNALTTYAEGIGLAFQIIDDILDLTSSSEKLGKTAGKDLLADKTTYPKLLGMEESKKKAFDLVDQAKKAIEPWGLNAKYLISLADFITNRDR,MTEVINNIPDFEKYLTDTKKVVEEALDFSLGPENPEILRESMRYSLLAGGKRIRPILCLASCSLAGGEPSLAVPTAVAIEMIHTMSLIHDDLPAMDNDGFRRGRPTNHKVYGDAIAILAGDALLTRAFEMVSLRSPGVDSNRLLNVVGELSLVAGAPGLVGGQVVDLECEGKEVDLETLEYIHLHKTGALLKASVRTGAMIAGANEELLNALTTYAEGIGLAFQIIDDILDLTSSSEKLGKTAGKDLLADKTTYPKLLGMEESKKKAFDLVDQAKKAIEPWGLNAKYLISLADFITNRDR
77
+ VNFWGFINLKFLLDVLFALGFGLLLFSRVKEQRTLWLLRGYLLLVSFAWFIQRYAYLPLTSKLIDAVVLACSLSLAILWQGELRRLMELLGTGRLAVLLGNPPKEFRATSTTVNQLVDAAGKLSQNRKGALIVVDLGSDLRPEDFLYSGIKIEAKLSTDLLINLFATDTPLHDGAVLVKGNKIISAGVILPLSRQGISRYGTRHLAALGITERFDRCICIVVSEETGTLSLANQGKLERPITSSRLQELLIKLVGNQNTSGTPKSSSNKTNSYQKTNTNDTITVEKKLDKQNTIQD,VNFWGFINLKFLLDVLFALGFGLLLFSRVKEQRTLWLLRGYLLLVSFAWFIQRYAYLPLTSKLIDAVVLACSLSLAILWQGELRRLMELLGTGRLAVLLGNPPKEFRATSTTVNQLVDAAGKLSQNRKGALIVVDLGSDLRPEDFLYSGIKIEAKLSTDLLINLFATDTPLHDGAVLVKGNKIISAGVILPLSRQGISRYGTRHLAALGITERFDRCICIVVSEETGTLSLANQGKLERPITSSRLQELLIKLVGNQNTSGTPKSSSNKTNSYQKTNTNDTITVEKKLDKQNTIQD
78
+ MENPTKNKIQNLIDLNPVMVFMKGTKLMPQCGFSNNVVQILNSLGVTFNTFDVLSDFEIREGIKEYSEWPTIPQVYLKGEFLGGSDILIEMYNAGTLKEKIEIALAS,VNSNSSNQVGKNIRRTGFLIVLSYLLIVLIMKVLEANNFFGYSLSSFSNDIFAPPSLKHLCGTDRLGRDVCLRTLQGSSIAIEVVFLAIFFALILGLPLGLLSGYFGGILDKCLSLVMDTIFSIPVILLAVVVAFVLGKGIINASIALCIVYSPQYFRLIRNQTMLIKSETYVEAARVSGADVKTIIFKYILPNVITPLPILLTLNAADAVLVLGSLGFLGLGVPANVPEWGSDLNLALAAIPTGIWWTALFPGLAMFFLVLGLSFIGEELENIFEN
79
+ MENPTKNKIQNLIDLNPVMVFMKGTKLMPQCGFSNNVVQILNSLGVTFNTFDVLSDFEIREGIKEYSEWPTIPQVYLKGEFLGGSDILIEMYNAGTLKEKIEIALAS,MVMNVSIVIPTYNRKPILEKCLKALEKQNLNENISNYEVIVVDDGSTDGTTYWIKDNYEVLPHVVLYEQEHGGPALGRNLGVMKSKYEIIIFIDSDLIVLDDFIACHVNKLLFSWSKNTKKCFTYGSVINTSNFSNPESERYKLTDFSFAYFATGNVAISKELLLSVGLFDNSFSLYGWEDLELGERLKKLGTKLIKCPEAVGFHWHPPFDCGQIESLVSQEKERARMALIFYKKHSNLRVRFMIQLTPIHILLWQIICLGGLISIKRLLPLLRFLIDSGRNRIALEIVRIPLNLIYVKELRRLI
80
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MKVNKKYKGLVTKKFNEFYLVELDKYETSVANKKFLCKIKKSVNFRNQFVFVGDEVIVYQIDLQSKRATIESLVKRNNLLERPSVANISNIYVICSVEEPKLNLSQVNKFLISSEQLGVEVSLVLTKCDLITEEKRLLLIEKFHQWGYQAITLNLNNPENLRTLLIELKKKKCSIFMGPSGVGKTTLLNMIIPNLDNKTAPVSSKIKRGKNTTRNVELFSLSSKSYIVDTPGFNIQTLEIDIRELSNLYPEIYKQVVNEGIHCKFRNCLHVNDEGCKLNKNFERYTFYKEMVESSKSHYCLIQED
81
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MAGFGLPNFGQLTEAFKKAKEIQQNAQKLQDELESMEIEGKSDDEMIKVWISGNQLPLRVEVNENISTANKEEIEKNILEAIKKAHESSTTTMKERMNDLTGGLNLNLPGLDNNDS
82
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MASQISYRGNKNPIKKKLSFFEGGHQLEKLEFALAVAQTKGDEQKSLVLMKKIIELGGNVEEPGT
83
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MITLYQFRHSAFCLKTRMALHAKKLQYRVEEVTPGLGQFEIFKISGQKQVPIIVDDNDQIISDSTIICEYINKKNDNNPLFPKDPLLFAQCKLIEDWADTTMASTCRKALIKSAIENPQLRTALLPDEIPSSVKGLVDKLPFKNLSKISNVVFSTKDNLELQKILEALSKALINKKYLIGDNLSIADIAISAQLSLLKFPKSSGPILSGEGCQEYINNPYLENIFIWRNNIEEYLFSANSQ
84
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,VLIIYRSNSLTAKEASIFCNKTLKERNIKSKRIESDFDNNQLENYFYNLAALPDLVIVLGGDGTVLKSANALVNYDIPILSFNIGGNLGFLTQEKDFLFDQSFIKILEKEEFIIDFRNRLHCDVYSNEKNRERKILKSYDALNDFYFKSVEEDISPTNQIQIEIDNEKVNEYKGDGLIISSSTGSTAYSMAAGGPIVHPSINAFVINPICPMSLASRPIIIPDTSKVVIRVVQKNKREIKLWKDGSKCMTIKENDYCEINKVTKPCKMIKFNKSISYYITLIKKLDWKGDLSLKNNQNN
85
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MSKKRKRISRRRLAGQRVMAHVPIYHIETGKHKPVTAARRFIAENALSAPSVFNVRRNEHTTDRFFWGQKGLFSAQYAEENHFLFPSLKVVVEGIGEEKIFEGLELTADDWEEIEEYEYAFV
86
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MSKIKENKEQFWLEKFDCFSVTGKDSKRFLNGITTGNIVDLNNKVLKSCWLSPNGILKSLLEINCSEKELKVIVLVGNTSEIRKYFNDIIFPSDDVSLSDSFSINRLQQVDDMNSWRITQPIFLKNEDKKYDFYKNNPNSMNTNDLQLWKINQAIPSLNSEINGKNNPLELGLTDLIDFNKGCYLGQETMSKIKNVSSLKQEIRVWTAKDKDVNLESVNKILFNNQNKEKSVGYITSIYVLESRIIKGLAMIKRKYLDKGNPFFSDNFGQISLEKSVGSTFL
87
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,VENSINISILIPLIPMGMALLILSLLVSFNRTINRLTKPVSALAVFSLLSSALISAFLYFKKIEGEIFLSDYLKLFGSTNLILHLNSLTEKIVIFFAVIIAIVIGVLFYKLPRRKGYVSLIIGISLISSSIMFAVFFLDFSFLI
88
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MKRKEDSKNNNYDSMSFTDHLEELRQRLLNSIYSILICIFFSFLIIKPLISFLEIPASDIHLLQLAPGEFLFVAIKVAGYSGIIVSIPYIFYQLILFISPGLTKKEKNLILPAVFGSGLLFFLGLIFSWWILVPAAINFFINFGADIVEPTWSIERYFDFVLLLMSSTAIAFQLPVLQFILGSLGIITTEKMLSNWKIVVISSAILSAVITPSTDPLTMSLLSISIIFLFFVGAGLTYISESLKSKTLSSSH
89
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MTINEKIISDKELKISDWELDFYSRPIIETNGKKRWELIISSSKSFKTEKIFLWNKVCPANEVNSIWLTKALNEALNDAEIEGWAKPLKIRFWRASMKSIIKKSIENIGIEALVSRRTYELFDRIEFLEREIYPLEQGYVRGVLAPTFTSNILNDPKPLPEAVRGDALTISEISIEELKLAKNWPIEFGDIFPIQSSIKNDNLVPGLRLFSKDRSLALAAWFSSLEPVKLLIKQNQLILEASEDDKWLVTDLQEKDAKVLNDKFTQSKKDSYGYQFISIQATPFIEKFAGFWILKDVELIS
90
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MESIFNNSFATLVAYVGIVSIYLLVIPLILFYWMNNRWNVMGKFERLIVYGLVFLFFPGLILFSPFLNLRLRGDSKG
91
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MTNKKRILSGVQPTGDLHIGNWLGAINNWVELQEKHETFLCVVDLHAITTEYDTKQLSKNTLSTAALYIACGINPKICSIFVQSQISAHSELCWILNCMTPINWMERMIQFKEKSIQQGNNVSIGLFDYPILMAADILLYDADYVPVGEDQKQHLELAKDIAQQRINAKFGKEENILKIPQPIIMKKGSKIMSLNDGSKKMSKSDINEGSRINLLDTPEIITKKIKRAKSDSYMGMEFNNPERPESRNLLMIYSLLSGKEVSELENDLSQTGWGTFKKIFTEQIIESLKPIQERYQVLINDPHELNKILIQGKEKAEVVANKTLSRVKSELGFFEIEK
92
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,LALIIPSNYHKISDVEKNHISWIEPDLAERQDIRPLRIGILNIMPLGKQYEFNLLHPLGLSPLQIEPVWIKLKTHSYKTWDLNHLNNLYTTWEEANDPEPLDGVIITGAPVEHLAFEEVKYWDEFVNITNEARNSCASTLGLCWAGFALAYLAGVNKTVFDKKLFGVFPLKSLAPGHPLMGTQDDEFICPQSRFAGLPDLEMEEAQKEGKLNLLAYGKDVGYTIFETKDQKQLMHLGHPEYTVHRIISEINRDKEKGDVPPPENFDINSSNTSWRSHRNLLFQQWLWFCYQQVSLS
93
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MHSKINYFLGIFLSIVILIFNEPSFAINNPNLLPEEKTPVIDLAKTLSPNQKKSLEENLNNLEKESGWKIKYLSQFESVPGIAIKDYWDLDETSLLVIADPRGGNLLNFNVGEAYFAFMPRLFWVELQTRFGNQYYVKDHGEDGAVLDAINSVKICLDRGGCQVVPGLPKEQYIWTLCTSILGGLVAGFAAAPRKEGQIISIGFLALLSPLWGMLFGIFGLAPIISRTSEVLPLFKNGLAFAAAAIAGYLLSQTVFSRYEKPKKS
94
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,LTNYTHYTTVVVHLYYLLMTLGGANVWSNFSYGSRVDSPNGWILNPQGSFLILFENCKKSARNNINVYTHLLFTNHLGEPAGLKNTRLHDLDSAFETWNELIAGGWTEVTNQFQESA
95
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,LKEDSSYLIKYTSSGLYCELADTWIDPIKPVKRALITHAHMDHFTFGCDEYISTYETAVIIKERIGKEINIKTYDYEKEFKINGIKISFHPSGHILGSSQIKFSLAEEIWLITGDFKRQKDETCKEYEIVKTDYLISESTFGLPIFKWDEPQKTASDITKWVNSSQEKTSILFCYSLGKAQRLLNEISKTNFINNIYTHSSIYRMNNCYKKLGIDIIETTKLEQTKNNSDLKGSLIILPPALNKSSSLKNFKDIQTGFASGWMSIRALRKRSGYDKGFSISDHADWIAILKTIKESKAKNVFFHHGESEALNKYLKEKNSINVLEFEFKK
96
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,MNFKNHHQKKRFGQHWLVNNLILEKIKEVAELEEKDFILEIGPGRGALTSKLLDSKISRLHAIELDEDLIDLLNNKFRNDKNFSLQQGDILSTNLDSINKKITKVIANIPYNITGPILDIFVGRLGIISKNNYNKIIFLMQKDVVDRILAKDGNTNAGAMSVRMQLISNIRRICDVPPSSFDPPPKVFSTLVVFEPLRPEMRLDIKLEKYLDKLLRISFNSRRKMIRNTLNSILSAEEIEKLSESSQICFNSRPQDISINKWIKLAEACIKITNKNQ
97
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,LSRLLISFIFFAIVFLSPLSTFASHTSDPTVSLLQSRISKNFSKKFCNAIQNGLSKDEAMTSAIVKTENIVSFSYNPQKKWIEKEDLANQISIKVINDCGWSFGLIGKEGIDYFNSYFLEIYDKTTPDKKLSS
98
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,VNKKKLYLANPYGFSKQTKNLLPEFIKIFQNLNVEVYEPFERTKHLITNKNNWAYDLAKANFNDLKSCDCIFAIVNGNPPDEGVMVELGISIALNKEIFLFRDDFRNCSDSDQYPLNLMLFVGLSKESWSKNYFESIEDILNPKKSFLNWAKRI
99
+ VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF,VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF
100
+ MANSQVTTESGGRQNMFPSETRPYIDESVSYDSYPKNAEKVNGRWAMIGFVALLGAYVTTGQIIPGIF,VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF
101
+ MLNLIKKNLNIKSGIALIVLATIFVFLSNSFKKNKSKDISNFVVSVEKGILSESINTSGEVKATRTSNIGPRKQGILEEIKVEEGDLVEKGQILATLDDEDFIYKLEELELNLKKQKSEYLRREFLFKEGAVSKEDYESYKNKYNTSEAKFSDAKAEKDFYSIRAPYPGKITAKYAEIGSYVTPSSNLSSNSKAKNFIFELSEGLEIIAKVPESDIGRIKTGQEASVRIEAYPSNKYRAIVKKIAERAVKDNNVTSFEVTLKFKEISEEIKIGMTADLEFKVKSSEEKILVPTVSIVTEKGEKGVLKVDKNNTPKFEKIEIGISSGNKTSIIEGLRPGEQIFIDIPPWANKRK,VRIFMKLFKSLLVAPATIGLLAPFSTFAGEANLNDISKYSNLEHLDLANAFVNDEPKNNSLLAGGEGLVDSGSSDGGFSQTTTASFSVDAVLGAIDGNASATTGQGEETGFDFQFNIGLSTSFTGEDSLDIAIDNGSATASPIGAKMGFDTGTSLVVDGVTYSFPVGGATMVVGDATDVSATYTGACTYSAFTDTTLDDCGTGNSIGAGGKGVAASLGYAFDSGFSIAGGISSPTTEIVGDDADLYGLNVAYSTDSYGVAVGYAIDDGGTGAETTTWGLNGFYTFDLASLSVGYETSETGGTDSSGYFVGLSFSEVGPGSVNVGAATTGLFADSVTEYLIYEASYSYPVNDAMTITPGIFIEETAGDDLTGVAVKTSFSF
MED4_RRS_100.csv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LSKRNPIILIHGLWNTADIFSSITSKLDEIGIEYFSPTLKHEYGMTSIVELTNLLNYLILEKYGYEKELDILGFSMGGIIGRYWIKKLNGYKRTRRFITIGSPHNGTLSSQLIPKYPFKGISEMKINSPLLRELSRSDYLLSGIDCISFFTYWDLMVFPGWRACLNSGEKISLKIYKHKNLVRNPDAVDKIIEKLLN,VRNSPFLPNRPLKVAVLGSSGAVGSELLKILEERDFPISELVLLSSQRSEGKIVKWKGEEIITKKASKEEFLNVDLVLASAGGSISKQWLSTVKDQNAVLIDNSSAFRLENDVPLVVPEVNACEALKHNGVIANPNCTTILLTLVLAPLNKISPIKRVIVSTYQSVSGAGQLAMEELQFLTKKYLQGDPKESEVLPYSLAFNLFLHNSPMLSNNYCEEEMKMTNETRKILNITDLKLSSTCVRVPVLRAHSESVNVEFDDVIKPSYAINQLKKAKGLEIIEDYEKNRFPMPNDVMGRDNIAVGRIRTDISNSNGLELWLCGDQIRKGAALNAVQIAELLIAKK
2
+ LTNYTHYTTVVVHLYYLLMTLGGANVWSNFSYGSRVDSPNGWILNPQGSFLILFENCKKSARNNINVYTHLLFTNHLGEPAGLKNTRLHDLDSAFETWNELIAGGWTEVTNQFQESA,MLRKLIHPILILPFCLYINSQEALLSKANNSIEEILHENENQIFLNYSDIDNITLKNNRELKALESLVNSTMFTLSSKIAKRYPSLDLQASGLPKYTSGKNYNSSSSTTKTSQFSANPSLNIKLDLIDPLRGSEIKIARNNYAIAKNNYEIKKKDLIKEAKSRYHKLQKSYQDIKNKTLSLDLSITSLKDAQSKFDAGIGTKFEVLEAEAQLSRDMQSLNEKKIQNQINKIELKEILNINGDFEINQKQKLIGFWNHKLNKNITEGLANSLSLKNINLKKSIKENQAKNYLNVYKPNVYISNNFTSSFSKGDSLSVKIDPEKSGSSYTNTVSLNFSWNIFDGGQNKNLYKSSKADVKSEDYSYKNIENVLKTNISKAYLNLKLNEEKILSSLKEISSTEESLRLARLRYDIGISTLKDVLVRQKELSNANSKKIDSIYNYNLNLDELVRLTFLEISNICNEENNLIKNEIQSICNI
3
+ MNQFFSRRSFILIPIMSILKFILQPKKVLAAFAASDDDWNLSKEDWKNKLSPESYYILREEGTERAFSSQLNNEKRKGIFYCAGCNQPLFTSDTKFDSGTGWPSFWDPIQGSVETKVDFKLIVPRTEYHCSRCGGHQGHVFNDGPLPTGKRYCNNGLALKFIAE,MTNTKVSNNNPDKESIINKSITKAKDNEIIKNKTIQNKKVNSVSKEPNKSVDDISNELFSELISKKISLVQEIKDLETKKNELEKDIESNFKGQSDNIAKRVKGFQEYLTGSLQNLSQNVEKLELVSQPIVVKPSPLDEKKEASNKNELLTVPALSETFKPDEQLIRSCFSNFIEQPDFYSEPWKLRRSLDSSDIEVMDDWFFNMGGRGSLESRGSRQKNALLSAGFIAILGELYGDQFQTLILASQPERLGEWRRVLQDSLGLTRDDFGPNSGIVLFERPEGVIERADRLEANEELPFIIVDAAETSVEIPILQFPLWLAFAGSNDEIYDDLELN
4
+ MVKEDPVRLELSITPSYGKNPVIVGIVESLDLVARRDREGRMPRDLQGTWDWTVRHGKVSTGGWNPMLKEALQTMFETGLPSIIYEELTGDEYKPVDGIRHVR,MSEYRDSSSNNFLSLISGAFIGAAGLAWWLISEADKRKEEKKQKAMMYSSRIQDGSEAIDTNENIKDVEGDKLEQKVEELNSAIADVRRQLEELGQ
5
+ MSLTQSKEVNSLSRRYSTYIERRITRTVMVGDIAIGSDYPVRVQSMINEDTMDVDNSYLAIKRLHEVGCEIVRLTVPSLAHAKAVGDIKEKLIKNNIDTPLVADVHHNGMKIAMEVAKHVDKVRINPGLFVFEKSDPTRTEYTDTEFETIKKTILKRFTPLVEVLKSENKALRIGVNHGSLSERMLFTYGDTPLGMTESAMEFVKICDELDFHNIIISMKASRAPVMLAAYRMIADRLDAEGYNYPLHLGVTEAGDGDYGRIKSTAGIGTLLAEGLGDTIRVSLTEAPEKEIPVCYSILQSLGLRKTMVEYISCPSCGRTLFNLEEVVDKVRKATSHLTGLDIAIMGCIVNGPGEMADADYGYVGKGKGTIALYRRKEEIKRVPEDEGVDALIRLIKDDGKWIDP,MSKVELISLTPEAEKTMAYIARVSNPSNQANDKFAGLLRYCIKHEHWSVFEQSCMTLKIETNRGIAAQILRHRSFTFQEFSQRYAETSLLGNEIPIPNLRRQDQKNRQNSIDDIPDELKIKFSEKISKHFQEANKLYEEMLNEGIAKECARFIMPLATPTRIYMTGSCRSWIHYIQLRSKEGTQKEHMEIAEDCKKVFIKYFPSVSEALNWE
6
+ MKKLFLLSLLISLISPIKTSAGFPEGEKGYDLKKIEDSFKLPCDEIGNDECIARAFGVGACTWVFGIKNGKDSKEALRIADGVLIALLKGNNLDINSIFEKDGSIKETIQKESVYRINFCKDATKLAIPKLIKKLPEGVELDDERIENLADVFPLQYLTMFEQMRKRN,MKAAILVNQKKKLIVDELDLPTSLKVGQVLVKLEYSGICGTQIGEIDGVKGEDKFLPHLLGHEGSGIVEKVGPGVKTVREGDSVVLHWRQGNGIQSEPPKYNWNGKTVNAGWVTTFNTKAIISENRCTKIPANISKEDAALFGCAVTTGFGVIENNAKLKMGESIVVFGAGGIGLNIIQAARLTSAWPIIAVDLFDNRLDLAKKLGATHSVNSSNKSYLDEIENILKDRELDVFIDNTGNTSIIEMGYNLISDQGRLILVGVPKTGENINIFSLPLHFGKKITGSFGGECNPAKDIPRFIKMMQNGLWDLKGLITESYDLENINEAIFSMRTGKTSGRVIIKL
7
+ MNNVVQNKSKIFYQLQKLRRLAQPFFLPIDQCNGFQFIWLLISLLFCVGGIVLVALTGIISFFESIQPIFLDKYFGGVVNTVNTIWSGWWGLLFSGLFLIGSGSFFSLRRQLKNRRWVHWLFLAVIVLMLLAVNGINAGIGFIARDLTNALVEKQQDGFYRILGIYACCFAVALPIRVSQIFFTYKLGIIWRDWLSKSLVKDYMTNKAYYQLNPNDEEQTDVDNPDQRITDDTRAFTGQSLSFTLGVFDALLTFSLNILILWSISTTLTFSLFGYAAFATAILLIAGKNLVKIDFDQLRYEADFRYGLVHIRDNAESIAFYSGEKPEKSETERRLGEVVRNFNLLIIWRVIIDVMRRSINYAGNFFPYLIMAIPYFRGDIDYGRFIQASFAFGMVEGSLFFIVNQIEELAKFTAGIGRLEGFQSKVESISQTKPIDNQNIISDYSSILINNADLFPPGSDKAIIKNLNLSIETNQSLLVVGPSGCGKTSLLRMISGLWEPNQGSIKKPKTGDLLFIPQKPYMLLGSLREQLCYPTEVDKFSDDHLISVLNEVNLNSIVDRYPNLDVKQDWPRILSLGEQQRLAFARLLLNSPRFAVLDEATSALDIKTEKRLYNLLRDRELSLISVGHRPSLKDFHENILELNGQGGWKLFTTDKYNFKN,MNRWVLLEHKILSSKFIDIHYDFLVEDQLDCLTWKFHEIPSLNKGVIKIVKQPNHRLVWLSRVEYQLSKNRGLVKRIDHGIFSNIPHNQDSQKLKIILNGKLLNGLFIIDGNFCQLTKNN
8
+ MKYLILGSGSFAGQLIFSEYLERNYDVYGFNRSRVKDHYQWPWIKKYKNDLGNRWFEYNLTNDVEEMISHINRLKPNFIIDFMGQGMVAPSWLKPEVWYTTNIAIKSRLMNALIDSSFLQKYIRIGTPEVFGSNENFLKEDECFNPSTPYAVSHAAIDFNLRCLYKQYNFPYLIGRFANFYGVGQQLYRIIPRLFLSCRSERNFILDGKGESRRSFIFSKDIVSAIDSMIKFDGIGQEFNFSSNEEISIMSLVNKICNLTNVDKSRILKFGPERPGKDRYYRLDIKKSKNVLNWEPEVSLDEGLNIINIWISENIENLSNKSWTYEYKD,LSLIFINLLTSIPEYISKAVETNSTIAYLTICLAMFLENIIPPIPSEIIMPLGGFFVYQQKLNFYILVFWGVFGTILGSMPWYYLGKLVNEKRLSNFLDKRGKYIGITSNDLIKSRRWFDKYGVSLVFWGRLVPGIRTLISVPAGMELMPLRKFLIWTSLGSLIWVTLLTYAGFVFGENYPIIETYLNQIKFIVKPILILIFVYFLIKFFIRLYKKKIT
9
+ MGENLPLLLSAALGKKVNRPPVWMMRQAGRYMKIYRDLRERYPSFRERSENPELSYEISMQPFLAFKPDGVILFSDILTPLPGMGINFEIIESKGPIIEDPIRNIRQVEKLKELIPNESLSFVGEVLSSLKKDVKNEATVLGFVGAPWTLAAYVVEGKSSKNYSLIKSMAFKEPDLLHKLLDHFAKSIGEYLKYQIKSGAQVVQIFDSWAGQLSPQDYDIFAGPYQKKVVDIVKEEFPDTPIILYISGSAGVLERMAKTGVDIISLDWTVDIEEACKRIPTGIGIQGNVDPGILFGNKDSIKERIDNTFNKVKERKYILNLGHGILPGTPEENAKTFFEHGKKLTY,MRGSIKRSNESYQDSYSPNGIIGEKDACGVGFIANIDGKESNWILKQSLKGLNCMEHRGGCGGDSDSGDGAGILCSIPWEFLDRELNLNTESYEKRGLGMIFMPNNELKVKESKLICDEEAKELNFKQSFWRNVPIKNETLGILAKANAPFINQWIVCLEKDDSRDIEMLLFQLRKRIEKRIRDNTKNAIGECEFYFASLSSKTVVYKGMVRSEVLSEFYEDLKKEDFKVSFSVYHRRFSTNTLPKWPLAQPMRFLGHNGEINTLLGNINWAKASEIHIDDYWGELSRDIKPIVDKNKSDSSNLDATLEINIRSGKPITDSLLKLVPEAFRDQPELESREDIKAFYEYSATLQEAWDGPALLVFADGNYVGATLDRNGLRPARYSITNDGFVIMGSETGVVDIEENRVIEKGRLGPGQMLAVDLSQNKILRNWEVKAEAAKRKNYKKLIQKRTIKLKNNEWSNTCNLKDFELLQQQTAFGFSSEDNDLILDSMASLSKEPTYCMGDDIPLAVLSSKPHILYDYFKQRFAQVTNPPIDPLREKLVMSLEMHLGERCSPFEFNGIKPFIHLKSPIINEKELISLKESEIKSKTISSLFDIEERIKGFEAKLDDICKVSEKAIKEGCSLIIISDKGVSSKQSFIPPLLAVGAIHHYLLKKEIRLKASLIIETGQCWSTHHLACLIGYGVSAVCPWLTLESGRHWLQHPKTQKLIATKKINPLSIDDVQENIKKALEDGLRKILSKIGISLLSSYHGAQIFEAVGLGSDLIKIAFDGTTSRIAGITLKELANESLLIHTKAFPEIDLKKLEFLGFVQFRNNGEYHSNNPEMSKVLHSALKQGPGYDHFETYKTLIRNRPVTSLRDLLSINSTRKSIPIDEVESVESICKRFCTGGMSLGALSREAHEVLAVAMNRIGGKSNSGEGGEDPARFNVLNDIDENTQSAILPSIKGLENGDTACSAIKQIASGRFGVTPEYLRSGKQLEIKMAQGAKPGEGGQLPGPKVDSYIAKLRNSKPGVALISPPPHHDIYSIEDLAQLIHDLHQVHPRAKVSVKLVSEIGIGTIAAGVSKANADVIQISGHDGGTGASPLSSIKHAGLPWELGVAEVHKSLMENNLRGRVLLRTDGGLKTGWDVVIAAILGAEEFGFGSVAMIAEGCIMARVCHTNKCPVGVATQKEELRKRFKGLPENVVNFFLYIAEEIRQIMSSIGVSNMEELIGNQEFLTARDIKLPKTANIDLSSLIKKGTQYKDRSWLKHSKTAHTNGYVLEDQFLSDNEFMNSIKNHGKVIKEIEIKNTDRSVCAKISGEIAGLYGNNGFNGELNLNFKGYAGQSFGAFLLKGMHIQLIGEANDYVCKGMNGGVLTIVPPQVDEKSSEQVILGNTCLYGATGGKLFALGKSGERFAVRNSGATAVTEGSGDHCCEYMTGGKIVILGSTGRNIGAGMTGGIAYILDENNDLENKVNKEIVSIHKITSLKQEEILLGILGEYLEKTKSLKASKIINNWSNFKGIFKIVVPPSEEETLGI
10
+ MIEKKGDNIRSENFYPDSNYYLDQDNTPEETTLPEDQIFNTKKFEWPNSYWFIAERTNGRLAMIGFMAVIINYTLFGWIAYPIL,VHKNKILVPLSNNSYEVIIKQGLINNIGEELIRIGINSNRKILIVSNKEISTLFGRKLLNNLKKNNFNAEIFNIKAGESHKNFASLSEIFNAAFEVGLDRNSLLIALGGGIVGDVTGFAAATWLRGIEYIQIPTTLLSMVDSSVGGKTAVNHPKGKNLIGAFYQPKAVFIDPETLITLPTREFKAGMAEVIKYGVIKDKSLFEYLENEKNRDKILNLENESLIKIINKSIKTKACIVSEDEKENGIRAILNYGHSFGHVIENLCGYGEYLHGEAISIGMKIAGDIATEKNLWSKEHSLRQDHLIESYGLPIQTPKIKKNDVMKILMGDKKVRNGKMRFILPIELGEVDIFNDINESQFLKYFN
11
+ MLKNDLWINQKASKGMINPFQSNLVRHLDPNNKKNAVLSYGCSSYGYDLRLSSKEFLIFKHVPGTVMNPKKFNPDNLEKTILHEDKDGEFFILPAHSYGLGVALEKMKVPENITVICIGKSTYARLGIIVNTTPAEAGWEGHLTLEFSNSSGADCRIYANEGICQLLFFEGDPCSTTYEDRKGKYQNQPEKVTLAKI,MKKTKVICIGEALIDRIKNKSNQEFTDFLGGAPANVACALRKLQIDSVFIGRIGSDEFGKKFINQFKELEVNINFLQLDDCLPTRIVKVNRDNSGDRYFSGFDTSLNTFFADEAFDKNEIKKDLKSLENLFSKTKYLVCGTIILSSSISADTINFLLSLANKFDVKIIIDLNWREVFWDFATSSSETNKKERVDLIRNLLNKAHILKLAKEEAILFFENKNPLEISERLLNRPDVIITDGANPICWLINGVQGTTEVSKSLKIIDTTGAGDAFLAGLISQLLSFDYPSNESEIQNCVKFASICGLLTCLGEGAIEQQPDYSKVNKFFGSQIL
12
+ MVSVPFSNNGSNKNFKKDFNNENAGLVPPQNIQAEEAVLGGILLDPDAIGRIADLIKPEAFYINAHQEIYKTALMLHTQGKPTDLTSMSAWLADNGSLEKIGGNSKLVELVENVSSTASIEQVANLISDKFIRRQLIRSGNEVVQLGFDQTQETNEVLDKAEQKIFEISQEKPTKGLTQAAEILTSTFNEIESRSLGTSVAGIPVNFYDLDAMTQGFQRSDLIIVAGRPSMGKTSMVLNLAKNVAQSQDLPVCVFSLEMSKEQLTYRLLSMEVGIESGRLRTGRLQQEEWPLLGEGINSLGQLPIFIDDKPNLSVLEMRSLCRRLIAEQKKELGLIVIDYLQLMEGTTPDNRVQELSRITRGLKSMARELKVPVVALSQLSRGVESRTNKRPMLSDLRESGSIEQDADLVLMIYRDEYYNPETEDRGITEIIVTKHRNGPVGTVKLLFEPQFTRFRNLAN,VELMGQFFSNVARYPKYLISIIAGGLVALLEPLFKNRSNPLTLVGLISSVISAFITFYFVLKAMTNPINL
13
+ MSFSKLVEIKNNFKFDANNSCKNLYKGACVKIKNSQKTFQVVGINPQSKVCWIREWPFALEVNKTFSLELNQITLQTYCSDTFNEK,MAENFSFDVVSDFDRQELVNALDQVKREISQRYDLKGTDTSLDLEKDNIFITTNSELTLNSVIDIIRQKAIKRKLSIKIFDFNSIEVVSGNKVKQTITLKKGLNQEIAKKISKNIRDEIKKINVSINGETLRVMSKSKNDLQLAIKLLENLEETYKIPLQTNNYR
14
+ MNKRHSLQRKTTLKWNSNGDLSEIDMLRILDRISAYELNQCELTCDLDE,MASQDYLIAIALIEQNNIRAMPLGGKEIKEKLEEEGNLIKLGEEVILNLLLRVFQRSDEGALKRVSEDKGLLLVHMHPKRMQKELPFIKSEWIRDGDTTQFLKYLGNLSKEIWTASLIKYKGLELVSIAKNEDI
15
+ MVEKFKTLFFVKSSLISLYLALTCPIPFISSEKLKIFSIITFFFGLLLIINITNDYVDTCDKKISYKTSFISKIFGKKNWEIFWKDIKLIKSLPTSQGSNIHYFISNKNESFLVPQRVENFERFVSIIEEKTKLNIDKLSYISPLWTYKLLTYMSILMIIGELIAFII,MSSNKICLNCGSSDLVSDRSLGGRMVCFKCGSSSFKNNSFSRIQNKKIIYLLIVLVILLIVVL
16
+ LTNSIKGRNSKEVTIQLKRAETQKNILIKNIYKEYETYFDIVRKSMLISAKKGIAGIYSDFSISDKALHSKELNIFLNKNISLLINSKLPFITIEQLKLGDISYPTKQLVNASVLKELVKRKEYQTVHIDHENEKTANESIEFHCDNNLNTYEYYESLSEDEISSVNLDESCYLNSFSKEISIENIEEGKRLVNAFLELIEETSDNKLIDYEKINDQAPDVFISSDNLNTFEFIDKSFSNFLLNLSYNINLELFKIELIKKIITEETFKCLSNNNSIIKHPYPFVIRYDLYPDNLYPRKNKSSDVYLFNITNVELELYNLDLSICRNNINDLKNRFKLLNKKQRYWKNKELASNSSK,MTDILVLILFVLSGAASGWLGVDLLPIDILKQVSNVEGFRIVLAIIGFFIGLAAGFVFLQLRKTFLDQIRTMPTDLLISRAVGLILGLLVANLLLAPILLIPFPREVFFAKPLAAILSNFFFGALGYKLADTHGRTLLRLFNPTNTDAYLVNEGIIPAASPKILDTSVIIDGRINGLLSCGLLEGQLIVAQSVIDELQTLADSSSNEKRGKGRRGLKLLKELRELYGRRLVINPTKYEGNGVDEKLLRITEDMAGTLITADYNLSQIAEVKELKVMNLSDLVIALRPEVQPGESLNIKIVREGKEKLQGIGYLDDGTMVVIDDAKKFVGERLDIVITGALQSPTGRMVFGKLINNPESNKSFKSPATQG
17
+ VHLDKSSSEIINKFKLSPHPEGGWFREIIRSKNHVTRNDGQKRNNITSIYYLLCKSERSKWHRVNSSDEIWIYLQGAPLNLYFLDDNKELRNIRLDLNNPIEMIPSGYWQAASSTGEFTLTSCCVGPGFDFNDFQMLRNIDPSLRPAKAIKELI,MFFWYRVLSLWPLRRRIINLERFRGLHDDYSKSSTSLNAIRELNDTCNVNLLCTPYVAYIPNSDYWRPNQARDLYKLHLKKSSKKLKIKFIDGSTVIDTKDIKNYAPLGPHLSKLGYQKFAELLSSHLSKKK
18
+ MSKVEIYTWRFCPFCIRAKSLLEKKNITFTEHKIDGDDNARELMMERANGKRTVPQIFIDDKSIGGCDELYELEKEDKLDLLLN,LTIYLGFLYLFFGIIFLLMPLIYIELGRPRDFIKGGLNLVIGMLLIYKQNIFNTLNYLIFSVITTLLTFYIVEIFSIRWNQLTNQEKNNLLTLEELKKNLSIFLKAISLARQDFLNSNNIFKFGRKNENLNKKKWVRNDENDNIVNSNKNNLLTLEMPKKATNKSTKDTINEGK
19
+ MKLSLLSAVLFLFTEISFAQEKLNYTVTSDSQIQSIKGNFEAIGNVIIKSTNNNFEASSNKLTYDKDAKTLKLVGNVFVKNLESEGLSIQKSYGDELTIFTDSGLFKFNSENKNRVKTKLKF,MKNLKSNKRKIHRKVAAISSIPLLITLISGTIYSFLQPLGVDAFWLIKWHTGNFGIINLQPFYSIFLGIASIISVISGIRLLQKNS
20
+ MVCVSNNKSYLKSQHLKIIGQKTLRGKVKISGAKNSALVLLAASLLTDEKIILDNVPLLTDIEKMGNILKNLGVKLHNKDHQLIIDSKNISIQELPYELVNGLRASFFCIGALLTRFGEASIPLPGGCNIGERPINEHINGLRALGAEIIIDRDVVKAKLVKKKTKLFGANIRLNCPSVGATETLIMAASLAEGRTVIENAAREPEIQDLCQMLNKMGAKIYDSGKEKIIIDGVHKLHGCTHKVIPDRIEAGTFLIAAAATSSSITVSPVIPNHLEAVLNKLEESGSKIIIKGNSISIKGNNIKAVDIKTAPFPGFPTDLQAPFMALMTIAKGRSKITETIFENRMNHVDLLNQMGSSITLKNNIAHINGVKKLRGMTLVGSDLRSSAALIIAALTSKSVSYVYGLEHLDRGYENFEQKLSKLGIEIKRQITKQTINKSKNRSSNSKLKEVSEIRAA,LELVDNINPGLVNNLLKMKSKIKQTFKLILFIFLTNTHFLQAHNLFNGGCKNHCKESVKPLIMNKELNNSSYKNQIEDDDSCLIKSLCRG
21
+ LIFIMIQFASFAIGGFVPSAAIAGVLVLIGLGAFFYLGLKGPTDY,MYSLEISLRYSPFPLSIQKKEYEDIKRIYDEIKDSMNSDNQNSPLIELSCEKVQDKLITVLAKEVISVQIYEKSAVAGGSKRPGFSLDI
22
+ MSGIKTKNNTQKLSFRLAPYLFIAVAIFTAFGTNGGTWV,MRNSWIQPRIGQKNITQMNFAKNGHITEEMNYVAKKENLPPSLIMEEVARGRLIIPANVNHVNLEPMAIGIASKCKVNANIGASPNASDINEEVEKLKLAVKYGADTVMDLSTGGVNLDEVRQAIIKESSVPIGTVPVYQALESAHGSIERLTEDDFLHIIEKHCQQGVDYQTIHAGLLIEHLPKVKGRITGIVSRGGGILAQWMLHHFKQNPLYTRFDDICEIFKKYDCTFSLGDSLRPGCLHDASDDAQLAELKTLGELTRRAWTHNVQVMVEGPGHVPMDQIEFNVRKQMEECSEAPFYVLGPLVTDISPGYDHISSAIGAAMAGWYGTAMLCYVTPKEHLGLPNAEDVREGLIAYKIAAHAADIARHRAGARDRDDELSHARYNFDWNKQFELSLDPERAKQYHDETLPEEIFKKAEFCSMCGPNHCPMNSKISDETLDELNNKLTKCDTSV
23
+ MKSPVLKNPNQDWHPNIWPPFTQIINSKPQLEVTHGKNALIYTKNPKQELIDGISSWWVTLHGHSNDYIADAIYHQAKTLEQVIFADFLHPQAQILSERLSGLTKLERLFFSDNGSTAVEVALKIAYQSWQNQGETRNQIIAFDGAYHGDTFGAMALGERNIFNENFDNLMFPVKRAPWPSTWINDEEVERKENNAIQILTKLLKKPTVAVILEPLVQGAGGMNMVRPEFIRRVSEVVKNNNSLLIADEVLTGFGRCGSLFAFQKANIIPDLISISKGLTGGFLPMGITLAKETIFQSFISDSPKKTFWHGHSFTANPLGCAAANASLDLLEKDPIKYLSFEEKHLSHLKKIKKLPFVKNIRVTGTIAAFDIEIGKNEGYLNNVGKRIKALSIKKGLFIRPLGNVIYLLPPLCITDRQLEKSYRIIFEILSDL,MKSLLNTHTKFEIMHIVLRNFKFLIFLFLLSLNLSGYSNAHMRGTFLSEEDARNRSLELGCEGIHKNQDKWMPCKNEKELHKFLNKKGSSRGNNLASSLAWIFILSSSFGILWLSIVKIKRK
24
+ MNNAKNLKIKQIDKKNISFKELSLIKNIIFWVDIIPGDQTQKNAIFARPFHDKNAIPQKLTGDNFYIKSNFHGYGGKSYQCIEVNDHIYLIWVDQLSKAMWLKIFKVQEKVLKNDNQYLLCDVEPRQLTESIKTNFDTSFVISKNNLLFGLCEIKHRDYLFSLNLKKTKQDIRIIKKFDNFAGNLSSNISADLFSWIEWNAGSMPWERNELFFAMIDNDGEIQNIKNFSNKFVNEEKNVSFFQPYWMSDTTLVCSEDSTGWWNLLFLDLTDIKNIILKKRIIKPLTEYGSPQWVSGISFFSGNIKNLFCVAKKDNSWVLEHYQNCECIKELKLPFCSIGDLDVCDQKLVIRGCSFGCFEELFECDFGEKSHTKLLNEISLESINEYSRPESFWFKGFNNQPTHSFIYKPLFERFIKSPLIVKAHSGPTACFDGSLNSEVQHWTSKGFTVAEVNYGGSSGFGREYRERLNYKWGILDSYDCKALVLDLIRLNLVDRTKVAILGNSAGGLTAINALCEGDLFKVAICKYPVLDLNDMHQHTHRFEKGYLNSLIGRYSKFHNEYKLRSPIYKINHLKKPVLLFHGKKDLVISCKKTLQIKEKLLKNNKNSEVIIFENEGHGFKNTNNKKQVLIKTQEFLEKTLNI,LSRILLLSNGHGEDLSGSLLAKYFVKKGDLVDALPIVGDGENYKKENIRIIGKTKKFRTGGIGYNSFSGRIFEIFGGQIIYFFKKLYLSYKLKNKYDFYLVIGDIVPVFFAWFAKKDFFTYLVAYSSHYEGKLKLPWPCKFFLISKYAKKIYARDFLTADDLSQQLRKKVSFLGNPFMDKFSFFENKPKIVPFNIGLFPGSRFPELLDNLKLILEVLETMSKLQYFENIAFKFAIVKALSMEEIRQILNQRKWIYIEKKGKNDGLEFTFGFITINLNWNLFEEILFESNFVISMAGTASEQAIGLAKPVIQIEGNGPQFTKSFAEAQRRLLGRYVFCSTNYINKKDQINQTINLILKVIYLIKLDKKFLVSCLDNANLRIGESNSCLKIINDIKGFHEK
25
+ MMIPISLQKNQNRHIIKSVRKFIDRFFKIKKNQNLNMTEQRKIEKFGVGNLYPDIHPPEFSFFKEKCIDVALGYDDGFTFTPKFGNFKETEDIFDYLKQYLEDKELEKLDIRFDTLKTCIYQINPETLELGELLECEGSDVEYFEWNKKTKSIDEVDSNSLSDEEEEYFH,MIKIFALTFSEIGIGKLEIFVIGIVSLLFPILFIIASRNLDAKGVFDWMMEKPNDWIGKK
26
+ VFDISKENFFKNLIKFPKKNIFMILLFLGFGEWFLSDLINFAGGSIGFFILCFGGYFYLKSEKPKFNEPKDLDGWIKLCNEDLDFFEEIELHNNLEKQNINRKKALELILNREKKEEIYCIGQKNFDSNATLFKNYFKEDKFKLNFMERLPKYNSSEIVPEVILNSDAILYFLKLPLSANDFLWLEKLPKNMPIWLVASFTKGLSFNNEIEEVKAQISGEYANRIIKFDKTKNSFANIPFSLRKFFISSNNNIENTKKRLLKRLHTNWQSEIEGIRRMQLNDLQRRNQIIVATSVFFSPIPSIDVLSMTVLNSLMIKEIKSIWGCNWSPEILDKVSKQIIKTAIAQGVIEWSGQTLIGLTKLHGPNWLVTGAFQAISAAYLTRVVSSSLADFMALTKGVSEPDLEFIKENSDKIVERAFENEKINWKSLIPELNIPLTRLT,MIKNTKKSQKNKILTLEDVSISYGTFEAVRNVFCNFKSGDITSLIGPSGCGKSTVLRALNRMNDLIPNCSLRGTVLFDGTNIYDKRVDPVEVRRRIGMVFQQPNPFPKSIYENIAFGARINGFVGDMDELVESSLRKAAVWSECKDKLNDSGYSLSGGQQQRLCIARTIAIEPEIILMDEPCSALDPISTLKIEETMHELKKNYTIIIVTHNMQQALRVSDMTAFFNAVEYEEGDGGKVGYLAEFDSTKKIFSSPKEKTTQEYISGKFG
27
+ MWIRFHEKNGSERLNFTAFYEALLEAKGVNLGDTGVAGIGKGGRKLSYIATVQGNGNLLIGKAYTALLDLKAGDEFEIKLGRKQIRLLPSE,MTISDKIRVYELSRDLKLENKDILDAAQKLSISVKSHSSSISLEDAKKIKNLINKNSSKKILSVSKSAIKAKNENPKNNDNKNNKNFSNPSHPEKLSKEGLNKKPLLIKPTNKVVNSLVSSNIKNPNPPTIVSNLKSQALSKNQNKTNTSVITTPNLKDKKNPSALQDKKPLKNSSGSPAKTTARPPIQLIEKPKNLANSNRNINANKINNSVNQKAQSLNRADNNKLSRADNNNFPKKNLNSPNVKSTPELVGAPIRREDPKINTNRPNSNSRQPSSNTQISANRPGGQNRQGVPNREGGPYRQGSPNRPGTPYRQGAPNRPGGQNRQGVPNREGGGPYRQGSPNRPGTPNRPGTPYRQGAPNRPGGQNRQGVPNREGGGPYRQGSPNRPGTPYRQGASGIRKPVAPNELMQLQKTNASNKEKPNISNVNKQKIEGANQKTKAPNSRLNTSPSPTAKKPARSFASNTKKPGRTDWDDSAKLEALRNKNPQKQRQKVHIIGENDDSLTSETSGYSGEKVSILSASLARPKKEKSEEIKSQKPSKQFKKKKKETTRQRQKRRAMELRAAKDAKQVRPEMIIIPEDNLTVQELADKLSLESSEIIKSLFFKGITATVTQSLDLATIETVAEEFGVPVLQDDVQEAAKKTVDMIETDDIESLIKRPPVITVMGHVDHGKTSLLDSIRESRVASGEAGGITQHIGAYQVEFEHESKKKKLTFLDTPGHEAFTAMRARGTKVTDVAVLVVAADDGCRPQTLEAISHARAAKVPIVVAINKIDKEGASPDRVKQELSEKDLIAEDWGGDVVMVPVSAIKKQNIDKLLEMILLVSEVEDLQANPERLAKGTVIEAHLDKAKGPVATLLVQNGTLKAGDVLAAGSVLGKIRAMVDEHGNRIKEAGPSCPVEALGFSEVPTAGDEFEVYRDEKSARAIVGDRATDARATKLAQQMASRRVSLSSLSTQANDGELKELNLILKADVQGSVEAILGSLEQLPKNEVQVRVLLSAPGEITETDIDLAAASGSVIIGFNTSLASGAKRAADANDVDIREYEVIYKLLEDIQSAMEGLLEPDLVEESLGQAEVRATFAVGKGAIAGCYIQSGKLQRNCSLRVLRSDKVIFEGNLDSLKRSKDDVKEVNTGFECGVGCDKFSTWSEGDIISAFKFVTKKRTLNK
28
+ MNKTITPSIETIERNWFLVDAKDKTLGRLSTEIAAVLRGKNKPTFTPHLDTGDFVIVVNAEKVEVTGKKASQKLYRRHSGRPGGMKVEKFESLQERIPERIIEQAVKGMLPHNSLGRQQFKKLKVYKGSDHPHAAQNPVLLNS,MKIILLVFFFIMFSFIFLKFYKYKKAFKKDKSIKFNKSNLYNWMNLTKKERFDLSKKESNSYLKKRKTLLEEIRKEYKIISKND
29
+ MTELNQKNSGKNIKWHNLTIDRNKLEKMRGHKGMVIWFTGLSGSGKSTLANAVNEVLHLDGFSTYVLDGDNIRHGLCKDLGFSDEDREENIRRIGEVANLFMNAGIITITAFVSPFISDRDKVRKIIGSKDFIEVHCAADIEVCESRDTKGLYKKARLGEIKEFTGISSPYEAPVNPEIVVDTGSLGLNDSVEKVINHLREQNLLERS,MESDNLFSNTYRIESNAPLADKLRPKNLDDFFGQESILGHDSLLRNAILNDKVGNIIFSGPPGVGKTTLIEIISSNTRSSLIKLNAVLSSIKELRTEIANAKERLRSSNRKTILFIDEVHRFTSVQQDALLPSIENGTITFIGATTENPFFAVNKALISRARIFSLLPLNKNDLKKIIDKVIKYYSCLKDSKVVEIKEEAINHLIKFSGGDARNLINALELGISITKENKENLVVIDLAIAEDSIQKKNIVYDKNGQNHFDVISAFIKSIRGSDPDATLYWLANMVEAGEDPNFIFRRLLISACEDIGLADPNAIVVVQSCCDAFDRVGFPEGLFFLSQASLYLAISPKSNSTKSIFKALEAIKATNVSLVPNHLKNNASNYLNPHNYQGKWLQQEYLPTDLQGIKFWKPKDSGWEKNKYEDLPKKQKS
30
+ MSASKREEVSSHLRYIRLELREMHQMLIRDDLLPDLSEAKEVHAQLDALYELLSDKRKKKVKNEFENF,MEAFHPPKEVKETIDDSGLPKEEGISEKWLREKIDSLIPLIQEKWPNIAQQTLETAKGSIDDLVGVIASHTGSSASGIKNQLFQIIDSIQENNWEIADKIEPIESQLEELLDELNSTLRPKIETPIRKKPILSIAIAAGIGLFIGSLINSRNK
31
+ LNYWIQNLAPDGSPDEIGVIQLAWLGDSVWELHQRLRHIHIPLKSRDLHLSVVNEVKAQAQSKALDEIEHLLNSFEINLIRRARNKTKRFPKSSDPAIYSRATGFEALVGWLFLKDPKRLSKFFEYLECK,VTQIDSKKKFDRLRLCKLLETIYKEHTTEELNLICNQLLQILDNFSEKSRYEEISEDKKWDESFAVLITYADGVYKKGETTLVTLRELLSKNFGSLSKVVHILPFLKSTSDGGFAVSSHTSLEEKFGSWEDLKSISNKHYLMADLVLNHVSSSHPWVQQFIKCQEPGLSNVFSPSQDLDWKNVIRPRSSSLFSQINTDDGQKQVWTTFGPDQIDLNWLNPKMTIEFLNLIITYLSNGIKWLRLDAVGFIWKEPGTTCLHLSKAHSIVKILRILLNDLLKDGVLITETNVPQKENLSYLLPEDEADMAYNFPLPPLLLEAIISSRADILNAWICDWPELPKTTTLFNFTASHDGVGLRALEGLMNEQRIKDLLINCEKRGGLVSHRRLSNGEDKPYELNISWWSAMEDPGRDSNRYQYERFLLTQLLVMSLKGVPAFYLPALLASENDIKSFSMTGQRRDLNREKFKSEKLAAVFNNPESNANKNLKYLRHAMDVRAKLPQFHPQSHMECLSKNRADIVALKRGIGSKAVFTIHNMTENKINYRFIDYEFNKLIKNDLNMQDYLTSNKYNSNNIELDPFQVIWLGF
32
+ MTSSKPKKSSRVRKTTKNSKKNHNTMMPLLPKTPPSFKNKVVDKKALKNLVSWAYKTHGTAVTAAMADNLKDLGFKYATQAAVSISVNDLKVPEAKQDLIGQAEAQITATEECYRLGEITEVERHTKVIDTWTETNERLVDAVKNNFNQNDPLNSVWMMANSGARGNMSQVRQLVGMRGLMANPQGEIIDLPIRTNFREGLTVTEYVISSYGARKGLVDTALRTADSGYLTRRLVDVAQDVIVREEDCGTERSIVINSEDGKFGSRLIGRLSAEDILDSEGNLIVPKNTAIDPSLSKTLETSLISKVNIRSPLTCEANRSVCRKCYGWALAHNHLVDLGEAVGIIAAQSIGEPGTQLTMRTFHTGGVSTAESGVVRSKIKGKVEFGSKAKIRGYRTPHGVEAKQAEVDFLLKIIPTGSITNKAQKIEVTSGSLLFVEDGQDIDSDITVAQITSGAVKKSVEKATKDVICDLAGEVRYDKVIQPKEVTDRQGNITLKAQRLGRLWVLAGDVYNLPPNAKPVVSTETKVEQGTVLAEASQSSEFGGEVRLRESVGDSREVQIVTTSMLLSNFKLIEESTHSGELFHLESNDGTIYRLNTSPGSKISSGEVIADLADERFRTKTGGLVKYAPGLSVKKARSSKNGFEVSQGGTLLWIPQETHEINKDISLLMTEDMEWIEAGTEVVKDIFSQTSGIVTVTQKNDILREITVRNGSFHECEDEEILSRFTEEGKLVNPGEKIIDGVDNDEILFVQKLETSKGKGLLLRTVEEYTIPNEAELPELSHVKQEKGPSLALKAIQRLSYKDGELIKSVEGVELLKTNLSIESFDATPQMTIDVETIQDKSDKSINRLNLVILESILVRRDTISDSSHGSTHTELQINNNQLVKAGDVIATTQILCKERGVLQLPDSVEGEPIRRLIVERNEDKIKINIKDKAVVKTGDRVVDGDLISKGVKSTSCGEIEEVSSEYVILRIGRPYMVSPDSVLHVKDGDLVLRGDGLALLVFERQKTGDIVQGLPRIEELLEARRPRDSSILCKKSGVVQIKEGTDEESVSLSVIERDDSISEYQLLMGQNIMVSDGQQVTGGELLTDGPINPHDLLDCLFTDLKDQKPLMEAAQESISKLQRKMVNEVQNVYKSQGVAISDKHIEVIVRQMTSKVRIEDAGDTTLLPGELIELRQVEDTNQAMSITGGAPAEFTPVLLGITKASLNTDSFISAASFQETTRVLTEAAIEGKSDWLRGLKENVIIGRLIPAGTGFSGFVEELASEAGPHPDILAEESGGYRRTQNLRPDYTVDMPQTPIVSSTAILDDPSDEDLETTRNRHGIDPTSSNFAAFARPNAENQFSEDQLPDPAALEGLQEEGLLSDG,MQNPAEKKSSILKDFKNLFIWIIIALIIRWQVIEPRWIPSGSMLPTLQIQDKILVEKLTPKITSKSNLSKLKNKIIVFNVPEQLIDAGYESDIALIKRVIGVPGDKVEVKEGNLYLNDIAQNNYISDKNINYSTGPYYVPEKSLWVMGDNRNNSMDSHIWGFLPYEKVIGKAIFRYWPLNNIGPIRFPSLNNLG
33
+ MEDWQEWEYFDYHGELRSKRTKICITCTHFRYSTTDQCVTILTCPFHQKLIPQGDHLVKGCTYWRKDSRIFAPEAA,VRFHIQQEIDIPASTQLYNQICFAIAARYYPPGHRLPSTRQLAMQTGLHRNTISKVYRQLETDGVVEAIAGSGIYVRDNLKKSFNSKNNLNTTPALETKKAVDKLIKLGCTLQETRNLLTNEIDWRIKCGSRIIVSTPREDIGASMLIAEDLSPNINVPVEVIPMEELEKVLCNSNNGTIVTSRYFLQPLEKLAKQYRVRAIAVDLSDFQKELKIIKELKPGSCVGIVSISPGLLRAAEIIIHSMRGSDIVIMTTISDNSNRLLALLKASNHIVCDGPSLSVIENTLLKNRSQLMRVPQIICAKNYLSIKTINHLKTEIGVIN
34
+ MKVIVIDDDPTGSQTVNNCLLLLKWDYSTLIKGFQSKSNLFFILANTRSLSENDAKLRLVEICNALKKVISKESYKEEFIFVSRGDSTLRGHNFLEPKIMNDCLGPFDATFHIPAFIEGKRKTIDGEHFVDNVPVSQTIFAKDKIFGYKTSNVKQLLFQKCKSQIKFNDIQNLKISELKVLESKEKNIVFNKIRNLKENSHVIVDIENYSQLQKFSLSIKKLSKQKKFLFRTAASFISSISAVKDNPKEPFFYSLIRRKNREKKFLPGFLVIGSYIELTTMQLKEFLEISDCIPIELDVFEFLKISKLKSNQDQLEVFKNKLLAQIRSILKQENTPVLFTSRKEVSLARNDEQVNFNNSLAHFISELVSDLKNEIGYLVSKGGITSNVILSNGFKANYVYLQGQIITGVSLVTFKLENDENLPIVTFPGNIGNQDSLVKVWRILENKNNSSN,MRILHTMLRVGDLDKSIDFYVNILGMNLIRRKDYPHGEFTLAFVGYGSEKDNAVIELTHNWSKKSEDYELGNKYGHIAIGVKDIYDICQGLEDNGCNVTTKPKTMKNSTTVLAFVEDPDGYKIELIERD
35
+ MANNFYQWWKNHRRVVTFGGFLILLGLYVSPVIKEAKYKNMCIKLSEKGALNKLNGDNIGETLLKDTGLSIEELAKIEGYRNCF,MNDLNIEFPLDDFEELISQIGWSSLDEWFIFWNLKKEMLSINNFWDDNVKDDWIWGLALPLLSQAYKLNKKSPDRKIIGISALPGTGKTTLGKWLESISLKLKFKLSVISIDDFYLPSEEMEFAIKNNPWNVSRGFPGSHSIDLMKEKLLKWKTDGQLNVPVFDKSLRKGLGDRAHWREESPDLLIIEGWFLGVKPLSIDLDNSEKFSPPLSVFESSYRNKIQNNLDQYLDIWNMIDQIWHLKPLKFEYLNEWKSNQEKSMHFKSGSSLKGDNLSNFLRMLNVSIPHKSFDDINSDVLLMINQERKLVRVGLNQQISK
36
+ MKLNQFLKWHNIVSSGGEAKILINSGQIKVNGEIEKKRGRKLVKGDKVMFLKSELIFE,MKPQLTLQTPLELPHQEISNYLNQLWISEDEESVGANTFTLMVWQPAWLEQCLVKSGLISGPITGTLSPEIIKVAKKLIIDKGLSHTTSIHSEELLTLLKENLLNKDYEDLRGQFFESSISTLNPRRLITLAPTLNKESEIKTFVSAYCPLSDNTITQPICGDLVVIRGDSNSINNKGLKIIDDLSIKDLPIWLWWNGSLDESQEIFNFFTDQGIRLIIDSANGSPKRCLKILYQSIKSNKAINDLNWVRLKSWRESLAMIFDPPSRRPILEHISDIDIDIAEGNFLQALLLISWISDKLEWVFSKINKHGDLIKIEFKRKNGENILTCINPVPLGNPSIHSGQVIGLRLISKISEVRKNNTCVILGCESVECMRLEAGGMADMQLIEQVVPNSFSSSESDVSKLLGSSRGNTSPLFENAIKVAVQIFNGFNK
37
+ MKDKQEKIRMFLPFSWVICAVISFAYINSHLINT,MEFNIQDKVKLKNPLSYLKTSDNMPMLRPPDLVAIDEVGEIIAIKSPDTVEIKFRRGSFLIDTDKIEKTQI
38
+ MKYLILGSGSFAGQLIFSEYLERNYDVYGFNRSRVKDHYQWPWIKKYKNDLGNRWFEYNLTNDVEEMISHINRLKPNFIIDFMGQGMVAPSWLKPEVWYTTNIAIKSRLMNALIDSSFLQKYIRIGTPEVFGSNENFLKEDECFNPSTPYAVSHAAIDFNLRCLYKQYNFPYLIGRFANFYGVGQQLYRIIPRLFLSCRSERNFILDGKGESRRSFIFSKDIVSAIDSMIKFDGIGQEFNFSSNEEISIMSLVNKICNLTNVDKSRILKFGPERPGKDRYYRLDIKKSKNVLNWEPEVSLDEGLNIINIWISENIENLSNKSWTYEYKD,VTQRHFFVTTSSSSAAEKTLKTKIWKRVFIVCMILLISGSFFYFNHEENNTYILKTLELNGSVKEGDTLFKMNCVGCHGITARGLVGPDLQSITMRLNDAEIIKQVIEGVTPPMPSFEIDPQNMSNLLTYLHSL
39
+ VENDEIKYKKDIPIEWLKMPSSAKEAEKLNIKKYFIGDKRFMRELEDRDEYNAA,VILKTIKISNKLCLIGIIVFCLFQNHSVSASREPLIRVLISKNRNLRIRSDKSIPLIIKGQKFSNKKIKGLTVKKENNTTSLFFDKNKQKIYDLKNKVKLVVKSSDGRGIWVGQKRYSGILNLLVLESEILVINILGIEKYLSSVVGSEMPAKWPLEALKAQAIASRTYALKQKGNQIYDIDSTQKNQVYNGLESRTYKTIRAVRSTRSLVLTYKNKLINALFHSSSGGMTENSQDVWKNEYPYLSSVRDFDRNNPKLQWKKKFSSGELQKLFPEIGGIKKIEILNITNTGRVKNVQIFGKYGSDQISGVDIRKRMNLKSTFMRFKFIEDKKYISDNDNSNNPIEKTLIVFGRGSGHGVGMSQWGARYMASKGQKADRILKHFYKGVGIKPFSKNYL
40
+ MSIETTVLDFKLSNTFEEYQAHMNAPEQQAMFKEMGVKTFYIGKSLEDPKRATVMFQGPVNTCYDIFVNPETKPIVEASGHIYEGTIINRWIS,MMFNKQKKLILNLKILKLLFFSPLLISIPFYLGNSDAKAGLEFQWDQDSGYRRLKWFQKENKKRFRNTIFFFLRPSDRQANLLKITLNIPKTFDSTLKDKVSFCKVKIGGFEGRTKCIEDIPADVDINEDNSSLDIYPYSPIPSNKDSYAIVFKKISNPKKSGLKQFHSYGQYAQKNTSSRYLGSWTIVID
41
+ MNFITNSRQFHKSLAPWVFLPLFISALTGTFYRICKDLLGYSRDEVHWLMSLHEGEWLGDNGELIYVILNSLGLIWMLITGFQMFSKKISFPKKVTKGESKG,MASFTVGIVVFPGSNCDRDVSWALEGCLDIKTKFLWHESSDLNDVDSIVLPGGFSYGDYLRCGAIARFSPLINSLHDFIKSGRRVLGICNGFQILTESGFLPGALVANKNLNFICDDVDLNVITSKGGWFQKLNENQNIKLPIAHGEGCYHCDQDTLKRLVDNDLIALKYKTNPNGSTSDIAGITNEKGNVLGLMPHPERACDESIGGIDGLYTLRSLITQ
42
+ MNNSQRSVTHSQNGDYRTIEQTMEKLSGGTRRLAAQLTTSATFNSLWNVLTDYDRLNLYIPNLLSSRKIYKNNNNVHLKQVGAQDFLGMKFSAEVTIDLFEEKELGLLKFSLIKGDFRRFEGSWKIKKIKDTSKNSLIYDLTVQGCQWMPIGMIEKRLKKDLSENLIAVDKQAKASIK,VILNPELQEKGEIKDLMKSRGSFRAFPLAAITGHSLLKLSLLLAAVDPSLGGVIIAGGRGTGKSILARGLHTLLPPIEVLDNESILEKLTMSNSNTSLRPIGRNLDPDKAEEWDISTNKLLEEVIGSDYLNQIEEIPKKVREAPFIQVPIGITEDRLVGSIDVAASLSSGEQVFQPGVLAEAHRGVLYVDDINLLDDGIVNLILEATGREQNNIERDGLSLSHPCRSLLIATYNPEEGALRDHVLDRFAIVLSADQSIDNNQRVEITKSVLSHAENNIKFSEKWSEESDNLSTQLILARQWLKDVKITKEQITYLVNEALRGGVEGHRSELFAVKVAKANAALRGDENVNSDDLKVAVRLVILPRATQIPPQDDDIQPPPPQDQSPPPPQSNNEDSEPESNEKEDNQEEEQDNSDGEEDSTPDIPEEFILDPESCMVDPDLLLFSSAKSKAGNSGSRSVILSQSRGRYVRPLIPRGKVKRIAVDATLRAAAPYQKSRRLKNPNKTIIIEENDFRAKLLQKQAGALVIFLVDASGSMALNRMQSAKGAVIRLLTEAYENRDEVALIPFRGNQAEVLLPPTRSITAAKRRLETMPCGGGSPLAHGLTQSAKVAKNALSTGDIGQVIVVGITDGRGNVPLGTSLGQAEVNENENVDLKQEVLDIAAKYPMLGIKLLIIDTERKFIASGFGKELAEAAQGKYVQLPKATDKTIAAMALNAINEF
43
+ MSKDFKSGKVKRLPINNLNLPNFVNNSLRNNTKVNTVEGTNVIRVPFGKRFPKKQRPDKNQNIATLILPINTFINPTPPPHVA,MPSLSDGDFYYYRNFYIDQPPQMAQVFYESLHVISFSLKIIFYFLERLSDVN
44
+ MRHQLRVPLLSKPADQRKALLRALTTQLIREGRITTTKARAKALRNEAERMISLAKEGTLSARRRALGYIYDKKLVHSLFEKAQERYGERNGGYTRIVRTVARKGDNAQMAIIELV,MKDIFLVLDSYQYQMESNYQETSSLTNLFTENKFIGWLGLFIVFFSIFAIIIFQFLEWESNDKNKE
45
+ MNERNKSLWKQAIKWPLYSVAILPVFISGAYTLNSFKNVKIYNLIAFTIAAILILIWENLTNDLFDSETGIDEFKFHSIVNLVRSKTIVSITAYTSLLIGLVVIAIISISTSINVMLLVGACCFLGYLYQGPPFRLGYQGLGEPLCWLAFGPFAYAAALIALNPSDIYMISIPWKESLLLGSGSSLATTLVLFCSHFHQIKEDKEHGKNSPLVLLGAKKGAKIIPWIVFIIYVFQLFLIINGFIPILCVLFLISFPQSLKLINLLKYSYNKPEAIKNCKFIAIKFQTLNGIGLIAGFIINYLIYK,MKIMDNFDDDLSLKQKEFVEPIDKATNKDLFEKKDEFKEATPKVLHLNSLITKNIYLFTKDPNYKLFAWLMVQLFIFSLFVLVATLMKNNLVPYINSL
46
+ LLLMKRLLLAAVLFLLSEISFAKEKLNYTITSDSQVQNGKGNFEAISNVVIKSINNNF,MDEDSRKVTEEVWLICPNSTEVRRFTKNKNNKDKFFEYMFVDSGIIIGVLGAKPPLMKTRKEIKIEAARKEYQQLIISGWQVTIPKW
47
+ LDQFEVKVFIRLRPSVLDPAGEAIKSASSKLGVAGIKSLRIGKLIEVKIESNEEDIKEKIELLCDRLFANTVIEDYEYSINKL,MNLKQITQKDQLDLKKIYFDSIISIDQKIYTSEQKRAWASQAWDNKYFNLTLKEGKGWLINEREKIIAFASRYPNNRISLLYCRGDSQRKGYGTKLLKKIEKEAIKEGLPCLTTEASLISYKLFLKNSWKIIRKEKIIIKNITFERYKMIKNF
48
+ MEQGLNNPGPLTIFLVFTAGLLTSLGPCSLSLLPITIAYVGGTKNNKFKLISFSGGVIFSLITLGALSGFLGKIYGQLPSYYASLVALIAIIMGLNLLGILKFQLPNGPDLQFMEDKVPSIITPFVVGGAFGLASSPCITPVLATLLAWVSQAKNPTISIIFLFFFGLGQVTPLILAGATTENLKQFLELRKYSQVIPTLSGVFLVSLGILNLISNWI,MKENITELWFSWFYKNWEKNAPGNLIDKGLSPSQIAERFVNENHKEFLEIANEFDEDNYQALNEFMKLSESELHILKYFLKLIKLKNS
49
+ MGTANLHDSTNKPLYGERIIEESNIICFENPNKKRIYEISIELPEFTCKCPFSGYPDFAKLNIYYQPNMKVYELKSLKLYINKFRDLKISHEEVVNRIMDDLLKAAVPHWIHLNADFNPRGNVSMKLDIYSGQKRN,VRIIFWGTPEYSVKSLEVLKKSDHDIVAVITQPDKKRSRGNKLISSPVKEYATKENIPVFTPETIKENIQFISILNDLSCDLFIVIAYGKILPKAILDIPKYKSWNAHASLLPRWRGAAPIQWSILEGDKITGVGIMRMEEGLDTGDVLVEKQIKIENNDNLKTLTKKLSDLSSELFLRAISDIEQNKNRDINLLLKKQTDFKRELKYARMINKLDYIINWENSATDIYRKINALYPRANTTYKRKNLKIIKIKILTTHEIHNKNYKILSNVFKPGLIIGLIKNVGIIITTKTDPILLLEAKLEGKKVSSQNQLIQQLNPVIGENFSD
50
+ MAQLETRTEPMVVNFGPHHPSMHGVLRLVVTLDGENVIDCEPVIGYLHRGMEKIAENRTNVMYVPYVSRMDYAAGMFYEAIVVNAPERLANIVVPKRASYIRVLMLELNRIANHLLWLGPFLADVGAQTPFFYIFREREMIYDLWEAATGQRLINNNFFRIGGVACDLPYGWLEKCIDFCDWFAPKIDEYEKLITNNPIFKKRIEGLGTIERDQAINWSLSGPMLRASGVSWDLRKVDSYECYDDFEWEIASEKEGDCYARYRVRVQEMRQSLKIIRQACEMIPGGPTENLEAKRMATEDKKSEIFGMDYQYVAKKVAPTFKIPNGELYTRLESGKGEIGVFIQGNNEVTPWRFKIRAADLNNLQILPHILKGAKIADIMAILGSIDVIMGSVDR,MVRKISFIGVGPGDPDLLTIKALKKIESADVIFWADSLIPEKIINFSLKGSEKIKTSTLTLEKITSIMIERFNEGKTVIRLHDGDPCLYGAVKEQLEILRQENIETEVIPGVSAFQVAAAYHQAELTIPDITQTIILTRAGGRTGMPEKESLKDLAKHKSSLCLYLSARHIKSSQKTLLEFYPPETKVIVGYRVSWDDGWTSLIELKDMEKFTLEKELIRTTIYIVSPAINTIANRSNLYNPSYKHLFRGK
51
+ MLRPPFSQESISIDKWDVIVIGAGAAGLMTCLELPENLNVLLLNRNTSKRSSSRWAQGGIASVVRPEDSFALHVEDTLKAGDDLCDLSAVEMLVKDAPGCVDRLQNLGMIFDQSSDQLSTTLEAAHSCRRVLHVKDRTGRALVEVLEDHIENKENILHCRGVRVTELLIEKEVCKGVQVLDGSNLYWITSKAVVLATGGGGHLFTNTTNPAQSAGEGIALSWKAGVAIEDLEFIQFHPTALKFYGSPCFLISEALRGEGAVLVDKNGESPVKHLENGDLATRDQVSRAIMNNMQENDVDHVGLDLRFIDPEKIVERFPMIISRCQDYGVNPLNEVIPVAPAAHYWMGGVHTDLNASSTMKGLYAVGEVASTGVHGANRLASNSLMECLVFARKMSCIELNAPYNLRRLDRYTTEIFMDNPKEDFILGVSDKIDSLRKLCWSNLGVSRNKKNMNKLLKTLQDEIDQLQKNPLLECLNKIEIDQKLKLSEPNRRGLNLLLDLHNRQITTLLLLKACLFREESRGGHYRDDFPIKETTWKCHTRQQLNQEIIKRFIKN,MRTILISGANSGIGLNIAHKELKAGNRISIGLRDLESVKGSVIDPNNWTNEKILLNKYDALDKFSAKKWVENTVSKFGGFDTLINCSGVLSKVPFLYKDGDEEEILNTFNINFLAIWHLCRISWKHLSQSNNGRIIVLVSMSGKRSKGDLAAYSSSKFALMSLCQTMKNKGWEENIRVTAICPSWVNTKMAEKISSIEKSKMTQPGDIAEICSTILKLPMQSVPFEIALNCNYEI
52
+ MSRKSELLKGEETKNFSEFSQLADFSLMNSLNADPHSTKDGNDHRARSVNSGHYVPVTPTPIPEPIYVSHSKTLFKELGLSSDLTKDKNFCRFFSGDIEVAEYPMRPFGWATGYALSIYGTEYTQQCPFGTGNGYGDGRAISVFEGLFNGKRMEMQLKGGGPTPYCRGADGRAVLRSSVREFLAQELMHALGIPTSRSLTLYVSGTEIVRRPWYTEGSRYFEPDIMVDNHAAITTRVAPSFLRVGQLELFARRVRSNSHDDAFNELKIIVQHLIDRNYRDEIDPSYSFNEKVIRLANLYRGRLISLVTNWMRVGYCQGNFNSDNCAAGGFTLDYGPFGFCELFDPRFQPWTGGGEHFSFFNQPFAAEINFKMFCSSLLPLLLENKEDIEKLEKIKNDFSKFMSKEMQLMWAKKLGLEKYDETLTNELFNLMVNSKVDFSIFFRKLSHIPDNISFLKDSFYLPSSEELDKEWFIWLKKWQDCINKQGDLKEISKSMKQVNPKFTWREWMIVPAYQEAEEGNYNKIKELQTIFKNPYEEESLEIEQKYNRLRPREFFNKGGVSHYSCSS,MNYIQIKDLSKSYSDIKALKNLSMEINAGTLFGILGPNGAGKSTLIKILATLVEPDGGEVFVNNINLIKNPRKIRELIGYVAQDIALDKILTGRELLDFQSDLYHMNKKEKYERIKLLINQLEMNDWIDRKCGTYSGGMKRRIDLAAGLLHLPKVLILDEPTVGLDIESRNIIWQLLKDLKNDGMTIILSSHYLDEIDKLADSLVIIDDGKVIAQGTPAQLKNKLGGDRITLKVREFSNHEESKKISEILSSINGISQIIINKAQGYAINFVVDKEKDLLTKLKVELAFSKFEIFSLAQSQPSLDDVYLQATGKTLLDAEISMTGKRDLKKESKQSMR
53
+ MFLISAEKFSLWKKKQLSKGGDNHSLNLLLESLGGLSNIELNLLKINLEKNLNFKVNLDLIESFWDKHLNTSIPIQYLSGISFWRNLKLEVSNRVLIPRPETELIIDIISGIFKNKEEKITFVDLGTGSGAISIALALENPNWNGIATDIDKNAIKIASRNFATYSNQSNLKFYNGNWWDPLKNFKGEIDFAVSNPPYIPQDTYEVLPIEVKNFEPKLALLGGQEGLDHINQIVQNAPLYLKNKGWLLIENHFDQGEKVKKLFLENRFTSVKVLKDFSGIGRFTIGRYK,LFDKENLKYFLIWPMSVLLAIFFKYYGFLKPDFLLINNYLVLLLVCGPALVVTIILVFNKI
54
+ MKLQTQFTVPKKEFRDLDYVNKVKVLEETLKKECMDYPTKEDCLVCCN,MSKLKGPDGRIPDRLPDGRPAVAWERRWTEGTLPLWLVATAGGIAVIFVLGIFFYGSYQGVGAG
55
+ MNNKRIFHDPIHKEIIIDSDKPEELMIMQLIDTLAFQRLRRIKQLGAASLLFHGAESSRFTHSIGVFCVARKIYRKLVEINPDFSQNKFILFGAALLHDLGHGPLSHTSEVIFAHDHELWSKNLVKNYSPISSILKNFGTELPNQIGDLFKTKNLFSRPLKTLISSEIDCDRLDYLLRDSYNTGTKYGLVDLERIISALTFSPDGNIAIKPKGVIAIEHFLVLRNMMYRTIYNHRINEISTWILEKIIQIIKKDSVKKDLWIDESMRRWIFFPNQLEVKDFLANDDIVFYFHLMKWKEESFEPLKTLCKMFIDRKLLKASDISFLTKLKRLEILAFARKKCKLNNYDSEIFCGIKERSFKGFKSDNSLKIWDGTYQNLLENQSDLINTLMSSKDTSLIIYPGEFRKEIEDQIAIERANV,MSLKSVLKNKSLGILVHPSSLPGGSYCGTFGEGAKDWIKKLCKYKINHWQFLPLTPTDSTGSPYSSPSSFALNPWFLDINKLIEENFIISLNKKDLQSINQNEDHFDFDYANNLSKKLGEYLLFDWESQSEMRKTDFYLWNKKNTWVEDYSIFMVLREKFNMLPWWEWPLEFKQKENEFIKTWIKDKKNEILKTKLIQWHLDKQWKEIKVFAKTNGITLIGDLPFYVSRDSVDVWSNKSLFSISQNGDLLFQSGVPPDYFSSTGQLWGTPTYYWAKHKSTAFRWWRKRFKRQFELVDILRLDHFRALAGYWRVDGNAQNAINGSWINSPGKELLNLLKKDLKSDYLPIIAEDLGVITKDVEILRDNYELPGMKILQFAFDGNDNNPYLPKNIEKENWVVYTGTHDNATSTSWWDCLDITIKTHIKDKYKYSIDPSWNLMEIGMSTKANLFISPIQDILSLDDSSRLNTPGTITNNWRWKLNQTLDEIDMNLKKYSDLGNNYGRLSN
56
+ MSIETKKYNNLISISTELRKRIIKTSYEAKIPHIGSCLSCIELLVFLYWKELNIDPSNSEAINRDRFILSKGHGAPALFQVLGLKGFFPIERLNSFGKPGSVFHEHPPKPGYIPGIEAATGSLGHGFPMAVGMSLAKRINNLQYRTYSILSDGECNEGSIWEAAMFAGAQKLDDLTIFIDFNKWQATGRSKEVLALDPLKEKWQSFGWDVYEIDGHKFNQIDKSIELAKTNKNKPSAIIAHTIKGKGVSFMEDNNNWHYKTPNEEEFKKAFEELKN,MSKFSSQEIESQYNLIKTLLSDPEKYNDALDAIKKDIAHMPLELKKKLEEENITF
57
+ MQKKSFSISWGDTSLEMLPSKALLLPQTNELLICDVHLGKAEYFQQNGIPLTNNSDEQNLLSIKKIVENHKPYKLIILGDLFHSKYSISKSIKSKVENLSESLNIKIELIVGNHDIGCKVKNISFLEYKRSSNFIFSHEPIGKFENKILNICGHYHPKTFLKNSKDKLSFKCFAMDEKNNTLYLPAFGDLTGGYPCKNSFKKWAIISEKEIIAV,MKRLDLIFSERELDAIINTLEKANVPGYTVMKHATGRGPERVVTEDMEFTGLGSNAHVIVFCEQELIDQMRDNIKSDLSYYGGVAYISEATPL
58
+ LEELITKKLEVNDNLKSRFHNGFNIVKSTFLSSPIALRLWSSFFVILPIFVQAPWVRFAPISALCATFFILAAAFLLSRKEGDKWFIVGSLLLGVTGSWLGGCLFWGWLSAYPILHIPVEAVALPLAIVGLGTKWKIGSSFYISSLFGTAITDLTIFLTGIMDQWKEVIIADSDNAPLILQKTSENLIQFKSLSIIILAALILWFISKEIFNYATSNSINGKAFLVSSYVIQTTLIVDGIFIMLAIIQPTLSGLV,LNRSFYFKFSVVIISFLLVWTLRDFILLIICSLVISNVVSNLCYQIQTILKLPRFVSLLIVLVGISFMIFAISIIVLPPFIREFNEILIDIPNGLSRVNELVNSNLNKFNDLIYGKESERIVNIFDLVNDVVPIPDGATIAKAIQESFINIINLAGNLGSGFIRVIFVLVVSFMISIEPKAYKEGVLFMIPKVYRNKFRIILDKCNIALTNWTFSIVISSISVGLLSLIVLSILDVKYVVSNAIIAMILNIIPNIGPVLSGIFPISIALLDNFWKPVAVFGAYIVIQNIESYIIMPSILKKKTNLLPGLTLISQFGFTFIFGPLGLVLSLPIVVVTQVLIKELINDN
59
+ MSQIFTWIWVSSGILLILLVLLHSPKGDGMGGIAASGSSMFTSASSAEASLNKITWTILIIFLSLAIILSAGWI,MSKLKGPDGRIPDRLPDGRPAVAWERRWTEGTLPLWLVATAGGIAVIFVLGIFFYGSYQGVGAG
60
+ MGEAKRREELGLPPREKKEAKKDSKSNLNQILNKYPFAPYILGISLLTILIIDLVNYYK,MASNKDNQLVEKNDDNLGVENISNNPSIQSEQKLEVTEDEISFKEEDLDNGFACFGFNKLILNSLESKGYKTPTPIQKAAIPELMLGRDLLGQAQTGTGKTAAFALPLIEKLENNKESNAKVLVMTPTRELATQVADSFKSYSAESTNLRTLAIYGGTDFRNQISSLKRKTDIVVGTPGRIMDHIRQGTFKINNISCLVLDEADEMLKMGFLEDIEWIIDKLPENKQMVLFSATMPNEIRNIAKKYLNEPAEILIKSVKQETQLITQKYINVQRHHKLDALKRILEITNEGVIIFVRTKLLTTSIAEALENSGHSVAVLNGDIPQNQRENTVDRLKKGFIDILVATDVAARGLDVERIKLVINYDFPFDKETYTHRIGRTGRAGRSGEAILFVNQREKHFLRNLENSTRNKIEEIEIPNNKIINEKRMGKLITNLNESSLDQENNEEKKALMIDILDTLREKHSMEDSNIAMAAINLAIGNKSFFINEDESWLYRQNNSDRNRSNRNGNNRMRNTNRRNNYQNDSFETYKFNFGKMDRVRVANIISSICTSTNINGRLIGKIQIFNEYSLVDLPRDLHGEVKNKLKNLRIRN
61
+ MNSKLKFIYEGKAKKIFAYEDSDKVIIEFKDDATAFNALKKAKFEGKGELNCLISSKIFEFLIKNNIPTHYIGLKNNNSMIAQKIKIIPLEVVLRNTAYGSLCKQTTIKPGTVLESPLIDFYLKNDTLNDPLLTKDRINLLKIVDEEELDFISNMTLKINKLLKKFFYNIKLDLVDFKLEFGYNSNGQIVLGDEISPDNCRLWDLNQKNGMIVSLDKDRFRNDLGGFIEAYSEINKRINNFI,MGFVPLHNHSDYSLLDGASQVSKIVDRACELGMDSIALTDHGVMYGVLDLVKKCKSKGIKPIIGNEMYIINGSIDDPQPKKEKRYHLVVLAKNHTGYKNLVKLTTISHLNGMRGRGIFSRPCIDKSLLEKYNDGLIISTACLGGEIPQAILKGRIDVAENTAVWYKRIFGDDFYLEIQDHGSIEDRIVNVELIRIGKEHQIKVIATNDAHYISNMDVEAHDALLCVLTGKLISDEKRLRYTGTEYIKSEDEMLRLFNDHIDKESIKEAINNTVEVSQKIEEFELFGTYRMPKFPLKEETDSLSFLTKITKQGLLSRLNKNNLDEIDEIYKKRLTSELKIIDDMGFPDYFLVVWDYIKFARDSSIPVGPGRGSAAGSLVAYALQITNIDPVKHGLLFERFLNPARKSMPDIDTDFCIDRRNEVIDYVTNRYGEDKVAQIITFNKMTSKAVLKDVARVLDIPYGESDKLAKLIPVVRGKPYKLNEMIDKKSPSPEFRDKYLKDIKVKKWIDLALRIEGTNKTYGVHAAGVVIASDPLDMLVPLQRNNEGQIITQYSMDDIESLGLLKMDFLGLKNLTMIDKTISLIESSTGQKINIDKLPPKDNKTFDLIGRGDLEGVFQLESSGMKQVVKDFKPNSLEDISSILALYRPGPLDAGLIPKFINRKNGSEKIDFPHPFIESILTETYGIMVYQEQIMKIAQDLAGYSLGDADLLRRAMGKKKVSEMVKHRNIFIEGSCKKGVDKKIANDLFDQMVLFAEYCFNKSHSTAYGAVTYQTAFLKAHYPVAYMASLLSVNAGSSDKMQRYISNCYSMGIEVISPSINLSGIDFTIKKDQILFGLSAIKNLGDSAIRNIIDNRNKLGVFKSFSDLCDRLPSNILNKRNLESLIHCGALDEFSENNNRAQLFSDLEYVMEWASSRNRDRISGQGNLFDSISKNDTKEFSLSQGSKVEDYSLIEKLKLEKQLLGFYLSDHPLKHLAKPAKLVSPISISQLENSHDRTKVSLVGMIPELKQITTRKGDRMAIVQLEDLSGSCEAIVFPKTYCRLSEFLLTDTRLLVWGTIDKKSDKTQLIIDDCREIDNLKLLVINLDSSQASDIRIQNTIRDCLVKFKPDRDKCGIKIPVLAAVRNNDSITYVKFGDQFCVGDILGVSKLLSDKSFQVNLKSMIA
62
+ MNIKQPSSHKNPEPESSVLYIVGTPIGNLSDLSSRAINILKNVSLIACEDTRQTKKIMNKFEFTNNLISFNKHNSLKKIPRIINDLNSGKSVALVSDAGMPSICDPGEDLVKNVRSNGSNIICIPGPCAALTALVSSGLPSSKFIFEGFLPKKKSQREKILFEISKNEKTTIIYESPHRLKKLLNELKIYCGGEREIQVSRELTKKFEEHIGNDINNVIKTFQEKEVIGELTIVIKGIKKESNLLINKSDLKKELNELIKAGLSLSAASKYLAKKHGIKKSETYNLN,MSFLNNWWLIPLIITIFSGILCPAMGTVLITHRRLLQVNLISHCVLPGLALALALGIHPSIGGVISGLVGAIIAESLTNKKSENYEAVMNTILAGMLGFGVLLIPLLGIRIDLEAVLFGDLLTANLGDLLRTIIAFLTFILLVTFGYEKVVYVGLDPEGASASGINVSLLNLALSFTTALVIVSSMSAVGVILVIALLSTPTLLGLDKAQSLRIAMMRSSFFGLCISLLGFILSIVFNLSPGPAISVICVASLIIPKIGNKF
63
+ MAAKEHKSLQGSKILLIEDDKSIRLTVTESLISEGFEVSNFKDGSSALDFILGEGIKDFDLILLDLMLPGLNGLELCRKIRNEELYTPILILSAKGNESDRVLGLEVGADDYLTKPFGISELIARCRALLRRSKRGKEKKQKIETIIEYKNIKMFTEECRVTNFNQEIILSPKEFKLLELFIKNPKRVWSRDLILEKIWAIDFIGDTKTVDVHVRWLREKLEENPSAPKIIKTVRGFGYRFG,MRTILISGANSGIGLNIAHKELKAGNRISIGLRDLESVKGSVIDPNNWTNEKILLNKYDALDKFSAKKWVENTVSKFGGFDTLINCSGVLSKVPFLYKDGDEEEILNTFNINFLAIWHLCRISWKHLSQSNNGRIIVLVSMSGKRSKGDLAAYSSSKFALMSLCQTMKNKGWEENIRVTAICPSWVNTKMAEKISSIEKSKMTQPGDIAEICSTILKLPMQSVPFEIALNCNYEI
64
+ VARIAGIDIPREKRVEIALTYIYGVGLTRSKLILSNTGVNPDIRVKDLSDSDVQKLRGATEDFTVEGDLRRKEGMAMKRLQDIGCVRGRRHRMSLPVRGQRTRTNARTRRGSRKTVAGRKK,MTLSSYRMHRIYLAATMGYGLGSDDPEEVAYYKKLRKEMDEMKKDVVKKGIPLTWDIPDGMDK
65
+ MENSKPNYWQNAERTNGRMAMMGFFALVVNYGLFGWIIPGIF,MQILIIPIGFILWYFAYESKPINNDEVTSLWEKENYVKRTKLLNILKESF
66
+ MDLCFLSTNITSFVADPLSHEFMRKALLMSSLVAAVCGFLSSYLTLKGWALMGDAVSHSVMPGVVVAYALGLPFSLGAFIFGVGSVALIGFVKQKSRVKEDTVIGLVFTGFFALGIVLVSKIKSNIDLHSILFGSPLGISLSDVKQTVFISLLVVILLSVFRKDLILYCFDPRHAKTVGINVLFLHYLLLTCLSLAAVVGLQSVGIVLVVAMLITPGATAYLLTDKFDNMTIISVISAIISSVFGIYFSFWFDLETGGSIVLVQTFIFLFAFLFAPRYGIFKFKKLFSSY,MKDMPTWIDEYHKGSRFGLNGKVLLKKNSKYQEILIIETDFYGKALMLDGCWMTSVRDEKYYHECLVHPALSSIDKKSHILIIGGGDGGTARECLKYSQVSKIDLVEIDEEVIKVSKTFLKEIGGGAWSDKRLAIHIDDGVKWVETTKDNSYDVIFIDCSDPSEFSNLLFTDSFYKECKRILTKKGILATQSESPESFENIHIHILKSLNKIFKLSETMYSFVPIYPSGIWSWTFASDEELNLSKVNYKEVMEIENNCDVWNLNFQNAAFKMMPNKIVKKLNS
67
+ LQISNNNYPWPDDWGRKTSIMGIINLTPDSFSDGGDFCSIEKVLNQVNYFVSNGVDVIDLGAQSTRPGAIEIGAKNESKRLIPYLKKIRSEYPNILISIDTFNSEVAHEALSNGANWINDVTGGRRDEEILDVVSEFNCPFVITHSRGNSITMNNLTNYDDFLVDIIHSLESLTKKALNKNVSKDKIIWDPGIGFSKDTKQNIEILRNVPLLKNFEFPLLIGASRKRFIGEILNQPNPKERDIGTLAISCLCSQQKIHLVRVHNVKINYQVLKVADHIFR,VIPSDTPINQHSLQSLELWLKDLGATKDIDNPSKWYLLLSNWNATIIFEQEDLSVVWESGGKLTKRLFSYCINREDIENAILQGP
68
+ MKAKPETTAHVSVKEYCFTKKEVKGVVEASDFKWTFTWSFGKGVLFVTPPLGRALIQDSLLRFFLKKDYELEAGNEYKFIISAKF,MDICLLNIDNNSNKSLNPTSVIGMLWLQTHFEDTQWEALSNNQVIISKENSKLLVKDAISAGLKIKSFSGVSMLDVFQKKN
69
+ MKNKVFPFIKKYPMSILLAIIAINLFSIASSLRTEAYLNREKNLCIKYLKHQIDRDTLIKKLRIVKQANPSSICDSVLKS,MNKFEFFKTDAIQSSYGGQFSYKVIGPCCRLYDREELPWPCSRLAWRSKEPSWRRIGARFVADMASRKCPSYSVQILEPGSKPVETVITLFSKKFSSEIQEWWYSKKPGSKEPGNVLPESI
70
+ MDYKTSGVDIKAGREFVSEIKQSVESTYSSNVLEGIGGFGGLFKIPLEGLKKPVLVSGTDGVGTKLELAQIKNFHFEVGIDLVAMCMNDIITTGAKPLFFLDYIATGKLEKNQLLEVINGIAHSCRENKCSILGGETAEMPGFYSKNKYDLAGFCVGIADEEKLINGKKICENDLIIALQSNGMHSNGFSLVRKIIENNNQIDKQFEKKYNLDFYDELLKPTKIYFKIVNQILSQNIQIKGMSHITGGGIPENLPRCMSSDFIPYIDKKSWKIPVLFEFLKDVGQIPEKDFWNTFNLGVGFCLIIDKKYKDKILNICNAFDISSWVLGKVLKKNNSKENNFLPEIII,MYFQDIIQNLNKFWSEEGCLIMQPYDTEKGAGTMNPHTFLRAIGPEPWSVAYAEPCRRPTDGRFGDNPNRAQHYFQYQVIIKPSPDEIQEKYLTSLEFLGINPKDHDIRFVEDNWESPTLGAWGVGWEVWLDGMEVTQFTYFQQCGGIDCNPIPIEITYGLERIAMFLQDKESIWDLNWNKDINYSDIWLQFEKNQCSFNFSNSNPENMRKLFAIYQEEANSLIEKDLTYPALDFVLKCSHCFNLLDARGVISVTDRAQYIEKIRKLAREVATSWIKERELMNFPLVKK
71
+ MKNFTKNNYSTKRNDTENRRSQSKNNFKKGNDLNTRDDSNRRDNSNRRDNSNRRDNSNKRDNSNRRDDLNRRDDFNRRDNFKRRDDSKRRDNFKSRDDLNRRYDFNRRDNFKRRDDSNRRDDFKRRDDYERKGAIKSNEYSYLKSKEKPRNSFNQSQTRFSSNAQQTENYSENSSKKFQLSPNERNYEDWIWGKHSVFAALNSERPINRIWCTSEIFSSEKFYLLLKDLKSKGVLIEEVPWSRLSQLTSGAVHQGVALQHASTESISLEKLIDISKSKSSNPIIVALDGVTDPHNFGAIIRSAEAFDCKGIIVPQRRSAGLTGTVAKVAAGALEHIPVSRVVNLNRAIDELKKKGFIIIGLSGDGQVPISEFKEKAPVVVIVGAENKGISLLVQKKCDYLLKIPLKGKTSSLNASVAAAISLCYLSNN,MELPCRRFGRTNLKMPVLSLGGMRFQKSWDELKFSEISRKEQNKVENILNLANKFGFNHIETAKYYGTSEIQLGMGFKSIEKKPKIIQTKIPPNRDPKLFEAELLKSFEKLQVKKIDLLAIHGINTPEHLHQAVKDGGCIDILKKFQQENLIGYIGFSTHGELSLIEKAITTNLFDYINLHWYFINQTNSKLIELAHKYDLGVFIISPTDKGGHLHTPSTKILELCSPLHPIVFNDLFCLRNKYVHTISVGIAKEQDFNLHLEAVSLLSESDHYIPKILNRLKEESINSLGIEWYKSWDKNLPNWKNTPGGINIPVLLWLANLIDSFDLEEFAKSRYQLLGNGSHWFPGNNANLLDVNVCESQLLKVLERHIKPKKVIKKLRVLKDKFGDKSLKRLSKN
72
+ MSESKSPLDRIYRLIASHAWMTENEAKVLLVMMYASGTKSLGLEGKGLNKFMERSLEKMCSDNKENLQEYLLKIKDKFPNNELLSED,MEPTSSLNRGDRKKGSSLVTGSEVQSQSNGASCFITTDSEKSLVSRQASQVEQIELRTYVFLDSLQPQLAAYMGTVSRGFLPIPGDSCLWMEVSPGMAVHRVTDIALKASNVRLGQMIVERAFGSLALYHKDQSTVLHSGDVVLDAIGSEVRKRTKPSTSWTEVICAITPDHAVLINRQNRSGSMIQSGMSMFILETEPAGYVLKAANEAEKSANITIIDVKAVGAFGRLTLAGKEGDVEEAAAAAIRAIDQISNY
73
+ MTMNNLKTKKLVNLGPSGRAVAQPMDVSLLDNFYEHLTMERYANVQYFSIYLWFQERDLDGFASHFLSESQGEMEHAYKFANYFIARGQTVKLKELPAPIQTWDSIEDIISYSFNMEADLTSSLQQLYSISERISDTRTSVFLDPIVDAQTKSEDEFAHILGKVKFAANQPSAILLIDSDLKKK,MFLKDHLKDTYQKASFDNNHLMLENIINIWAHRFGPESLNELFVKDQDQDQLKLIEENQAEASQNQINLELIEDHQSEANQNQTNLELIEEHQSEVNQNQINLELLKNLQYEEKIEFKPKETKKSNNTEIINKDIYGSYKNESEFKDKEELPLPNIKNLRKWINNEKKAS
74
+ MIILHIGLFENSFSNIMKSVIFQETANLKKPVPAEKVIELSDKLLEPSSHSKRYPPRLHKTWGTIFFMIAIHLLSLLALQPQFWSMPAVTALFFFYWLTACLGVTLGYHRLLSHRSFVVPKWLERFFATCGAISCQHGPIDWVGLHRHHHSFSDTEVDHHNSKRGFWWSHMGWMFKDVEALKAVPKLSADLIKDPYYRFLNKYFLFLQIPIGLCLYAIGQKLGVGGWALVLWGIPLRLVVVYHITWLVNSATHCWGKAPFESGDGSKNNAWVAALTFGEGWHNNHHAFPNSARQGLFRGQIDLTWEHIKILAKLGFAKKVKLPSRSYY,LNKKLGHKDHFHFIGIGGIGMSAIAMALIKKGYSVSGSDLIQNKETKSLKTLGAIIFDSQIKKNIDFVISKFQDHTLNCVISSAIKDENEELCFCKKNNLSIKHRSEILAMIMNSYTSLSIAGSHGKTSTSTFLSTLLELCTHDSSSITGGIIPIYDSNAHIENTKYLVTEIDESDGTIKNYNSDIGIINNIDFDHCDHYSNIDEVLSSFKKFASNCQKLLINYDCKFTKNNFTSKNQWSIKESNNIAYSLIPNIINKDKTVGKYYEHGKFIDIINIPVPGLHNLSNITAAIAACRMVGVSFKEIKKNTESLKLPKKRFEFRGEINQRIIYDDYAHHPNEIKATIDLARLFIKDKNSSDREEKGRLIAIFQPHRFTRVKQFIHEFVKELSKADVIYVTNIFGAGEKNIDNIDSQLIANLIYKNNKNVTCLKDNYEINEKFFKLTKKNDFIINMGAGDCHNLWSILKNKNTLNN
75
+ MDINWASTQIVKNLDRHEKRDLLAWILTQSERTFQRAFEAGQYSSAIGSLKLIWEMTIKDSKEKDSRYHGNYKH,MSKLHLKRFLKKSYEFSLVLFQFFIIILHFIHLEFIPKKEIMQVNFFFSFVGFLLIIISTIVMLISIKDLGRNLSPFPRPTVNGNLTTSGIYSFIRHPMYYSLILISFGFFITKLSFYHLFLTISLALIIKLKIILEEKYLNKKFKNYFIYTDKVKY
76
+ VHKNKILVPLSNNSYEVIIKQGLINNIGEELIRIGINSNRKILIVSNKEISTLFGRKLLNNLKKNNFNAEIFNIKAGESHKNFASLSEIFNAAFEVGLDRNSLLIALGGGIVGDVTGFAAATWLRGIEYIQIPTTLLSMVDSSVGGKTAVNHPKGKNLIGAFYQPKAVFIDPETLITLPTREFKAGMAEVIKYGVIKDKSLFEYLENEKNRDKILNLENESLIKIINKSIKTKACIVSEDEKENGIRAILNYGHSFGHVIENLCGYGEYLHGEAISIGMKIAGDIATEKNLWSKEHSLRQDHLIESYGLPIQTPKIKKNDVMKILMGDKKVRNGKMRFILPIELGEVDIFNDINESQFLKYFN,MKKIWKIEKLVLPQHSDHAGVMWHGTYFDWLEEGRINALSKAGLNYVDLTKNGFDLPLIDTSIKYISPLFLGDTVTIETIFEISKSPKIKIHSKFINKSKTILTIAKVNLVLINKKSFSIIRKRPDFISKAFLKLNG
77
+ MNDLNIEFPLDDFEELISQIGWSSLDEWFIFWNLKKEMLSINNFWDDNVKDDWIWGLALPLLSQAYKLNKKSPDRKIIGISALPGTGKTTLGKWLESISLKLKFKLSVISIDDFYLPSEEMEFAIKNNPWNVSRGFPGSHSIDLMKEKLLKWKTDGQLNVPVFDKSLRKGLGDRAHWREESPDLLIIEGWFLGVKPLSIDLDNSEKFSPPLSVFESSYRNKIQNNLDQYLDIWNMIDQIWHLKPLKFEYLNEWKSNQEKSMHFKSGSSLKGDNLSNFLRMLNVSIPHKSFDDINSDVLLMINQERKLVRVGLNQQISK,MLTTKITYALSDWIREWRKCRKENPSLDDCIKFTEWKIENYELTDSDRMIIESILLYETEET
78
+ VNITFLGTSSGVPTLTRNVSSLALKLSQTAEVWLFDCGEGTQHQLMKSNIKSSQIKKIFITHMHGDHIYGLPGLLATLGLSGNSNGIEIYGPSELKSFVTSALESSFCKLSFPLRFRAVEDFASLNKILFENDKLKVHCACLKHRLPAYGYRVSEKDKPGVFDIKKAEDSNIPPGPIYSELQAGKTVQLKDGRSFNGQDFCGPPRKGESFVYCTDTVFSKSAVNLSKNADLLVHESTFSKEDEKMAYEKLHSTTIMAAKTALLSNVKKLIITHLSPRYTQRSSIKPSDLLKEAQKIFPNTYLAKDFLTAEIK,MKLSKKFEELIIKQLESFGCSMGVTHLVMYLASTEQGTKASFEMIGQWPQIDRLLVSVEDDPSLKVSSPNRRWYPLQENDILLGVLRVETDLKEGNWPVSLDSRLKALSLSLAKCVSIELERQNKNEEINYLKSQVNVIIHQLRNPLAALRTYAKLLIKRLGSDVDSIEIVERMIIEQKQINNYMDSFAQLNSPIQLPLDIGEERLLLPPNLDNKKLITVQSLLRPILERGQANANLENRDWTEPSLWPDWTLSPLKAKYAVIAEIVANLLENAFKYAHKDAEIGVAIMSKGLCIFDDGKKITKIENEKIFQKGFRGSAAKKKDGTGVGLFLARKLAKQIGGELRLLENSSINDVEELKSFKKKNIFYLELPIKELHS
79
+ MIENPSQIVKEISDEKEIENSTIEENTSDTPKEEDLSFDHKDIPSADSSSSRRNNDLDTAGFTQEEFASLLGKYDYNFKPGDLVKGTVFALEPKGAMIDIGAKTAAFMPMQEVSINRVEGLSDVLQPSESREFFIMSEENEDGQLALSIRRIEYQRAWERVRQLQKEDATIYSEVFATNRGGALVRVEGLRGFIPGSHISARKIKEDLEGEYLPLKFLEVDEERNRLVLSHRRALVEKKMNRLEVGEVVIGSVKGIKPYGAFIDIGGVSGLLHISEISHEHIETPHNVLNVNDQMKVMIIDLDSERGRISLSTKALEPEPGDMLTDPQKVFNKAEEMAAKYKQMLLEQTDENEEQTVEIAESV,LSRSLDLPSTEGVDTLAQELAKLQDNGKRRIAFLGSRHVPVVDIHLIELIARSLAEEGHTILTSGSQGVNAAVIRAVLGINPSLLTVLLPQSLDKQLPEIKNQLESVIHLVEKSENDELPLPMASSLCNQEIINRCDQLICFAFHDSETLLNSCRCAEEMGKVVSLLFFD
80
+ MKKKLAAVSFSALLAIVASSTTSGFASWNTKYWTNEKNFNRISSFNVSENLPEGSKSTTKTSSEVVTASEDGKTLMYTDSDLGVVGLVDISDPAKPKALGIVELEAEPTGIAALGNNIYIGSNTSESYTNPSGALVQYNLDKRRAVKECDLGGQPDSVFVSPDGSFLAVAIENERDEEYKDGQIPQLDEDGKQINPAGYVSLVKLNKKGKIQCNSIKKVDLTGLASIAPSDPEPEFVAINDLGETVVSIQENNHLAVIDKEGKVISHFTAGIVKQMAGMDTKKDGAHKFKKKLKNVRREPDGLTWIDNDHFATANEGDYKHKAPGQAKRGGSRSWTIFKKDGTVVYEDANRLERSIAQIGHFQDGRAGKKGVEPESVTFGKIDGTPYLFVGAERAGIVAVYDITELSQPVLTQLLPSGIGPEGFVAIPDRGLIASANEKDYNKKEPGLSSHVTIYQLQDAPASYPHLTNENGLEFVSWGAISGMVSGEDGKIYAVNDGTFKTQPRIYVIDPSSSPALLERAIDIKLDGKTALFMDQEGITTDGRGGFYISTEGIKKKLTEHPPAIYHVSSEGDILEKITPPPSYLNYAKNPGFEGITRNGNILYIAQQKPWGDDTFNTTKILSYNLISKQWGAVNYQLDRIKKGGVGISELTYHDGALYVIERDSFYGKKAKLKAIYKVDLDGVVFEGLQTTMPPRLYPLVEKELVTDLKPVMKSTGGFILEKVEGLAINNDGQAWISTDNDGTGKKSTGETLFLNIGKI,MKNLKSNKRKIHRKVAAISSIPLLITLISGTIYSFLQPLGVDAFWLIKWHTGNFGIINLQPFYSIFLGIASIISVISGIRLLQKNS
81
+ MGFIKNKLFIFIILILLQSCSGGRIGNFFESSFKNIEETKIKEDVKNNLKNKIVIKSGGIVEKNKNIEETKIKEDVKNNLKNKIVIKSGGIVEKNKNIEETKIKEDVKNNLKNKVLKMSEKKSKNNKKISDKNISPKKIIFQPKSYKIIFILKDVDPKDPTEDLRAILRNSDVNFEIEKIERYFDTKNKTIKSN,MKWIIQEEKEEDHLQILNKDSEIGIDEVGRGSVFGPVFSVAVVLSKKSGLTLKKLGVNDSKKLTPKKRKDFFPKIIALSSDYALGQSSVREIDLLGIRHATELSMIRAVKKLKHMPSELLIDGPLTLRLWEGNQRNIISGDSKFISIATASIIAKVMRDSLMERLESKYPGYFIFKNKGYGTKQHFSSLKKHGLTNLHRKSFLNKLNLI
82
+ MELPCRRFGRTNLKMPVLSLGGMRFQKSWDELKFSEISRKEQNKVENILNLANKFGFNHIETAKYYGTSEIQLGMGFKSIEKKPKIIQTKIPPNRDPKLFEAELLKSFEKLQVKKIDLLAIHGINTPEHLHQAVKDGGCIDILKKFQQENLIGYIGFSTHGELSLIEKAITTNLFDYINLHWYFINQTNSKLIELAHKYDLGVFIISPTDKGGHLHTPSTKILELCSPLHPIVFNDLFCLRNKYVHTISVGIAKEQDFNLHLEAVSLLSESDHYIPKILNRLKEESINSLGIEWYKSWDKNLPNWKNTPGGINIPVLLWLANLIDSFDLEEFAKSRYQLLGNGSHWFPGNNANLLDVNVCESQLLKVLERHIKPKKVIKKLRVLKDKFGDKSLKRLSKN,MIFRNKRSSIKKTNILSQDELIKHYGINSYEFTHQEKKEIFVCSKVKEFDLIELDQLLQTVGWSRRPIRRVKRALEFSILVVGLWRHDEKFPRLVGFARCTGDGIIEATIWDVAINPVYQGLGLGKELMKYILQELKKIGISKVTLFADAEVVSFYKRQGWELEPKGSKCAFWYAN
83
+ MIIIEGFHIFNHKQNCKTKAEWMEQSGMTYDRESEVN,MQIKILVKLFSHLIKVIFKPLLGFAKFFITTYGVFLKFFLQLNGGYWGKIGIGQYSKIERKRFFCILPFYILLALLFGILSLIYWYFVVLFIPFWIERYLTDTAQWNNIFSSIMAFALICGWLLLLSKTK
84
+ VEGKNTSITFDGREIRLTTGLYAPQAGGAVMIECGDTSLLVTATKTTKKQAADFLPLICDYEEKLYAAGRIPGGFMRREGRPPERATLIARLIDRPMRPLFPSWMRDEIQIVASCLSLDERVPADVLGVTGASIATLLAEIPFYGPMAAVRVGLIGDDFILNPSYREIEKGDLDIVVAGSPEGIVMIEAGANQLSEQDTIEAIDFGYEAVSELIKAQENLLKDLGIKQVKPLEPEEDKALATYLEKNCTKPIDLILKKFDQSKEERDLELDKIELEVQTKIDSLKDDNQLKVLTSENEKLIHSDFKKLTKKLMRSQIINEGKRVDGRDLDEVRKISASAGILPKRVHGSALFQRGLTQVLSTTTLGTPSDAQEMDDLNPSTEKTYLHHYNFPPYSVGETRPMRTPGRREIGHGALAERAITPVLPGKETFPYVLRVVSEVLSSNGSTSMGSVCGSTLSLLDAGVPLKAPVSGTAMGLIKEGKEVRILTDIQGIEDFLGDMDFKVAGTEKGITALQMDMKITGLPVSVISDAIKKARPARLHILEKMQEAIDKPQESLSPHAPRLLSFRIDPELIGTVIGPGGRTIKGITERTNTKIDIEDGGIVTIASHDGAAAEEAQKIIEGLTRKVHEGEIFPGVVTRIIPIGAFVEILPGKEGMVHISQLSEARVERVEDVVRQGDEVTVRVREIDSRGRINLTLRGVAQNGGMSYPEPTPTPVAPLN,MPKQLSFSNESREALEKGINTVANAVKVTIGPKAKNVVIERKFGSPDIVRDGSTVAKEINLDNPISNLGAKLIEQVASKTKESAGDGTTTATILTQIMVQEGLKNIAAGASPIELKKGMEKGLNFVLEKLRSKSIKINGSDIKKVATVSAGGDEDIGSIISKAMDIVTSDGVITVEESQSLETELDITEGMSFDRGYSSPYFVTDQERQICELENPKILITDQKISTLTNLVPILEEVQKSASPFLILAEDIEGEALTTLVLNKNSGVLNVSAVRAPSFGERRKAALEDIAILTGAKLISEDQSMKLEEVTLNDLGKAKKITISKDKTTIVAFDDTKDLVQERVEKLKREVEITESEYDKDKINERIAKLAGGVALIKVGAATETEMKYKKLRIEDSLNATKAAIEEGVVSGGGQTLIEISNELSNSRKEISDDLTTGIDIITNALLEPTKQIAKNAGFNGDVVIADIKRLGKGFNANNGEYENLNESGILDPTKVIRLALQDSVSIAAMIITTEVAVADIPEPEAAPGGPGADPMGGMGGMGGMGGMGGMGMPGMGGMGMPGMGGMGMPGMGGMGMPGMM
85
+ MILSLLLSTFITIFIAELGDKTQLATLTMSGTSNKPLAVFLGSSSALVLASLVGALAGGSISNFLPEIILKSIASITFFIIGIRLFVNSFTSKENDNNQ,LKNLLGCSVKDLEKIALNYGQAAFRGRQIYNWLYNYKNRSKSIDEINVLPLKFRDQLKNEAFLFGELTLKEKYLATDGTLKLLLNTRDNESVECVGIPTEKRLTACLSSQVGCPMDCKFCATGKEGLKRSLKVSEILDQILFIENQMNQKVSNIVFMGMGEPLLNIDELLLSIRSINEDFAISQRKITVSTVAIPKMISKLSELSFQVLGKCQFTLAISLHASNQKIREAIIPSAKNYHIKNIIDDCREYVRETGRRVSFEYLMLHGVNDKLEHADELSNLIKGFQCHVNLIQYNHIEEVEFKQTPIKNAQLFQTRLSNSGINVSFRKSRGSDRNAACGQLRQNDKIK
86
+ MYSLEISLRYSPFPLSIQKKEYEDIKRIYDEIKDSMNSDNQNSPLIELSCEKVQDKLITVLAKEVISVQIYEKSAVAGGSKRPGFSLDI,LARDFPLERVRNIGIAAHIDAGKTTTTERILFYSGVVHKIGEVHDGAAVTDWMAQERERGITITAAAISTSWQDHRINIIDTPGHVDFTIEVERSMRVLDGVIAVFCAVGGVQPQSETVWRQADRYSVPRMVFVNKMDRTGADFLKVNQQIKDRLKANAFPIQLPIGAEGDLSGIIDLVSNKAYLYKNDLGTDIEEAPIPDEMKDEALEWRSKLMESVAENDEELIEIFLDKGELTEDQLKKGIREGVLKHGLVPVLCGSAFKNKGVQLVLDAVVDYLPAPIDVKPIQGVLPNGKEDVRPSDDNAPFSALAFKVMSDPYGKLTFVRMYSGVLSKGSYVMNSTKDAKERISRLVILKADEREEVDELRAGDLGAVLGLKNTTTGDTLCNTDDPIVLETLFIPEPVISVAVEPKTKGDMEKLSKALQALSEEDPTFRVSTDQETNQTVIAGMGELHLEILVDRMLREFKVEANIGAPQVSYRETIRSSSKGEGKYARQTGGKGQYGHVVIEMEPAEVGKGFEFVNKIVGGTVPKEYIGPASNGMKETCESGVLAGYPLIDVKVTLVDGSFHDVDSSEMAFKIAGSMAFKDGVKKCNPVLLEPMMKVEVESPDDFLGSVIGDLSSRRGQVEGQSVDDGLSKVQAKVPLAEMFGYATQLRSMTQGRGIFSMEFANYEEVPRNVAEAIISKNQGNS
87
+ VKKSLFKPSRKFTLFSAFVTLLNDRLSESILLPILPSFVLLFDSKASTYGLLSCTYQLAQFTASPFIGLMSDRYGRRPVTLFCITGSIIGISILSFTVLFDWSTSLATIPLFLLFIARLIDGLSGGTAATATTILADISSPEKRAKTFGLIGVAFGLSFFLGNIFVVIFAKNTNNNFIIPVIIASIIPIINFILVFFYLPETKPQNELNKSTQILKNPLKQLFKVFKEEKIRKLSLAFFIYFIAFTGLTNILIFFLQESLNWTTKASSGTLVVVGVIAIIVQGGLIGPLVKKFGEMRLTLIGSGFILLACFLLITTPQKNAIVNIYSAVSFLAVGAGLITPTLRALISKKLDGDNQGSILSNLQGLQSLGGVLGIGMAGKVYDDFGPKAPFIAGSIILLFMIYLIAEGKNNNISYN,MTDIFEVPTPDNELLEKAKQLRLASIKTSQTNNDDRIRALNLMADYLEKNSKEIIEANIEDYKKAEIKGISKSLLSRLKLSKEKLNLGIEGVRQVGNLIDPVGQIQIKRELSKGLILERKTVPIGVLGVIFESRPDAVMQISSLAIRSGNGVMLKGGSEANLTNLAIVSALKEGLQDSNLDENAICLLTSRKDSMAMLNLEKYINLIIPRGSNELVKFIQENTEIPVLGHADGICHLYIDNEVNLDMALKVALDSKIQYPAACNAVETLLIHKDTASEFLNKAIPMFNSNDVKLIGDKKSFQLGVAFEANYEDWQTEYLDLILSIKIVNDLEEAIAHIQKFSSKHTDGIITENINNANKFMSEIDSSGVFHNCSTRFADGFRYGFGAEVGISTQTLPPRGPVGLEGLVTYKYFLRGEGHIVDDFSSGKLIYSHKDV
88
+ MQNITFKGNVNFDNQKEELNENELFSLKITDSLYKKDIGKFLEILSSHFIP,MRVVIAGAGLAGLSCAKYLVDNGHIPIVLEARDVLGGKVAAWKDEDGDWYETGLHIFFGAYPNMLQLFKELDIEDRLQWKSHSMIFNQPSEPGTYSRFDFPDIPAPANGVTAILSNNDMLSWNEKILFGLGLVPAMLRGQKYLDKCDSKSWTEWLKEHNIPERVNDEVFIAMSKALNFIGPDEISSTVLLTALNRFLQEKNGSKMAFLDGAPPERLCQPMVDYITERGGEVHMNSPLRKIDLNEDSTVKSFTIAPLDSDEKKKVITADAYVSAMPVDLFKLIIPDQWKGINAFSKLDGLIGVPVINIHLWFDKKLTDIDHLLFSRSPLLSVYADMSITCKEYEDPNRSMLELVFAPAKEWINRSDQDIVDATMEELKKLFPTHFIGDDKTKLRKFKVVKTPRSVYKAVPGCQEFRPSQRSPIKNFFLAGDYTMQKYLASMEGAVLSGKLCAETINKEYSKTSNIVSRETSKIN
89
+ MLENIWHPSYSAAEYLGITEIKLSHLRENGYFKPGIHWKSSPLGQKKPWNPEVLYNSILCRKIMDEFYSEEKNDQYAA,MRNLIKENIKKTGNNSSRSIKKLLKQRSFVVFISILLTGLGASITSISFKTGIYFINNWRLELLNHFPSVAVLPLFGAVGGAIAGFLIKNFAPAAKGSGVSQIMGFLRHKKVPMNLKVGLVKLISGIIAIGSGFPLGPEGPSVQMGGSVAWQMARWLKAPLAFRRVIVAAGGGAGIAAVFSAPLGGFIYAIEELLNSARPVILLLVVITTFIADSSADIIQALGLDPKAGGFDFNLGFLIQKEYDPSVFFLPIDFIYLVLLGIIIGLFAELYSKYVLFMQKLGKKWYKNKFVLKMSICGLLLGSIYSFLPSSFHNLDELQKIIVEKNTNIEIAFLAVFILFITTGLAAASGAPGGLFYPMLTLGGAIGLIMGTWVEIATGHAPSTYIFAGMGAFVAGCSRTPITAMFLAFALTKNLLIMKPVLISCIASFLVARAFNEESIYERQIQIELED
90
+ MKKKSITYTDLSKKQLQHLKELYIQKKVECMSHKELKEFVLEIISHQINDTIGKEEEMEAWMEMSKFYGDQFEIIILEIQQKFANNENLQNFEEDSKEHRLELLEKNNIEQNKQDMWDD,MKEIGWPTIDSKHLVVYSKQMLDLENEIFSQGMPQEALMEKVGIQLSKWLLKRKSLLKKGVIVFLGPGHNGGDGAVIAKELFLKGYLVKLWCPFPLKKTITINYVNYLTSLGVEILGDSPNPEGKDLWIDAIFGNNQKRKVDEELIELFNKKFEKRSGKVVSIDVPTGLCPNSGKPFLKNAVKADFNLVVGLNKIGLLQDTALPYVGELHHIDIGICRSQLCKLESKILKISYQDLRTIKLPLLPKNSSKYKRGRTLVIAGSEKYPGAAYLAIKGAISSGAGFVSAIIPNLVSNSIWQVEPEVVVTGSLSSDKNGNSILFNALKNVDFSAYDSIVIGPGIGLNEEDWEKSTQYLLDLKGLLILDADALNRISKSNLGPKFFLERKSKTWITPHNKEFMRLFPEIDCTNKVELAKKAAKAFDISILLKGANSVIANNENAWQLFGTDAETSRAGLGDLLSGFIGGCSSIELSSRDYTKTESLAKYVFLHSFAASKCKKGSNASLIGAQLSKLMRKTKTRLMS
91
+ MRQHVNPLSKNFFEIDPIPPLNQVFENPKLPLHLDIGCASGEFLFELSLKNKNWNYIGIEIREKLVLNANLKMKSRENKNLYFSFGNANNIFNQTNNKSIINLITSISFNFPDPWFKKKHHKRRVIQPKLLNLLSNSMKKGSLIFIKTDVRDLFDHMELTISESIKFKKIPYQDVDFCESFNPNRIQTNREKYVILNQLKIYESIYKKI,MTDDINPIESDFNAALSRYQDGQELIPIAQDFQKIIQQIPNHFAAWTCLSWLQLLLKNNEEALAAAREAVRLNQQDPQARMNLSLALLATNNKGVRDHVELIKKMAMMMPDVKTELKESVEDGFNRYPNWPELTKINKWLEF
92
+ VLNDTLSSKFICFHLINISNKLNSTLKIELANPNKSEMFELKSYEKFRDTEDVRFFDISINNSNFRDLVIHNGPAVSPPNDKELGNWQFYIHHKQEDNLLAISGGRTFYLVNLGWEYPFYKVRLESCGLILKIPRGTFHRSVSDENGSVVLNQAIRDKGGSVESEFKVTNSKDNKKLHDCITNLQPKFKIYSVK,LIMSLIPLLPVFHKFNRQFFDQSLTTNREPLVKVRWSDNRLKTTAGFYKRKQLKGVIDSEIILSKPILSKLSCNEIHSTLCHEMIHAWVDRILNINEIHGPNFLSKMNEINKAENNFQISIRHNFPVERKALKYTGKCLNCGEKYMYRKRIKNIACKKCCNLFFNGSWNKKCLILFD
93
+ MIKDHPIFLESIRFIKSNLIENNFNYLENRVLERLVHTSGDFNIQKLLEFSEGACEKGVKSLKAGAPILTDTDMAAAAIKSMAKNTNGNLVVSAKHWFDDRDLSELTKTAYGIEKGWIELSANNSGNQSPIIVIGSSPTALVNLLEIIQNSQQIPSLIIGMPVGFIGVRQSKNKLLNTNYPRIVMNSTRGGAAMAAAAVNALLRESI,MREEDIKSFEDAFFDALNLFNNQKWYEAHDAFEDIWNTLEGDERQIIQGIIQVSVSQFHLSKGNLNGATILMGEGLGRIKNRTNIDLGVDLVSFCKCLDELLRKLQYKEELTKNDKPYLLIKEQNEF
94
+ MRVKLEPETAFIGKKFAYIFLGIIFALNSIVFIWYFFFSNLTWS,LFQSLFSTKYNYFYNIYIVFHIRTSILLLSGLVLGLWTSWPGIVIPNNWKCFKDMIEKSSKE
95
+ VSENIQPSSEENQIVEDLTNKESPEKLPEFKDKELITNLEQNRFECRSCGYIYDPIEGNKKLNIPKNTPFSAIDGNTFACPVCRAGKNLYKDIGPREKPSGFEENLTYGFGFNSLPPGQKNILIFGGLAFAAACFLSLYSLH,LIGIFSAFGAAISWTYACFIWRSQTEKYKSIDINLVKNIIAFLIFLPAFINLSVLNNLKSIITLLFSGVIGIGLGDTFYIKSLQLIGTRRTLSIETLSPILAALSGEIFINENLAFRSYQGILIISISLFILLRQRTNLIVNNLTNITERNNLSVYVFPFLSVLCAVSGGLLSRKVFLESNLSPFQATEIRLLGAIIFLIIIKKFRINFFLKKLDFNDQKRFLLSILLGTNLGILLQQIVFKTLPLGIGWTLLSTSPVISLFFATKEEGQITKGIIFFTTLLFLGLCLIII
96
+ MKILLSVFFLFAFIPPSKGVTTKMFKVLDTCARYRLGEIDAKQAIEKLKLKSVNSSEIDLKNIVSNYCSVFTPNENIKF,MPKKHPTRRQFLNFGKLSLLFFLNSCSNSLKKIKIGFQSSTYPKSFRDTFPAIWQKENINFSKLKLEKNKIKFSKSDFILINDGWLKSINFANFQNINNLFLNDLLDNRSRDYLKSFKEYQRNKLFPIGVVPYAVIIKNNKDLIYEASNNWDFLLDEKLKGKIIFPQSPRILISISKRINVKNSLSKLKEQAMLFDDKNSINWLINSDASVAIIPFSLCEKYLRVDSRLSMVFPNKGVPLMWNFLLTKSKINNIVLFDWIKSLEKRSTIDELANQGWYLPFKNEYSQDKYNIKTENSNYGPSENCWENSWSFSSLNYEEKVNLENLWNQS
97
+ MRFKVSLKKDGKEFDEVVIANNKKDAIEVALKNNPEAEVLNSDWTFKL,MNDHNSKDNYEAQTLILNDSNGNELFCYLEQIVKVEEKEYALLTPVDTPVSLFKINENDEPELIEKIEKNEQVLKNADAVLQEHDLKLIRSAVTLTVSGELEEPIYDELEEDGIEEESETYELLVSFNLLEQEYGLYIPLDPFFIVGKLINQGALLIEDDEFDKVQPLIESELEKSSF
98
+ MENSKPNYWQNAERTNGRMAMMGFFALVVNYGLFGWIIPGIF,LMNKYDVIIIGSGIGGLCCGSLLALAGKKVLIAEAHSQPGGVAHSFNMRGYKFESGPSLWSGIGKWPTTNPLGQILRLLDEKVELIKYQGWHVNVPEGEFNLEVGQEPFKERIRLLRGEKSVKEWDSFVSGIRPLSQIVSEIPLLSSSPETINFLEIIKLASKFLPNIKSLPKLNGGFGDIVDSHLNDPFLRNWVDLLSFLISGMPMHDTNSAAMATLFDEWFKPASYLEYPKGGSESIVKALVDSFKKNGGELILSSKVEAVNFSKNIASGVTLENGSNFISNFVVMNTDAWTSRKLIPQEFQKKWSPKAKDINKCGSFLHIHLGFDASGLQNLPIHAIHVDNWERGITAERNVAVFSIPSVLDKSMAPKGKHVLHGYTPANEPWEIWKNLKSNELAYKELKEERCSIFLKSLRKIIPDIDNRIEIKLLGTPLTHKKYTNTYCGSYGPALSAAQGLFPGCKTSVRNLLTCGASTFPGIGIPAVSASGAYAAEKIMGKKEYKKLLKTIDL
99
+ VFFKSNFSYSDSNKSYSDLLLELDSGNIQSIYFYPRKREIDVLYKNGNKEKIPILYNDQLILEKASENNVDLTINNSRKESSAANSFASVGLFLIFIIAIVLILKSTSKLASKALGFGKNKSKFVTIDDVETRFDDVAGVPEAAEELKEVIKFLNEPKKFTDLGAKVPKGVLLIGPPGTGKTLLAKAIAGESGVPFLSIAASEFVELFVGVGASRVRDLFEKAKEKSPCIIFIDEIDSIGRQRGSGIGGGNDEREQTLNQLLTELDGFADNSGIIVIAATNRPDILDSALLRPGRFDRKIEVMLPDLDGRKKILSVHSLSKPLAKDVDLSYWATRTVGFSGADLANLMNESAIHCAREDSKLITYSHIENALDKVTLGLRTSIISSQNMKKIIAYNEVGRAIVSAVKNGVDSVDKITILPRSGYLGGYTKINPDEDIVSSGLISKKLLLSKIEIALAGRAAEIIVYGKNEITQCSFNDISYATSIIREMVTKYGFSIIGPLSLEDGGEMSIGDGFVRNKSTIADNTYSRIDNEIINISKISLNNAIKIISNNRILLEKLVELLLIKETVENNTFKKITFDLLKV,MRRKIFFEVFNIKKLSILVLGFTLGVIAIWPGIISRNSRKCFFNIIKDGSDGNIQIKTILLVNPNYLLRIKNAKNDYWKVLLVGDACFRKF
100
+ MVTSQKKGPDSSASDNELSPDQTLGLVSLSLMQKLSQKDPSFSWLGEMKPDQLNLKNLRDRLELTELAIKTGAPLTTSEVSILMGAKPGKSKIERGGILAIKVARNVWKLSKLGQGSSYYRN,MILKVLEFEFDLIVLGAGSGGLAAAKRAASYGAKVAIIEVNKIGGTCVIRGCVPKKLMVYAANNRRNMLSSEGYGLISKEITFESNILLKNVREEVSRLSVLHSNSLKKLNVKVFEGLGRFLNQNTVEVVCPKTKNILRKVSAKSILISVGGKPKKLNIPGTDFAWTSDDIFELKDFPKKLLIVGGGYIACEFASIFKNLGTEVTQLIRGENLLNGFDKDLSECLEKSMTSLGINLKFKNQLKSIKKINDGLESTLESGSKLLTDNILVATGREPSLKRLNLDTLNLKMDGIYLEVNELNKTSISNIFAIGDIVKRPNLTPVAIEQGRVFADNYFAALKRKVNYENIPKAVFTIPEISTVGLSEEKANEIYSEVNVQVFKCNFTPMSNTFKKNKSKCMLKLVVNKKNDKVLGCHMFGEAASEIIQMVAVSLNTGITKKDFDTTMALHPTISEEFVTMYG
101
+ MFFLSIPQAWHLAGTWSEQLPNDSNLIGMSQTELMMTLHSIFVPLLLVISYFLFLKISKNESKKVKG,MTSTLPNDNIKNIDEKISNKLISEIIRDRIKSKGTRFSANDNIADFINPGELKVLEKEVASRIKDLLKSLVIDVDNDHNTQETAERVSKMYLNEVFKGRYHEQPKVTSFPNDKNLDEIYTVGPITVRSACSHHLVPILGECWIGIKPGSKVIGLSKFARVADWVFSRPHIQEEAVMILADEIEKLCEPKGLGIIVKAQHYCMKWRGVKEPNTSMINSVVRGDFRHDISLKQEFFELVRQQSSNNNY
README.md ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ppiBTEP
2
+
3
+ A Siamese (twin-branch) protein-protein interaction classifier built on ESM-1b ([Rives et al., 2021](https://doi.org/10.1073/pnas.2016239118)). Also designated SiameseBTPE (BERT-Twin Protein Encoder).
4
+
5
+ ![ppiBTEP Architecture](assets/ppiBTEP.png)
6
+
7
+ ## Overview
8
+
9
+ ppiBTEP processes each protein independently through a shared ESM-1b encoder -- no cross-sequence attention is used between the two proteins. Each branch extracts the `[CLS]` token embedding from the final transformer layer, the two embeddings are concatenated, and a dropout + linear classification head produces binary interaction predictions with softmax probabilities.
10
+
11
+ Unlike the cross-encoding approach (see [ppiDCE](https://github.com/kouroshSA/ppiDCE)), ppiBTEP must capture interaction-predictive features entirely from each protein's own sequence context. This makes it faster per pair and allows protein representations to be precomputed and reused, at the cost of not modeling direct inter-protein residue dependencies.
12
+
13
+ The model was developed for the *Prochlorococcus marinus* MED4 interactome, where it serves as one component of a tri-model consensus framework (alongside [ppiGPLM](https://github.com/kouroshSA/ppiGPLM) and [ppiDCE](https://github.com/kouroshSA/ppiDCE)) for computational PPI screening.
14
+
15
+ ## Architecture
16
+
17
+ | Parameter | Value |
18
+ |-----------|-------|
19
+ | Foundation | ESM-1b (facebook/esm1b_t33_650M_UR50S) |
20
+ | Strategy | Siamese / twin-branch |
21
+ | Layers | 12 default; 6, 8, 12, 16, or 18 selectable via --num_layers |
22
+ | Classification | Concat [CLS_A, CLS_B] -> Dropout(0.1) -> Linear -> 2 |
23
+ | Max sequence length | 1,024 tokens |
24
+ | Optimizer | AdamW (lr = 1 x 10^-5) |
25
+ | Loss | Cross-Entropy |
26
+
27
+ ### Siamese vs Cross-Encoder
28
+
29
+ | | ppiDCE (Cross-Encoder) | ppiBTEP (Siamese) |
30
+ |---|---|---|
31
+ | Input | `[CLS] Seq_A [SEP] Seq_B` (joint) | `[CLS] Seq_A` and `[CLS] Seq_B` (separate) |
32
+ | Cross-attention | Full bidirectional at every layer | None |
33
+ | Classification | Single [CLS] -> Linear | Concat [CLS_A, CLS_B] -> Linear |
34
+ | Complexity | O((n+m)^2) | O(n^2) + O(m^2) |
35
+ | Speed | Slower (joint encoding) | Faster (independent, reusable) |
36
+
37
+ ## Installation
38
+
39
+ ### Prerequisites
40
+
41
+ - Python 3.10+
42
+ - CUDA-capable GPU (recommended)
43
+ - conda (recommended) or pip
44
+
45
+ ### Setup
46
+
47
+ ```bash
48
+ # Clone the repository
49
+ git clone https://github.com/kouroshSA/ppiBTEP.git
50
+ cd ppiBTEP
51
+
52
+ # Create a conda environment
53
+ conda create -n esm python=3.10
54
+ conda activate esm
55
+ pip install -r requirements.txt
56
+ ```
57
+
58
+ ## Repository Structure
59
+
60
+ ```
61
+ ppiBTEP/
62
+ |-- train_ppiBTPE3b.py # Training script
63
+ |-- inference_ppiBTPE_2GPU.py # Batch inference script (multi-GPU)
64
+ |-- roc_analysis_color_threshold_F1e.py # ROC curve analysis with F1 optimization
65
+ |-- assets/
66
+ | +-- ppiBTEP.png # ASCII workflow diagram
67
+ |-- requirements.txt
68
+ |-- LICENSE
69
+ +-- README.md
70
+ ```
71
+
72
+ ## Usage
73
+
74
+ ### Data Format
75
+
76
+ Training and inference use CSV files with columns: `seq1, seq2, label`
77
+
78
+ - `seq1`, `seq2`: Amino acid sequences
79
+ - `label`: `0` or `enemies` (non-interacting), `1` or `friends` (interacting)
80
+
81
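+ For inference-only input, only the first two columns are required. The inference script reads them by position, so the two sequences must be the first and second columns; any further columns (such as `label`) are ignored by the model and carried through to the output file.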
82
+
83
+ ### Training
84
+
85
+ ```bash
86
+ # Train from scratch with 12 layers
87
+ python train_ppiBTPE3b.py \
88
+ --train_file train.csv \
89
+ --val_file val.csv \
90
+ --model_config facebook/esm1b_t33_650M_UR50S \
91
+ --num_layers 12 \
92
+ --freeze_layers 0 \
93
+ --epochs 20 \
94
+ --batch_size 2 \
95
+ --learning_rate 1e-5 \
96
+ --max_length 1024 \
97
+ --output_dir ./out \
98
+ --device cuda
99
+ ```
100
+
101
+ #### Key training options
102
+
103
+ - `--num_layers N`: Total transformer layers (6, 8, 12, 16, or 18)
104
+ - `--freeze_layers N`: Freeze bottom N layers (use 0 for training from scratch)
105
+ - `--checkpoint path.pth`: Resume from a saved checkpoint
106
+ - `--model_config`: ESM model config (default: `facebook/esm1b_t33_650M_UR50S`)
107
+
108
+ **Important:** When training from scratch, use `--freeze_layers 0` to ensure all layers (including embeddings) remain trainable. The default is 20, which would freeze most layers.
109
+
110
+ ### Inference
111
+
112
+ ```bash
113
+ python inference_ppiBTPE_2GPU.py \
114
+ --model_path out/ppiBTPE_epoch_17.pth \
115
+ --model_config facebook/esm1b_t33_650M_UR50S \
116
+ --num_layers 12 \
117
+ --input_file test_pairs.csv \
118
+ --output_file predictions.csv \
119
+ --batch_size 4 \
120
+ --max_length 1024 \
121
+ --device cuda
122
+ ```
123
+
124
+ Multi-GPU inference:
125
+ ```bash
126
+ python inference_ppiBTPE_2GPU.py \
127
+ --model_path out/ppiBTPE_final.pth \
128
+ --model_config facebook/esm1b_t33_650M_UR50S \
129
+ --num_layers 12 \
130
+ --input_file test_pairs.csv \
131
+ --output_file predictions.csv \
132
+ --device cuda:0,1
133
+ ```
134
+
135
+ Output CSV columns: `seq1, seq2, Prediction, Probability_Friends, Probability_Enemies`
136
+
137
+ ### ROC Analysis
138
+
139
+ Evaluate model predictions using ROC curve analysis with threshold-colored visualization and F1 optimization:
140
+
141
+ ```bash
142
+ python roc_analysis_color_threshold_F1e.py \
143
+ --input_csv probabilities.csv \
144
+ --output_file roc_curve.png
145
+ ```
146
+
147
+ The input CSV should have two columns: PRS (positive) and RRS (random/negative) probability values.
148
+
149
+ ## Architecture Diagram
150
+
151
+ The ASCII workflow diagram (`assets/ppiBTEP.png`) covers:
152
+ - **A.** Siamese input strategy (independent per-protein encoding)
153
+ - **B.** Model architecture (twin ESM-1b branches + concat classification head)
154
+ - **C.** Training pipeline
155
+ - **D.** Inference pipeline (multi-GPU)
156
+
157
+ > Note: the diagram shows Softmax in the classification head for clarity, but
158
+ > the implementation returns raw logits — softmax is applied implicitly by
159
+ > CrossEntropyLoss during training and explicitly during inference.
160
+
161
+ ## Citation
162
+
163
+ If you use this software, please cite:
164
+
165
+ ```
166
+ Daakour, S. et al. (2026).
167
+ ```
168
+
169
+ ## License
170
+
171
+ This project is licensed under the MIT License. See [LICENSE](LICENSE) for details.
assets/ppiBTEP.png ADDED

Git LFS Details

  • SHA256: fe4dc9a91875b6c4addb0bf1545d19c6f79d39faa827619ad7f8e3577effdb29
  • Pointer size: 131 Bytes
  • Size of remote file: 300 kB
inference_ppiBTPE_2GPU.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ inference_ppiBTPE_2GPU.py — Batch inference for ppiBTEP / SiameseBTPE,
4
+ supporting single-GPU, multi-GPU (DataParallel), and CPU execution.
5
+
6
+ Inputs
7
+ ------
8
+ CSV with at least 2 columns: seq1, seq2 (label column, if present, is ignored).
9
+
10
+ Outputs
11
+ -------
12
+ CSV with columns: seq1, seq2, Prediction, Probability_Friends, Probability_Enemies
13
+
14
+ Example (single GPU)
15
+ --------------------
16
+ python inference_ppiBTPE_2GPU.py \\
17
+ --model_path out/ppiBTPE_epoch_17.pth \\
18
+ --model_config facebook/esm1b_t33_650M_UR50S \\
19
+ --num_layers 12 \\
20
+ --input_file test_pairs.csv \\
21
+ --output_file predictions.csv \\
22
+ --batch_size 4 \\
23
+ --max_length 1024 \\
24
+ --device cuda
25
+
26
+ Example (multi-GPU)
27
+ -------------------
28
+ python inference_ppiBTPE_2GPU.py \\
29
+ --model_path out/ppiBTPE_final.pth \\
30
+ --model_config facebook/esm1b_t33_650M_UR50S \\
31
+ --num_layers 12 \\
32
+ --input_file test_pairs.csv \\
33
+ --output_file predictions.csv \\
34
+ --device cuda:0,1
35
+ """
36
+ import argparse
37
+ import os
38
+ import torch
39
+ import torch.nn as nn
40
+ from transformers import EsmModel, EsmTokenizer, EsmConfig
41
+ import pandas as pd
42
+ from tqdm import tqdm
43
+
44
+ # Command-line arguments
45
# Build the command-line interface. Arguments are registered in the same order
# as before so that ``--help`` output is unchanged.
parser = argparse.ArgumentParser(
    description='Inference using the trained ppiBTPE Siamese model for PPI prediction.',
)

# Checkpoint and architecture options.
parser.add_argument(
    '--model_path',
    type=str,
    required=True,
    help='Path to the trained ppiBTPE checkpoint (.pth).',
)
parser.add_argument(
    '--model_config',
    type=str,
    default='facebook/esm1b_t33_650M_UR50S',
    help='ESM config name or path used during training.',
)
parser.add_argument(
    '--num_layers',
    type=int,
    required=True,
    help='Number of transformer layers used during training (for config override).',
)
parser.add_argument(
    '--num_labels',
    type=int,
    default=2,
    help='Number of output labels.',
)

# Input / output locations.
parser.add_argument(
    '--input_file',
    type=str,
    required=True,
    help='CSV with protein pairs (seq1, seq2).',
)
parser.add_argument(
    '--output_file',
    type=str,
    required=True,
    help='Path to write predictions CSV.',
)

# Runtime options.
parser.add_argument(
    '--batch_size',
    type=int,
    default=4,
    help='Batch size for inference.',
)
parser.add_argument(
    '--max_length',
    type=int,
    default=1024,
    help='Max token length.',
)
parser.add_argument(
    '--device',
    type=str,
    default='cuda',
    help='Device: cpu or cuda or cuda:0,1',
)

# Parsed at import time; the rest of the module reads this global namespace.
args = parser.parse_args()
60
+
61
+ # Device setup
62
def get_device(device_str):
    """Translate the ``--device`` string into a torch device and optional GPU ids.

    Args:
        device_str: ``'cpu'``, a single device such as ``'cuda'`` or
            ``'cuda:1'``, or a comma-separated list such as ``'cuda:0,1'``.

    Returns:
        A ``(device, device_ids)`` tuple. ``device`` is the ``torch.device``
        built from the first (or only) entry. ``device_ids`` is a list of
        integer indices when a comma-separated list was given, otherwise
        ``None``.
    """
    # Anything without a comma (including plain 'cpu') is a single device.
    if device_str == 'cpu' or ',' not in device_str:
        return torch.device(device_str), None

    entries = [part.strip() for part in device_str.split(',')]
    primary = torch.device(entries[0])
    # Entries may be 'cuda:N' or a bare 'N'; the index is whatever follows
    # the last colon.
    gpu_indices = [int(entry.rsplit(':', 1)[-1]) for entry in entries]
    return primary, gpu_indices
72
+
73
+ device, device_ids = get_device(args.device)
74
+
75
+ # Dataset
76
class PPIDatasetInference(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes protein pairs read from a CSV file.

    The two sequences are taken from the first and second CSV columns by
    position; any additional columns are not used here.
    """

    def __init__(self, csv_file, tokenizer, max_length):
        """Load ``csv_file`` with pandas and remember the tokenizer settings.

        Args:
            csv_file: Path to the CSV holding the protein pairs.
            tokenizer: Callable tokenizer returning an object with
                ``input_ids`` and ``attention_mask`` tensors.
            max_length: Length every sequence is truncated/padded to.
        """
        self.data = pd.read_csv(csv_file)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of rows (protein pairs) in the CSV."""
        return len(self.data)

    def _encode(self, sequence):
        """Tokenize one sequence, truncated and padded to ``max_length``."""
        return self.tokenizer(
            sequence,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )

    def __getitem__(self, idx):
        """Return the tokenized pair at row ``idx``.

        Returns:
            Dict with keys ``input_ids1``, ``attention_mask1``,
            ``input_ids2`` and ``attention_mask2``; each value has the
            leading dimension added by ``return_tensors='pt'`` squeezed away.
        """
        first = self._encode(self.data.iloc[idx, 0])
        second = self._encode(self.data.iloc[idx, 1])

        item = {}
        for suffix, encoded in (('1', first), ('2', second)):
            item['input_ids' + suffix] = encoded.input_ids.squeeze(0)
            item['attention_mask' + suffix] = encoded.attention_mask.squeeze(0)
        return item
98
+
99
+ # Model definition matching training
100
class SiameseBTPE(nn.Module):
    """Twin-branch classifier that scores a protein pair with one shared encoder.

    Both sequences pass through the same ``EsmModel`` instance. The hidden
    state at position 0 of each sequence is taken, the two vectors are
    concatenated, and dropout followed by a linear layer yields the logits.
    """

    def __init__(self, config, num_labels=2):
        """Build the shared encoder and the classification head.

        Args:
            config: ESM configuration used to instantiate ``EsmModel``; its
                ``hidden_size`` determines the classifier input width.
            num_labels: Number of output classes.
        """
        super().__init__()
        # These attribute names become the prefixes of the state_dict keys,
        # so they must stay as they are for saved weights to load.
        self.esm = EsmModel(config)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(2 * config.hidden_size, num_labels)

    def _first_token_embedding(self, input_ids, attention_mask):
        """Encode one batch of sequences and return the position-0 hidden state."""
        encoded = self.esm(input_ids=input_ids, attention_mask=attention_mask)
        return encoded.last_hidden_state[:, 0, :]

    def forward(self, input_ids1, attention_mask1, input_ids2, attention_mask2):
        """Return raw (pre-softmax) logits for each pair in the batch."""
        emb_a = self._first_token_embedding(input_ids1, attention_mask1)
        emb_b = self._first_token_embedding(input_ids2, attention_mask2)
        pair_features = torch.cat((emb_a, emb_b), dim=1)
        return self.classifier(self.dropout(pair_features))
116
+
117
+
118
def main():
    """Run batch inference over the input CSV and write the predictions CSV.

    Reads its settings from the module-level ``args``, ``device`` and
    ``device_ids``. The output is the input table with three added columns:
    ``Prediction``, ``Probability_Friends`` and ``Probability_Enemies``.
    """
    # Tokenizer & config
    tokenizer = EsmTokenizer.from_pretrained(args.model_config)
    config = EsmConfig.from_pretrained(args.model_config)
    # The encoder depth must match the trained checkpoint, so override it.
    config.num_hidden_layers = args.num_layers
    print(f'Overriding config to {args.num_layers} transformer layers.')

    # Dataset & loader (shuffle=False keeps predictions aligned with CSV rows)
    ds = PPIDatasetInference(args.input_file, tokenizer, args.max_length)
    loader = torch.utils.data.DataLoader(ds, batch_size=args.batch_size, shuffle=False)

    # Model init & load; weights are loaded on CPU first, then moved.
    model = SiameseBTPE(config, num_labels=args.num_labels)
    ckpt = torch.load(args.model_path, map_location='cpu')
    model.load_state_dict(ckpt)

    # DataParallel if needed
    if device_ids:
        model = nn.DataParallel(model, device_ids=device_ids)
    model.to(device)
    model.eval()

    all_preds, all_probs = [], []
    with torch.no_grad():
        for batch in tqdm(loader, desc='Inference'):
            ids1 = batch['input_ids1'].to(device)
            mask1 = batch['attention_mask1'].to(device)
            ids2 = batch['input_ids2'].to(device)
            mask2 = batch['attention_mask2'].to(device)
            logits = model(ids1, mask1, ids2, mask2)
            # The model returns raw logits; convert to class probabilities here.
            probs = nn.functional.softmax(logits, dim=1)
            preds = probs.argmax(dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_probs.extend(probs.cpu().tolist())

    # Map & save
    label_map = {0: 'enemies', 1: 'friends'}
    df = pd.read_csv(args.input_file)
    df['Prediction'] = [label_map[p] for p in all_preds]
    df['Probability_Friends'] = [p[1] for p in all_probs]
    df['Probability_Enemies'] = [p[0] for p in all_probs]

    # A bare filename such as 'predictions.csv' has an empty dirname, and
    # os.makedirs('') raises FileNotFoundError. Only create the directory
    # when the output path actually names one.
    out_dir = os.path.dirname(args.output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df.to_csv(args.output_file, index=False)
    print(f'Saved predictions to {args.output_file}')
162
+
163
+ if __name__ == '__main__':
164
+ main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch>=2.0
2
+ transformers>=4.30
3
+ pandas
4
+ tqdm
5
+ numpy
6
+ matplotlib
7
+ scikit-learn
roc_analysis_color_threshold_F1e.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+
3
+ Notes:
4
+
5
+ The script handles variable numbers of PRS and RRS probability values.
6
+ It combines the probabilities and labels, computes the ROC curve, calculates the AUC (Area Under the Curve), and plots the ROC curve using Matplotlib.
7
+ You can customize the script to read multiple PRS and RRS files by modifying the read_probabilities_from_csv function and how you handle the input arguments.
8
+ Dependencies:
9
+
10
+ Python 3
11
+ numpy
12
+ matplotlib
13
+ scikit-learn
14
+
15
+ Explanation of the Area Under the Curve (AUC):
16
+
17
+ The Area Under the Curve (AUC) refers to the area under the Receiver Operating Characteristic (ROC) curve. The ROC curve is a graphical representation of a classifier's performance across all classification thresholds. It plots the True Positive Rate (TPR) against the False Positive Rate (FPR) at various threshold settings.
18
+
19
+ True Positive Rate (TPR), also known as Sensitivity or Recall, is the proportion of actual positives that are correctly identified.
20
+ False Positive Rate (FPR) is the proportion of actual negatives that are incorrectly identified as positives.
21
+ The AUC provides a single scalar value that summarizes the performance of the classifier:
22
+
23
+ An AUC of 1.0 indicates a perfect classifier.
24
+ An AUC of 0.5 suggests no discriminative ability (equivalent to random guessing).
25
+ An AUC between 0.5 and 1.0 indicates the degree to which the classifier can distinguish between the positive and negative classes.
26
+ Why is AUC important?
27
+
28
+ More on interpreting the ROC Curve:
29
+
30
+ The ROC curve plots the TPR against the FPR at various threshold levels.
31
+ The closer the curve follows the left-hand border and then the top border of the ROC space, the better the classifier.
32
+ The diagonal line represents the performance of a classifier that makes random guesses.
33
+ Understanding AUC Values:
34
+
35
+ AUC = 0.90-1.00: Excellent
36
+ AUC = 0.80-0.90: Good
37
+ AUC = 0.70-0.80: Fair
38
+ AUC = 0.60-0.70: Poor
39
+ AUC = 0.50-0.60: Fail
40
+
41
+
42
+
43
+ Threshold-Independent: AUC measures the classifier's ability to rank predictions without being dependent on a specific threshold.
44
+ Performance Metric: It provides a comprehensive measure of performance across all possible classification thresholds.
45
+ In summary, the AUC quantifies the overall ability of the model to discriminate between positive and negative classes. A higher AUC indicates better model performance.
46
+
47
+
48
+
49
+ F1 = 2 * ( (precision * recall) / (precision + recall) )
50
+
51
+ precision = TP / (TP + FP)
52
+
53
+ recall = TP / (TP + FN)
54
+
55
+
56
+ You can adjust the decimal precision by changing ".3f" to the desired value in f'Best F1 Threshold = {best_thresh:.3f}'
57
+
58
+
59
+ """
60
+ #
61
+
62
+ # pip install numpy matplotlib scikit-learn
63
+
64
+ # python roc_analysis_color_threshold_F1e.py --input_csv probabilities.csv --output_file roc_curve.png
65
+
66
+ #!/usr/bin/env python
67
+
68
+ #!/usr/bin/env python
69
+ import argparse
70
+ import numpy as np
71
+ import matplotlib.pyplot as plt
72
+ from sklearn.metrics import roc_curve, auc, f1_score
73
+ import csv
74
+
75
def read_probabilities_from_csv(filename):
    """Read PRS and RRS probability values from a CSV file.

    The first row is treated as a header and skipped. In each following row,
    column 0 holds a PRS probability and column 1 an RRS probability. Either
    cell may be blank, and column 1 may be missing altogether, so the two
    columns are allowed to have different lengths.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(prs_probs, rrs_probs)`` tuple of lists of floats, in file order.
        Both lists are empty when the file has no data rows.

    Raises:
        ValueError: If a non-blank cell cannot be parsed as a float.
    """
    prs_probs = []
    rrs_probs = []
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        # Skip the header row; the default avoids StopIteration on an empty file.
        next(reader, None)
        for row in reader:
            # A short row still contributes whatever cells it has, instead of
            # being dropped silently.
            prs_value = row[0].strip() if row else ''
            rrs_value = row[1].strip() if len(row) > 1 else ''
            if prs_value:
                prs_probs.append(float(prs_value))
            if rrs_value:
                rrs_probs.append(float(rrs_value))
    return prs_probs, rrs_probs
94
+
95
def main():
    """Plot a threshold-coloured ROC curve and report the best-F1 threshold.

    Command-line arguments:
        --input_csv: CSV whose first column holds PRS (positive) probabilities
            and whose second column holds RRS (negative) probabilities.
        --output_file: Path of the PNG to write (default ``roc_curve.png``).

    The figure is saved, shown with ``plt.show()``, and the best F1 score and
    its threshold are printed. The process exits with a usage error when the
    input lacks one of the two classes or yields no threshold inside [0, 1].
    """
    parser = argparse.ArgumentParser(description='Compute ROC curve, best F1 score, and annotate thresholds.')
    parser.add_argument('--input_csv', required=True, help='CSV file containing PRS and RRS probability values')
    parser.add_argument('--output_file', default='roc_curve.png', help='Output file name for ROC curve plot')

    args = parser.parse_args()

    # Read probability values from CSV file
    prs_probs, rrs_probs = read_probabilities_from_csv(args.input_csv)

    # A ROC curve needs both classes; fail with a clear message instead of an
    # obscure error further down.
    if not prs_probs or not rrs_probs:
        parser.error(f'{args.input_csv} must contain at least one PRS and one RRS probability '
                     f'(found {len(prs_probs)} PRS and {len(rrs_probs)} RRS values).')

    # Combine probabilities and labels (PRS -> 1, RRS -> 0)
    probs = np.array(prs_probs + rrs_probs)
    labels = np.array([1] * len(prs_probs) + [0] * len(rrs_probs))

    # Compute ROC curve and AUC (AUC uses the full, unfiltered curve)
    fpr, tpr, thresholds = roc_curve(labels, probs)
    roc_auc = auc(fpr, tpr)

    # Keep only finite thresholds within [0, 1]; roc_curve prepends a sentinel
    # threshold above the largest score that is not a usable cut-off.
    keep = np.isfinite(thresholds) & (thresholds >= 0) & (thresholds <= 1)
    fpr = fpr[keep]
    tpr = tpr[keep]
    thresholds = thresholds[keep]
    if thresholds.size == 0:
        parser.error('No ROC threshold falls within [0, 1]; the input values do not look like probabilities.')

    # Compute best F1 score across thresholds
    best_f1 = -1.0
    best_thresh = None
    for thresh in thresholds:
        predicted_labels = (probs >= thresh).astype(int)
        current_f1 = f1_score(labels, predicted_labels)
        if current_f1 > best_f1:
            best_f1 = current_f1
            best_thresh = thresh

    # Set global font
    plt.rcParams['font.family'] = 'Arial'

    # Create figure and colormap
    fig, ax = plt.subplots(figsize=(10, 8))
    norm = plt.Normalize(vmin=thresholds.min(), vmax=thresholds.max())
    cmap = plt.cm.viridis

    # Plot the ROC curve in segments, color-coded by threshold
    for i in range(len(fpr) - 1):
        x = fpr[i:i + 2]
        y = tpr[i:i + 2]
        z = thresholds[i]
        ax.plot(x, y, color=cmap(norm(z)), lw=2.5)

    # Diagonal line
    ax.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')

    # Add a colorbar for thresholds
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    cbar = fig.colorbar(sm, ax=ax)
    cbar.set_label('Threshold', fontsize=16)
    cbar.ax.tick_params(labelsize=14)

    # Annotate a subset of thresholds on the ROC curve
    num_thresholds_to_annotate = 10  # Number of thresholds to annotate
    # np.unique avoids drawing the same label repeatedly when there are fewer
    # thresholds than requested annotations.
    idxs = np.unique(np.linspace(0, len(thresholds) - 1, num_thresholds_to_annotate).astype(int))
    for idx in idxs:
        thresh = thresholds[idx]
        ax.annotate(f'{thresh:.2f}', xy=(fpr[idx], tpr[idx]),
                    textcoords='offset points', xytext=(0, 10),
                    ha='center', fontsize=12, color='blue')

    # Set axis limits and labels
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate', fontsize=16)
    ax.set_ylabel('True Positive Rate', fontsize=16)
    ax.set_title('Receiver Operating Characteristic (ROC)', fontsize=18)
    ax.tick_params(axis='both', which='major', labelsize=14)

    # Add gridlines
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)

    # Add legend with 3 decimal places
    legend_text = (f'ROC curve (AUC = {roc_auc:.3f}, '
                   f'Best F1 = {best_f1:.3f}, '
                   f'Best F1 Threshold = {best_thresh:.3f})')
    ax.legend([legend_text], loc="lower right", fontsize=12)

    # Adjust layout
    plt.tight_layout()

    # Save and show the figure
    plt.savefig(args.output_file, dpi=300, format='png')
    plt.show()

    print(f"ROC curve saved to {args.output_file}")
    print(f"Best F1 Score: {best_f1:.3f} at threshold {best_thresh:.3f}")


if __name__ == '__main__':
    main()
train_ppiBTPE3b.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ train_ppiBTPE3b.py — Train or fine-tune the ppiBTEP / SiameseBTPE model for
4
+ protein-protein interaction (PPI) classification.
5
+
6
+ Architecture
7
+ ------------
8
+ SiameseBTPE: two branches with shared weights, each an ESM-1b encoder
9
+ (facebook/esm1b_t33_650M_UR50S by default). Each branch produces a [CLS]
10
+ embedding (last_hidden_state[:, 0, :], dim=1280). The two [CLS] embeddings
11
+ are concatenated (dim=2560), passed through Dropout(0.1) and a Linear layer
12
+ to 2 logits (CrossEntropyLoss; softmax applied at inference).
13
+
14
+ Modes
15
+ -----
16
+ - From scratch: --num_layers N --freeze_layers 0
17
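+ - Fine-tuning ESM-1b: omit --num_layers, set --freeze_layers >= 1, and pass --checkpoint with the weights to start from (the encoder is built from the config only, so without a checkpoint it is randomly initialised)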
18
+ - Resume: --checkpoint <path/to/ppiBTPE_epoch_K.pth>
19
+
20
+ Important
21
+ ---------
22
+ When training from scratch, pass --freeze_layers 0 explicitly. The default is
23
+ 20, which would freeze most of the model.
24
+
25
+ Data format
26
+ -----------
27
+ CSV with columns: seq1, seq2, label
28
+ - label = 1 or 'friends' → interacting
29
+ - label = 0 or 'enemies' → non-interacting
30
+
31
+ Example
32
+ -------
33
+ python train_ppiBTPE3b.py \\
34
+ --train_file train.csv \\
35
+ --val_file val.csv \\
36
+ --model_config facebook/esm1b_t33_650M_UR50S \\
37
+ --num_layers 12 \\
38
+ --freeze_layers 0 \\
39
+ --epochs 20 \\
40
+ --batch_size 2 \\
41
+ --learning_rate 1e-5 \\
42
+ --max_length 1024 \\
43
+ --output_dir ./out \\
44
+ --device cuda
45
+ """
46
+ import argparse
47
+ import os
48
+ import torch
49
+ import torch.nn as nn
50
+ import pandas as pd
51
+ from torch.utils.data import Dataset, DataLoader
52
+ from transformers import EsmModel, EsmTokenizer, EsmConfig
53
+ from tqdm import tqdm
54
+
55
# Command-line arguments, declared as (flag, add_argument keyword options) in
# the order they should appear in --help.
_ARGUMENT_SPECS = (
    # Input files
    ('--train_file', dict(type=str, required=True,
                          help='Path to the training CSV file.')),
    ('--val_file', dict(type=str, required=True,
                        help='Path to the validation CSV file.')),
    ('--checkpoint', dict(type=str, default=None,
                          help='(Optional) Path to a .pth checkpoint to load before training/fine-tuning.')),
    # Model parameters
    ('--model_config', dict(type=str, default='facebook/esm1b_t33_650M_UR50S',
                            help='ESM config name or path for architecture.')),
    ('--num_labels', dict(type=int, default=2,
                          help='Number of output labels (e.g., 2 for binary classification).')),
    ('--num_layers', dict(type=int, default=None,
                          help='Total transformer layers to initialize (scratch).')),
    ('--freeze_layers', dict(type=int, default=20,
                             help='Number of bottom layers to freeze during fine-tuning.')),
    # Training hyperparameters
    ('--epochs', dict(type=int, default=3, help='Number of training epochs.')),
    ('--batch_size', dict(type=int, default=4, help='Batch size.')),
    ('--learning_rate', dict(type=float, default=1e-5, help='Learning rate.')),
    ('--max_length', dict(type=int, default=1024,
                          help='Maximum sequence length for tokenization.')),
    # Misc
    ('--output_dir', dict(type=str, default='./',
                          help='Directory to save checkpoints and final model.')),
    ('--device', dict(type=str, default='cuda', choices=['cpu', 'cuda'],
                      help='Device to run training on.')),
)

parser = argparse.ArgumentParser(
    description='Train or fine-tune ppiBTPE Siamese model for PPI prediction.'
)
for _flag, _options in _ARGUMENT_SPECS:
    parser.add_argument(_flag, **_options)

# Parsed at import time; the rest of the module reads this namespace.
args = parser.parse_args()
92
+
93
# Device setup: use the requested CUDA device only when CUDA is actually
# available; otherwise run on the CPU and report zero GPUs.
_use_cuda = torch.cuda.is_available() and args.device.startswith('cuda')
device = torch.device(args.device if _use_cuda else 'cpu')
n_gpu = torch.cuda.device_count() if _use_cuda else 0
print(f"GPUs available: {n_gpu}" if _use_cuda else "Using CPU.")
102
+
103
+ # Dataset definition
104
class SiamesePPIDataset(Dataset):
    """Dataset of labelled protein pairs read from a CSV file.

    Columns are read by position: column 0 and column 1 hold the two sequences
    and column 2 holds the label. Each item is a dict with the tokenized
    sequences (``input_ids1``, ``attention_mask1``, ``input_ids2``,
    ``attention_mask2``) and an integer ``labels`` tensor.
    """

    # Textual label names accepted in the label column (matched after
    # stripping whitespace and lower-casing).
    _LABEL_NAMES = {'friends': 1, 'enemies': 0}

    def __init__(self, csv_file, tokenizer, max_length):
        """Load ``csv_file`` and remember the tokenizer and padding length."""
        self.data = pd.read_csv(csv_file)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of rows (pairs) in the CSV."""
        return len(self.data)

    @classmethod
    def _label_to_int(cls, lbl):
        """Convert a raw label cell to an integer class id.

        Strings may be ``'friends'``/``'enemies'`` (any case, surrounding
        whitespace ignored) or an integer written as text; anything else is
        converted with ``int()``.

        Raises:
            ValueError: If a string label is neither a known name nor an integer.
        """
        if not isinstance(lbl, str):
            return int(lbl)
        key = lbl.strip().lower()
        if key in cls._LABEL_NAMES:
            return cls._LABEL_NAMES[key]
        try:
            return int(key)
        except ValueError:
            # Mapping unknown text to class 0 would silently corrupt training.
            raise ValueError(
                f"Unrecognized label {lbl!r}: expected 'friends', 'enemies' or an integer."
            ) from None

    def __getitem__(self, idx):
        """Tokenize the pair at row ``idx`` and return it with its label."""
        seq1 = self.data.iloc[idx, 0]
        seq2 = self.data.iloc[idx, 1]
        label = self._label_to_int(self.data.iloc[idx, 2])
        # Pad/truncate both sequences to the same fixed length so the default
        # collate function can stack them into a batch.
        enc1 = self.tokenizer(seq1, truncation=True, padding='max_length',
                              max_length=self.max_length, return_tensors='pt')
        enc2 = self.tokenizer(seq2, truncation=True, padding='max_length',
                              max_length=self.max_length, return_tensors='pt')
        return {
            'input_ids1': enc1.input_ids.squeeze(0),
            'attention_mask1': enc1.attention_mask.squeeze(0),
            'input_ids2': enc2.input_ids.squeeze(0),
            'attention_mask2': enc2.attention_mask.squeeze(0),
            'labels': torch.tensor(label, dtype=torch.long)
        }
131
+
132
+ # Model definition
133
class SiameseBTPE(nn.Module):
    """Siamese pair classifier: one shared ESM encoder plus a linear head.

    Both sequences go through the same ``EsmModel``. The first-token hidden
    state of each is taken, the two vectors are concatenated, passed through
    dropout and projected to ``num_labels`` logits.
    """

    def __init__(self, config, num_labels=2):
        """Build the encoder from ``config`` and a head over two embeddings."""
        super().__init__()
        self.esm = EsmModel(config)
        self.dropout = nn.Dropout(0.1)
        # The head sees both embeddings side by side, hence twice the width.
        self.classifier = nn.Linear(2 * config.hidden_size, num_labels)

    def forward(self, id1, mask1, id2, mask2):
        """Return the logits for a batch of sequence pairs."""
        first_tokens = [
            self.esm(input_ids=ids, attention_mask=mask).last_hidden_state[:, 0, :]
            for ids, mask in ((id1, mask1), (id2, mask2))
        ]
        pair = torch.cat(first_tokens, dim=1)
        return self.classifier(self.dropout(pair))
147
+
148
+ # Main training loop
149
+
150
def main():
    """Train (or fine-tune) the Siamese model and save a checkpoint per epoch.

    All settings come from the module-level ``args`` namespace; ``device`` and
    ``n_gpu`` come from the module-level device setup. After every epoch the
    state dict is written to ``ppiBTPE_epoch_<k>.pth`` in ``args.output_dir``,
    and once more as ``ppiBTPE_final.pth`` at the end.

    Raises:
        ValueError: If the training or validation CSV yields no batches.
    """
    tokenizer = EsmTokenizer.from_pretrained(args.model_config)
    config = EsmConfig.from_pretrained(args.model_config)
    if args.num_layers is not None:
        config.num_hidden_layers = args.num_layers
        print(f"Using {args.num_layers} layers (override)")

    # datasets + loaders
    train_ds = SiamesePPIDataset(args.train_file, tokenizer, args.max_length)
    val_ds = SiamesePPIDataset(args.val_file, tokenizer, args.max_length)
    train_loader = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=args.batch_size, shuffle=False)
    # Fail before training starts rather than with a ZeroDivisionError when
    # the per-epoch averages are computed.
    if len(train_loader) == 0 or len(val_loader) == 0:
        raise ValueError('Training and validation files must each contain at least one row.')

    model = SiameseBTPE(config, num_labels=args.num_labels)
    # load checkpoint if provided
    if args.checkpoint:
        # Checkpoints written by this script are plain state dicts, so
        # restrict unpickling to tensors.
        state = torch.load(args.checkpoint, map_location='cpu', weights_only=True)
        load_result = model.load_state_dict(state, strict=False)
        print(f"Loaded checkpoint: {args.checkpoint}")
        # strict=False tolerates key mismatches; report them so a checkpoint
        # that matched nothing does not pass unnoticed.
        if load_result.missing_keys:
            print(f"Warning: {len(load_result.missing_keys)} model parameter(s) not found in checkpoint "
                  f"(kept at initial values), e.g. {load_result.missing_keys[:5]}")
        if load_result.unexpected_keys:
            print(f"Warning: {len(load_result.unexpected_keys)} checkpoint key(s) ignored, "
                  f"e.g. {load_result.unexpected_keys[:5]}")

    # freeze layers
    n_layers = len(model.esm.encoder.layer)
    # Clamp to [0, n_layers]; a negative value would otherwise slice from the end.
    to_freeze = max(0, min(args.freeze_layers, n_layers))
    # NOTE: the embeddings are frozen unconditionally, also when
    # --freeze_layers is 0.
    for param in model.esm.embeddings.parameters():
        param.requires_grad = False
    for layer in model.esm.encoder.layer[:to_freeze]:
        for p in layer.parameters():
            p.requires_grad = False
    print(f"Frozen {to_freeze}/{n_layers} layers")

    # device
    model.to(device)
    if n_gpu > 1:
        model = nn.DataParallel(model)

    # Only parameters left trainable are handed to the optimizer.
    optim = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                              lr=args.learning_rate)
    crit = nn.CrossEntropyLoss()
    os.makedirs(args.output_dir, exist_ok=True)

    for e in range(args.epochs):
        print(f"Epoch {e+1}/{args.epochs}")
        model.train()
        train_loss = 0
        for b in tqdm(train_loader, desc='Train'):
            optim.zero_grad()
            logits = model(b['input_ids1'].to(device), b['attention_mask1'].to(device),
                           b['input_ids2'].to(device), b['attention_mask2'].to(device))
            loss = crit(logits, b['labels'].to(device))
            loss.backward()
            optim.step()
            train_loss += loss.item()
        print(f"Train loss: {train_loss/len(train_loader):.4f}")

        model.eval()
        val_loss, correct, n_seen = 0, 0, 0
        with torch.no_grad():
            for b in tqdm(val_loader, desc='Val'):
                labels = b['labels'].to(device)
                logits = model(b['input_ids1'].to(device), b['attention_mask1'].to(device),
                               b['input_ids2'].to(device), b['attention_mask2'].to(device))
                val_loss += crit(logits, labels).item()
                preds = logits.argmax(dim=1)
                correct += (preds == labels).sum().item()
                n_seen += len(preds)
        print(f"Val loss: {val_loss/len(val_loader):.4f}, Acc: {correct/n_seen:.4f}")

        # save (unwrap DataParallel so keys carry no 'module.' prefix)
        path = os.path.join(args.output_dir, f"ppiBTPE_epoch_{e+1}.pth")
        torch.save(model.module.state_dict() if n_gpu > 1 else model.state_dict(), path)
        print(f"Saved {path}")

    final = os.path.join(args.output_dir, 'ppiBTPE_final.pth')
    torch.save(model.module.state_dict() if n_gpu > 1 else model.state_dict(), final)
    print(f"Saved final model: {final}")


if __name__ == '__main__':
    main()