Spaces:
No application file
No application file
Upload 42 files
Browse files- .gitattributes +33 -0
- mafft/mafft.bat +8 -0
- mafft/mafftdir/bin/mafft +0 -0
- mafft/mafftdir/libexec/addsingle +3 -0
- mafft/mafftdir/libexec/contrafoldwrap +3 -0
- mafft/mafftdir/libexec/countlen +3 -0
- mafft/mafftdir/libexec/dash_alignments +0 -0
- mafft/mafftdir/libexec/dash_client +3 -0
- mafft/mafftdir/libexec/dash_sequences.fa +138 -0
- mafft/mafftdir/libexec/disttbfast +3 -0
- mafft/mafftdir/libexec/dndblast +3 -0
- mafft/mafftdir/libexec/dndfast7 +3 -0
- mafft/mafftdir/libexec/dndpre +3 -0
- mafft/mafftdir/libexec/dvtditr +3 -0
- mafft/mafftdir/libexec/f2cl +3 -0
- mafft/mafftdir/libexec/filter +3 -0
- mafft/mafftdir/libexec/getlag +3 -0
- mafft/mafftdir/libexec/hat3 +0 -0
- mafft/mafftdir/libexec/hex2maffttext +3 -0
- mafft/mafftdir/libexec/mafft-distance +3 -0
- mafft/mafftdir/libexec/mafft-homologs.1 +131 -0
- mafft/mafftdir/libexec/mafft-profile +3 -0
- mafft/mafftdir/libexec/mafft.1 +479 -0
- mafft/mafftdir/libexec/mafftash_premafft.pl +464 -0
- mafft/mafftdir/libexec/maffttext2hex +3 -0
- mafft/mafftdir/libexec/makedirectionlist +3 -0
- mafft/mafftdir/libexec/mccaskillwrap +3 -0
- mafft/mafftdir/libexec/multi2hat3s +3 -0
- mafft/mafftdir/libexec/nodepair +3 -0
- mafft/mafftdir/libexec/pairash +3 -0
- mafft/mafftdir/libexec/pairlocalalign +3 -0
- mafft/mafftdir/libexec/regtable2seq +3 -0
- mafft/mafftdir/libexec/replaceu +3 -0
- mafft/mafftdir/libexec/restoreu +3 -0
- mafft/mafftdir/libexec/score +3 -0
- mafft/mafftdir/libexec/seekquencer_premafft.pl +600 -0
- mafft/mafftdir/libexec/seq2regtable +3 -0
- mafft/mafftdir/libexec/setcore +3 -0
- mafft/mafftdir/libexec/setdirection +3 -0
- mafft/mafftdir/libexec/sextet5 +3 -0
- mafft/mafftdir/libexec/splittbfast +3 -0
- mafft/mafftdir/libexec/tbfast +3 -0
- mafft/mafftdir/libexec/version +3 -0
.gitattributes
CHANGED
|
@@ -35,3 +35,36 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
iqtree/bin/iqtree3_arm filter=lfs diff=lfs merge=lfs -text
|
| 37 |
iqtree/bin/iqtree3_intel filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
iqtree/bin/iqtree3_arm filter=lfs diff=lfs merge=lfs -text
|
| 37 |
iqtree/bin/iqtree3_intel filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
mafft/mafftdir/libexec/addsingle filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
mafft/mafftdir/libexec/contrafoldwrap filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
mafft/mafftdir/libexec/countlen filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
mafft/mafftdir/libexec/dash_client filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
mafft/mafftdir/libexec/disttbfast filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
mafft/mafftdir/libexec/dndblast filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
mafft/mafftdir/libexec/dndfast7 filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
mafft/mafftdir/libexec/dndpre filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
mafft/mafftdir/libexec/dvtditr filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
mafft/mafftdir/libexec/f2cl filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
mafft/mafftdir/libexec/filter filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
mafft/mafftdir/libexec/getlag filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
mafft/mafftdir/libexec/hex2maffttext filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
mafft/mafftdir/libexec/mafft-distance filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
mafft/mafftdir/libexec/mafft-profile filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
mafft/mafftdir/libexec/maffttext2hex filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
mafft/mafftdir/libexec/makedirectionlist filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
mafft/mafftdir/libexec/mccaskillwrap filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
mafft/mafftdir/libexec/multi2hat3s filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
mafft/mafftdir/libexec/nodepair filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
mafft/mafftdir/libexec/pairash filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
mafft/mafftdir/libexec/pairlocalalign filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
mafft/mafftdir/libexec/regtable2seq filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
mafft/mafftdir/libexec/replaceu filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
mafft/mafftdir/libexec/restoreu filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
mafft/mafftdir/libexec/score filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
mafft/mafftdir/libexec/seq2regtable filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
mafft/mafftdir/libexec/setcore filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
mafft/mafftdir/libexec/setdirection filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
mafft/mafftdir/libexec/sextet5 filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
mafft/mafftdir/libexec/splittbfast filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
mafft/mafftdir/libexec/tbfast filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
mafft/mafftdir/libexec/version filter=lfs diff=lfs merge=lfs -text
|
mafft/mafft.bat
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#! /bin/bash
|
| 2 |
+
# sh -> bash for debian. By J. R. Peterson. 2015/Jun.
|
| 3 |
+
|
| 4 |
+
pushd "`dirname "$0"`" > /dev/null 2>&1; rootdir="$PWD"; popd > /dev/null 2>&1;
|
| 5 |
+
MAFFT_BINARIES="$rootdir/mafftdir/libexec"; export MAFFT_BINARIES;
|
| 6 |
+
|
| 7 |
+
"$rootdir/mafftdir/bin/mafft" "$@"
|
| 8 |
+
# input file name can have space
|
mafft/mafftdir/bin/mafft
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mafft/mafftdir/libexec/addsingle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec688824d983d572bdaa8d91a458baa85ac1fbcf43b7c1e8334c4bdd5af65910
|
| 3 |
+
size 1315696
|
mafft/mafftdir/libexec/contrafoldwrap
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18ec4942494455b5a07f5d5ed5efd2fd614ac2560d421f8d7feeee9e597a320c
|
| 3 |
+
size 926072
|
mafft/mafftdir/libexec/countlen
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c5bf406f42385ff89b24b5f9f285e096b57c404157e9824dff220238f9084b8
|
| 3 |
+
size 625368
|
mafft/mafftdir/libexec/dash_alignments
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mafft/mafftdir/libexec/dash_client
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b2f1a8f8b922769fefaa0399b057aa20e93da5cece04b31e136b775093c6f9b
|
| 3 |
+
size 4232576
|
mafft/mafftdir/libexec/dash_sequences.fa
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
>DASH_3J5P_A||281||579
|
| 2 |
+
EKNSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSF
|
| 3 |
+
>DASH_3J5P_A||283||585
|
| 4 |
+
NSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSFLKCMRK
|
| 5 |
+
>DASH_3J9P_A||1089||1399
|
| 6 |
+
YEPLTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKL
|
| 7 |
+
>DASH_3J9P_A||1091||1408
|
| 8 |
+
PLTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLWFLRKVD
|
| 9 |
+
>DASH_3J9P_A||1092||1402
|
| 10 |
+
LTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLW
|
| 11 |
+
>DASH_3J9P_A||1096||1410
|
| 12 |
+
NAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFNSTGIINETSDHSEILDTTNSYLIKTCMILVFLSSIFGYCKEAGQIFQQKRNYFMDISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLWFLRKVDQK
|
| 13 |
+
>DASH_5AN8_A||277||586
|
| 14 |
+
NSVLEIIAFHSRSPHRHRMVVLEPLNKLLQAKWDRLIPRFCFNFLCYLVYMLIFTAVAYHQPALEKQGFPPLKATAGNSMLLLGHILILLGGVYLLLGQLWYFWRRRLFIWISFMDSYFEILFLLQALLTVLSQVLCFLAIEWYLPLLVSSLVLGWLNLLYYTRGFQHTGIYSVMIQKVILRDLLRFLLVYLVFLFGFAVALVSLSREAQEDEAPPYRSILDASLELFKFTIGMGELAFQEQLRFRGVVLLLLLAYVLLTYVLLLNMLIALMSETVNSVATDSWSIWKLQKAISVLEMENGYWWCRRKKQ
|
| 15 |
+
>DASH_5IRX_A||286||584
|
| 16 |
+
EKNSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSF
|
| 17 |
+
>DASH_5IRX_A||288||590
|
| 18 |
+
NSVLEVIAYSSSETPNRHDMLLVEPLNRLLQDKWDRFVKRIFYFNFFVYCLYMIIFTAAAYYRPVEGLPPYKLKNTVGDYFRVTGEILSVSGGVYFFFRGIQYFLQRRPSLKSLFVDSYSEILFFVQSLFMLVSVVLYFSQRKEYVASMVFSLAMGWTNMLYYTRGFQQMGIYAVMIEKMILRDLCRFMFVYLVFLFGFSTAVVTLIEDGKYNSLYSTCLELFKFTIGMGDLEFTENYDFKAVFIILLLAYVILTYILLLNMLIALMGETVNKIAQESKNIWKLQRAITILDTEKSFLKCMRK
|
| 19 |
+
>DASH_5IWK_A||287||613
|
| 20 |
+
DDQSLLELIVTTKKREARQILDQTPVKELVSLKWKRYGRPYFCVLGAIYVLYIICFTMCCVYRPLKPRITNRTNPRDNTLLQQKLLQEAYVTPKDDLRLVGELVSIVGAVIILLVEIPDIFRLGVTRFFGQTILGGPFHVIIVTYAFMVLVTMVMRLTNSDGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWQMAVVILGFASAFYIIFQTEDPDELGHFYDYPMALFSTFELFLTIIDGPANYDVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQVVATTVMLERKLPRCLWP
|
| 21 |
+
>DASH_5IWK_A||383||608
|
| 22 |
+
LRLVGELVSIVGAVIILLVEIPDIFRLGVTRFFGQTILGGPFHVIIVTYAFMVLVTMVMRLTNSDGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWQMAVVILGFASAFYIIFQTEDPDELGHFYDYPMALFSTFELFLTIIDGPANYDVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQVVATTVMLERKLP
|
| 23 |
+
>DASH_5K47_A||19||514
|
| 24 |
+
GTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAE
|
| 25 |
+
>DASH_5K47_A||20||521
|
| 26 |
+
TRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLI
|
| 27 |
+
>DASH_5K47_A||247||526
|
| 28 |
+
NANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYH
|
| 29 |
+
>DASH_5K47_A||2||537
|
| 30 |
+
PRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYHKALVKLKLKKN
|
| 31 |
+
>DASH_5K47_A||30||525
|
| 32 |
+
REKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGY
|
| 33 |
+
>DASH_5MKF_A||158||704
|
| 34 |
+
EDQGPPCPSPVGGGDPLHRHLPLEGQPPRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLI
|
| 35 |
+
>DASH_5MKF_A||185||720
|
| 36 |
+
PRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYHKALVKLKLKKN
|
| 37 |
+
>DASH_5MKF_A||430||709
|
| 38 |
+
NANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYH
|
| 39 |
+
>DASH_5TJA_A||3||217
|
| 40 |
+
GLSNQLAVTFREENTIAFRHLFLLGYSDGADDTFAAYTREQLYQAIFHAVDQYLALPDVSLGRYAYVRGGGDPWTNGSGLALCQRYYHRGHVDPANDTFDIDPMVVTDCIQVDPPERPPPPPSDDLTLLESSSSYKNLTLKFHKLVNVTIHFRLKTINLQSLINNEIPDCYTFSVLITFDNKAHSGRIPISLETQAHIQECKHPSVFQHGDNSLE
|
| 41 |
+
>DASH_5VKQ_A||1228||1596
|
| 42 |
+
DKRNVEFLDVLIENEQKEVIAHTVVQRYLQELWHGSLTWASWKILLLLVAFIVCPPVWIGFTFPMGHKFNKVPIIKFMSYLTSHIYLMIHLSIVGITPIYPVLRLSLVPYWYEVGLLIWLSGLLLFELTNPSDKSGLGSIKVLVLLLGMAGVGVHVSAFLFVSKEYWPTLVYCRNQCFALAFLLACVQILDFLSFHHLFGPWAIIIGDLLKDLARFLAVLAIFVFGFSMHIVALNQSFANFSPEDLRSFEKKNRNRGYFSDVRMHPINSFELLFFAVFGQTTTEQTQVDKIKNVATPTQPYWVEYLFKIVFGIYMLVSVVVLIQLLIAMMSDTYQRIQAQSDIEWKFGLSKLIRNMHRTTTAPSPLNLV
|
| 43 |
+
>DASH_5W3S_A||14||541
|
| 44 |
+
HEEENRCNFNQHTSPSEELLLEDQMRRKLKFFFMNPCEKFWARGRKPWKLAIQILKIAMVTIQLVLFGLSNQMVVAFKEENTVAFKHLFLKGYIDRMDDTYAVYTQSDVYDQIIFAVNQYLQLYQVSVGNHAYENKGTDQSAMAICQHFYKRGNIYPGNDTFDIDPEIETDCFFVEPDEPFHIGTPAENKLNLTLDFHRLLTVELQFKLKAINLQTVRHQELPDCYDFTLTITFDNKAHSGRIKISLDNDISIRECKDWHVSGSIQKNTHNMMIFDAFVILTCLVSLILCIRSVISGLQLQQEFVNFFLLHYKKDVSVSDQMEFVNGWYIMIIISDILTIIGSILKMEIQAKSLTSYDVCSILLGTSTMLVWLGVIRYLGFFAKYNLLILTLQAALPNVIRFCCCAAMIYLGYCFCGWIVLGPYHNKFRSLNMVSECLFSLINGDDMFATFAKMQQKSYLVWLFSRIYLYSFISLFIYMILSLFIALITDTYETIKHYQQDGFPETELRTFISECKDLPNSGKFRLED
|
| 45 |
+
>DASH_5W3S_A||28||546
|
| 46 |
+
PSEELLLEDQMRRKLKFFFMNPCEKFWARGRKPWKLAIQILKIAMVTIQLVLFGLSNQMVVAFKEENTVAFKHLFLKGYIDRMDDTYAVYTQSDVYDQIIFAVNQYLQLYQVSVGNHAYENKGTDQSAMAICQHFYKRGNIYPGNDTFDIDPEIETDCFFVEPDEPFHIGTPAENKLNLTLDFHRLLTVELQFKLKAINLQTVRHQELPDCYDFTLTITFDNKAHSGRIKISLDNDISIRECKDWHVSGSIQKNTHNMMIFDAFVILTCLVSLILCIRSVISGLQLQQEFVNFFLLHYKKDVSVSDQMEFVNGWYIMIIISDILTIIGSILKMEIQAKSLTSYDVCSILLGTSTMLVWLGVIRYLGFFAKYNLLILTLQAALPNVIRFCCCAAMIYLGYCFCGWIVLGPYHNKFRSLNMVSECLFSLINGDDMFATFAKMQQKSYLVWLFSRIYLYSFISLFIYMILSLFIALITDTYETIKHYQQDGFPETELRTFISECKDLPNSGKFRLEDDPPVS
|
| 47 |
+
>DASH_5WJ9_A||16||557
|
| 48 |
+
LTPNPGYGTQAGPSPAPPTPPEEEDLRRRLKYFFMSPCDKFRAKGRKPCKLMLQVVKILVVTVQLILFGLSNQLAVTFREENTIAFRHLFLLGYSDGADDTFAAYTREQLYQAIFHAVDQYLALPDVSLGRYAYVRGGGDPWTNGSGLALCQRYYHRGHVDPANDTFDIDPMVVTDCIQVDPPERPPPPPSDDLTLLESSSSYKNLTLKFHKLVNVTIHFRLKTINLQSLINNEIPDCYTFSVLITFDNKAHSGRIPISLETQAHIQECKHPSVFQHGDNSFRLLFDVVVILTCSLSFLLCARSLLRGFLLQNEFVGFMWRQRGRVISLWERLEFVNGWYILLVTSDVLTISGTIMKIGIEAKNLASYDVCSILLGTSTLLVWVGVIRYLTFFHNYNILIATLRVALPSVMRFCCCVAVIYLGYCFCGWIVLGPYHVKFRSLSMVSECLFSLINGDDMFVTFAAMQAQQGRSSLVWLFSQLYLYSFISLFIYMVLSLFIALITGAYDTIKHPGGAGAEESELQAYIAQCQDSPTSGKFRRGS
|
| 49 |
+
>DASH_5WPV_A||16||557
|
| 50 |
+
LTPNPGYGTQVGTSPAPTTPTEEEDLRRRLKYFFMSPCDKFRAKGRKPCKLMLQVVKILVVTVQLILFGLSNQLVVTFREENTIAFRHLFLLGYSDGSDDTFAAYTQEQLYQAIFYAVDQYLILPEISLGRYAYVRGGGGPWANGSALALCQRYYHRGHVDPANDTFDIDPRVVTDCIQVDPPDRPPDIPSEDLDFLDGSASYKNLTLKFHKLINVTIHFQLKTINLQSLINNEIPDCYTFSILITFDNKAHSGRIPIRLETKTHIQECKHPSVSRHGDNSFRLLFDVVVILTCSLSFLLCARSLLRGFLLQNEFVVFMWRRRGREISLWERLEFVNGWYILLVTSDVLTISGTVMKIGIEAKNLASYDVCSILLGTSTLLVWVGVIRYLTFFHKYNILIATLRVALPSVMRFCCCVAVIYLGYCFCGWIVLGPYHVKFRSLSMVSECLFSLINGDDMFVTFAAMQAQQGHSSLVWLFSQLYLYSFISLFIYMVLSLFIALITGAYDTIKHPGGTGTEKSELQAYIEQCQDSPTSGKFRRGS
|
| 51 |
+
>DASH_5Z1W_A||127||515
|
| 52 |
+
YENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEELAGQKDQL
|
| 53 |
+
>DASH_5Z1W_A||13||506
|
| 54 |
+
SIRGLWGTTLTENTAENRELYVKTTLRELVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKE
|
| 55 |
+
>DASH_5Z1W_A||246||537
|
| 56 |
+
NANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEELAGQKDQLQLSDFLKQSYNKTLLRLRLRKE
|
| 57 |
+
>DASH_5Z1W_A||29||447
|
| 58 |
+
NRELYVKTTLRELVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFV
|
| 59 |
+
>DASH_5Z1W_A||41||508
|
| 60 |
+
LVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEEL
|
| 61 |
+
>DASH_5Z1W_A||9||528
|
| 62 |
+
HICRSIRGLWGTTLTENTAENRELYVKTTLRELVVYIVFLVDICLLTYGMTSSSAYYYTKVMSELFLHTPSDSGVSFQTISSMSDFWDFAQGPLLDSLYWTKWYNNQSLGRGSHSFIYYENLLLGAPRLRQLRVRNDSCVVHEDFREDILNCYDVYSPDKEDQLPFGPQNGTAWTYHSQNELGGSSHWGRLTSYSGGGYYLDLPGSRQASAEALQGLQEGLWLDRGTRVVFIDFSVYNANINLFCILRLVVEFPATGGTIPSWQIRTVKLIRYVNNWDFFIVGCEVVFCVFIFYYVVEEILEIHLHRLRYLSSVWNILDLVVILLSIVAVGFHIFRTLEVNRLMGKLLQQPDTYADFEFLAFWQTQYNNMNAVNLFFAWIKIFKYISFNKTMTQLSSTLARCAKDILGFAIMFFIVFFAYAQLGYLLFGTQVENFSTFVKCIFTQFRIILGDFDYNAIDNANRILGPVYFVTYVFFVFFVLLNMFLAIINDTYSEVKEELAGQKDQLQLSDFLKQSYNKT
|
| 63 |
+
>DASH_5Z96_A||286||659
|
| 64 |
+
LARLKLAIKYRQKEFVAQPNCQQLLASRWYDEFPGWRRRHWAVKMVTCFIIGLLFPVFSVCYLIAPKSPLGLFIRKPFIKFICHTASYLTFLFLLLLASQHIDRSDLNRQGPPPTIVEWMILPWVLGFIWGEIKQMWDGGLQDYIHDWWNLMDFVMNSLYLATISLKIVAFVKYSALNPRESWDMWHPTLVAEALFAIANIFSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYEETKGLSCKGIRCEKQNNAFSTLFETLQSLFWSIFGLINLYVTNVKAQHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFEEGGTLPTPFNV
|
| 65 |
+
>DASH_5Z96_A||289||660
|
| 66 |
+
LKLAIKYRQKEFVAQPNCQQLLASRWYDEFPGWRRRHWAVKMVTCFIIGLLFPVFSVCYLIAPKSPLGLFIRKPFIKFICHTASYLTFLFLLLLASQHIDRSDLNRQGPPPTIVEWMILPWVLGFIWGEIKQMWDGGLQDYIHDWWNLMDFVMNSLYLATISLKIVAFVKYSALNPRESWDMWHPTLVAEALFAIANIFSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYEETKGLSCKGIRCEKQNNAFSTLFETLQSLFWSIFGLINLYVTNVKAQHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFEEGGTLPTPFNVI
|
| 67 |
+
>DASH_5Z96_A||290||663
|
| 68 |
+
KLAIKYRQKEFVAQPNCQQLLASRWYDEFPGWRRRHWAVKMVTCFIIGLLFPVFSVCYLIAPKSPLGLFIRKPFIKFICHTASYLTFLFLLLLASQHIDRSDLNRQGPPPTIVEWMILPWVLGFIWGEIKQMWDGGLQDYIHDWWNLMDFVMNSLYLATISLKIVAFVKYSALNPRESWDMWHPTLVAEALFAIANIFSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYEETKGLSCKGIRCEKQNNAFSTLFETLQSLFWSIFGLINLYVTNVKAQHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFEEGGTLPTPFNVIPSP
|
| 69 |
+
>DASH_5ZX5_A||814||1056
|
| 70 |
+
FKEVKILDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCA
|
| 71 |
+
>DASH_5ZX5_A||820||1131
|
| 72 |
+
LDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPL
|
| 73 |
+
>DASH_5ZX5_A||821||1134
|
| 74 |
+
DSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIIL
|
| 75 |
+
>DASH_5ZX5_A||827||1228
|
| 76 |
+
NEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIILSHIVSLFCCVCKRRKKDKTSDGPKLFLTEEDQKKLHDFEEQCVEMYFDEKDDKFNSGSEERIRVTFERVEQMSIQIKEVGDRVNYIKRSLQSLD
|
| 77 |
+
>DASH_6A70_A||27||559
|
| 78 |
+
SHPQFEKGSAAAPRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIR
|
| 79 |
+
>DASH_6A70_A||284||563
|
| 80 |
+
NANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYH
|
| 81 |
+
>DASH_6A70_A||39||574
|
| 82 |
+
PRVAWAERLVRGLRGLWGTRLMEESSTNREKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGYHKALVKLKLKKN
|
| 83 |
+
>DASH_6A70_A||67||562
|
| 84 |
+
REKYLKSVLRELVTYLLFLIVLCILTYGMMSSNVYYYTRMMSQLFLDTPVSKTEKTNFKTLSSMEDFWKFTEGSLLDGLYWKMQPSNQTEADNRSFIFYENLLLGVPRIRQLRVRNGSCSIPQDLRDEIKECYDVYSVSSEDRAPFGPRNGTAWIYTSEKDLNGSSHWGIIATYSGAGYYLDLSRTREETAAQVASLKKNVWLDRGTRATFIDFSVYNANINLFCVVRLLVEFPATGGVIPSWQFQPLKLIRYVTTFDFFLAACEIIFCFFIFYYVVEEILEIRIHKLHYFRSFWNCLDVVIVVLSVVAIGINIYRTSNVEVLLQFLEDQNTFPNFEHLAYWQIQFNNIAAVTVFFVWIKLFKFINFNRTMSQLSTTMSRCAKDLFGFAIMFFIIFLAYAQLAYLVFGTQVDDFSTFQECIFTQFRIILGDINFAEIEEANRVLGPIYFTTFVFFMFFILLNMFLAIINDTYSEVKSDLAQQKAEMELSDLIRKGY
|
| 85 |
+
>DASH_6A70_B||227||1129
|
| 86 |
+
LRFRRLLVAELQRGFFDKHIWLSIWDRPPRSRFTRIQRATCCVLLICLFLGANAVWYGAVGDSAYSTGHVSRLSPLSVDTVAVGLVSSVVVYPVYLAILFLFRMSRSKVAGSPSPTPAGQQVLDIDSCLDSSVLDSSFLTFSGLHAEQAFVGQMKSDLFLDDSKSLVCWPSGEGTLSWPDLLSDPSIVGSNLRQLARGQAGHGLGPEEDGFSLASPYSPAKSFSASDEDLIQQVLAEGVSSPAPTQDTHMETDLLSSLSSTPGEKTETLALQRLGELGPPSPGLNWEQPQAARLSRTGLVEGLRKRLLPAWCASLAHGLSLLLVAVAVAVSGWVGASFPPGVSVAWLLSSSASFLASFLGWEPLKVLLEALYFSLVAKRLHPDEDDTLVESPAVTPVSARVPRVRPPHGFALFLAKEEARKVKRLHGMLRSLLVYMLFLLVTLLASYGDASCHGHAYRLQSAIKQELHSRAFLAITRSEELWPWMAHVLLPYVHGNQSSPELGPPRLRQVRLQEALYPDPPGPRVHTCSAAGGFSTSDYDVGWESPHNGSGTWAYSAPDLLGAWSWGSCAVYDSGGYVQELGLSLEESRDRLRFLQLHNWLDNRSRAVFLELTRYSPAVGLHAAVTLRLEFPAAGRALAALSVRPFALRRLSAGLSLPLLTSVCLLLFAVHFAVAEARTWHREGRWRVLRLGAWARWLLVALTAATALVRLAQLGAADRQWTRFVRGRPRRFTSFDQVAQLSSAARGLAASLLFLLLVKAAQQLRFVRQWSVFGKTLCRALPELLGVTLGLVVLGVAYAQLAILLVSSCVDSLWSVAQALLVLCPGTGLSTLCPAESWHLSPLLCVGLWALRLWGALRLGAVILRWRYHALRGELYRPAWEPQDYEMVELFLRRLRLWMGLSK
|
| 87 |
+
>DASH_6AEI_A||287||663
|
| 88 |
+
LAKLKVAIKYHQKEFVAQPNCQQLLATLWYDGFPGWRRKHWVVKLLTCMTIGFLFPMLSIAYLISPRSNLGLFIKKPFIKFICHTASYLTFLFMLLLASQHIVRTDLHVQGPPPTVVEWMILPWVLGFIWGEIKEMWDGGFTEYIHDWWNLMDFAMNSLYLATISLKIVAYVKYNGSRPREEWEMWHPTLIAEALFAISNILSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYETRAIDEPNNCKGIRCEKQNNAFSTLFETLQSLFWSVFGLLNLYVTNVKARHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFDEGGTLPPPFNI
|
| 89 |
+
>DASH_6AEI_A||290||664
|
| 90 |
+
LKVAIKYHQKEFVAQPNCQQLLATLWYDGFPGWRRKHWVVKLLTCMTIGFLFPMLSIAYLISPRSNLGLFIKKPFIKFICHTASYLTFLFMLLLASQHIVRTDLHVQGPPPTVVEWMILPWVLGFIWGEIKEMWDGGFTEYIHDWWNLMDFAMNSLYLATISLKIVAYVKYNGSRPREEWEMWHPTLIAEALFAISNILSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYETRAIDEPNNCKGIRCEKQNNAFSTLFETLQSLFWSVFGLLNLYVTNVKARHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFDEGGTLPPPFNII
|
| 91 |
+
>DASH_6AEI_A||291||667
|
| 92 |
+
KVAIKYHQKEFVAQPNCQQLLATLWYDGFPGWRRKHWVVKLLTCMTIGFLFPMLSIAYLISPRSNLGLFIKKPFIKFICHTASYLTFLFMLLLASQHIVRTDLHVQGPPPTVVEWMILPWVLGFIWGEIKEMWDGGFTEYIHDWWNLMDFAMNSLYLATISLKIVAYVKYNGSRPREEWEMWHPTLIAEALFAISNILSSLRLISLFTANSHLGPLQISLGRMLLDILKFLFIYCLVLLAFANGLNQLYFYYETRAIDEPNNCKGIRCEKQNNAFSTLFETLQSLFWSVFGLLNLYVTNVKARHEFTEFVGATMFGTYNVISLVVLLNMLIAMMNNSYQLIADHADIEWKFARTKLWMSYFDEGGTLPPPFNIIPSP
|
| 93 |
+
>DASH_6AYF_A||19||546
|
| 94 |
+
HEEENRCNFNQQTSPSEELLLEDQMRRKLKFFFMNPCEKFWARGRKPWKLAIQILKIAMVTIQLVLFGLSNQMVVAFKEENTIAFKHLFLKGYMDRMDDTYAVYTQSDVYDQLIFAVNQYLQLYNVSVGNHAYENKGTKQSAMAICQHFYKRGNIYPGNDTFDIDPEIETECFFVEPDEPFHIGTPAENKLNLTLDFHRLLTVELQFKLKAINLQTVRHQELPDCYDFTLTITFDNKAHSGRIKISLDNDISIRECKDWHVSGSIQKNTHYMMIFDAFVILTCLVSLILCIRSVIRGLQLQQEFVNFFLLHYKKEVSVSDQMEFVNGWYIMIIISDILTIIGSILKMEIQAKSLTSYDVCSILLGTSTMLVWLGVIRYLGFFAKYNLLILTLQAALPNVIRFCCCAAMIYLGYCFCGWIVLGPYHDKFRSLNMVSECLFSLINGDDMFATFAKMQQKSYLVWLFSRIYLYSFISLFIYMILSLFIALITDTYETIKQYQQDGFPETELRTFISECKDLPNSGKYRLED
|
| 95 |
+
>DASH_6BBJ_A||424||746
|
| 96 |
+
GEEVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSNCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPV
|
| 97 |
+
>DASH_6BBJ_A||428||750
|
| 98 |
+
SVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSNCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCMRK
|
| 99 |
+
>DASH_6BCO_A||735||1081
|
| 100 |
+
PPGTVEPSAKVALERRQRRRPGRALCCGKFSKRWSDFWGAPVTAFLGNVVSYLLFLLLFAHVLLVDFQPTKPSVSELLLYFWAFTLLCEELRQGLGGGWGSLASGGRGPDRAPLRHRLHLYLSDTWNQCDLLALTCFLLGVGCRLTPGLFDLGRTVLCLDFMIFTLRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLCVWLVAYGVATEGILRPQDRSLPSILRRVFYRPYLQIFGQIPQEEMDVALMIPGNCSMERGSWAHPEGPVAGSCVSQYANWLVVLLLIVFLLVANILLLNLLIAMFSYTFSKVHGNSDLYWKAQRYSLIREFHSRPALAPPLIIISHV
|
| 101 |
+
>DASH_6BO5_A||314||642
|
| 102 |
+
NSVLEIIAFHCKSPNRHRMVVLEPLNKLLQEKWDRLVSRFFFNFACYLVYMFIFTVVAYHQPSLDQPAIPSSKATFGESMLLLGHILILLGGIYLLLGQLWYFWRRRLFIWISFMDSYFEILFLLQALLTVLSQVLRFMETEWYLPLLVLSLVLGWLNLLYYTRGFQHTGIYSVMIQKVILRDLLRFLLVYLVFLFGFAVALVSLSREARSPKAPEDNNSTVTEQPTVGQEEEPAPYRSILDASLELFKFTIGMGELAFQEQLRFRGVVLLLLLAYVLLTYVLLLNMLIALMSETVNHVADNSWSIWKLQKAISVLEMENGYWWCRRKK
|
| 103 |
+
>DASH_6BO8_A||288||614
|
| 104 |
+
DEQSLLELIITTKKREARQILDQTPVKELVSLKWKRYGRPYFCMLGAIYLLYIICFTMCCIYRPLKPRTNNRTSPRDNTLLQQKLLQEAYMTPKDDIRLVGELVTVIGAIIILLVEVPDIFRMGVTRFFGQTILGGPFHVLIITYAFMVLVTMVMRLISASGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWLMAVVILGFASAFYIIFQTEDPEELGHFYDYPMALFSTFELFLTIIDGPANYNVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQIVATTVMLERKLPRCLWP
|
| 105 |
+
>DASH_6BO8_A||384||609
|
| 106 |
+
IRLVGELVTVIGAIIILLVEVPDIFRMGVTRFFGQTILGGPFHVLIITYAFMVLVTMVMRLISASGEVVPMSFALVLGWCNVMYFARGFQMLGPFTIMIQKMIFGDLMRFCWLMAVVILGFASAFYIIFQTEDPEELGHFYDYPMALFSTFELFLTIIDGPANYNVDLPFMYSITYAAFAIIATLLMLNLLIAMMGDTHWRVAHERDELWRAQIVATTVMLERKLP
|
| 107 |
+
>DASH_6BPQ_A||570||883
|
| 108 |
+
IILCLFFFPLIGCGFISFRKKPVEKTKKLFLYYVSFFTSPFVVFSWNVIFYIAFLLLFAYVLLMDFQKEPTALEIILYVLVFILLCDEVRQWYMNGSKYFSDLWNVMDTLAIFYFIAGIVFRLHSDESSWYSGRVIFCLDYIVFTLRLIHIFTVSRNLGPKIIMLQRMMIDVFFFLFLFAVWMVAFGVARQGILRKNEHRWEWIFRSVIYEPYLAMFGQYPDDIDGTTYNFDHCTFSGNESKPLCVELDANNQPRFPEWITIPLVCIYMLSTNILLVNLLVAMFGYTVGSVQENNDQVWKFQRFFLVQEYCSRL
|
| 109 |
+
>DASH_6BQR_A||661||1011
|
| 110 |
+
PVGTADPAEKTPLGVPRQSGRPGCCGGRCGGRRCLRRWFHFWGAPVTIFMGNVVSYLLFLLLFSRVLLVDFQPAPPGSLELLLYFWAFTLLCEELRQGLSGGGGSLASGGPGPGHASLSQRLRLYLADSWNQCDLVALTCFLLGVGCRLTPGLYHLGRTVLCIDFMVFTVRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLGVWLVAYGVATEGLLRPRDSDFPSILRRVFYRPYLQIFGQIPQEDMDVALMEHSNCSSEPGFWAHPPGAQAGTCVSQYANWLVVLLLVIFLLVANILLVNLLIAMFSYTFGKVQGNSDLYWKAQRYRLIREFHSRPALAPPFIVISHL
|
| 111 |
+
>DASH_6BQV_A||739||1089
|
| 112 |
+
PVGTADPAEKTPLGVPRQSGRPGCCGGRCGGRRCLRRWFHFWGAPVTIFMGNVVSYLLFLLLFSRVLLVDFQPAPPGSLELLLYFWAFTLLCEELRQGLSGGGGSLASGGPGPGHASLSQRLRLYLADSWNQCDLVALTCFLLGVGCRLTPGLYHLGRTVLCIDFMVFTVRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLGVWLVAYGVATEGLLRPRDSDFPSILRRVFYRPYLQIFGQIPQEDMDVALMEHSNCSSEPGFWAHPPGAQAGTCVSQYANWLVVLLLVIFLLVANILLVNLLIAMFSYTFGKVQGNSDLYWKAQRYRLIREFHSRPALAPPFIVISHL
|
| 113 |
+
>DASH_6BWD_A||470||712
|
| 114 |
+
FKEVKILDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCA
|
| 115 |
+
>DASH_6BWD_A||476||787
|
| 116 |
+
LDSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPL
|
| 117 |
+
>DASH_6BWD_A||477||790
|
| 118 |
+
DSSDGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIIL
|
| 119 |
+
>DASH_6BWD_A||480||793
|
| 120 |
+
DGKNEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIILSHI
|
| 121 |
+
>DASH_6BWD_A||483||904
|
| 122 |
+
NEMEIHIKSKKLPITRKFYAFYHAPIVKFWFNTLAYLGFLMLYTFVVLVKMEQLPSVQEWIVIAYIFTYAIEKVREVFMSEAGKISQKIKVWFSDYFNVSDTIAIISFFVGFGLRFGAKWNYINAYDNHVFVAGRLIYCLNIIFWYVRLLDFLAVNQQAGPYVMMIGKMVANMFYIVVIMALVLLSFGVPRKAILYPHEEPSWSLAKDIVFHPYWMIFGEVYAYEIDVCANDSTLPTICGPGTWLTPFLQAVYLFVQYIIMVNLLIAFFNNVYLQVKAISNIVWKYQRYHFIMAYHEKPVLPPPLIILSHIVSLFCCVCKRRKKDKTSDGPKLFLTEEDQKKLHDFEEQCVEMYFDEKDDKFNSGSEERIRVTFERVEQMSIQIKEVGDRVNYIKRSLQSLDSQIGHLQDLSALTVDTLKTL
|
| 123 |
+
>DASH_6BWI_A||536||886
|
| 124 |
+
PVGTADPAEKTPLGVPRQSGRPGCCGGRCGGRRCLRRWFHFWGAPVTIFMGNVVSYLLFLLLFSRVLLVDFQPAPPGSLELLLYFWAFTLLCEELRQGLSGGGGSLASGGPGPGHASLSQRLRLYLADSWNQCDLVALTCFLLGVGCRLTPGLYHLGRTVLCIDFMVFTVRLLHIFTVNKQLGPKIVIVSKMMKDVFFFLFFLGVWLVAYGVATEGLLRPRDSDFPSILRRVFYRPYLQIFGQIPQEDMDVALMEHSNCSSEPGFWAHPPGAQAGTCVSQYANWLVVLLLVIFLLVANILLVNLLIAMFSYTFGKVQGNSDLYWKAQRYRLIREFHSRPALAPPFIVISHL
|
| 125 |
+
>DASH_6BWJ_A||351||680
|
| 126 |
+
NSVLEIIAFHSRSPHRHRMVVLEPLNKLLQAKWDRLIPRFCFNFLCYLVYMLIFTAVAYHQPALEKQGFPPLKATAGNSMLLLGHILILLGGVYLLLGQLWYFWRRRLFIWISFMDSYSEILFLLQALLTVLSQVLCFLAIEWYLPLLVSSLVMGWTNLLYYTRGFQHTGIYSVMIEKVILRDLLRFLLVYLVFLFGFAVALVSLSREAQNSRTPAGPNATEVGQPGAGQEDEAPPYRSILDASLELFKFTIGMGELAFQEQLRFRGVVLLLLLAYVLLTYVLLLNMLIALMSETVNSVATDSWSIWKLQKAISVLEMENGYWWCRRKKQ
|
| 127 |
+
>DASH_6C8G_A||293||615
|
| 128 |
+
GEEVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPV
|
| 129 |
+
>DASH_6C8G_A||294||617
|
| 130 |
+
EEVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCM
|
| 131 |
+
>DASH_6C8G_A||295||619
|
| 132 |
+
EVSVLEILVYNSKVENRHEMLAVEPINELLRDKWQKFGAVSFYISVVSYLIAMIIFTLIAYYRPMDGTPPYPYRTTMDYMRLAGEIVTLLTGVVFFITNIKDLFMKKCPGVNSLFIDGSFQLLYFIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCMRK
|
| 133 |
+
>DASH_6C8G_A||419||661
|
| 134 |
+
FIYSVLVIITAVLYLVGIESYLAVMVFALVLGWMNALYFTRGLKLTGTYSIMLQKILFKDLFRFLLVYLLFMIGYASALVSLLNPCTSQESCIETSSQCTVPEYPSCRDSSTFSKFLLDLFKLTIGMGDLEMINSAKYPAVFIILLVTYIILTFVLLLNMLIALMGETVGQVSKESKQIWKLQWATTILDIERSFPVCMRKAFRSGEMVTVGKNLDGTPDRRWCFRVDEVNWSHWNQNLGIIN
|
| 135 |
+
>DASH_6CO7_A||808||1043
|
| 136 |
+
EIREDDSMEVIMRNKKLGFCDRIMHFYSAPFSKFVGNVVGYLAFIFLYAYVVLFNFPRFDPAKTLGGIHPTEIVLYFWVFTILIEEIRQLAAKPPKYIKDKVSVYFSDTWNFVDIFSLTVFIIAIILRFFTNSRIFTASRIILSLDIIFFIVRSLQIFSVNRLLGPKLVMIQKMMQDLAQFIIILAVFTIAYGIALHAVMFPSPGIYARNNTWVTITSVVQYPYWQMYGELFLDEI
|
| 137 |
+
>DASH_6CO7_A||810||1124
|
| 138 |
+
REDDSMEVIMRNKKLGFCDRIMHFYSAPFSKFVGNVVGYLAFIFLYAYVVLFNFPRFDPAKTLGGIHPTEIVLYFWVFTILIEEIRQLAAKPPKYIKDKVSVYFSDTWNFVDIFSLTVFIIAIILRFFTNSRIFTASRIILSLDIIFFIVRSLQIFSVNRLLGPKLVMIQKMMQDLAQFIIILAVFTIAYGIALHAVMFPSPGIYARNNTWVTITSVVQYPYWQMYGELFLDEIQGEKPKEFGEVDPDGRWLSPLLLAIYMVFTNILLLNLLIAIFNYTFERVQEDSDKVWKFQRYDLVQEYHSRPVFAPPLVLL
|
mafft/mafftdir/libexec/disttbfast
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:310007e6e480182187037bd92220503ef6e4f5fa0f056afc9e400f6eafcb2683
|
| 3 |
+
size 1339248
|
mafft/mafftdir/libexec/dndblast
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c64e1ffab4716780019a2f0265c805a43155952b41e41f7a5ca3af25e86dcb5f
|
| 3 |
+
size 937744
|
mafft/mafftdir/libexec/dndfast7
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acb2030d64db016ab3ff54510edd4e520f1dca9d847e5e3b84788910b519f490
|
| 3 |
+
size 936976
|
mafft/mafftdir/libexec/dndpre
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2681cfa34bf58ce95b19751b847c299bd63cb879d28818bbddfad246a3c0480
|
| 3 |
+
size 924048
|
mafft/mafftdir/libexec/dvtditr
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88c4833952f46c59468d1ffdbc6ac84256bec7645ff9b9bbf584c7c8550349bd
|
| 3 |
+
size 1271664
|
mafft/mafftdir/libexec/f2cl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:341bde9f61f8a38c6c1a0fe85bb6eaf05a69236809a1cc354a979b2c8b7637c2
|
| 3 |
+
size 735344
|
mafft/mafftdir/libexec/filter
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a50de40f60648f8e36274261767fe499ae9d990f252f7fdcad0a0933266119a
|
| 3 |
+
size 862672
|
mafft/mafftdir/libexec/getlag
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39162ae5f020a43f7bc1add205d9488b55ac4dc22ff9d1483d9a5f0e44001058
|
| 3 |
+
size 1200976
|
mafft/mafftdir/libexec/hat3
ADDED
|
File without changes
|
mafft/mafftdir/libexec/hex2maffttext
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5817c00d02c388b49b0f8e82a95da316b68aa42d8f901a0df65b3cb750d5e64
|
| 3 |
+
size 542776
|
mafft/mafftdir/libexec/mafft-distance
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2079d859d55c81f95318af8c161a886baf0ea16e60317393d037691b424163d4
|
| 3 |
+
size 936344
|
mafft/mafftdir/libexec/mafft-homologs.1
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.\" Title: MAFFT-HOMOLOGS
|
| 2 |
+
.\" Author: Kazutaka Katoh <katoh_at_bioreg.kyushu-u.ac.jp.>
|
| 3 |
+
.\" Generator: DocBook XSL Stylesheets v1.72.0 <http://docbook.sf.net/>
|
| 4 |
+
.\" Date: 2007-08-14
|
| 5 |
+
.\" Manual: Mafft Manual
|
| 6 |
+
.\" Source: mafft-homologs 2.1
|
| 7 |
+
.\"
|
| 8 |
+
.TH "MAFFT\-HOMOLOGS" "1" "2007\-06\-09" "mafft\-homologs 2.1" "Mafft Manual"
|
| 9 |
+
.\" disable hyphenation
|
| 10 |
+
.nh
|
| 11 |
+
.\" disable justification (adjust text to left margin only)
|
| 12 |
+
.ad l
|
| 13 |
+
.SH "NAME"
|
| 14 |
+
.RS 0
|
| 15 |
+
mafft\-homologs \- aligns sequences together with homologues automatically collected from SwissProt via NCBI BLAST
|
| 16 |
+
.RE
|
| 17 |
+
.SH "SYNOPSIS"
|
| 18 |
+
.RS 0
|
| 19 |
+
\fBmafft\-homologs\fR [\fBoptions\fR] \fIinput\fR [>\ \fIoutput\fR]
|
| 20 |
+
.RE
|
| 21 |
+
.SH "DESCRIPTION"
|
| 22 |
+
.RS 0
|
| 23 |
+
The accuracy of an alignment of a few distantly related sequences is considerably improved when they are aligned together with their close homologs. The reason for the improvement is probably the same as that for PSI\-BLAST. That is, the positions of highly conserved residues, those with many gaps and other additional information are provided by close homologs. According to Katoh et al. (2005), the improvement by adding close homologs is 10% or so, which is comparable to the improvement by incorporating structural information of a pair of sequences. Mafft\-homologs in a mafft server works like this:
|
| 24 |
+
.sp
|
| 25 |
+
.RS 4
|
| 26 |
+
\h'-04' 1.\h'+02'Collect a number (50 by default) of close homologs (E=1e\-10 by default) of the input sequences.
|
| 27 |
+
.RE
|
| 28 |
+
.sp
|
| 29 |
+
.RS 4
|
| 30 |
+
\h'-04' 2.\h'+02'Align the input sequences and homologs all together using the L\-INS\-i strategy.
|
| 31 |
+
.RE
|
| 32 |
+
.sp
|
| 33 |
+
.RS 4
|
| 34 |
+
\h'-04' 3.\h'+02'Remove the homologs.
|
| 35 |
+
.RE
|
| 36 |
+
.RE
|
| 37 |
+
.SH "OPTIONS"
|
| 38 |
+
.RS 0
|
| 39 |
+
.PP
|
| 40 |
+
\fB\-a\fR \fI\fIn\fR\fR
|
| 41 |
+
.RS 4
|
| 42 |
+
The number of collected sequences (default: 50).
|
| 43 |
+
.RE
|
| 44 |
+
.PP
|
| 45 |
+
\fB\-e\fR \fI\fIn\fR\fR
|
| 46 |
+
.RS 4
|
| 47 |
+
Threshold value (default: 1e\-10).
|
| 48 |
+
.RE
|
| 49 |
+
.PP
|
| 50 |
+
\fB\-o\fR \fI\fIxxx\fR\fR
|
| 51 |
+
.RS 4
|
| 52 |
+
Options for mafft (default: " \-\-op 1.53 \-\-ep 0.123 \-\-maxiterate 1000 \-\-localpair \-\-reorder").
|
| 53 |
+
.RE
|
| 54 |
+
.PP
|
| 55 |
+
\fB\-l\fR
|
| 56 |
+
.RS 4
|
| 57 |
+
Locally carries out BLAST searches instead of NCBI BLAST (requires locally installed BLAST and a database).
|
| 58 |
+
.RE
|
| 59 |
+
.PP
|
| 60 |
+
\fB\-f\fR
|
| 61 |
+
.RS 4
|
| 62 |
+
Outputs collected homologues also (default: off).
|
| 63 |
+
.RE
|
| 64 |
+
.PP
|
| 65 |
+
\fB\-w\fR
|
| 66 |
+
.RS 4
|
| 67 |
+
Entire sequences are subjected to BLAST search (default: well\-aligned region only).
|
| 68 |
+
.RE
|
| 69 |
+
.RE
|
| 70 |
+
.SH "REQUIREMENTS"
|
| 71 |
+
.RS 0
|
| 72 |
+
.PP
|
| 73 |
+
MAFFT version > 5.58.
|
| 74 |
+
.PP
|
| 75 |
+
Either of
|
| 76 |
+
.RS 4
|
| 77 |
+
.PP
|
| 78 |
+
lynx (when remote BLAST server is used)
|
| 79 |
+
.PP
|
| 80 |
+
BLAST and a protein sequence database (when local BLAST is used)
|
| 81 |
+
.RE
|
| 82 |
+
.RE
|
| 83 |
+
.SH "REFERENCES"
|
| 84 |
+
.RS 0
|
| 85 |
+
.PP
|
| 86 |
+
Katoh, Kuma, Toh and Miyata (Nucleic Acids Res. 33:511\-518, 2005) MAFFT version 5: improvement in accuracy of multiple sequence alignment.
|
| 87 |
+
.RE
|
| 88 |
+
.SH "SEE ALSO"
|
| 89 |
+
.RS 0
|
| 90 |
+
.PP
|
| 91 |
+
\fBmafft\fR(1)
|
| 92 |
+
.RE
|
| 93 |
+
.SH "AUTHORS"
|
| 94 |
+
.RS 0
|
| 95 |
+
.PP
|
| 96 |
+
\fBKazutaka Katoh\fR <\&katoh_at_bioreg.kyushu\-u.ac.jp.\&>
|
| 97 |
+
.sp -1n
|
| 98 |
+
.IP "" 4
|
| 99 |
+
Wrote Mafft.
|
| 100 |
+
.PP
|
| 101 |
+
\fBCharles Plessy\fR <\&charles\-debian\-nospam@plessy.org\&>
|
| 102 |
+
.sp -1n
|
| 103 |
+
.IP "" 4
|
| 104 |
+
Wrote this manpage in DocBook XML for the Debian distribution, using Mafft's homepage as a template.
|
| 105 |
+
.RE
|
| 106 |
+
.SH "COPYRIGHT"
|
| 107 |
+
.RS 0
|
| 108 |
+
Copyright \(co 2002\-2007 Kazutaka Katoh (mafft)
|
| 109 |
+
.br
|
| 110 |
+
Copyright \(co 2007 Charles Plessy (this manpage)
|
| 111 |
+
.br
|
| 112 |
+
.PP
|
| 113 |
+
Mafft and its manpage are offered under the following conditions:
|
| 114 |
+
.PP
|
| 115 |
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
| 116 |
+
.sp
|
| 117 |
+
.RS 4
|
| 118 |
+
\h'-04' 1.\h'+02'Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
| 119 |
+
.RE
|
| 120 |
+
.sp
|
| 121 |
+
.RS 4
|
| 122 |
+
\h'-04' 2.\h'+02'Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
| 123 |
+
.RE
|
| 124 |
+
.sp
|
| 125 |
+
.RS 4
|
| 126 |
+
\h'-04' 3.\h'+02'The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission.
|
| 127 |
+
.RE
|
| 128 |
+
.PP
|
| 129 |
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 130 |
+
.br
|
| 131 |
+
.RE
|
mafft/mafftdir/libexec/mafft-profile
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a74f63ea3728d7f3a4514a3d533f5f0d02c801fd8cf1cdf80e7b79baa7a8943
|
| 3 |
+
size 1199856
|
mafft/mafftdir/libexec/mafft.1
ADDED
|
@@ -0,0 +1,479 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.\" Title: MAFFT
|
| 2 |
+
.\" Author: Kazutaka Katoh <kazutaka.katoh@aist.go.jp>
|
| 3 |
+
.\" Generator: DocBook XSL Stylesheets v1.72.0 <http://docbook.sf.net/>
|
| 4 |
+
.\" Date: 2007-08-14
|
| 5 |
+
.\" Manual: Mafft Manual
|
| 6 |
+
.\" Source: mafft 6.240
|
| 7 |
+
.\"
|
| 8 |
+
.TH "MAFFT" "1" "2007\-06\-09" "mafft 6.240" "Mafft Manual"
|
| 9 |
+
.\" disable hyphenation
|
| 10 |
+
.nh
|
| 11 |
+
.\" disable justification (adjust text to left margin only)
|
| 12 |
+
.ad l
|
| 13 |
+
.SH "THIS MANUAL IS FOR V6.2XX (2007)"
|
| 14 |
+
Recent versions (v7.1xx; 2013 Jan.) have more features than those described below.
|
| 15 |
+
See also the tips page at
|
| 16 |
+
http://mafft.cbrc.jp/alignment/software/tips0.html
|
| 17 |
+
.SH "NAME"
|
| 18 |
+
.RS 0
|
| 19 |
+
.sp
|
| 20 |
+
mafft \- Multiple alignment program for amino acid or nucleotide sequences
|
| 21 |
+
.RE
|
| 22 |
+
.SH "SYNOPSIS"
|
| 23 |
+
.RS 0
|
| 24 |
+
.HP 6
|
| 25 |
+
\fBmafft\fR [\fBoptions\fR] \fIinput\fR [>\ \fIoutput\fR]
|
| 26 |
+
.HP 6
|
| 27 |
+
\fBlinsi\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 28 |
+
.HP 6
|
| 29 |
+
\fBginsi\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 30 |
+
.HP 6
|
| 31 |
+
\fBeinsi\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 32 |
+
.HP 7
|
| 33 |
+
\fBfftnsi\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 34 |
+
.HP 6
|
| 35 |
+
\fBfftns\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 36 |
+
.HP 5
|
| 37 |
+
\fBnwns\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 38 |
+
.HP 6
|
| 39 |
+
\fBnwnsi\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 40 |
+
.HP 14
|
| 41 |
+
\fBmafft\-profile\fR \fIgroup1\fR \fIgroup2\fR [>\ \fIoutput\fR]
|
| 42 |
+
.HP
|
| 43 |
+
.sp
|
| 44 |
+
\fIinput\fR, \fIgroup1\fR and \fIgroup2\fR must be in FASTA format.
|
| 45 |
+
.RE
|
| 46 |
+
.SH "DESCRIPTION"
|
| 47 |
+
.RS 0
|
| 48 |
+
\fBMAFFT\fR is a multiple sequence alignment program for unix\-like operating systems. It offers a range of multiple alignment methods.
|
| 49 |
+
.SS "Accuracy\-oriented methods:"
|
| 50 |
+
.sp
|
| 51 |
+
.RS 4
|
| 52 |
+
\h'-04'\(bu\h'+03'L\-INS\-i (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information):
|
| 53 |
+
.HP 6
|
| 54 |
+
\fBmafft\fR \fB\-\-localpair\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 55 |
+
.HP 6
|
| 56 |
+
\fBlinsi\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 57 |
+
.RE
|
| 58 |
+
.sp
|
| 59 |
+
.RS 4
|
| 60 |
+
\h'-04'\(bu\h'+03'G\-INS\-i (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information):
|
| 61 |
+
.HP 6
|
| 62 |
+
\fBmafft\fR \fB\-\-globalpair\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 63 |
+
.HP 6
|
| 64 |
+
\fBginsi\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 65 |
+
.RE
|
| 66 |
+
.sp
|
| 67 |
+
.RS 4
|
| 68 |
+
\h'-04'\(bu\h'+03'E\-INS\-i (suitable for sequences containing large unalignable regions; recommended for <200 sequences):
|
| 69 |
+
.HP 6
|
| 70 |
+
\fBmafft\fR \fB\-\-ep\fR\ \fI0\fR \fB\-\-genafpair\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 71 |
+
.HP 6
|
| 72 |
+
\fBeinsi\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 73 |
+
.br
|
| 74 |
+
|
| 75 |
+
For E\-INS\-i, the
|
| 76 |
+
\fB\-\-ep\fR
|
| 77 |
+
\fI0\fR
|
| 78 |
+
option is recommended to allow large gaps.
|
| 79 |
+
.RE
|
| 80 |
+
.SS "Speed\-oriented methods:"
|
| 81 |
+
.sp
|
| 82 |
+
.RS 4
|
| 83 |
+
\h'-04'\(bu\h'+03'FFT\-NS\-i (iterative refinement method; two cycles only):
|
| 84 |
+
.HP 6
|
| 85 |
+
\fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI2\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 86 |
+
.HP 7
|
| 87 |
+
\fBfftnsi\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 88 |
+
.RE
|
| 89 |
+
.sp
|
| 90 |
+
.RS 4
|
| 91 |
+
\h'-04'\(bu\h'+03'FFT\-NS\-i (iterative refinement method; max. 1000 iterations):
|
| 92 |
+
.HP 6
|
| 93 |
+
\fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 94 |
+
.RE
|
| 95 |
+
.sp
|
| 96 |
+
.RS 4
|
| 97 |
+
\h'-04'\(bu\h'+03'FFT\-NS\-2 (fast; progressive method):
|
| 98 |
+
.HP 6
|
| 99 |
+
\fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI0\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 100 |
+
.HP 6
|
| 101 |
+
\fBfftns\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 102 |
+
.RE
|
| 103 |
+
.sp
|
| 104 |
+
.RS 4
|
| 105 |
+
\h'-04'\(bu\h'+03'FFT\-NS\-1 (very fast; recommended for >2000 sequences; progressive method with a rough guide tree):
|
| 106 |
+
.HP 6
|
| 107 |
+
\fBmafft\fR \fB\-\-retree\fR\ \fI1\fR \fB\-\-maxiterate\fR\ \fI0\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 108 |
+
.RE
|
| 109 |
+
.sp
|
| 110 |
+
.RS 4
|
| 111 |
+
\h'-04'\(bu\h'+03'NW\-NS\-i (iterative refinement method without FFT approximation; two cycles only):
|
| 112 |
+
.HP 6
|
| 113 |
+
\fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI2\fR \fB\-\-nofft\fR\ \fIinput\fR [>\ \fIoutput\fR]
|
| 114 |
+
.HP 7
|
| 115 |
+
\fBnwnsi\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 116 |
+
.RE
|
| 117 |
+
.sp
|
| 118 |
+
.RS 4
|
| 119 |
+
\h'-04'\(bu\h'+03'NW\-NS\-2 (fast; progressive method without the FFT approximation):
|
| 120 |
+
.HP 6
|
| 121 |
+
\fBmafft\fR \fB\-\-retree\fR\ \fI2\fR \fB\-\-maxiterate\fR\ \fI0\fR \fB\-\-nofft\fR\ \fIinput\fR [>\ \fIoutput\fR]
|
| 122 |
+
.HP 6
|
| 123 |
+
\fBnwns\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 124 |
+
.RE
|
| 125 |
+
.sp
|
| 126 |
+
.RS 4
|
| 127 |
+
\h'-04'\(bu\h'+03'NW\-NS\-PartTree\-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm):
|
| 128 |
+
.HP 6
|
| 129 |
+
\fBmafft\fR \fB\-\-retree\fR\ \fI1\fR \fB\-\-maxiterate\fR\ \fI0\fR \fB\-\-nofft\fR\ \fB\-\-parttree\fR \fIinput\fR [>\ \fIoutput\fR]
|
| 130 |
+
.RE
|
| 131 |
+
.SS "Group\-to\-group alignments"
|
| 132 |
+
.HP 6
|
| 133 |
+
.RS 4
|
| 134 |
+
\fBmafft\-profile\fR \fIgroup1\fR \fIgroup2\fR [>\ \fIoutput\fR]
|
| 135 |
+
.sp
|
| 136 |
+
or:
|
| 137 |
+
.sp
|
| 138 |
+
\fBmafft\fR \fB\-\-maxiterate\fR\ \fI1000\fR \fB\-\-seed\fR\ \fIgroup1\fR \fB\-\-seed\fR\ \fIgroup2\fR /dev/null [>\ \fIoutput\fR]
|
| 139 |
+
.RE
|
| 140 |
+
.RE
|
| 141 |
+
.RE
|
| 142 |
+
.SH "OPTIONS"
|
| 143 |
+
.SS "Algorithm"
|
| 144 |
+
.RS 0
|
| 145 |
+
.PP
|
| 146 |
+
\fB\-\-auto\fR
|
| 147 |
+
.RS 4
|
| 148 |
+
Automatically selects an appropriate strategy from L\-INS\-i, FFT\-NS\-i and FFT\-NS\-2, according to data
|
| 149 |
+
size. Default: off (always FFT\-NS\-2)
|
| 150 |
+
.RE
|
| 151 |
+
.PP
|
| 152 |
+
\fB\-\-6merpair\fR
|
| 153 |
+
.RS 4
|
| 154 |
+
Distance is calculated based on the number of shared 6mers. Default: on
|
| 155 |
+
.RE
|
| 156 |
+
.PP
|
| 157 |
+
\fB\-\-globalpair\fR
|
| 158 |
+
.RS 4
|
| 159 |
+
All pairwise alignments are computed with the Needleman\-Wunsch
|
| 160 |
+
algorithm. More accurate but slower
|
| 161 |
+
than \-\-6merpair. Suitable for a set of
|
| 162 |
+
globally alignable sequences. Applicable to
|
| 163 |
+
up to ~200 sequences. A combination with \-\-maxiterate 1000 is recommended (G\-INS\-i). Default: off (6mer distance is used)
|
| 164 |
+
.RE
|
| 165 |
+
.PP
|
| 166 |
+
\fB\-\-localpair\fR
|
| 167 |
+
.RS 4
|
| 168 |
+
All pairwise alignments are computed with the Smith\-Waterman
|
| 169 |
+
algorithm. More accurate but slower
|
| 170 |
+
than \-\-6merpair. Suitable for a set of
|
| 171 |
+
locally alignable sequences. Applicable to
|
| 172 |
+
up to ~200 sequences. A combination with \-\-maxiterate 1000 is recommended (L\-INS\-i). Default: off (6mer distance is used)
|
| 173 |
+
.RE
|
| 174 |
+
.PP
|
| 175 |
+
\fB\-\-genafpair\fR
|
| 176 |
+
.RS 4
|
| 177 |
+
All pairwise alignments are computed with a local
|
| 178 |
+
algorithm with the generalized affine gap cost
|
| 179 |
+
(Altschul 1998). More accurate but slower
|
| 180 |
+
than \-\-6merpair. Suitable when large internal gaps
|
| 181 |
+
are expected. Applicable to
|
| 182 |
+
up to ~200 sequences. A combination with \-\-maxiterate 1000 is recommended (E\-INS\-i). Default: off (6mer distance is used)
|
| 183 |
+
.RE
|
| 184 |
+
.\".PP
|
| 185 |
+
.\"\fB\-\-fastswpair\fR
|
| 186 |
+
.\".RS 4
|
| 187 |
+
.\"Distance is calculated based on a FASTA alignment.
|
| 188 |
+
.\"FASTA is required. Default: off (6mer distance is used)
|
| 189 |
+
.\".RE
|
| 190 |
+
.PP
|
| 191 |
+
\fB\-\-fastapair\fR
|
| 192 |
+
.RS 4
|
| 193 |
+
All pairwise alignments are computed with FASTA (Pearson and Lipman 1988).
|
| 194 |
+
FASTA is required. Default: off (6mer distance is used)
|
| 195 |
+
.RE
|
| 196 |
+
.\".PP
|
| 197 |
+
.\"\fB\-\-blastpair\fR
|
| 198 |
+
.\".RS 4
|
| 199 |
+
.\"Distance is calculated based on a BLAST alignment. BLAST is
|
| 200 |
+
.\"required. Default: off (6mer distance is used)
|
| 201 |
+
.\".RE
|
| 202 |
+
.PP
|
| 203 |
+
\fB\-\-weighti\fR \fInumber\fR
|
| 204 |
+
.RS 4
|
| 205 |
+
Weighting factor for the consistency term calculated from pairwise alignments. Valid when
|
| 206 |
+
either of \-\-globalpair, \-\-localpair, \-\-genafpair, \-\-fastapair or
|
| 207 |
+
\-\-blastpair is selected. Default: 2.7
|
| 208 |
+
.RE
|
| 209 |
+
.PP
|
| 210 |
+
\fB\-\-retree\fR \fInumber\fR
|
| 211 |
+
.RS 4
|
| 212 |
+
Guide tree is built \fInumber\fR times in the
|
| 213 |
+
progressive stage. Valid with 6mer distance. Default: 2
|
| 214 |
+
.RE
|
| 215 |
+
.PP
|
| 216 |
+
\fB\-\-maxiterate\fR \fInumber\fR
|
| 217 |
+
.RS 4
|
| 218 |
+
\fInumber\fR cycles of iterative refinement are performed. Default: 0
|
| 219 |
+
.RE
|
| 220 |
+
.PP
|
| 221 |
+
\fB\-\-fft\fR
|
| 222 |
+
.RS 4
|
| 223 |
+
Use FFT approximation in group\-to\-group alignment. Default: on
|
| 224 |
+
.RE
|
| 225 |
+
.PP
|
| 226 |
+
\fB\-\-nofft\fR
|
| 227 |
+
.RS 4
|
| 228 |
+
Do not use FFT approximation in group\-to\-group alignment. Default: off
|
| 229 |
+
.RE
|
| 230 |
+
.PP
|
| 231 |
+
\fB\-\-noscore\fR
|
| 232 |
+
.RS 4
|
| 233 |
+
Alignment score is not checked in the iterative refinement stage. Default: off (score is checked)
|
| 234 |
+
.RE
|
| 235 |
+
.PP
|
| 236 |
+
\fB\-\-memsave\fR
|
| 237 |
+
.RS 4
|
| 238 |
+
Use the Myers\-Miller (1988) algorithm. Default: automatically turned on when the alignment length exceeds 10,000 (aa/nt).
|
| 239 |
+
.RE
|
| 240 |
+
.PP
|
| 241 |
+
\fB\-\-parttree\fR
|
| 242 |
+
.RS 4
|
| 243 |
+
Use a fast tree\-building method (PartTree, Katoh and Toh 2007) with
|
| 244 |
+
the 6mer distance. Recommended when a large number (> ~10,000)
|
| 245 |
+
of sequences are input. Default: off
|
| 246 |
+
.RE
|
| 247 |
+
.PP
|
| 248 |
+
\fB\-\-dpparttree\fR
|
| 249 |
+
.RS 4
|
| 250 |
+
The PartTree algorithm is used with distances based on DP. Slightly
|
| 251 |
+
more accurate and slower than \-\-parttree. Recommended when a large
|
| 252 |
+
number (> ~10,000) of sequences are input. Default: off
|
| 253 |
+
.RE
|
| 254 |
+
.PP
|
| 255 |
+
\fB\-\-fastaparttree\fR
|
| 256 |
+
.RS 4
|
| 257 |
+
The PartTree algorithm is used with distances based on FASTA. Slightly more accurate and slower than \-\-parttree. Recommended when a large number (> ~10,000) of sequences are input. FASTA is required. Default: off
|
| 258 |
+
.RE
|
| 259 |
+
.PP
|
| 260 |
+
\fB\-\-partsize\fR \fInumber\fR
|
| 261 |
+
.RS 4
|
| 262 |
+
The number of partitions in the PartTree algorithm. Default: 50
|
| 263 |
+
.RE
|
| 264 |
+
.PP
|
| 265 |
+
\fB\-\-groupsize\fR \fInumber\fR
|
| 266 |
+
.RS 4
|
| 267 |
+
Do not make alignment larger than \fInumber\fR sequences. Valid only with the \-\-*parttree options. Default: the number of input sequences
|
| 268 |
+
.RE
|
| 269 |
+
.RE
|
| 270 |
+
.SS "Parameter"
|
| 271 |
+
.RS 0
|
| 272 |
+
.PP
|
| 273 |
+
\fB\-\-op\fR \fInumber\fR
|
| 274 |
+
.RS 4
|
| 275 |
+
Gap opening penalty at group\-to\-group alignment. Default: 1.53
|
| 276 |
+
.RE
|
| 277 |
+
.PP
|
| 278 |
+
\fB\-\-ep\fR \fInumber\fR
|
| 279 |
+
.RS 4
|
| 280 |
+
Offset value, which works like gap extension penalty, for
|
| 281 |
+
group\-to\-group alignment. Default: 0.123
|
| 282 |
+
.RE
|
| 283 |
+
.PP
|
| 284 |
+
\fB\-\-lop\fR \fInumber\fR
|
| 285 |
+
.RS 4
|
| 286 |
+
Gap opening penalty at local pairwise
|
| 287 |
+
alignment. Valid when
|
| 288 |
+
the \-\-localpair or \-\-genafpair option is selected. Default: \-2.00
|
| 289 |
+
.RE
|
| 290 |
+
.PP
|
| 291 |
+
\fB\-\-lep\fR \fInumber\fR
|
| 292 |
+
.RS 4
|
| 293 |
+
Offset value at local pairwise alignment. Valid when
|
| 294 |
+
the \-\-localpair or \-\-genafpair option is selected. Default: 0.1
|
| 295 |
+
.RE
|
| 296 |
+
.PP
|
| 297 |
+
\fB\-\-lexp\fR \fInumber\fR
|
| 298 |
+
.RS 4
|
| 299 |
+
Gap extension penalty at local pairwise alignment. Valid when
|
| 300 |
+
the \-\-localpair or \-\-genafpair option is selected. Default: \-0.1
|
| 301 |
+
.RE
|
| 302 |
+
.PP
|
| 303 |
+
\fB\-\-LOP\fR \fInumber\fR
|
| 304 |
+
.RS 4
|
| 305 |
+
Gap opening penalty to skip the alignment. Valid when the
|
| 306 |
+
\-\-genafpair option is selected. Default: \-6.00
|
| 307 |
+
.RE
|
| 308 |
+
.PP
|
| 309 |
+
\fB\-\-LEXP\fR \fInumber\fR
|
| 310 |
+
.RS 4
|
| 311 |
+
Gap extension penalty to skip the alignment. Valid when the
|
| 312 |
+
\-\-genafpair option is selected. Default: 0.00
|
| 313 |
+
.RE
|
| 314 |
+
.PP
|
| 315 |
+
\fB\-\-bl\fR \fInumber\fR
|
| 316 |
+
.RS 4
|
| 317 |
+
BLOSUM \fInumber\fR matrix (Henikoff and Henikoff 1992) is used. \fInumber\fR=30, 45, 62 or 80. Default: 62
|
| 318 |
+
.RE
|
| 319 |
+
.PP
|
| 320 |
+
\fB\-\-jtt\fR \fInumber\fR
|
| 321 |
+
.RS 4
|
| 322 |
+
JTT PAM \fInumber\fR (Jones et al. 1992) matrix is used. \fInumber\fR>0. Default: BLOSUM62
|
| 323 |
+
.RE
|
| 324 |
+
.PP
|
| 325 |
+
\fB\-\-tm\fR \fInumber\fR
|
| 326 |
+
.RS 4
|
| 327 |
+
Transmembrane PAM \fInumber\fR (Jones et al. 1994) matrix is used. \fInumber\fR>0. Default: BLOSUM62
|
| 328 |
+
.RE
|
| 329 |
+
.PP
|
| 330 |
+
\fB\-\-aamatrix\fR \fImatrixfile\fR
|
| 331 |
+
.RS 4
|
| 332 |
+
Use a user\-defined AA scoring matrix. The format of \fImatrixfile\fR is
|
| 333 |
+
the same as that of BLAST. Ignored when nucleotide sequences are input. Default: BLOSUM62
|
| 334 |
+
.RE
|
| 335 |
+
.PP
|
| 336 |
+
\fB\-\-fmodel\fR
|
| 337 |
+
.RS 4
|
| 338 |
+
Incorporate the AA/nuc composition information into
|
| 339 |
+
the scoring matrix. Default: off
|
| 340 |
+
.RE
|
| 341 |
+
.RE
|
| 342 |
+
.SS "Output"
|
| 343 |
+
.RS 0
|
| 344 |
+
.PP
|
| 345 |
+
\fB\-\-clustalout\fR
|
| 346 |
+
.RS 4
|
| 347 |
+
Output format: clustal format. Default: off (fasta format)
|
| 348 |
+
.RE
|
| 349 |
+
.PP
|
| 350 |
+
\fB\-\-inputorder\fR
|
| 351 |
+
.RS 4
|
| 352 |
+
Output order: same as input. Default: on
|
| 353 |
+
.RE
|
| 354 |
+
.PP
|
| 355 |
+
\fB\-\-reorder\fR
|
| 356 |
+
.RS 4
|
| 357 |
+
Output order: aligned. Default: off (inputorder)
|
| 358 |
+
.RE
|
| 359 |
+
.PP
|
| 360 |
+
\fB\-\-treeout\fR
|
| 361 |
+
.RS 4
|
| 362 |
+
Guide tree is output to the \fIinput\fR.tree file. Default: off
|
| 363 |
+
.RE
|
| 364 |
+
.PP
|
| 365 |
+
\fB\-\-quiet\fR
|
| 366 |
+
.RS 4
|
| 367 |
+
Do not report progress. Default: off
|
| 368 |
+
.RE
|
| 369 |
+
.RE
|
| 370 |
+
.SS "Input"
|
| 371 |
+
.RS 0
|
| 372 |
+
.PP
|
| 373 |
+
\fB\-\-nuc\fR
|
| 374 |
+
.RS 4
|
| 375 |
+
Assume the sequences are nucleotide. Default: auto
|
| 376 |
+
.RE
|
| 377 |
+
.PP
|
| 378 |
+
\fB\-\-amino\fR
|
| 379 |
+
.RS 4
|
| 380 |
+
Assume the sequences are amino acid. Default: auto
|
| 381 |
+
.RE
|
| 382 |
+
.PP
|
| 383 |
+
\fB\-\-seed\fR \fIalignment1\fR [\fB\-\-seed\fR \fIalignment2\fR \fB\-\-seed\fR \fIalignment3\fR ...]
|
| 384 |
+
.RS 4
|
| 385 |
+
Seed alignments given in \fIalignment_n\fR (fasta format) are aligned with
|
| 386 |
+
sequences in \fIinput\fR. The alignment within every seed is preserved.
|
| 387 |
+
.RE
|
| 388 |
+
.RE
|
| 389 |
+
.SH "FILES"
|
| 390 |
+
.RS 0
|
| 391 |
+
.PP
|
| 392 |
+
Mafft stores the input sequences and other files in a temporary directory, which by default is located in
|
| 393 |
+
\fI/tmp\fR.
|
| 394 |
+
.RE
|
| 395 |
+
.SH "ENVIRONMENT"
|
| 396 |
+
.RS 0
|
| 397 |
+
.PP
|
| 398 |
+
\fBMAFFT_BINARIES\fR
|
| 399 |
+
.RS 4
|
| 400 |
+
Indicates the location of the binary files used by mafft. By default, they are searched in
|
| 401 |
+
\fI/usr/local/lib/mafft\fR, but on Debian systems, they are searched in
|
| 402 |
+
\fI/usr/lib/mafft\fR.
|
| 403 |
+
.RE
|
| 404 |
+
.PP
|
| 405 |
+
\fBFASTA_4_MAFFT\fR
|
| 406 |
+
.RS 4
|
| 407 |
+
This variable can be set to tell mafft the location of the fasta34 program if it is not in the PATH.
|
| 408 |
+
.RE
|
| 409 |
+
.RE
|
| 410 |
+
.SH "SEE ALSO"
|
| 411 |
+
.RS 0
|
| 412 |
+
.PP
|
| 413 |
+
|
| 414 |
+
\fBmafft\-homologs\fR(1)
|
| 415 |
+
.RE
|
| 416 |
+
.SH "REFERENCES"
|
| 417 |
+
.RS 0
|
| 418 |
+
.SS "In English"
|
| 419 |
+
.sp
|
| 420 |
+
.RS 4
|
| 421 |
+
\h'-04'\(bu\h'+03'Katoh and Toh (Bioinformatics 23:372\-374, 2007) PartTree: an algorithm to build an approximate tree from a large number of unaligned sequences (describes the PartTree algorithm).
|
| 422 |
+
.RE
|
| 423 |
+
.sp
|
| 424 |
+
.RS 4
|
| 425 |
+
\h'-04'\(bu\h'+03'Katoh, Kuma, Toh and Miyata (Nucleic Acids Res. 33:511\-518, 2005) MAFFT version 5: improvement in accuracy of multiple sequence alignment (describes [ancestral versions of] the G\-INS\-i, L\-INS\-i and E\-INS\-i strategies)
|
| 426 |
+
.RE
|
| 427 |
+
.sp
|
| 428 |
+
.RS 4
|
| 429 |
+
\h'-04'\(bu\h'+03'Katoh, Misawa, Kuma and Miyata (Nucleic Acids Res. 30:3059\-3066, 2002) MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform (describes the FFT\-NS\-1, FFT\-NS\-2 and FFT\-NS\-i strategies)
|
| 430 |
+
.RE
|
| 431 |
+
.SS "In Japanese"
|
| 432 |
+
.sp
|
| 433 |
+
.RS 4
|
| 434 |
+
\h'-04'\(bu\h'+03'Katoh and Misawa (Seibutsubutsuri 46:312\-317, 2006) Multiple Sequence Alignments: the Next Generation
|
| 435 |
+
.RE
|
| 436 |
+
.sp
|
| 437 |
+
.RS 4
|
| 438 |
+
\h'-04'\(bu\h'+03'Katoh and Kuma (Kagaku to Seibutsu 44:102\-108, 2006) Jissen\-teki Multiple Alignment
|
| 439 |
+
.RE
|
| 440 |
+
.RE
|
| 441 |
+
.SH "AUTHORS"
|
| 442 |
+
.RS 0
|
| 443 |
+
.PP
|
| 444 |
+
\fBKazutaka Katoh\fR <\&kazutaka.katoh_at_aist.go.jp\&>
|
| 445 |
+
.sp -1n
|
| 446 |
+
.IP "" 4
|
| 447 |
+
Wrote Mafft.
|
| 448 |
+
.PP
|
| 449 |
+
\fBCharles Plessy\fR <\&charles\-debian\-nospam_at_plessy.org\&>
|
| 450 |
+
.sp -1n
|
| 451 |
+
.IP "" 4
|
| 452 |
+
Wrote this manpage in DocBook XML for the Debian distribution, using Mafft's homepage as a template.
|
| 453 |
+
.RE
|
| 454 |
+
.SH "COPYRIGHT"
|
| 455 |
+
.RS 0
|
| 456 |
+
Copyright \(co 2002\-2007 Kazutaka Katoh (mafft)
|
| 457 |
+
.br
|
| 458 |
+
Copyright \(co 2007 Charles Plessy (this manpage)
|
| 459 |
+
.br
|
| 460 |
+
.PP
|
| 461 |
+
Mafft and its manpage are offered under the following conditions:
|
| 462 |
+
.PP
|
| 463 |
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
| 464 |
+
.sp
|
| 465 |
+
.RS 4
|
| 466 |
+
\h'-04' 1.\h'+02'Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
| 467 |
+
.RE
|
| 468 |
+
.sp
|
| 469 |
+
.RS 4
|
| 470 |
+
\h'-04' 2.\h'+02'Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
| 471 |
+
.RE
|
| 472 |
+
.sp
|
| 473 |
+
.RS 4
|
| 474 |
+
\h'-04' 3.\h'+02'The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission.
|
| 475 |
+
.RE
|
| 476 |
+
.PP
|
| 477 |
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 478 |
+
.br
|
| 479 |
+
.RE
|
mafft/mafftdir/libexec/mafftash_premafft.pl
ADDED
|
@@ -0,0 +1,464 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/perl
|
| 2 |
+
|
| 3 |
+
#####################################################################
|
| 4 |
+
# Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp)
|
| 5 |
+
#
|
| 6 |
+
# Ver. Date Changelog
|
| 7 |
+
#####################################################################
|
| 8 |
+
# 1.0 07.26.13 Initial release
|
| 9 |
+
# 2.0 09.03.13 Added extensive warnings and error messages
|
| 10 |
+
# 3.0 10.28.13 Fix for retrieving large files. Added STDERR logs
|
| 11 |
+
# 3.1 11.08.13 Added LWP failsafe. Made hat3 not a required output
|
| 12 |
+
# 3.2 12.08.14 Removed 5-char restriction for own structure files
|
| 13 |
+
#
|
| 14 |
+
#####################################################################
|
| 15 |
+
|
| 16 |
+
use strict;
|
| 17 |
+
use Getopt::Long;
|
| 18 |
+
use File::Path qw(make_path remove_tree);
|
| 19 |
+
use LWP::Simple;
|
| 20 |
+
use LWP::UserAgent;
|
| 21 |
+
|
| 22 |
+
# to prevent error 'Header line too long (limit is 8192)' [v3.1]
|
| 23 |
+
use LWP::Protocol::http;
|
| 24 |
+
push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0);
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# REST endpoint that receives the job request and serves its results
my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/MAFFTash/REST/service.cgi/premafft";

# command-line settings; all stay undefined unless given by the user
my ( $WORKDIR, $PDBLIST, $OWNLIST, $HAT3FILE, $INSTRFILE );

# GetOptions returns false when it meets an unknown option or a missing
# value; stop with the usage text instead of running with a partial setup.
GetOptions
(
    'd=s' => \$WORKDIR,
    'p=s' => \$PDBLIST,
    'o=s' => \$OWNLIST,
    'h=s' => \$HAT3FILE,
    'i=s' => \$INSTRFILE,
) or &help("Error: Invalid command-line option");

print STDERR "[MAFFTash-premafft]\n";
|
| 42 |
+
|
| 43 |
+
# set temp directory
# Create a private scratch directory with an unpredictable name below /tmp.
# The former fixed name (/tmp/mapremafft<pid>) could be guessed and
# pre-created or symlinked by another local user. tempdir() dies if the
# directory cannot be created; cleanup() removes it at the end of the run.
require File::Temp;
my $TMP = File::Temp::tempdir( "mapremafftXXXXXX", DIR => "/tmp" );
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
######
# validation
# at least one list of structures is needed, and own structures need a directory
if ( !defined $PDBLIST && !defined $OWNLIST )
{
    &help("Required parameter : atleast one of either '-p' or '-o'");
}

if ( defined $OWNLIST && !defined $WORKDIR )
{
    &help("Required parameter : '-d'");
}

# default output names (relative to the current directory)
$HAT3FILE = "hat3" if !defined $HAT3FILE;
$INSTRFILE = "instr" if !defined $INSTRFILE;

# drop the last character of the work directory when it ends with a slash
if ( defined $WORKDIR && $WORKDIR =~ m{/$} )
{
    chop $WORKDIR;
}
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
######
# prepare inputs
print STDERR "Preparing inputs for service request...\n";

# Form fields of the POST request as a flat list of name => value pairs.
# Values given as array references name local files to be uploaded
# (LWP form-data convention).
my @files = ();
push(@files, "strweight" => "0.5");
push(@files, "premafft" => "1");


# pdb entries
if ( defined $PDBLIST )
{
    print STDERR "PDB List defined!\n";
    &bail("Error: Input file $PDBLIST does not exists!") unless -e $PDBLIST;
    my $listfile = "$TMP/pdblist.inp";

    # three-argument open: the user-supplied name is never parsed for a mode
    open(my $inpf, "<", $PDBLIST) or &bail("Error: Cannot open file $PDBLIST for reading!");
    open(my $outf, ">", $listfile) or &bail("Error: Cannot open temporary file $listfile for writing!");

    while ( my $line = <$inpf> )
    {
        # strip LF as well as CRLF endings; with chomp alone every entry of a
        # DOS-format list failed the pattern below and was silently dropped
        $line =~ s/[\r\n]+\z//;

        # only exact 5-character entries are forwarded; other lines are skipped
        if ( $line =~ /^(\w{5})$/ )
        {
            print $outf ">PDBID\n$1\n";
        }
    }

    close $outf;
    close $inpf;

    push(@files, "inputfile" => ["$listfile"]);
}



# upload own structures
# ids of the user's own structures, checked against the instr output later
my %ownids = ();

if ( defined $OWNLIST )
{
    print STDERR "OWN List defined!\n";
    &bail("Error: Input file $OWNLIST does not exists!") unless -e $OWNLIST;

    open(my $owninpf, "<", $OWNLIST) or &bail("Error: Cannot open file $OWNLIST for reading!");

    while ( my $line = <$owninpf> )
    {
        $line =~ s/[\r\n]+\z//;

        # one id per line, no embedded whitespace
        if ( $line =~ /^(\S+)$/ )
        {
            my $ownid = $1;
            my $fileref = "$WORKDIR/$ownid.pdb";

            unless (-e $fileref)
            {
                close $owninpf;
                &bail("Error: File $fileref does not exists!");
            }

            push(@files, "inputownfile[]" => ["$fileref"]);
            $ownids{$ownid} = 1;
        }
    }

    close $owninpf;
}
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
######
# start rest service
print STDERR "Sending service request...\n";

my $browser = LWP::UserAgent->new;
$browser->timeout(0);


# post: running a mafftash job
my $postResponse = $browser->post( $BASEURL, \@files, 'Content_Type' => 'form-data' );
&bail(sprintf("[%d] %s\n", $postResponse->code, &parseError($postResponse->content))) unless($postResponse->is_success);


# get response from post request
my ($status, $mafftashid) = &parseResponse($postResponse->content);

# parseResponse() yields an empty id when the reply cannot be parsed;
# without a job id there is nothing meaningful to poll for
&bail("Error: Cannot read job id from server response!") if $mafftashid eq "";


my $MAXTRIES = 3;     # download attempts per file before giving up
my $STIMER = 4;       # base delay in seconds
my $longtimer = 0;    # current polling delay in seconds

print STDERR "Request sent! Waiting for response...[$mafftashid]\n";


# wait for results until it becomes available
while(1)
{
    # polling delay cycles through 4, 8, 12, 16 seconds and then restarts at 4
    $longtimer = $longtimer <= ($STIMER*3) ? $longtimer+$STIMER : $STIMER;
    sleep $longtimer;


    # get: get results for mafftash job
    my $getResponse = $browser->get("$BASEURL/$mafftashid");

    if ( $getResponse->is_success )
    {

        # get response from get request
        my ($newstatus, $newid) = &parseResponse($getResponse->content);
        $status = $newstatus;

        # keep the known job id when a reply cannot be parsed, so that the
        # next poll still addresses this job
        $mafftashid = $newid if $newid ne "";

        next unless ( $status eq "done" );


        # if job is finished and ready
        print STDERR "Results found!\n";
        my $csfile = "$TMP/checksum.tar.gz";
        my $try1 = 1;


        while(1)
        {
            print STDERR "Fetching Results... [Trial $try1]\n";

            if ( is_success(getstore("$BASEURL/getmdlist/$mafftashid", $csfile)) && -e $csfile && -s $csfile )
            {
                # maps each file name to the checksum announced by the server
                my $checklist = &extractchecksum($csfile);
                &bail("Error retrieving list of compressed files!") unless keys %$checklist;


                foreach my $id ( keys %$checklist )
                {
                    # the name comes from the server and becomes part of a local
                    # path; refuse names that would leave the temp directory
                    &bail("Error: Unsafe file name '$id' in list of compressed files!") if $id =~ m{(?:^|/)\.\.(?:/|$)};

                    my $checkfile = "$TMP/$id";
                    my $checkid = $checklist->{$id};
                    my $try2 = 1;

                    while(1)
                    {
                        unlink $checkfile if -e $checkfile;

                        if ( is_success(getstore("$BASEURL/get/$mafftashid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
                        {
                            my $hashid = &getchecksum($checkfile);
                            #print STDERR "[hashid]$hashid [checkid]$checkid\n";

                            # an empty local checksum means "could not be computed"
                            # and is accepted; a mismatch triggers a new download
                            if ($hashid ne "" && $hashid ne $checkid )
                            {
                                unlink $checkfile if -e $checkfile;
                                &bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
                                $try2++;
                                sleep $STIMER;
                            }
                            else
                            {
                                last;
                            }
                        }
                        else
                        {
                            &bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
                            $try2++;
                            sleep $STIMER;
                        }
                    }
                }

                last;
            }
            else
            {
                &bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
                $try1++;
                sleep $STIMER;
            }
        }

        last;

    }
    else
    {
        &bail(sprintf("[%d] %s\n", $getResponse->code, &parseError($getResponse->content)));
    }

}
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
# make sure outputs were generated
# decompress
print STDERR "Assembling final results...\n";

# concatenate every downloaded file matching archive.tar.gz* and unpack the
# result into the temp directory
&backticks("cat $TMP/archive.tar.gz* | tar -zxf - -C $TMP/");

# Move the results to the requested output paths. File::Copy is used instead
# of a shell "mv" so that user-supplied names containing spaces or shell
# metacharacters are handled literally.
require File::Copy;

if ( -e "$TMP/instr" )
{
    File::Copy::move("$TMP/instr", $INSTRFILE) or warn "Warning: Cannot move instr to $INSTRFILE: $!\n";
}

if ( -e "$TMP/hat3" )
{
    File::Copy::move("$TMP/hat3", $HAT3FILE) or warn "Warning: Cannot move hat3 to $HAT3FILE: $!\n";
}

# sometimes no hat3 file is generated [v3.1]
#&bail("Error: Output file $HAT3FILE not found!") unless -e $HAT3FILE;
&bail("Error: Output file $INSTRFILE not found!") unless -e $INSTRFILE;


# warn if some ownids were omitted
if ( scalar keys(%ownids) > 0 )
{
    # ids found in the header lines of the instr file
    my %instrids = ();

    open(my $instrf, "<", $INSTRFILE) or &bail("Error: Cannot open file $INSTRFILE for reading!");

    while ( my $line = <$instrf> )
    {
        chomp $line;

        # header lines look like ">NUMBER_ID"; remember the ID part
        if ( $line =~ /^>\d+_(\S+)$/ )
        {
            $instrids{$1} = 1;
        }
    }

    close $instrf;

    foreach my $id ( keys %ownids )
    {
        warn "Warning: Own structure $id was excluded from instr/hat3.\n" unless $instrids{$id};
    }

}



&cleanup();
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
####################
|
| 294 |
+
####################
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
# Split a service reply of the form "<mafftashid>:<status>" into its parts.
# Returns the list (status, mafftashid); both are empty strings when the
# reply does not have that shape.
# NOTE(review): an older sample kept with this sub showed a reply like
#   "status":"wait","mafftashid":"Ma8211432R"
# which the pattern below does not accept - confirm the current reply format.
sub parseResponse
{
    my ($reply) = @_;

    # two colon-separated fields without whitespace: job id first, then status
    return ($2, $1) if $reply =~ /^([^\s:]+):([^\s:]+)$/;

    return ("", "");
}
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
# Read the checksum list stored in the gzip-compressed tar archive $infile.
# The archive content is streamed from "tar -O"; every line made of two
# whitespace-separated fields is taken as "<checksum> <name>".
# Returns a reference to a hash mapping name => checksum (empty when tar
# cannot be started or no line matches).
sub extractchecksum
{
    my $infile = shift;
    my %dataset = ();

    # list-form pipe open: tar is executed directly, so the path is passed
    # as a single argument and is never interpreted by a shell
    open(my $csum, "-|", "tar", "-zxf", $infile, "-O") or return \%dataset;

    while ( my $line = <$csum> )
    {
        chomp $line;

        if ( $line =~ /^(\S+)\s+(\S+)$/ )
        {
            $dataset{$2} = $1;
        }
    }

    close $csum;

    return \%dataset;
}
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
# Pull the message out of an error reply such as
#   "error":"Invalid number of inputs found."
# Returns the message text, or an empty string when no such field is present.
sub parseError
{
    my ($reply) = @_;

    return $1 if $reply =~ /\"error\"\s*:\s*\"([^\"]+)\"/;

    return "";
}
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
# Compute the MD5 checksum of the file $infile.
# Returns the checksum as a lowercase hexadecimal string, or an empty string
# when it cannot be computed (callers treat "" as "no checksum available").
#
# The digest is computed in-process with the core module Digest::MD5. The
# earlier approach ran an external md5 program through a shell, which
#   - passed a file name derived from server data to the shell, and
#   - silently produced no checksum when the program was not installed at
#     one of two fixed locations or the path contained whitespace.
sub getchecksum
{
    my $infile = shift;

    # keep the best-effort contract if the module is missing from this perl
    eval { require Digest::MD5; 1 } or return "";

    open(my $fh, "<", $infile) or return "";
    binmode($fh);

    # addfile() dies on read errors; treat that as "no checksum" as well
    my $checksum = eval { Digest::MD5->new->addfile($fh)->hexdigest };
    close $fh;

    return defined $checksum ? $checksum : "";
}
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
# Run $command through the shell, discarding its standard output.
# Returns 1 when the command ran and exited with status 0, and 0 otherwise
# (it could not be started, was killed by a signal, or exited non-zero).
# Previously every command that could be started was reported as a success,
# whatever its exit status.
sub backticks
{
    my $command = shift;

    `$command`;

    # $? is -1 if the command could not be run; otherwise it carries the
    # wait status, which is 0 only for a clean, successful exit
    return ($? == 0) ? 1 : 0;
}
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
# Abort the run: report $message on STDERR (when one is given), remove the
# temp directory via cleanup() and leave with exit status 1.
sub bail
{
    my ($message) = @_;

    if ( defined $message )
    {
        print STDERR "$message\n";
    }

    &cleanup();
    exit(1);
}
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
# Remove the temp directory $TMP together with everything in it.
# Does nothing when $TMP has not been set yet (for example when the script
# stops during option checking) or when the directory does not exist.
sub cleanup
{
    return if ( !defined $TMP || $TMP eq "" || !-d $TMP );

    # remove_tree deletes the contained files as well as the directory itself,
    # so no separate pass over the directory entries is needed
    remove_tree($TMP);
}
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
# Print the usage text on STDOUT, then stop through bail(), which reports
# $reason on STDERR and exits with status 1.
sub help
{
    my ($reason) = @_;

    # single-quoted heredoc: the text is printed literally, nothing is interpolated
    my $usage = <<'HELPME';

USAGE
./mafftash_premafft.pl -p [FILE]
./mafftash_premafft.pl -o [FILE] -d [DIRECTORY]
./mafftash_premafft.pl -p [FILE] -o [FILE] -d [DIRECTORY]


PARAMETERS
-p [FILE]
FILE contains a list of PDBIDs (one entry per line); make sure that the PDBIDs are in the standard 5-character pdbid+chain naming format

-o [FILE] -d [DIRECTORY]
FILE contains a list of IDs from your own structure/pdb files (one entry per line)
for each ID in the list make sure that a corresponding structure file (same ID with .pdb extension) is stored in DIRECTORY

-h [HATFILE]
save the output hat3 file in HATFILE; if not set, the output is written to a file named 'hat3' in your current directory

-i [INSTRFILE]
save the output instr file in INSTRFILE; if not set, the output is written to a file named 'instr' in your current directory

HELPME

    print $usage;

    &bail($reason);
}
|
| 462 |
+
|
| 463 |
+
|
| 464 |
+
|
mafft/mafftdir/libexec/maffttext2hex
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ced34fedac7ce84d43b93c93d36097c2457eecfeb08881a6557286a6cdcaf19
|
| 3 |
+
size 542776
|
mafft/mafftdir/libexec/makedirectionlist
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6e0c9489f6f11da1567bb23a67f403b415c7970a4a7cd1cc603b5adacec8295
|
| 3 |
+
size 1223416
|
mafft/mafftdir/libexec/mccaskillwrap
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cffda11fed2b49841b91b7b3b3dec5d653d470bf0050e4b398e6c2bfa2859963
|
| 3 |
+
size 928560
|
mafft/mafftdir/libexec/multi2hat3s
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72b2e34505f4d317939953e8362da36f41239d5eaf415dc83c4f37c769ddd36b
|
| 3 |
+
size 984240
|
mafft/mafftdir/libexec/nodepair
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cb033f02de47bdb419efd9abd515b785ade70d3b150580446e86c5119381ad6
|
| 3 |
+
size 1264016
|
mafft/mafftdir/libexec/pairash
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d53ee2b635c23207edd1668a61d8a45e18137b9c1c85127b8c61672477863cd
|
| 3 |
+
size 1213072
|
mafft/mafftdir/libexec/pairlocalalign
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bf4f1a3c4be95f7a0e7e081d8b83ebf28310c7728c348163a983a1450a60ad7
|
| 3 |
+
size 1239448
|
mafft/mafftdir/libexec/regtable2seq
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dfc7eada977c3cc5c56dde7a133878ccf6265491b696affd18a943f5da8846a
|
| 3 |
+
size 866768
|
mafft/mafftdir/libexec/replaceu
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1aa80f99a4bb1eb72944979df35f421fde9b017d58db7de45e9c60e2e95d38b8
|
| 3 |
+
size 866768
|
mafft/mafftdir/libexec/restoreu
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95f7b302b29051e3730e99fd8aca2f54b9c6f10b95fa668ce7bacf260ae1ce44
|
| 3 |
+
size 866768
|
mafft/mafftdir/libexec/score
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:062c67f0867c0896b98b368c2d886b31b58d452bb3cecda708f5570a979d4e81
|
| 3 |
+
size 924040
|
mafft/mafftdir/libexec/seekquencer_premafft.pl
ADDED
|
@@ -0,0 +1,600 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/perl
|
| 2 |
+
|
| 3 |
+
####################################################################################
|
| 4 |
+
# Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp)
|
| 5 |
+
#
|
| 6 |
+
# Ver. Date Changelog
|
| 7 |
+
####################################################################################
|
| 8 |
+
# 1.0 11.01.13 Initial release
|
| 9 |
+
#
|
| 10 |
+
# **Skipped version 2 to standardise version numbers to seekquencer.pl script**
|
| 11 |
+
#
|
| 12 |
+
# 3.0 04.24.14 Added split option -mod <mafftash-split> for output
|
| 13 |
+
# Uses seekquencer_v3 backend
|
| 14 |
+
#
|
| 15 |
+
# 4.0 05.12.14 Added new options: -run <thread|normal> -trd <count> -noin
|
| 16 |
+
# Sets -seqa fast in seekquencer.pl
|
| 17 |
+
# Uses seekquencer_v4 backend
|
| 18 |
+
#
|
| 19 |
+
# 4.1 05.19.14 Added a check on running REST requests before proceeding
|
| 20 |
+
# to avoid server load problems
|
| 21 |
+
#
|
| 22 |
+
# 4.2 05.27.14 Seq limit processing done in seekquencer.pl script
|
| 23 |
+
# to avoid server load problems
|
| 24 |
+
#
|
| 25 |
+
# 4.3 07.22.14 Added new option: -seqd <uniref100|uniref90|uniref70|uniprot>
|
| 26 |
+
# Blast limit changed from factor of 10 to -blim option
|
| 27 |
+
# Timing on sleep changed; added srand() for making seed
|
| 28 |
+
# Moved the job limit processing to server side
|
| 29 |
+
#
|
| 30 |
+
# 4.4 08.05.14 Modified to work in multiple OS
|
| 31 |
+
#
|
| 32 |
+
#
|
| 33 |
+
####################################################################################
|
| 34 |
+
|
| 35 |
+
use strict;
|
| 36 |
+
use Getopt::Long;
|
| 37 |
+
use File::Path qw(make_path remove_tree);
|
| 38 |
+
use Cwd;
|
| 39 |
+
use LWP::Simple;
|
| 40 |
+
use LWP::UserAgent;
|
| 41 |
+
|
| 42 |
+
# to prevent error: Header line too long (limit is 8192)
|
| 43 |
+
use LWP::Protocol::http;
|
| 44 |
+
push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0);
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# REST endpoint of the Seekquencer "premafft" service; every request below is built from it.
my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/REST/service.cgi/premafft";

# Values that stay undefined unless the matching switch is given on the command line.
my ( $INPUTFILE, $IDLISTFILE, $SEQFASTAFILE, $OUTPUTFILE, $SEQFLAG, $STRFLAG, $EVALFLAG, $NOINFLAG );

# Defaults that can be overridden from the command line.
my $OUTTYPE = "mafftash";       # -mod

my $SEQDATABASE = "uniref100";  # -seqd
my $SEQLIMIT = 100;             # -lim
my $SEQBLASTLIMIT = 100;        # -blim

my $RUNMODE = "normal";         # -run  thread|normal
my $THREADCOUNT = 3;            # -trd

# GetOptions returns false when it meets an unknown switch or a malformed value.
# Stop with the usage text in that case instead of silently ignoring the mistake
# and submitting a job with unintended settings.
GetOptions
(
    'inp=s'  => \$INPUTFILE,
    'idf=s'  => \$IDLISTFILE,
    'seqf=s' => \$SEQFASTAFILE,
    'out=s'  => \$OUTPUTFILE,
    'str'    => \$STRFLAG,
    'seq'    => \$SEQFLAG,
    'seqd=s' => \$SEQDATABASE,
    'lim=i'  => \$SEQLIMIT,
    'blim=i' => \$SEQBLASTLIMIT,
    'pre'    => \$EVALFLAG,
    'noin'   => \$NOINFLAG,
    'mod=s'  => \$OUTTYPE,
    'run=s'  => \$RUNMODE,
    'trd=i'  => \$THREADCOUNT,
) or help("Invalid command line option(s)");

# Checksum verification of downloads is skipped on Windows (see the download loop).
my $ISWINDOWS = ( $^O =~ /^MSWin/ ) ? 1 : 0;
print STDERR "[Seekquencer-premafft 4.4 on $^O]\n";


# set temp directory: a per-process folder under the current working directory
my $CWD = getcwd;
my $TMP = "$CWD/seekpremafft$$";
make_path($TMP) unless -d $TMP;
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
######
# validation
# Each failed check prints the usage text plus the message and exits (see help()),
# so the order of the checks decides which message the user sees.

# input source: either '-inp' alone, or '-idf' and/or '-seqf'
help("Required parameter: define input as '-inp' or '-idf' or '-seqf'") unless ( defined $INPUTFILE || defined $IDLISTFILE || defined $SEQFASTAFILE );
help("'-inp' is already defined") if ( defined $INPUTFILE && !( !defined $IDLISTFILE && !defined $SEQFASTAFILE ) );

# every input file that was named must exist and be non-empty
foreach my $candidate ( $INPUTFILE, $IDLISTFILE, $SEQFASTAFILE )
{
    next unless defined $candidate;
    help("Input file $candidate does not exist (or filesize is 0)") unless ( -e $candidate && -s $candidate );
}

help("Required parameter: output file '-out'") if ( !defined $OUTPUTFILE );
help("Set either '-str' or '-seq' or dont set any at all") if ( defined $SEQFLAG && defined $STRFLAG );

# enumerated option values
help("Invalid value for '-seqd <uniref100|uniref90|uniref70|uniprot>'") unless ( grep { $SEQDATABASE eq $_ } qw(uniref100 uniref90 uniref70 uniprot) );
help("Invalid value for '-mod <fasta|mafftash|mafftash-split>'") unless ( grep { $OUTTYPE eq $_ } qw(fasta mafftash mafftash-split) );
help("Invalid value for '-run <thread|normal>'") unless ( grep { $RUNMODE eq $_ } qw(thread normal) );

# the thread count only matters (and is only checked) in thread mode
help("Invalid value for '-trd <count>'; count should be between 1 and 5 (inclusive)") if ( $RUNMODE eq "thread" && !( $THREADCOUNT >= 1 && $THREADCOUNT <= 5 ) );
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
######
# check existing requests
print STDERR "Checking server status...\n";

# seed the random number generator with the process id
srand($$);

# wait 1 to 6 seconds before the first request to give time for lsf response
sleep( 1 + int(rand(6)) );


# user agent shared by all later requests in this script
my $browser = LWP::UserAgent->new;
$browser->timeout(0);

# get: check if you can send a new request this time
my $jobsResponse = $browser->get("$BASEURL/isAllowed");

# the request itself failed: report the HTTP code and the server's message, then stop
bail(sprintf("[%d] %s\n", $jobsResponse->code, parseError($jobsResponse->content))) unless ( $jobsResponse->is_success );

# a reply that is not greater than zero means no new job is accepted right now;
# this exits with status 0
bail("Max jobs reached. The server cannot process your request right now; try again later.", 0) if ( parseJobQueryResponse($jobsResponse->content) <= 0 );
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
######
# make a temporary input if lists were provided
# When '-inp' was not given, a single input file is assembled inside the temp
# directory from the id list ('-idf') and/or the fasta file ('-seqf').
# Three-argument open with lexical handles is used so that user-supplied file
# names are never interpreted as open modes or pipes.
unless ( defined $INPUTFILE )
{
    $INPUTFILE = "$TMP/input.homemade";
    open(my $inpfh, '>', $INPUTFILE) or bail("Error writing to input file.");

    if ( defined $IDLISTFILE )
    {
        open(my $idfh, '<', $IDLISTFILE) or bail("Error reading input file.");
        while ( my $line = <$idfh> )
        {
            # the first run of five word characters on a line is written as one >PDBID entry;
            # lines without such a run are skipped
            print $inpfh ">PDBID\n$1\n" if ( $line =~ /(\w{5})/ );
        }
        close $idfh;
    }


    if ( defined $SEQFASTAFILE )
    {
        open(my $fastafh, '<', $SEQFASTAFILE) or bail("Error reading input file.");
        while ( my $line = <$fastafh> )
        {
            # copied line by line; chomp + "\n" guarantees the last line ends with a newline
            chomp $line;
            print $inpfh "$line\n";
        }
        close $fastafh;
    }

    # buffered write errors (e.g. disk full) only surface when the handle is closed
    close($inpfh) or bail("Error writing to input file.");
}
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
######
# prepare parameters
print STDERR "Preparing parameters for service request...\n";

# Flat key/value list for the multipart POST. The array-ref value of
# "fileinput" names the local file whose contents are uploaded.
my @parameters =
(
    "fileinput" => ["$INPUTFILE"],
    "out_type"  => $OUTTYPE,
    "rest_flag" => "1",
    "cls_flag"  => "1",
);

# optional switches are only sent when they were given
push(@parameters, "pre_flag" => "1") if defined $EVALFLAG;
push(@parameters, "noin_flag" => "1") if defined $NOINFLAG;

push(@parameters, "run_mode" => $RUNMODE);
push(@parameters, "thread_count" => $THREADCOUNT) if $RUNMODE eq "thread";


# structure search: requested by '-str', or implied when neither '-str' nor '-seq' is set
if ( defined $STRFLAG || !defined $SEQFLAG )
{
    push(@parameters, "str_flag" => "1", "ash_flag" => "1");
}

# sequence search: everything except the '-str' only case
unless ( defined $STRFLAG )
{
    push(@parameters,
        "seq_flag"        => "1",
        "seq_algorithm"   => "fast",
        "seq_database"    => $SEQDATABASE,
        "seq_blastlimit"  => $SEQBLASTLIMIT,
        "seq_outputlimit" => $SEQLIMIT,
    );
}
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
######
# start rest service
print STDERR "Sending service request...\n";

# post: running a mafftash job
my $postResponse = $browser->post( $BASEURL, \@parameters, 'Content_Type' => 'form-data' );
bail(sprintf("[%d] %s\n", $postResponse->code, parseError($postResponse->content))) unless($postResponse->is_success);


# get response from post request: job id and its current status
my ($status, $seekid) = parseResponse($postResponse->content);

# parseResponse returns an empty id when the reply has an unexpected form;
# without an id there is nothing to poll for
bail("Error: unexpected response from server; no job id was returned.") if $seekid eq "";

my $MAXTRIES = 3;   # download attempts before giving up
my $STIMER = 5;     # seconds: polling increment and delay between retries
my $timer = 0;

print STDERR "Request sent! Waiting for response...[$seekid]\n";

# file name => expected checksum, filled from the server's checksum list
my $checklist = {};

# wait for results until it becomes available
while(1)
{
    # sleeps for 5+random, 10+random, 15+random ... up to 60+random, then stays at 60+random
    $timer = $timer >= 60 ? 60 : $timer+$STIMER;
    sleep($timer+int(rand(4)));

    # get: get results for mafftash job
    my $getResponse = $browser->get("$BASEURL/$seekid");

    if ( $getResponse->is_success )
    {
        # get response from get request
        my ($newstatus, $newid) = parseResponse($getResponse->content);
        $status = $newstatus;

        # keep the known job id when a reply cannot be parsed, so that the
        # next poll still addresses the same job
        $seekid = $newid if $newid ne "";

        next unless ( $status eq "done" );


        # if job is finished and ready
        print STDERR "Results found!\n";
        my $csfile = "$TMP/checksum";
        my $try1 = 1;


        while(1)
        {
            print STDERR "Fetching Results... [Trial $try1]\n";

            if ( is_success(getstore("$BASEURL/get/$seekid/checksum", $csfile)) && -e $csfile && -s $csfile )
            {
                # the checksum file lists every result file with its checksum
                $checklist = extractchecksum($csfile);
                bail("Error retrieving list of compressed files!") unless ( scalar(keys %$checklist) > 0 );


                foreach my $id ( sort keys %$checklist )
                {
                    # the names come from the server and are used as file names inside
                    # the temp directory; refuse anything that could point elsewhere
                    bail("Error retrieving list of compressed files! [Invalid file name]") if ( $id =~ m{[/\\]} || $id eq "." || $id eq ".." );

                    sleep 1;
                    my $checkfile = "$TMP/$id";
                    my $checkid = $checklist->{$id};
                    my $try2 = 1;

                    while(1)
                    {
                        # always start from a clean file so a partial download is not reused
                        unlink $checkfile if -e $checkfile;

                        if ( is_success(getstore("$BASEURL/get/$seekid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
                        {
                            # no checksum verification on Windows
                            last if $ISWINDOWS;

                            my $hashid = getchecksum($checkfile);
                            #print STDERR "[hashid]$hashid [checkid]$checkid\n";

                            # an empty $hashid means no checksum could be computed locally;
                            # the download is accepted unverified in that case
                            if ($hashid ne "" && $hashid ne $checkid )
                            {
                                bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
                                $try2++;
                                sleep $STIMER;
                            }
                            else
                            {
                                last;
                            }
                        }
                        else
                        {
                            bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
                            $try2++;
                            sleep $STIMER;
                        }
                    }
                }

                last;
            }
            else
            {
                bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
                $try1++;
                sleep $STIMER;
            }
        }

        last;

    }
    else
    {
        bail(sprintf("[%d] %s\n", $getResponse->code, parseError($getResponse->content)));
    }

}
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
# Copy the downloaded result files to the requested output location.
# A file named "<jobid>.out" is appended to OUTPUTFILE; "<jobid>.out.str" and
# "<jobid>.out.seq" are appended to OUTPUTFILE.str and OUTPUTFILE.seq.
print STDERR "Assembling final results...\n";

foreach my $id ( sort keys %$checklist )
{
    # \Q...\E makes the job id match literally even if it contains regex metacharacters
    if ( $id =~ /^\Q$seekid\E\.out(\.str|\.seq)?/ )
    {
        # the optional group is undefined for the plain ".out" file
        my $suffix = defined $1 ? $1 : "";

        bail("Error: Output file corrupted!") unless -e "$TMP/$id";
        appendToFile("$TMP/$id", $OUTPUTFILE.$suffix);
    }
}

# remove the temp directory and everything in it
cleanup();
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
####################
|
| 348 |
+
####################
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
# Split a service reply of the form "<id>:<status>" into its two fields.
# Neither field may contain whitespace or a colon.
# Returns the list (status, id); both are empty strings when the reply
# does not have that form.
sub parseResponse
{
    my ($reply) = @_;

    my ($id, $state) = ( $reply =~ /^([^\s:]+):([^\s:]+)$/ );

    return ( defined $state ? $state : "", defined $id ? $id : "" );
}
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
# Interpret the reply of the "isAllowed" query.
# Returns the number contained in the reply when the reply consists of digits
# only; any other reply yields the fallback value 100.
sub parseJobQueryResponse
{
    my ($reply) = @_;

    return ( $reply =~ /^(\d+)$/ ) ? $1 : 100;
}
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
# Read a checksum list file with lines of the form "<checksum> <filename>".
# Parameter: path of the list file.
# Returns a hash reference mapping filename => checksum. Lines that do not
# consist of exactly two whitespace-separated fields are ignored. The hash is
# empty when the file cannot be opened.
sub extractchecksum
{
    my $infile = shift;
    my %dataset = ();

    # three-argument open: the path is never interpreted as a mode or a pipe
    open(my $csum, '<', $infile) or return \%dataset;

    while ( my $line = <$csum> )
    {
        chomp $line;

        # trailing whitespace (e.g. the CR of a CRLF line ending) is tolerated
        if ( $line =~ /^(\S+)\s+(\S+)\s*$/ )
        {
            $dataset{$2} = $1;
        }
    }

    close $csum;

    return \%dataset;
}
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
# Pull the human-readable message out of an error reply.
# A reply containing  "error":"<message>"  yields <message>;
# any other reply is returned unchanged.
sub parseError
{
    my ($reply) = @_;

    # example: "error":"Invalid number of inputs found."
    if ( $reply =~ /\"error\"\s*:\s*\"([^\"]+)\"/ )
    {
        return $1;
    }

    return $reply;
}
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
# Compute the MD5 checksum of a file with an external tool.
# Parameter: path of the file.
# Returns the checksum as printed by the tool, or "" when neither
# /usr/bin/md5sum nor /sbin/md5 is executable, the tool cannot be started,
# or it prints nothing.
sub getchecksum
{
    my $infile = shift;

    # md5 binary check
    my @md5cmd = ();

    if ( -x "/usr/bin/md5sum" )
    {
        @md5cmd = ("/usr/bin/md5sum");
    }
    elsif ( -x "/sbin/md5" )
    {
        @md5cmd = ("/sbin/md5", "-q");
    }

    return "" unless @md5cmd;


    my $checksum = "";

    # list-form pipe open: the path is handed to the tool as one argument
    # without going through a shell, so spaces or shell metacharacters in the
    # path can neither break nor alter the command
    open(my $md5out, '-|', @md5cmd, $infile) or return "";

    while ( my $line = <$md5out> )
    {
        # the checksum is the first field of the output line; the file name
        # that may follow it is ignored, so it may contain spaces
        if ( $line =~ /^(\S+)/ )
        {
            $checksum = $1;
            last;
        }
    }

    close $md5out;

    return $checksum;

}
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
# Run a command line and discard its output.
# Returns 0 when $? is -1 after the call, 1 in every other case; the exit
# code of the command itself is not examined.
sub backticks
{
    my ($command) = @_;

    qx{$command};

    return 0 if ( $? == -1 );
    return 1;
}
|
| 464 |
+
|
| 465 |
+
|
| 466 |
+
# Print an optional message to STDERR, remove the temp directory and exit.
# Parameters:
#   $str    - message to print; nothing is printed when it is undefined
#   $status - exit code, 0 for success and 1 for error; defaults to 1
# Does not return.
sub bail
{
    my $str = shift;
    my $status = shift;

    # The default must be decided on $status itself. A bare "defined" tests $_,
    # which made the exit code depend on whatever $_ held at the time of the call.
    $status = 1 unless defined $status;

    print STDERR "$str\n" if defined $str;

    cleanup();

    exit($status);
}
|
| 480 |
+
|
| 481 |
+
|
| 482 |
+
# Remove the temp directory $TMP together with its contents.
# Does nothing when $TMP is unset, empty or not a directory, which is the case
# when the script stops before the directory was created.
sub cleanup
{
    return if ( !defined $TMP || $TMP eq "" || !-d $TMP );

    # delete the entries one by one first; the directory listing is only used
    # when the directory could actually be opened
    if ( opendir(my $maindir, $TMP) )
    {
        my @files = readdir($maindir);
        closedir($maindir);

        foreach my $file (@files)
        {
            # "." and ".." are directory entries, not files to delete
            next if ( $file eq "." || $file eq ".." );
            unlink "$TMP/$file" if -e "$TMP/$file";
        }
    }

    # removes the directory itself and anything left inside it
    remove_tree($TMP);

}
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
# Append the contents of one file to another.
# Parameters:
#   $inpfile - file to read
#   $outfile - file to append to; created when it does not exist
# Stops the script through bail() when either file cannot be opened or the
# output cannot be written completely.
sub appendToFile
{
    my $inpfile = shift;
    my $outfile = shift;

    # three-argument open with lexical handles: file names are taken literally
    # and no global handle is shared with the rest of the script
    open(my $in, '<', $inpfile) or bail("Server Error: Error in reading file.");
    open(my $out, '>>', $outfile) or bail("Server Error: Error in writing to file.");

    while ( my $line = <$in> )
    {
        print $out $line;
    }

    # buffered write errors only show up when the handle is closed
    close($out) or bail("Server Error: Error in writing to file.");
    close $in;
}
|
| 516 |
+
|
| 517 |
+
|
| 518 |
+
|
| 519 |
+
# Print the usage text to STDOUT, then stop through bail().
# Parameter: $str - optional message that bail() prints to STDERR after the
# usage text.
# Does not return.
sub help
{
    my $str = shift;

    print <<'HELPME';

USAGE
  ./seekquencer_premafft.pl -inp <INFILE> -out <OUTFILE> [-str|-seq]
  ./seekquencer_premafft.pl -idf <LISTFILE> -seqf <SEQFASTA> -out <OUTFILE> [-str|-seq]


PARAMETERS
  -inp <INFILE>
     INFILE is a FASTA-formatted file
     PDB entries are written as:
        >PDBID
        [5-character pdbid+chain]

     While sequence entries are written as:
        >[id]
        [sequence]

  -idf <LISTFILE>
     LISTFILE is a file containing a list of pdbids
     pdbids should be a 5-character pdbid + chain

  -seqf <SEQFASTA>
     SEQFASTA is a fasta file
     entries are written as:
        >[id]
        [sequence]

  -out <OUTFILE>
     Results are written to a file named OUTFILE

  -str
     Only structures will be collected by Seekquencer
     If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer

  -seq
     Only sequences will be collected by Seekquencer
     If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer


OPTIONAL PARAMETERS:
  -seqd <uniref100|uniref90|uniref70|uniprot>
     Search Database for sequence homologs. Default value: uniref100

  -lim <count>
     this sets the maximum number of sequence homologs collected. Default value: 100

  -blim <count>
     this sets the -b and -v value when running blastall. Default value: 100

  -pre
     When -str is set, this will compare all structures against all using pdp-ash
     This would ensure that all structures collected are matching
     All structures that do not match will be removed

  -noin
     When set, inputs will not be included in the output

  -mod <mafftash|mafftash-split|fasta>
     Defines the output format
     mafftash (default) will print a mafftash-formatted fasta file
     mafftash-split will make 2 files separating the structures (OUTFILE.str) from sequences (OUTFILE.seq)
     fasta will print a regular fasta file

  -run <thread|normal>
     thread will run simultaneous jobs during blast queries (faster but takes more nodes)
     normal will run sequential blast queries (slower but takes less nodes)
     Default value: normal

  -trd <count>
     if -run <thread> is defined, this sets the number of parallel jobs to run. Default value: 3


HELPME

    bail($str);
}
|
| 600 |
+
|
mafft/mafftdir/libexec/seq2regtable
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4cfb58dc72fa5c2696bccac0663332ae4f056e28d65fb650022d2d8f05cd87a4
|
| 3 |
+
size 625496
|
mafft/mafftdir/libexec/setcore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c216c3a174c7ff4db6664e7ab84e2caa1b4d6ceae9cb19d663aada1023fed38
|
| 3 |
+
size 1199888
|
mafft/mafftdir/libexec/setdirection
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56afd41a55f4b06bc9420fd5ffdf6530cd11aad61e3fb1debcab9a659587d608
|
| 3 |
+
size 866768
|
mafft/mafftdir/libexec/sextet5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:247ac9432a6c7be91bf0ebbf7724535e8e3ea3fa9bade5ea3b7a320e9e8fe80b
|
| 3 |
+
size 933392
|
mafft/mafftdir/libexec/splittbfast
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81f9b708042914e2a8e02f23966f447b1f61b1ee92f600237643885257ddb5b1
|
| 3 |
+
size 1227152
|
mafft/mafftdir/libexec/tbfast
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69af170fa1d0ce6b955eedd388fb7c1dbd5f47d213eae1bf2cc3c2f4d39f9525
|
| 3 |
+
size 1309776
|
mafft/mafftdir/libexec/version
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d575bf0831ebd0ec0326d5e7aa5b653226e83c4d04e2a6f6810390d336a7dee6
|
| 3 |
+
size 538904
|