|
|
|
|
|
|
|
|
""" |
|
|
Title : codon_table.py |
|
|
project : web |
|
|
Created by: julse |
|
|
Created on: 2025/7/9 20:40 |
|
|
des: Codon usage tables for several species, plus parse_text() to load a table into a pandas DataFrame |
|
|
""" |
|
|
import re |
|
|
import pandas as pd |
|
|
|
|
|
# Raw codon-usage table for Homo sapiens, kept as free text.
# Each entry reads: codon triplet, amino-acid letter (or '*'), fraction,
# frequency, and a count in parentheses -- the five fields that parse_text()
# in this file extracts.
# NOTE(review): frequency looks like a per-thousand value and '*' like the
# stop-codon marker -- confirm against the data source.
Homo_sapiens = """ |
|
|
UUU F 0.46 17.6 (714298) UCU S 0.19 15.2 (618711) UAU Y 0.44 12.2 (495699) UGU C 0.46 10.6 (430311) |
|
|
UUC F 0.54 20.3 (824692) UCC S 0.22 17.7 (718892) UAC Y 0.56 15.3 (622407) UGC C 0.54 12.6 (513028) |
|
|
UUA L 0.08 7.7 (311881) UCA S 0.15 12.2 (496448) UAA * 0.30 1.0 ( 40285) UGA * 0.47 1.6 ( 63237) |
|
|
UUG L 0.13 12.9 (525688) UCG S 0.05 4.4 (179419) UAG * 0.24 0.8 ( 32109) UGG W 1.00 13.2 (535595) |
|
|
|
|
|
CUU L 0.13 13.2 (536515) CCU P 0.29 17.5 (713233) CAU H 0.42 10.9 (441711) CGU R 0.08 4.5 (184609) |
|
|
CUC L 0.20 19.6 (796638) CCC P 0.32 19.8 (804620) CAC H 0.58 15.1 (613713) CGC R 0.18 10.4 (423516) |
|
|
CUA L 0.07 7.2 (290751) CCA P 0.28 16.9 (688038) CAA Q 0.27 12.3 (501911) CGA R 0.11 6.2 (250760) |
|
|
CUG L 0.40 39.6 (1611801) CCG P 0.11 6.9 (281570) CAG Q 0.73 34.2 (1391973) CGG R 0.20 11.4 (464485) |
|
|
|
|
|
AUU I 0.36 16.0 (650473) ACU T 0.25 13.1 (533609) AAU N 0.47 17.0 (689701) AGU S 0.15 12.1 (493429) |
|
|
AUC I 0.47 20.8 (846466) ACC T 0.36 18.9 (768147) AAC N 0.53 19.1 (776603) AGC S 0.24 19.5 (791383) |
|
|
AUA I 0.17 7.5 (304565) ACA T 0.28 15.1 (614523) AAA K 0.43 24.4 (993621) AGA R 0.21 12.2 (494682) |
|
|
AUG M 1.00 22.0 (896005) ACG T 0.11 6.1 (246105) AAG K 0.57 31.9 (1295568) AGG R 0.21 12.0 (486463) |
|
|
|
|
|
GUU V 0.18 11.0 (448607) GCU A 0.27 18.4 (750096) GAU D 0.46 21.8 (885429) GGU G 0.16 10.8 (437126) |
|
|
GUC V 0.24 14.5 (588138) GCC A 0.40 27.7 (1127679) GAC D 0.54 25.1 (1020595) GGC G 0.34 22.2 (903565) |
|
|
GUA V 0.12 7.1 (287712) GCA A 0.23 15.8 (643471) GAA E 0.42 29.0 (1177632) GGA G 0.25 16.5 (669873) |
|
|
GUG V 0.46 28.1 (1143534) GCG A 0.11 7.4 (299495) GAG E 0.58 39.6 (1609975) GGG G 0.25 16.5 (669768) |
|
|
""" |
|
|
|
|
|
# Raw codon-usage table for Mus musculus, kept as free text.
# Each entry reads: codon triplet, amino-acid letter (or '*'), fraction,
# frequency, and a count in parentheses -- the five fields that parse_text()
# in this file extracts.
# NOTE(review): frequency looks like a per-thousand value and '*' like the
# stop-codon marker -- confirm against the data source.
Mus_musculus = """ |
|
|
UUU F 0.44 17.2 (422153) UCU S 0.20 16.2 (398250) UAU Y 0.43 12.2 (298518) UGU C 0.48 11.4 (279729) |
|
|
UUC F 0.56 21.8 (535439) UCC S 0.22 18.1 (444041) UAC Y 0.57 16.1 (394074) UGC C 0.52 12.3 (301384) |
|
|
UUA L 0.07 6.7 (165150) UCA S 0.14 11.8 (289799) UAA * 0.28 1.0 ( 23403) UGA * 0.49 1.6 ( 40148) |
|
|
UUG L 0.13 13.4 (329668) UCG S 0.05 4.2 (103815) UAG * 0.23 0.8 ( 19126) UGG W 1.00 12.5 (306619) |
|
|
|
|
|
CUU L 0.13 13.4 (329757) CCU P 0.31 18.4 (450637) CAU H 0.41 10.6 (260637) CGU R 0.08 4.7 (114854) |
|
|
CUC L 0.20 20.2 (495018) CCC P 0.30 18.2 (446868) CAC H 0.59 15.3 (375626) CGC R 0.17 9.4 (229758) |
|
|
CUA L 0.08 8.1 (198032) CCA P 0.29 17.3 (423707) CAA Q 0.26 12.0 (293318) CGA R 0.12 6.6 (161412) |
|
|
CUG L 0.39 39.5 (969515) CCG P 0.10 6.2 (151521) CAG Q 0.74 34.1 (836320) CGG R 0.19 10.2 (250836) |
|
|
|
|
|
AUU I 0.34 15.4 (377698) ACU T 0.25 13.7 (335039) AAU N 0.43 15.6 (382284) AGU S 0.15 12.7 (311331) |
|
|
AUC I 0.50 22.5 (552184) ACC T 0.35 19.0 (465115) AAC N 0.57 20.3 (499149) AGC S 0.24 19.7 (483013) |
|
|
AUA I 0.16 7.4 (180467) ACA T 0.29 16.0 (391437) AAA K 0.39 21.9 (537723) AGA R 0.22 12.1 (297135) |
|
|
AUG M 1.00 22.8 (559953) ACG T 0.10 5.6 (138180) AAG K 0.61 33.6 (825270) AGG R 0.22 12.2 (299472) |
|
|
|
|
|
GUU V 0.17 10.7 (262535) GCU A 0.29 20.0 (491093) GAU D 0.45 21.0 (515049) GGU G 0.18 11.4 (280522) |
|
|
GUC V 0.25 15.4 (377902) GCC A 0.38 26.0 (637878) GAC D 0.55 26.0 (638504) GGC G 0.33 21.2 (520069) |
|
|
GUA V 0.12 7.4 (182733) GCA A 0.23 15.8 (388723) GAA E 0.41 27.0 (661498) GGA G 0.26 16.8 (411344) |
|
|
GUG V 0.46 28.4 (696158) GCG A 0.09 6.4 (157124) GAG E 0.59 39.4 (965963) GGG G 0.23 15.2 (372099) |
|
|
""" |
|
|
|
|
|
|
|
|
# Raw codon-usage table for Pichia, kept as free text.
# Each entry reads: codon triplet, amino-acid letter (or '*'), fraction,
# frequency, and a count in parentheses -- the five fields that parse_text()
# in this file extracts. Counts here are space-padded inside the parentheses.
# NOTE(review): frequency looks like a per-thousand value and '*' like the
# stop-codon marker -- confirm against the data source.
Pichia = """ |
|
|
UUU F 0.54 24.1 ( 1963) UCU S 0.29 24.4 ( 1983) UAU Y 0.47 16.0 ( 1300) UGU C 0.64 7.7 ( 626) |
|
|
UUC F 0.46 20.6 ( 1675) UCC S 0.20 16.5 ( 1344) UAC Y 0.53 18.1 ( 1473) UGC C 0.36 4.4 ( 356) |
|
|
UUA L 0.16 15.6 ( 1265) UCA S 0.18 15.2 ( 1234) UAA * 0.51 0.8 ( 69) UGA * 0.20 0.3 ( 27) |
|
|
UUG L 0.33 31.5 ( 2562) UCG S 0.09 7.4 ( 598) UAG * 0.29 0.5 ( 40) UGG W 1.00 10.3 ( 834) |
|
|
|
|
|
CUU L 0.16 15.9 ( 1289) CCU P 0.35 15.8 ( 1282) CAU H 0.57 11.8 ( 960) CGU R 0.17 6.9 ( 564) |
|
|
CUC L 0.08 7.6 ( 620) CCC P 0.15 6.8 ( 553) CAC H 0.43 9.1 ( 737) CGC R 0.05 2.2 ( 175) |
|
|
CUA L 0.11 10.7 ( 873) CCA P 0.42 18.9 ( 1540) CAA Q 0.61 25.4 ( 2069) CGA R 0.10 4.2 ( 340) |
|
|
CUG L 0.16 14.9 ( 1215) CCG P 0.09 3.9 ( 320) CAG Q 0.39 16.3 ( 1323) CGG R 0.05 1.9 ( 158) |
|
|
|
|
|
AUU I 0.50 31.1 ( 2532) ACU T 0.40 22.4 ( 1820) AAU N 0.48 25.1 ( 2038) AGU S 0.15 12.5 ( 1020) |
|
|
AUC I 0.31 19.4 ( 1580) ACC T 0.26 14.5 ( 1175) AAC N 0.52 26.7 ( 2168) AGC S 0.09 7.6 ( 621) |
|
|
AUA I 0.18 11.1 ( 906) ACA T 0.24 13.8 ( 1118) AAA K 0.47 29.9 ( 2433) AGA R 0.48 20.1 ( 1634) |
|
|
AUG M 1.00 18.7 ( 1517) ACG T 0.11 6.0 ( 491) AAG K 0.53 33.8 ( 2748) AGG R 0.16 6.6 ( 539) |
|
|
|
|
|
GUU V 0.42 26.9 ( 2188) GCU A 0.45 28.9 ( 2351) GAU D 0.58 35.7 ( 2899) GGU G 0.44 25.5 ( 2075) |
|
|
GUC V 0.23 14.9 ( 1210) GCC A 0.26 16.6 ( 1348) GAC D 0.42 25.9 ( 2103) GGC G 0.14 8.1 ( 655) |
|
|
GUA V 0.15 9.9 ( 804) GCA A 0.23 15.1 ( 1228) GAA E 0.56 37.4 ( 3043) GGA G 0.33 19.1 ( 1550) |
|
|
GUG V 0.19 12.3 ( 998) GCG A 0.06 3.9 ( 314) GAG E 0.44 29.0 ( 2360) GGG G 0.10 5.8 ( 468) |
|
|
""" |
|
|
|
|
|
|
|
|
# Raw codon-usage table for Escherichia coli, kept as free text.
# Each entry reads: codon triplet, amino-acid letter (or '*'), fraction,
# frequency, and a count in parentheses -- the five fields that parse_text()
# in this file extracts. Counts here are space-padded inside the parentheses.
# NOTE(review): frequency looks like a per-thousand value and '*' like the
# stop-codon marker -- confirm against the data source.
Escherichia_coli = """ |
|
|
UUU F 0.57 22.2 ( 30462) UCU S 0.15 8.4 ( 11512) UAU Y 0.57 16.1 ( 22037) UGU C 0.44 5.1 ( 7016) |
|
|
UUC F 0.43 16.5 ( 22705) UCC S 0.15 8.6 ( 11802) UAC Y 0.43 12.2 ( 16795) UGC C 0.56 6.4 ( 8797) |
|
|
UUA L 0.13 13.8 ( 18894) UCA S 0.12 7.0 ( 9620) UAA * 0.64 2.0 ( 2765) UGA * 0.29 0.9 ( 1249) |
|
|
UUG L 0.13 13.6 ( 18664) UCG S 0.15 8.9 ( 12210) UAG * 0.07 0.2 ( 321) UGG W 1.00 15.2 ( 20889) |
|
|
|
|
|
CUU L 0.10 11.0 ( 15082) CCU P 0.16 7.0 ( 9540) CAU H 0.57 13.0 ( 17791) CGU R 0.38 21.0 ( 28866) |
|
|
CUC L 0.10 11.1 ( 15272) CCC P 0.12 5.5 ( 7490) CAC H 0.43 9.8 ( 13399) CGC R 0.40 22.3 ( 30530) |
|
|
CUA L 0.04 3.8 ( 5266) CCA P 0.19 8.4 ( 11569) CAA Q 0.35 15.4 ( 21121) CGA R 0.06 3.5 ( 4810) |
|
|
CUG L 0.50 53.1 ( 72898) CCG P 0.53 23.4 ( 32080) CAG Q 0.65 29.0 ( 39835) CGG R 0.10 5.4 ( 7401) |
|
|
|
|
|
AUU I 0.51 30.4 ( 41644) ACU T 0.16 8.8 ( 12119) AAU N 0.45 17.6 ( 24106) AGU S 0.15 8.7 ( 11924) |
|
|
AUC I 0.42 25.2 ( 34568) ACC T 0.44 23.5 ( 32265) AAC N 0.55 21.6 ( 29581) AGC S 0.28 16.1 ( 22067) |
|
|
AUA I 0.07 4.2 ( 5733) ACA T 0.13 6.9 ( 9452) AAA K 0.76 33.6 ( 46116) AGA R 0.04 2.0 ( 2771) |
|
|
AUG M 1.00 27.8 ( 38167) ACG T 0.27 14.4 ( 19820) AAG K 0.24 10.3 ( 14174) AGG R 0.02 1.1 ( 1496) |
|
|
|
|
|
GUU V 0.26 18.2 ( 24991) GCU A 0.16 15.2 ( 20813) GAU D 0.63 32.2 ( 44217) GGU G 0.34 24.7 ( 33875) |
|
|
GUC V 0.22 15.3 ( 21050) GCC A 0.27 25.7 ( 35252) GAC D 0.37 19.1 ( 26270) GGC G 0.41 29.8 ( 40849) |
|
|
GUA V 0.15 10.9 ( 14901) GCA A 0.21 20.1 ( 27567) GAA E 0.69 39.7 ( 54431) GGA G 0.11 7.9 ( 10774) |
|
|
GUG V 0.37 26.3 ( 36108) GCG A 0.36 33.9 ( 46524) GAG E 0.31 18.0 ( 24629) GGG G 0.15 11.0 ( 15115) |
|
|
""" |
|
|
|
|
|
# Raw codon-usage table for Saccharomyces cerevisiae, kept as free text.
# Each entry reads: codon triplet, amino-acid letter (or '*'), fraction,
# frequency, and a count in parentheses -- the five fields that parse_text()
# in this file extracts. Some counts are space-padded inside the parentheses.
# NOTE(review): frequency looks like a per-thousand value and '*' like the
# stop-codon marker -- confirm against the data source.
Saccharomyces_cerevisiae = """ |
|
|
UUU F 0.59 26.1 (170666) UCU S 0.26 23.5 (153557) UAU Y 0.56 18.8 (122728) UGU C 0.63 8.1 ( 52903) |
|
|
UUC F 0.41 18.4 (120510) UCC S 0.16 14.2 ( 92923) UAC Y 0.44 14.8 ( 96596) UGC C 0.37 4.8 ( 31095) |
|
|
UUA L 0.28 26.2 (170884) UCA S 0.21 18.7 (122028) UAA * 0.47 1.1 ( 6913) UGA * 0.30 0.7 ( 4447) |
|
|
UUG L 0.29 27.2 (177573) UCG S 0.10 8.6 ( 55951) UAG * 0.23 0.5 ( 3312) UGG W 1.00 10.4 ( 67789) |
|
|
|
|
|
CUU L 0.13 12.3 ( 80076) CCU P 0.31 13.5 ( 88263) CAU H 0.64 13.6 ( 89007) CGU R 0.14 6.4 ( 41791) |
|
|
CUC L 0.06 5.4 ( 35545) CCC P 0.15 6.8 ( 44309) CAC H 0.36 7.8 ( 50785) CGC R 0.06 2.6 ( 16993) |
|
|
CUA L 0.14 13.4 ( 87619) CCA P 0.42 18.3 (119641) CAA Q 0.69 27.3 (178251) CGA R 0.07 3.0 ( 19562) |
|
|
CUG L 0.11 10.5 ( 68494) CCG P 0.12 5.3 ( 34597) CAG Q 0.31 12.1 ( 79121) CGG R 0.04 1.7 ( 11351) |
|
|
|
|
|
AUU I 0.46 30.1 (196893) ACU T 0.35 20.3 (132522) AAU N 0.59 35.7 (233124) AGU S 0.16 14.2 ( 92466) |
|
|
AUC I 0.26 17.2 (112176) ACC T 0.22 12.7 ( 83207) AAC N 0.41 24.8 (162199) AGC S 0.11 9.8 ( 63726) |
|
|
AUA I 0.27 17.8 (116254) ACA T 0.30 17.8 (116084) AAA K 0.58 41.9 (273618) AGA R 0.48 21.3 (139081) |
|
|
AUG M 1.00 20.9 (136805) ACG T 0.14 8.0 ( 52045) AAG K 0.42 30.8 (201361) AGG R 0.21 9.2 ( 60289) |
|
|
|
|
|
GUU V 0.39 22.1 (144243) GCU A 0.38 21.2 (138358) GAU D 0.65 37.6 (245641) GGU G 0.47 23.9 (156109) |
|
|
GUC V 0.21 11.8 ( 76947) GCC A 0.22 12.6 ( 82357) GAC D 0.35 20.2 (132048) GGC G 0.19 9.8 ( 63903) |
|
|
GUA V 0.21 11.8 ( 76927) GCA A 0.29 16.2 (105910) GAA E 0.70 45.6 (297944) GGA G 0.22 10.9 ( 71216) |
|
|
GUG V 0.19 10.8 ( 70337) GCG A 0.11 6.2 ( 40358) GAG E 0.30 19.2 (125717) GGG G 0.12 6.0 ( 39359) |
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
# Lookup from species name to that species' raw codon-usage table text
# (the module-level string constants defined earlier in this file).
species_dict = {
    'Homo_sapiens': Homo_sapiens,
    'Mus_musculus': Mus_musculus,
    'Pichia': Pichia,
    'Escherichia_coli': Escherichia_coli,
    'Saccharomyces_cerevisiae': Saccharomyces_cerevisiae,
}
|
|
|
|
|
# Default pattern for one table entry: codon triplet, amino-acid letter (or
# '*'), fraction, frequency, and a parenthesised count that may contain
# whitespace padding. Compiled once at import time instead of on every call.
_DEFAULT_ENTRY_PATTERN = re.compile(
    r'([A-Z]{3})\s+([A-Z\*])\s+([\d\.]+)\s+([\d\.]+)\s+\(([\d\s]+)\)'
)


def parse_text(data, pattern_txt=None):
    """Parse codon-usage table text into a pandas DataFrame.

    The text is processed line by line; every non-overlapping match of the
    entry pattern on a line becomes one row, so a line may hold several
    entries.

    Args:
        data: Table text (``str``), entries separated by whitespace and
            lines separated by ``'\\n'``.
        pattern_txt: Optional regular-expression source used instead of the
            default entry pattern. It must define at least five capture
            groups, read in order as triplet, amino acid, fraction,
            frequency and number. A falsy value selects the default.

    Returns:
        A DataFrame with columns ``triplet``, ``amino_acid``, ``fraction``,
        ``frequency`` and ``number``. All values are the matched substrings
        (``str``); no numeric conversion is performed. ``number`` has every
        whitespace character removed. The frame is empty (columns still
        present) when nothing matches.

    Raises:
        re.error: If ``pattern_txt`` is not a valid regular expression.
        IndexError: If a custom pattern has fewer than five groups.
    """
    pattern = re.compile(pattern_txt) if pattern_txt else _DEFAULT_ENTRY_PATTERN

    results = []
    for line in data.split('\n'):
        # Skip lines that are empty or whitespace only.
        if not line.strip():
            continue

        for match in pattern.finditer(line):
            triplet, amino_acid, fraction, frequency, number = match.group(1, 2, 3, 4, 5)
            # Only reachable with a custom pattern whose first group can
            # match the empty string.
            if triplet == '':
                continue
            # The count group admits any whitespace (\s), not just spaces,
            # so drop all of it (tabs included) rather than only ' '.
            number = ''.join(number.split())
            results.append([triplet, amino_acid, fraction, frequency, number])

    return pd.DataFrame(
        results,
        columns=['triplet', 'amino_acid', 'fraction', 'frequency', 'number'],
    )