|
|
import os |
|
|
import random |
|
|
|
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from typing import List, Dict, Tuple, Union |
|
|
from collections import defaultdict |
|
|
|
|
|
|
|
|
class Codon:
    """Codon-usage toolkit built around a per-species codon usage table.

    The class loads a CSV codon usage table and offers:

    * codon adaptation index (CAI) scoring (``calc_cai`` / ``calculate_CAI``),
    * effective number of codons (``calculate_ENC``),
    * relative synonymous codon usage (``calculate_RSCU``),
    * back-translation of a protein, either with the best codon per amino
      acid (``cai_opt_codon``) or by weighted random sampling
      (``random_codon`` / ``random_codon_weight``),
    * small sequence helpers (``translate_sequence``,
      ``modify_codon_by_frames``).

    Internally every nucleotide sequence is handled in the RNA alphabet
    (``T`` is rewritten to ``U``); amino acids loaded from the usage table
    are stored lower-cased.
    """

    # Standard genetic code in the RNA alphabet; '*' marks a stop codon.
    CODON_TO_AA = {
        'UUU': 'F', 'UUC': 'F',
        'UUA': 'L', 'UUG': 'L', 'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L',
        'AUU': 'I', 'AUC': 'I', 'AUA': 'I',
        'AUG': 'M',
        'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V',
        'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S', 'AGU': 'S', 'AGC': 'S',
        'CCU': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
        'ACU': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
        'GCU': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
        'UAU': 'Y', 'UAC': 'Y',
        'UAA': '*', 'UAG': '*', 'UGA': '*',
        'CAU': 'H', 'CAC': 'H',
        'CAA': 'Q', 'CAG': 'Q',
        'AAU': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K',
        'GAU': 'D', 'GAC': 'D',
        'GAA': 'E', 'GAG': 'E',
        'UGU': 'C', 'UGC': 'C',
        'UGG': 'W',
        'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
        'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'
    }

    def __init__(self, codon_usage_path, rna=True):
        """Load a codon usage table and derive the lookup tables.

        The file is read as comma-separated text. The first line is skipped
        as a header; every later non-empty line must start with the fields
        ``codon, amino_acid, fraction`` (extra fields are ignored). Codons are
        upper-cased and converted to RNA, amino acids are lower-cased.

        If the path is not readable or the file is empty, a message is
        printed and the instance is left with empty tables (best effort, no
        exception).

        Args:
            codon_usage_path: Path of the CSV codon usage table.
            rna: When True, generated sequences use ``U``; when False they
                use ``T``. Only affects sequences this class generates, not
                the metrics.

        Raises:
            ValueError: A data line has fewer than three fields or a
                non-numeric fraction.
            KeyError: A data line names an amino acid outside ``self.aas``
                or a base outside ``self.bases``.
        """
        self.bases = 'GAUC'
        self.aas = 'ACDEFGHIKLMNPQRSTVWY*'.lower()
        # codon -> (amino acid, fraction)
        self.codon_table = {}
        # amino acid -> largest fraction among its codons
        self.max_aa_table = {}
        # amino acid -> codon carrying that largest fraction
        self.cai_best_aa2nn_table = {}
        # codon position (0..2) -> amino acid -> base -> summed fraction
        self.frame_ith_aa_base_fraction = {
            position: {
                aa: {base: 0.0 for base in self.bases}
                for aa in self.aas
            }
            for position in range(3)
        }

        self.output_rna = rna
        self.standard_codon_table = self.CODON_TO_AA
        # Amino acids grouped by the number of synonymous codons.
        self.degeneracy_groups = {
            '2-fold': ['F', 'Y', 'C', 'H', 'Q', 'N', 'K', 'D', 'E'],
            '3-fold': ['I'],
            '4-fold': ['V', 'P', 'T', 'A', 'G'],
            '6-fold': ['L', 'S', 'R']
        }

        if os.access(codon_usage_path, os.R_OK) and os.path.getsize(codon_usage_path) > 0:
            with open(codon_usage_path, 'r') as codon_file:
                next(codon_file)  # skip the header line
                for line in codon_file:
                    line = line.strip()
                    if not line:
                        continue
                    codon, aa, fraction, *_ = line.split(',')

                    codon = codon.upper().replace('T', 'U')
                    aa = aa.lower()
                    fraction = float(fraction)

                    self.codon_table[codon] = (aa, fraction)
                    for i, base in enumerate(codon):
                        self.frame_ith_aa_base_fraction[i][aa][base] += fraction

                    # Track the most frequent codon of each amino acid; on
                    # ties the first codon encountered wins.
                    if aa not in self.max_aa_table or self.max_aa_table[aa] < fraction:
                        self.max_aa_table[aa] = fraction
                        self.cai_best_aa2nn_table[aa] = codon

            print(f"Codon usage table loaded, {len(self.codon_table)} codons loaded from {codon_usage_path}")
        else:
            print('codon usage table is missing', codon_usage_path)

        self.aa_to_codons = self._build_aa_to_codons()
        self.aa_to_weights = self._build_aa_to_weights()
        # Alias kept so callers can use either spelling.
        self.calculate_CAI = self.calc_cai

    def _build_aa_to_codons(self):
        """Return ``{amino acid: [codon, ...]}`` built from the loaded table."""
        aa_to_codons = defaultdict(list)
        for codon, (aa, _) in self.codon_table.items():
            aa_to_codons[aa].append(codon)
        return dict(aa_to_codons)

    def _build_aa_to_weights(self):
        """Return ``{amino acid: [fraction, ...]}`` built from the loaded table.

        The lists are in the same order as those of ``_build_aa_to_codons``
        because both iterate ``self.codon_table`` in insertion order.
        """
        aa_to_weights = defaultdict(list)
        for codon, (aa, weight) in self.codon_table.items():
            aa_to_weights[aa].append(weight)
        return dict(aa_to_weights)

    def _normalize_sequence(self, sequence: str) -> str:
        """Return ``sequence`` upper-cased with every ``T`` replaced by ``U``."""
        return sequence.upper().replace('T', 'U')

    def _validate_sequence(self, sequence: str) -> str:
        """Normalize ``sequence`` to RNA and check that it is a clean CDS.

        Returns:
            The normalized (upper-case RNA) sequence.

        Raises:
            ValueError: The length is not a multiple of 3, or a character
                other than A/U/C/G remains after normalization.
        """
        sequence = self._normalize_sequence(sequence)

        if len(sequence) % 3 != 0:
            raise ValueError(f"序列长度必须是3的倍数,当前长度: {len(sequence)}")

        valid_bases = {'A', 'U', 'C', 'G'}
        if not all(base in valid_bases for base in sequence):
            raise ValueError("序列包含无效的碱基字符")

        return sequence

    def _count_codons(self, sequence: str) -> Dict[str, int]:
        """Count the sense codons of ``sequence`` read in frame 0.

        Stop codons are not counted. Keys are RNA codons in order of first
        appearance.

        Raises:
            ValueError: Propagated from ``_validate_sequence``.
        """
        sequence = self._validate_sequence(sequence)
        codon_count = {}
        for start in range(0, len(sequence), 3):
            codon = sequence[start:start + 3]
            # Unknown codons are treated like stops and skipped.
            if self.standard_codon_table.get(codon, '*') != '*':
                codon_count[codon] = codon_count.get(codon, 0) + 1
        return codon_count

    @staticmethod
    def translate_sequence(sequence: str) -> str:
        """Translate a DNA/RNA sequence into one-letter amino acids.

        The sequence is read in frame 0. Stop codons are emitted as ``*``
        and translation continues past them. Triplets that are not in the
        standard table (including a trailing partial codon) are silently
        skipped.
        """
        sequence = sequence.upper().replace('T', 'U')
        triplets = (sequence[i:i + 3] for i in range(0, len(sequence), 3))
        return ''.join(
            Codon.CODON_TO_AA[codon] for codon in triplets
            if codon in Codon.CODON_TO_AA
        )

    def calc_cai(self, seq):
        """Compute the codon adaptation index of a DNA or RNA sequence.

        Each codon found in the loaded usage table contributes the weight
        ``fraction / max fraction of its amino acid``; the result is the
        geometric mean of those weights. Codons missing from the table are
        ignored. No codon class is filtered out here, so stop codons
        contribute when the table lists them.

        Returns:
            ``np.nan`` when the length is not a multiple of 3, ``0.0`` when
            no codon could be scored or when a scored codon has weight 0,
            otherwise the CAI.
        """
        seq = self._normalize_sequence(seq)
        if len(seq) % 3 != 0:
            return np.nan

        log_weight_sum = 0.0
        valid_num = 0
        for i in range(0, len(seq), 3):
            codon = seq[i:i + 3]
            if codon not in self.codon_table:
                continue
            aa, fraction = self.codon_table[codon]
            f_c_max = self.max_aa_table[aa]
            if f_c_max <= 0:
                # Every synonymous codon has fraction 0: no usable weight
                # (and dividing would raise ZeroDivisionError).
                continue

            w_i = fraction / f_c_max
            if w_i == 0:
                # A zero weight drives the geometric mean to 0; return it
                # directly instead of going through log2(0) = -inf.
                return 0.0
            log_weight_sum += np.log2(w_i)
            valid_num += 1

        return np.exp2(log_weight_sum / valid_num) if valid_num > 0 else 0.0

    def cai_opt_codon(self, aa_seq):
        """Back-translate a protein using the best codon of each amino acid.

        Amino acids without an entry in the loaded table become ``___``.
        The output alphabet follows ``self.output_rna``.
        """
        aa_seq = aa_seq.lower()
        best_codons = []
        for aa in aa_seq:
            codon = self.cai_best_aa2nn_table.get(aa, '___')
            if not self.output_rna:
                codon = codon.replace('U', 'T')
            best_codons.append(codon)
        return ''.join(best_codons)

    def random_codon(self, aa_seq):
        """Back-translate a protein by sampling codons with usage weights.

        Args:
            aa_seq (str): Protein sequence in one-letter code.

        Returns:
            str: A randomly generated coding sequence. Each codon is drawn
            with ``random.choices`` using the loaded fractions as weights;
            amino acids missing from the table become ``___``. The alphabet
            (RNA or DNA) follows ``self.output_rna``.
        """
        aa_seq = aa_seq.lower()
        opt_codon = []
        for aa in aa_seq:
            if aa not in self.aa_to_codons:
                codon = '___'
            else:
                codons = self.aa_to_codons[aa]
                weights = self.aa_to_weights[aa]
                codon = random.choices(codons, weights=weights, k=1)[0]
            opt_codon.append(codon)

        opt_nn = ''.join(opt_codon)
        if not self.output_rna:
            opt_nn = opt_nn.replace('U', 'T')
        return opt_nn

    def random_codon_weight(self, aa_seq, weights_df=None):
        """Back-translate a protein by sampling codons from a custom table.

        Args:
            aa_seq (str): Protein sequence in one-letter code.
            weights_df: Optional table indexed with the columns
                ``amino_acid``, ``triplet`` and ``fraction``. When None the
                call is delegated to ``random_codon``.
                NOTE(review): ``amino_acid`` is compared against lower-cased
                residues, so the column presumably holds lower-case letters;
                an amino acid with no matching row makes ``random.choices``
                fail on an empty population - confirm against callers.

        Returns:
            str: The generated coding sequence. ``U`` is rewritten to ``T``
            when ``self.output_rna`` is False; triplets are otherwise
            emitted as stored in ``weights_df``.
        """
        if weights_df is None:
            return self.random_codon(aa_seq)

        aa_seq = aa_seq.lower()
        opt_codon = []
        for aa in aa_seq:
            if aa not in self.aa_to_codons:
                codon = '___'
            else:
                tmp = weights_df[weights_df['amino_acid'] == aa]
                codon = random.choices(tmp['triplet'].to_list(), weights=tmp['fraction'].to_list(), k=1)[0]
            opt_codon.append(codon)

        opt_nn = ''.join(opt_codon)
        if not self.output_rna:
            opt_nn = opt_nn.replace('U', 'T')
        return opt_nn

    def calculate_ENC(self, sequence: str) -> float:
        """Compute the effective number of codons (ENC) of one sequence.

        For every amino acid with synonymous codons (Met, Trp and stops are
        excluded) the homozygosity is estimated with Wright's formula
        ``F = (n * sum(p_i ** 2) - 1) / (n - 1)``, where ``n`` is the number
        of times the amino acid occurs and ``p_i`` the frequency of its i-th
        codon. ``F`` values are averaged per degeneracy class and combined as
        ``ENC = 2 + 9/F2 + 1/F3 + 5/F4 + 3/F6``.

        Amino acids seen fewer than twice, or whose ``F`` is not positive
        (every codon seen at most once), carry no information and are left
        out. A degeneracy class with no usable amino acid contributes
        nothing, so very short sequences yield low values. The result is
        capped at 61.

        Args:
            sequence: DNA or RNA coding sequence.

        Returns:
            The ENC value.

        Raises:
            ValueError: Propagated from ``_validate_sequence``.
        """
        codon_count = self._count_codons(sequence)

        # Observed counts of every synonymous codon, grouped by amino acid.
        counts_by_aa = defaultdict(list)
        for codon, aa in self.standard_codon_table.items():
            if aa in ('M', 'W', '*'):
                continue
            counts_by_aa[aa].append(codon_count.get(codon, 0))

        aa_to_class = {
            aa: degeneracy
            for degeneracy, members in self.degeneracy_groups.items()
            for aa in members
        }
        # Number of amino acids in each class in the standard genetic code.
        class_sizes = {'2-fold': 9.0, '3-fold': 1.0, '4-fold': 5.0, '6-fold': 3.0}
        F_values = {degeneracy: [] for degeneracy in class_sizes}

        for aa, counts in counts_by_aa.items():
            degeneracy = aa_to_class.get(aa)
            if degeneracy not in F_values:
                continue
            n = sum(counts)
            if n <= 1:
                continue
            # (n * sum(p_i^2) - 1) / (n - 1) rewritten with integer counts so
            # the "F == 0" case is detected exactly.
            numerator = sum(c * c for c in counts) - n
            if numerator <= 0:
                continue
            F_values[degeneracy].append(numerator / (n * (n - 1)))

        enc_value = 2.0
        for degeneracy, size in class_sizes.items():
            if F_values[degeneracy]:
                enc_value += size / np.mean(F_values[degeneracy])

        return float(min(enc_value, 61.0))

    def calculate_RSCU(self, sequences: List[str]) -> Dict[str, float]:
        """Compute relative synonymous codon usage over a set of sequences.

        Sequences that fail validation (length not a multiple of 3, invalid
        characters) are skipped silently. Stop codons are not counted.

        NOTE(review): the expected count of a codon is the amino acid total
        divided by the number of synonymous codons that were actually
        observed, not by the full degeneracy of the amino acid; codons that
        never occur are absent from the result. Confirm this is intended.

        Args:
            sequences: DNA or RNA coding sequences.

        Returns:
            ``{codon: RSCU}`` keyed by RNA codons, for observed codons only.
        """
        total_codon_count = defaultdict(int)
        aa_observed_codons = defaultdict(set)

        for seq in sequences:
            try:
                codon_count = self._count_codons(seq)
            except ValueError:
                continue
            for codon, count in codon_count.items():
                total_codon_count[codon] += count
                aa_observed_codons[self.standard_codon_table[codon]].add(codon)

        aa_total_count = defaultdict(int)
        for codon, count in total_codon_count.items():
            aa_total_count[self.standard_codon_table[codon]] += count

        rscu_dict = {}
        for codon, count in total_codon_count.items():
            aa = self.standard_codon_table[codon]
            if aa_total_count[aa] > 0:
                synonymous_codons = len(aa_observed_codons[aa])
                expected_count = aa_total_count[aa] / synonymous_codons
                rscu_dict[codon] = count / expected_count if expected_count > 0 else 0.0
            else:
                rscu_dict[codon] = 0.0

        return rscu_dict

    def analyze_sequence(self, sequence: str, sequence_name: str = "") -> Dict:
        """Summarize the codon usage metrics of one sequence.

        Args:
            sequence: DNA or RNA coding sequence.
            sequence_name: Optional label copied into the result.

        Returns:
            A dict with ``Sequence_Name``, ``Sequence_Length``, ``ENC``,
            ``ENC_Preference``, ``CAI`` and ``CAI_Level``. If anything
            fails, ``ENC`` and ``CAI`` are None and ``Error`` holds the
            exception message; no exception escapes.
        """
        try:
            enc = self.calculate_ENC(sequence)
            cai = self.calc_cai(sequence)
            return {
                'Sequence_Name': sequence_name,
                'Sequence_Length': len(sequence),
                'ENC': round(enc, 3),
                # NOTE(review): 'week' is presumably a typo for "weak"; left
                # untouched because callers may compare against this label.
                'ENC_Preference': 'strong' if enc <= 35 else 'week',
                'CAI': round(cai, 3),
                'CAI_Level': 'high' if cai > 0.7 else 'low'
            }
        except Exception as e:
            return {
                'Sequence_Name': sequence_name,
                'Sequence_Length': len(sequence),
                'ENC': None,
                'CAI': None,
                'Error': str(e)
            }

    @staticmethod
    def modify_func(sequence):
        """Return a mask of underscores with the same length as ``sequence``."""
        return '_' * len(sequence)

    @staticmethod
    def modify_codon_by_frames(sequence, frames=(1, 2, 3), modify_func=None):
        """Rewrite one codon position at a time and rebuild the sequence.

        The sequence is upper-cased, spaces and newlines are removed and a
        trailing partial codon is dropped. For every requested codon
        position, the bases at that position (taken across all codons, as
        one string) are passed to ``modify_func`` and the sequence is rebuilt
        with the returned string; the other two positions stay untouched.
        Each requested position is processed independently, starting from
        the original sequence.

        Args:
            sequence (str): Input nucleotide sequence.
            frames: Iterable of codon positions to modify, each 1, 2 or 3.
            modify_func (callable): Receives the string of bases at one
                codon position and returns a replacement of the same length.
                When None the sequence is rebuilt unchanged.

        Returns:
            list[str]: One rebuilt sequence per entry of ``frames``, in order.

        Raises:
            ValueError: A frame is not 1, 2 or 3, or ``modify_func`` returns
                a string of a different length.
        """
        seq = sequence.upper().replace(' ', '').replace('\n', '')
        seq = seq[:len(seq) - len(seq) % 3]

        # Bases found at codon positions 1, 2 and 3 respectively.
        position_strings = (seq[0::3], seq[1::3], seq[2::3])

        reconstructed_list = []
        for frame in frames:
            if frame not in (1, 2, 3):
                raise ValueError(f"frame must be 1, 2 or 3, got {frame!r}")
            edited = list(position_strings)
            if modify_func:
                replacement = modify_func(edited[frame - 1])
                if len(replacement) != len(edited[frame - 1]):
                    raise ValueError(
                        "modify_func must return a string of the same length as its input"
                    )
                edited[frame - 1] = replacement

            # Interleave the three position strings back into codons.
            reconstructed_list.append(
                ''.join(first + second + third for first, second, third in zip(*edited))
            )

        return reconstructed_list
|
|
|
|
|
|
|
|
def example_usage(codon_table_path="/Users/gz_julse/code/minimind_RiboUTR/maotao_file/codon_table/codon_usage_{species}.csv"):
    """Exercise every feature of ``Codon`` and print the results.

    This is a manual smoke test: it only prints, it asserts nothing. When a
    species table cannot be read, ``Codon`` reports it and the metrics fall
    back to their empty-table values.

    Args:
        codon_table_path: Path template of the codon usage CSV files; it
            must contain a ``{species}`` placeholder. The default is the
            original author's local path.
    """
    print("=" * 60)
    print("测试 Codon 类的所有功能")
    print("=" * 60)

    species_list = ["mouse", "Ec", "Sac", "Pic", "Human"]
    test_species = "mouse"

    # Inputs shared by the sections below.
    aa_seq = "MASV"
    dna_seq = "ATGGCCATGGCGCCCAGAACTGAGATCAAATAGTACCCGTATTAACGGGTA"
    rna_seq = dna_seq.replace('T', 'U')

    test_sequences = [
        "AUGGCUUCUUUUUUCUUCUUCUUCUUCUUCUUCCUCCUCCUCCUCCUCCUCCUCCUC",
        "ATGGCUUCUUUUCUCGUAUACACAGATGACTACGTTAGCAGCTACGTTACGTTACGTTACG",
        "AUGGUUUGUUGGUUGGUUGGUUGGUUGGUUGGUUGGUUGGUUGGUUGGUUGGUUGGA"
    ]

    test_sequence = "AUGGCUUCUUUUCUCGUAUACACAGAUGACUACGUAGCAGCUACGUACGUACGUACG"

    # Smoke call only: the translation result is intentionally unused.
    Codon.translate_sequence(dna_seq)

    print(f"\n1. 初始化 Codon 实例 (物种: {species_list})")
    print("-" * 50)

    # One instance per species for each output alphabet.
    codon_instance_dna = {species: Codon(codon_table_path.format(species=species), rna=False) for species in
                          species_list}
    codon_instance_rna = {species: Codon(codon_table_path.format(species=species), rna=True) for species in
                          species_list}

    print(f"✓ 成功创建 {len(species_list)} 个物种的 Codon 实例")

    print(f"\n2. 测试 CAI 计算")
    print("-" * 50)

    print("DNA序列CAI:", [codon_instance_rna[species].calc_cai(dna_seq) for species in species_list])
    print("RNA序列CAI:", [codon_instance_rna[species].calc_cai(rna_seq) for species in species_list])

    # DNA and RNA spellings of the same sequence must score identically.
    dna_cai = codon_instance_rna[test_species].calc_cai(dna_seq)
    rna_cai = codon_instance_rna[test_species].calc_cai(rna_seq)
    print(f"✓ DNA和RNA输入结果一致: {np.isclose(dna_cai, rna_cai)}")

    print(f"\n3. 测试 CAI 最优密码子序列")
    print("-" * 50)

    opt_rna = codon_instance_rna[test_species].cai_opt_codon(aa_seq)
    opt_dna = codon_instance_dna[test_species].cai_opt_codon(aa_seq)

    print(f"氨基酸序列: {aa_seq}")
    print(f"RNA格式最优密码子: {opt_rna}")
    print(f"DNA格式最优密码子: {opt_dna}")
    print(f"✓ 输出格式正确: RNA={opt_rna.replace('T', '') == opt_rna}, DNA={opt_dna.replace('U', '') == opt_dna}")

    print(f"\n4. 测试 ENC 计算")
    print("-" * 50)

    enc_dna = codon_instance_rna[test_species].calculate_ENC(dna_seq)
    enc_rna = codon_instance_rna[test_species].calculate_ENC(rna_seq)

    print(f"DNA序列ENC: {enc_dna:.3f}")
    print(f"RNA序列ENC: {enc_rna:.3f}")
    print(f"✓ DNA和RNA输入结果一致: {np.isclose(enc_dna, enc_rna)}")

    print(f"\n5. 测试 RSCU 计算")
    print("-" * 50)

    rscu_results = codon_instance_rna[test_species].calculate_RSCU(test_sequences)
    print(f"计算了 {len(rscu_results)} 个密码子的RSCU值")
    print("前10个密码子的RSCU值:")
    for codon, rscu in list(rscu_results.items())[:10]:
        print(f"  {codon}: {rscu:.3f}")

    print(f"\n6. 测试综合分析 (analyze_sequence)")
    print("-" * 50)

    analysis_result = codon_instance_rna[test_species].analyze_sequence(test_sequence, "Test_Gene")
    print("综合分析结果:")
    for key, value in analysis_result.items():
        print(f"  {key}: {value}")

    print(f"\n7. 测试序列验证功能")
    print("-" * 50)

    # First entry has a length that is not a multiple of 3, second one
    # contains invalid characters; both must be rejected.
    invalid_seqs = [
        "AUGGCUUCUUUUCUCG",
        "AUGXXXUUUUCUCGUAUACACAGAUGACUACGUAGCAGCUACGUACGUACGUACG",
    ]

    for i, seq in enumerate(invalid_seqs):
        try:
            codon_instance_rna[test_species]._validate_sequence(seq)
            print(f"序列 {i + 1}: 错误地通过了验证")
        except ValueError as e:
            print(f"序列 {i + 1}: 正确捕获错误 - {e}")

    print(f"\n8. 测试密码子计数")
    print("-" * 50)

    codon_count = codon_instance_rna[test_species]._count_codons(test_sequence)
    print(f"序列 '{test_sequence[:20]}...' 的密码子计数:")
    for codon, count in list(codon_count.items())[:5]:
        print(f"  {codon}: {count}")
    print(f"  ... (共 {len(codon_count)} 种密码子)")

    print(f"\n9. 测试不同输出格式的兼容性")
    print("-" * 50)

    # The output alphabet of an instance must not influence its metrics.
    cai_rna_instance = codon_instance_rna[test_species].calc_cai(test_sequence)
    cai_dna_instance = codon_instance_dna[test_species].calc_cai(test_sequence)

    print(f"RNA输出实例CAI: {cai_rna_instance:.4f}")
    print(f"DNA输出实例CAI: {cai_dna_instance:.4f}")
    print(f"✓ 不同输出格式实例的CAI计算相同: {np.isclose(cai_rna_instance, cai_dna_instance)}")

    print(f"\n" + "=" * 60)
    print("所有功能测试完成!")
    print("=" * 60)
|
|
|
|
|
|
|
|
# Run the demonstration only when the module is executed as a script.
if __name__ == "__main__":
    example_usage()