SynCodonLM / clean_split_sequence.py
jheuschkel's picture
Upload 9 files
3385332 verified
raw
history blame
373 Bytes
def clean_split_sequence(seq):
    """Normalize a nucleotide sequence and split it into space-separated triplets.

    The sequence is upper-cased and every ``U`` is replaced with ``T``, so
    RNA-style and lower-case input are both accepted. The normalized
    sequence is then cut into consecutive 3-character chunks joined by
    single spaces.

    Args:
        seq: Nucleotide sequence as a string. After normalization it may
            only contain ``A``, ``T``, ``G`` and ``C``.

    Returns:
        The normalized sequence with a space between every 3-character
        chunk. If the length is not a multiple of 3, the final chunk holds
        the remaining 1 or 2 characters. An empty input yields ``""``.

    Raises:
        ValueError: If the normalized sequence contains any character other
            than ``A``, ``T``, ``G`` or ``C``. The message quotes the first
            offending character in its normalized (upper-case) form.
    """
    normalized = seq.upper().replace('U', 'T')

    # Locate the first character outside the DNA alphabet, if any; the
    # earliest offender is the one reported to the caller.
    invalid = next((base for base in normalized if base not in "ATGC"), None)
    if invalid is not None:
        raise ValueError(f"Invalid character '{invalid}' found in sequence. Only A, T, G, C, and U are allowed.")

    # Step through the sequence three characters at a time; slicing past the
    # end simply produces a shorter final chunk.
    return " ".join(
        normalized[start:start + 3] for start in range(0, len(normalized), 3)
    )