""" utils.py - 通用工具函数 包含:序列清洗、FASTA 解析、安全浮点转换、网络会话创建 """ import re import math import requests from typing import Optional, Dict AA_ALLOWED = set("ACDEFGHIKLMNPQRSTVWYX") def clean_sequence(seq: str) -> str: """清洗序列:去除非字母字符,将非标准氨基酸替换为 X,转为大写""" return "".join((ch if ch in AA_ALLOWED else "X") for ch in seq.upper() if ch.isalpha()) def parse_fasta(text: str) -> Dict[str, str]: """ 解析 FASTA 文本,返回 {header: sequence} 字典。 自动处理 BOM、Windows/Mac 换行符,忽略空序列。 """ # 移除 UTF-8 BOM if text.startswith('\ufeff'): text = text[1:] records = {} header = None seq_chunks = [] # 统一换行符为 \n text = text.replace('\r\n', '\n').replace('\r', '\n') for line in text.splitlines(): line = line.strip() if not line: continue if line.startswith(">"): if header is not None: seq = clean_sequence("".join(seq_chunks)) if seq: # 忽略空序列 records[header] = seq header = line[1:].strip() seq_chunks = [] else: seq_chunks.append(line) # 处理最后一条 if header is not None: seq = clean_sequence("".join(seq_chunks)) if seq: records[header] = seq return records def safe_float(s: str) -> Optional[float]: try: val = float(str(s).strip()) if math.isnan(val) or math.isinf(val): return None return val except (ValueError, TypeError): return None def create_session(timeout: int = 30) -> requests.Session: sess = requests.Session() sess.trust_env = False sess.headers.update({"User-Agent": "Mozilla/5.0 (compatible; LLPS-Predictor/2.0)"}) sess.timeout = timeout return sess