llps_prediction / utils.py
Elbereth123's picture
Update utils.py
5b7623e verified
"""
utils.py - 通用工具函数
包含:序列清洗、FASTA 解析、安全浮点转换、网络会话创建
"""
import re
import math
import requests
from typing import Optional, Dict
# One-letter residue codes that are kept as-is; "X" is the placeholder
# that every other alphabetic character is mapped to.
AA_ALLOWED = set("ACDEFGHIKLMNPQRSTVWYX")


def clean_sequence(seq: str) -> str:
    """Normalise a raw amino-acid sequence string.

    The input is upper-cased, every non-alphabetic character (digits,
    whitespace, punctuation, gap symbols, ...) is dropped, and every
    alphabetic character that is not in ``AA_ALLOWED`` is replaced by ``"X"``.

    Args:
        seq: Raw sequence text.

    Returns:
        The cleaned, upper-case sequence (possibly empty).
    """
    residues = []
    for symbol in seq.upper():
        if not symbol.isalpha():
            # Non-letters are removed entirely rather than replaced.
            continue
        residues.append(symbol if symbol in AA_ALLOWED else "X")
    return "".join(residues)
def parse_fasta(text: str) -> Dict[str, str]:
    """Parse FASTA-formatted text into a ``{header: sequence}`` dictionary.

    A leading UTF-8 BOM is removed and Windows/Mac line endings are
    normalised before parsing. Each sequence is passed through
    ``clean_sequence``; records whose cleaned sequence is empty are skipped.
    Lines that appear before the first ``>`` header are discarded, and when
    the same header occurs more than once the later non-empty record
    replaces the earlier one.

    Args:
        text: Full FASTA content as a single string.

    Returns:
        Mapping from header text (without the leading ``>``, stripped) to
        the cleaned sequence.
    """
    parsed: Dict[str, str] = {}

    def _store(name, pieces):
        # Commit one finished record; empty sequences are ignored.
        cleaned = clean_sequence("".join(pieces))
        if cleaned:
            parsed[name] = cleaned

    # Drop a leading UTF-8 byte-order mark.
    if text.startswith('\ufeff'):
        text = text[1:]

    # Normalise \r\n and bare \r line endings to \n.
    normalised = text.replace('\r\n', '\n').replace('\r', '\n')

    current = None
    pieces = []
    for raw in normalised.splitlines():
        stripped = raw.strip()
        if not stripped:
            continue
        if not stripped.startswith(">"):
            pieces.append(stripped)
            continue
        # A new header closes the record collected so far (if any).
        if current is not None:
            _store(current, pieces)
        current = stripped[1:].strip()
        pieces = []

    # Flush the final record, which has no following header to trigger it.
    if current is not None:
        _store(current, pieces)
    return parsed
def safe_float(s: str) -> Optional[float]:
    """Convert a value to a finite float, or return ``None`` on failure.

    The value is stringified and stripped of surrounding whitespace before
    conversion, so non-string inputs are accepted as well.

    Args:
        s: Value to convert.

    Returns:
        The parsed float, or ``None`` when the text cannot be parsed or the
        result is NaN or infinite.
    """
    try:
        number = float(str(s).strip())
    except (ValueError, TypeError):
        return None
    # Reject NaN and +/-inf so callers only ever see usable numbers.
    return number if math.isfinite(number) else None
class _TimeoutSession(requests.Session):
    """``requests.Session`` that applies a default timeout to every request.

    ``requests`` never reads a ``timeout`` attribute from the session, so
    merely assigning ``session.timeout`` has no effect; the default has to be
    injected into each ``request()`` call.
    """

    def __init__(self, timeout=None):
        super().__init__()
        # Kept as a public attribute so code that reads ``sess.timeout``
        # continues to work.
        self.timeout = timeout

    def request(self, method, url, *args, **kwargs):
        # An explicit ``timeout=`` from the caller (even ``None``) wins;
        # otherwise fall back to the session-wide default.
        kwargs.setdefault("timeout", getattr(self, "timeout", None))
        return super().request(method, url, *args, **kwargs)


def create_session(timeout: int = 30) -> requests.Session:
    """Create a pre-configured HTTP session.

    The session ignores environment settings (``trust_env = False``), sends
    a fixed ``User-Agent`` header, and applies ``timeout`` to every request
    that does not pass its own ``timeout=`` argument.

    Args:
        timeout: Default per-request timeout in seconds.

    Returns:
        A ``requests.Session`` (subclass) with the default timeout also
        exposed as ``session.timeout``.
    """
    sess = _TimeoutSession(timeout)
    sess.trust_env = False
    sess.headers.update({"User-Agent": "Mozilla/5.0 (compatible; LLPS-Predictor/2.0)"})
    return sess