Spaces:
Sleeping
Sleeping
| """ | |
| utils.py - 通用工具函数 | |
| 包含:序列清洗、FASTA 解析、安全浮点转换、网络会话创建 | |
| """ | |
| import re | |
| import math | |
| import requests | |
| from typing import Optional, Dict | |
# One-letter amino-acid codes accepted as-is ("X" is the unknown-residue placeholder).
AA_ALLOWED = set("ACDEFGHIKLMNPQRSTVWYX")


def clean_sequence(seq: str) -> str:
    """Normalize a raw protein sequence string.

    Non-alphabetic characters (digits, whitespace, ``*``, ``-`` ...) are
    dropped. Every remaining letter is upper-cased and kept if it is in
    ``AA_ALLOWED``; any other letter is replaced by ``"X"``.

    Each input letter yields exactly one output character. Upper-casing is
    done per character because some non-ASCII letters expand when
    upper-cased (``"ß".upper() == "SS"``, ``"ﬁ".upper() == "FI"``);
    upper-casing the whole string first would turn such a letter into
    several seemingly valid residues instead of a single ``"X"``.

    Args:
        seq: Raw sequence text.

    Returns:
        The cleaned, upper-case sequence (possibly empty).
    """
    residues = []
    for ch in seq:
        if not ch.isalpha():
            continue
        upper = ch.upper()
        # A multi-character expansion is never a member of AA_ALLOWED,
        # so it falls through to the "X" placeholder.
        residues.append(upper if upper in AA_ALLOWED else "X")
    return "".join(residues)
def parse_fasta(text: str) -> Dict[str, str]:
    """Parse FASTA-formatted text into a ``{header: sequence}`` dict.

    A leading UTF-8 BOM is removed, any line-ending convention is accepted,
    and blank lines are skipped. The header is the text after ``>`` with
    surrounding whitespace stripped. Sequence lines are concatenated and
    passed through ``clean_sequence``. Lines that appear before the first
    header are discarded, records whose cleaned sequence is empty are
    omitted, and a later non-empty record replaces an earlier one that has
    the same header.

    Args:
        text: Full FASTA content as a string.

    Returns:
        Mapping from header to cleaned sequence.
    """
    # Drop a leading byte-order mark so it cannot hide the first ">".
    body = text[1:] if text.startswith("\ufeff") else text

    # str.splitlines() already treats "\r\n", "\r" and "\n" as line breaks.
    lines = [ln for ln in (raw.strip() for raw in body.splitlines()) if ln]
    # Sentinel header: forces the final real record to be flushed in the loop.
    lines.append(">")

    parsed = {}
    current = None
    parts = []
    for ln in lines:
        if not ln.startswith(">"):
            parts.append(ln)
            continue
        # A new header closes the record collected so far (if any).
        if current is not None:
            residues = clean_sequence("".join(parts))
            if residues:
                parsed[current] = residues
        current = ln[1:].strip()
        parts = []
    return parsed
def safe_float(s: str) -> Optional[float]:
    """Convert a value to a finite float, or return ``None``.

    The value is converted with ``str()``, stripped of surrounding
    whitespace and parsed with ``float()``.

    Args:
        s: Value to convert (typically a string).

    Returns:
        The parsed float, or ``None`` if parsing fails or the result is
        NaN or infinite.
    """
    try:
        number = float(str(s).strip())
    except (ValueError, TypeError):
        return None
    # Reject NaN and +/-inf so callers only ever see usable numbers.
    return number if math.isfinite(number) else None
def create_session(timeout: int = 30) -> requests.Session:
    """Create a ``requests.Session`` with a default timeout and User-Agent.

    ``requests`` has no session-wide timeout setting: assigning
    ``session.timeout`` is ignored by the library, so requests would wait
    indefinitely. This function therefore wraps ``session.request`` so that
    ``timeout`` is applied whenever the caller does not pass one. An
    explicit ``timeout=`` argument on an individual call still wins.
    Calls that bypass ``session.request`` (e.g. ``session.send`` on a
    prepared request) are not covered.

    Args:
        timeout: Default timeout in seconds for requests made through the
            returned session.

    Returns:
        The configured session.
    """
    sess = requests.Session()
    # Ignore environment-provided configuration (e.g. proxy variables).
    sess.trust_env = False
    sess.headers.update({"User-Agent": "Mozilla/5.0 (compatible; LLPS-Predictor/2.0)"})
    # Kept for backward compatibility with code that reads ``sess.timeout``;
    # requests itself never looks at this attribute.
    sess.timeout = timeout

    send_request = sess.request

    def _request_with_default_timeout(*args, **kwargs):
        """Forward to ``Session.request``, filling in the default timeout."""
        kwargs.setdefault("timeout", timeout)
        return send_request(*args, **kwargs)

    # get()/post()/... all go through self.request, so they pick this up too.
    sess.request = _request_with_default_timeout
    return sess