Spaces:

Elbereth123
/

llps_prediction

Sleeping

App Files Files Community

llps_prediction / utils.py

Elbereth123

Update utils.py

5b7623e verified 12 days ago

raw

history blame contribute delete

1.96 kB

	"""
	utils.py - 通用工具函数
	包含：序列清洗、FASTA 解析、安全浮点转换、网络会话创建
	"""

	import re
	import math
	import requests
	from typing import Optional, Dict

	# One-letter codes accepted as-is: the 20 standard amino acids plus "X" (unknown).
	AA_ALLOWED = set("ACDEFGHIKLMNPQRSTVWYX")

	def clean_sequence(seq: str) -> str:
	    """Normalize a raw protein sequence string.

	    Non-letter characters (digits, whitespace, gap symbols, punctuation) are
	    dropped. Every remaining letter is upper-cased; if the result is not a
	    member of ``AA_ALLOWED`` it is replaced by a single ``"X"``.

	    Upper-casing is done one character at a time on purpose: Unicode case
	    mapping can expand one character into several (``"ß".upper() == "SS"``),
	    and upper-casing the whole string first would turn such a non-amino-acid
	    letter into multiple valid residues instead of one ``"X"``.

	    Args:
	        seq: Raw sequence text.

	    Returns:
	        The cleaned sequence, containing only characters from ``AA_ALLOWED``.
	    """
	    return "".join(
	        (upper if upper in AA_ALLOWED else "X")
	        for upper in (ch.upper() for ch in seq if ch.isalpha())
	    )

	def parse_fasta(text: str) -> Dict[str, str]:
	    """Parse FASTA-formatted text into a ``{header: sequence}`` dict.

	    A leading UTF-8 BOM is removed and ``\\r\\n`` / ``\\r`` line endings are
	    normalized before splitting. Blank lines are skipped. A header is the
	    text after ``>`` with surrounding whitespace stripped. Sequence lines of
	    a record are concatenated and passed through ``clean_sequence``.

	    Records whose cleaned sequence is empty are omitted. Sequence lines that
	    appear before the first header are ignored. If the same header occurs
	    more than once, the later non-empty record replaces the earlier one.

	    Args:
	        text: Full FASTA text.

	    Returns:
	        Mapping of header to cleaned sequence, in order of first appearance.
	    """
	    body = text[1:] if text.startswith('\ufeff') else text
	    body = body.replace('\r\n', '\n').replace('\r', '\n')

	    # First pass: group raw sequence lines under their header, in file order.
	    entries = []
	    for raw_line in body.splitlines():
	        stripped = raw_line.strip()
	        if not stripped:
	            continue
	        if stripped.startswith(">"):
	            entries.append((stripped[1:].strip(), []))
	        elif entries:
	            # Lines seen before any header have no record to attach to.
	            entries[-1][1].append(stripped)

	    # Second pass: clean each record and keep only the non-empty ones.
	    parsed = {}
	    for name, pieces in entries:
	        residues = clean_sequence("".join(pieces))
	        if residues:
	            parsed[name] = residues
	    return parsed

	def safe_float(s: str) -> Optional[float]:
	    """Convert a value to a finite float, or return ``None`` if that fails.

	    The value is passed through ``str()`` and stripped of surrounding
	    whitespace before conversion, so non-string inputs are accepted too.

	    Args:
	        s: Value to convert.

	    Returns:
	        The parsed float, or ``None`` when the text is not a valid number or
	        parses to NaN or an infinity.
	    """
	    try:
	        number = float(str(s).strip())
	    except (ValueError, TypeError):
	        return None
	    # NaN and +/-inf parse successfully but are not usable values here.
	    return number if math.isfinite(number) else None

	class _DefaultTimeoutSession(requests.Session):
	    """``requests.Session`` that applies ``self.timeout`` when none is given.

	    ``requests`` does not read a ``timeout`` attribute from a Session, so
	    merely assigning one leaves every request without a timeout. This
	    subclass injects the default at the point where the request is issued.
	    """

	    def __init__(self, timeout=None):
	        super().__init__()
	        self.timeout = timeout

	    def request(self, method, url, *args, **kwargs):
	        # An explicit ``timeout=`` from the caller (even None) takes priority.
	        # The Session convenience methods (get/post/...) forward it by keyword.
	        kwargs.setdefault("timeout", self.timeout)
	        return super().request(method, url, *args, **kwargs)

	def create_session(timeout: int = 30) -> requests.Session:
	    """Create a pre-configured HTTP session.

	    The session has ``trust_env`` disabled, sends a fixed ``User-Agent``
	    header, and uses ``timeout`` as the default for every request that does
	    not pass its own ``timeout=``. The value is also exposed as
	    ``session.timeout`` and can be changed after creation.

	    Args:
	        timeout: Default request timeout in seconds.

	    Returns:
	        A ``requests.Session`` (subclass) ready for use.
	    """
	    sess = _DefaultTimeoutSession(timeout)
	    sess.trust_env = False
	    sess.headers.update({"User-Agent": "Mozilla/5.0 (compatible; LLPS-Predictor/2.0)"})
	    return sess