Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,27 +23,47 @@ def get_ready_for_primers(gene_symbol, species="human"):
|
|
| 23 |
return {"error": f"未找到基因: {gene_symbol}"}
|
| 24 |
gene_id = record["IdList"][0]
|
| 25 |
|
| 26 |
-
# 2.
|
| 27 |
-
# 不再使用
|
| 28 |
link_handle = Entrez.elink(dbfrom="gene", db="nucleotide", id=gene_id)
|
| 29 |
link_record = Entrez.read(link_handle)
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
for link_set in link_record[0].get("LinkSetDb", []):
|
| 34 |
-
# 通常 RefSeq 序列存储在名为 'gene_nucleotide' 或 'gene_nucleotide_refseq' 的链接中
|
| 35 |
if "nucleotide" in link_set["DbTo"]:
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
break
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
if not nucl_id:
|
| 40 |
-
return {"error": f"基因 {gene_symbol} 找不到
|
| 41 |
|
| 42 |
-
#
|
| 43 |
handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
|
| 44 |
seq_record = SeqIO.read(handle, "genbank")
|
| 45 |
|
| 46 |
-
#
|
| 47 |
junctions = []
|
| 48 |
current_pos = 0
|
| 49 |
for feature in seq_record.features:
|
|
@@ -61,7 +81,7 @@ def get_ready_for_primers(gene_symbol, species="human"):
|
|
| 61 |
"junctions": junctions
|
| 62 |
}
|
| 63 |
except Exception as e:
|
| 64 |
-
return {"error": f"
|
| 65 |
|
| 66 |
def design_qpcr_primers(gene_data):
|
| 67 |
"""设计qPCR引物,包含失败重试机制"""
|
|
|
|
| 23 |
return {"error": f"未找到基因: {gene_symbol}"}
|
| 24 |
gene_id = record["IdList"][0]
|
| 25 |
|
| 26 |
+
# 2. 获取关联序列
|
| 27 |
+
# 注意:不再使用 term,避免 NCBI 内部过滤器的不确定性
|
| 28 |
link_handle = Entrez.elink(dbfrom="gene", db="nucleotide", id=gene_id)
|
| 29 |
link_record = Entrez.read(link_handle)
|
| 30 |
|
| 31 |
+
# 3. 筛选高质量的 RefSeq 序列 (NM_ 或 NR_)
|
| 32 |
+
all_ids = []
|
| 33 |
for link_set in link_record[0].get("LinkSetDb", []):
|
|
|
|
| 34 |
if "nucleotide" in link_set["DbTo"]:
|
| 35 |
+
all_ids.extend([link["Id"] for link in link_set["Link"]])
|
| 36 |
+
|
| 37 |
+
if not all_ids:
|
| 38 |
+
return {"error": "找不到关联序列"}
|
| 39 |
+
|
| 40 |
+
# 使用 esummary 批量检查这些 ID 哪个是我们要的 RefSeq 转录本
|
| 41 |
+
summary_handle = Entrez.esummary(db="nucleotide", id=",".join(all_ids))
|
| 42 |
+
summaries = Entrez.read(summary_handle)
|
| 43 |
+
|
| 44 |
+
nucl_id = None
|
| 45 |
+
for summary in summaries:
|
| 46 |
+
accession = summary.get('Caption', '')
|
| 47 |
+
# 关键:只选择以 NM_ (编码) 或 NR_ (非编码) 开头的 RefSeq 序列
|
| 48 |
+
if accession.startswith('NM_') or accession.startswith('NR_'):
|
| 49 |
+
nucl_id = summary['Id']
|
| 50 |
break
|
| 51 |
|
| 52 |
+
# 如果没找到 NM/NR,退而求其次找 XM/XR (预测的转录本)
|
| 53 |
+
if not nucl_id:
|
| 54 |
+
for summary in summaries:
|
| 55 |
+
if summary.get('Caption', '').startswith(('XM_', 'XR_')):
|
| 56 |
+
nucl_id = summary['Id']
|
| 57 |
+
break
|
| 58 |
+
|
| 59 |
if not nucl_id:
|
| 60 |
+
return {"error": f"基因 {gene_symbol} 找不到标准的 RefSeq 转录本 (NM_/NR_)"}
|
| 61 |
|
| 62 |
+
# 4. 下载并解析序列
|
| 63 |
handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
|
| 64 |
seq_record = SeqIO.read(handle, "genbank")
|
| 65 |
|
| 66 |
+
# 5. 提取外显子
|
| 67 |
junctions = []
|
| 68 |
current_pos = 0
|
| 69 |
for feature in seq_record.features:
|
|
|
|
| 81 |
"junctions": junctions
|
| 82 |
}
|
| 83 |
except Exception as e:
|
| 84 |
+
return {"error": f"处理出错: {str(e)}"}
|
| 85 |
|
| 86 |
def design_qpcr_primers(gene_data):
|
| 87 |
"""设计qPCR引物,包含失败重试机制"""
|