PharC committed on
Commit
4bedae3
·
verified ·
1 Parent(s): 03357ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -10
app.py CHANGED
@@ -23,27 +23,47 @@ def get_ready_for_primers(gene_symbol, species="human"):
23
  return {"error": f"未找到基因: {gene_symbol}"}
24
  gene_id = record["IdList"][0]
25
 
26
- # 2. 改进的 Link 获取逻辑:兼容 NM_, NR_, XM_, XR_ 等所有转录本
27
- # 不再使用复杂的 term 过滤,直接获取所有关联核苷酸序列
28
  link_handle = Entrez.elink(dbfrom="gene", db="nucleotide", id=gene_id)
29
  link_record = Entrez.read(link_handle)
30
 
31
- nucl_id = None
32
- # 深度遍历:寻找存储在 'gene_nucleotide' 链接中的 ID
33
  for link_set in link_record[0].get("LinkSetDb", []):
34
- # 通常 RefSeq 序列存储在名为 'gene_nucleotide' 或 'gene_nucleotide_refseq' 的链接中
35
  if "nucleotide" in link_set["DbTo"]:
36
- nucl_id = link_set["Link"][0]["Id"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  break
38
 
 
 
 
 
 
 
 
39
  if not nucl_id:
40
- return {"error": f"基因 {gene_symbol} 找不到关联核苷酸序列 ID"}
41
 
42
- # 3. 下载数据
43
  handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
44
  seq_record = SeqIO.read(handle, "genbank")
45
 
46
- # 4. 提取外显子(逻辑同前)
47
  junctions = []
48
  current_pos = 0
49
  for feature in seq_record.features:
@@ -61,7 +81,7 @@ def get_ready_for_primers(gene_symbol, species="human"):
61
  "junctions": junctions
62
  }
63
  except Exception as e:
64
- return {"error": f"获取基因 {gene_symbol} 信息失败: {str(e)}"}
65
 
66
  def design_qpcr_primers(gene_data):
67
  """设计qPCR引物,包含失败重试机制"""
 
23
  return {"error": f"未找到基因: {gene_symbol}"}
24
  gene_id = record["IdList"][0]
25
 
26
+ # 2. 获取关联序列
27
+ # 注意:不再使用 term,避免 NCBI 内部过滤不确定性
28
  link_handle = Entrez.elink(dbfrom="gene", db="nucleotide", id=gene_id)
29
  link_record = Entrez.read(link_handle)
30
 
31
+ # 3. 筛选高质量的 RefSeq 序列 (NM_ 或 NR_)
32
+ all_ids = []
33
  for link_set in link_record[0].get("LinkSetDb", []):
 
34
  if "nucleotide" in link_set["DbTo"]:
35
+ all_ids.extend([link["Id"] for link in link_set["Link"]])
36
+
37
+ if not all_ids:
38
+ return {"error": "找不到关联序列"}
39
+
40
+ # 使用 esummary 批量检查这些 ID 哪个是我们要的 RefSeq 转录本
41
+ summary_handle = Entrez.esummary(db="nucleotide", id=",".join(all_ids))
42
+ summaries = Entrez.read(summary_handle)
43
+
44
+ nucl_id = None
45
+ for summary in summaries:
46
+ accession = summary.get('Caption', '')
47
+ # 关键:只选择以 NM_ (编码) 或 NR_ (非编码) 开头的 RefSeq 序列
48
+ if accession.startswith('NM_') or accession.startswith('NR_'):
49
+ nucl_id = summary['Id']
50
  break
51
 
52
+ # 如果没找到 NM/NR,退而求其次找 XM/XR (预测的转录本)
53
+ if not nucl_id:
54
+ for summary in summaries:
55
+ if summary.get('Caption', '').startswith(('XM_', 'XR_')):
56
+ nucl_id = summary['Id']
57
+ break
58
+
59
  if not nucl_id:
60
+ return {"error": f"基因 {gene_symbol} 找不到标准RefSeq 转录本 (NM_/NR_)"}
61
 
62
+ # 4. 下载并解析序列
63
  handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
64
  seq_record = SeqIO.read(handle, "genbank")
65
 
66
+ # 5. 提取外显子
67
  junctions = []
68
  current_pos = 0
69
  for feature in seq_record.features:
 
81
  "junctions": junctions
82
  }
83
  except Exception as e:
84
+ return {"error": f"处理出错: {str(e)}"}
85
 
86
  def design_qpcr_primers(gene_data):
87
  """设计qPCR引物,包含失败重试机制"""