PharC committed on
Commit
03357ec
·
verified ·
1 Parent(s): 53df90c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -22
app.py CHANGED
@@ -12,7 +12,6 @@ app = Flask(__name__)
12
  ssl._create_default_https_context = ssl._create_unverified_context
13
 
14
  def get_ready_for_primers(gene_symbol, species="human"):
15
- """获取基因序列和外显子交界点信息(增强报错处理版)"""
16
  Entrez.email = "your_email@example.com"
17
 
18
  try:
@@ -21,32 +20,30 @@ def get_ready_for_primers(gene_symbol, species="human"):
21
  handle = Entrez.esearch(db="gene", term=search_term)
22
  record = Entrez.read(handle)
23
  if not record["IdList"]:
24
- return {"error": f"在 {species} 中未找到基因符号: {gene_symbol}"}
25
  gene_id = record["IdList"][0]
26
 
27
- # 2. 尝试获取关联RefSeq mRNA (NM_)
28
- # 增加多种筛选条件以提高匹配率
29
- link_handle = Entrez.elink(
30
- dbfrom="gene",
31
- db="nucleotide",
32
- id=gene_id,
33
- term="srcdb_refseq[prop] AND (mRNA[filter] OR RNA[filter])"
34
- )
35
  link_record = Entrez.read(link_handle)
36
 
37
- # 检查是否有跳转结果
38
- if not link_record[0]["LinkSetDb"]:
39
- return {"error": f"基因 {gene_symbol} (ID: {gene_id}) 暂无已证实的 RefSeq mRNA 序列 (NM_),无法进行 qPCR 引物设计。"}
40
-
41
- # 3. 提取核苷酸 ID
42
- nucl_id = link_record[0]["LinkSetDb"][0]["Link"][0]["Id"]
 
43
 
44
- # 4. 下载 GenBank 数据
 
 
 
45
  handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
46
  seq_record = SeqIO.read(handle, "genbank")
47
- seq_type = "mRNA (编码)" if seq_record.id.startswith("NM_") else "ncRNA (非编码)"
48
 
49
- # 提取外显子(此处逻辑保持不变
50
  junctions = []
51
  current_pos = 0
52
  for feature in seq_record.features:
@@ -55,13 +52,11 @@ def get_ready_for_primers(gene_symbol, species="human"):
55
  current_pos += (end - start)
56
  junctions.append(int(current_pos))
57
 
58
- if junctions:
59
- junctions.pop()
60
 
61
  return {
62
  "symbol": gene_symbol,
63
  "nm_id": seq_record.id,
64
- "type": seq_type, # 传给前端展示
65
  "sequence": str(seq_record.seq),
66
  "junctions": junctions
67
  }
 
12
  ssl._create_default_https_context = ssl._create_unverified_context
13
 
14
  def get_ready_for_primers(gene_symbol, species="human"):
 
15
  Entrez.email = "your_email@example.com"
16
 
17
  try:
 
20
  handle = Entrez.esearch(db="gene", term=search_term)
21
  record = Entrez.read(handle)
22
  if not record["IdList"]:
23
+ return {"error": f"未找到基因: {gene_symbol}"}
24
  gene_id = record["IdList"][0]
25
 
26
+ # 2. 改进Link 获取逻辑:兼容 NM_, NR_, XM_, XR_ 等所有转录本
27
+ # 不再使用复杂的 term 过滤,直接获取所有关联的核苷酸序列
28
+ link_handle = Entrez.elink(dbfrom="gene", db="nucleotide", id=gene_id)
 
 
 
 
 
29
  link_record = Entrez.read(link_handle)
30
 
31
+ nucl_id = None
32
+ # 深度遍历:寻找存储在 'gene_nucleotide' 链接中的 ID
33
+ for link_set in link_record[0].get("LinkSetDb", []):
34
+ # 通常 RefSeq 序列存储在名为 'gene_nucleotide' 或 'gene_nucleotide_refseq' 的链接中
35
+ if "nucleotide" in link_set["DbTo"]:
36
+ nucl_id = link_set["Link"][0]["Id"]
37
+ break
38
 
39
+ if not nucl_id:
40
+ return {"error": f"基因 {gene_symbol} 找不到关联的核苷酸序列 ID"}
41
+
42
+ # 3. 下载数据
43
  handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
44
  seq_record = SeqIO.read(handle, "genbank")
 
45
 
46
+ # 4. 提取外显子(逻辑同前
47
  junctions = []
48
  current_pos = 0
49
  for feature in seq_record.features:
 
52
  current_pos += (end - start)
53
  junctions.append(int(current_pos))
54
 
55
+ if junctions: junctions.pop()
 
56
 
57
  return {
58
  "symbol": gene_symbol,
59
  "nm_id": seq_record.id,
 
60
  "sequence": str(seq_record.seq),
61
  "junctions": junctions
62
  }