Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,7 +12,6 @@ app = Flask(__name__)
|
|
| 12 |
ssl._create_default_https_context = ssl._create_unverified_context
|
| 13 |
|
| 14 |
def get_ready_for_primers(gene_symbol, species="human"):
|
| 15 |
-
"""获取基因序列和外显子交界点信息(增强报错处理版)"""
|
| 16 |
Entrez.email = "your_email@example.com"
|
| 17 |
|
| 18 |
try:
|
|
@@ -21,32 +20,30 @@ def get_ready_for_primers(gene_symbol, species="human"):
|
|
| 21 |
handle = Entrez.esearch(db="gene", term=search_term)
|
| 22 |
record = Entrez.read(handle)
|
| 23 |
if not record["IdList"]:
|
| 24 |
-
return {"error": f"
|
| 25 |
gene_id = record["IdList"][0]
|
| 26 |
|
| 27 |
-
# 2.
|
| 28 |
-
#
|
| 29 |
-
link_handle = Entrez.elink(
|
| 30 |
-
dbfrom="gene",
|
| 31 |
-
db="nucleotide",
|
| 32 |
-
id=gene_id,
|
| 33 |
-
term="srcdb_refseq[prop] AND (mRNA[filter] OR RNA[filter])"
|
| 34 |
-
)
|
| 35 |
link_record = Entrez.read(link_handle)
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
| 43 |
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
| 45 |
handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
|
| 46 |
seq_record = SeqIO.read(handle, "genbank")
|
| 47 |
-
seq_type = "mRNA (编码)" if seq_record.id.startswith("NM_") else "ncRNA (非编码)"
|
| 48 |
|
| 49 |
-
# 提取外显子(
|
| 50 |
junctions = []
|
| 51 |
current_pos = 0
|
| 52 |
for feature in seq_record.features:
|
|
@@ -55,13 +52,11 @@ def get_ready_for_primers(gene_symbol, species="human"):
|
|
| 55 |
current_pos += (end - start)
|
| 56 |
junctions.append(int(current_pos))
|
| 57 |
|
| 58 |
-
if junctions:
|
| 59 |
-
junctions.pop()
|
| 60 |
|
| 61 |
return {
|
| 62 |
"symbol": gene_symbol,
|
| 63 |
"nm_id": seq_record.id,
|
| 64 |
-
"type": seq_type, # 传给前端展示
|
| 65 |
"sequence": str(seq_record.seq),
|
| 66 |
"junctions": junctions
|
| 67 |
}
|
|
|
|
| 12 |
ssl._create_default_https_context = ssl._create_unverified_context
|
| 13 |
|
| 14 |
def get_ready_for_primers(gene_symbol, species="human"):
|
|
|
|
| 15 |
Entrez.email = "your_email@example.com"
|
| 16 |
|
| 17 |
try:
|
|
|
|
| 20 |
handle = Entrez.esearch(db="gene", term=search_term)
|
| 21 |
record = Entrez.read(handle)
|
| 22 |
if not record["IdList"]:
|
| 23 |
+
return {"error": f"未找到基因: {gene_symbol}"}
|
| 24 |
gene_id = record["IdList"][0]
|
| 25 |
|
| 26 |
+
# 2. 改进的 Link 获取逻辑:兼容 NM_, NR_, XM_, XR_ 等所有转录本
|
| 27 |
+
# 不再使用复杂的 term 过滤,直接获取所有关联的核苷酸序列
|
| 28 |
+
link_handle = Entrez.elink(dbfrom="gene", db="nucleotide", id=gene_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
link_record = Entrez.read(link_handle)
|
| 30 |
|
| 31 |
+
nucl_id = None
|
| 32 |
+
# 深度遍历:寻找存储在 'gene_nucleotide' 链接中的 ID
|
| 33 |
+
for link_set in link_record[0].get("LinkSetDb", []):
|
| 34 |
+
# 通常 RefSeq 序列存储在名为 'gene_nucleotide' 或 'gene_nucleotide_refseq' 的链接中
|
| 35 |
+
if "nucleotide" in link_set["DbTo"]:
|
| 36 |
+
nucl_id = link_set["Link"][0]["Id"]
|
| 37 |
+
break
|
| 38 |
|
| 39 |
+
if not nucl_id:
|
| 40 |
+
return {"error": f"基因 {gene_symbol} 找不到关联的核苷酸序列 ID"}
|
| 41 |
+
|
| 42 |
+
# 3. 下载数据
|
| 43 |
handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
|
| 44 |
seq_record = SeqIO.read(handle, "genbank")
|
|
|
|
| 45 |
|
| 46 |
+
# 4. 提取外显子(逻辑同前)
|
| 47 |
junctions = []
|
| 48 |
current_pos = 0
|
| 49 |
for feature in seq_record.features:
|
|
|
|
| 52 |
current_pos += (end - start)
|
| 53 |
junctions.append(int(current_pos))
|
| 54 |
|
| 55 |
+
if junctions: junctions.pop()
|
|
|
|
| 56 |
|
| 57 |
return {
|
| 58 |
"symbol": gene_symbol,
|
| 59 |
"nm_id": seq_record.id,
|
|
|
|
| 60 |
"sequence": str(seq_record.seq),
|
| 61 |
"junctions": junctions
|
| 62 |
}
|