Spaces:
Sleeping
Sleeping
| from flask import Flask, render_template, request, jsonify, send_file | |
| from Bio import Entrez, SeqIO | |
| import primer3 | |
| import ssl | |
| import pandas as pd | |
| import io | |
| import json | |
| from datetime import datetime | |
| import time | |
# Flask application object for this module.
app = Flask(__name__)
# SECURITY NOTE(review): this swaps the ssl module's default HTTPS context
# factory for the unverified one, so HTTPS requests that rely on that default
# skip certificate verification. Presumably a workaround for local CA problems
# when contacting NCBI — confirm it is still required before keeping it.
ssl._create_default_https_context = ssl._create_unverified_context
def _exon_junctions(features):
    """Return cumulative exon-length offsets that mark exon-exon boundaries.

    Walks ``features`` in order, keeps a running total of the lengths of
    features whose ``type`` is ``"exon"`` and records the total after each
    one. The last total is dropped because it marks the end of the final
    exon rather than a boundary between two exons.
    """
    boundaries = []
    offset = 0
    for feature in features:
        if feature.type != "exon":
            continue
        offset += feature.location.end - feature.location.start
        boundaries.append(int(offset))
    return boundaries[:-1]


def get_ready_for_primers(gene_symbol, species="human"):
    """Fetch a gene's linked nucleotide record from NCBI and its exon junctions.

    Args:
        gene_symbol: Gene name used in the NCBI Gene search term.
        species: Organism used in the search term (default ``"human"``).

    Returns:
        On success, a dict with keys ``symbol`` (the input symbol),
        ``nm_id`` (the fetched record's id), ``sequence`` (record sequence
        as ``str``) and ``junctions`` (list of int offsets).
        On any failure, a dict with a single ``error`` key; this function
        does not raise for lookup, network or parsing problems.
    """
    # Reject blank symbols up front instead of sending an empty query to NCBI.
    if not gene_symbol or not gene_symbol.strip():
        return {"error": "请输入基因名称"}

    # NOTE(review): placeholder contact address — replace with a real one.
    Entrez.email = "your_email@example.com"

    # Every Entrez call lives inside this try so that network/parse failures
    # are reported through the error dict rather than propagating to callers.
    try:
        # 1. Search the Gene database for the internal NCBI gene id.
        search_term = f"{gene_symbol}[Gene Name] AND {species}[Organism]"
        handle = Entrez.esearch(db="gene", term=search_term)
        try:
            record = Entrez.read(handle)
        finally:
            handle.close()
        if not record["IdList"]:
            return {"error": "未找到该基因"}
        gene_id = record["IdList"][0]

        # 2. Follow the gene -> nucleotide link, filtered to RefSeq mRNA.
        link_handle = Entrez.elink(
            dbfrom="gene",
            db="nucleotide",
            id=gene_id,
            term="srcdb_refseq[prop] AND mRNA[filter]",
        )
        try:
            link_record = Entrez.read(link_handle)
        finally:
            link_handle.close()
        # First linked nucleotide UID; a missing link raises and is reported
        # through the error dict below.
        nucl_id = link_record[0]["LinkSetDb"][0]["Link"][0]["Id"]

        # 3. Download the full GenBank record for that UID.
        fetch_handle = Entrez.efetch(
            db="nucleotide", id=nucl_id, rettype="gb", retmode="text"
        )
        try:
            seq_record = SeqIO.read(fetch_handle, "genbank")
        finally:
            fetch_handle.close()

        return {
            "symbol": gene_symbol,
            "nm_id": seq_record.id,
            "sequence": str(seq_record.seq),
            "junctions": _exon_junctions(seq_record.features),
        }
    except Exception as e:
        return {"error": f"获取基因信息失败: {str(e)}"}
def design_qpcr_primers(gene_data):
    """Design qPCR primer pairs with primer3, preferring junction-spanning ones.

    A first attempt asks primer3 for primers overlapping one of the exon
    junctions in ``gene_data['junctions']``. If that attempt raises *or
    returns no primer pair*, the design is repeated without the junction
    constraint.

    Args:
        gene_data: Dict produced by ``get_ready_for_primers``. If it holds
            an ``error`` key it is returned unchanged.

    Returns:
        ``{"primers": [...], "gene_info": gene_data}`` on success, where each
        primer entry has ``id``, ``forward``, ``reverse``, ``f_tm``, ``r_tm``,
        ``product_size`` and ``junction_info``; otherwise ``{"error": ...}``.
    """
    if "error" in gene_data:
        return gene_data

    # Template sequence arguments.
    seq_args = {
        'SEQUENCE_ID': gene_data['nm_id'],
        'SEQUENCE_TEMPLATE': gene_data['sequence'],
    }
    # Global design constraints.
    global_args = {
        'PRIMER_OPT_SIZE': 20,
        'PRIMER_MIN_SIZE': 18,
        'PRIMER_MAX_SIZE': 25,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 57.0,
        'PRIMER_MAX_TM': 63.0,
        'PRIMER_TM_MAX_DIFF': 2.0,  # slightly relaxed Tm difference limit
        'PRIMER_MIN_GC': 30.0,  # relaxed GC lower bound
        'PRIMER_MAX_GC': 70.0,  # relaxed GC upper bound
        'PRIMER_PRODUCT_SIZE_RANGE': [80, 250],  # qPCR products are kept short
        'PRIMER_NUM_RETURN': 5,
    }

    try:
        junctions = gene_data.get('junctions', [])
        results = None
        spans_junction = False

        # Attempt 1: require the primers to overlap an exon junction.
        if junctions:
            junction_args = dict(seq_args, SEQUENCE_OVERLAP_JUNCTION_LIST=junctions)
            try:
                results = primer3.bindings.design_primers(junction_args, global_args)
            except Exception:
                results = None
            # An empty result set also counts as a failed attempt; checking
            # for the first pair's key mirrors the lookup used below.
            spans_junction = bool(results) and 'PRIMER_LEFT_0_SEQUENCE' in results

        # Attempt 2: plain design without the junction constraint. Errors
        # raised here reach the outer handler and become an error dict.
        if not spans_junction:
            results = primer3.bindings.design_primers(seq_args, global_args)

        junction_info = "是" if spans_junction else "否(普通设计)"

        primer_pairs = []
        for i in range(global_args['PRIMER_NUM_RETURN']):
            try:
                primer_pairs.append({
                    "id": i + 1,
                    "forward": results[f'PRIMER_LEFT_{i}_SEQUENCE'],
                    "reverse": results[f'PRIMER_RIGHT_{i}_SEQUENCE'],
                    "f_tm": f"{results[f'PRIMER_LEFT_{i}_TM']:.2f}",
                    "r_tm": f"{results[f'PRIMER_RIGHT_{i}_TM']:.2f}",
                    "product_size": results[f'PRIMER_PAIR_{i}_PRODUCT_SIZE'],
                    "junction_info": junction_info,
                })
            except KeyError:
                # Fewer pairs were returned than requested.
                break

        if not primer_pairs:
            return {"error": "无法找到符合条件的引物,请尝试放宽筛选标准"}
        return {"primers": primer_pairs, "gene_info": gene_data}
    except Exception as e:
        return {"error": f"引物设计深度失败: {str(e)}"}
def index():
    """Render the ``index.html`` template and return the result."""
    # NOTE(review): no route decorator is visible on this view in the source
    # at hand — confirm it is registered with the Flask app.
    page = render_template('index.html')
    return page
def design_primers_api():
    """Design primers for the single gene named in the JSON request body.

    Reads ``gene_symbol`` and ``species`` (default ``'human'``) from the
    request JSON and returns, as a JSON response, either the design result
    or an ``{"error": ...}`` object.

    A missing, malformed or non-object body is treated as an empty payload,
    so the caller receives the "gene name required" error instead of an
    unhandled exception.
    """
    # NOTE(review): no route decorator is visible on this view in the source
    # at hand — confirm it is registered with the Flask app.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    # str() guards against non-string JSON values (numbers, for example).
    gene_symbol = str(data.get('gene_symbol') or '').strip()
    species = data.get('species') or 'human'

    if not gene_symbol:
        return jsonify({"error": "请输入基因名称"})

    # Fetch the gene record, then design primers on it.
    gene_data = get_ready_for_primers(gene_symbol, species)
    result = design_qpcr_primers(gene_data)
    return jsonify(result)
def batch_design_primers_api():
    """Design primers for every gene in the request's ``gene_list``.

    Reads ``gene_list`` and ``species`` (default ``'human'``) from the request
    JSON. Returns a JSON object ``{"results": [...]}`` with one entry per
    non-blank gene: ``{"gene", "status": "success", "data"}`` or
    ``{"gene", "status": "failed", "error"}``. An empty or invalid list
    yields ``{"error": ...}``.

    A failure on one gene, including an unexpected exception, is recorded for
    that gene only and does not abort the rest of the batch.
    """
    # NOTE(review): no route decorator is visible on this view in the source
    # at hand — confirm it is registered with the Flask app.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    gene_list = data.get('gene_list') or []
    species = data.get('species') or 'human'

    # A bare string would otherwise be iterated character by character.
    if isinstance(gene_list, str):
        gene_list = [gene_list]
    if not isinstance(gene_list, (list, tuple)) or not gene_list:
        return jsonify({"error": "请输入基因列表"})

    results = []
    for entry in gene_list:
        gene_symbol = '' if entry is None else str(entry).strip()
        if not gene_symbol:
            continue

        # Pause between consecutive lookups; nothing to wait for before the
        # first one.
        if results:
            time.sleep(1)

        try:
            gene_data = get_ready_for_primers(gene_symbol, species)
            result = design_qpcr_primers(gene_data)
        except Exception as e:
            result = {"error": str(e)}

        if "error" in result:
            results.append({
                "gene": gene_symbol,
                "status": "failed",
                "error": result["error"],
            })
        else:
            results.append({
                "gene": gene_symbol,
                "status": "success",
                "data": result,
            })

    return jsonify({"results": results})
def export_primers():
    """Export design results in the format named by the request JSON.

    Reads ``format`` (``'excel'``, ``'csv'`` or ``'json'``; default
    ``'excel'``) and ``data`` (list of result entries; default empty) from
    the request JSON and delegates to the matching ``export_to_*`` function.
    An unknown format yields a JSON ``{"error": ...}`` response.

    A missing, malformed or non-object body is treated as an empty payload.
    """
    # NOTE(review): no route decorator is visible on this view in the source
    # at hand — confirm it is registered with the Flask app.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    export_format = data.get('format', 'excel')
    # ``or []`` also covers an explicit JSON null.
    results_data = data.get('data') or []

    exporters = {
        'excel': export_to_excel,
        'csv': export_to_csv,
        'json': export_to_json,
    }
    # Non-string (possibly unhashable) values can never name a format.
    exporter = exporters.get(export_format) if isinstance(export_format, str) else None
    if exporter is None:
        return jsonify({"error": "不支持的导出格式"})
    return exporter(results_data)
def _build_export_rows(results_data):
    """Flatten result entries into one row dict per primer pair or failure.

    Successful entries (``status == 'success'``) contribute one row per
    primer pair. Any other entry contributes a single placeholder row whose
    data columns are ``'N/A'`` plus an extra error-message column. All rows
    of one export share the same timestamp.
    """
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    rows = []
    for result in results_data:
        if result.get('status') == 'success':
            gene_info = result['data']['gene_info']
            junction_text = ', '.join(map(str, gene_info['junctions']))
            for primer in result['data']['primers']:
                rows.append({
                    '基因名称': gene_info['symbol'],
                    'RefSeq ID': gene_info['nm_id'],
                    '引物对编号': primer['id'],
                    '正向引物序列': primer['forward'],
                    '反向引物序列': primer['reverse'],
                    '正向引物Tm(°C)': primer['f_tm'],
                    '反向引物Tm(°C)': primer['r_tm'],
                    '产物长度(bp)': primer['product_size'],
                    '外显子交界点': junction_text,
                    '设计时间': stamp,
                })
        else:
            rows.append({
                # .get() keeps a malformed entry from aborting the export.
                '基因名称': result.get('gene', 'N/A'),
                'RefSeq ID': 'N/A',
                '引物对编号': 'N/A',
                '正向引物序列': 'N/A',
                '反向引物序列': 'N/A',
                '正向引物Tm(°C)': 'N/A',
                '反向引物Tm(°C)': 'N/A',
                '产物长度(bp)': 'N/A',
                '外显子交界点': 'N/A',
                '设计时间': stamp,
                '错误信息': result.get('error', '未知错误'),
            })
    return rows


def export_to_excel(results_data):
    """Return the results as a downloadable ``.xlsx`` attachment.

    Args:
        results_data: List of result entries as produced by the batch API.

    Returns:
        The ``send_file`` response carrying a one-sheet workbook with one
        row per primer pair and one placeholder row per failed gene.
    """
    df = pd.DataFrame(_build_export_rows(results_data))

    # Build the workbook in memory.
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name='引物设计结果', index=False)
    output.seek(0)

    filename = f"qPCR_primers_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
    return send_file(
        output,
        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        as_attachment=True,
        download_name=filename,
    )


def export_to_csv(results_data):
    """Return the results as a downloadable UTF-8 (with BOM) ``.csv`` attachment.

    Uses the same rows as the Excel export, so failed genes appear as
    placeholder rows with their error message instead of being dropped.

    Args:
        results_data: List of result entries as produced by the batch API.

    Returns:
        The ``send_file`` response carrying the CSV bytes.
    """
    df = pd.DataFrame(_build_export_rows(results_data))

    # Written to a text buffer; the BOM is added once when the text is
    # encoded to bytes below.
    output = io.StringIO()
    df.to_csv(output, index=False)

    filename = f"qPCR_primers_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    return send_file(
        io.BytesIO(output.getvalue().encode('utf-8-sig')),
        mimetype='text/csv',
        as_attachment=True,
        download_name=filename,
    )
def export_to_json(results_data):
    """Return the results as a downloadable ``.json`` attachment.

    The payload wraps ``results_data`` together with the export time and
    the number of entries, serialised as indented UTF-8 JSON with non-ASCII
    characters left unescaped.
    """
    payload = {
        "export_time": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        "total_genes": len(results_data),
        "results": results_data,
    }
    file_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"qPCR_primers_{file_stamp}.json"

    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    body = io.BytesIO(serialized.encode('utf-8'))

    return send_file(
        body,
        mimetype='application/json',
        as_attachment=True,
        download_name=filename,
    )
if __name__ == '__main__':
    import os

    # The interactive debugger allows arbitrary code execution, and this
    # server listens on all interfaces, so debug mode is opt-in: set
    # FLASK_DEBUG=1 (or true/yes) for local development only.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)