Spaces:
Running
Running
| import gradio as gr | |
| import csv | |
| import os | |
| from Bio import Entrez | |
| import xml.etree.ElementTree as ET | |
| import time | |
| import pandas as pd | |
| from datetime import datetime | |
# NCBI requires a contact email (and recommends an API key) for E-utilities
# requests; both are read from environment variables at import time.
Entrez.email = os.environ.get("EMAIL")
Entrez.api_key = os.environ.get("NCBI_API_KEY")
def search_pubmed(query, max_results=100):
    """Search PubMed and return the raw Entrez esearch record.

    The record is a dict-like object containing at least ``IdList`` plus the
    history-server keys ``WebEnv`` and ``QueryKey`` used by ``fetch_details``.
    (The original docstring claimed an ID list was returned; the full record
    is returned so the history keys survive for the efetch step.)

    Args:
        query: PubMed query string, e.g. ``"breast cancer AND 2020[PDAT]"``.
        max_results: maximum number of PMIDs to request.

    Returns:
        The parsed esearch record on success, or an error-message string on
        failure (callers distinguish the two via ``isinstance(..., str)``).
    """
    try:
        handle = Entrez.esearch(
            db="pubmed", term=query, retmax=max_results, usehistory="y"
        )
        record = Entrez.read(handle)
        handle.close()
        return record
    except Exception as e:
        # Errors are surfaced as strings so the Gradio layer can display them.
        return f"Error during search: {str(e)}"
def fetch_details(search_res):
    """Fetch and parse article details for an esearch result.

    Args:
        search_res: record returned by ``search_pubmed``; must contain
            ``IdList``, ``WebEnv`` and ``QueryKey``.

    Returns:
        A list of dicts with keys PMID/Title/Authors/Abstract/Year/Journal,
        an empty list when there is nothing to fetch, or an error-message
        string on failure.
    """
    pmids = search_res['IdList']
    if not pmids or isinstance(pmids, str):
        return []
    try:
        # Batch-fetch all records in one request via the NCBI history server.
        handle = Entrez.efetch(db="pubmed", rettype="medline", retmode="xml",
                               id=",".join(pmids),
                               webenv=search_res['WebEnv'],
                               query_key=search_res['QueryKey'])
        records = handle.read()
        handle.close()
        root = ET.fromstring(records)
        articles = []
        for article in root.findall(".//PubmedArticle"):
            # Resolve the PMID before any other parsing so the error message
            # below can always reference it.  (The original code read `pmid`
            # in the except handler, which raised NameError whenever parsing
            # failed before the PMID had been assigned.)
            pmid_el = article.find(".//PMID")
            pmid = pmid_el.text if pmid_el is not None else "N/A"
            try:
                articles.append(_parse_article(article, pmid))
            except Exception as e:
                print(f"Error processing article with PMID {pmid}: {e}")
                continue
        return articles
    except Exception as e:
        return f"Error fetching details: {str(e)}"


def _parse_article(article, pmid):
    """Extract one article's metadata dict from a PubmedArticle XML element."""
    title_el = article.find(".//ArticleTitle")
    title = title_el.text if title_el is not None else "N/A"

    author_list = []
    for author in article.findall(".//Author"):
        last_el = author.find("LastName")
        init_el = author.find("Initials")
        last_name = last_el.text if last_el is not None else ""
        initials = init_el.text if init_el is not None else ""
        author_list.append(f"{last_name} {initials}".strip())
    authors_str = "; ".join(author_list) if author_list else "N/A"

    # Structured abstracts carry several AbstractText sections; join all of
    # them (using itertext so inline markup inside a section is kept) instead
    # of keeping only the first section's bare .text.
    abstract_parts = [
        "".join(part.itertext()).strip()
        for part in article.findall(".//AbstractText")
    ]
    abstract_text = " ".join(p for p in abstract_parts if p) or "N/A"

    year_el = article.find(".//PubDate/Year")
    pub_year = year_el.text if year_el is not None else "N/A"

    journal_el = article.find(".//Journal/Title")
    journal = journal_el.text if journal_el is not None else "N/A"

    return {
        "PMID": pmid,
        "Title": title,
        "Authors": authors_str,
        "Abstract": abstract_text,
        "Year": pub_year,
        "Journal": journal,
    }
def save_to_csv(articles, filename="pubmed_results.csv"):
    """Write parsed article dicts to a CSV file and return its path.

    Returns None when there is nothing to write: an empty list, or an
    error-message string passed through from an earlier step.
    """
    if not articles or isinstance(articles, str):
        return None
    fieldnames = ["PMID", "Title", "Authors", "Abstract", "Year", "Journal"]
    with open(filename, "w", newline="", encoding="utf-8") as out:
        writer = csv.DictWriter(out, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(articles)
    return filename
def search_and_display(query, max_results):
    """Run a PubMed search and prepare outputs for the Gradio UI.

    Args:
        query: PubMed query string.
        max_results: maximum number of results (coerced to int).

    Returns:
        A ``(table, csv_path, status)`` triple matching the Gradio wiring
        ``outputs=[output_table, output_file, output_text]``.  On error,
        ``table`` and ``csv_path`` are None and ``status`` carries the
        message.  (The original returned error messages in the FIRST slot,
        which sent them to the DataFrame component instead of the Status
        textbox.)
    """
    if not query:
        return None, None, "Please enter a search query."
    try:
        max_results = int(max_results)
        if max_results <= 0:
            return None, None, "Max results must be a positive number."
    except (TypeError, ValueError):
        # TypeError covers gr.Number handing us None when the field is empty.
        return None, None, "Max results must be a valid number."
    # Run the search; a string result is an error message from search_pubmed.
    search_res = search_pubmed(query, max_results)
    if isinstance(search_res, str):
        return None, None, search_res
    if not search_res:
        return None, None, "No results found."
    # Fetch details; again a string result signals an error.
    articles = fetch_details(search_res)
    if isinstance(articles, str):
        return None, None, articles
    if not articles:
        return None, None, "No valid articles retrieved."
    # Tabulate for display and save a timestamped CSV for download.
    df = pd.DataFrame(articles)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_path = save_to_csv(articles, f"pubmed_results_{timestamp}.csv")
    return df, csv_path, f"Found {len(articles)} articles."
# Gradio UI: a query box and result-count limiter feed search_and_display,
# which fills a status textbox, a results table and a downloadable CSV.
with gr.Blocks() as demo:
    gr.Markdown("# PubMed Search App")
    gr.Markdown("Enter a PubMed search query and the maximum number of results to retrieve. Results will be displayed in a table and available for download as a CSV file.")
    with gr.Row():
        query_input = gr.Textbox(label="Search Query", placeholder="e.g., breast cancer AND 2020[PDAT]")
        max_results_input = gr.Number(label="Max Results", value=10, minimum=1, maximum=100)
    search_button = gr.Button("Search")
    output_text = gr.Textbox(label="Status")
    output_table = gr.DataFrame(label="Search Results")
    output_file = gr.File(label="Download CSV")
    # Output order (table, file, text) must match search_and_display's
    # success-path return order (df, csv_path, message).
    search_button.click(
        fn=search_and_display,
        inputs=[query_input, max_results_input],
        outputs=[output_table, output_file, output_text]
    )

# Launch the Gradio app; mcp_server=True also exposes it as an MCP server.
if __name__ == "__main__":
    demo.launch(mcp_server=True)