"""Streamlit app: search PubMed, list up to 200 articles, optionally translate
titles/abstracts to Chinese with googletrans."""

import streamlit as st
import requests
import xml.etree.ElementTree as ET
import pandas as pd
from googletrans import Translator

# UI translation strings. Both languages share the same key set; values are
# looked up via get_translation(lang, key).
translations = {
    "en": {
        "title": "PubMed Literature Search",
        "search_placeholder": "Enter search query (e.g., cancer treatment)",
        "search_button": "Search",
        "language_option": "English",
        "language_label": "Language",
        "results_title": "Search Results",
        "showing_results": "Showing {count} articles.",
        "error_message": "An error occurred while fetching data from PubMed.",
        "no_results": "No articles found for the given query.",
        "col_index": "Index",
        "col_title": "Title",
        "col_abstract": "Abstract",
        "col_authors": "Authors",
        "col_link": "Link",
        "spinner_searching": "Searching PubMed and fetching up to 200 articles...",
        "spinner_translating": "Translating results to Chinese (this may take a moment)...",
        "translation_warning": "Could not translate some entries. Displaying original text for those."
    },
    "zh": {
        "title": "PubMed 文献检索",
        "search_placeholder": "输入检索词(例如:cancer treatment)",
        "search_button": "检索",
        "language_option": "中文",
        "language_label": "语言",
        "results_title": "检索结果",
        "showing_results": "共找到 {count} 篇文献。",
        "error_message": "从 PubMed 获取数据时出错。",
        "no_results": "未找到相关文献。",
        "col_index": "序号",
        "col_title": "文献标题",
        "col_abstract": "文献摘要",
        "col_authors": "文献作者",
        "col_link": "文献链接",
        "spinner_searching": "正在检索 PubMed 并获取最多 200 篇文献...",
        "spinner_translating": "正在批量翻译结果 (请稍候)...",
        "translation_warning": "部分条目翻译失败,将显示原文。"
    }
}


def get_translation(lang, key):
    """Return the UI string *key* for language code *lang* ('en' or 'zh')."""
    return translations[lang][key]


@st.cache_data(ttl=3600)
def search_pubmed(query, retmax=200):
    """Search PubMed via E-utilities esearch and return a list of PMID strings.

    Returns an empty list (after showing a Streamlit error) on network failure.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    try:
        # Pass the query through `params` so requests URL-encodes it; the
        # original interpolated the raw user text into the URL, which breaks
        # on special characters.
        search_response = requests.get(
            f"{base_url}esearch.fcgi",
            params={"db": "pubmed", "term": query, "retmax": retmax},
            timeout=20,  # the ID search is normally fast
        )
        search_response.raise_for_status()
        search_root = ET.fromstring(search_response.content)
        return [id_elem.text for id_elem in search_root.findall(".//Id")]
    except requests.exceptions.RequestException as e:
        st.error(f"Error during PubMed ID search: {e}")
        return []


@st.cache_data(ttl=3600)
def fetch_articles(id_list):
    """Fetch raw article XML (efetch) for the given tuple of PMIDs.

    BUGFIX: the parameter was previously named ``_id_list``; a leading
    underscore tells st.cache_data to EXCLUDE the argument from the cache
    key, so every query within the TTL returned the FIRST query's articles.
    The caller passes a hashable tuple, so the IDs can (and must) be part
    of the cache key.

    Returns the response body as bytes, or None on failure / empty input.
    """
    if not id_list:
        return None
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    ids = ",".join(id_list)
    fetch_url = f"{base_url}efetch.fcgi?db=pubmed&id={ids}&rettype=xml"
    try:
        # Generous timeout: fetching up to 200 full records is a large request.
        fetch_response = requests.get(fetch_url, timeout=90)
        fetch_response.raise_for_status()
        return fetch_response.content
    except requests.exceptions.RequestException as e:
        # Fixed: the original message contained stray Thai text here.
        st.error(f"Error during fetching article details (possibly a timeout): {e}")
        return None


def parse_articles(xml_data):
    """Parse efetch XML into a list of dicts: Title/Abstract/Authors/Link.

    Malformed XML is reported via st.error and yields an empty list instead
    of an uncaught ParseError.
    """
    articles = []
    if not xml_data:
        return articles
    try:
        root = ET.fromstring(xml_data)
    except ET.ParseError as e:
        st.error(f"Could not parse PubMed response: {e}")
        return articles
    for article in root.findall(".//PubmedArticle"):
        # itertext() flattens markup (e.g. <i>, <sub>) inside titles/abstracts.
        title_elem = article.find(".//ArticleTitle")
        title = "".join(title_elem.itertext()) if title_elem is not None else "No Title"
        abstract_elem = article.find(".//Abstract/AbstractText")
        abstract = "".join(abstract_elem.itertext()) if abstract_elem is not None else "No Abstract"
        author_list = article.findall(".//Author")
        authors = ", ".join([
            f"{author.find('LastName').text if author.find('LastName') is not None else ''} "
            f"{author.find('Initials').text if author.find('Initials') is not None else ''}".strip()
            for author in author_list
        ]) if author_list else "No Authors"
        pmid_elem = article.find(".//PMID")
        pmid = pmid_elem.text if pmid_elem is not None else ""
        link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "No Link"
        articles.append({
            "Title": title,
            "Abstract": abstract,
            "Authors": authors,
            "Link": link
        })
    return articles


def main():
    """Render the Streamlit UI: language toggle, search box, results table."""
    st.set_page_config(layout="wide", page_title="PubMed Search Tool")
    lang = st.sidebar.radio(
        get_translation("en", "language_label") + " / " + get_translation("zh", "language_label"),
        ("en", "zh"),
        format_func=lambda x: "English" if x == "en" else "中文"
    )
    st.title(get_translation(lang, "title"))
    search_query = st.text_input("", placeholder=get_translation(lang, "search_placeholder"))
    if st.button(get_translation(lang, "search_button")):
        if search_query:
            with st.spinner(get_translation(lang, "spinner_searching")):
                id_list = search_pubmed(search_query, retmax=200)
                articles = []
                if id_list:
                    # Tuple: hashable, so st.cache_data can key the fetch on it.
                    xml_data = fetch_articles(tuple(id_list))
                    if xml_data:
                        articles = parse_articles(xml_data)
            if articles:
                st.subheader(get_translation(lang, "results_title"))
                st.info(get_translation(lang, "showing_results").format(count=len(articles)))
                df = pd.DataFrame(articles)
                if lang == 'zh':
                    with st.spinner(get_translation(lang, "spinner_translating")):
                        try:
                            translator = Translator()
                            df_translated = df.copy()
                            # Batch translation: collect all real titles and
                            # abstracts (skipping placeholders) and translate
                            # each list in a single googletrans call.
                            title_mask = df_translated['Title'] != "No Title"
                            abstract_mask = df_translated['Abstract'] != "No Abstract"
                            titles_to_translate = df_translated[title_mask]['Title'].tolist()
                            abstracts_to_translate = df_translated[abstract_mask]['Abstract'].tolist()
                            # Keep the indices so results can be written back
                            # to the right rows.
                            title_indices = df_translated[title_mask].index
                            abstract_indices = df_translated[abstract_mask].index
                            if titles_to_translate:
                                translated_titles = translator.translate(titles_to_translate, dest='zh-cn')
                                df_translated.loc[title_indices, 'Title'] = [t.text for t in translated_titles]
                            if abstracts_to_translate:
                                translated_abstracts = translator.translate(abstracts_to_translate, dest='zh-cn')
                                df_translated.loc[abstract_indices, 'Abstract'] = [t.text for t in translated_abstracts]
                            df = df_translated
                        except Exception as e:
                            # Best-effort: fall back to the untranslated frame.
                            st.warning(f"{get_translation(lang, 'translation_warning')} (Error: {e})", icon="⚠️")
                df.index = range(1, len(df) + 1)
                df.rename(columns={
                    "Title": get_translation(lang, "col_title"),
                    "Abstract": get_translation(lang, "col_abstract"),
                    "Authors": get_translation(lang, "col_authors"),
                    "Link": get_translation(lang, "col_link")
                }, inplace=True)
                df.index.name = get_translation(lang, "col_index")
                st.dataframe(df)
            else:
                st.warning(get_translation(lang, "no_results"))


if __name__ == "__main__":
    main()