# Source: leonsimon23 — "Update app.py" (commit f73be48, verified)
import streamlit as st
import requests
import xml.etree.ElementTree as ET
import pandas as pd
from googletrans import Translator
# UI string table, keyed first by language code ("en" / "zh"), then by a
# message key. Both languages must define the same set of keys, since
# get_translation() does a direct double lookup with no fallback.
translations = {
"en": {
"title": "PubMed Literature Search",
"search_placeholder": "Enter search query (e.g., cancer treatment)",
"search_button": "Search",
"language_option": "English",
"language_label": "Language",
"results_title": "Search Results",
# "{count}" is filled in via str.format() with the number of articles.
"showing_results": "Showing {count} articles.",
"error_message": "An error occurred while fetching data from PubMed.",
"no_results": "No articles found for the given query.",
"col_index": "Index",
"col_title": "Title",
"col_abstract": "Abstract",
"col_authors": "Authors",
"col_link": "Link",
"spinner_searching": "Searching PubMed and fetching up to 200 articles...",
"spinner_translating": "Translating results to Chinese (this may take a moment)...",
"translation_warning": "Could not translate some entries. Displaying original text for those."
},
"zh": {
"title": "PubMed 文献检索",
"search_placeholder": "输入检索词(例如:cancer treatment)",
"search_button": "检索",
"language_option": "中文",
"language_label": "语言",
"results_title": "检索结果",
"showing_results": "共找到 {count} 篇文献。",
"error_message": "从 PubMed 获取数据时出错。",
"no_results": "未找到相关文献。",
"col_index": "序号",
"col_title": "文献标题",
"col_abstract": "文献摘要",
"col_authors": "文献作者",
"col_link": "文献链接",
"spinner_searching": "正在检索 PubMed 并获取最多 200 篇文献...",
"spinner_translating": "正在批量翻译结果 (请稍候)...",
"translation_warning": "部分条目翻译失败,将显示原文。"
}
}
def get_translation(lang, key):
    """Return the UI string for *key* in language *lang* ("en" or "zh").

    Raises KeyError if either the language or the message key is unknown —
    the string table is expected to be complete for both languages.
    """
    language_table = translations[lang]
    return language_table[key]
@st.cache_data(ttl=3600)
def search_pubmed(query, retmax=200):
    """Search PubMed via the NCBI ESearch API and return a list of PMID strings.

    Args:
        query: Free-text PubMed search expression entered by the user.
        retmax: Maximum number of IDs to request (default 200).

    Returns:
        List of PMID strings (possibly empty). On any network/HTTP failure an
        error is shown in the Streamlit UI and an empty list is returned.

    Results are cached for one hour (st.cache_data ttl).
    """
    search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    # Pass the query via `params=` so requests URL-encodes it properly.
    # The original f-string interpolation sent the raw user input, which
    # breaks on quotes, '&', '#', and non-ASCII search terms.
    params = {"db": "pubmed", "term": query, "retmax": retmax}
    try:
        # ID search is lightweight, so a short timeout is sufficient.
        search_response = requests.get(search_url, params=params, timeout=20)
        search_response.raise_for_status()
        search_root = ET.fromstring(search_response.content)
        return [id_elem.text for id_elem in search_root.findall(".//Id")]
    except requests.exceptions.RequestException as e:
        st.error(f"Error during PubMed ID search: {e}")
        return []
@st.cache_data(ttl=3600)
def fetch_articles(_id_list):
    """Fetch full article records for the given PMIDs via the NCBI EFetch API.

    Args:
        _id_list: Sequence of PMID strings. The leading underscore tells
            st.cache_data the caller passes a hashable tuple (see main()).

    Returns:
        Raw XML response bytes, or None when the list is empty or the
        request fails (the error is surfaced in the Streamlit UI).
    """
    if not _id_list:
        return None
    fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    # Build the query with `params=` so the comma-joined ID list and other
    # values are URL-encoded correctly.
    params = {"db": "pubmed", "id": ",".join(_id_list), "rettype": "xml"}
    try:
        # Generous 90s timeout: a 200-article batch response is large.
        fetch_response = requests.get(fetch_url, params=params, timeout=90)
        fetch_response.raise_for_status()
        return fetch_response.content
    except requests.exceptions.RequestException as e:
        # Fixed: the original message contained stray Thai text
        # "(อาจเป็นเพราะหมดเวลา)" in an English/Chinese app.
        st.error(f"Error during fetching article details (possibly a timeout): {e}")
        return None
def parse_articles(xml_data):
    """Parse PubMed EFetch XML into a list of article dicts.

    Args:
        xml_data: Raw XML bytes/str from EFetch, or None.

    Returns:
        List of dicts with keys "Title", "Abstract", "Authors", "Link".
        Empty list for missing or unparseable input.
    """
    articles = []
    if not xml_data:
        return articles
    try:
        root = ET.fromstring(xml_data)
    except ET.ParseError:
        # Malformed API response: treat as "no articles" instead of crashing
        # the whole Streamlit page with an unhandled exception.
        return articles
    for article in root.findall(".//PubmedArticle"):
        title_elem = article.find(".//ArticleTitle")
        # itertext() flattens inline markup (<i>, <sub>, ...) inside titles.
        title = "".join(title_elem.itertext()) if title_elem is not None else "No Title"
        # Structured abstracts contain several <AbstractText> sections
        # (BACKGROUND, METHODS, RESULTS, ...). Join them all — the original
        # code kept only the first section and silently dropped the rest.
        abstract_parts = [
            "".join(part.itertext())
            for part in article.findall(".//Abstract/AbstractText")
        ]
        abstract = " ".join(p for p in abstract_parts if p) or "No Abstract"
        author_entries = []
        for author in article.findall(".//Author"):
            last = author.find("LastName")
            initials = author.find("Initials")
            last_text = last.text if last is not None else ""
            initials_text = initials.text if initials is not None else ""
            author_entries.append(f"{last_text} {initials_text}".strip())
        authors = ", ".join(author_entries) if author_entries else "No Authors"
        pmid_elem = article.find(".//PMID")
        pmid = pmid_elem.text if pmid_elem is not None else ""
        link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "No Link"
        articles.append({
            "Title": title,
            "Abstract": abstract,
            "Authors": authors,
            "Link": link,
        })
    return articles
def main():
    """Render the Streamlit UI: language picker, search box, results table.

    Flow: search PubMed for IDs -> fetch article XML -> parse into a
    DataFrame -> (for Chinese UI) batch-translate titles/abstracts -> show.
    """
    st.set_page_config(layout="wide", page_title="PubMed Search Tool")
    # Bilingual label so the toggle is readable in either language.
    lang = st.sidebar.radio(
        get_translation("en", "language_label") + " / " + get_translation("zh", "language_label"),
        ("en", "zh"),
        format_func=lambda x: "English" if x == "en" else "中文"
    )
    st.title(get_translation(lang, "title"))
    search_query = st.text_input("", placeholder=get_translation(lang, "search_placeholder"))
    if st.button(get_translation(lang, "search_button")):
        if search_query:
            with st.spinner(get_translation(lang, "spinner_searching")):
                id_list = search_pubmed(search_query, retmax=200)
                articles = []
                if id_list:
                    # tuple() makes the ID list hashable so st.cache_data can
                    # key fetch_articles on it.
                    xml_data = fetch_articles(tuple(id_list))
                    if xml_data:
                        articles = parse_articles(xml_data)
            if articles:
                st.subheader(get_translation(lang, "results_title"))
                st.info(get_translation(lang, "showing_results").format(count=len(articles)))
                df = pd.DataFrame(articles)
                if lang == 'zh':
                    with st.spinner(get_translation(lang, "spinner_translating")):
                        try:
                            translator = Translator()
                            df_translated = df.copy()
                            # Batch-translation optimization: collect all
                            # titles/abstracts and translate each group in a
                            # single googletrans call instead of per row.
                            # Skip placeholder values so "No Title" /
                            # "No Abstract" are not sent to the translator.
                            titles_to_translate = df_translated[df_translated['Title'] != "No Title"]['Title'].tolist()
                            abstracts_to_translate = df_translated[df_translated['Abstract'] != "No Abstract"]['Abstract'].tolist()
                            # Keep the matching row indices so translated text
                            # can be written back to the right rows.
                            title_indices = df_translated[df_translated['Title'] != "No Title"].index
                            abstract_indices = df_translated[df_translated['Abstract'] != "No Abstract"].index
                            # Translate all titles in one call.
                            if titles_to_translate:
                                translated_titles = translator.translate(titles_to_translate, dest='zh-cn')
                                df_translated.loc[title_indices, 'Title'] = [t.text for t in translated_titles]
                            # Translate all abstracts in one call.
                            if abstracts_to_translate:
                                translated_abstracts = translator.translate(abstracts_to_translate, dest='zh-cn')
                                df_translated.loc[abstract_indices, 'Abstract'] = [t.text for t in translated_abstracts]
                            df = df_translated
                        except Exception as e:
                            # Best-effort translation: on any failure keep the
                            # original English text and warn the user.
                            st.warning(f"{get_translation(lang, 'translation_warning')} (Error: {e})", icon="⚠️")
                # 1-based row numbering for display.
                df.index = range(1, len(df) + 1)
                df.rename(columns={
                    "Title": get_translation(lang, "col_title"),
                    "Abstract": get_translation(lang, "col_abstract"),
                    "Authors": get_translation(lang, "col_authors"),
                    "Link": get_translation(lang, "col_link")
                }, inplace=True)
                df.index.name = get_translation(lang, "col_index")
                st.dataframe(df)
            else:
                st.warning(get_translation(lang, "no_results"))
if __name__ == "__main__":
    main()