Spaces:
Sleeping
Sleeping
File size: 8,201 Bytes
cedac74 f73be48 cedac74 5a53f4f cedac74 09d7a27 5a53f4f f73be48 cedac74 5a53f4f cedac74 09d7a27 5a53f4f f73be48 09d7a27 cedac74 5a53f4f cedac74 f73be48 cedac74 09d7a27 cedac74 5a53f4f 09d7a27 cedac74 09d7a27 cedac74 f73be48 cedac74 f73be48 cedac74 f73be48 cedac74 09d7a27 cedac74 09d7a27 cedac74 09d7a27 cedac74 09d7a27 5a53f4f f73be48 cedac74 5a53f4f f73be48 09d7a27 5a53f4f 09d7a27 f73be48 09d7a27 cedac74 09d7a27 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
import streamlit as st
import requests
import xml.etree.ElementTree as ET
import pandas as pd
from googletrans import Translator
# UI string table: the outer key is the interface language code ("en" / "zh"),
# the inner key is the message identifier looked up by get_translation().
# Both languages define the same set of message identifiers.
translations = {
    "en": {
        # Page chrome and search controls
        "title": "PubMed Literature Search",
        "search_placeholder": "Enter search query (e.g., cancer treatment)",
        "search_button": "Search",
        "language_option": "English",
        "language_label": "Language",
        # Result summary and status messages
        "results_title": "Search Results",
        "showing_results": "Showing {count} articles.",
        "error_message": "An error occurred while fetching data from PubMed.",
        "no_results": "No articles found for the given query.",
        # Column headers of the results table
        "col_index": "Index",
        "col_title": "Title",
        "col_abstract": "Abstract",
        "col_authors": "Authors",
        "col_link": "Link",
        # Progress spinners and translation feedback
        "spinner_searching": "Searching PubMed and fetching up to 200 articles...",
        "spinner_translating": "Translating results to Chinese (this may take a moment)...",
        "translation_warning": "Could not translate some entries. Displaying original text for those.",
    },
    "zh": {
        # Page chrome and search controls
        "title": "PubMed 文献检索",
        "search_placeholder": "输入检索词(例如:cancer treatment)",
        "search_button": "检索",
        "language_option": "中文",
        "language_label": "语言",
        # Result summary and status messages
        "results_title": "检索结果",
        "showing_results": "共找到 {count} 篇文献。",
        "error_message": "从 PubMed 获取数据时出错。",
        "no_results": "未找到相关文献。",
        # Column headers of the results table
        "col_index": "序号",
        "col_title": "文献标题",
        "col_abstract": "文献摘要",
        "col_authors": "文献作者",
        "col_link": "文献链接",
        # Progress spinners and translation feedback
        "spinner_searching": "正在检索 PubMed 并获取最多 200 篇文献...",
        "spinner_translating": "正在批量翻译结果 (请稍候)...",
        "translation_warning": "部分条目翻译失败,将显示原文。",
    },
}
def get_translation(lang, key):
    """Look up the UI string stored under *key* for the language code *lang*.

    Raises:
        KeyError: if *lang* or *key* is not present in ``translations``.
    """
    strings_for_lang = translations[lang]
    return strings_for_lang[key]
@st.cache_data(ttl=3600)
def _search_pubmed_ids(query, retmax):
    """Query PubMed ESearch and return the matching PMIDs as a list of strings.

    Cached for one hour per ``(query, retmax)``. Network and XML errors are
    allowed to propagate so that a failed lookup is never stored in the cache.
    """
    search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    # Let requests build the query string: the user's text may contain spaces,
    # '&', '#' and similar characters that must be percent-encoded.
    search_params = {"db": "pubmed", "term": query, "retmax": retmax}
    # The ID search is normally fast, so a short timeout is enough.
    search_response = requests.get(search_url, params=search_params, timeout=20)
    search_response.raise_for_status()
    search_root = ET.fromstring(search_response.content)
    return [id_elem.text for id_elem in search_root.findall(".//Id")]


def search_pubmed(query, retmax=200):
    """Return up to *retmax* PubMed IDs matching *query*.

    Args:
        query: Free-text search expression entered by the user.
        retmax: Maximum number of IDs requested from PubMed.

    Returns:
        A list of PMID strings. On a network failure or an unparsable
        response an error is shown in the Streamlit UI and an empty list is
        returned; such failures are not cached, so the next attempt retries.
    """
    try:
        return _search_pubmed_ids(query, retmax)
    except (requests.exceptions.RequestException, ET.ParseError) as e:
        st.error(f"Error during PubMed ID search: {e}")
        return []
@st.cache_data(ttl=3600)
def _fetch_articles_xml(id_tuple):
    """Download the PubMed EFetch XML for the PMIDs in *id_tuple*.

    The parameter name deliberately has no leading underscore: Streamlit
    leaves underscore-prefixed parameters out of the cache key, which would
    make every search return the first cached response. Request errors
    propagate so that failures are never cached.
    """
    fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    fetch_params = {"db": "pubmed", "id": ",".join(id_tuple), "rettype": "xml"}
    # Generous timeout: a request for 200 articles can produce a large response.
    fetch_response = requests.get(fetch_url, params=fetch_params, timeout=90)
    fetch_response.raise_for_status()
    return fetch_response.content


def fetch_articles(_id_list):
    """Fetch the article records for the given PubMed IDs.

    Args:
        _id_list: Sequence of PMID strings (list or tuple).

    Returns:
        The raw XML response as bytes, or ``None`` when *_id_list* is empty or
        the request fails. A failure is reported in the Streamlit UI and is
        not cached, so a later call retries the download.
    """
    if not _id_list:
        return None
    try:
        # Pass a tuple so the IDs are hashable and take part in the cache key.
        return _fetch_articles_xml(tuple(_id_list))
    except requests.exceptions.RequestException as e:
        # NOTE(review): the parenthesised fragment below is Thai text inside an
        # otherwise English message; left unchanged here - confirm intent.
        st.error(f"Error during fetching article details (อาจเป็นเพราะหมดเวลา): {e}")
        return None
def _format_author(author):
    """Return a display name for one <Author> element, or "" if it has none."""
    last_name = (author.findtext("LastName") or "").strip()
    initials = (author.findtext("Initials") or "").strip()
    personal_name = f"{last_name} {initials}".strip()
    if personal_name:
        return personal_name
    # Consortium / group authors carry a CollectiveName instead of a LastName.
    return (author.findtext("CollectiveName") or "").strip()


def _format_abstract(article):
    """Join every <AbstractText> section of *article* into a single string."""
    sections = []
    for section in article.findall(".//Abstract/AbstractText"):
        text = "".join(section.itertext()).strip()
        if not text:
            continue
        # Structured abstracts label their sections (BACKGROUND, METHODS, ...).
        label = section.get("Label")
        sections.append(f"{label}: {text}" if label else text)
    return " ".join(sections) or "No Abstract"


def parse_articles(xml_data):
    """Parse PubMed EFetch XML into a list of article dictionaries.

    Args:
        xml_data: XML document (bytes or str) containing ``PubmedArticle``
            elements; ``None`` or an empty value yields an empty list.

    Returns:
        A list of dicts with the keys ``"Title"``, ``"Abstract"``,
        ``"Authors"`` and ``"Link"``. Missing fields are replaced by the
        placeholders ``"No Title"``, ``"No Abstract"``, ``"No Authors"`` and
        ``"No Link"``.

    Raises:
        xml.etree.ElementTree.ParseError: if *xml_data* is not well-formed XML.
    """
    if not xml_data:
        return []

    articles = []
    root = ET.fromstring(xml_data)
    for article in root.findall(".//PubmedArticle"):
        title_elem = article.find(".//ArticleTitle")
        # itertext() keeps text nested inside inline markup such as <i> or <sup>.
        title = "".join(title_elem.itertext()) if title_elem is not None else ""

        author_names = [_format_author(author) for author in article.findall(".//Author")]
        # Drop authors for which no usable name was found to avoid stray ", ".
        author_names = [name for name in author_names if name]

        pmid = (article.findtext(".//PMID") or "").strip()

        articles.append({
            "Title": title or "No Title",
            "Abstract": _format_abstract(article),
            "Authors": ", ".join(author_names) or "No Authors",
            "Link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "No Link",
        })
    return articles
def _translate_results(df):
    """Return a copy of *df* with the Title and Abstract columns translated to Chinese.

    Placeholder cells ("No Title" / "No Abstract") are left untouched. Any
    exception raised by the translator propagates to the caller.
    """
    # NOTE(review): assumes a googletrans release whose Translator.translate is
    # synchronous and accepts a list of strings - confirm against the pinned version.
    translator = Translator()
    translated = df.copy()
    for column, placeholder in (("Title", "No Title"), ("Abstract", "No Abstract")):
        # Only real text is sent for translation; the mask also tells us where
        # to write the results back.
        needs_translation = translated[column] != placeholder
        texts = translated.loc[needs_translation, column].tolist()
        if texts:
            # One batched call per column.
            results = translator.translate(texts, dest='zh-cn')
            translated.loc[needs_translation, column] = [item.text for item in results]
    return translated


def main():
    """Render the Streamlit page: language switch, search box and results table.

    On a button press the query is searched on PubMed, the matching articles
    are fetched and parsed, optionally translated to Chinese, and shown as a
    table indexed from 1 with localized column headers.
    """
    st.set_page_config(layout="wide", page_title="PubMed Search Tool")
    lang = st.sidebar.radio(
        get_translation("en", "language_label") + " / " + get_translation("zh", "language_label"),
        ("en", "zh"),
        # Reuse the per-language display name from the translation table.
        format_func=lambda code: get_translation(code, "language_option"),
    )
    st.title(get_translation(lang, "title"))
    # Streamlit expects a non-empty label; keep it for accessibility but hide it
    # so the page looks the same as with the placeholder alone.
    search_query = st.text_input(
        get_translation(lang, "search_placeholder"),
        placeholder=get_translation(lang, "search_placeholder"),
        label_visibility="collapsed",
    )
    if not st.button(get_translation(lang, "search_button")):
        return

    # Ignore empty and whitespace-only input instead of sending it to PubMed.
    query = search_query.strip()
    if not query:
        return

    with st.spinner(get_translation(lang, "spinner_searching")):
        id_list = search_pubmed(query, retmax=200)
        xml_data = fetch_articles(tuple(id_list)) if id_list else None
        try:
            articles = parse_articles(xml_data) if xml_data else []
        except ET.ParseError:
            # A malformed response should not crash the page.
            st.error(get_translation(lang, "error_message"))
            return

    if not articles:
        st.warning(get_translation(lang, "no_results"))
        return

    st.subheader(get_translation(lang, "results_title"))
    st.info(get_translation(lang, "showing_results").format(count=len(articles)))
    df = pd.DataFrame(articles)

    if lang == 'zh':
        with st.spinner(get_translation(lang, "spinner_translating")):
            try:
                df = _translate_results(df)
            except Exception as e:
                # Deliberate best-effort boundary: translation is optional, so on
                # any failure the original text is shown with a warning.
                st.warning(f"{get_translation(lang, 'translation_warning')} (Error: {e})", icon="⚠️")

    # Number the rows from 1 and localize the headers for display.
    df.index = range(1, len(df) + 1)
    df.rename(columns={
        "Title": get_translation(lang, "col_title"),
        "Abstract": get_translation(lang, "col_abstract"),
        "Authors": get_translation(lang, "col_authors"),
        "Link": get_translation(lang, "col_link"),
    }, inplace=True)
    df.index.name = get_translation(lang, "col_index")
    st.dataframe(df)
# Run the app when the file is executed as a script (e.g. `streamlit run`).
if __name__ == "__main__":
    main()