Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import streamlit as st
|
|
| 2 |
import requests
|
| 3 |
import xml.etree.ElementTree as ET
|
| 4 |
import pandas as pd
|
| 5 |
-
from googletrans import Translator
|
| 6 |
|
| 7 |
# 语言翻译字典
|
| 8 |
translations = {
|
|
@@ -19,7 +19,10 @@ translations = {
|
|
| 19 |
"col_title": "Title",
|
| 20 |
"col_abstract": "Abstract",
|
| 21 |
"col_authors": "Authors",
|
| 22 |
-
"col_link": "Link"
|
|
|
|
|
|
|
|
|
|
| 23 |
},
|
| 24 |
"zh": {
|
| 25 |
"title": "PubMed 文献检索",
|
|
@@ -34,13 +37,17 @@ translations = {
|
|
| 34 |
"col_title": "文献标题",
|
| 35 |
"col_abstract": "文献摘要",
|
| 36 |
"col_authors": "文献作者",
|
| 37 |
-
"col_link": "文献链接"
|
|
|
|
|
|
|
|
|
|
| 38 |
}
|
| 39 |
}
|
| 40 |
|
| 41 |
def get_translation(lang, key):
    """Return the UI string stored under ``key`` for the language ``lang``.

    Both steps are plain dictionary indexing on the module-level
    ``translations`` table, so an unknown language code or key raises
    ``KeyError``.
    """
    language_table = translations[lang]
    return language_table[key]
|
| 43 |
|
|
|
|
| 44 |
def search_pubmed(query, retmax=200):
|
| 45 |
"""
|
| 46 |
使用PubMed API进行检索
|
|
@@ -48,28 +55,31 @@ def search_pubmed(query, retmax=200):
|
|
| 48 |
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
|
| 49 |
search_url = f"{base_url}esearch.fcgi?db=pubmed&term={query}&retmax={retmax}"
|
| 50 |
try:
|
| 51 |
-
search_response = requests.get(search_url)
|
| 52 |
search_response.raise_for_status()
|
| 53 |
search_root = ET.fromstring(search_response.content)
|
| 54 |
id_list = [id_elem.text for id_elem in search_root.findall(".//Id")]
|
| 55 |
return id_list
|
| 56 |
except requests.exceptions.RequestException as e:
|
| 57 |
-
st.error(f"Error during
|
| 58 |
return []
|
| 59 |
|
| 60 |
-
|
|
|
|
| 61 |
"""
|
| 62 |
根据ID列表获取文献详情
|
| 63 |
"""
|
|
|
|
|
|
|
| 64 |
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
|
| 65 |
-
ids = ",".join(
|
| 66 |
fetch_url = f"{base_url}efetch.fcgi?db=pubmed&id={ids}&rettype=xml"
|
| 67 |
try:
|
| 68 |
-
fetch_response = requests.get(fetch_url)
|
| 69 |
fetch_response.raise_for_status()
|
| 70 |
return fetch_response.content
|
| 71 |
except requests.exceptions.RequestException as e:
|
| 72 |
-
st.error(f"Error during
|
| 73 |
return None
|
| 74 |
|
| 75 |
def parse_articles(xml_data):
|
|
@@ -81,14 +91,14 @@ def parse_articles(xml_data):
|
|
| 81 |
root = ET.fromstring(xml_data)
|
| 82 |
for article in root.findall(".//PubmedArticle"):
|
| 83 |
title_elem = article.find(".//ArticleTitle")
|
| 84 |
-
title = title_elem.
|
| 85 |
|
| 86 |
-
abstract_elem = article.find(".//AbstractText")
|
| 87 |
-
abstract = abstract_elem.
|
| 88 |
|
| 89 |
author_list = article.findall(".//Author")
|
| 90 |
authors = ", ".join([
|
| 91 |
-
f"{author.find('LastName').text if author.find('LastName') is not None else ''} {author.find('
|
| 92 |
for author in author_list
|
| 93 |
]) if author_list else "No Authors"
|
| 94 |
|
|
@@ -105,11 +115,11 @@ def parse_articles(xml_data):
|
|
| 105 |
return articles
|
| 106 |
|
| 107 |
def main():
|
| 108 |
-
st.set_page_config(layout="wide")
|
| 109 |
|
| 110 |
# 语言选择
|
| 111 |
lang = st.sidebar.radio(
|
| 112 |
-
"
|
| 113 |
("en", "zh"),
|
| 114 |
format_func=lambda x: "English" if x == "en" else "中文"
|
| 115 |
)
|
|
@@ -117,48 +127,58 @@ def main():
|
|
| 117 |
st.title(get_translation(lang, "title"))
|
| 118 |
|
| 119 |
# 搜索框
|
| 120 |
-
search_query = st.text_input(get_translation(lang, "search_placeholder"))
|
| 121 |
|
| 122 |
if st.button(get_translation(lang, "search_button")):
|
| 123 |
if search_query:
|
| 124 |
-
with st.spinner(
|
| 125 |
id_list = search_pubmed(search_query)
|
| 126 |
if id_list:
|
| 127 |
-
xml_data = fetch_articles(id_list)
|
| 128 |
if xml_data:
|
| 129 |
articles = parse_articles(xml_data)
|
| 130 |
-
if articles:
|
| 131 |
-
st.subheader(get_translation(lang, "results_title"))
|
| 132 |
-
|
| 133 |
-
# 翻译标题和摘要
|
| 134 |
-
if lang == 'zh':
|
| 135 |
-
translator = Translator()
|
| 136 |
-
for article in articles:
|
| 137 |
-
try:
|
| 138 |
-
article['Title'] = translator.translate(article['Title'], dest='zh-cn').text
|
| 139 |
-
article['Abstract'] = translator.translate(article['Abstract'], dest='zh-cn').text
|
| 140 |
-
except Exception as e:
|
| 141 |
-
st.warning(f"翻译时出错: {e}")
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
df = pd.DataFrame(articles)
|
| 145 |
-
df.index = range(1, len(df) + 1)
|
| 146 |
-
df.rename(columns={
|
| 147 |
-
"Title": get_translation(lang, "col_title"),
|
| 148 |
-
"Abstract": get_translation(lang, "col_abstract"),
|
| 149 |
-
"Authors": get_translation(lang, "col_authors"),
|
| 150 |
-
"Link": get_translation(lang, "col_link")
|
| 151 |
-
}, inplace=True)
|
| 152 |
-
df.index.name = get_translation(lang, "col_index")
|
| 153 |
-
|
| 154 |
-
st.dataframe(df)
|
| 155 |
-
else:
|
| 156 |
-
st.warning(get_translation(lang, "no_results"))
|
| 157 |
else:
|
| 158 |
-
|
| 159 |
else:
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
if __name__ == "__main__":
|
| 163 |
-
main()
|
| 164 |
-
|
|
|
|
| 2 |
import requests
|
| 3 |
import xml.etree.ElementTree as ET
|
| 4 |
import pandas as pd
|
| 5 |
+
from googletrans import Translator, LANGUAGES
|
| 6 |
|
| 7 |
# 语言翻译字典
|
| 8 |
translations = {
|
|
|
|
| 19 |
"col_title": "Title",
|
| 20 |
"col_abstract": "Abstract",
|
| 21 |
"col_authors": "Authors",
|
| 22 |
+
"col_link": "Link",
|
| 23 |
+
"spinner_searching": "Searching PubMed and fetching articles...",
|
| 24 |
+
"spinner_translating": "Translating results to Chinese...",
|
| 25 |
+
"translation_warning": "Could not translate an entry. Displaying original text."
|
| 26 |
},
|
| 27 |
"zh": {
|
| 28 |
"title": "PubMed 文献检索",
|
|
|
|
| 37 |
"col_title": "文献标题",
|
| 38 |
"col_abstract": "文献摘要",
|
| 39 |
"col_authors": "文献作者",
|
| 40 |
+
"col_link": "文献链接",
|
| 41 |
+
"spinner_searching": "正在检索 PubMed 并获取文献...",
|
| 42 |
+
"spinner_translating": "正在将结果翻译成中文...",
|
| 43 |
+
"translation_warning": "部分条目翻译失败,将显示原文。"
|
| 44 |
}
|
| 45 |
}
|
| 46 |
|
| 47 |
def get_translation(lang, key):
    """Return the UI string stored under ``key`` for the language ``lang``.

    Both steps are plain dictionary indexing on the module-level
    ``translations`` table, so an unknown language code or key raises
    ``KeyError``.
    """
    language_table = translations[lang]
    return language_table[key]
|
| 49 |
|
| 50 |
+
@st.cache_data(ttl=3600)  # cache search results for one hour to avoid repeated requests
def _search_pubmed_ids(query, retmax):
    """Run one ESearch request and return the PubMed IDs it reports.

    Raises:
        requests.exceptions.RequestException: network error or non-2xx status.
        xml.etree.ElementTree.ParseError: response body is not well-formed XML.

    Failures are raised instead of being turned into a return value so that
    an error result is never what gets stored by the cache decorator.
    """
    search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    # Let requests build the query string: characters such as '&', '#' or '+'
    # in the user's query are percent-encoded instead of being interpreted as
    # URL delimiters.
    search_response = requests.get(
        search_url,
        params={"db": "pubmed", "term": query, "retmax": retmax},
        timeout=15,
    )
    search_response.raise_for_status()
    search_root = ET.fromstring(search_response.content)
    return [id_elem.text for id_elem in search_root.findall(".//Id")]


def search_pubmed(query, retmax=200):
    """Search PubMed through the E-utilities ESearch endpoint.

    Args:
        query: Search term entered by the user.
        retmax: Value sent as the ``retmax`` request parameter.

    Returns:
        A list with the text of every ``Id`` element in the response. When
        the request fails or the response cannot be parsed, the problem is
        reported with ``st.error`` and an empty list is returned.
    """
    try:
        return _search_pubmed_ids(query, retmax)
    except (requests.exceptions.RequestException, ET.ParseError) as e:
        st.error(f"Error during PubMed ID search: {e}")
        return []
|
| 66 |
|
| 67 |
+
@st.cache_data(ttl=3600)  # cache fetched XML for one hour
def _fetch_articles_xml(ids):
    """Download the EFetch XML for a comma-separated string of PubMed IDs.

    ``ids`` deliberately has no leading underscore: st.cache_data leaves
    underscore-prefixed parameters out of the cache key, which would make
    every ID list share a single cached response.

    Raises:
        requests.exceptions.RequestException: network error or non-2xx status.
        Raising (rather than returning None) keeps a failed request from
        being stored as the cached result.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    fetch_url = f"{base_url}efetch.fcgi?db=pubmed&id={ids}&rettype=xml"
    fetch_response = requests.get(fetch_url, timeout=30)
    fetch_response.raise_for_status()
    return fetch_response.content


def fetch_articles(_id_list):
    """Fetch article details for a list of PubMed IDs.

    Args:
        _id_list: Iterable of PubMed ID strings; they are joined with commas
            to form the ``id`` request parameter.

    Returns:
        The raw response body (bytes) on success. ``None`` when ``_id_list``
        is empty, or when the request fails (the failure is reported with
        ``st.error``).
    """
    if not _id_list:
        return None
    try:
        # The joined string is the cached helper's only argument, so the
        # cache is keyed on the actual set of IDs being requested.
        return _fetch_articles_xml(",".join(_id_list))
    except requests.exceptions.RequestException as e:
        st.error(f"Error during fetching article details: {e}")
        return None
|
| 84 |
|
| 85 |
def parse_articles(xml_data):
|
|
|
|
| 91 |
root = ET.fromstring(xml_data)
|
| 92 |
for article in root.findall(".//PubmedArticle"):
|
| 93 |
title_elem = article.find(".//ArticleTitle")
|
| 94 |
+
title = "".join(title_elem.itertext()) if title_elem is not None else "No Title"
|
| 95 |
|
| 96 |
+
abstract_elem = article.find(".//Abstract/AbstractText")
|
| 97 |
+
abstract = "".join(abstract_elem.itertext()) if abstract_elem is not None else "No Abstract"
|
| 98 |
|
| 99 |
author_list = article.findall(".//Author")
|
| 100 |
authors = ", ".join([
|
| 101 |
+
f"{author.find('LastName').text if author.find('LastName') is not None else ''} {author.find('Initials').text if author.find('Initials') is not None else ''}".strip()
|
| 102 |
for author in author_list
|
| 103 |
]) if author_list else "No Authors"
|
| 104 |
|
|
|
|
| 115 |
return articles
|
| 116 |
|
| 117 |
def main():
|
| 118 |
+
st.set_page_config(layout="wide", page_title="PubMed Search Tool")
|
| 119 |
|
| 120 |
# 语言选择
|
| 121 |
lang = st.sidebar.radio(
|
| 122 |
+
get_translation("en", "language_label") + " / " + get_translation("zh", "language_label"),
|
| 123 |
("en", "zh"),
|
| 124 |
format_func=lambda x: "English" if x == "en" else "中文"
|
| 125 |
)
|
|
|
|
| 127 |
st.title(get_translation(lang, "title"))
|
| 128 |
|
| 129 |
# 搜索框
|
| 130 |
+
search_query = st.text_input("", placeholder=get_translation(lang, "search_placeholder"))
|
| 131 |
|
| 132 |
if st.button(get_translation(lang, "search_button")):
|
| 133 |
if search_query:
|
| 134 |
+
with st.spinner(get_translation(lang, "spinner_searching")):
|
| 135 |
id_list = search_pubmed(search_query)
|
| 136 |
if id_list:
|
| 137 |
+
xml_data = fetch_articles(tuple(id_list)) # Use tuple for caching
|
| 138 |
if xml_data:
|
| 139 |
articles = parse_articles(xml_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
else:
|
| 141 |
+
articles = []
|
| 142 |
else:
|
| 143 |
+
articles = []
|
| 144 |
+
|
| 145 |
+
if articles:
|
| 146 |
+
st.subheader(get_translation(lang, "results_title"))
|
| 147 |
+
df = pd.DataFrame(articles)
|
| 148 |
+
|
| 149 |
+
# 如果是中文,则进行翻译
|
| 150 |
+
if lang == 'zh':
|
| 151 |
+
with st.spinner(get_translation(lang, "spinner_translating")):
|
| 152 |
+
translator = Translator()
|
| 153 |
+
# 创建一个新的DataFrame来存储翻译结果
|
| 154 |
+
df_translated = df.copy()
|
| 155 |
+
for index, row in df.iterrows():
|
| 156 |
+
try:
|
| 157 |
+
# 翻译标题
|
| 158 |
+
if row['Title'] != "No Title":
|
| 159 |
+
df_translated.at[index, 'Title'] = translator.translate(row['Title'], dest='zh-cn').text
|
| 160 |
+
# 翻译摘要
|
| 161 |
+
if row['Abstract'] != "No Abstract":
|
| 162 |
+
df_translated.at[index, 'Abstract'] = translator.translate(row['Abstract'], dest='zh-cn').text
|
| 163 |
+
except Exception:
|
| 164 |
+
st.warning(get_translation(lang, "translation_warning"), icon="⚠️")
|
| 165 |
+
# 如果翻译失败,保留原文
|
| 166 |
+
continue
|
| 167 |
+
df = df_translated
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
df.index = range(1, len(df) + 1)
|
| 171 |
+
df.rename(columns={
|
| 172 |
+
"Title": get_translation(lang, "col_title"),
|
| 173 |
+
"Abstract": get_translation(lang, "col_abstract"),
|
| 174 |
+
"Authors": get_translation(lang, "col_authors"),
|
| 175 |
+
"Link": get_translation(lang, "col_link")
|
| 176 |
+
}, inplace=True)
|
| 177 |
+
df.index.name = get_translation(lang, "col_index")
|
| 178 |
+
|
| 179 |
+
st.dataframe(df)
|
| 180 |
+
else:
|
| 181 |
+
st.warning(get_translation(lang, "no_results"))
|
| 182 |
|
| 183 |
if __name__ == "__main__":
|
| 184 |
+
main()
|
|
|