Update app.py
app.py
CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
 import requests
 import xml.etree.ElementTree as ET
 import pandas as pd
-from googletrans import Translator
+from googletrans import Translator
 
 # Language translation dictionary
 translations = {
@@ -22,8 +22,8 @@ translations = {
         "col_authors": "Authors",
         "col_link": "Link",
         "spinner_searching": "Searching PubMed and fetching up to 200 articles...",
-        "spinner_translating": "Translating results to Chinese...",
-        "translation_warning": "Could not translate
+        "spinner_translating": "Translating results to Chinese (this may take a moment)...",
+        "translation_warning": "Could not translate some entries. Displaying original text for those."
     },
     "zh": {
         "title": "PubMed 文献检索",
@@ -41,7 +41,7 @@ translations = {
         "col_authors": "文献作者",
         "col_link": "文献链接",
         "spinner_searching": "正在检索 PubMed 并获取最多 200 篇文献...",
-        "spinner_translating": "
+        "spinner_translating": "正在批量翻译结果 (请稍候)...",
         "translation_warning": "部分条目翻译失败,将显示原文。"
     }
 }
@@ -51,13 +51,10 @@ def get_translation(lang, key):
 
 @st.cache_data(ttl=3600)
 def search_pubmed(query, retmax=200):
-    """
-    Search PubMed via its API, explicitly specifying retmax
-    """
     base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
     search_url = f"{base_url}esearch.fcgi?db=pubmed&term={query}&retmax={retmax}"
     try:
-        search_response = requests.get(search_url, timeout=
+        search_response = requests.get(search_url, timeout=20)  # Fetching the ID list is usually fast
         search_response.raise_for_status()
         search_root = ET.fromstring(search_response.content)
         id_list = [id_elem.text for id_elem in search_root.findall(".//Id")]
@@ -68,53 +65,48 @@ def search_pubmed(query, retmax=200):
 
 @st.cache_data(ttl=3600)
 def fetch_articles(_id_list):
-    """
-    Fetch article details for a list of IDs
-    """
     if not _id_list:
         return None
     base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
-    # PubMed efetch has a URL length limit; fetching in batches is safer, but for simplicity everything is still fetched in one request
     ids = ",".join(_id_list)
     fetch_url = f"{base_url}efetch.fcgi?db=pubmed&id={ids}&rettype=xml"
     try:
-        fetch_response = requests.get(fetch_url, timeout=30)
+        # 1. Increase the timeout from 30 to 90 seconds to handle the large request for 200 articles
+        fetch_response = requests.get(fetch_url, timeout=90)
         fetch_response.raise_for_status()
         return fetch_response.content
     except requests.exceptions.RequestException as e:
-        st.error(f"Error during fetching article details: {e}")
+        st.error(f"Error during fetching article details (possibly a timeout): {e}")
         return None
 
 def parse_articles(xml_data):
-    """
-    Parse the article XML data
-    """
     articles = []
-    if xml_data:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if not xml_data:
+        return articles
+    root = ET.fromstring(xml_data)
+    for article in root.findall(".//PubmedArticle"):
+        title_elem = article.find(".//ArticleTitle")
+        title = "".join(title_elem.itertext()) if title_elem is not None else "No Title"
+
+        abstract_elem = article.find(".//Abstract/AbstractText")
+        abstract = "".join(abstract_elem.itertext()) if abstract_elem is not None else "No Abstract"
+
+        author_list = article.findall(".//Author")
+        authors = ", ".join([
+            f"{author.find('LastName').text if author.find('LastName') is not None else ''} {author.find('Initials').text if author.find('Initials') is not None else ''}".strip()
+            for author in author_list
+        ]) if author_list else "No Authors"
+
+        pmid_elem = article.find(".//PMID")
+        pmid = pmid_elem.text if pmid_elem is not None else ""
+        link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "No Link"
+
+        articles.append({
+            "Title": title,
+            "Abstract": abstract,
+            "Authors": authors,
+            "Link": link
+        })
     return articles
 
 def main():
@@ -132,15 +124,12 @@ def main():
     if st.button(get_translation(lang, "search_button")):
         if search_query:
             with st.spinner(get_translation(lang, "spinner_searching")):
-                # *** Key fix ***
-                # Explicitly pass retmax=200
                 id_list = search_pubmed(search_query, retmax=200)
-
+            articles = []
             if id_list:
                 xml_data = fetch_articles(tuple(id_list))
-
-
-                articles = []
+                if xml_data:
+                    articles = parse_articles(xml_data)
 
             if articles:
                 st.subheader(get_translation(lang, "results_title"))
@@ -150,22 +139,33 @@ def main():
 
         if lang == 'zh':
            with st.spinner(get_translation(lang, "spinner_translating")):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                try:
+                    translator = Translator()
+                    df_translated = df.copy()
+
+                    # 2. Batch translation optimization
+                    # Collect all titles and abstracts that need translating
+                    titles_to_translate = df_translated[df_translated['Title'] != "No Title"]['Title'].tolist()
+                    abstracts_to_translate = df_translated[df_translated['Abstract'] != "No Abstract"]['Abstract'].tolist()
+
+                    # Record the matching indices so the results can be written back later
+                    title_indices = df_translated[df_translated['Title'] != "No Title"].index
+                    abstract_indices = df_translated[df_translated['Abstract'] != "No Abstract"].index
+
+                    # Translate all titles in one call
+                    if titles_to_translate:
+                        translated_titles = translator.translate(titles_to_translate, dest='zh-cn')
+                        df_translated.loc[title_indices, 'Title'] = [t.text for t in translated_titles]
+
+                    # Translate all abstracts in one call
+                    if abstracts_to_translate:
+                        translated_abstracts = translator.translate(abstracts_to_translate, dest='zh-cn')
+                        df_translated.loc[abstract_indices, 'Abstract'] = [t.text for t in translated_abstracts]
+
+                    df = df_translated
+                except Exception as e:
+                    st.warning(f"{get_translation(lang, 'translation_warning')} (Error: {e})", icon="⚠️")
+
         df.index = range(1, len(df) + 1)
         df.rename(columns={
            "Title": get_translation(lang, "col_title"),