leonsimon23 commited on
Commit
f73be48
·
verified ·
1 Parent(s): 5a53f4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -64
app.py CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
2
  import requests
3
  import xml.etree.ElementTree as ET
4
  import pandas as pd
5
- from googletrans import Translator, LANGUAGES
6
 
7
  # 语言翻译字典
8
  translations = {
@@ -22,8 +22,8 @@ translations = {
22
  "col_authors": "Authors",
23
  "col_link": "Link",
24
  "spinner_searching": "Searching PubMed and fetching up to 200 articles...",
25
- "spinner_translating": "Translating results to Chinese...",
26
- "translation_warning": "Could not translate an entry. Displaying original text."
27
  },
28
  "zh": {
29
  "title": "PubMed 文献检索",
@@ -41,7 +41,7 @@ translations = {
41
  "col_authors": "文献作者",
42
  "col_link": "文献链接",
43
  "spinner_searching": "正在检索 PubMed 并获取最多 200 篇文献...",
44
- "spinner_translating": "正在将结果翻译成中文...",
45
  "translation_warning": "部分条目翻译失败,将显示原文。"
46
  }
47
  }
@@ -51,13 +51,10 @@ def get_translation(lang, key):
51
 
52
  @st.cache_data(ttl=3600)
53
  def search_pubmed(query, retmax=200):
54
- """
55
- 使用PubMed API进行检索,并明确指定retmax
56
- """
57
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
58
  search_url = f"{base_url}esearch.fcgi?db=pubmed&term={query}&retmax={retmax}"
59
  try:
60
- search_response = requests.get(search_url, timeout=15)
61
  search_response.raise_for_status()
62
  search_root = ET.fromstring(search_response.content)
63
  id_list = [id_elem.text for id_elem in search_root.findall(".//Id")]
@@ -68,53 +65,48 @@ def search_pubmed(query, retmax=200):
68
 
69
  @st.cache_data(ttl=3600)
70
  def fetch_articles(_id_list):
71
- """
72
- 根据ID列表获取文献详情
73
- """
74
  if not _id_list:
75
  return None
76
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
77
- # PubMed efetch有URL长度限制,分批获取更稳妥,但为简化,此处仍一次性获取
78
  ids = ",".join(_id_list)
79
  fetch_url = f"{base_url}efetch.fcgi?db=pubmed&id={ids}&rettype=xml"
80
  try:
81
- fetch_response = requests.get(fetch_url, timeout=30)
 
82
  fetch_response.raise_for_status()
83
  return fetch_response.content
84
  except requests.exceptions.RequestException as e:
85
- st.error(f"Error during fetching article details: {e}")
86
  return None
87
 
88
  def parse_articles(xml_data):
89
- """
90
- 解析文献的XML数据
91
- """
92
  articles = []
93
- if xml_data:
94
- root = ET.fromstring(xml_data)
95
- for article in root.findall(".//PubmedArticle"):
96
- title_elem = article.find(".//ArticleTitle")
97
- title = "".join(title_elem.itertext()) if title_elem is not None else "No Title"
98
-
99
- abstract_elem = article.find(".//Abstract/AbstractText")
100
- abstract = "".join(abstract_elem.itertext()) if abstract_elem is not None else "No Abstract"
101
-
102
- author_list = article.findall(".//Author")
103
- authors = ", ".join([
104
- f"{author.find('LastName').text if author.find('LastName') is not None else ''} {author.find('Initials').text if author.find('Initials') is not None else ''}".strip()
105
- for author in author_list
106
- ]) if author_list else "No Authors"
107
-
108
- pmid_elem = article.find(".//PMID")
109
- pmid = pmid_elem.text if pmid_elem is not None else ""
110
- link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "No Link"
111
-
112
- articles.append({
113
- "Title": title,
114
- "Abstract": abstract,
115
- "Authors": authors,
116
- "Link": link
117
- })
 
118
  return articles
119
 
120
  def main():
@@ -132,15 +124,12 @@ def main():
132
  if st.button(get_translation(lang, "search_button")):
133
  if search_query:
134
  with st.spinner(get_translation(lang, "spinner_searching")):
135
- # *** 关键修正点 ***
136
- # 明确传入 retmax=200
137
  id_list = search_pubmed(search_query, retmax=200)
138
-
139
  if id_list:
140
  xml_data = fetch_articles(tuple(id_list))
141
- articles = parse_articles(xml_data) if xml_data else []
142
- else:
143
- articles = []
144
 
145
  if articles:
146
  st.subheader(get_translation(lang, "results_title"))
@@ -150,22 +139,33 @@ def main():
150
 
151
  if lang == 'zh':
152
  with st.spinner(get_translation(lang, "spinner_translating")):
153
- translator = Translator()
154
- df_translated = df.copy()
155
- for index, row in df.iterrows():
156
- try:
157
- if row['Title'] != "No Title":
158
- df_translated.at[index, 'Title'] = translator.translate(row['Title'], dest='zh-cn').text
159
- if row['Abstract'] != "No Abstract":
160
- df_translated.at[index, 'Abstract'] = translator.translate(row['Abstract'], dest='zh-cn').text
161
- except Exception:
162
- # 只警告一次
163
- if 'translation_warning_shown' not in st.session_state:
164
- st.warning(get_translation(lang, "translation_warning"), icon="⚠️")
165
- st.session_state.translation_warning_shown = True
166
- continue
167
- df = df_translated
168
-
 
 
 
 
 
 
 
 
 
 
 
169
  df.index = range(1, len(df) + 1)
170
  df.rename(columns={
171
  "Title": get_translation(lang, "col_title"),
 
2
  import requests
3
  import xml.etree.ElementTree as ET
4
  import pandas as pd
5
+ from googletrans import Translator
6
 
7
  # 语言翻译字典
8
  translations = {
 
22
  "col_authors": "Authors",
23
  "col_link": "Link",
24
  "spinner_searching": "Searching PubMed and fetching up to 200 articles...",
25
+ "spinner_translating": "Translating results to Chinese (this may take a moment)...",
26
+ "translation_warning": "Could not translate some entries. Displaying original text for those."
27
  },
28
  "zh": {
29
  "title": "PubMed 文献检索",
 
41
  "col_authors": "文献作者",
42
  "col_link": "文献链接",
43
  "spinner_searching": "正在检索 PubMed 并获取最多 200 篇文献...",
44
+ "spinner_translating": "正在批量翻译结果 (请稍候)...",
45
  "translation_warning": "部分条目翻译失败,将显示原文。"
46
  }
47
  }
 
51
 
52
  @st.cache_data(ttl=3600)
53
  def search_pubmed(query, retmax=200):
 
 
 
54
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
55
  search_url = f"{base_url}esearch.fcgi?db=pubmed&term={query}&retmax={retmax}"
56
  try:
57
+ search_response = requests.get(search_url, timeout=20) # 搜索ID一般很快
58
  search_response.raise_for_status()
59
  search_root = ET.fromstring(search_response.content)
60
  id_list = [id_elem.text for id_elem in search_root.findall(".//Id")]
 
65
 
66
@st.cache_data(ttl=3600)
def fetch_articles(_id_list):
    """Fetch full article records from the PubMed efetch endpoint.

    Parameters
    ----------
    _id_list : tuple[str, ...]
        PubMed IDs to fetch. NOTE(review): the leading underscore tells
        st.cache_data NOT to hash this argument, so the cache key ignores
        the ID list — different queries can return a stale cached payload.
        The caller passes a hashable tuple, so the underscore looks
        unnecessary; confirm and consider renaming (interface change).

    Returns
    -------
    bytes or None
        Raw efetch XML, or None on empty input / request failure
        (the error is surfaced to the UI via st.error).
    """
    if not _id_list:
        return None
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    ids = ",".join(_id_list)
    fetch_url = f"{base_url}efetch.fcgi?db=pubmed&id={ids}&rettype=xml"
    try:
        # Generous timeout: fetching up to 200 full records in one request
        # can take well over the default 30s.
        fetch_response = requests.get(fetch_url, timeout=90)
        fetch_response.raise_for_status()
        return fetch_response.content
    except requests.exceptions.RequestException as e:
        # Fix: previous message contained stray Thai text; keep UI text in English.
        st.error(f"Error during fetching article details (possibly a timeout): {e}")
        return None
81
 
82
def _format_author(author):
    """Return "LastName Initials" for one <Author> element, or "" if unnamed."""
    last = author.findtext("LastName") or ""
    initials = author.findtext("Initials") or ""
    return f"{last} {initials}".strip()

def parse_articles(xml_data):
    """Parse PubMed efetch XML into a list of article dicts.

    Parameters
    ----------
    xml_data : bytes | str | None
        Raw XML returned by efetch, or None/empty when fetching failed.

    Returns
    -------
    list[dict]
        One dict per <PubmedArticle> with keys "Title", "Abstract",
        "Authors" and "Link"; empty list for missing input.
    """
    articles = []
    if not xml_data:
        return articles
    root = ET.fromstring(xml_data)
    for article in root.findall(".//PubmedArticle"):
        title_elem = article.find(".//ArticleTitle")
        title = "".join(title_elem.itertext()) if title_elem is not None else "No Title"

        # Fix: structured abstracts carry several <AbstractText> sections
        # (Background/Methods/Results/...); join them all instead of keeping
        # only the first one. itertext() flattens inline markup like <i>.
        abstract_parts = [
            "".join(part.itertext())
            for part in article.findall(".//Abstract/AbstractText")
        ]
        abstract = " ".join(p for p in abstract_parts if p) or "No Abstract"

        # Fix: skip authors with neither LastName nor Initials so the join
        # does not emit empty ", ," fragments.
        names = [_format_author(a) for a in article.findall(".//Author")]
        authors = ", ".join(n for n in names if n) or "No Authors"

        pmid_elem = article.find(".//PMID")
        pmid = pmid_elem.text if pmid_elem is not None else ""
        link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "No Link"

        articles.append({
            "Title": title,
            "Abstract": abstract,
            "Authors": authors,
            "Link": link,
        })
    return articles
111
 
112
  def main():
 
124
  if st.button(get_translation(lang, "search_button")):
125
  if search_query:
126
  with st.spinner(get_translation(lang, "spinner_searching")):
 
 
127
  id_list = search_pubmed(search_query, retmax=200)
128
+ articles = []
129
  if id_list:
130
  xml_data = fetch_articles(tuple(id_list))
131
+ if xml_data:
132
+ articles = parse_articles(xml_data)
 
133
 
134
  if articles:
135
  st.subheader(get_translation(lang, "results_title"))
 
139
 
140
  if lang == 'zh':
141
  with st.spinner(get_translation(lang, "spinner_translating")):
142
+ try:
143
+ translator = Translator()
144
+ df_translated = df.copy()
145
+
146
+ # 2. 批量翻译优化
147
+ # 收集所有需要翻译的标题和摘要
148
+ titles_to_translate = df_translated[df_translated['Title'] != "No Title"]['Title'].tolist()
149
+ abstracts_to_translate = df_translated[df_translated['Abstract'] != "No Abstract"]['Abstract'].tolist()
150
+
151
+ # 获取对应的索引,以便稍后写回
152
+ title_indices = df_translated[df_translated['Title'] != "No Title"].index
153
+ abstract_indices = df_translated[df_translated['Abstract'] != "No Abstract"].index
154
+
155
+ # 一次性翻译所有标题
156
+ if titles_to_translate:
157
+ translated_titles = translator.translate(titles_to_translate, dest='zh-cn')
158
+ df_translated.loc[title_indices, 'Title'] = [t.text for t in translated_titles]
159
+
160
+ # 一次性翻译所有摘要
161
+ if abstracts_to_translate:
162
+ translated_abstracts = translator.translate(abstracts_to_translate, dest='zh-cn')
163
+ df_translated.loc[abstract_indices, 'Abstract'] = [t.text for t in translated_abstracts]
164
+
165
+ df = df_translated
166
+ except Exception as e:
167
+ st.warning(f"{get_translation(lang, 'translation_warning')} (Error: {e})", icon="⚠️")
168
+
169
  df.index = range(1, len(df) + 1)
170
  df.rename(columns={
171
  "Title": get_translation(lang, "col_title"),