leonsimon23 committed on
Commit
09d7a27
·
verified ·
1 Parent(s): 74a018c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -49
app.py CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
2
  import requests
3
  import xml.etree.ElementTree as ET
4
  import pandas as pd
5
- from googletrans import Translator
6
 
7
  # 语言翻译字典
8
  translations = {
@@ -19,7 +19,10 @@ translations = {
19
  "col_title": "Title",
20
  "col_abstract": "Abstract",
21
  "col_authors": "Authors",
22
- "col_link": "Link"
 
 
 
23
  },
24
  "zh": {
25
  "title": "PubMed 文献检索",
@@ -34,13 +37,17 @@ translations = {
34
  "col_title": "文献标题",
35
  "col_abstract": "文献摘要",
36
  "col_authors": "文献作者",
37
- "col_link": "文献链接"
 
 
 
38
  }
39
  }
40
 
41
  def get_translation(lang, key):
42
  return translations[lang][key]
43
 
 
44
  def search_pubmed(query, retmax=200):
45
  """
46
  使用PubMed API进行检索
@@ -48,28 +55,31 @@ def search_pubmed(query, retmax=200):
48
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
49
  search_url = f"{base_url}esearch.fcgi?db=pubmed&term={query}&retmax={retmax}"
50
  try:
51
- search_response = requests.get(search_url)
52
  search_response.raise_for_status()
53
  search_root = ET.fromstring(search_response.content)
54
  id_list = [id_elem.text for id_elem in search_root.findall(".//Id")]
55
  return id_list
56
  except requests.exceptions.RequestException as e:
57
- st.error(f"Error during eSearch: {e}")
58
  return []
59
 
60
- def fetch_articles(id_list):
 
61
  """
62
  根据ID列表获取文献详情
63
  """
 
 
64
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
65
- ids = ",".join(id_list)
66
  fetch_url = f"{base_url}efetch.fcgi?db=pubmed&id={ids}&rettype=xml"
67
  try:
68
- fetch_response = requests.get(fetch_url)
69
  fetch_response.raise_for_status()
70
  return fetch_response.content
71
  except requests.exceptions.RequestException as e:
72
- st.error(f"Error during eFetch: {e}")
73
  return None
74
 
75
  def parse_articles(xml_data):
@@ -81,14 +91,14 @@ def parse_articles(xml_data):
81
  root = ET.fromstring(xml_data)
82
  for article in root.findall(".//PubmedArticle"):
83
  title_elem = article.find(".//ArticleTitle")
84
- title = title_elem.text if title_elem is not None else "No Title"
85
 
86
- abstract_elem = article.find(".//AbstractText")
87
- abstract = abstract_elem.text if abstract_elem is not None else "No Abstract"
88
 
89
  author_list = article.findall(".//Author")
90
  authors = ", ".join([
91
- f"{author.find('LastName').text if author.find('LastName') is not None else ''} {author.find('ForeName').text if author.find('ForeName') is not None else ''}".strip()
92
  for author in author_list
93
  ]) if author_list else "No Authors"
94
 
@@ -105,11 +115,11 @@ def parse_articles(xml_data):
105
  return articles
106
 
107
  def main():
108
- st.set_page_config(layout="wide")
109
 
110
  # 语言选择
111
  lang = st.sidebar.radio(
112
- "Language / 语言",
113
  ("en", "zh"),
114
  format_func=lambda x: "English" if x == "en" else "中文"
115
  )
@@ -117,48 +127,58 @@ def main():
117
  st.title(get_translation(lang, "title"))
118
 
119
  # 搜索框
120
- search_query = st.text_input(get_translation(lang, "search_placeholder"))
121
 
122
  if st.button(get_translation(lang, "search_button")):
123
  if search_query:
124
- with st.spinner('Searching...'):
125
  id_list = search_pubmed(search_query)
126
  if id_list:
127
- xml_data = fetch_articles(id_list)
128
  if xml_data:
129
  articles = parse_articles(xml_data)
130
- if articles:
131
- st.subheader(get_translation(lang, "results_title"))
132
-
133
- # 翻译标题和摘要
134
- if lang == 'zh':
135
- translator = Translator()
136
- for article in articles:
137
- try:
138
- article['Title'] = translator.translate(article['Title'], dest='zh-cn').text
139
- article['Abstract'] = translator.translate(article['Abstract'], dest='zh-cn').text
140
- except Exception as e:
141
- st.warning(f"翻译时出错: {e}")
142
-
143
-
144
- df = pd.DataFrame(articles)
145
- df.index = range(1, len(df) + 1)
146
- df.rename(columns={
147
- "Title": get_translation(lang, "col_title"),
148
- "Abstract": get_translation(lang, "col_abstract"),
149
- "Authors": get_translation(lang, "col_authors"),
150
- "Link": get_translation(lang, "col_link")
151
- }, inplace=True)
152
- df.index.name = get_translation(lang, "col_index")
153
-
154
- st.dataframe(df)
155
- else:
156
- st.warning(get_translation(lang, "no_results"))
157
  else:
158
- st.error(get_translation(lang, "error_message"))
159
  else:
160
- st.warning(get_translation(lang, "no_results"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  if __name__ == "__main__":
163
- main()
164
-
 
2
  import requests
3
  import xml.etree.ElementTree as ET
4
  import pandas as pd
5
+ from googletrans import Translator, LANGUAGES
6
 
7
  # 语言翻译字典
8
  translations = {
 
19
  "col_title": "Title",
20
  "col_abstract": "Abstract",
21
  "col_authors": "Authors",
22
+ "col_link": "Link",
23
+ "spinner_searching": "Searching PubMed and fetching articles...",
24
+ "spinner_translating": "Translating results to Chinese...",
25
+ "translation_warning": "Could not translate an entry. Displaying original text."
26
  },
27
  "zh": {
28
  "title": "PubMed 文献检索",
 
37
  "col_title": "文献标题",
38
  "col_abstract": "文献摘要",
39
  "col_authors": "文献作者",
40
+ "col_link": "文献链接",
41
+ "spinner_searching": "正在检索 PubMed 并获取文献...",
42
+ "spinner_translating": "正在将结果翻译成中文...",
43
+ "translation_warning": "部分条目翻译失败,将显示原文。"
44
  }
45
  }
46
 
47
def get_translation(lang, key):
    """Return the UI string registered under ``key`` for language ``lang``.

    The value is read from the module-level ``translations`` mapping, which
    is indexed first by language code and then by message key. A language or
    key that is not present raises ``KeyError``.
    """
    strings_for_lang = translations[lang]
    return strings_for_lang[key]
49
 
50
@st.cache_data(ttl=3600)  # cache successful lookups for one hour
def _esearch_ids(query, retmax):
    """Run a PubMed eSearch request and return the matching IDs.

    Failures propagate as exceptions on purpose: only a value that is
    returned gets stored by ``st.cache_data``, so a transient network error
    is not remembered for the whole TTL.

    Raises:
        requests.exceptions.RequestException: network error, timeout or
            non-2xx HTTP status.
        xml.etree.ElementTree.ParseError: the response body is not valid XML.
    """
    response = requests.get(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
        # Let requests build the query string so that characters such as
        # '&', '#' or '+' inside the search term are percent-encoded instead
        # of being interpreted as URL syntax.
        params={"db": "pubmed", "term": query, "retmax": retmax},
        timeout=15,
    )
    response.raise_for_status()
    root = ET.fromstring(response.content)
    return [id_elem.text for id_elem in root.findall(".//Id")]


def search_pubmed(query, retmax=200):
    """Search PubMed through the E-utilities eSearch endpoint.

    Args:
        query: Search term typed by the user.
        retmax: Maximum number of IDs to request.

    Returns:
        A list of PubMed ID strings. An empty list is returned when the
        request fails or the response cannot be parsed; in that case an
        error message is shown in the Streamlit page.
    """
    try:
        return _esearch_ids(query, retmax)
    except (requests.exceptions.RequestException, ET.ParseError) as e:
        st.error(f"Error during PubMed ID search: {e}")
        return []
66
 
67
@st.cache_data(ttl=3600)  # cache successful fetches for one hour
def _efetch_xml(id_list):
    """Download the PubMed records for ``id_list`` and return the raw XML.

    The parameter name deliberately has no leading underscore: Streamlit
    leaves underscore-prefixed parameters out of the cache key, which would
    make every call share one cached response regardless of the IDs asked
    for.

    Args:
        id_list: Tuple of PubMed ID strings.

    Raises:
        requests.exceptions.RequestException: network error, timeout or
            non-2xx HTTP status. Raising (rather than returning ``None``)
            keeps failures out of the cache.
    """
    response = requests.get(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
        params={"db": "pubmed", "id": ",".join(id_list), "rettype": "xml"},
        timeout=30,
    )
    response.raise_for_status()
    return response.content


def fetch_articles(_id_list):
    """Fetch article details for a collection of PubMed IDs.

    Args:
        _id_list: Sequence of PubMed ID strings (list or tuple).

    Returns:
        The eFetch response body as bytes, or ``None`` when ``_id_list`` is
        empty or the request fails. On failure an error message is shown in
        the Streamlit page.
    """
    if not _id_list:
        return None
    try:
        # A tuple gives the cached helper a stable, hashable cache key.
        return _efetch_xml(tuple(_id_list))
    except requests.exceptions.RequestException as e:
        st.error(f"Error during fetching article details: {e}")
        return None
84
 
85
  def parse_articles(xml_data):
 
91
  root = ET.fromstring(xml_data)
92
  for article in root.findall(".//PubmedArticle"):
93
  title_elem = article.find(".//ArticleTitle")
94
+ title = "".join(title_elem.itertext()) if title_elem is not None else "No Title"
95
 
96
+ abstract_elem = article.find(".//Abstract/AbstractText")
97
+ abstract = "".join(abstract_elem.itertext()) if abstract_elem is not None else "No Abstract"
98
 
99
  author_list = article.findall(".//Author")
100
  authors = ", ".join([
101
+ f"{author.find('LastName').text if author.find('LastName') is not None else ''} {author.find('Initials').text if author.find('Initials') is not None else ''}".strip()
102
  for author in author_list
103
  ]) if author_list else "No Authors"
104
 
 
115
  return articles
116
 
117
  def main():
118
+ st.set_page_config(layout="wide", page_title="PubMed Search Tool")
119
 
120
  # 语言选择
121
  lang = st.sidebar.radio(
122
+ get_translation("en", "language_label") + " / " + get_translation("zh", "language_label"),
123
  ("en", "zh"),
124
  format_func=lambda x: "English" if x == "en" else "中文"
125
  )
 
127
  st.title(get_translation(lang, "title"))
128
 
129
  # 搜索框
130
+ search_query = st.text_input("", placeholder=get_translation(lang, "search_placeholder"))
131
 
132
  if st.button(get_translation(lang, "search_button")):
133
  if search_query:
134
+ with st.spinner(get_translation(lang, "spinner_searching")):
135
  id_list = search_pubmed(search_query)
136
  if id_list:
137
+ xml_data = fetch_articles(tuple(id_list)) # Use tuple for caching
138
  if xml_data:
139
  articles = parse_articles(xml_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  else:
141
+ articles = []
142
  else:
143
+ articles = []
144
+
145
+ if articles:
146
+ st.subheader(get_translation(lang, "results_title"))
147
+ df = pd.DataFrame(articles)
148
+
149
+ # 如果是中文,则进行翻译
150
+ if lang == 'zh':
151
+ with st.spinner(get_translation(lang, "spinner_translating")):
152
+ translator = Translator()
153
+ # 创建一个新的DataFrame来存储翻译结果
154
+ df_translated = df.copy()
155
+ for index, row in df.iterrows():
156
+ try:
157
+ # 翻译标题
158
+ if row['Title'] != "No Title":
159
+ df_translated.at[index, 'Title'] = translator.translate(row['Title'], dest='zh-cn').text
160
+ # 翻译摘要
161
+ if row['Abstract'] != "No Abstract":
162
+ df_translated.at[index, 'Abstract'] = translator.translate(row['Abstract'], dest='zh-cn').text
163
+ except Exception:
164
+ st.warning(get_translation(lang, "translation_warning"), icon="⚠️")
165
+ # 如果翻译失败,保留原文
166
+ continue
167
+ df = df_translated
168
+
169
+
170
+ df.index = range(1, len(df) + 1)
171
+ df.rename(columns={
172
+ "Title": get_translation(lang, "col_title"),
173
+ "Abstract": get_translation(lang, "col_abstract"),
174
+ "Authors": get_translation(lang, "col_authors"),
175
+ "Link": get_translation(lang, "col_link")
176
+ }, inplace=True)
177
+ df.index.name = get_translation(lang, "col_index")
178
+
179
+ st.dataframe(df)
180
+ else:
181
+ st.warning(get_translation(lang, "no_results"))
182
 
183
  if __name__ == "__main__":
184
+ main()