Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| import xml.etree.ElementTree as ET | |
| import pandas as pd | |
| from googletrans import Translator | |
# UI string table: maps a language code ("en" / "zh") to a dict of
# message-key -> display text. Both languages define the same set of keys.
# "showing_results" is a str.format template with a {count} placeholder.
translations = {
    "en": {
        "title": "PubMed Literature Search",
        "search_placeholder": "Enter search query (e.g., cancer treatment)",
        "search_button": "Search",
        "language_option": "English",
        "language_label": "Language",
        "results_title": "Search Results",
        "showing_results": "Showing {count} articles.",
        "error_message": "An error occurred while fetching data from PubMed.",
        "no_results": "No articles found for the given query.",
        "col_index": "Index",
        "col_title": "Title",
        "col_abstract": "Abstract",
        "col_authors": "Authors",
        "col_link": "Link",
        "spinner_searching": "Searching PubMed and fetching up to 200 articles...",
        "spinner_translating": "Translating results to Chinese (this may take a moment)...",
        "translation_warning": "Could not translate some entries. Displaying original text for those."
    },
    "zh": {
        "title": "PubMed 文献检索",
        "search_placeholder": "输入检索词(例如:cancer treatment)",
        "search_button": "检索",
        "language_option": "中文",
        "language_label": "语言",
        "results_title": "检索结果",
        "showing_results": "共找到 {count} 篇文献。",
        "error_message": "从 PubMed 获取数据时出错。",
        "no_results": "未找到相关文献。",
        "col_index": "序号",
        "col_title": "文献标题",
        "col_abstract": "文献摘要",
        "col_authors": "文献作者",
        "col_link": "文献链接",
        "spinner_searching": "正在检索 PubMed 并获取最多 200 篇文献...",
        "spinner_translating": "正在批量翻译结果 (请稍候)...",
        "translation_warning": "部分条目翻译失败,将显示原文。"
    }
}
def get_translation(lang, key):
    """Return the UI string registered under *key* for language *lang*.

    Both lookups are plain dict subscripts on the module-level
    ``translations`` table, so an unknown language code or message key
    raises ``KeyError``.
    """
    language_strings = translations[lang]
    message = language_strings[key]
    return message
def search_pubmed(query, retmax=200):
    """Search PubMed via the NCBI ESearch endpoint and return matching IDs.

    Args:
        query: Free-text search term entered by the user.
        retmax: Maximum number of IDs to request.

    Returns:
        A list of ID strings taken from the ``<Id>`` elements of the response,
        or an empty list when the request or the XML parsing fails (the
        failure is reported to the page with ``st.error``).
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    # Pass the query through `params` so requests URL-encodes it. Interpolating
    # it into the URL by hand lets characters such as '&', '#', '+' or '='
    # in the user's text split or truncate the `term` parameter.
    params = {"db": "pubmed", "term": query, "retmax": retmax}
    try:
        # The ID search is normally quick, so a short timeout is enough.
        search_response = requests.get(
            f"{base_url}esearch.fcgi", params=params, timeout=20
        )
        search_response.raise_for_status()
        search_root = ET.fromstring(search_response.content)
    except (requests.exceptions.RequestException, ET.ParseError) as e:
        # A malformed body is handled like a transport error: report and
        # return "no results" instead of crashing the page.
        st.error(f"Error during PubMed ID search: {e}")
        return []
    # Skip empty <Id/> elements so callers can safely join the IDs as strings.
    return [id_elem.text for id_elem in search_root.findall(".//Id") if id_elem.text]
def fetch_articles(_id_list):
    """Download the PubMed records for the given IDs from the EFetch endpoint.

    Args:
        _id_list: Sequence of PubMed ID strings (joined with commas for the
            request).

    Returns:
        The raw response body (bytes) on success, or ``None`` when
        ``_id_list`` is empty/None or the HTTP request fails. Request failures
        are reported to the page with ``st.error``.

    NOTE(review): the parenthetical in the error message is Thai for
    "possibly because of a timeout"; it is runtime text and is left as is.
    """
    if not _id_list:
        return None

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    ids = ",".join(_id_list)
    fetch_url = f"{base_url}efetch.fcgi?db=pubmed&id={ids}&rettype=xml"

    try:
        # Generous 90 s timeout (raised from 30 s): a response carrying up to
        # 200 articles is large.
        response = requests.get(fetch_url, timeout=90)
        response.raise_for_status()
        return response.content
    except requests.exceptions.RequestException as e:
        st.error(f"Error during fetching article details (อาจเป็นเพราะหมดเวลา): {e}")
        return None
def _element_text(element):
    """Return the stripped text of *element* including nested markup, or ""."""
    if element is None:
        return ""
    return "".join(element.itertext()).strip()


def _format_author(author):
    """Return "LastName Initials" for an <Author>, else its CollectiveName, else ""."""
    last_name = _element_text(author.find("LastName"))
    initials = _element_text(author.find("Initials"))
    personal_name = f"{last_name} {initials}".strip()
    return personal_name or _element_text(author.find("CollectiveName"))


def _format_abstract(article):
    """Join every Abstract/AbstractText section of *article* into one string."""
    sections = []
    for section in article.findall(".//Abstract/AbstractText"):
        text = _element_text(section)
        if not text:
            continue
        # Structured abstracts carry one AbstractText per section, each with a
        # Label attribute (e.g. BACKGROUND, METHODS); keep the label as a prefix.
        label = (section.get("Label") or "").strip()
        sections.append(f"{label}: {text}" if label else text)
    return " ".join(sections)


def parse_articles(xml_data):
    """Extract title, abstract, authors and link from an EFetch XML payload.

    Args:
        xml_data: XML document (bytes or str) containing ``PubmedArticle``
            elements. A falsy value yields an empty list.

    Returns:
        A list with one dict per ``PubmedArticle``, with the keys ``"Title"``,
        ``"Abstract"``, ``"Authors"`` and ``"Link"``. Missing or empty fields
        are replaced by ``"No Title"``, ``"No Abstract"``, ``"No Authors"``
        and ``"No Link"`` respectively.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_data* is not well-formed XML.
    """
    if not xml_data:
        return []

    root = ET.fromstring(xml_data)
    articles = []
    for article in root.findall(".//PubmedArticle"):
        # Drop authors that render to nothing so the list has no ", , " gaps.
        author_names = [
            name
            for name in map(_format_author, article.findall(".//Author"))
            if name
        ]
        pmid = _element_text(article.find(".//PMID"))
        articles.append({
            "Title": _element_text(article.find(".//ArticleTitle")) or "No Title",
            "Abstract": _format_abstract(article) or "No Abstract",
            "Authors": ", ".join(author_names) or "No Authors",
            "Link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "No Link",
        })
    return articles
def _translate_column(translator, frame, column, placeholder, dest="zh-cn"):
    """Translate one text column of *frame* in place, skipping placeholder cells.

    Cells equal to *placeholder* (e.g. "No Title") are left untouched. The
    remaining cells are sent to ``translator.translate`` as a single list and
    the ``.text`` of each result is written back to the row it came from.
    Exceptions raised by the translator propagate to the caller; the write-back
    is the last step, so a translator failure leaves the column unchanged.
    """
    mask = frame[column] != placeholder
    originals = frame.loc[mask, column].tolist()
    if not originals:
        return
    results = translator.translate(originals, dest=dest)
    frame.loc[mask, column] = [result.text for result in results]


def main():
    """Render the Streamlit page: language switch, search box and results table.

    Flow: the user picks a UI language in the sidebar and submits a query; up
    to 200 PubMed IDs are searched and fetched, the articles are parsed into a
    DataFrame, and, when the UI language is "zh", titles and abstracts are
    machine-translated before the table is shown.
    """
    st.set_page_config(layout="wide", page_title="PubMed Search Tool")

    lang = st.sidebar.radio(
        get_translation("en", "language_label") + " / " + get_translation("zh", "language_label"),
        ("en", "zh"),
        # Use the per-language display name from the string table instead of
        # repeating the literals here.
        format_func=lambda code: get_translation(code, "language_option"),
    )

    st.title(get_translation(lang, "title"))
    search_query = st.text_input("", placeholder=get_translation(lang, "search_placeholder"))

    if not st.button(get_translation(lang, "search_button")):
        return
    # Ignore surrounding whitespace so a blank-only query is not sent to PubMed.
    query = search_query.strip()
    if not query:
        return

    with st.spinner(get_translation(lang, "spinner_searching")):
        id_list = search_pubmed(query, retmax=200)
        articles = []
        if id_list:
            xml_data = fetch_articles(tuple(id_list))
            if xml_data:
                articles = parse_articles(xml_data)

    if not articles:
        st.warning(get_translation(lang, "no_results"))
        return

    st.subheader(get_translation(lang, "results_title"))
    st.info(get_translation(lang, "showing_results").format(count=len(articles)))
    df = pd.DataFrame(articles)

    if lang == 'zh':
        with st.spinner(get_translation(lang, "spinner_translating")):
            failures = []
            try:
                translator = Translator()
            except Exception as e:  # best effort: fall back to the original text
                translator = None
                failures.append(str(e))

            if translator is not None:
                # Translate each column on its own so that a failure on one
                # (e.g. abstracts) does not discard the other's translations.
                for column, placeholder in (("Title", "No Title"), ("Abstract", "No Abstract")):
                    try:
                        _translate_column(translator, df, column, placeholder)
                    except Exception as e:  # best effort: keep original text
                        failures.append(f"{column}: {e}")

            if failures:
                warning_text = get_translation(lang, "translation_warning")
                details = "; ".join(failures)
                st.warning(f"{warning_text} (Error: {details})", icon="⚠️")

    # Present a 1-based row index and localized column headers.
    df.index = range(1, len(df) + 1)
    df = df.rename(columns={
        "Title": get_translation(lang, "col_title"),
        "Abstract": get_translation(lang, "col_abstract"),
        "Authors": get_translation(lang, "col_authors"),
        "Link": get_translation(lang, "col_link"),
    })
    df.index.name = get_translation(lang, "col_index")
    st.dataframe(df)
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()