File size: 8,201 Bytes
cedac74
 
 
 
f73be48
cedac74
 
 
 
 
 
 
 
 
 
5a53f4f
cedac74
 
 
 
 
 
09d7a27
5a53f4f
f73be48
 
cedac74
 
 
 
 
 
 
 
5a53f4f
cedac74
 
 
 
 
 
09d7a27
5a53f4f
f73be48
09d7a27
cedac74
 
 
 
 
 
5a53f4f
cedac74
 
 
 
f73be48
cedac74
 
 
 
 
09d7a27
cedac74
 
5a53f4f
 
09d7a27
 
cedac74
09d7a27
cedac74
 
f73be48
 
cedac74
 
 
f73be48
cedac74
 
 
 
f73be48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cedac74
 
 
09d7a27
cedac74
 
09d7a27
cedac74
 
 
 
 
09d7a27
cedac74
 
 
09d7a27
5a53f4f
f73be48
cedac74
5a53f4f
f73be48
 
09d7a27
 
 
5a53f4f
 
09d7a27
 
 
 
f73be48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09d7a27
 
 
 
 
 
 
 
 
 
 
cedac74
 
09d7a27
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import streamlit as st
import requests
import xml.etree.ElementTree as ET
import pandas as pd
from googletrans import Translator

# UI string table keyed by language code ("en" / "zh").
# Both languages must define the same set of message keys, since
# get_translation() indexes into this dict directly.
translations = {
    "en": {
        "title": "PubMed Literature Search",
        "search_placeholder": "Enter search query (e.g., cancer treatment)",
        "search_button": "Search",
        "language_option": "English",
        "language_label": "Language",
        "results_title": "Search Results",
        "showing_results": "Showing {count} articles.",
        "error_message": "An error occurred while fetching data from PubMed.",
        "no_results": "No articles found for the given query.",
        "col_index": "Index",
        "col_title": "Title",
        "col_abstract": "Abstract",
        "col_authors": "Authors",
        "col_link": "Link",
        "spinner_searching": "Searching PubMed and fetching up to 200 articles...",
        "spinner_translating": "Translating results to Chinese (this may take a moment)...",
        "translation_warning": "Could not translate some entries. Displaying original text for those."
    },
    "zh": {
        "title": "PubMed 文献检索",
        "search_placeholder": "输入检索词(例如:cancer treatment)",
        "search_button": "检索",
        "language_option": "中文",
        "language_label": "语言",
        "results_title": "检索结果",
        "showing_results": "共找到 {count} 篇文献。",
        "error_message": "从 PubMed 获取数据时出错。",
        "no_results": "未找到相关文献。",
        "col_index": "序号",
        "col_title": "文献标题",
        "col_abstract": "文献摘要",
        "col_authors": "文献作者",
        "col_link": "文献链接",
        "spinner_searching": "正在检索 PubMed 并获取最多 200 篇文献...",
        "spinner_translating": "正在批量翻译结果 (请稍候)...",
        "translation_warning": "部分条目翻译失败,将显示原文。"
    }
}

def get_translation(lang, key):
    """Return the UI string for *key* in language *lang*.

    Falls back to the English table for an unknown language, and to the
    key itself for a missing entry, so a typo or an incomplete language
    table degrades gracefully instead of raising KeyError mid-render.
    """
    table = translations.get(lang, translations["en"])
    return table.get(key, translations["en"].get(key, key))

@st.cache_data(ttl=3600)
def search_pubmed(query, retmax=200):
    """Search PubMed via the ESearch E-utility and return matching PMIDs.

    Args:
        query: Free-text PubMed search expression.
        retmax: Maximum number of IDs to request (default 200).

    Returns:
        List of PMID strings; empty list on any network/HTTP failure
        (the error is surfaced to the user via st.error).
    """
    search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    # Pass the query via params= so requests URL-encodes it; the previous
    # raw f-string interpolation truncated/broke queries containing
    # reserved characters such as '&', '#', or quotes.
    params = {"db": "pubmed", "term": query, "retmax": retmax}
    try:
        search_response = requests.get(search_url, params=params, timeout=20)  # ID search is fast
        search_response.raise_for_status()
        search_root = ET.fromstring(search_response.content)
        return [id_elem.text for id_elem in search_root.findall(".//Id")]
    except requests.exceptions.RequestException as e:
        st.error(f"Error during PubMed ID search: {e}")
        return []

@st.cache_data(ttl=3600)
def fetch_articles(_id_list):
    """Fetch full PubMed records (XML) for the given PMIDs via EFetch.

    Args:
        _id_list: Sequence of PMID strings. The leading underscore tells
            Streamlit's cache not to attempt hashing the argument.

    Returns:
        Raw XML bytes from EFetch, or None when the list is empty or the
        request fails (error shown via st.error).
    """
    if not _id_list:
        return None
    fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    payload = {"db": "pubmed", "id": ",".join(_id_list), "rettype": "xml"}
    try:
        # POST keeps 200 comma-joined IDs out of the URL (NCBI recommends
        # POST when the id list is long); 90 s timeout accommodates the
        # large multi-article response.
        fetch_response = requests.post(fetch_url, data=payload, timeout=90)
        fetch_response.raise_for_status()
        return fetch_response.content
    except requests.exceptions.RequestException as e:
        # Previous message contained stray Thai text; normalized to English.
        st.error(f"Error during fetching article details (possibly a timeout): {e}")
        return None

def parse_articles(xml_data):
    """Parse PubMed EFetch XML into a list of article dicts.

    Args:
        xml_data: Raw XML bytes/str as returned by EFetch, or falsy.

    Returns:
        List of dicts with keys "Title", "Abstract", "Authors", "Link";
        empty list when xml_data is falsy.
    """
    articles = []
    if not xml_data:
        return articles
    root = ET.fromstring(xml_data)
    for article in root.findall(".//PubmedArticle"):
        title_elem = article.find(".//ArticleTitle")
        title = "".join(title_elem.itertext()) if title_elem is not None else "No Title"

        # Structured abstracts carry several <AbstractText> sections
        # (Background/Methods/Results/...); join them all instead of
        # keeping only the first section as before.
        abstract_parts = [
            "".join(sec.itertext())
            for sec in article.findall(".//Abstract/AbstractText")
        ]
        abstract = " ".join(p for p in abstract_parts if p) or "No Abstract"

        names = []
        for author in article.findall(".//Author"):
            collective = author.find("CollectiveName")
            if collective is not None and collective.text:
                # Consortium/group authors have no LastName/Initials.
                names.append(collective.text)
                continue
            last = author.find("LastName")
            initials = author.find("Initials")
            name = (
                f"{last.text if last is not None else ''} "
                f"{initials.text if initials is not None else ''}"
            ).strip()
            if name:  # skip empty entries so no dangling commas appear
                names.append(name)
        authors = ", ".join(names) if names else "No Authors"

        pmid_elem = article.find(".//PMID")
        pmid = pmid_elem.text if pmid_elem is not None else ""
        link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "No Link"

        articles.append({
            "Title": title,
            "Abstract": abstract,
            "Authors": authors,
            "Link": link
        })
    return articles

def main():
    """Streamlit entry point: render the search UI, query PubMed, and
    display results as a DataFrame, batch-translating to Chinese when the
    user selects the zh locale."""
    st.set_page_config(layout="wide", page_title="PubMed Search Tool")

    # Bilingual label so the selector is readable before a language is chosen.
    lang = st.sidebar.radio(
        get_translation("en", "language_label") + " / " + get_translation("zh", "language_label"),
        ("en", "zh"),
        format_func=lambda x: "English" if x == "en" else "中文"
    )

    st.title(get_translation(lang, "title"))
    search_query = st.text_input("", placeholder=get_translation(lang, "search_placeholder"))

    if st.button(get_translation(lang, "search_button")):
        if search_query:
            with st.spinner(get_translation(lang, "spinner_searching")):
                id_list = search_pubmed(search_query, retmax=200)
                articles = []
                if id_list:
                    # tuple() makes the argument hashable for st.cache_data.
                    xml_data = fetch_articles(tuple(id_list))
                    if xml_data:
                        articles = parse_articles(xml_data)

            if articles:
                st.subheader(get_translation(lang, "results_title"))
                st.info(get_translation(lang, "showing_results").format(count=len(articles)))
                
                df = pd.DataFrame(articles)

                if lang == 'zh':
                    with st.spinner(get_translation(lang, "spinner_translating")):
                        try:
                            translator = Translator()
                            df_translated = df.copy()

                            # Batch-translation optimization: collect all
                            # titles and abstracts that actually need
                            # translating (skip the "No Title"/"No Abstract"
                            # placeholders).
                            titles_to_translate = df_translated[df_translated['Title'] != "No Title"]['Title'].tolist()
                            abstracts_to_translate = df_translated[df_translated['Abstract'] != "No Abstract"]['Abstract'].tolist()
                            
                            # Remember the matching row indices so results
                            # can be written back in place.
                            title_indices = df_translated[df_translated['Title'] != "No Title"].index
                            abstract_indices = df_translated[df_translated['Abstract'] != "No Abstract"].index

                            # Translate all titles in a single call.
                            # NOTE(review): passing a list and reading .text
                            # relies on the synchronous googletrans 3.x API —
                            # confirm the installed version (4.x is async).
                            if titles_to_translate:
                                translated_titles = translator.translate(titles_to_translate, dest='zh-cn')
                                df_translated.loc[title_indices, 'Title'] = [t.text for t in translated_titles]

                            # Translate all abstracts in a single call.
                            if abstracts_to_translate:
                                translated_abstracts = translator.translate(abstracts_to_translate, dest='zh-cn')
                                df_translated.loc[abstract_indices, 'Abstract'] = [t.text for t in translated_abstracts]
                            
                            df = df_translated
                        except Exception as e:
                            # Best-effort: on any translation failure keep the
                            # untranslated frame and warn the user.
                            st.warning(f"{get_translation(lang, 'translation_warning')} (Error: {e})", icon="⚠️")

                # 1-based index and localized column headers for display.
                df.index = range(1, len(df) + 1)
                df.rename(columns={
                    "Title": get_translation(lang, "col_title"),
                    "Abstract": get_translation(lang, "col_abstract"),
                    "Authors": get_translation(lang, "col_authors"),
                    "Link": get_translation(lang, "col_link")
                }, inplace=True)
                df.index.name = get_translation(lang, "col_index")
                st.dataframe(df)
            else:
                st.warning(get_translation(lang, "no_results"))

# Standard script entry-point guard: run the Streamlit app.
if __name__ == "__main__":
    main()