| |
| """ |
| Created on Sun Dec 11 19:51:02 2022 |
| Modified on Sun Aug 24 22:45:00 2025 to streamline code |
| @author: ruben |
| """ |
|
|
| import streamlit as st |
| from biconWebStmParquetWa import regex_search as rs |
|
|
def init_session_state():
    """Seed ``st.session_state`` with a default for every key the app uses.

    A key that already exists keeps its current value; only missing keys
    are created.
    """
    # Built inside the function so the list default is a fresh object on
    # every call rather than one shared instance.
    defaults = {
        'queryresults': None,
        'page': 1,
        'maxpage': 0,
        'minpage': 1,
        'datasize': 0,
        'chunksize': 20,
        'success_messages': [],
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
|
|
def reset_session_state():
    """Put the search-related session values back to their starting state.

    ``chunksize`` is not touched here; every other key the app keeps in
    ``st.session_state`` is overwritten.
    """
    fresh_values = {
        'queryresults': None,
        'page': 1,
        'maxpage': 0,
        'minpage': 1,
        'datasize': 0,
        'success_messages': [],
    }
    for key, value in fresh_values.items():
        st.session_state[key] = value
|
|
def buildTable(page, slices=None):
    """Build the HTML table for one page of search results.

    Args:
        page: 1-based number of the page to render.
        slices: Optional list of pages, each page being a list of result
            strings with four tab-separated fields (corpus, score, English
            text, Chinese text). When omitted, the pages stored in
            ``st.session_state.queryresults`` are used.

    Returns:
        An HTML ``<table>`` string with two ``<tr>`` rows per result. The
        table is empty when there are no results or ``page`` is out of
        range. Entries that do not split into exactly four fields are
        skipped but still consume a row number.
    """
    if slices is None:
        slices = st.session_state.queryresults

    parts = ['<table width="100%">']
    if slices and 1 <= page <= len(slices):
        # Number rows from the real sizes of the preceding pages rather than
        # from the current chunk-size setting, which may have changed since
        # the pages were sliced.
        first_number = sum(len(chunk) for chunk in slices[:page - 1]) + 1
        for number, line in enumerate(slices[page - 1], start=first_number):
            try:
                corpus, score, en, zh = line.split('\t')
            except (AttributeError, ValueError):
                # Not a string, or not exactly four tab-separated fields.
                continue
            # NOTE(review): fields are inserted without HTML escaping,
            # presumably because the search results may carry their own
            # highlight markup -- confirm before escaping them here.
            parts.append(
                '<tr>'
                f'<td>{corpus}</td><td colspan=2>{score}</td>'
                '</tr>'
                '<tr>'
                f'<td>{number}</td><td width="45%" valign="top">{en}</td><td width="50%" valign="top">{zh}</td>'
                '</tr>'
            )
    parts.append('</table>')
    return ''.join(parts)
|
|
# Page-wide CSS injected once per run. Kept at column 0 so the leading
# ``<style>`` tag is never indented when passed to ``st.markdown``.
_PAGE_STYLE = '''
<style>
.css-o18uir.e16nr0p33 {
margin-top: -125px;
}
.reportview-container .css-1lcbmhc .css-1outpf7 {
padding-top: -125px;
}
.reportview-container .main .block-container{
padding-top: 0rem;
padding-right: 0rem;
padding-left: 0rem;
padding-bottom: 0rem;}
p.europe {
font-family: Source Pro, Consolas, LingWai TC, Menlo, Courier New, Arial;
font-size: 16px;
}
p.cjk {
font-family: Microsoft Jhenghei, Source Han Sans, Noto Sans CJK TC Regular, Hiragino Sans CNS, LantingHei TC, Source Han Serif;
font-size: 18px;
}
</style>
'''


def _paginate(rows, chunk_size):
    """Split ``rows`` into consecutive lists of at most ``chunk_size`` items."""
    return [rows[i:i + chunk_size] for i in range(0, len(rows), chunk_size)]


def main():
    """Run the Streamlit search app.

    Draws the sidebar (query box, corpus picker, search options, page-size
    slider and paging buttons), calls ``rs`` once per selected corpus when
    the search button is pressed, and renders the current page of results
    into a placeholder in the main area. Everything that must survive a
    rerun is kept in ``st.session_state``, whose keys are expected to exist
    already (see ``init_session_state``).
    """
    app_title = '臺大法規雙語查詢系統'
    sources = ('NTURegs', 'VOA')

    st.set_page_config(
        page_title=app_title,
        layout='wide',
        initial_sidebar_state='auto',
        menu_items={
            'Get Help': 'https://streamlit.io/',
            'Report a bug': 'https://github.com',
            'About': f'**{app_title}**\nCopyright (c) Ruben G. Tsui'
        }
    )
    st.markdown(_PAGE_STYLE, unsafe_allow_html=True)
    st.sidebar.subheader(app_title)

    # Reserve the results slot before the sidebar widgets are declared.
    table_placeholder = st.empty()

    with st.sidebar:
        query = st.text_area('輸入搜尋字串').strip()
        multicorpora = st.multiselect('選擇語料庫(可複選)', sources, ['NTURegs'])

        colc, cold = st.columns([1, 1])
        with colc:
            submit_button = st.button('搜尋')
        with cold:
            # The selection is not consumed: the search below always passes
            # literal=False, so the widget is display-only for now.
            st.radio("Regex search", ["Yes", "Always"], horizontal=True)

        cola, colb = st.columns([1, 1])
        with cola:
            size = st.selectbox('筆數上限', [10, 20, 50, 100, 200, 500, 5000], index=2)
        with colb:
            case_sensitive = st.radio("Case sensitive", ["No", "Yes"], horizontal=True)

        st.session_state.chunksize = st.slider("每頁筆數", 1, 50, 20)

        col1, col2, col3, col4 = st.columns([1, 1, 1, 1])
        with col1:
            first_button = st.button('First')
        with col2:
            prev_button = st.button('Prev')
        with col3:
            next_button = st.button('Next')
        with col4:
            last_button = st.button('Last')

    state = st.session_state

    # The page-size slider can move after a search was run. Re-chunk the
    # stored results so page contents and the page count always agree with
    # the current chunk size.
    if state.queryresults is not None:
        stored_rows = [row for chunk in state.queryresults for row in chunk]
        state.queryresults = _paginate(stored_rows, state.chunksize)
        state.maxpage = len(state.queryresults)

    if next_button and state.page < state.maxpage:
        state.page += 1
    if prev_button and state.page > state.minpage:
        state.page -= 1
    if first_button:
        state.page = state.minpage
    if last_button:
        state.page = state.maxpage

    if submit_button:
        reset_session_state()
        all_results = []
        success_messages = ['No. of matches found: ']
        for corpus_name in multicorpora:
            results = rs(
                query,
                c=sources.index(corpus_name),
                max_matches=size,
                case_sensitive=(case_sensitive == "Yes"),
                literal=False,
            )
            success_messages.append(f'[{corpus_name}]: {len(results)}')
            all_results.extend(results)

        state.success_messages = success_messages
        state.datasize = len(all_results)
        state.queryresults = _paginate(all_results, state.chunksize)
        state.maxpage = len(state.queryresults)

    # Keep the page inside [minpage, maxpage]. "Last" with zero pages, or a
    # larger chunk size shrinking the page count, would otherwise leave it
    # out of range.
    highest_page = max(state.maxpage, state.minpage)
    state.page = min(max(state.page, state.minpage), highest_page)

    if state.queryresults is not None:
        table_placeholder.empty()
        with table_placeholder.container():
            if state.success_messages:
                messages = " | ".join(state.success_messages)
                st.markdown(f'''
                <div style="
                border: 1px solid yellow;
                padding: 5px;
                border-radius: 5px;
                font-size: 0.9em;
                margin-bottom: 10px;
                background-color: black;
                color: yellow;
                margin-top: -25px;">
                {messages}
                </div>
                ''', unsafe_allow_html=True)

            st.markdown(f"page {state.page} of {state.maxpage}")
            table = buildTable(state.page)
            st.markdown(table, unsafe_allow_html=True)
|
|
if __name__ == "__main__":
    # Session defaults are created first because main() reads and writes
    # those st.session_state keys.
    init_session_state()
    main()
|
|