"""Gradio front end for searching recruiting US clinical trials.

The app collects age, sex, state and optional keywords, calls
``main2.search_trials`` and shows the matches five at a time in an HTML table
with an extractive "LaymanSummary" column and collapsible long cells.

NOTE(review): this module was restored from a copy in which newlines and
indentation had been collapsed and every ``<...>`` span had been stripped.
The following pieces could not be recovered and were reconstructed; compare
them with version control before relying on them:

* the sentence-splitting regex in ``split_sentences``;
* how a result row is turned into text, plus the signature defaults and the
  length filter of ``generate_summary``;
* every HTML/CSS literal (table markup, read-more markup, "no results"
  message).
"""
import re
from html import escape

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

from main2 import search_trials  # Import your updated search_trials

PAGE_SIZE = 5
PREVIEW_WORDS = 100  # Number of words in collapsed preview

US_STATES = [
    "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado",
    "Connecticut", "Delaware", "Florida", "Georgia", "Hawaii", "Idaho",
    "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine",
    "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi",
    "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey",
    "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio",
    "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina",
    "South Dakota", "Tennessee", "Texas", "Utah", "Vermont", "Virginia",
    "Washington", "West Virginia", "Wisconsin", "Wyoming",
    "District of Columbia",
]

# NOTE(review): reconstructed markup -- the original literal was stripped.
NO_RESULTS_HTML = "<p>No matching trials found.</p>"

# NOTE(review): reconstructed styling -- the original style/script literal
# that opened the table output was stripped entirely.
_TABLE_STYLE = (
    "<style>"
    ".trial-table { border-collapse: collapse; width: 100%; }"
    ".trial-table th, .trial-table td { border: 1px solid #ccc;"
    " padding: 6px; vertical-align: top; text-align: left; }"
    ".trial-table summary { cursor: pointer; }"
    "</style>"
)

# NOTE(review): only the leading "(?" of the original pattern survived; a
# split on whitespace that follows '.', '!' or '?' is assumed.
_SENTENCE_BOUNDARY_RE = re.compile(r"(?<=[.!?])\s+")

# Label-prefixed sentences that are dropped from the summary on the first
# pass (pattern copied verbatim from the surviving source).
_METADATA_SENTENCE_RE = re.compile(
    r"^(Start Date|Primary Completion Date|Intervention Name|"
    r"Primary Outcome Measure|Primary Outcome Description):"
)


def split_sentences(text):
    """Split *text* into stripped, non-empty sentences."""
    return [s.strip() for s in _SENTENCE_BOUNDARY_RE.split(text) if s.strip()]


def _is_missing(value):
    """Return True for None / NaN / NA / NaT scalars."""
    if value is None or value is pd.NA or value is pd.NaT:
        return True
    return isinstance(value, float) and np.isnan(value)


def _row_to_text(row):
    """Flatten a result row into ``"<column>: <value>."`` sentences.

    NOTE(review): the original text-building code was lost. The surviving
    metadata filter matches sentences beginning with labels such as
    ``"Start Date:"``, so a label-prefixed layout is assumed here; the real
    column selection must be confirmed against the original source.
    """
    parts = []
    for label, value in row.items():
        if _is_missing(value):
            continue
        text = str(value).strip()
        if not text:
            continue
        if text[-1] not in ".!?":
            # Terminate the field so it is not merged with the next one.
            text += "."
        parts.append(f"{label}: {text}")
    return " ".join(parts)


def generate_summary(row, max_sentences=5, min_sentence_length=5):
    """Build an extractive summary for one result row.

    Sentences are scored by the sum of their TF-IDF weights, multiplied by a
    position weight that falls linearly from 1.5 (first sentence) to 1.0
    (last sentence). The best ``max_sentences`` are returned in document
    order, with label-prefixed metadata sentences only used as filler.

    NOTE(review): the defaults and the word-count interpretation of
    ``min_sentence_length`` are assumptions; only
    ``... >= min_sentence_length]`` survived in the source.

    Args:
        row: One row of the results frame (called via
            ``DataFrame.apply(..., axis=1)``).
        max_sentences: Maximum number of sentences in the summary.
        min_sentence_length: Minimum number of words for a sentence to be
            considered.

    Returns:
        The summary string, or ``""`` when no sentence qualifies.
    """
    sentences = [
        s for s in split_sentences(_row_to_text(row))
        if len(s.split()) >= min_sentence_length
    ]
    if not sentences:
        return ""
    if len(sentences) <= max_sentences:
        return " ".join(sentences)

    try:
        vectorizer = TfidfVectorizer(stop_words="english")
        tfidf_matrix = vectorizer.fit_transform(sentences)
    except ValueError:
        # scikit-learn raises ValueError for an empty vocabulary (e.g. only
        # stop words); fall back to the leading sentences instead of
        # aborting the whole page render.
        return " ".join(sentences[:max_sentences])

    scores = np.array(tfidf_matrix.sum(axis=1)).flatten()
    position_weights = np.linspace(1.5, 1.0, num=len(sentences))
    combined_scores = scores * position_weights

    # Best-scoring sentences, restored to their original order.
    top_indices = sorted(combined_scores.argsort()[-max_sentences:])

    summary_sentences = [
        sentences[i] for i in top_indices
        if not _METADATA_SENTENCE_RE.match(sentences[i])
    ]
    # Backfill with the skipped metadata sentences if the summary is short.
    for i in top_indices:
        if len(summary_sentences) >= max_sentences:
            break
        if sentences[i] not in summary_sentences:
            summary_sentences.append(sentences[i])

    return " ".join(summary_sentences[:max_sentences])


def run_search(age, sex, state, keywords):
    """Run the trial search and return the first page.

    Returns:
        ``(page_df, total_pages, full_df)``; ``(empty frame, 0, None)`` when
        nothing matched. The first page carries an empty ``LaymanSummary``
        column; summaries are filled in by ``load_page``.
    """
    df = search_trials(
        user_age=age,
        user_sex=sex,
        user_state=state,
        user_keywords=keywords,
        generate_summaries=False,
    )
    # ``None`` is treated like an empty result, matching load_page and
    # on_page_change.
    if df is None or df.empty:
        return pd.DataFrame(), 0, None

    total_pages = (len(df) + PAGE_SIZE - 1) // PAGE_SIZE
    page_df = df.iloc[:PAGE_SIZE].copy()
    page_df['LaymanSummary'] = ""
    return page_df, total_pages, df


def load_page(page_num, full_df):
    """Return page *page_num* (0-based) of *full_df* with summaries added."""
    if full_df is None or full_df.empty:
        return pd.DataFrame()

    start = page_num * PAGE_SIZE
    end = start + PAGE_SIZE
    page_df = full_df.iloc[start:end].copy()
    if page_df.empty:
        # Out-of-range page: row-wise apply on an empty frame does not
        # produce a Series that can be assigned to a column.
        return page_df
    page_df['LaymanSummary'] = page_df.apply(generate_summary, axis=1)
    return page_df


def update_page_controls(page_num, total_pages):
    """Return visibility updates for Previous/Next and the page label."""
    prev_visible = gr.update(visible=page_num > 0)
    next_visible = gr.update(visible=page_num < total_pages - 1)
    page_text = f"Page {page_num + 1} of {total_pages}" if total_pages > 0 else ""
    return prev_visible, next_visible, page_text


def hide_empty_columns(df):
    """Drop columns whose values are all missing or blank strings."""
    cols_to_keep = []
    for col in df.columns:
        col_values = df[col].dropna().astype(str).str.strip()
        if not col_values.empty and any(val != "" for val in col_values):
            cols_to_keep.append(col)
    return df[cols_to_keep]


def df_to_html_with_readmore(df: pd.DataFrame) -> str:
    """Render *df* as an HTML table with collapsible long cells.

    ``LaymanSummary`` is moved to the first column, all-empty columns are
    hidden, and any cell longer than ``PREVIEW_WORDS`` words shows a
    truncated preview that expands to the full text. Column names and cell
    values are HTML-escaped.

    NOTE(review): all tags are reconstructed. ``<details>``/``<summary>`` is
    used for the read-more so that no JavaScript is required.
    """
    if df.empty:
        return NO_RESULTS_HTML

    if "LaymanSummary" in df.columns:
        cols = list(df.columns)
        cols.insert(0, cols.pop(cols.index("LaymanSummary")))
        df = df[cols]

    df = hide_empty_columns(df)

    html = [_TABLE_STYLE]
    html.append('<table class="trial-table"><thead><tr>')
    for col in df.columns:
        html.append(f'<th>{escape(str(col))}</th>')
    html.append('</tr></thead><tbody>')

    for _, row in df.iterrows():
        html.append('<tr>')
        for col in df.columns:
            val = str(row[col])
            words = val.split()
            if len(words) > PREVIEW_WORDS:
                short_text = escape(" ".join(words[:PREVIEW_WORDS]) + "...")
                full_text = escape(val)
                cell_html = (
                    f'<details><summary>{short_text}</summary>'
                    f'<div>{full_text}</div></details>'
                )
            else:
                cell_html = f'<div>{escape(val)}</div>'
            html.append(f'<td>{cell_html}</td>')
        html.append('</tr>')

    html.append('</tbody></table>')
    return "".join(html)


def on_search(age, sex, state, keywords):
    """Handle the Search button: run the search and show the results page.

    Returns, in order: table HTML, page label, Previous update, Next update,
    page number, total pages, full results frame, input-page visibility
    update, results-page visibility update.
    """
    df_page, total_pages, full_df = run_search(age, sex, state, keywords)
    page_num = 0
    if not df_page.empty:
        # Reload page 0 so it carries generated summaries.
        df_page = load_page(page_num, full_df)
    prev_vis, next_vis, page_text = update_page_controls(page_num, total_pages)
    html_output = df_to_html_with_readmore(df_page)
    return (
        html_output,
        page_text,
        prev_vis,
        next_vis,
        page_num,
        total_pages,
        full_df,
        gr.update(visible=False),
        gr.update(visible=True),
    )


def on_page_change(increment, page_num, total_pages, full_df):
    """Move *increment* pages from *page_num*, clamped to the valid range.

    Returns, in order: table HTML, page label, Previous update, Next update,
    new page number.
    """
    if full_df is None or full_df.empty:
        return (
            NO_RESULTS_HTML,
            "",
            gr.update(visible=False),
            gr.update(visible=False),
            0,
        )

    new_page = max(0, min(page_num + increment, total_pages - 1))
    page_df = load_page(new_page, full_df)
    prev_vis, next_vis, page_text = update_page_controls(new_page, total_pages)
    html_output = df_to_html_with_readmore(page_df)
    return html_output, page_text, prev_vis, next_vis, new_page


def show_input_page():
    """Show the input form and hide the results page."""
    return gr.update(visible=True), gr.update(visible=False)


with gr.Blocks() as demo:
    gr.Markdown("# Clinical Trials Search Tool with Pagination and Inline Read More")

    with gr.Column(visible=True) as input_page:
        gr.Markdown("Find **recruiting US clinical trials** that match your **age**, **sex**, **state**, and optional **keywords**.")
        with gr.Row():
            age_input = gr.Number(label="Your Age", value=30)
            sex_input = gr.Dropdown(["Male", "Female", "All"], label="Sex", value="All")
        with gr.Row():
            state_input = gr.Dropdown(US_STATES, label="State", value="California")
            keywords_input = gr.Textbox(label="Keywords", placeholder="e.g., Cancer, Diabetes")
        search_btn = gr.Button("Search Trials")

    with gr.Column(visible=False) as results_page:
        output_html = gr.HTML()
        total_pages_text = gr.Textbox(value="", interactive=False)
        with gr.Row():
            prev_btn = gr.Button("Previous Page")
            next_btn = gr.Button("Next Page")
        back_btn = gr.Button("Back")

    # Per-session state: current page, page count and the full result frame.
    page_num_state = gr.State(0)
    total_pages_state = gr.State(0)
    full_results_state = gr.State(None)

    search_btn.click(
        fn=on_search,
        inputs=[age_input, sex_input, state_input, keywords_input],
        outputs=[
            output_html, total_pages_text, prev_btn, next_btn,
            page_num_state, total_pages_state, full_results_state,
            input_page, results_page,
        ],
    )

    # The constant gr.State components supply the page increment (+1 / -1).
    next_btn.click(
        fn=on_page_change,
        inputs=[gr.State(1), page_num_state, total_pages_state, full_results_state],
        outputs=[output_html, total_pages_text, prev_btn, next_btn, page_num_state],
    )

    prev_btn.click(
        fn=on_page_change,
        inputs=[gr.State(-1), page_num_state, total_pages_state, full_results_state],
        outputs=[output_html, total_pages_text, prev_btn, next_btn, page_num_state],
    )

    back_btn.click(
        fn=show_input_page,
        outputs=[input_page, results_page],
    )

if __name__ == "__main__":
    demo.launch()