Spaces:

hack4hope
/

model

Sleeping

App Files Community

swarit222 committed on Aug 10, 2025

Commit

fcca247

verified ·

1 Parent(s): d376c61

Update app.py

Browse files

Files changed (1) hide show

app.py +184 -88

app.py CHANGED Viewed

@@ -1,88 +1,90 @@
 import gradio as gr
 import pandas as pd
-from main2 import search_trials  # Your updated search_trials includes summary generation
 PAGE_SIZE = 5
 def run_search(age, sex, state, keywords):
-    # Run search WITHOUT generating summaries initially
     df = search_trials(
         user_age=age,
         user_sex=sex,
         user_state=state,
         user_keywords=keywords,
-        generate_summaries=False  # generate summaries page-wise
     )
     if df.empty:
         return pd.DataFrame(), 0, None
     total_pages = (len(df) + PAGE_SIZE - 1) // PAGE_SIZE
     page_df = df.iloc[:PAGE_SIZE].copy()
-    page_df['LaymanSummary'] = ""  # empty summary placeholder
     return page_df, total_pages, df
-def generate_summary_for_row(row):
-    # Use the generate_summary helper inside search_trials function, or reimplement here if needed
-    # Since generate_summary is inside search_trials, just call search_trials with generate_summaries=True on 1 row doesn't work.
-    # So, for simplicity, re-implement the summary logic here or expose generate_summary separately.
-    # But easiest: call search_trials with generate_summaries=True on page data and extract LaymanSummary.
-    # To avoid overhead, let's generate summaries for the page using search_trials with generate_summaries=True
-    pass
 def load_page(page_num, full_df):
     if full_df is None or full_df.empty:
         return pd.DataFrame()
     start = page_num * PAGE_SIZE
     end = start + PAGE_SIZE
     page_df = full_df.iloc[start:end].copy()
-    # Generate summaries for current page only using your own generate_summary inside search_trials
-    # Since generate_summary is local inside search_trials, call search_trials with this subset and generate_summaries=True
-    # Create minimal subset dataframe similar to full_df slice for summary generation
-    page_df_with_summaries = search_trials(
-        user_age=0,    # dummy values; ignored because filtering is done on df subset
-        user_sex="all",
-        user_state="all",
-        user_keywords=[],
-        generate_summaries=True
-    )
-    # The above won't work as is because it re-filters dataset; instead do it manually:
-    # Workaround: Re-apply generate_summary function here explicitly for each row
-    # Re-implement generate_summary here from your main2.py for page_df only:
-    import re
-    from sklearn.feature_extraction.text import TfidfVectorizer
-    import numpy as np
-    def split_sentences(text):
-        return re.split(r'(?<=[.!?])\s+', text.strip())
-    def build_input_text(row):
-        text_parts = [
-            f"Intervention Name: {row.get('InterventionName', '')}",
-            f"Intervention Description: {row.get('InterventionDescription', '')}",
-            f"Brief Summary: {row.get('BriefSummary', '')}",
-            f"Primary Outcome Measure: {row.get('PrimaryOutcomeMeasure', '')}",
-            f"Primary Outcome Description: {row.get('PrimaryOutcomeDescription', '')}",
-            f"Start Date: {row.get('StartDate', '')}",
-            f"Detailed Description: {row.get('DetailedDescription', '')}",
-        ]
-        return " ".join([part for part in text_parts if part.strip()])
-    def generate_summary(row, num_sentences=5):
-        text = build_input_text(row)
-        if not text.strip():
-            return ""
-        sentences = split_sentences(text)
-        if len(sentences) <= num_sentences:
-            return " ".join(sentences)
-        vectorizer = TfidfVectorizer(stop_words="english")
-        tfidf_matrix = vectorizer.fit_transform(sentences)
-        scores = np.array(tfidf_matrix.sum(axis=1)).flatten()
-        top_indices = scores.argsort()[-num_sentences:][::-1]
-        top_indices = sorted(top_indices)
-        summary_sentences = [sentences[i] for i in top_indices]
-        return " ".join(summary_sentences)
     page_df['LaymanSummary'] = page_df.apply(generate_summary, axis=1)
     return page_df
@@ -92,61 +94,155 @@ def update_page_controls(page_num, total_pages):
     page_text = f"Page {page_num + 1} of {total_pages}" if total_pages > 0 else ""
     return prev_visible, next_visible, page_text
 def on_search(age, sex, state, keywords):
     df_page, total_pages, full_df = run_search(age, sex, state, keywords)
     page_num = 0
     if not df_page.empty:
         df_page = load_page(page_num, full_df)
     prev_vis, next_vis, page_text = update_page_controls(page_num, total_pages)
-    return df_page, page_text, prev_vis, next_vis, page_num, total_pages, full_df
 def on_page_change(increment, page_num, total_pages, full_df):
     if full_df is None or full_df.empty:
-        return pd.DataFrame(), "", gr.update(visible=False), gr.update(visible=False), 0
     new_page = max(0, min(page_num + increment, total_pages - 1))
     page_df = load_page(new_page, full_df)
     prev_vis, next_vis, page_text = update_page_controls(new_page, total_pages)
-    return page_df, page_text, prev_vis, next_vis, new_page
-with gr.Blocks() as demo:
-    gr.Markdown("# Clinical Trials Search Tool with Pagination")
-    with gr.Row():
-        age_input = gr.Number(label="Your Age", value=30)
-        sex_input = gr.Dropdown(["Male", "Female", "All"], label="Sex", value="All")
-    with gr.Row():
-        state_input = gr.Textbox(label="State (full name or abbreviation)", placeholder="e.g., California")
-        keywords_input = gr.Textbox(label="Keywords (comma separated)", placeholder="e.g., Cancer, Diabetes")
-    search_btn = gr.Button("Search Trials")
-    output_table = gr.Dataframe(label="Matching Trials", interactive=False)
-    total_pages_text = gr.Textbox(value="", interactive=False)
-    prev_btn = gr.Button("Previous Page")
-    next_btn = gr.Button("Next Page")
     page_num_state = gr.State(0)
     total_pages_state = gr.State(0)
     full_results_state = gr.State(None)
     search_btn.click(
         fn=on_search,
         inputs=[age_input, sex_input, state_input, keywords_input],
-        outputs=[output_table, total_pages_text, prev_btn, next_btn, page_num_state, total_pages_state, full_results_state]
     )
     next_btn.click(
         fn=on_page_change,
         inputs=[gr.State(1), page_num_state, total_pages_state, full_results_state],
-        outputs=[output_table, total_pages_text, prev_btn, next_btn, page_num_state]
     )
     prev_btn.click(
         fn=on_page_change,
         inputs=[gr.State(-1), page_num_state, total_pages_state, full_results_state],
-        outputs=[output_table, total_pages_text, prev_btn, next_btn, page_num_state]
     )
 if __name__ == "__main__":

 import gradio as gr
 import pandas as pd
+import re
+from sklearn.feature_extraction.text import TfidfVectorizer
+import numpy as np
+from main2 import search_trials  # Import your updated search_trials
 PAGE_SIZE = 5
+PREVIEW_WORDS = 100  # Number of words in collapsed preview
+US_STATES = [
+    "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", "Delaware",
+    "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
+    "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi",
+    "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
+    "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
+    "Rhode Island", "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Vermont",
+    "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming", "District of Columbia"
+]
+def split_sentences(text):
+    return [s.strip() for s in re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', text) if s.strip()]
+def build_input_text(row):
+    text_parts = [
+        f"Intervention Name: {row.get('InterventionName', '')}",
+        f"Intervention Description: {row.get('InterventionDescription', '')}",
+        f"Brief Summary: {row.get('BriefSummary', '')}",
+        f"Primary Outcome Measure: {row.get('PrimaryOutcomeMeasure', '')}",
+        f"Primary Outcome Description: {row.get('PrimaryOutcomeDescription', '')}",
+        f"Start Date: {row.get('StartDate', '')}",
+        f"Primary Completion Date: {row.get('PrimaryCompletionDate', '')}"
+    ]
+    return " ".join([part for part in text_parts if part.strip()])
+def generate_summary(row, max_sentences=7, min_sentence_length=5):
+    text = build_input_text(row)
+    if not text.strip():
+        return ""
+    sentences = split_sentences(text)
+    sentences = [s for s in sentences if len(s.split()) >= min_sentence_length]
+    if not sentences:
+        return ""
+    if len(sentences) <= max_sentences:
+        return " ".join(sentences)
+    vectorizer = TfidfVectorizer(stop_words="english")
+    tfidf_matrix = vectorizer.fit_transform(sentences)
+    scores = np.array(tfidf_matrix.sum(axis=1)).flatten()
+    position_weights = np.linspace(1.5, 1.0, num=len(sentences))
+    combined_scores = scores * position_weights
+    top_indices = combined_scores.argsort()[-max_sentences:][::-1]
+    top_indices = sorted(top_indices)
+    summary_sentences = []
+    for i in top_indices:
+        s = sentences[i]
+        if re.match(r"^(Start Date|Primary Completion Date|Intervention Name|Primary Outcome Measure|Primary Outcome Description):", s):
+            continue
+        summary_sentences.append(s)
+    if len(summary_sentences) < max_sentences:
+        for i in top_indices:
+            if len(summary_sentences) >= max_sentences:
+                break
+            if sentences[i] not in summary_sentences:
+                summary_sentences.append(sentences[i])
+    return " ".join(summary_sentences[:max_sentences])
 def run_search(age, sex, state, keywords):
     df = search_trials(
         user_age=age,
         user_sex=sex,
         user_state=state,
         user_keywords=keywords,
+        generate_summaries=False
     )
     if df.empty:
         return pd.DataFrame(), 0, None
     total_pages = (len(df) + PAGE_SIZE - 1) // PAGE_SIZE
     page_df = df.iloc[:PAGE_SIZE].copy()
+    page_df['LaymanSummary'] = ""
     return page_df, total_pages, df
 def load_page(page_num, full_df):
     if full_df is None or full_df.empty:
         return pd.DataFrame()
     start = page_num * PAGE_SIZE
     end = start + PAGE_SIZE
     page_df = full_df.iloc[start:end].copy()
     page_df['LaymanSummary'] = page_df.apply(generate_summary, axis=1)
     return page_df
     page_text = f"Page {page_num + 1} of {total_pages}" if total_pages > 0 else ""
     return prev_visible, next_visible, page_text
+def hide_empty_columns(df):
+    cols_to_keep = []
+    for col in df.columns:
+        col_values = df[col].dropna().astype(str).str.strip()
+        if not col_values.empty and any(val != "" for val in col_values):
+            cols_to_keep.append(col)
+    return df[cols_to_keep]
+def df_to_html_with_readmore(df: pd.DataFrame) -> str:
+    if df.empty:
+        return "<p>No matching trials found.</p>"
+    from html import escape
+    if "LaymanSummary" in df.columns:
+        cols = list(df.columns)
+        cols.insert(0, cols.pop(cols.index("LaymanSummary")))
+        df = df[cols]
+    df = hide_empty_columns(df)
+    html = ['''
+    <style>
+        table {
+            width: 100%;
+            border-collapse: collapse;
+            font-family: Arial, sans-serif;
+        }
+        th {
+            background-color: #007bff;
+            color: white;
+            padding: 12px;
+            text-align: left;
+            border: 1px solid #ddd;
+        }
+        td {
+            border: 1px solid #ddd;
+            padding: 12px;
+            vertical-align: top;
+            white-space: normal;
+            max-width: 1000px; /* 2.5x original 400px */
+            min-width: 1000px; /* force width */
+            word-wrap: break-word;
+        }
+        details summary {
+            cursor: pointer;
+            color: #007bff;
+            font-weight: bold;
+        }
+        details summary:after {
+            content: " (Read More)";
+            color: #0056b3;
+            font-weight: normal;
+        }
+        details[open] summary {
+            display: none; /* hide preview when expanded */
+        }
+        details div.full-text {
+            display: none;
+        }
+        details[open] div.full-text {
+            display: block;
+            margin-top: 8px;
+        }
+    </style>
+    ''']
+    html.append('<table><thead><tr>')
+    for col in df.columns:
+        html.append(f'<th>{escape(col)}</th>')
+    html.append('</tr></thead><tbody>')
+    for _, row in df.iterrows():
+        html.append('<tr>')
+        for col in df.columns:
+            val = str(row[col])
+            words = val.split()
+            if len(words) > PREVIEW_WORDS:
+                short_text = escape(" ".join(words[:PREVIEW_WORDS]) + "...")
+                full_text = escape(val)
+                cell_html = f'''
+                <div>
+                    <details>
+                        <summary>{short_text}</summary>
+                        <div class="full-text">{full_text}</div>
+                    </details>
+                </div>
+                '''
+            else:
+                cell_html = f'<div>{escape(val)}</div>'
+            html.append(f'<td>{cell_html}</td>')
+        html.append('</tr>')
+    html.append('</tbody></table>')
+    return "".join(html)
 def on_search(age, sex, state, keywords):
     df_page, total_pages, full_df = run_search(age, sex, state, keywords)
     page_num = 0
     if not df_page.empty:
         df_page = load_page(page_num, full_df)
     prev_vis, next_vis, page_text = update_page_controls(page_num, total_pages)
+    html_output = df_to_html_with_readmore(df_page)
+    return html_output, page_text, prev_vis, next_vis, page_num, total_pages, full_df, gr.update(visible=False), gr.update(visible=True)
 def on_page_change(increment, page_num, total_pages, full_df):
     if full_df is None or full_df.empty:
+        return "<p>No matching trials found.</p>", "", gr.update(visible=False), gr.update(visible=False), 0
     new_page = max(0, min(page_num + increment, total_pages - 1))
     page_df = load_page(new_page, full_df)
     prev_vis, next_vis, page_text = update_page_controls(new_page, total_pages)
+    html_output = df_to_html_with_readmore(page_df)
+    return html_output, page_text, prev_vis, next_vis, new_page
+def show_input_page():
+    return gr.update(visible=True), gr.update(visible=False)
+with gr.Blocks() as demo:
+    gr.Markdown("# Clinical Trials Search Tool with Pagination and Inline Read More")
+    with gr.Column(visible=True) as input_page:
+        gr.Markdown("Find **recruiting US clinical trials** that match your **age**, **sex**, **state**, and optional **keywords**.")
+        with gr.Row():
+            age_input = gr.Number(label="Your Age", value=30)
+            sex_input = gr.Dropdown(["Male", "Female", "All"], label="Sex", value="All")
+        with gr.Row():
+            state_input = gr.Dropdown(US_STATES, label="State", value="California")
+            keywords_input = gr.Textbox(label="Keywords", placeholder="e.g., Cancer, Diabetes")
+        search_btn = gr.Button("Search Trials")
+    with gr.Column(visible=False) as results_page:
+        output_html = gr.HTML()
+        total_pages_text = gr.Textbox(value="", interactive=False)
+        with gr.Row():
+            prev_btn = gr.Button("Previous Page")
+            next_btn = gr.Button("Next Page")
+            back_btn = gr.Button("Back")
     page_num_state = gr.State(0)
     total_pages_state = gr.State(0)
     full_results_state = gr.State(None)
     search_btn.click(
         fn=on_search,
         inputs=[age_input, sex_input, state_input, keywords_input],
+        outputs=[output_html, total_pages_text, prev_btn, next_btn, page_num_state, total_pages_state, full_results_state, input_page, results_page]
     )
     next_btn.click(
         fn=on_page_change,
         inputs=[gr.State(1), page_num_state, total_pages_state, full_results_state],
+        outputs=[output_html, total_pages_text, prev_btn, next_btn, page_num_state]
     )
     prev_btn.click(
         fn=on_page_change,
         inputs=[gr.State(-1), page_num_state, total_pages_state, full_results_state],
+        outputs=[output_html, total_pages_text, prev_btn, next_btn, page_num_state]
+    )
+    back_btn.click(
+        fn=show_input_page,
+        outputs=[input_page, results_page]
     )
 if __name__ == "__main__":