swarit222 committed on
Commit
fcca247
·
verified ·
1 Parent(s): d376c61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -88
app.py CHANGED
@@ -1,88 +1,90 @@
1
  import gradio as gr
2
  import pandas as pd
3
- from main2 import search_trials # Your updated search_trials includes summary generation
 
 
 
4
 
5
  PAGE_SIZE = 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  def run_search(age, sex, state, keywords):
8
- # Run search WITHOUT generating summaries initially
9
  df = search_trials(
10
  user_age=age,
11
  user_sex=sex,
12
  user_state=state,
13
  user_keywords=keywords,
14
- generate_summaries=False # generate summaries page-wise
15
  )
16
  if df.empty:
17
  return pd.DataFrame(), 0, None
18
  total_pages = (len(df) + PAGE_SIZE - 1) // PAGE_SIZE
19
  page_df = df.iloc[:PAGE_SIZE].copy()
20
- page_df['LaymanSummary'] = "" # empty summary placeholder
21
  return page_df, total_pages, df
22
 
23
- def generate_summary_for_row(row):
24
- # Use the generate_summary helper inside search_trials function, or reimplement here if needed
25
- # Since generate_summary is inside search_trials, just call search_trials with generate_summaries=True on 1 row doesn't work.
26
- # So, for simplicity, re-implement the summary logic here or expose generate_summary separately.
27
- # But easiest: call search_trials with generate_summaries=True on page data and extract LaymanSummary.
28
- # To avoid overhead, let's generate summaries for the page using search_trials with generate_summaries=True
29
- pass
30
-
31
  def load_page(page_num, full_df):
32
  if full_df is None or full_df.empty:
33
  return pd.DataFrame()
34
  start = page_num * PAGE_SIZE
35
  end = start + PAGE_SIZE
36
  page_df = full_df.iloc[start:end].copy()
37
- # Generate summaries for current page only using your own generate_summary inside search_trials
38
- # Since generate_summary is local inside search_trials, call search_trials with this subset and generate_summaries=True
39
-
40
- # Create minimal subset dataframe similar to full_df slice for summary generation
41
- page_df_with_summaries = search_trials(
42
- user_age=0, # dummy values; ignored because filtering is done on df subset
43
- user_sex="all",
44
- user_state="all",
45
- user_keywords=[],
46
- generate_summaries=True
47
- )
48
- # The above won't work as is because it re-filters dataset; instead do it manually:
49
-
50
- # Workaround: Re-apply generate_summary function here explicitly for each row
51
- # Re-implement generate_summary here from your main2.py for page_df only:
52
- import re
53
- from sklearn.feature_extraction.text import TfidfVectorizer
54
- import numpy as np
55
-
56
- def split_sentences(text):
57
- return re.split(r'(?<=[.!?])\s+', text.strip())
58
-
59
- def build_input_text(row):
60
- text_parts = [
61
- f"Intervention Name: {row.get('InterventionName', '')}",
62
- f"Intervention Description: {row.get('InterventionDescription', '')}",
63
- f"Brief Summary: {row.get('BriefSummary', '')}",
64
- f"Primary Outcome Measure: {row.get('PrimaryOutcomeMeasure', '')}",
65
- f"Primary Outcome Description: {row.get('PrimaryOutcomeDescription', '')}",
66
- f"Start Date: {row.get('StartDate', '')}",
67
- f"Detailed Description: {row.get('DetailedDescription', '')}",
68
- ]
69
- return " ".join([part for part in text_parts if part.strip()])
70
-
71
- def generate_summary(row, num_sentences=5):
72
- text = build_input_text(row)
73
- if not text.strip():
74
- return ""
75
- sentences = split_sentences(text)
76
- if len(sentences) <= num_sentences:
77
- return " ".join(sentences)
78
- vectorizer = TfidfVectorizer(stop_words="english")
79
- tfidf_matrix = vectorizer.fit_transform(sentences)
80
- scores = np.array(tfidf_matrix.sum(axis=1)).flatten()
81
- top_indices = scores.argsort()[-num_sentences:][::-1]
82
- top_indices = sorted(top_indices)
83
- summary_sentences = [sentences[i] for i in top_indices]
84
- return " ".join(summary_sentences)
85
-
86
  page_df['LaymanSummary'] = page_df.apply(generate_summary, axis=1)
87
  return page_df
88
 
@@ -92,61 +94,155 @@ def update_page_controls(page_num, total_pages):
92
  page_text = f"Page {page_num + 1} of {total_pages}" if total_pages > 0 else ""
93
  return prev_visible, next_visible, page_text
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  def on_search(age, sex, state, keywords):
96
  df_page, total_pages, full_df = run_search(age, sex, state, keywords)
97
  page_num = 0
98
  if not df_page.empty:
99
  df_page = load_page(page_num, full_df)
100
  prev_vis, next_vis, page_text = update_page_controls(page_num, total_pages)
101
- return df_page, page_text, prev_vis, next_vis, page_num, total_pages, full_df
 
102
 
103
  def on_page_change(increment, page_num, total_pages, full_df):
104
  if full_df is None or full_df.empty:
105
- return pd.DataFrame(), "", gr.update(visible=False), gr.update(visible=False), 0
106
  new_page = max(0, min(page_num + increment, total_pages - 1))
107
  page_df = load_page(new_page, full_df)
108
  prev_vis, next_vis, page_text = update_page_controls(new_page, total_pages)
109
- return page_df, page_text, prev_vis, next_vis, new_page
 
110
 
111
- with gr.Blocks() as demo:
112
- gr.Markdown("# Clinical Trials Search Tool with Pagination")
113
-
114
- with gr.Row():
115
- age_input = gr.Number(label="Your Age", value=30)
116
- sex_input = gr.Dropdown(["Male", "Female", "All"], label="Sex", value="All")
117
-
118
- with gr.Row():
119
- state_input = gr.Textbox(label="State (full name or abbreviation)", placeholder="e.g., California")
120
- keywords_input = gr.Textbox(label="Keywords (comma separated)", placeholder="e.g., Cancer, Diabetes")
121
-
122
- search_btn = gr.Button("Search Trials")
123
-
124
- output_table = gr.Dataframe(label="Matching Trials", interactive=False)
125
-
126
- total_pages_text = gr.Textbox(value="", interactive=False)
127
- prev_btn = gr.Button("Previous Page")
128
- next_btn = gr.Button("Next Page")
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  page_num_state = gr.State(0)
131
  total_pages_state = gr.State(0)
132
  full_results_state = gr.State(None)
133
-
134
  search_btn.click(
135
  fn=on_search,
136
  inputs=[age_input, sex_input, state_input, keywords_input],
137
- outputs=[output_table, total_pages_text, prev_btn, next_btn, page_num_state, total_pages_state, full_results_state]
138
  )
139
-
140
  next_btn.click(
141
  fn=on_page_change,
142
  inputs=[gr.State(1), page_num_state, total_pages_state, full_results_state],
143
- outputs=[output_table, total_pages_text, prev_btn, next_btn, page_num_state]
144
  )
145
-
146
  prev_btn.click(
147
  fn=on_page_change,
148
  inputs=[gr.State(-1), page_num_state, total_pages_state, full_results_state],
149
- outputs=[output_table, total_pages_text, prev_btn, next_btn, page_num_state]
 
 
 
 
150
  )
151
 
152
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import re
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ import numpy as np
6
+ from main2 import search_trials # Import your updated search_trials
7
 
8
  PAGE_SIZE = 5
9
+ PREVIEW_WORDS = 100 # Number of words in collapsed preview
10
+
11
+ US_STATES = [
12
+ "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", "Delaware",
13
+ "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
14
+ "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi",
15
+ "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
16
+ "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
17
+ "Rhode Island", "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Vermont",
18
+ "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming", "District of Columbia"
19
+ ]
20
+
21
def split_sentences(text):
    """Split *text* into trimmed, non-empty sentences.

    A split happens at whitespace that follows ``.``, ``?`` or ``!``, except
    when the preceding characters look like a dotted abbreviation (``e.g.``)
    or a capitalised two-letter title (``Dr.``).

    Args:
        text: The text to split. ``None`` or a non-string value yields an
            empty list instead of raising ``TypeError`` from ``re.split``.

    Returns:
        A list of sentence strings with surrounding whitespace removed.
    """
    if not isinstance(text, str) or not text.strip():
        return []
    pieces = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', text)
    return [piece.strip() for piece in pieces if piece.strip()]
23
+
24
def build_input_text(row):
    """Concatenate the labelled trial fields of *row* into one text string.

    Fields whose value is missing (``None``/NaN) or blank are skipped
    entirely. Previously every part carried its label, so the emptiness
    filter never removed anything and missing values leaked into the text
    as bare labels or the literal string ``"nan"``.

    Args:
        row: Any object with a dict-style ``get(key, default)`` method,
            such as a ``dict`` or a ``pandas.Series``.

    Returns:
        ``"Label: value"`` segments joined by single spaces, or ``""`` when
        no field has content.
    """
    labelled_fields = (
        ("Intervention Name", "InterventionName"),
        ("Intervention Description", "InterventionDescription"),
        ("Brief Summary", "BriefSummary"),
        ("Primary Outcome Measure", "PrimaryOutcomeMeasure"),
        ("Primary Outcome Description", "PrimaryOutcomeDescription"),
        ("Start Date", "StartDate"),
        ("Primary Completion Date", "PrimaryCompletionDate"),
    )
    parts = []
    for label, key in labelled_fields:
        value = row.get(key, "")
        # pd.isna is only meaningful (and only returns a plain bool) for scalars.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            continue
        value_text = str(value).strip()
        if value_text:
            parts.append(f"{label}: {value_text}")
    return " ".join(parts)
35
+
36
def generate_summary(row, max_sentences=7, min_sentence_length=5):
    """Build an extractive summary of a trial row.

    The row's text (from ``build_input_text``) is split into sentences,
    sentences shorter than *min_sentence_length* words are dropped, and the
    remainder are scored by summed TF-IDF weight multiplied by a position
    weight that falls linearly from 1.5 (first sentence) to 1.0 (last).

    Sentences that begin with a metadata label (``Start Date:`` etc.) are
    only used when there are not enough other sentences to fill the summary.
    The previous backfill loop re-added exactly the label sentences it had
    just filtered out, so the filter had no effect on which sentences were
    chosen.

    Args:
        row: Dict-like trial record passed to ``build_input_text``.
        max_sentences: Maximum number of sentences in the summary.
        min_sentence_length: Minimum word count for a sentence to be kept.

    Returns:
        The selected sentences joined by spaces in their original order, or
        ``""`` when there is nothing to summarise.
    """
    text = build_input_text(row)
    if not text.strip():
        return ""

    sentences = [
        sentence
        for sentence in split_sentences(text)
        if len(sentence.split()) >= min_sentence_length
    ]
    if not sentences or max_sentences <= 0:
        return ""
    if len(sentences) <= max_sentences:
        return " ".join(sentences)

    try:
        tfidf_matrix = TfidfVectorizer(stop_words="english").fit_transform(sentences)
    except ValueError:
        # Raised when no scorable terms remain (e.g. only stop words);
        # fall back to the leading sentences rather than failing the page.
        return " ".join(sentences[:max_sentences])

    scores = np.asarray(tfidf_matrix.sum(axis=1)).ravel()
    position_weights = np.linspace(1.5, 1.0, num=len(sentences))
    ranked = (scores * position_weights).argsort()[::-1]  # best first

    label_pattern = re.compile(
        r"^(Start Date|Primary Completion Date|Intervention Name|Primary Outcome Measure|Primary Outcome Description):"
    )
    content_ranked = [i for i in ranked if not label_pattern.match(sentences[i])]
    label_ranked = [i for i in ranked if label_pattern.match(sentences[i])]

    # Prefer content sentences; use label sentences only to fill the quota.
    chosen = (content_ranked + label_ranked)[:max_sentences]
    return " ".join(sentences[i] for i in sorted(chosen))
66
 
67
  def run_search(age, sex, state, keywords):
 
68
  df = search_trials(
69
  user_age=age,
70
  user_sex=sex,
71
  user_state=state,
72
  user_keywords=keywords,
73
+ generate_summaries=False
74
  )
75
  if df.empty:
76
  return pd.DataFrame(), 0, None
77
  total_pages = (len(df) + PAGE_SIZE - 1) // PAGE_SIZE
78
  page_df = df.iloc[:PAGE_SIZE].copy()
79
+ page_df['LaymanSummary'] = ""
80
  return page_df, total_pages, df
81
 
 
 
 
 
 
 
 
 
82
  def load_page(page_num, full_df):
83
  if full_df is None or full_df.empty:
84
  return pd.DataFrame()
85
  start = page_num * PAGE_SIZE
86
  end = start + PAGE_SIZE
87
  page_df = full_df.iloc[start:end].copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  page_df['LaymanSummary'] = page_df.apply(generate_summary, axis=1)
89
  return page_df
90
 
 
94
  page_text = f"Page {page_num + 1} of {total_pages}" if total_pages > 0 else ""
95
  return prev_visible, next_visible, page_text
96
 
97
def hide_empty_columns(df):
    """Return *df* restricted to columns that have at least one non-blank value.

    A value counts as blank when it is missing (dropped by ``dropna``) or
    when its string form is empty after stripping whitespace.

    Columns are accessed by position rather than by label, so frames with
    duplicate column labels work; label access returns a DataFrame for a
    duplicated label, which has no ``.str`` accessor.

    Args:
        df: The DataFrame to filter.

    Returns:
        A DataFrame containing only the non-empty columns, in their
        original order.
    """
    keep_positions = []
    for position in range(df.shape[1]):
        values = df.iloc[:, position].dropna().astype(str).str.strip()
        if values.ne("").any():
            keep_positions.append(position)
    return df.iloc[:, keep_positions]
104
+
105
+ def df_to_html_with_readmore(df: pd.DataFrame) -> str:
106
+ if df.empty:
107
+ return "<p>No matching trials found.</p>"
108
+ from html import escape
109
+ if "LaymanSummary" in df.columns:
110
+ cols = list(df.columns)
111
+ cols.insert(0, cols.pop(cols.index("LaymanSummary")))
112
+ df = df[cols]
113
+ df = hide_empty_columns(df)
114
+ html = ['''
115
+ <style>
116
+ table {
117
+ width: 100%;
118
+ border-collapse: collapse;
119
+ font-family: Arial, sans-serif;
120
+ }
121
+ th {
122
+ background-color: #007bff;
123
+ color: white;
124
+ padding: 12px;
125
+ text-align: left;
126
+ border: 1px solid #ddd;
127
+ }
128
+ td {
129
+ border: 1px solid #ddd;
130
+ padding: 12px;
131
+ vertical-align: top;
132
+ white-space: normal;
133
+ max-width: 1000px; /* 2.5x original 400px */
134
+ min-width: 1000px; /* force width */
135
+ word-wrap: break-word;
136
+ }
137
+ details summary {
138
+ cursor: pointer;
139
+ color: #007bff;
140
+ font-weight: bold;
141
+ }
142
+ details summary:after {
143
+ content: " (Read More)";
144
+ color: #0056b3;
145
+ font-weight: normal;
146
+ }
147
+ details[open] summary {
148
+ display: none; /* hide preview when expanded */
149
+ }
150
+ details div.full-text {
151
+ display: none;
152
+ }
153
+ details[open] div.full-text {
154
+ display: block;
155
+ margin-top: 8px;
156
+ }
157
+ </style>
158
+ ''']
159
+ html.append('<table><thead><tr>')
160
+ for col in df.columns:
161
+ html.append(f'<th>{escape(col)}</th>')
162
+ html.append('</tr></thead><tbody>')
163
+ for _, row in df.iterrows():
164
+ html.append('<tr>')
165
+ for col in df.columns:
166
+ val = str(row[col])
167
+ words = val.split()
168
+ if len(words) > PREVIEW_WORDS:
169
+ short_text = escape(" ".join(words[:PREVIEW_WORDS]) + "...")
170
+ full_text = escape(val)
171
+ cell_html = f'''
172
+ <div>
173
+ <details>
174
+ <summary>{short_text}</summary>
175
+ <div class="full-text">{full_text}</div>
176
+ </details>
177
+ </div>
178
+ '''
179
+ else:
180
+ cell_html = f'<div>{escape(val)}</div>'
181
+ html.append(f'<td>{cell_html}</td>')
182
+ html.append('</tr>')
183
+ html.append('</tbody></table>')
184
+ return "".join(html)
185
+
186
  def on_search(age, sex, state, keywords):
187
  df_page, total_pages, full_df = run_search(age, sex, state, keywords)
188
  page_num = 0
189
  if not df_page.empty:
190
  df_page = load_page(page_num, full_df)
191
  prev_vis, next_vis, page_text = update_page_controls(page_num, total_pages)
192
+ html_output = df_to_html_with_readmore(df_page)
193
+ return html_output, page_text, prev_vis, next_vis, page_num, total_pages, full_df, gr.update(visible=False), gr.update(visible=True)
194
 
195
  def on_page_change(increment, page_num, total_pages, full_df):
196
  if full_df is None or full_df.empty:
197
+ return "<p>No matching trials found.</p>", "", gr.update(visible=False), gr.update(visible=False), 0
198
  new_page = max(0, min(page_num + increment, total_pages - 1))
199
  page_df = load_page(new_page, full_df)
200
  prev_vis, next_vis, page_text = update_page_controls(new_page, total_pages)
201
+ html_output = df_to_html_with_readmore(page_df)
202
+ return html_output, page_text, prev_vis, next_vis, new_page
203
 
204
def show_input_page():
    """Return updates that show the input page and hide the results page."""
    input_visible = gr.update(visible=True)
    results_hidden = gr.update(visible=False)
    return input_visible, results_hidden
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
# NOTE(review): the nesting below was reconstructed from component usage
# (the source listing carried no indentation) — confirm that `search_btn`
# and `back_btn` sit in the intended containers.
with gr.Blocks() as demo:
    gr.Markdown("# Clinical Trials Search Tool with Pagination and Inline Read More")

    # Search form; hidden once a search has been run.
    with gr.Column(visible=True) as input_page:
        gr.Markdown("Find **recruiting US clinical trials** that match your **age**, **sex**, **state**, and optional **keywords**.")
        with gr.Row():
            age_input = gr.Number(label="Your Age", value=30)
            sex_input = gr.Dropdown(["Male", "Female", "All"], label="Sex", value="All")
        with gr.Row():
            state_input = gr.Dropdown(US_STATES, label="State", value="California")
            keywords_input = gr.Textbox(label="Keywords", placeholder="e.g., Cancer, Diabetes")
        search_btn = gr.Button("Search Trials")

    # Results view; revealed by on_search.
    with gr.Column(visible=False) as results_page:
        output_html = gr.HTML()
        total_pages_text = gr.Textbox(value="", interactive=False)
        with gr.Row():
            prev_btn = gr.Button("Previous Page")
            next_btn = gr.Button("Next Page")
        back_btn = gr.Button("Back")

    # Per-session paging state.
    page_num_state = gr.State(0)
    total_pages_state = gr.State(0)
    full_results_state = gr.State(None)

    search_btn.click(
        fn=on_search,
        inputs=[age_input, sex_input, state_input, keywords_input],
        outputs=[
            output_html,
            total_pages_text,
            prev_btn,
            next_btn,
            page_num_state,
            total_pages_state,
            full_results_state,
            input_page,
            results_page,
        ],
    )

    # Next/previous share on_page_change; only the increment differs.
    next_btn.click(
        fn=on_page_change,
        inputs=[gr.State(1), page_num_state, total_pages_state, full_results_state],
        outputs=[output_html, total_pages_text, prev_btn, next_btn, page_num_state],
    )
    prev_btn.click(
        fn=on_page_change,
        inputs=[gr.State(-1), page_num_state, total_pages_state, full_results_state],
        outputs=[output_html, total_pages_text, prev_btn, next_btn, page_num_state],
    )

    back_btn.click(
        fn=show_input_page,
        outputs=[input_page, results_page],
    )
247
 
248
  if __name__ == "__main__":