flameb0lt committed on
Commit
c1ea0b9
·
verified ·
1 Parent(s): d469f5b

Upload app.py

Browse files

experimenting with theme

Files changed (1) hide show
  1. app.py +20 -230
app.py CHANGED
@@ -1,248 +1,38 @@
1
  import gradio as gr
2
- import pandas as pd
3
- import re
4
- from sklearn.feature_extraction.text import TfidfVectorizer
5
- import numpy as np
6
- from main2 import search_trials # Import your updated search_trials
7
-
8
# Number of trials rendered on one results page.
PAGE_SIZE = 5

# Number of words kept in the collapsed preview of a long table cell.
PREVIEW_WORDS = 100

# Choices offered by the state dropdown: the 50 states followed by the
# District of Columbia.
US_STATES = [
    "Alabama", "Alaska", "Arizona", "Arkansas",
    "California", "Colorado", "Connecticut",
    "Delaware",
    "Florida",
    "Georgia",
    "Hawaii",
    "Idaho", "Illinois", "Indiana", "Iowa",
    "Kansas", "Kentucky",
    "Louisiana",
    "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
    "Mississippi", "Missouri", "Montana",
    "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
    "New York", "North Carolina", "North Dakota",
    "Ohio", "Oklahoma", "Oregon",
    "Pennsylvania",
    "Rhode Island",
    "South Carolina", "South Dakota",
    "Tennessee", "Texas",
    "Utah",
    "Vermont", "Virginia",
    "Washington", "West Virginia", "Wisconsin", "Wyoming",
    "District of Columbia",
]
20
-
21
def split_sentences(text):
    """Split *text* into a list of stripped, non-empty sentences.

    A boundary is a whitespace character that follows ``.``, ``?`` or ``!``.
    The two negative lookbehinds suppress the split after dotted
    abbreviations such as ``e.g.`` and after capitalised two-letter
    abbreviations such as ``Dr.``.
    """
    boundary = r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s'
    stripped = (piece.strip() for piece in re.split(boundary, text))
    return [piece for piece in stripped if piece]
23
-
24
def build_input_text(row):
    """Join the descriptive fields of a trial record into one labelled string.

    Args:
        row: Any mapping-like object exposing ``.get(key, default)`` (for
            example a dict or a pandas Series) holding the trial fields.

    Returns:
        The populated fields rendered as ``"Label: value"`` and joined with
        single spaces, in a fixed order. Fields that are missing, ``None``,
        NaN or blank are left out entirely, so a record with no usable
        fields yields ``""``.
    """
    labelled_fields = (
        ("Intervention Name", "InterventionName"),
        ("Intervention Description", "InterventionDescription"),
        ("Brief Summary", "BriefSummary"),
        ("Primary Outcome Measure", "PrimaryOutcomeMeasure"),
        ("Primary Outcome Description", "PrimaryOutcomeDescription"),
        ("Start Date", "StartDate"),
        ("Primary Completion Date", "PrimaryCompletionDate"),
    )

    parts = []
    for label, key in labelled_fields:
        value = row.get(key, "")
        if value is None:
            continue
        # NaN is the only float that is not equal to itself; a missing cell
        # would otherwise be rendered as the literal text "nan".
        if isinstance(value, float) and value != value:
            continue
        # The blank check must look at the value alone: once the label is
        # attached the part is never empty, so filtering afterwards is a no-op.
        if not str(value).strip():
            continue
        parts.append(f"{label}: {value}")
    return " ".join(parts)
35
-
36
def generate_summary(row, max_sentences=7, min_sentence_length=5):
    """Build an extractive summary of one trial record.

    The record is flattened with ``build_input_text`` and split with
    ``split_sentences``; sentences with fewer than *min_sentence_length*
    words are dropped. If at most *max_sentences* remain they are all
    returned. Otherwise each sentence is scored by the sum of its TF-IDF
    weights multiplied by a position weight that falls linearly from 1.5
    (first sentence) to 1.0 (last), and the best *max_sentences* are kept
    in document order.

    Returns:
        The selected sentences joined with spaces, or ``""`` when there is
        no text or no sentence is long enough.
    """
    text = build_input_text(row)
    if not text.strip():
        return ""

    candidates = [
        sentence
        for sentence in split_sentences(text)
        if len(sentence.split()) >= min_sentence_length
    ]
    if not candidates:
        return ""
    if len(candidates) <= max_sentences:
        return " ".join(candidates)

    tfidf = TfidfVectorizer(stop_words="english").fit_transform(candidates)
    tfidf_scores = np.array(tfidf.sum(axis=1)).flatten()
    weights = np.linspace(1.5, 1.0, num=len(candidates))
    ranking = (tfidf_scores * weights).argsort()
    # Take the best-scoring indices, then restore document order.
    chosen = sorted(ranking[-max_sentences:][::-1])

    label_prefix = r"^(Start Date|Primary Completion Date|Intervention Name|Primary Outcome Measure|Primary Outcome Description):"
    # First pass: keep chosen sentences that do not start with a metadata label.
    picked = [
        candidates[idx] for idx in chosen
        if not re.match(label_prefix, candidates[idx])
    ]

    # Second pass: if the first pass came up short, top up from the same
    # chosen set with any sentence not already present.
    if len(picked) < max_sentences:
        for idx in chosen:
            if len(picked) >= max_sentences:
                break
            if candidates[idx] not in picked:
                picked.append(candidates[idx])

    return " ".join(picked[:max_sentences])
66
 
67
def run_search(age, sex, state, keywords):
    """Run the trial search and prepare the first page of results.

    Calls ``search_trials`` with summary generation switched off.

    Returns:
        A tuple ``(page_df, total_pages, full_df)``. With no matches this is
        ``(empty DataFrame, 0, None)``. Otherwise ``page_df`` is a copy of
        the first ``PAGE_SIZE`` rows with an empty ``LaymanSummary`` column
        added, ``total_pages`` is the page count rounded up, and ``full_df``
        is the complete result frame.
    """
    matches = search_trials(
        user_age=age,
        user_sex=sex,
        user_state=state,
        user_keywords=keywords,
        generate_summaries=False,
    )
    if matches.empty:
        return pd.DataFrame(), 0, None

    # Ceiling division: a partially filled last page still counts as a page.
    page_count = (len(matches) + PAGE_SIZE - 1) // PAGE_SIZE
    first_page = matches.iloc[:PAGE_SIZE].copy()
    first_page['LaymanSummary'] = ""
    return first_page, page_count, matches
81
-
82
def load_page(page_num, full_df):
    """Return the rows of page *page_num* (0-based) with summaries filled in.

    An empty DataFrame is returned when *full_df* is ``None`` or empty.
    Otherwise the page slice is copied and its ``LaymanSummary`` column is
    computed row by row with ``generate_summary``.
    """
    if full_df is None or full_df.empty:
        return pd.DataFrame()

    first_row = page_num * PAGE_SIZE
    last_row = first_row + PAGE_SIZE
    current_page = full_df.iloc[first_row:last_row].copy()
    current_page['LaymanSummary'] = current_page.apply(generate_summary, axis=1)
    return current_page
90
 
91
def update_page_controls(page_num, total_pages):
    """Compute pager state for the 0-based page *page_num*.

    Returns:
        A tuple ``(prev_update, next_update, page_text)``: visibility updates
        for the previous/next buttons and a ``"Page X of Y"`` label, which is
        ``""`` when there are no pages.
    """
    has_previous = page_num > 0
    has_next = page_num < total_pages - 1
    prev_update = gr.update(visible=has_previous)
    next_update = gr.update(visible=has_next)

    if total_pages > 0:
        label = f"Page {page_num + 1} of {total_pages}"
    else:
        label = ""
    return prev_update, next_update, label
96
-
97
def hide_empty_columns(df):
    """Return *df* restricted to the columns that contain some visible text.

    A column is kept when at least one of its non-null values, converted to
    a string and stripped of surrounding whitespace, is not empty. Column
    order is preserved.
    """
    def has_content(column):
        cleaned = df[column].dropna().astype(str).str.strip()
        return any(text != "" for text in cleaned)

    return df[[column for column in df.columns if has_content(column)]]
104
 
105
def df_to_html_with_readmore(df: pd.DataFrame) -> str:
    """Render *df* as an HTML table with collapsible long cells.

    ``LaymanSummary`` (when present) is moved to the first column, columns
    without any visible content are removed via ``hide_empty_columns``, and
    every header and cell value is HTML-escaped. A cell with more than
    ``PREVIEW_WORDS`` words is wrapped in ``<details>``: the summary shows
    the first ``PREVIEW_WORDS`` words followed by ``...`` and the body holds
    the full text.

    Returns:
        The HTML markup (inline ``<style>`` plus ``<table>``), or a
        "No matching trials found." paragraph when *df* is empty.
    """
    if df.empty:
        return "<p>No matching trials found.</p>"
    from html import escape

    if "LaymanSummary" in df.columns:
        names = list(df.columns)
        # Drop the first occurrence and put it back at the front.
        names.remove("LaymanSummary")
        df = df[["LaymanSummary", *names]]
    df = hide_empty_columns(df)

    def render_cell(text):
        # Short values are shown as-is; long ones get a preview + full text.
        tokens = text.split()
        if len(tokens) <= PREVIEW_WORDS:
            return f'<div>{escape(text)}</div>'
        short_text = escape(" ".join(tokens[:PREVIEW_WORDS]) + "...")
        full_text = escape(text)
        return f'''
        <div>
            <details>
                <summary>{short_text}</summary>
                <div class="full-text">{full_text}</div>
            </details>
        </div>
        '''

    chunks = ['''
    <style>
        table {
            width: 100%;
            border-collapse: collapse;
            font-family: Arial, sans-serif;
        }
        th {
            background-color: #007bff;
            color: white;
            padding: 12px;
            text-align: left;
            border: 1px solid #ddd;
        }
        td {
            border: 1px solid #ddd;
            padding: 12px;
            vertical-align: top;
            white-space: normal;
            max-width: 1000px;  /* 2.5x original 400px */
            min-width: 1000px;  /* force width */
            word-wrap: break-word;
        }
        details summary {
            cursor: pointer;
            color: #007bff;
            font-weight: bold;
        }
        details summary:after {
            content: " (Read More)";
            color: #0056b3;
            font-weight: normal;
        }
        details[open] summary {
            display: none; /* hide preview when expanded */
        }
        details div.full-text {
            display: none;
        }
        details[open] div.full-text {
            display: block;
            margin-top: 8px;
        }
    </style>
    ''']

    chunks.append('<table><thead><tr>')
    chunks.extend(f'<th>{escape(col)}</th>' for col in df.columns)
    chunks.append('</tr></thead><tbody>')
    for _, record in df.iterrows():
        chunks.append('<tr>')
        chunks.extend(
            f'<td>{render_cell(str(record[col]))}</td>' for col in df.columns
        )
        chunks.append('</tr>')
    chunks.append('</tbody></table>')
    return "".join(chunks)
185
 
186
def on_search(age, sex, state, keywords):
    """Handle the search button: run the search and show the first page.

    Returns:
        A 9-tuple in this order: results HTML, page label, previous-button
        update, next-button update, current page number (always 0), total
        page count, the full result frame, an update hiding the input page
        and an update showing the results page.
    """
    current_page = 0
    page_df, total_pages, full_df = run_search(age, sex, state, keywords)
    if not page_df.empty:
        # Reload page 0 through load_page so its summaries are generated.
        page_df = load_page(current_page, full_df)

    prev_vis, next_vis, page_text = update_page_controls(current_page, total_pages)
    table_html = df_to_html_with_readmore(page_df)
    hide_inputs = gr.update(visible=False)
    show_results = gr.update(visible=True)
    return (
        table_html,
        page_text,
        prev_vis,
        next_vis,
        current_page,
        total_pages,
        full_df,
        hide_inputs,
        show_results,
    )
194
 
195
def on_page_change(increment, page_num, total_pages, full_df):
    """Move *increment* pages from *page_num* and render the target page.

    The target is clamped to ``[0, total_pages - 1]``.

    Returns:
        A 5-tuple: results HTML, page label, previous-button update,
        next-button update and the new page number. With no results this is
        a "no matching trials" paragraph, an empty label, both buttons
        hidden and page 0.
    """
    no_results = full_df is None or full_df.empty
    if no_results:
        hide_prev = gr.update(visible=False)
        hide_next = gr.update(visible=False)
        return "<p>No matching trials found.</p>", "", hide_prev, hide_next, 0

    last_page = total_pages - 1
    target = max(0, min(page_num + increment, last_page))
    target_df = load_page(target, full_df)
    prev_vis, next_vis, page_text = update_page_controls(target, total_pages)
    table_html = df_to_html_with_readmore(target_df)
    return table_html, page_text, prev_vis, next_vis, target
203
 
204
def show_input_page():
    """Return a pair of updates: the first sets ``visible=True``, the second
    ``visible=False`` (wired to the input page and results page by the
    back button)."""
    reveal = gr.update(visible=True)
    conceal = gr.update(visible=False)
    return reveal, conceal
206
 
207
# Two-page UI: a search form and a paginated results view. Only one of the
# two columns is visible at a time.
with gr.Blocks() as demo:
    gr.Markdown("# Clinical Trials Search Tool with Pagination and Inline Read More")

    # --- Page 1: search form -------------------------------------------
    with gr.Column(visible=True) as input_page:
        gr.Markdown("Find **recruiting US clinical trials** that match your **age**, **sex**, **state**, and optional **keywords**.")
        with gr.Row():
            age_input = gr.Number(label="Your Age", value=30)
            sex_input = gr.Dropdown(["Male", "Female", "All"], label="Sex", value="All")
        with gr.Row():
            state_input = gr.Dropdown(US_STATES, label="State", value="California")
            keywords_input = gr.Textbox(label="Keywords", placeholder="e.g., Cancer, Diabetes")
        search_btn = gr.Button("Search Trials")

    # --- Page 2: results (hidden until a search runs) ------------------
    with gr.Column(visible=False) as results_page:
        output_html = gr.HTML()
        total_pages_text = gr.Textbox(value="", interactive=False)
        with gr.Row():
            prev_btn = gr.Button("Previous Page")
            next_btn = gr.Button("Next Page")
        back_btn = gr.Button("Back")

    # Per-session state carried between callbacks.
    page_num_state = gr.State(0)
    total_pages_state = gr.State(0)
    full_results_state = gr.State(None)

    # Outputs refreshed on every page render, in the order the handlers
    # return them.
    pager_outputs = [output_html, total_pages_text, prev_btn, next_btn, page_num_state]

    search_btn.click(
        fn=on_search,
        inputs=[age_input, sex_input, state_input, keywords_input],
        outputs=pager_outputs + [total_pages_state, full_results_state, input_page, results_page],
    )
    # Next/previous share one handler; the constant State supplies the step.
    next_btn.click(
        fn=on_page_change,
        inputs=[gr.State(1), page_num_state, total_pages_state, full_results_state],
        outputs=pager_outputs,
    )
    prev_btn.click(
        fn=on_page_change,
        inputs=[gr.State(-1), page_num_state, total_pages_state, full_results_state],
        outputs=pager_outputs,
    )
    back_btn.click(
        fn=show_input_page,
        outputs=[input_page, results_page],
    )
247
 
248
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ from main2 import search_trials # Importing from main2.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
def run_search(age, sex, state, keywords):
    """Forward the form values to ``search_trials`` and return its result
    unchanged."""
    return search_trials(
        user_age=age,
        user_sex=sex,
        user_state=state,
        user_keywords=keywords,
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
# Choices for the state dropdown: the 50 states plus the District of
# Columbia. "Maryland", "New Mexico" and "District of Columbia" had been
# dropped from the inlined list and are restored here.
state_choices = [
    "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado",
    "Connecticut", "Delaware", "Florida", "Georgia", "Hawaii", "Idaho",
    "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana",
    "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
    "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada",
    "New Hampshire", "New Jersey", "New Mexico", "New York",
    "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon",
    "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
    "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington",
    "West Virginia", "Wisconsin", "Wyoming", "District of Columbia",
]

# Single-page UI: a search form above a results table.
# Built-in themes live in the `gr.themes` namespace; `gr.theme` does not
# exist and raised AttributeError as soon as the module was loaded.
with gr.Blocks(theme=gr.themes.Citrus()) as demo:
    gr.Markdown("# Clinical Trials Search Tool")
    gr.Markdown(
        "Find **recruiting US clinical trials** that match your **age**, **sex**, "
        "**state**, and optional **keywords**."
    )

    with gr.Row():
        age_input = gr.Number(label="Your Age", value=30)
        sex_input = gr.Dropdown(["Male", "Female"], label="Sex", value="Male")

    with gr.Row():
        state_input = gr.Dropdown(
            state_choices,
            label="State (full name or abbreviation)",
            value="California",
        )
        keywords_input = gr.Textbox(
            label="Keywords (comma separated)",
            placeholder="e.g., cancer, diabetes",
        )

    search_btn = gr.Button("Search Trials")

    output_table = gr.Dataframe(label="Matching Trials", interactive=False)

    # Whatever run_search returns is displayed directly in the table.
    search_btn.click(
        fn=run_search,
        inputs=[age_input, sex_input, state_input, keywords_input],
        outputs=output_table,
    )
37
 
38
  if __name__ == "__main__":