buildinqq committed on
Commit
4950abf
·
verified ·
1 Parent(s): 4afaf5e

Update app.py

Browse files

- model: gemini-flash-002
- embedder: malteos
- persisted data: malteos_scincl__CAR_T_cell__PersistVectorStore_v2
- prompt: 1 step with ref (prompt 03 with minor modifications, e.g. edited priorities)

Files changed (1) hide show
  1. app.py +73 -231
app.py CHANGED
@@ -50,29 +50,23 @@ safety_settings = [
50
  ]
51
 
52
  llm = Gemini(
53
- # model="models/gemini-1.5-flash-002",
54
- model="models/gemini-1.5-pro",
55
  generation_config=generation_config,
56
  safety_settings=safety_settings,
57
  )
58
 
59
  # Setup embedder
60
- embed_model_name = "BAAI/bge-small-en-v1.5"
61
  embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
62
 
63
  Settings.llm = llm
64
  Settings.embed_model = embed_model
65
 
66
  # rebuild storage context
67
- storage_context = StorageContext.from_defaults(persist_dir="VectorStore")
68
  # load index
69
  index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
70
 
71
- async def remove_ref(text):
72
- """Removes content after 'Reference Papers' (case-insensitive)."""
73
- split_text = re.split(r'\bReference Papers\b', text, flags=re.IGNORECASE)
74
- return split_text[0].strip() if len(split_text) > 1 else text.strip()
75
-
76
  async def clean_trial_text(text):
77
  """Removes intro text from references if present."""
78
  sections, cleaned_sections, in_references = text.split('\n'), [], False
@@ -101,100 +95,6 @@ async def clean_trial_text(text):
101
 
102
  return '\n'.join(cleaned_sections).strip()
103
 
104
- async def get_criteria(study_information, top_k):
105
- """Fetches eligibility criteria and metadata for a study."""
106
- query_engine_get_study = CitationQueryEngine.from_args(
107
- index_persisted,
108
- similarity_top_k=top_k,
109
- citation_chunk_size=2048,
110
- verbose=True,
111
- node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.8)],
112
- use_async=True
113
- )
114
- criteria_response = await query_engine_get_study.aquery(f"""
115
- Based on the provided instructions and clinical trial information, generate the new eligibility criteria specific for clinical trial information.
116
-
117
- ### Instruction:
118
- Find suitable papers that are relevant or similar to the provided clinical trial information (### Clinical Trial Information).
119
- Prioritize the following topics when finding related studies:
120
- 1. Study Objectives
121
- 2. Study Design and Phases
122
- 3. Conditions
123
- 4. Intervention/Treatment
124
-
125
- Criteria Generation:
126
- As a clinical researcher, generate new eligibility criteria for the given clinical trial information.
127
- Analyze the information from all {top_k} related studies to generate new precise eligibility criteria.
128
- Ensure that the criteria are specific for the given clinical trial information (### Clinical Trial Information).
129
-
130
- Please follow the pattern of the output (### Pattern of the output).
131
- --------------------------------------------------
132
- ### Clinical Trial Information
133
- {study_information}
134
- --------------------------------------------------
135
- ### Pattern of the Output
136
- Inclusion Criteria
137
- 1.
138
- 2.
139
- ...
140
-
141
- Exclusion Criteria
142
- 1.
143
- 2.
144
- ...
145
- """)
146
- metadata_list = [source.node.get_metadata_str() for source in criteria_response.source_nodes]
147
- return criteria_response.response, metadata_list
148
-
149
- async def process_reference(metadata_list):
150
- """Formats metadata list into a numbered string."""
151
- return "\n".join([f"{i + 1}. {meta}" for i, meta in enumerate(metadata_list)])
152
-
153
- async def get_response(criteria, reference):
154
- """Processes eligibility criteria and updates references to match new numbering."""
155
- response = await llm.acomplete(f"""
156
- ### Task Description:
157
- You are tasked with processing clinical trial metadata and eligibility criteria. The goal is to clean, reorder, and maintain consistency between the metadata and references used in eligibility criteria.
158
-
159
- ### Instructions:
160
- 1. Review the eligibility criteria provided, which include references to metadata numbers (e.g., [1], [2], etc.). Identify all reference numbers that are actually used in the criteria.
161
- 2. Remove metadata of reference papers (### Metadata of Reference Papers) that does not have a corresponding reference in the eligibility criteria. This will ensure only relevant references are kept.
162
- 3. Reorder the remaining metadata so that they are numbered sequentially, starting from 1.
163
- 4. Update the reference numbers in the eligibility criteria accordingly to reflect the new order.
164
- 5. Maintain Criteria Consistency: Ensure that the eligibility criteria remain exactly the same in terms of content, but the reference numbers are updated to match the new numbering of metadata.
165
- --------------------------------------------------
166
- ### Eligibility Criteria
167
- {criteria}
168
- --------------------------------------------------
169
- ### Metadata of Reference Papers
170
- {reference}
171
- --------------------------------------------------
172
- ### Pattern of the Output
173
- Inclusion Criteria
174
- 1.
175
- 2.
176
- ...
177
-
178
- Exclusion Criteria
179
- 1.
180
- 2.
181
- ...
182
-
183
- Reference Papers
184
- 1.NCT ID:
185
- Study Name:
186
- Condition:
187
- Intervention/Treatment:
188
- 2.NCT ID:
189
- Study Name:
190
- Condition:
191
- Intervention/Treatment:
192
- .
193
- .
194
- .""")
195
- response_text = response.text
196
- return response_text
197
-
198
  async def extract_criteria(text):
199
  """Extracts inclusion and exclusion criteria from text."""
200
  patterns = {
@@ -212,6 +112,17 @@ async def extract_criteria(text):
212
  async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, allocation, intervention_model, Masking, conditions, interventions, location_countries, removed_location_countries):
213
  """Runs the main function to process study information and generate formatted output."""
214
 
 
 
 
 
 
 
 
 
 
 
 
215
  study_information = f"""
216
  # Study Objectives/Description
217
  {study_obj}
@@ -235,15 +146,66 @@ async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, all
235
  - Masking: None {Masking}
236
  """
237
 
238
- criteria, metadata_list = await get_criteria(study_information, top_k)
239
- if criteria != "Empty Response":
240
- processed_ref = await process_reference(metadata_list)
241
- response = await get_response(criteria, processed_ref)
242
- combine_criteria = await extract_criteria(response)
 
 
 
 
 
 
 
243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  # Extract and format references
245
  pattern = r'Reference Papers\s*(.+)$'
246
- match = re.search(pattern, response, re.DOTALL | re.IGNORECASE)
247
  ext_ref = match.group(1) if match else ""
248
  split_ref = re.split(r'\n*\d+\.\s+', ext_ref)[1:]
249
 
@@ -272,106 +234,6 @@ async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, all
272
 
273
  return combine_criteria, formatted_ref
274
 
275
- # # LLM.complete
276
- # complete_response = await llm.acomplete(f"""
277
- # Based on the provided instructions and clinical trial information, generate the new eligibility criteria by analyzing clinical trial information(### Clinical Trial Information).
278
- # ### Instruction:
279
- # Criteria generation:
280
- # As a clinical researcher, generate new eligibility criteria for given clinical trial information.
281
- # Ensure the criteria are clear, specific, and reasonable for a clinical research information.
282
-
283
- # Prioritize the following topics in clinical trial information.:
284
- # 1. Study Objectives
285
- # 2. Study Design and Phases
286
- # 3. Conditions
287
- # 4. Intervention/Treatment
288
-
289
- # Please follow the pattern of the output(### Pattern of the output).
290
- # --------------------------------------------------
291
- # ### Clinical Trial Information
292
- # {study_information}
293
- # --------------------------------------------------
294
- # ### Pattern of the output
295
- # Inclusion Criteria
296
- # 1.
297
- # 2.
298
- # .
299
- # .
300
- # .
301
-
302
- # Exclusion Criteria
303
- # 1.
304
- # 2.
305
- # .
306
- # .
307
- # .
308
-
309
-
310
- # """
311
- # )
312
-
313
- # combine_response = await llm.acomplete(f"""
314
- # Based on the provided instructions clinical, clinical trial information, and criteria information, generate the appropriate eligibility criteria for ### Clinical Trial Information by analyze clinical trial information(### Clinical Trial Information), criteria 1 (### Criteria 1) and criteria 2 (### Criteria 2).
315
- # ### Instruction:
316
- # Criteria generation:
317
- # As a clinical researcher, generate appropriate eligibility criteria by analyzing given information.
318
- # Ensure the criteria are clear, specific, and reasonable for a clinical research information(### Clinical Trial Information).
319
-
320
- # Prioritize the following topics in clinical trial information.:
321
- # 1. Study Objectives
322
- # 2. Study Design and Phases
323
- # 3. Conditions
324
- # 4. Intervention/Treatment
325
-
326
- # Do not generate redundant inclusion and exclusion criteria. For example, if a criterion is included in one set of inclusion or exclusion criteria, do not include it again.
327
-
328
- # Reference Papers generation:
329
- # Please give us NCT IDs and study names from the references list in ### Criteria 1.
330
-
331
- # Please follow the pattern of the output(### Pattern of the output).
332
- # --------------------------------------------------
333
- # ### Clinical Trial Information
334
- # {study_information}
335
- # --------------------------------------------------
336
- # ### Criteria 1
337
- # {query_response}
338
- # --------------------------------------------------
339
- # ### Criteria 2
340
- # {complete_response}
341
- # --------------------------------------------------
342
- # ### Pattern of the output
343
- # Inclusion Criteria
344
- # 1.
345
- # 2.
346
- # .
347
- # .
348
- # .
349
-
350
- # Exclusion Criteria
351
- # 1.
352
- # 2.
353
- # .
354
- # .
355
- # .
356
-
357
- # Reference Papers
358
- # 1.NCT ID:
359
- # Study Name:
360
- # Condition:
361
- # Intervention/Treatment:
362
- # 2.NCT ID:
363
- # Study Name:
364
- # Condition:
365
- # Intervention/Treatment:
366
- # .
367
- # .
368
- # .
369
- # """
370
- # )
371
-
372
- # return query_response
373
- # return query_response,complete_response,combine_response
374
-
375
  # Place holder
376
  place_holder = f"""Study Objectives
377
  The purpose of this study is to evaluate the safety, tolerance and efficacy of Liposomal Paclitaxel With Nedaplatin as First-line in patients with Advanced or Recurrent Esophageal Carcinoma
@@ -558,27 +420,6 @@ with gr.Blocks() as demo:
558
 
559
  clear_button.click(lambda : [None] * len(inputs_information), outputs=inputs_information)
560
 
561
- # with gr.Row():
562
- # selected_response = gr.Radio(
563
- # choices=[
564
- # "Response 1",
565
- # "Response 2",
566
- # "Response 3",
567
- # "All responses are equally good",
568
- # "Neither response is satisfactory"
569
- # ],
570
- # label="Select the best response"
571
- # )
572
- # with gr.Row():
573
- # flag_button = gr.Button("Flag Selected Response")
574
-
575
- # #Flagging
576
- # dataset_name = "ravistech/feedback-demo-space"
577
- # hf_writer = gr.HuggingFaceDatasetSaver(hf_token=token_w, dataset_name=dataset_name, private=True)
578
- # hf_writer.setup([selected_response, study_obj_box, study_type_box, phase_box, purpose_box, allocation_box, intervention_model_box, masking_box, conditions_box, intervention_box, location_box, removed_location_box, top_k_box, base_box, rag_box, combine_box],dataset_name)
579
-
580
- # flag_button.click(lambda *args: hf_writer.flag(list(args)), [selected_response, study_obj_box, study_type_box, phase_box, purpose_box, allocation_box, intervention_model_box, masking_box, conditions_box, intervention_box, location_box, removed_location_box, top_k_box, base_box, rag_box, combine_box], None, preprocess=False)
581
-
582
  #Clear all
583
  with gr.Row():
584
  clear_all_button = gr.Button("Clear All")
@@ -588,4 +429,5 @@ with gr.Blocks() as demo:
588
  clear_all_button.click(lambda : [None] * len(all_information), outputs=all_information)
589
 
590
  if __name__ == "__main__":
591
- demo.launch(debug=True)
 
 
50
  ]
51
 
52
  llm = Gemini(
53
+ model="models/gemini-1.5-flash-002",
 
54
  generation_config=generation_config,
55
  safety_settings=safety_settings,
56
  )
57
 
58
  # Setup embedder
59
+ embed_model_name = "malteos/scincl"
60
  embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
61
 
62
  Settings.llm = llm
63
  Settings.embed_model = embed_model
64
 
65
  # rebuild storage context
66
+ storage_context = StorageContext.from_defaults(persist_dir="malteos_scincl__CAR_T_cell__PersistVectorStore_v2")
67
  # load index
68
  index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
69
 
 
 
 
 
 
70
  async def clean_trial_text(text):
71
  """Removes intro text from references if present."""
72
  sections, cleaned_sections, in_references = text.split('\n'), [], False
 
95
 
96
  return '\n'.join(cleaned_sections).strip()
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  async def extract_criteria(text):
99
  """Extracts inclusion and exclusion criteria from text."""
100
  patterns = {
 
112
  async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, allocation, intervention_model, Masking, conditions, interventions, location_countries, removed_location_countries):
113
  """Runs the main function to process study information and generate formatted output."""
114
 
115
+ # Set up query engine
116
+ query_engine_get_study = CitationQueryEngine.from_args(
117
+ index_persisted,
118
+ similarity_top_k=top_k,
119
+ citation_chunk_size=2048,
120
+ verbose=True,
121
+ node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.8)],
122
+ use_async=True
123
+ )
124
+
125
+ # Build prompt
126
  study_information = f"""
127
  # Study Objectives/Description
128
  {study_obj}
 
146
  - Masking: None {Masking}
147
  """
148
 
149
+ # Query
150
+
151
+ query_response = await query_engine_get_study.aquery(f"""
152
+ Based on the provided instructions and clinical trial information, generate the new eligibility criteria by analyzing the related studies and clinical trial information.
153
+ ### Instruction:
154
+ Find suitable papers that have relevant or similar to the clinical trial information(### Clinical Trial Information).
155
+ Prioritize the following topics when finding related studies:
156
+ 1. Study Objectives
157
+ 2. Study Design and Phases
158
+ 3. Conditions
159
+ 4. Intervention/Treatment
160
+ 5. Location
161
 
162
+ Criteria generation:
163
+ As a clinical researcher, generate new eligibility criteria for given clinical trial information.
164
+ Analyze the information from related studies for more precise new eligibility criteria generation.
165
+ Ensure the criteria are clear, specific, and reasonable for a clinical research information.
166
+
167
+ Reference Papers generation:
168
+ Please give us NCT IDs and study names for {top_k} used papers.
169
+
170
+ Please follows the pattern of the output(### Pattern of the output).
171
+ --------------------------------------------------
172
+ ### Clinical Trial Information
173
+ {study_information}
174
+ --------------------------------------------------
175
+ ### Pattern of the output
176
+ Inclusion Criteria
177
+ 1.
178
+ 2.
179
+ .
180
+ .
181
+ .
182
+
183
+ Exclusion Criteria
184
+ 1.
185
+ 2.
186
+ .
187
+ .
188
+ .
189
+
190
+ Reference Papers
191
+ 1.NCT ID:
192
+ Study Name:
193
+ Condition:
194
+ Intervention/Treatment:
195
+ 2.NCT ID:
196
+ Study Name:
197
+ Condition:
198
+ Intervention/Treatment:
199
+ .
200
+ .
201
+ .
202
+ """
203
+ )
204
+
205
+ if query_response.response != "Empty Response":
206
  # Extract and format references
207
  pattern = r'Reference Papers\s*(.+)$'
208
+ match = re.search(pattern, query_response.response, re.DOTALL | re.IGNORECASE)
209
  ext_ref = match.group(1) if match else ""
210
  split_ref = re.split(r'\n*\d+\.\s+', ext_ref)[1:]
211
 
 
234
 
235
  return combine_criteria, formatted_ref
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  # Place holder
238
  place_holder = f"""Study Objectives
239
  The purpose of this study is to evaluate the safety, tolerance and efficacy of Liposomal Paclitaxel With Nedaplatin as First-line in patients with Advanced or Recurrent Esophageal Carcinoma
 
420
 
421
  clear_button.click(lambda : [None] * len(inputs_information), outputs=inputs_information)
422
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
  #Clear all
424
  with gr.Row():
425
  clear_all_button = gr.Button("Clear All")
 
429
  clear_all_button.click(lambda : [None] * len(all_information), outputs=all_information)
430
 
431
  if __name__ == "__main__":
432
+ demo.launch(debug=True)
433
+ # demo.queue(max_size=20,default_concurrency_limit=5 ).launch(server_name="0.0.0.0", server_port=7860,debug=True, share=True)