Spaces:

ravistech
/

Ravis-gemini

Sleeping

App Files Files

buildinqq committed on Nov 28, 2024

Commit

4950abf

verified ·

1 Parent(s): 4afaf5e

Update app.py

Browse files

- model: gemini-flash-002
- embedder: malteos
- persisted data: malteos_scincl__CAR_T_cell__PersistVectorStore_v2
- prompt: 1 step with ref (prompt 03 with minor modifications, e.g. edited priority)

Files changed (1) hide show

app.py +73 -231

app.py CHANGED Viewed

@@ -50,29 +50,23 @@ safety_settings = [
 ]
 llm = Gemini(
-    # model="models/gemini-1.5-flash-002",
-    model="models/gemini-1.5-pro",
     generation_config=generation_config,
     safety_settings=safety_settings,
 )
 # Setup embedder
-embed_model_name = "BAAI/bge-small-en-v1.5"
 embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
 Settings.llm = llm
 Settings.embed_model = embed_model
 # rebuild storage context
-storage_context = StorageContext.from_defaults(persist_dir="VectorStore")
 # load index
 index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
-async def remove_ref(text):
-    """Removes content after 'Reference Papers' (case-insensitive)."""
-    split_text = re.split(r'\bReference Papers\b', text, flags=re.IGNORECASE)
-    return split_text[0].strip() if len(split_text) > 1 else text.strip()
 async def clean_trial_text(text):
     """Removes intro text from references if present."""
     sections, cleaned_sections, in_references = text.split('\n'), [], False
@@ -101,100 +95,6 @@ async def clean_trial_text(text):
     return '\n'.join(cleaned_sections).strip()
-async def get_criteria(study_information, top_k):
-    """Fetches eligibility criteria and metadata for a study."""
-    query_engine_get_study = CitationQueryEngine.from_args(
-        index_persisted,
-        similarity_top_k=top_k,
-        citation_chunk_size=2048,
-        verbose=True,
-        node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.8)],
-        use_async=True
-    )
-    criteria_response = await query_engine_get_study.aquery(f"""
-      Based on the provided instructions and clinical trial information, generate the new eligibility criteria specific for clinical trial information.
-      ### Instruction:
-      Find suitable papers that are relevant or similar to the provided clinical trial information (### Clinical Trial Information).
-      Prioritize the following topics when finding related studies:
-      1. Study Objectives
-      2. Study Design and Phases
-      3. Conditions
-      4. Intervention/Treatment
-      Criteria Generation:
-      As a clinical researcher, generate new eligibility criteria for the given clinical trial information.
-      Analyze the information from all {top_k} related studies to generate new precise eligibility criteria.
-      Ensure that the criteria are specific for the given clinical trial information (### Clinical Trial Information).
-      Please follow the pattern of the output (### Pattern of the output).
-      --------------------------------------------------
-      ### Clinical Trial Information
-      {study_information}
-      --------------------------------------------------
-      ### Pattern of the Output
-      Inclusion Criteria
-      1.
-      2.
-      ...
-      Exclusion Criteria
-      1.
-      2.
-      ...
-      """)
-    metadata_list = [source.node.get_metadata_str() for source in criteria_response.source_nodes]
-    return criteria_response.response, metadata_list
-async def process_reference(metadata_list):
-    """Formats metadata list into a numbered string."""
-    return "\n".join([f"{i + 1}. {meta}" for i, meta in enumerate(metadata_list)])
-async def get_response(criteria, reference):
-    """Processes eligibility criteria and updates references to match new numbering."""
-    response = await llm.acomplete(f"""
-    ### Task Description:
-    You are tasked with processing clinical trial metadata and eligibility criteria. The goal is to clean, reorder, and maintain consistency between the metadata and references used in eligibility criteria.
-    ### Instructions:
-    1. Review the eligibility criteria provided, which include references to metadata numbers (e.g., [1], [2], etc.). Identify all reference numbers that are actually used in the criteria.
-    2. Remove metadata of reference papers (### Metadata of Reference Papers) that does not have a corresponding reference in the eligibility criteria. This will ensure only relevant references are kept.
-    3. Reorder the remaining metadata so that they are numbered sequentially, starting from 1.
-    4. Update the reference numbers in the eligibility criteria accordingly to reflect the new order.
-    5. Maintain Criteria Consistency: Ensure that the eligibility criteria remain exactly the same in terms of content, but the reference numbers are updated to match the new numbering of metadata.
-    --------------------------------------------------
-    ### Eligibility Criteria
-    {criteria}
-    --------------------------------------------------
-    ### Metadata of Reference Papers
-    {reference}
-    --------------------------------------------------
-    ### Pattern of the Output
-    Inclusion Criteria
-    1.
-    2.
-    ...
-    Exclusion Criteria
-    1.
-    2.
-    ...
-    Reference Papers
-    1.NCT ID:
-      Study Name:
-      Condition:
-      Intervention/Treatment:
-    2.NCT ID:
-      Study Name:
-      Condition:
-      Intervention/Treatment:
-    .
-    .
-    .""")
-    response_text = response.text
-    return response_text
 async def extract_criteria(text):
     """Extracts inclusion and exclusion criteria from text."""
     patterns = {
@@ -212,6 +112,17 @@ async def extract_criteria(text):
 async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, allocation, intervention_model, Masking, conditions, interventions, location_countries, removed_location_countries):
     """Runs the main function to process study information and generate formatted output."""
     study_information = f"""
     # Study Objectives/Description
     {study_obj}
@@ -235,15 +146,66 @@ async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, all
     - Masking: None {Masking}
     """
-    criteria, metadata_list = await get_criteria(study_information, top_k)
-    if criteria != "Empty Response":
-        processed_ref = await process_reference(metadata_list)
-        response = await get_response(criteria, processed_ref)
-        combine_criteria = await extract_criteria(response)
         # Extract and format references
         pattern = r'Reference Papers\s*(.+)$'
-        match = re.search(pattern, response, re.DOTALL | re.IGNORECASE)
         ext_ref = match.group(1) if match else ""
         split_ref = re.split(r'\n*\d+\.\s+', ext_ref)[1:]
@@ -272,106 +234,6 @@ async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, all
     return combine_criteria, formatted_ref
-  # # LLM.complete
-  # complete_response  = await llm.acomplete(f"""
-  #     Based on the provided instructions and clinical trial information, generate the new eligibility criteria by analyzing clinical trial information(### Clinical Trial Information).
-  #     ### Instruction:
-  #     Criteria generation:
-  #     As a clinical researcher, generate new eligibility criteria for given clinical trial information.
-  #     Ensure the criteria are clear, specific, and reasonable for a clinical research information.
-  #     Prioritize the following topics in clinical trial information.:
-  #     1. Study Objectives
-  #     2. Study Design and Phases
-  #     3. Conditions
-  #     4. Intervention/Treatment
-  #     Please follow the pattern of the output(### Pattern of the output).
-  #     --------------------------------------------------
-  #     ### Clinical Trial Information
-  #     {study_information}
-  #     --------------------------------------------------
-  #     ### Pattern of the output
-  #     Inclusion Criteria
-  #     1.
-  #     2.
-  #     .
-  #     .
-  #     .
-  #     Exclusion Criteria
-  #     1.
-  #     2.
-  #     .
-  #     .
-  #     .
-  #     """
-  #   )
-  # combine_response = await llm.acomplete(f"""
-  #     Based on the provided instructions clinical, clinical trial information, and criteria information, generate the appropriate eligibility criteria for ### Clinical Trial Information by analyze clinical trial information(### Clinical Trial Information), criteria 1 (### Criteria 1) and criteria 2 (### Criteria 2).
-  #     ### Instruction:
-  #     Criteria generation:
-  #     As a clinical researcher, generate appropriate eligibility criteria by analyzing given information.
-  #     Ensure the criteria are clear, specific, and reasonable for a clinical research information(### Clinical Trial Information).
-  #     Prioritize the following topics in clinical trial information.:
-  #     1. Study Objectives
-  #     2. Study Design and Phases
-  #     3. Conditions
-  #     4. Intervention/Treatment
-  #     Do not generate redundant inclusion and exclusion criteria. For example, if a criterion is included in one set of inclusion or exclusion criteria, do not include it again.
-  #     Reference Papers generation:
-  #     Please give us NCT IDs and study names from the references list in ### Criteria 1.
-  #     Please follow the pattern of the output(### Pattern of the output).
-  #     --------------------------------------------------
-  #     ### Clinical Trial Information
-  #     {study_information}
-  #     --------------------------------------------------
-  #     ### Criteria 1
-  #     {query_response}
-  #     --------------------------------------------------
-  #     ### Criteria 2
-  #     {complete_response}
-  #     --------------------------------------------------
-  #     ### Pattern of the output
-  #     Inclusion Criteria
-  #     1.
-  #     2.
-  #     .
-  #     .
-  #     .
-  #     Exclusion Criteria
-  #     1.
-  #     2.
-  #     .
-  #     .
-  #     .
-  #     Reference Papers
-  #     1.NCT ID:
-  #       Study Name:
-  #       Condition:
-  #       Intervention/Treatment:
-  #     2.NCT ID:
-  #       Study Name:
-  #       Condition:
-  #       Intervention/Treatment:
-  #     .
-  #     .
-  #     .
-  #     """
-    # )
-  # return query_response
-  # return query_response,complete_response,combine_response
 # Place holder
 place_holder = f"""Study Objectives
 The purpose of this study is to evaluate the safety, tolerance and efficacy of Liposomal Paclitaxel With Nedaplatin as First-line in patients with Advanced or Recurrent Esophageal Carcinoma
@@ -558,27 +420,6 @@ with gr.Blocks() as demo:
   clear_button.click(lambda : [None] * len(inputs_information), outputs=inputs_information)
-  # with gr.Row():
-  #     selected_response = gr.Radio(
-  #         choices=[
-  #             "Response 1",
-  #             "Response 2",
-              # "Response 3",
-  #             "All responses are equally good",
-  #             "Neither response is satisfactory"
-  #             ],
-  #         label="Select the best response"
-  #     )
-  # with gr.Row():
-  #     flag_button = gr.Button("Flag Selected Response")
-  # #Flagging
-  # dataset_name = "ravistech/feedback-demo-space"
-  # hf_writer = gr.HuggingFaceDatasetSaver(hf_token=token_w, dataset_name=dataset_name, private=True)
-  # hf_writer.setup([selected_response, study_obj_box, study_type_box, phase_box, purpose_box, allocation_box, intervention_model_box, masking_box, conditions_box, intervention_box, location_box, removed_location_box, top_k_box, base_box, rag_box, combine_box],dataset_name)
-  # flag_button.click(lambda *args: hf_writer.flag(list(args)), [selected_response, study_obj_box, study_type_box, phase_box, purpose_box, allocation_box, intervention_model_box, masking_box, conditions_box, intervention_box, location_box, removed_location_box, top_k_box, base_box, rag_box, combine_box], None, preprocess=False)
   #Clear all
   with gr.Row():
     clear_all_button = gr.Button("Clear All")
@@ -588,4 +429,5 @@ with gr.Blocks() as demo:
   clear_all_button.click(lambda : [None] * len(all_information), outputs=all_information)
 if __name__ == "__main__":
-  demo.launch(debug=True)

 ]
 llm = Gemini(
+    model="models/gemini-1.5-flash-002",
     generation_config=generation_config,
     safety_settings=safety_settings,
 )
 # Setup embedder
+embed_model_name = "malteos/scincl"
 embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
 Settings.llm = llm
 Settings.embed_model = embed_model
 # rebuild storage context
+storage_context = StorageContext.from_defaults(persist_dir="malteos_scincl__CAR_T_cell__PersistVectorStore_v2")
 # load index
 index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
 async def clean_trial_text(text):
     """Removes intro text from references if present."""
     sections, cleaned_sections, in_references = text.split('\n'), [], False
     return '\n'.join(cleaned_sections).strip()
 async def extract_criteria(text):
     """Extracts inclusion and exclusion criteria from text."""
     patterns = {
 async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, allocation, intervention_model, Masking, conditions, interventions, location_countries, removed_location_countries):
     """Runs the main function to process study information and generate formatted output."""
+    # Set up query engine
+    query_engine_get_study = CitationQueryEngine.from_args(
+    index_persisted,
+    similarity_top_k=top_k,
+    citation_chunk_size=2048,
+    verbose=True,
+    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.8)],
+    use_async=True
+    )
+    # Build prompt
     study_information = f"""
     # Study Objectives/Description
     {study_obj}
     - Masking: None {Masking}
     """
+    # Query
+    query_response = await query_engine_get_study.aquery(f"""
+    Based on the provided instructions and clinical trial information, generate the new eligibility criteria by analyzing the related studies and clinical trial information.
+    ### Instruction:
+    Find suitable papers that have relevant or similar to the clinical trial information(### Clinical Trial Information).
+    Prioritize the following topics when finding related studies:
+    1. Study Objectives
+    2. Study Design and Phases
+    3. Conditions
+    4. Intervention/Treatment
+    5. Location
+    Criteria generation:
+    As a clinical researcher, generate new eligibility criteria for given clinical trial information.
+    Analyze the information from related studies for more precise new eligibility criteria generation.
+    Ensure the criteria are clear, specific, and reasonable for a clinical research information.
+    Reference Papers generation:
+    Please give us NCT IDs and study names for {top_k} used papers.
+    Please follows the pattern of the output(### Pattern of the output).
+    --------------------------------------------------
+    ### Clinical Trial Information
+    {study_information}
+    --------------------------------------------------
+    ### Pattern of the output
+    Inclusion Criteria
+    1.
+    2.
+    .
+    .
+    .
+    Exclusion Criteria
+    1.
+    2.
+    .
+    .
+    .
+    Reference Papers
+    1.NCT ID:
+    Study Name:
+    Condition:
+    Intervention/Treatment:
+    2.NCT ID:
+    Study Name:
+    Condition:
+    Intervention/Treatment:
+    .
+    .
+    .
+    """
+    )
+    if query_response.response != "Empty Response":
         # Extract and format references
         pattern = r'Reference Papers\s*(.+)$'
+        match = re.search(pattern, query_response.response, re.DOTALL | re.IGNORECASE)
         ext_ref = match.group(1) if match else ""
         split_ref = re.split(r'\n*\d+\.\s+', ext_ref)[1:]
     return combine_criteria, formatted_ref
 # Place holder
 place_holder = f"""Study Objectives
 The purpose of this study is to evaluate the safety, tolerance and efficacy of Liposomal Paclitaxel With Nedaplatin as First-line in patients with Advanced or Recurrent Esophageal Carcinoma
   clear_button.click(lambda : [None] * len(inputs_information), outputs=inputs_information)
   #Clear all
   with gr.Row():
     clear_all_button = gr.Button("Clear All")
   clear_all_button.click(lambda : [None] * len(all_information), outputs=all_information)
 if __name__ == "__main__":
+  demo.launch(debug=True)
+#   demo.queue(max_size=20,default_concurrency_limit=5 ).launch(server_name="0.0.0.0", server_port=7860,debug=True, share=True)