Spaces:

klinic-hackupc
/

klinic

Sleeping

App Files Files Community

1-ARIjitS committed on May 5, 2024

Commit

d8b73be

1 Parent(s): 772bbc6

changed to include stats

Browse files

Files changed (2) hide show

app.py +2 -1
llm_res.py +51 -21

app.py CHANGED Viewed

@@ -88,8 +88,9 @@ with st.container():
             status.json(json_of_clinical_trials, expanded=False)
             # 7. Use an LLM to get a summary of the clinical trials, in plain text format.
             status.write("Getting a summary of the clinical trials...")
-            response = get_short_summary_out_of_json_files(json_of_clinical_trials)
             print(f'Response from LLM summarization: {response}')
             status.write(f'Response from LLM summarization: {response}')
             # 8. Use an LLM to extract numerical data from the clinical trials (e.g. number of patients, number of deaths, etc.). Get summary statistics out of that.
             status.write("Getting summary statistics of the clinical trials...")

             status.json(json_of_clinical_trials, expanded=False)
             # 7. Use an LLM to get a summary of the clinical trials, in plain text format.
             status.write("Getting a summary of the clinical trials...")
+            response, stats_dict = get_short_summary_out_of_json_files(json_of_clinical_trials)
             print(f'Response from LLM summarization: {response}')
+            print(f'basic_stats_dict:{stats_dict}')
             status.write(f'Response from LLM summarization: {response}')
             # 8. Use an LLM to extract numerical data from the clinical trials (e.g. number of patients, number of deaths, etc.). Get summary statistics out of that.
             status.write("Getting summary statistics of the clinical trials...")

llm_res.py CHANGED Viewed

@@ -22,6 +22,8 @@ from langchain_core.pydantic_v1 import BaseModel, Field
 from langchain_openai import ChatOpenAI
 from langchain.chains.llm import LLMChain
 from langchain_core.prompts import PromptTemplate
 load_dotenv()
@@ -267,6 +269,26 @@ def get_short_summary_out_of_json_files(data_json):
     return result
 def tagging_insights_from_json(data_json):
     processed_json= process_dictionaty_with_llm_to_generate_response(data_json)
@@ -286,25 +308,25 @@ def tagging_insights_from_json(data_json):
         # description: str = Field(
         #     description="text description grouping all the clinical trials using briefDescription and detailedDescription keys"
         # )
-        project_title: list = Field(
-            description="Extract the project titles of all the clinical trials"
-        )
-        status: list = Field(
-            description="Extract the status of all the clinical trials"
-        )
-        # keywords: list = Field(
-        #    description="Extract the most relevant keywords regrouping all the clinical trials"
         # )
-        interventions: list = Field(
-            description="describe the interventions for each clinical trial using title, name and description"
         )
         primary_outcomes: list = Field(
-            description="get the primary outcomes of each clinical trial"
         )
-        # secondary_outcomes: list= Field(description= "get the secondary outcomes of each clinical trial")
-        # eligibility: list = Field(
-        #    description="get the eligibilityCriteria grouping all the clinical trials"
-        # )
         healthy_volunteers: list= Field(description= "determine whether the clinical trial requires healthy volunteers")
         minimum_age: list = Field(
            description="get the minimum age from each experiment"
@@ -316,12 +338,12 @@ def tagging_insights_from_json(data_json):
         def get_dict(self):
             return {
-                "project_title": self.project_title,
-                "status": self.status,
-                # "keywords": self.keywords,
-                "interventions": self.interventions,
                 "primary_outcomes": self.primary_outcomes,
-                # "secondary_outcomes": self.secondary_outcomes,
                 # "eligibility": self.eligibility,
                 "healthy_volunteers": self.healthy_volunteers,
                 "minimum_age": self.minimum_age,
@@ -342,8 +364,16 @@ def tagging_insights_from_json(data_json):
     res= tagging_chain.invoke({"input": processed_json})
     result_dict= res.get_dict()
     print(f"Result_tagging: {result_dict}")
-    return result_dict
 # clinical_record_info = get_clinical_records_by_ids(['NCT00841061', 'NCT03035123', 'NCT02272751', 'NCT03035123', 'NCT03055377'])

 from langchain_openai import ChatOpenAI
 from langchain.chains.llm import LLMChain
 from langchain_core.prompts import PromptTemplate
+from collections import Counter
+import statistics
 load_dotenv()
     return result
def _leading_int(value):
    """Return the leading integer token of *value*, or None if there is none.

    Accepts plain integers as-is and strings such as ``"18 Years"`` (the first
    whitespace-separated token is parsed). Anything else, including empty or
    non-numeric strings like ``"N/A"``, yields None instead of raising.
    """
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value
    if not isinstance(value, str):
        return None
    tokens = value.split()
    if not tokens:
        return None
    try:
        return int(tokens[0])
    except ValueError:
        return None


def analyze_data(data):
    """Compute basic summary statistics from a tagging-result dictionary.

    Args:
        data: Mapping read under the keys ``'minimum_age'``, ``'maximum_age'``,
            ``'gender'`` and ``'keywords'``. Each value is expected to be a
            list; a missing key or a ``None`` value is treated as an empty
            list. Age entries are parsed by their leading integer token;
            unparsable entries are skipped. Keyword entries may be either
            strings or iterables of strings.

    Returns:
        A 4-tuple ``(avg_min_age, avg_max_age, most_common_gender,
        common_keywords)``. The averages and the gender are ``None`` when no
        usable values are present; ``common_keywords`` lists the distinct
        keywords ordered from most to least frequent.
    """
    # NOTE(review): only the leading number is used, so the unit is ignored
    # ("6 Months" counts as 6) -- confirm all ages arrive in the same unit.
    min_ages = [
        age
        for age in map(_leading_int, data.get('minimum_age') or [])
        if age is not None
    ]
    max_ages = [
        age
        for age in map(_leading_int, data.get('maximum_age') or [])
        if age is not None
    ]

    avg_min_age = statistics.mean(min_ages) if min_ages else None
    avg_max_age = statistics.mean(max_ages) if max_ages else None

    # Ignore empty/None entries so they can never win the vote, and guard the
    # empty case: most_common(1) returns [] when there is nothing to count.
    gender_counter = Counter(g for g in (data.get('gender') or []) if g)
    top_gender = gender_counter.most_common(1)
    most_common_gender = top_gender[0][0] if top_gender else None

    # Keywords may arrive as a list of lists or as a flat list of strings.
    # A bare string must be kept whole: iterating it would yield characters.
    keywords = []
    for entry in data.get('keywords') or []:
        if isinstance(entry, str):
            if entry:
                keywords.append(entry)
        elif entry is not None:
            keywords.extend(entry)
    common_keywords = [word for word, _ in Counter(keywords).most_common()]

    return avg_min_age, avg_max_age, most_common_gender, common_keywords
 def tagging_insights_from_json(data_json):
     processed_json= process_dictionaty_with_llm_to_generate_response(data_json)
         # description: str = Field(
         #     description="text description grouping all the clinical trials using briefDescription and detailedDescription keys"
         # )
+        # project_title: list = Field(
+        #     description="Extract the project titles of all the clinical trials"
         # )
+        # status: list = Field(
+        #     description="Extract the status of all the clinical trials"
+        # )
+        keywords: list = Field(
+           description="Extract the most relevant keywords for each clinical trials"
         )
+        # interventions: list = Field(
+        #     description="describe the interventions for each clinical trial using title, name and description"
+        # )
         primary_outcomes: list = Field(
+            description="get the timeframe of each clinical trial"
+        )
+        secondary_outcomes: list= Field(description= "get the secondary outcomes of each clinical trial")
+        eligibility: list = Field(
+           description="get the timeframe of each clinical trial"
         )
         healthy_volunteers: list= Field(description= "determine whether the clinical trial requires healthy volunteers")
         minimum_age: list = Field(
            description="get the minimum age from each experiment"
         def get_dict(self):
             return {
+                # "project_title": self.project_title,
+                # "status": self.status,
+                "keywords": self.keywords,
+                # "interventions": self.interventions,
                 "primary_outcomes": self.primary_outcomes,
+                "secondary_outcomes": self.secondary_outcomes,
                 # "eligibility": self.eligibility,
                 "healthy_volunteers": self.healthy_volunteers,
                 "minimum_age": self.minimum_age,
     res= tagging_chain.invoke({"input": processed_json})
     result_dict= res.get_dict()
+    avg_min_age, avg_max_age, most_common_gender, common_keywords= analyze_data(result_dict)
+    stats_dict= {'Average Minimum age': avg_min_age,
+                 'Average Maximum age': avg_max_age,
+                 'Most common gender undergoing the trials': most_common_gender,
+                 'common keywords found in the trials': common_keywords}
     print(f"Result_tagging: {result_dict}")
+    return result_dict, stats_dict
 # clinical_record_info = get_clinical_records_by_ids(['NCT00841061', 'NCT03035123', 'NCT02272751', 'NCT03035123', 'NCT03055377'])