Spaces:
Sleeping
Sleeping
1-ARIjitS
committed on
Commit
·
d8b73be
1
Parent(s):
772bbc6
changed to include stats
Browse files
- app.py +2 -1
- llm_res.py +51 -21
app.py
CHANGED
|
@@ -88,8 +88,9 @@ with st.container():
|
|
| 88 |
status.json(json_of_clinical_trials, expanded=False)
|
| 89 |
# 7. Use an LLM to get a summary of the clinical trials, in plain text format.
|
| 90 |
status.write("Getting a summary of the clinical trials...")
|
| 91 |
-
response = get_short_summary_out_of_json_files(json_of_clinical_trials)
|
| 92 |
print(f'Response from LLM summarization: {response}')
|
|
|
|
| 93 |
status.write(f'Response from LLM summarization: {response}')
|
| 94 |
# 8. Use an LLM to extract numerical data from the clinical trials (e.g. number of patients, number of deaths, etc.). Get summary statistics out of that.
|
| 95 |
status.write("Getting summary statistics of the clinical trials...")
|
|
|
|
| 88 |
status.json(json_of_clinical_trials, expanded=False)
|
| 89 |
# 7. Use an LLM to get a summary of the clinical trials, in plain text format.
|
| 90 |
status.write("Getting a summary of the clinical trials...")
|
| 91 |
+
response, stats_dict = get_short_summary_out_of_json_files(json_of_clinical_trials)
|
| 92 |
print(f'Response from LLM summarization: {response}')
|
| 93 |
+
print(f'basic_stats_dict:{stats_dict}')
|
| 94 |
status.write(f'Response from LLM summarization: {response}')
|
| 95 |
# 8. Use an LLM to extract numerical data from the clinical trials (e.g. number of patients, number of deaths, etc.). Get summary statistics out of that.
|
| 96 |
status.write("Getting summary statistics of the clinical trials...")
|
llm_res.py
CHANGED
|
@@ -22,6 +22,8 @@ from langchain_core.pydantic_v1 import BaseModel, Field
|
|
| 22 |
from langchain_openai import ChatOpenAI
|
| 23 |
from langchain.chains.llm import LLMChain
|
| 24 |
from langchain_core.prompts import PromptTemplate
|
|
|
|
|
|
|
| 25 |
|
| 26 |
load_dotenv()
|
| 27 |
|
|
@@ -267,6 +269,26 @@ def get_short_summary_out_of_json_files(data_json):
|
|
| 267 |
|
| 268 |
return result
|
| 269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
def tagging_insights_from_json(data_json):
|
| 271 |
processed_json= process_dictionaty_with_llm_to_generate_response(data_json)
|
| 272 |
|
|
@@ -286,25 +308,25 @@ def tagging_insights_from_json(data_json):
|
|
| 286 |
# description: str = Field(
|
| 287 |
# description="text description grouping all the clinical trials using briefDescription and detailedDescription keys"
|
| 288 |
# )
|
| 289 |
-
project_title: list = Field(
|
| 290 |
-
|
| 291 |
-
)
|
| 292 |
-
status: list = Field(
|
| 293 |
-
description="Extract the status of all the clinical trials"
|
| 294 |
-
)
|
| 295 |
-
# keywords: list = Field(
|
| 296 |
-
# description="Extract the most relevant keywords regrouping all the clinical trials"
|
| 297 |
# )
|
| 298 |
-
|
| 299 |
-
|
|
|
|
|
|
|
|
|
|
| 300 |
)
|
|
|
|
|
|
|
|
|
|
| 301 |
primary_outcomes: list = Field(
|
| 302 |
-
description="get the
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
)
|
| 304 |
-
# secondary_outcomes: list= Field(description= "get the secondary outcomes of each clinical trial")
|
| 305 |
-
# eligibility: list = Field(
|
| 306 |
-
# description="get the eligibilityCriteria grouping all the clinical trials"
|
| 307 |
-
# )
|
| 308 |
healthy_volunteers: list= Field(description= "determine whether the clinical trial requires healthy volunteers")
|
| 309 |
minimum_age: list = Field(
|
| 310 |
description="get the minimum age from each experiment"
|
|
@@ -316,12 +338,12 @@ def tagging_insights_from_json(data_json):
|
|
| 316 |
|
| 317 |
def get_dict(self):
|
| 318 |
return {
|
| 319 |
-
"project_title": self.project_title,
|
| 320 |
-
"status": self.status,
|
| 321 |
-
|
| 322 |
-
"interventions": self.interventions,
|
| 323 |
"primary_outcomes": self.primary_outcomes,
|
| 324 |
-
|
| 325 |
# "eligibility": self.eligibility,
|
| 326 |
"healthy_volunteers": self.healthy_volunteers,
|
| 327 |
"minimum_age": self.minimum_age,
|
|
@@ -342,8 +364,16 @@ def tagging_insights_from_json(data_json):
|
|
| 342 |
|
| 343 |
res= tagging_chain.invoke({"input": processed_json})
|
| 344 |
result_dict= res.get_dict()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
print(f"Result_tagging: {result_dict}")
|
| 346 |
-
return result_dict
|
| 347 |
|
| 348 |
|
| 349 |
# clinical_record_info = get_clinical_records_by_ids(['NCT00841061', 'NCT03035123', 'NCT02272751', 'NCT03035123', 'NCT03055377'])
|
|
|
|
| 22 |
from langchain_openai import ChatOpenAI
|
| 23 |
from langchain.chains.llm import LLMChain
|
| 24 |
from langchain_core.prompts import PromptTemplate
|
| 25 |
+
from collections import Counter
|
| 26 |
+
import statistics
|
| 27 |
|
| 28 |
load_dotenv()
|
| 29 |
|
|
|
|
| 269 |
|
| 270 |
return result
|
| 271 |
|
| 272 |
+
def _leading_int(value):
    """Return the first whitespace-separated token of *value* as an int.

    Returns None when *value* is falsy, contains no tokens, or its first
    token is not an integer literal (e.g. "N/A").
    """
    if not value:
        return None
    tokens = str(value).split()
    if not tokens:
        return None
    try:
        return int(tokens[0])
    except ValueError:
        return None


def _flatten_keywords(entries):
    """Flatten per-trial keyword collections into one list of keywords.

    A plain string entry is kept as a single keyword instead of being
    iterated character by character.
    """
    flat = []
    for entry in entries:
        if isinstance(entry, str):
            if entry:
                flat.append(entry)
        elif entry is not None:
            flat.extend(entry)
    return flat


def analyze_data(data):
    """Compute basic summary statistics from the tagged clinical-trial data.

    Args:
        data: mapping read through the keys 'minimum_age', 'maximum_age',
            'gender' and 'keywords'. A missing or empty key is treated as
            "no data" for that statistic instead of raising.

    Returns:
        A tuple ``(avg_min_age, avg_max_age, most_common_gender,
        common_keywords)``:
        - the two averages are the mean of the parseable ages, or None when
          no age could be parsed;
        - ``most_common_gender`` is the most frequent entry of
          ``data['gender']``, or None when there are no entries;
        - ``common_keywords`` lists every distinct keyword, most frequent
          first.
    """
    # Only the leading integer of each age entry is used; anything after it
    # is ignored. NOTE(review): assumes entries look like "18 Years" and
    # share one unit -- confirm against the LLM output.
    min_ages = [
        parsed
        for parsed in map(_leading_int, data.get('minimum_age') or [])
        if parsed is not None
    ]
    max_ages = [
        parsed
        for parsed in map(_leading_int, data.get('maximum_age') or [])
        if parsed is not None
    ]

    # statistics.mean raises on an empty sequence, hence the guards.
    avg_min_age = statistics.mean(min_ages) if min_ages else None
    avg_max_age = statistics.mean(max_ages) if max_ages else None

    # most_common(1) is an empty list for an empty counter, so guard the
    # indexing instead of letting it raise IndexError.
    gender_counter = Counter(data.get('gender') or [])
    most_common_gender = (
        gender_counter.most_common(1)[0][0] if gender_counter else None
    )

    keywords = _flatten_keywords(data.get('keywords') or [])
    common_keywords = [word for word, _ in Counter(keywords).most_common()]

    return avg_min_age, avg_max_age, most_common_gender, common_keywords
|
| 291 |
+
|
| 292 |
def tagging_insights_from_json(data_json):
|
| 293 |
processed_json= process_dictionaty_with_llm_to_generate_response(data_json)
|
| 294 |
|
|
|
|
| 308 |
# description: str = Field(
|
| 309 |
# description="text description grouping all the clinical trials using briefDescription and detailedDescription keys"
|
| 310 |
# )
|
| 311 |
+
# project_title: list = Field(
|
| 312 |
+
# description="Extract the project titles of all the clinical trials"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
# )
|
| 314 |
+
# status: list = Field(
|
| 315 |
+
# description="Extract the status of all the clinical trials"
|
| 316 |
+
# )
|
| 317 |
+
keywords: list = Field(
|
| 318 |
+
description="Extract the most relevant keywords for each clinical trials"
|
| 319 |
)
|
| 320 |
+
# interventions: list = Field(
|
| 321 |
+
# description="describe the interventions for each clinical trial using title, name and description"
|
| 322 |
+
# )
|
| 323 |
primary_outcomes: list = Field(
|
| 324 |
+
description="get the timeframe of each clinical trial"
|
| 325 |
+
)
|
| 326 |
+
secondary_outcomes: list= Field(description= "get the secondary outcomes of each clinical trial")
|
| 327 |
+
eligibility: list = Field(
|
| 328 |
+
description="get the timeframe of each clinical trial"
|
| 329 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
healthy_volunteers: list= Field(description= "determine whether the clinical trial requires healthy volunteers")
|
| 331 |
minimum_age: list = Field(
|
| 332 |
description="get the minimum age from each experiment"
|
|
|
|
| 338 |
|
| 339 |
def get_dict(self):
|
| 340 |
return {
|
| 341 |
+
# "project_title": self.project_title,
|
| 342 |
+
# "status": self.status,
|
| 343 |
+
"keywords": self.keywords,
|
| 344 |
+
# "interventions": self.interventions,
|
| 345 |
"primary_outcomes": self.primary_outcomes,
|
| 346 |
+
"secondary_outcomes": self.secondary_outcomes,
|
| 347 |
# "eligibility": self.eligibility,
|
| 348 |
"healthy_volunteers": self.healthy_volunteers,
|
| 349 |
"minimum_age": self.minimum_age,
|
|
|
|
| 364 |
|
| 365 |
res= tagging_chain.invoke({"input": processed_json})
|
| 366 |
result_dict= res.get_dict()
|
| 367 |
+
|
| 368 |
+
avg_min_age, avg_max_age, most_common_gender, common_keywords= analyze_data(result_dict)
|
| 369 |
+
|
| 370 |
+
stats_dict= {'Average Minimum age': avg_min_age,
|
| 371 |
+
'Average Maximum age': avg_max_age,
|
| 372 |
+
'Most common gender undergoing the trials': most_common_gender,
|
| 373 |
+
'common keywords found in the trials': common_keywords}
|
| 374 |
+
|
| 375 |
print(f"Result_tagging: {result_dict}")
|
| 376 |
+
return result_dict, stats_dict
|
| 377 |
|
| 378 |
|
| 379 |
# clinical_record_info = get_clinical_records_by_ids(['NCT00841061', 'NCT03035123', 'NCT02272751', 'NCT03035123', 'NCT03055377'])
|