Spaces:

klinic-hackupc
/

klinic

Sleeping

App Files Files Community

1-ARIjitS committed on May 5, 2024

Commit

772bbc6

2 Parent(s): aa656dd ec6a815

Merge branch 'main' of https://huggingface.co/spaces/klinic-hackupc/klinic into main

Browse files

Files changed (6) hide show

.DS_Store +0 -0
MATLAB/visualize_app.mlapp +0 -0
MATLAB/visualize_connectedNodes_continuous.m +6 -0
app.py +29 -38
img_klinic.jpeg +0 -0
utils.py +47 -1

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

MATLAB/visualize_app.mlapp CHANGED Viewed

Binary files a/MATLAB/visualize_app.mlapp and b/MATLAB/visualize_app.mlapp differ

MATLAB/visualize_connectedNodes_continuous.m CHANGED Viewed

@@ -18,6 +18,12 @@ function visualize_connectedNodes_continuous()
         else
             connectionsMap(char_node) = char_connectedNode;
         end
     end
     % Loop for continuous interaction

         else
             connectionsMap(char_node) = char_connectedNode;
         end
+        if isKey(connectionsMap, char_connectedNode)
+            connectionsMap(char_connectedNode) = [connectionsMap(char_connectedNode), '|', char_node];
+        else
+            connectionsMap(char_connectedNode) = char_node;
+        end
     end
     % Loop for continuous interaction

app.py CHANGED Viewed

@@ -12,7 +12,8 @@ from utils import (
     get_similarities_among_diseases_uris,
     augment_the_set_of_diseaces,
     get_clinical_trials_related_to_diseases,
-    get_clinical_records_by_ids
 )
 from llm_res import get_short_summary_out_of_json_files, tagging_insights_from_json
 import json
@@ -36,11 +37,15 @@ CONNECTION_STRING = f"iris://{username}:{password}@{hostname}:{port}/{namespace}
 engine = create_engine(CONNECTION_STRING)
 with st.container(): # user input
     col1, col2 = st.columns((6, 1))
     with col1:
-        description_input = st.text_area(label="Enter the disease description 👇", placeholder='A disease that causes memory loss and other cognitive impairments.')
     with col2:
         st.text('') # dummy to center vertically
@@ -60,6 +65,8 @@ with st.container():
             diseases_related_to_the_user_text = get_diseases_related_to_a_textual_description(
                 description_input, encoder
             )
             # 3. Get the similarities of the embeddings of those diseases (cosine similarity of the embeddings of the nodes of such diseases)
             status.write("Getting the similarities among the diseases to filter out less promising ones...")
             diseases_uris = [disease["uri"] for disease in diseases_related_to_the_user_text]
@@ -78,7 +85,7 @@ with st.container():
             json_of_clinical_trials = get_clinical_records_by_ids(
                 [trial["nct_id"] for trial in clinical_trials_related_to_the_diseases]
             )
-            status.json(json_of_clinical_trials)
             # 7. Use an LLM to get a summary of the clinical trials, in plain text format.
             status.write("Getting a summary of the clinical trials...")
             response = get_short_summary_out_of_json_files(json_of_clinical_trials)
@@ -91,13 +98,23 @@ with st.container():
             status.write(f'Response from LLM tagging: {response}')
             # 9. Show the results to the user: graph of the diseases chosen, summary of the clinical trials, summary statistics of the clinical trials, and list of the details of the clinical trials considered
             status.update(label="Done!", state="complete")
-            time.sleep(1)
             show_graph = True
 # graph
 with st.container():
     if show_graph:
         # TODO actual graph
         graph_of_diseases = agraph(
             nodes=[
@@ -147,39 +164,13 @@ with st.container():
         # TODO replace mock data
         with open("mock_trial.json") as f:
             d = json.load(f)
-        for i in range(0, 5):
             trials.append(d)
-        for trial in trials:
-            with st.expander(f"{trial['protocolSection']['identificationModule']['nctId']}"):
-                official_title = trial["protocolSection"]["identificationModule"][
-                    "officialTitle"
-                ]
-                st.write(f"##### {official_title}")
-                brief_summary = trial["protocolSection"]["descriptionModule"]["briefSummary"]
-                st.write(brief_summary)
-                status_module = {
-                    "Status": trial["protocolSection"]["statusModule"]["overallStatus"],
-                    "Status Date": trial["protocolSection"]["statusModule"][
-                        "statusVerifiedDate"
-                    ],
-                }
-                st.write("###### Status")
-                st.table(status_module)
-                design_module = {
-                    "Study Type": trial["protocolSection"]["designModule"]["studyType"],
-                    # "Phases": trial["protocolSection"]["designModule"]["phases"], # breaks formatting because it is an array
-                    "Allocation": trial["protocolSection"]["designModule"]["designInfo"][
-                        "allocation"
-                    ],
-                    "Participants": trial["protocolSection"]["designModule"]["enrollmentInfo"][
-                        "count"
-                    ],
-                }
-                st.write("###### Design")
-                st.table(design_module)
-                # TODO more modules?

     get_similarities_among_diseases_uris,
     augment_the_set_of_diseaces,
     get_clinical_trials_related_to_diseases,
+    get_clinical_records_by_ids,
+    render_trial_details
 )
 from llm_res import get_short_summary_out_of_json_files, tagging_insights_from_json
 import json
 engine = create_engine(CONNECTION_STRING)
+st.image("img_klinic.jpeg", caption="(AI-generated image)", use_column_width=True)
+st.title("Klìnic", help="AI-powered clinical trial search engine")
+st.subheader("Find clinical trials in a scoped domain of biomedical research, guiding your research with AI-powered insights.")
 with st.container(): # user input
     col1, col2 = st.columns((6, 1))
     with col1:
+        description_input = st.text_area(label="Enter a disease description 👇", placeholder='A disease that causes memory loss and other cognitive impairments.')
     with col2:
         st.text('') # dummy to center vertically
             diseases_related_to_the_user_text = get_diseases_related_to_a_textual_description(
                 description_input, encoder
             )
+            status.info(f'Found {len(diseases_related_to_the_user_text)} diseases related to the description you entered.')
+            status.json(diseases_related_to_the_user_text, expanded=False)
             # 3. Get the similarities of the embeddings of those diseases (cosine similarity of the embeddings of the nodes of such diseases)
             status.write("Getting the similarities among the diseases to filter out less promising ones...")
             diseases_uris = [disease["uri"] for disease in diseases_related_to_the_user_text]
             json_of_clinical_trials = get_clinical_records_by_ids(
                 [trial["nct_id"] for trial in clinical_trials_related_to_the_diseases]
             )
+            status.json(json_of_clinical_trials, expanded=False)
             # 7. Use an LLM to get a summary of the clinical trials, in plain text format.
             status.write("Getting a summary of the clinical trials...")
             response = get_short_summary_out_of_json_files(json_of_clinical_trials)
             status.write(f'Response from LLM tagging: {response}')
             # 9. Show the results to the user: graph of the diseases chosen, summary of the clinical trials, summary statistics of the clinical trials, and list of the details of the clinical trials considered
             status.update(label="Done!", state="complete")
+            status.balloons()
             show_graph = True
 # graph
 with st.container():
     if show_graph:
+        # Explain to the user how the disease graph was built. The literal is a raw
+        # string so that the LaTeX backslashes reach the Markdown renderer untouched.
+        st.info(
+            r"""This is a graph of the relevant diseases that we found, based on the description that you entered. The diseases are connected by edges if they are similar to each other. The color of the edges represents the similarity of the diseases.
+We use the embeddings of the diseases to determine the similarity between them. The embeddings are generated using a Representation Learning algorithm that learns the topological relations among the nodes in the graph, depending on how they are connected. We utilize the [PyKeen](https://github.com/pykeen/pykeen) implementation of TransH to train an embedding model.
+[TransH](https://ojs.aaai.org/index.php/AAAI/article/view/8870) utilizes hyperplanes to model relations between entities. It is a multi-relational model that can handle many-to-many relations between entities. The model is trained on the triples of the graph, where the triples are the subject, relation, and object of the graph. The model learns the embeddings of the entities and the relations, such that the embeddings of the subject and object are close to each other when the relation is true.
+Specifically, it optimizes the following cost function:
+$$
+\mathcal{L} = \sum_{(h, r, t) \in \Delta} \sum_{(h', r', t') \in \Delta'_{(h, r, t)}} \left[ f_r(h, t) + \gamma - f_{r'}(h', t') \right]_+
+$$
+where $f_r(h, t) = \lVert (h - w_r^\top h \, w_r) + d_r - (t - w_r^\top t \, w_r) \rVert_2^2$ scores a triple, $\Delta$ and $\Delta'$ are the true and corrupted triples, and $\gamma$ is the margin."""
+        )
         # TODO actual graph
         graph_of_diseases = agraph(
             nodes=[
         # TODO replace mock data
         with open("mock_trial.json") as f:
             d = json.load(f)
+        for i in range(0, 8):
             trials.append(d)
+        tab_titles = [f"{trial['protocolSection']['identificationModule']['nctId']}" for trial in trials]
+        tabs = st.tabs(tab_titles)
+        for i in range(0, len(tabs)):
+            with tabs[i]:
+                render_trial_details(trials[i])

img_klinic.jpeg ADDED Viewed

utils.py CHANGED Viewed

@@ -4,6 +4,7 @@ import os
 from sqlalchemy import create_engine, text
 import requests
 from sentence_transformers import SentenceTransformer
 username = "demo"
 password = "demo"
@@ -181,7 +182,7 @@ def get_clinical_trials_related_to_diseases(
     with engine.connect() as conn:
         with conn.begin():
             sql = f"""
-                    SELECT TOP 5 d.nct_id, VECTOR_COSINE(d.embedding, TO_VECTOR('{string_representation}', DOUBLE)) AS distance
                     FROM Test.ClinicalTrials d
                     ORDER BY distance DESC
                 """
@@ -190,6 +191,51 @@ def get_clinical_trials_related_to_diseases(
     return [{"nct_id": row[0], "distance": row[1]} for row in data]
 if __name__ == "__main__":
     username = "demo"

 from sqlalchemy import create_engine, text
 import requests
 from sentence_transformers import SentenceTransformer
+import streamlit as st
 username = "demo"
 password = "demo"
     with engine.connect() as conn:
         with conn.begin():
             sql = f"""
+                    SELECT TOP 10 d.nct_id, VECTOR_COSINE(d.embedding, TO_VECTOR('{string_representation}', DOUBLE)) AS distance
                     FROM Test.ClinicalTrials d
                     ORDER BY distance DESC
                 """
     return [{"nct_id": row[0], "distance": row[1]} for row in data]
+def to_capitalized_case(string: str) -> str:
+    """Turn a constant-style value such as ``NOT_YET_RECRUITING`` into ``Not yet recruiting``.
+
+    Underscores are replaced by spaces. When the result is entirely uppercase,
+    everything after the first character is lowercased. Any other input
+    (mixed case, lowercase, empty) is returned with only the underscore
+    replacement applied, so the function always returns a ``str``.
+    """
+    string = string.replace("_", " ")
+    if string.isupper():
+        return string[0] + string[1:].lower()
+    # Not an all-caps value: hand the text back instead of an implicit None,
+    # which would otherwise break callers that join or render the result.
+    return string
+def list_to_capitalized_case(strings: List[str]) -> str:
+    """Apply ``to_capitalized_case`` to every item and join the results with ``", "``."""
+    return ", ".join(to_capitalized_case(item) for item in strings)
+def render_trial_details(trial: dict) -> None:
+    """Render one clinical-trial record with ``st.write`` / ``st.table``.
+
+    Output order: official title, brief summary, then a "Status", a "Design"
+    and an "Interventions" table. All values are read from ``trial`` with
+    unguarded subscripts, so a record missing any of the keys raises
+    ``KeyError`` at the point where that key is first needed.
+    """
+    # TODO: handle key errors for all cases (→ do not render)
+    protocol = trial["protocolSection"]
+
+    st.write(f"##### {protocol['identificationModule']['officialTitle']}")
+    st.write(protocol["descriptionModule"]["briefSummary"])
+
+    status_section = protocol["statusModule"]
+    status_rows = {
+        "Status": to_capitalized_case(status_section["overallStatus"]),
+        "Status Date": status_section["statusVerifiedDate"],
+        "Has Results": trial["hasResults"],
+    }
+    st.write("###### Status")
+    st.table(status_rows)
+
+    design_section = protocol["designModule"]
+    design_rows = {
+        "Study Type": to_capitalized_case(design_section["studyType"]),
+        "Phases": list_to_capitalized_case(design_section["phases"]),
+        "Allocation": to_capitalized_case(design_section["designInfo"]["allocation"]),
+        "Primary Purpose": to_capitalized_case(design_section["designInfo"]["primaryPurpose"]),
+        "Participants": design_section["enrollmentInfo"]["count"],
+        "Masking": to_capitalized_case(design_section["designInfo"]["maskingInfo"]["masking"]),
+        "Who Masked": list_to_capitalized_case(design_section["designInfo"]["maskingInfo"]["whoMasked"]),
+    }
+    st.write("###### Design")
+    st.table(design_rows)
+
+    # One table row per intervention: name -> description.
+    interventions = protocol["armsInterventionsModule"]["interventions"]
+    intervention_rows = {entry["name"]: entry["description"] for entry in interventions}
+    st.write("###### Interventions")
+    st.table(intervention_rows)
 if __name__ == "__main__":
     username = "demo"