Spaces:

amra-ai
/

studies

Runtime error

App Files Files Community

Roland Ding committed on Jul 29, 2023

Commit

667bfca

1 Parent(s): 20fc5ea

2.3.11.32: updated features. Completed process_study as the one-stroke process for the clinical report; completed create_overview and create_details to populate the markdown UI content; completed the select_prompts process so that it aligns with the prompt selection logic agreed in prior meetings (notion to be added to the spec at a later date).

Browse files

Files changed (1) hide show

features.py +199 -84

features.py CHANGED Viewed

@@ -3,6 +3,7 @@ from datetime import datetime
 from operator import mul
 from functools import reduce
 from sys import stdout
 # external packages
 import gradio as gr
@@ -15,72 +16,121 @@ from supplier import *
 encoding = tiktoken.get_encoding("cl100k_base")
 def process_study(
         study_file_obj,
         performance_metric_1,
         performance_metric_2,
         safety_metric_1,
         safety_metric_2,
         device=default_device
         ):
-    if study_file_obj is None:
-        return "", "", ""
-    article = add_article(device,study_file_obj)
-    content = extract_key_content(article["content"],["abstract","objective"],["discussion"])
-    assessments = select_prompts(content)
     output = {
         "domain":article["domain"],
         "article":article["name"],
-        "output":{}
     }
-    n_assessments = len(assessments)
-    c = 1
-    for a, prompts in assessments.items():
-        # run prompts with the content
-        output["output"][a] = []
-        n_prompts = len(prompts)
-        for i,p in enumerate(prompts):
-            # run prompt on content and append it to the outputs[output][assessment]
-            prompt_text = f"{content}\n\n {p}\n"
-            # print(len(encoding.encode(prompt_text)))
-            feedback = execute_prompt(prompt_text)
-            # print(feedback)
-            output["output"][a].append(process_feedback(feedback))
-            stdout.write(f"{c}/{n_assessments} - {i+1}/{n_prompts}\r")
-        c += 1
-    add_output(output)
-    overview = create_overview(output)
-    performance = create_performance(output)
-    safety = create_safety(output)
-    return overview, performance, safety
-def create_overview(output):
-    # raw_text = output["output"]["Clinical Overview"]
-    raw_text = "work in progress"
-    overview = f"<hr /><p>{raw_text}</p>"
-    return gr.update(value=overview)
-def create_performance(output):
-    performances = output["output"]["Clinical Performance"]
     md_text = ""
-    for p in performances:
-        md_text += f"<hr /><p>{p}</p>"
     return gr.update(value=md_text)
-def create_safety(output):
-    raw_text = output["output"]["Safety"]
-    safety = f"<hr /><p>{raw_text}</p>"
-    return gr.update(value=safety)
-def extract_key_content(text,start,end,case_sensitive=False):
     '''
     this function extract the content between start and end
     and return the content in between. The function will find
@@ -112,13 +162,20 @@ def extract_key_content(text,start,end,case_sensitive=False):
     start_index = 0
     for s in start:
         start_index = max(start_index,text.find(s))
     end_index = 0
     for e in end:
-        end_index = max(end_index,text[start_index:].find(e)) if start_index!=-1 else max(end_index,text.find(e))
-    content = origin[start_index:start_index+end_index] if start_index!=-1 else origin[:end_index]
-    return content
 def get_articles(update_local=True):
     '''
@@ -160,7 +217,7 @@ def get_article(domain,name):
     return article
-def add_article(domain,file_obj,add_to_s3=True, add_to_local=True):
     '''
     this function receive the domain name and file obj
     and add the article to the cloud, s3 and local memory
@@ -181,17 +238,31 @@ def add_article(domain,file_obj,add_to_s3=True, add_to_local=True):
     dict
         article object
     '''
-    content, meta = read_pdf(file_obj)
     article ={
         "domain":domain,
-        "name":file_obj.name.split("\\")[-1].split(".")[0],
         "content":content,
-        # "meta":meta,
         "upload_time":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
-    if add_to_s3:
-        s3_path = upload_fileobj(file_obj,domain,article["name"])
         article["s3_path"] = s3_path
     if add_to_local:
@@ -264,37 +335,6 @@ def update_article(article,file_obj=None,update_local=True):
     return article
-def process_feedback(text):
-    return text
-def select_prompts(content):
-    '''
-    select the prompts based on the content and the search terms
-    that was included in the content
-    Parameters
-    ----------
-    content : str
-        content of the article
-    Returns
-    -------
-    dict
-        prompts
-    '''
-    prompts = {}
-    for a in assessments:
-        prompts[a] = set()
-    for p in app_data["terms"]:
-        p["terms"] = p["term"].split(",")
-        if reduce(mul, [s in content for s in p["terms"]], 1):
-            prompts[p["assessment_step"]].add(p["command"])
-    return prompts
 def add_output(output):
     '''
     this function add the output to the cloud
@@ -336,7 +376,7 @@ def update_output(output):
         return False
     return True
-def add_device():
     pass
 def get_device():
@@ -346,4 +386,79 @@ def remove_device():
     pass
 def update_device():
-    pass

 from operator import mul
 from functools import reduce
 from sys import stdout
+from collections import defaultdict
 # external packages
 import gradio as gr
 encoding = tiktoken.get_encoding("cl100k_base")
# get prompts, terms, outputs and articles from the cloud
def init_app_data():
    '''
    load the application data from the cloud backend

    Each table is fetched with get_table and stored in app_data under
    the table's own name, in this order: prompts, terms, outputs,
    articles.
    '''
    for table_name in ("prompts", "terms", "outputs", "articles"):
        app_data[table_name] = get_table(table_name)
 def process_study(
         study_file_obj,
         study_content,
         performance_metric_1,
         performance_metric_2,
         safety_metric_1,
         safety_metric_2,
         device=default_device
         ):
     '''
     run the whole clinical report process for one study in one call

     The study is registered with add_article (from the uploaded file
     when one is given, otherwise from the provided content), the prompts
     are chosen with select_prompts, every prompt is executed on the
     article sections it names, and the answers are rendered with
     create_overview and create_details before being stored with
     add_output.

     Parameters
     ----------
     study_file_obj : file object or None
         uploaded study file; takes priority over study_content
     study_content : str or None
         study text, used only when no file is provided
     performance_metric_1, performance_metric_2
         performance metrics, forwarded to select_prompts
     safety_metric_1, safety_metric_2
         safety metrics, forwarded to select_prompts
     device
         domain handed to add_article for the new article

     Returns
     -------
     tuple
         (overview, details); when neither a file nor content is
         provided both entries are the message
         "No file or content provided"
     '''
     if study_file_obj:
         article = add_article(device,study_file_obj)
     elif study_content:
         article = add_article(device,study_content,file_object=False)
     else:
         # every exit has to hand back the same number of values as the
         # normal path (overview, details); the former third value was a
         # leftover of the overview/performance/safety layout
         no_input = "No file or content provided"
         return no_input, no_input

     prompts = select_prompts( # need to identify how the app will know which prompts to use
         article,
         performance_metric_1,
         performance_metric_2,
         safety_metric_1,
         safety_metric_2
     )

     output = {
         "domain":article["domain"],
         "article":article["name"],
         "output":defaultdict(list)
     }

     for p in prompts:
         # the prompt is the text of every requested article section
         # followed by the prompt wording itself
         section_text = "".join(f"{article[s]}" for s in p["sections"].split(","))
         prompt_string = f"{section_text}\n {p['prompt']}"

         # debug dumps of what was sent and received; utf-8 is explicit
         # because article text may hold characters the platform default
         # encoding cannot write
         with open(f"prompt_{p['template_name']}.txt","w",encoding="utf-8") as f:
             f.write(prompt_string)

         res = execute_prompt(prompt_string)

         with open(f"output_{p['template_name']}.txt","w",encoding="utf-8") as f:
             f.write(res)

         output["output"][p["assessment_step"]].append(res)

     overview = create_overview(output["output"]["overview"])
     details = create_details(output["output"])
     add_output(output)
     return overview, details
def refresh():
    '''
    reload the application data from the cloud backend

    Returns
    -------
    tuple
        the text "refreshed", twice
    '''
    init_app_data()
    status = "refreshed"
    return status, status
def create_overview(overview_list):
    '''
    build the markdown overview table from the overview answers

    Every non-blank line of every answer becomes one table row. A line
    shaped like "attribute: detail" is split on its first ": " into the
    two cells of the row; a line without ": " is kept as a single cell.

    Parameters
    ----------
    overview_list : list of str
        answers collected for the "overview" assessment step

    Returns
    -------
    the result of gr.update(value=md_text), where md_text is the
    markdown table
    '''
    table_rows = []
    for answer in overview_list:
        for line in answer.split("\n"):
            # blank lines are skipped instead of deleting "\n\n" from the
            # answer, which glued the lines around the gap into one row
            if not line.strip():
                continue
            # split on the first ": " only, so a detail that itself
            # contains ": " does not spill into extra columns
            attribute, separator, detail = line.partition(": ")
            if separator:
                table_rows.append(f"| {attribute} | {detail} |\n")
            else:
                table_rows.append(f"| {line} |\n")

    md_text = "## Overview\n\n"
    md_text += "| attributes | detail |\n|:---|:---|\n"
    md_text += "".join(table_rows)
    return gr.update(value=md_text)
def create_details(output):
    '''
    build the markdown detail tables, one titled section per outcome group

    Every answer stored under a section key is rendered as one table:
    each non-blank line is a row, cells are separated by tabs, and the
    first row is followed by the markdown alignment row.

    Parameters
    ----------
    output : dict
        maps assessment step names to lists of answers; process_study
        passes a defaultdict(list), so a section without answers reads
        as an empty list

    Returns
    -------
    the result of gr.update(value=md_text), where md_text is the
    markdown text of all sections
    '''
    # NOTE(review): "fussion" looks like a misspelling of "fusion"; the key
    # has to match the assessment_step values the answers were stored
    # under, so it is kept as is - confirm against the prompts table
    sections = ["clinical", "radiographic", "fussion assessment", "other","safety"]
    titles = ["Clinical Outcomes", "Radiological Outcomes", "Fussion Assessment", "Other Outcomes","Safety Outcomes"]

    md_parts = []
    for section, title in zip(sections, titles):
        md_parts.append(f"## {title}\n\n")
        for table in output[section]:
            # blank lines are dropped instead of deleting "\n\n" from the
            # answer, which glued the rows around the gap together
            rows = [line for line in table.split("\n") if line.strip()]
            for row_number, row in enumerate(rows):
                cells = row.split("\t")
                md_parts.append(f"| {' | '.join(cells)} |\n")
                if row_number == 0:
                    # alignment row that turns the first row into the header
                    md_parts.append("|:---"*len(cells)+"|\n")
            md_parts.append("\n\n")

    md_text = "".join(md_parts)
    return gr.update(value=md_text)
+def extract_key_content(text,start,end,before = None,case_sensitive=False):
     '''
     this function extract the content between start and end
     and return the content in between. The function will find
     start_index = 0
     for s in start:
         start_index = max(start_index,text.find(s))
+    if start_index ==-1: start_index = 0
     end_index = 0
     for e in end:
+        end_index = max(end_index,text[start_index:].find(e))
+    if before:
+        for b in before:
+            before_index = text[start_index:start_index+end_index].find(b)
+            end_index = min(end_index,before_index) if before_index != -1 and before_index >=800 else end_index # 800 is a magic number for the length of the abstract
+    content = origin[start_index:start_index+end_index]
+    return content, start_index, start_index+end_index
 def get_articles(update_local=True):
     '''
     return article
+def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True):
     '''
     this function receive the domain name and file obj
     and add the article to the cloud, s3 and local memory
     dict
         article object
     '''
+    if file_object:
+        content, _ = read_pdf(file)
+        name = file.name.split("\\")[-1].split(".")[0]
+    else:
+        content = file
+        name = f"temp_{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+    abstract,_,end_abstract = extract_key_content(content,["objective","abstract"],["key","words:","methods"],["introduction"])
+    methods,_,end_methods = extract_key_content(content[end_abstract:],["methods"],["results"])
+    if not methods:
+        methods,_,end_methods = extract_key_content(content[end_abstract:],["methods"],["discussion"])
+    results,_,_ = extract_key_content(content[end_methods:],["results"],["discussion"])
     article ={
         "domain":domain,
+        "name":name,
         "content":content,
+        "abstract":abstract,
+        "methods":methods,
+        "results":results,
         "upload_time":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
+    if add_to_s3 and file_object:
+        s3_path = upload_fileobj(file,domain,article["name"])
         article["s3_path"] = s3_path
     if add_to_local:
     return article
 def add_output(output):
     '''
     this function add the output to the cloud
         return False
     return True
def add_device(*args):
    '''
    placeholder for adding a device: accepts any positional arguments,
    does nothing and returns None
    '''
    return None
 def get_device():
     pass
 def update_device():
     '''
     placeholder for updating a device: does nothing and returns None
     '''
     return None
def process_feedback(text):
    '''
    post-process the feedback of a prompt

    Currently a pass-through: the text is handed back unchanged.
    '''
    feedback = text
    return feedback
def select_prompts(article,*args):
    '''
    select the prompts based on the content and the search terms
    that was included in the content

    A term row of app_data["terms"] matches when every one of its
    comma separated terms occurs in the article content. Each matching
    row contributes the prompts of its templates, with the term
    placeholders of the prompt text filled in. The overview prompts are
    always added. The result is then filtered with the groups / levels /
    preoperatives flags derived from logic_keywords.

    Parameters
    ----------
    article : dict
        article object; only article["content"] is read
    *args
        performance and safety metrics; accepted for the planned metric
        filter, which is not implemented yet, so they do not change the
        result

    Returns
    -------
    list
        prompts (dict) to execute
    '''
    content = article["content"]
    placeholders = ("<--clinical term-->","<--radiologic term-->","<--other term-->")

    # get template names based on the search terms
    memory = set()
    prompts = []
    for t in app_data["terms"]:
        t["terms"] = t["term"].split(",")
        if t["template_name"] in memory:
            continue
        if not all(s in content for s in t["terms"]):
            continue
        # get prompts based from templates
        for tn in t["template_name"].split(","):
            for p in app_data["prompts"]:
                if p["template_name"] != tn:
                    continue
                # str.replace returns a new string, so the result has to be
                # stored; a copy is filled in so the shared prompt kept in
                # app_data still holds its placeholders for the next article
                prompt = dict(p)
                for placeholder in placeholders:
                    # NOTE(review): all three placeholders are filled with
                    # t["clinical term"], as in the original calls - confirm
                    prompt["prompt"] = prompt["prompt"].replace(placeholder,t["clinical term"])
                prompts.append(prompt)
        memory.add(t["template_name"])

    # add overview prompts
    prompts.extend(ov for ov in app_data["prompts"] if ov["assessment_step"]=="overview")

    # check if groups, levels and preopratives are in the article
    article_logic = {
        k: any(kw in content for kw in value)
        for k,value in logic_keywords.items()
    }

    # use article_logic to filter prompts; a prompt flag of None means the
    # prompt applies whatever the article contains
    logic_keys = ("groups","levels","preoperatives")
    prompts = [
        p for p in prompts
        if all(p[k] is None or p[k] == article_logic[k] for k in logic_keys)
    ]

    # TODO: performance metrics and safety metrics filter based on args.
    # Until it exists the prompts are returned as they are; args is not
    # joined any more because a metric of None made "".join(args) raise
    # a TypeError
    return prompts
def keyword_search(keywords,full_text):
    '''
    check which keywords appear in a text

    Parameters
    ----------
    keywords : iterable
        each entry is either a single keyword (str) or a tuple of
        alternative keywords; a tuple counts as found when at least one
        of its members is found
    full_text : str
        text to search in

    Returns
    -------
    dict
        maps every entry of keywords to True (found) or False
    '''
    def found(keyword):
        # a plain keyword is the base case of the search; without it a
        # string was searched character by character and never stopped
        # recursing
        if isinstance(keyword, tuple):
            return any(found(kw) for kw in keyword)
        return keyword in full_text

    return {k: found(k) for k in keywords}