Spaces:

amra-ai
/

studies

Runtime error

App Files Files Community

Roland Ding committed on Aug 2, 2023

Commit

ef80389

1 Parent(s): 5f0eb5f

5.4.15.42 Combined chatgpt-turbo-16k and the search-term prompt identification feature. Additionally aligned with the cloud data structure for separation of fields and prompts, added AI formatting instructions, and simplified the UI.

Browse files

Files changed (7) hide show

app.py +4 -3
application.py +6 -6
cloud_storage.py +3 -3
features.py +105 -147
supplier.py +64 -5
ui_study.py +21 -59
utility.py +0 -2

app.py CHANGED Viewed

@@ -18,9 +18,10 @@ examples = []
 # app_theme = gr.themes.Base(neutral_hue="blue")
 demo = gr.TabbedInterface(
-    [device_page,study_page,equivalent_page],
-    ["Device","Clinical Study Report","Equivalent Comparators"],
-    # theme= app_theme,
     theme = gr.themes.Soft(primary_hue="sky",secondary_hue="orange"),
     css = "footer {visibility: hidden}",
     title="AMRA AI Medi Reader")

 # app_theme = gr.themes.Base(neutral_hue="blue")
 demo = gr.TabbedInterface(
+    # [device_page,study_page,equivalent_page],
+    # ["Device","Clinical Study Report","Equivalent Comparators"],
+    [study_page],
+    ["Clinical Study"],
     theme = gr.themes.Soft(primary_hue="sky",secondary_hue="orange"),
     css = "footer {visibility: hidden}",
     title="AMRA AI Medi Reader")

application.py CHANGED Viewed

@@ -57,7 +57,8 @@ data_structure = {
         "term",
         "clinical term",
         "summary term",
-        "template_name"
     ]},
     "prompts":{
         "key":[
@@ -72,6 +73,8 @@ data_structure = {
             "levels",
             "preoperatives",
             "prompt",
         ]
     },
     "articles":{
@@ -92,6 +95,7 @@ data_structure = {
         "key":[
             "domain",
             "article",
         ],
         "fields":[
             "domain",
@@ -105,12 +109,8 @@ data_structure = {
 application default data
 '''
 app_data = {
-    "articles":[],
     "terms":[],
     "prompts":[],
     "outputs":[]
 }
-# hypothesis:
-# normal abstract length is about 800 characters

         "term",
         "clinical term",
         "summary term",
+        "template_name",
+        "terms"
     ]},
     "prompts":{
         "key":[
             "levels",
             "preoperatives",
             "prompt",
+            "fields",
+            "reformat_inst"
         ]
     },
     "articles":{
         "key":[
             "domain",
             "article",
+            "outcomes"
         ],
         "fields":[
             "domain",
 application default data
 '''
 app_data = {
+    "current_article":{},
     "terms":[],
     "prompts":[],
     "outputs":[]
 }

cloud_storage.py CHANGED Viewed

@@ -48,11 +48,11 @@ def upload_fileobj(file_obj, bucket, object_name=None):
         object_name = file_obj.name
     try:
-        s3.upload_fileobj(file_obj, bucket, object_name)
     except Exception as e:
         print(e)
-        return False
-    return True
 # get a file from s3
 def download_file(bucket, object_name, file_name=None):

         object_name = file_obj.name
     try:
+        res = s3.upload_fileobj(file_obj, bucket, object_name)
     except Exception as e:
         print(e)
+        return e
+    return res
 # get a file from s3
 def download_file(bucket, object_name, file_name=None):

features.py CHANGED Viewed

@@ -1,8 +1,5 @@
 # language default packages
 from datetime import datetime
-from operator import mul
-from functools import reduce
-from sys import stdout
 from collections import defaultdict
 # external packages
@@ -29,10 +26,6 @@ def init_app_data():
 def process_study(
         study_file_obj,
         study_content,
-        performance_metric_1,
-        performance_metric_2,
-        safety_metric_1,
-        safety_metric_2,
         device=default_device
         ):
@@ -43,89 +36,69 @@ def process_study(
     else:
         return "No file or content provided","No file or content provided","No file or content provided"
-    prompts = select_prompts( # need to identify how the app will know which prompts to use
-        article,
-        performance_metric_1,
-        performance_metric_2,
-        safety_metric_1,
-        safety_metric_2
-    )
-    # print("check prompts",prompts)
     output = {
         "domain":article["domain"],
         "article":article["name"],
-        "output":defaultdict(dict)
     }
-    for p in prompts:
-        prompt_string = ""
-        for s in p["sections"].split(","):
-            prompt_string += f"{article[s]}"
-        prompt_string += f"\n {p['prompt']}"
-        with open(f".prompts/{article['name']}_{p['template_name']}.txt","w") as f:
-            f.write(prompt_string)
-        res = execute_prompt(prompt_string)
-        with open(f".outputs/{article['name']}_{p['template_name']}.txt","w") as f:
-            f.write(res)
-        output["output"][p["assessment_step"]][p["template_name"]]=res
-    overview = create_overview(output["output"]["overview"])
-    details = create_details(output["output"])
     add_output(output)
-    return overview, details
 def refresh():
     '''
     this function refresh the application data from the cloud backend
     '''
     init_app_data()
-    return "refreshed", "refreshed"
-def create_overview(overview_list):
-    '''
-    '''
-    md_text = "## Overview\n\n"
-    md_text += "| attributes | detail |\n|:---|:---|\n"
-    for _,v in overview_list.items():
-        r = v.replace("\n\n","")
-        rows = r.split("\n")
-        for r in rows:
-            c = r.replace(": "," | ")
-            md_text += f"| {c} |\n"
-    # with open("overview.md","w") as f:
-        # f.write(md_text)
-    return gr.update(value=md_text)
-def create_details(output):
-    sections = ["clinical", "radiographic", "fussion assessment", "other","safety"]
-    titles = ["Clinical Outcomes", "Radiological Outcomes", "Fussion Assessment", "Other Outcomes","Safety Outcomes"]
     md_text = ""
-    for section, title in zip(sections, titles):
-        md_text += f"## {title}\n\n"
-        for key,table in output[section].items():
-            md_text += f"### {key} \n\n"
-            rows = table.replace("\n\n","").split("\n")
-            for i,r in enumerate(rows):
-                cells = r.split("\t")
-                md_text += f"| {' | '.join(cells)} |\n"
-                if i == 0:
-                    md_text += "| --- "*len(cells)+"|\n"
-            md_text += "\n\n"
-    # with open("details.md","w") as f:
-    #     f.write(md_text)
     return gr.update(value=md_text)
@@ -239,29 +212,22 @@ def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True)
     '''
     if file_object:
         content, _ = read_pdf(file)
-        name = file.name.split("\\")[-1].split(".")[0]
     else:
         content = file
-        name = f"temp_{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
-    abstract,_,end_abstract = extract_key_content(content,["objective","abstract"],["key","words:","methods"],["introduction"])
-    methods,_,end_methods = extract_key_content(content[end_abstract:],["methods"],["results"])
-    if not methods:
-        methods,_,end_methods = extract_key_content(content[end_abstract:],["methods"],["discussion"])
-    results,_,_ = extract_key_content(content[end_methods:],["results"],["discussion"])
     article ={
         "domain":domain,
-        "name":name,
         "content":content,
-        "abstract":abstract,
-        "methods":methods,
-        "results":results,
         "upload_time":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
     if add_to_s3 and file_object:
-        s3_path = upload_fileobj(file,domain,article["name"])
         article["s3_path"] = s3_path
     if add_to_local:
@@ -269,7 +235,7 @@ def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True)
     res = post_item("articles",article)
     if "Error" in res:
-        print(res)
         return res
     return article
@@ -387,72 +353,31 @@ def remove_device():
 def update_device():
     pass
-def process_feedback(text):
-    return text
-def select_prompts(article,*args):
-    '''
-    select the prompts based on the content and the search terms
-    that was included in the content
-    Parameters
-    ----------
-    article : dict
-        article object
-    Returns
-    -------
-    dict
-        prompts
-    '''
-    # get template names based on the search terms
-    memory = set()
-    prompts = []
-    for t in app_data["terms"]:
-        t["terms"] = t["term"].split(",")
-        if reduce(mul, [s in article["content"] for s in t["terms"]], 1) and t["template_name"] not in memory:
-            # get prompts based from templates
-            template_names = t["template_name"].split(",")
-            for tn in template_names:
-                prompts.extend([p for p in app_data["prompts"] if p["template_name"]==tn])
-                if prompts:
-                    prompts[-1]["prompt"].replace("<--clinical term-->",t["clinical term"])
-                    prompts[-1]["prompt"].replace("<--radiologic term-->",t["clinical term"])
-                    prompts[-1]["prompt"].replace("<--other term-->",t["clinical term"])
-            memory.add(t["template_name"])
-    # add overview prompts
-    prompts.extend([ov for ov in app_data["prompts"] if ov["assessment_step"]=="overview"])
-    # print("number of prompts",len(prompts))
-    # check if groups, levels and preopratives are in the article
-    article_logic = {}
-    for k,value in logic_keywords.items():
-        article_logic[k] = bool(sum([kw in article["content"] for kw in value]))
-    # print(article_logic)
-    # use article_logic to filter prompts
-    prompts = [p for p in prompts
-                if (p["groups"] == article_logic["groups"] or p["groups"] is None)
-                and (p["levels"] == article_logic["levels"] or p["levels"] is None)
-                and (p["preoperatives"] == article_logic["preoperatives"] or p["preoperatives"] is None)]
-    # print("number of prompts after logic",len(prompts))
-    # early return if no specific result
-    if "".join(args) == "":
-        # print("no args")
-        return prompts
-    # # performance metrics and safety metrics filter
-    # for p in prompts:
-    #     if not sum([a in p["clinical term"] for a in args if a]):
-    #         print(p["template_name"])
-    #         prompts.remove(p)
-    # print("number of prompts after args",len(prompts))
-    return prompts
 def keyword_search(keywords,full_text):
     keywords_result = {}
@@ -461,4 +386,37 @@ def keyword_search(keywords,full_text):
             keywords_result[k]=list_or([keyword_search(kw,full_text) for kw in k])
         else:
             keywords_result[k]=keyword_search(k,full_text)
-    return keywords_result

 # language default packages
 from datetime import datetime
 from collections import defaultdict
 # external packages
 def process_study(
         study_file_obj,
         study_content,
         device=default_device
         ):
     else:
         return "No file or content provided","No file or content provided","No file or content provided"
+    app_data["current_article"] = article
+    selected_prompts = select_prompts(article["content"],terms=app_data["terms"],prompts=app_data["prompts"])
     output = {
         "domain":article["domain"],
         "article":article["name"],
+        "outcomes":defaultdict(str)
     }
+    res = process_prompts(article["content"],selected_prompts)
+    output["outcomes"] = res
+    # overview = create_overview(output["output"]["Overview"])
+    views = create_views(res)
     add_output(output)
+    return views
+    # return ""
 def refresh():
     '''
     this function refresh the application data from the cloud backend
     '''
     init_app_data()
+    # After reloading the application data, re-run the extraction for the most
+    # recently processed article and return the rebuilt report view.
+    # NOTE(review): init_app_data is not visible here; if it rebuilds app_data
+    # from its defaults, "current_article" is empty again at this point - confirm.
+    article = app_data.get("current_article")
+    if not article:
+        # app_data defaults "current_article" to an empty dict, so indexing
+        # article["content"] before any study was processed raises KeyError.
+        return gr.update(value="No study has been processed yet. Add a study, then refresh.")
+    selected_prompts = select_prompts(article["content"],terms=app_data["terms"],prompts=app_data["prompts"])
+    res = process_prompts(article["content"],selected_prompts)
+    output = {
+        "domain":article["domain"],
+        "article":article["name"],
+        "outcomes":res
+    }
+    views = create_views(res)
+    add_output(output)
+    return views
+def create_views(output):
+    '''
+    build the collapsible markdown report from the extraction results.
+    Parameters
+    ----------
+    output : dict
+        mapping of a title to a result dict that provides the
+        "assessment" and "content" keys
+    Returns
+    -------
+    the result of gr.update(value=...) holding three <details> sections:
+    overview, performance and safety
+    '''
     md_text = ""
+    # take the first overview / safety result, or None when that assessment
+    # was not produced, so a missing section no longer raises IndexError
+    overview = next((v for v in output.values() if v["assessment"] == "overview"),None)
+    safety = next((v for v in output.values() if v["assessment"] == "safety"),None)
+    missing = "_Not available._"
+    # add overview
+    md_text += "<details>\n<summary>Overview</summary>\n\n"
+    md_text += (overview["content"] if overview else missing) + "\n</details>\n\n"
+    # add performance: every result that is neither overview nor safety
+    md_text += "<details>\n<summary>Performance</summary>\n\n"
+    for title,content in output.items():
+        if content["assessment"] not in ("overview","safety"):
+            md_text += f"#### {content['assessment']} - {title}\n\n"
+            md_text += content["content"] + "\n\n"
+    md_text += "</details>\n\n"
+    # add safety
+    md_text += "<details>\n<summary>Safety</summary>\n\n"
+    md_text += (safety["content"] if safety else missing) + "\n\n" + "</details>\n\n"
     return gr.update(value=md_text)
     '''
     if file_object:
         content, _ = read_pdf(file)
+        filename = file.name.split("\\")[-1]
+        # name = filename.split(".")[0]
     else:
         content = file
+        # filename = file.name
+        filename = f"temp_{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
     article ={
         "domain":domain,
+        "name":filename,
         "content":content,
         "upload_time":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
     if add_to_s3 and file_object:
+        s3_path = upload_fileobj(file,domain,filename)
         article["s3_path"] = s3_path
     if add_to_local:
     res = post_item("articles",article)
     if "Error" in res:
+        print(res["Error"])
         return res
     return article
 def update_device():
     pass
+# identify article state
+def identify_logic(text):
+    '''
+    report which article logic keywords occur in the text, ignoring case.
+    Parameters
+    ----------
+    text : str
+        text of the article
+    Returns
+    -------
+    dict
+        True / False for each of "groups", "levels" and "preoperatives"
+    '''
+    lowered = text.lower()
+    flags = {}
+    for keyword in ("groups","levels","preoperatives"):
+        flags[keyword] = keyword in lowered
+    return flags
+def select_prompts(text,terms,prompts):
+    '''
+    select the prompts to run for an article.
+    Parameters
+    ----------
+    text : str
+        text of the article
+    terms : list
+        term records, each provides "terms" and "template_name"
+    prompts : list
+        prompt records, each provides "template_name" and "assessment_step"
+    Returns
+    -------
+    list
+        prompts of every template whose terms all occur in the text, followed
+        by the overview prompts that agree with the detected article logic
+    '''
+    selected_templates = set()
+    for t in terms:
+        if all(term in text for term in _as_list(t["terms"])):
+            # set.update on a plain string adds its single characters, so the
+            # template names are normalised to a list before updating
+            selected_templates.update(_as_list(t["template_name"]))
+    logic = identify_logic(text)
+    selected_prompts = [p for p in prompts if p["template_name"] in selected_templates]
+    for p in prompts:
+        if p["assessment_step"] != "overview":
+            continue
+        # only the flags that were detected in the text (True) constrain the prompt
+        matches_logic = all(p[l]==v for l,v in logic.items() if v)
+        # skip overview prompts already selected through their template name
+        if matches_logic and p not in selected_prompts:
+            selected_prompts.append(p)
+    return selected_prompts
+def _as_list(value):
+    # a comma separated string is split into its parts, any other iterable is
+    # copied; this keeps a string from being iterated character by character
+    return value.split(",") if isinstance(value,str) else list(value)
 def keyword_search(keywords,full_text):
     keywords_result = {}
             keywords_result[k]=list_or([keyword_search(kw,full_text) for kw in k])
         else:
             keywords_result[k]=keyword_search(k,full_text)
+    return keywords_result
+def process_prompts(text,prompts):
+    '''
+    run every prompt against the article text and collect the answers
+    Parameters
+    ----------
+    text : str
+        text of the article
+    prompts : list
+        list of prompts, each provides "prompt", "fields", "reformat_inst",
+        "template_name" and "assessment_step"
+    Returns
+    -------
+    dict
+        one entry per template name holding the template name, the
+        assessment step and the content returned by send_inst
+    '''
+    results = defaultdict(dict)
+    for prompt in prompts:
+        # first instruction: the prompt followed by the comma joined fields,
+        # second instruction: the reformatting instruction
+        question = prompt["prompt"]+", ".join(prompt["fields"])
+        messages = create_inst(text,[question,prompt["reformat_inst"]])
+        answer = send_inst(messages)
+        template = prompt["template_name"]
+        results[template] = {
+            "template_name":template,
+            "assessment":prompt["assessment_step"],
+            "content":answer
+        }
+    return results

supplier.py CHANGED Viewed

@@ -1,11 +1,6 @@
 import openai
 from application import *
-# import json
-#
-# with open("openai_api_key.json", "r") as f:
-#     openai_api_key = json.load(f)["key"]
 openai.api_key = openai_api_key
 def execute_prompt(prompt):
@@ -28,3 +23,67 @@ def execute_prompt(prompt):
         presence_penalty=0
     )
     return res.choices[0]["text"] if res.choices else "<error> failed to generate text</error>"

 import openai
 from application import *
 openai.api_key = openai_api_key
 def execute_prompt(prompt):
         presence_penalty=0
     )
     return res.choices[0]["text"] if res.choices else "<error> failed to generate text</error>"
+def format(**kwargs):
+    '''
+    return the value passed as the "format" keyword, or the whole keyword
+    dict when no "format" keyword was given.
+    NOTE(review): this name shadows the builtin format in this module.
+    '''
+    return kwargs.get("format",kwargs)
+def execute_instruction(article, instruction,model="gpt-3.5-turbo-16k",format="markdown"):
+    '''
+    execute_instruction sends one instruction about an article to the chat model
+    article: the raw text from the article source, sent as the system message
+    instruction: the instruction to carry out, sent as a user message
+    model: the chat model to use, default to gpt-3.5-turbo-16k
+    format: the table syntax requested in a final user message, default to markdown
+    return: a string, the message content of the first returned choice
+    '''
+    messages = [
+        {"role":"system","content":article},
+        {"role":"user","content":instruction},
+        {"role":"user","content":f"Format the table in {format} syntax"},
+    ]
+    completion = openai.ChatCompletion.create(
+        model=model,
+        messages=messages,
+        temperature=0)
+    first_choice = completion["choices"][0]
+    return first_choice["message"]["content"]
+def create_inst(article, instructions):
+    '''
+    build a chat message list: the article as the system message followed by
+    one user message per instruction, in the given order.
+    '''
+    system_msg = {"role":"system","content":article}
+    user_msgs = [{"role":"user","content":text} for text in instructions]
+    return [system_msg] + user_msgs
+def send_inst(stream, model="gpt-3.5-turbo-16k",temperature=0):
+    '''
+    send a chat message list to openai.ChatCompletion.create and return the
+    message content of the first returned choice.
+    '''
+    completion = openai.ChatCompletion.create(
+            model=model,
+            messages=stream,
+            temperature=temperature)
+    first_choice = completion["choices"][0]
+    return first_choice["message"]["content"]

ui_study.py CHANGED Viewed

@@ -1,22 +1,16 @@
 import gradio as gr
-# from article import *
 from utility import *
 from application import *
 from features import *
 def reset():
     return (
         gr.Files.update(value=None),
         gr.TextArea.update(value=""),
-        gr.Textbox.update(value=""),
-        gr.Textbox.update(value=""),
-        gr.Textbox.update(value=""),
-        gr.Textbox.update(value=""),
-        gr.Checkbox.update(value=False),
-        gr.Slider.update(value=0),
-        gr.Markdown.update(value=""),
-        gr.Markdown.update(value=""),
         gr.Markdown.update(value="")
     )
@@ -25,76 +19,44 @@ with gr.Blocks() as study_page:
         with gr.Column():
             gr.Markdown("## Studies")
             gr.HTML("<hr>")
             upload_study = gr.File(label="Upload a clinical study report",type="file")
             input_study = gr.TextArea(label="Or paste a clinical study report content",placeholder="Paste content here...",lines=5)
             with gr.Row():
                 btn_reset = gr.Button(value="Reset",variant="stop")
                 btn_add_study = gr.Button(value="Add",variant="primary")
-        with gr.Column():
-            performance_metric_1 = gr.Textbox(lines=1, label="identify Key Performance Outcome 1",placeholder="e.g. VAS Score")
-            performance_metric_2 = gr.Textbox(lines=1, label="identify Key Performance Outcome 2",placeholder="e.g. Incidence of Fusion")
-            safety_metric_1 = gr.Textbox(lines=1, label="identify Key Safety Outcome 1",placeholder="e.g. Incidence of Revision")
-            safety_metric_2 = gr.Textbox(lines=1, label="identify Key Safety Outcome 2",placeholder="e.g. Incidence of Nonunion")
-            device_options["secondary extraction"] = gr.Checkbox(label="Will a secondary extraction with device stratification be required?",interactive=True)
-            device_options["secondary extraction count"] = gr.Slider(minimum=0, maximum=10, step=1, label="How many secondary extractions with device stratification be required?",interactive=True)
-    gr.Markdown("<hr>")
-    with gr.Row():
         with gr.Column():
-            gr.Markdown("## Literature Report")
-        with gr.Column():
-            bt_refresh = gr.Button(value="Refresh",variant="primary")
-    gr.Markdown("<hr>")
-    with gr.Row():
-        with gr.Column(scale=2):
-            overview = gr.Markdown("")
-        with gr.Column(scale=3):
-            details = gr.Markdown("")
-            btn_reset.click(
-                reset,
-                outputs=[
-                    upload_study,
-                    input_study,
-                    performance_metric_1,
-                    performance_metric_2,
-                    safety_metric_1,
-                    safety_metric_2,
-                    device_options["secondary extraction"],
-                    device_options["secondary extraction count"],
-                    overview,
-                    details
-                ]
-            )
     btn_add_study.click(
         process_study,
         inputs=[
             upload_study,
             input_study,
-            performance_metric_1,
-            performance_metric_2,
-            safety_metric_1,
-            safety_metric_2
         ],
         outputs=[
-            overview,
-            details
         ],
     )
-    bt_refresh.click(
         refresh,
         outputs=[
-            overview,
-            details
-        ]
     )

 import gradio as gr
 from utility import *
 from application import *
 from features import *
 def reset():
+    '''
+    reset the gradio inputs and output of this page.
+    returns three updates in order: the file upload set to None, the pasted
+    study text set to "" and the report markdown set to "".
+    '''
     return (
         gr.Files.update(value=None),
         gr.TextArea.update(value=""),
         gr.Markdown.update(value="")
     )
         with gr.Column():
             gr.Markdown("## Studies")
             gr.HTML("<hr>")
             upload_study = gr.File(label="Upload a clinical study report",type="file")
             input_study = gr.TextArea(label="Or paste a clinical study report content",placeholder="Paste content here...",lines=5)
             with gr.Row():
                 btn_reset = gr.Button(value="Reset",variant="stop")
                 btn_add_study = gr.Button(value="Add",variant="primary")
         with gr.Column():
+            with gr.Row():
+                gr.Markdown("## Literature Report")
+                btn_refresh = gr.Button(value="Refresh",variant="primary")
+            views = gr.Markdown("")
+    btn_reset.click(
+        reset,
+        outputs=[
+            upload_study,
+            input_study,
+            views,
+        ]
+    )
     btn_add_study.click(
         process_study,
         inputs=[
             upload_study,
             input_study,
         ],
         outputs=[
+            views,
         ],
     )
+    btn_refresh.click(
         refresh,
         outputs=[
+            views,
+        ],
     )

utility.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import json
-import tempfile
 from application import *
 from pdfminer.high_level import extract_text
@@ -143,7 +142,6 @@ def py_dict_to_db_map(py_dict):
             db_map[key] = {"BOOL":value}
         elif value is None:
             db_map[key] = {"NULL":True}
     return db_map
 # convert dynamodb list to python list

 import json
 from application import *
 from pdfminer.high_level import extract_text
             db_map[key] = {"BOOL":value}
         elif value is None:
             db_map[key] = {"NULL":True}
     return db_map
 # convert dynamodb list to python list