Roland Ding committed on
Commit ·
bb32635
1
Parent(s): 3afa0c1
9.9.22.67 mass update of the application
Browse files
+ Revised internally to use langchain and async calls
+ Realigned the application to use the new terms and prompts from
search term 13n
+ Added the new chains.py module to support the new langchain usage
+ Revised application.py to align with the new backend data
structure.
modified: app.py
modified: application.py
new file: chains.py
modified: features.py
modified: requirements.txt
modified: supplier.py
modified: ui_studies.py
modified: ui_study.py
- app.py +16 -9
- application.py +26 -16
- chains.py +107 -0
- features.py +374 -198
- requirements.txt +2 -1
- supplier.py +13 -14
- ui_studies.py +17 -13
- ui_study.py +81 -59
app.py
CHANGED
|
@@ -13,23 +13,30 @@ from utility import *
|
|
| 13 |
|
| 14 |
from ui_study import *
|
| 15 |
from ui_studies import *
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
examples = []
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
def refresh_data():
|
| 31 |
return
|
| 32 |
|
| 33 |
if __name__ == "__main__":
|
| 34 |
init_app_data()
|
|
|
|
| 35 |
demo.launch()
|
|
|
|
| 13 |
|
| 14 |
from ui_study import *
|
| 15 |
from ui_studies import *
|
| 16 |
+
# from application import app_data
|
| 17 |
|
| 18 |
|
| 19 |
examples = []
|
| 20 |
|
| 21 |
+
@terminal_print
def init_demo():
    '''
    build the tabbed gradio interface of the application

    The study page and the studies page are created first (in that order)
    and then grouped under the "Clinical Study" and "Studies" tabs.

    Returns
    -------
    gr.TabbedInterface
        the interface object to be launched
    '''
    pages = [init_study_page(), init_studies_page()]
    tab_titles = ["Clinical Study", "Studies"]

    return gr.TabbedInterface(
        pages,
        tab_titles,
        theme=gr.themes.Soft(primary_hue="sky", secondary_hue="orange"),
        css="footer {visibility: hidden}",
        title="AMRA AI Medi Reader",
    )
|
| 35 |
|
| 36 |
def refresh_data():
|
| 37 |
return
|
| 38 |
|
| 39 |
if __name__ == "__main__":
|
| 40 |
init_app_data()
|
| 41 |
+
demo = init_demo()
|
| 42 |
demo.launch()
|
application.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
-
from collections import defaultdict
|
| 4 |
-
|
| 5 |
'''
|
| 6 |
shared environment variables
|
| 7 |
'''
|
|
@@ -54,28 +52,40 @@ tables_inst=[
|
|
| 54 |
f"include all table titles."
|
| 55 |
]
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
-
accepted_month_inst=[
|
| 68 |
-
f"extract the acceptance month of the article from the system text.",
|
| 69 |
-
f"return the results on a single line as 'Accepted Month: <month>.",
|
| 70 |
-
]
|
| 71 |
|
| 72 |
'''
|
| 73 |
application default data
|
| 74 |
'''
|
| 75 |
app_data = {
|
| 76 |
"current article":{},
|
| 77 |
-
"articles":
|
| 78 |
"prompts":{},
|
| 79 |
"terms":[],
|
| 80 |
-
"summary":
|
| 81 |
}
|
|
|
|
| 1 |
import os
|
| 2 |
|
|
|
|
|
|
|
| 3 |
'''
|
| 4 |
shared environment variables
|
| 5 |
'''
|
|
|
|
| 52 |
f"include all table titles."
|
| 53 |
]
|
| 54 |
|
| 55 |
+
article_prompts = {
|
| 56 |
+
"Authors": '''extract all of the authors of the article from the above text.\n
|
| 57 |
+
Return the results on the same line separated by commas as Authors: Author A, Author B...
|
| 58 |
+
''',
|
| 59 |
+
"Acceptance Year": '''extract the acceptance year of the article from the above text.\n
|
| 60 |
+
Return the results on a single line as Accepted Year: <year>.
|
| 61 |
+
''',
|
| 62 |
+
|
| 63 |
+
"Acceptance Month":'''extract the acceptance month of the article from the above text.\n
|
| 64 |
+
Return the results on a single line as Accepted Month: <month>.
|
| 65 |
+
'''
|
| 66 |
+
}
|
| 67 |
|
| 68 |
+
# One dict per prompt category. A chained assignment (a = b = c = {}) would
# bind every name to the same dict object, so prompts loaded for one
# category would show up in all of them.
overview_prompts = {}
clinical_prompts = {}
radiological_prompts = {}
other_prompts = {}
|
| 69 |
+
|
| 70 |
+
# populate the prompts from .prompt/overview/ folder
|
| 71 |
+
def update_prompts_from_dir(prompts,path):
    '''
    load every prompt file found directly under path into the prompts dict

    Parameters
    ----------
    prompts : dict
        dictionary updated in place; the key is the file name without its
        last extension and the value is the content of the file
    path : str
        directory holding the prompt files

    Returns
    -------
    None
    '''
    # sorted() keeps the load order deterministic
    for file in sorted(os.listdir(path)):
        file_path = os.path.join(path,file)

        # sub-folders cannot be read as a prompt, skip them
        if not os.path.isfile(file_path):
            continue

        # splitext drops only the last extension, so a name that itself
        # contains a dot is not cut short
        prompt_name = os.path.splitext(file)[0]
        with open(file_path,"r",encoding="utf-8") as f:
            prompts[prompt_name] = f.read()
|
| 75 |
+
|
| 76 |
+
update_prompts_from_dir(overview_prompts,".prompts/overview")
|
| 77 |
+
update_prompts_from_dir(clinical_prompts,".prompts/clinical")
|
| 78 |
+
update_prompts_from_dir(radiological_prompts,".prompts/radiologic")
|
| 79 |
+
update_prompts_from_dir(other_prompts,".prompts/other")
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
'''
|
| 83 |
application default data
|
| 84 |
'''
|
| 85 |
app_data = {
    # spelling kept so that any caller still using the spaced key keeps working
    "current article":{},
    # spelling used by features.py when it reads/writes the active article;
    # it must exist up front so a read before any upload does not raise KeyError
    "current_article":{},
    "articles":{},
    "prompts":{},
    "terms":[],
    "summary":{}
}
|
chains.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
|
| 3 |
+
from langchain.chat_models import ChatOpenAI
|
| 4 |
+
from langchain.prompts.chat import ChatPromptTemplate
|
| 5 |
+
from langchain.schema import BaseOutputParser
|
| 6 |
+
|
| 7 |
+
from utility import read_pdf,terminal_print
|
| 8 |
+
|
| 9 |
+
class Replacement(BaseOutputParser):
    """Output parser that turns an LLM reply into a list of strings."""

    def parse(self, text: str, **kwargs):
        """Strip the reply and split it on the separator ", ".

        Extra keyword arguments are printed and otherwise ignored.
        """
        if len(kwargs) > 0:
            print(kwargs)
        trimmed = text.strip()
        return trimmed.split(", ")
|
| 18 |
+
|
| 19 |
+
@terminal_print # need to review this.
async def async_generate(article,name,chain,replacement_term=None):
    '''
    run one chain asynchronously and store its answer on the article

    Parameters
    ----------
    article : dict
        article object, updated in place
    name : str
        key under which the answer content is stored
    chain : runnable
        chain invoked with a single "term" input
    replacement_term : str, optional
        value passed as "term"; an empty string is sent when it is
        missing or empty

    Returns
    -------
    None
    '''
    term = replacement_term if replacement_term else ""
    response = await chain.ainvoke({"term":term})
    article[name] = response.content
|
| 26 |
+
|
| 27 |
+
@terminal_print # need to review this.
async def execute_concurrent(article,prompts):
    '''
    run every applicable prompt against the article concurrently

    A prompt is skipped when one of the article keys listed in its
    "input_list" is missing from the article. The answer of each executed
    prompt is stored on the article under the prompt name.

    Parameters
    ----------
    article : dict
        article object, updated in place; article["logic"] selects which
        prompt text of each prompt entry is used
    prompts : dict
        prompt name -> prompt entry (reads "input_list", "assessment_step",
        the key named by article["logic"] and, when present,
        "reformat_inst" and "term")

    Returns
    -------
    None
    '''
    llm = ChatOpenAI(temperature=0.0,model_name="gpt-3.5-turbo-16k")
    tasks = []

    prompt_type = article["logic"]
    prompt_names = list(prompts.keys())
    print(prompt_names)

    for name in prompt_names:
        p = prompts[name]

        if any(s not in article for s in p["input_list"]):
            print("skip",name,"due to missing input",p["input_list"])
            continue

        print("executing",p["assessment_step"],name)
        input_text = "".join([article[s] for s in p["input_list"]])

        # the article text is data, not a template: double the braces so the
        # prompt template does not read "{...}" in the text as input variables
        input_text = input_text.replace("{","{{").replace("}","}}")

        chat_prompt = ChatPromptTemplate.from_messages([
            ("human",input_text),
            ("system",p[prompt_type]),
        ])

        if "reformat_inst" in p:
            chat_prompt.append(
                ("system",p["reformat_inst"])
            )

        # post-processing hook; the mapping is empty for now so the answer
        # passes through unchanged
        post_prompt_maping = {}
        post_replace_term = lambda res,map=post_prompt_maping:replace_term(res,map=map)

        chain = chat_prompt | llm | post_replace_term
        if "term" in p:
            tasks.append(async_generate(article,name,chain,replacement_term=p["term"]["term_prompt"]))
        else:
            tasks.append(async_generate(article,name,chain))

    await asyncio.gather(*tasks)
|
| 69 |
+
|
| 70 |
+
def replace_term(res,**kwargs):
    '''
    replace terms in the content of a response object

    Parameters
    ----------
    res : object
        object with a string attribute named content, updated in place
    map : dict, optional (keyword only)
        text to find -> replacement text; a missing, None or empty map
        leaves the content untouched

    Returns
    -------
    object
        the same res object
    '''
    mapping = kwargs.get("map") or {}
    for key,term in mapping.items():
        # str.replace with an empty key would insert term between every
        # character, which is never what a term replacement means
        if not key:
            continue
        res.content = res.content.replace(key,term)
    return res
|
| 75 |
+
|
| 76 |
+
if __name__ == "__main__":
    # manual experiment: run one prompt of the .prompts/other folder
    # (Blood Loss, Operation Time, Need for ICU) against a sample article
    sample_article = ".samples/Ha SK, 2008.pdf"
    sample_content,_ = read_pdf(sample_article)

    llm = ChatOpenAI(temperature=0.0,model_name="gpt-3.5-turbo-16k")

    # change name to "Need for ICU" or "Blood Loss" to try the other prompts
    name = "Operation Time"
    with open(f".prompts/other/{name}.txt") as f:
        prompt = f.read()

    post_prompt_maping = {}
    post_replace_term = lambda res,map=post_prompt_maping:replace_term(res,map=map)

    chain_prompt = ChatPromptTemplate.from_messages([
        # send the extracted article text (not the file path); braces are
        # doubled so the text is not parsed as template variables
        ("human",sample_content.replace("{","{{").replace("}","}}")),
        ("system",prompt),
    ])

    # experiment with cascading the chain
    chain = chain_prompt | llm
    chain2 = chain | post_replace_term

    # NOTE(review): the earlier "chain2.last.with_retry = True" line was dropped;
    # with_retry is a Runnable method that returns a new runnable, so assigning
    # True to it only shadowed the method and did not enable retries.
    res = chain2.invoke({"term":name})
    print(res.content)
|
features.py
CHANGED
|
@@ -1,29 +1,66 @@
|
|
| 1 |
# language default packages
|
| 2 |
from datetime import datetime
|
| 3 |
-
from collections import defaultdict
|
| 4 |
|
| 5 |
# external packages
|
| 6 |
import gradio as gr
|
| 7 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# internal packages
|
|
|
|
| 10 |
from cloud_db import *
|
| 11 |
from cloud_storage import *
|
|
|
|
| 12 |
from supplier import *
|
| 13 |
-
|
| 14 |
-
encoding = tiktoken.get_encoding("cl100k_base")
|
| 15 |
|
| 16 |
# get prompts, terms, outputs from the cloud
|
|
|
|
| 17 |
def init_app_data():
|
| 18 |
'''
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
'''
|
| 21 |
-
app_data["prompts"] = get_table("prompts")
|
| 22 |
app_data["terms"] = get_table("terms")
|
| 23 |
-
|
| 24 |
-
app_data["
|
| 25 |
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
domain,
|
| 28 |
study_file_obj,
|
| 29 |
study_content,
|
|
@@ -39,47 +76,89 @@ def process_study(
|
|
| 39 |
else:
|
| 40 |
return "No file or content provided","No file or content provided","No file or content provided"
|
| 41 |
|
| 42 |
-
#
|
| 43 |
-
|
| 44 |
-
index_discussion = raw_content.lower().index("discussion") if "discussion" in raw_content.lower() else len(raw_content)
|
| 45 |
-
meta_content = raw_content[:index_discussion]
|
| 46 |
-
key_content = get_key_content(raw_content)
|
| 47 |
-
|
| 48 |
-
authors = send_inst(create_inst(meta_content,authors_inst))
|
| 49 |
-
accepted_date = send_inst(create_inst(meta_content,accepted_date_inst))
|
| 50 |
-
tables = send_inst(create_inst(key_content,tables_inst))
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
selected_prompts = select_prompts(key_content,terms=app_data["terms"],prompts=app_data["prompts"])
|
| 55 |
-
res = process_prompts(key_content,selected_prompts)
|
| 56 |
-
|
| 57 |
-
detail_views = create_detail_views(res)
|
| 58 |
-
overview = create_overview(res)
|
| 59 |
-
|
| 60 |
-
article.update({
|
| 61 |
-
"meta":{
|
| 62 |
-
"authors":authors,
|
| 63 |
-
"accepted_date":accepted_date,
|
| 64 |
-
},
|
| 65 |
-
"extractions":res
|
| 66 |
-
})
|
| 67 |
|
| 68 |
-
article
|
| 69 |
-
{
|
| 70 |
-
"key_content":key_content,
|
| 71 |
-
"tables":tables,
|
| 72 |
-
}
|
| 73 |
-
)
|
| 74 |
app_data["current_article"] = article
|
|
|
|
|
|
|
| 75 |
try:
|
| 76 |
update_article(article)
|
| 77 |
except Exception as e:
|
| 78 |
print(e)
|
| 79 |
# return overview, detail_views
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
return overview, detail_views
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
def refresh():
|
| 84 |
'''
|
| 85 |
this function refresh the application data from the cloud backend
|
|
@@ -89,64 +168,56 @@ def refresh():
|
|
| 89 |
article = app_data["current_article"]
|
| 90 |
if not article:
|
| 91 |
return "No file or content provided"
|
| 92 |
-
|
| 93 |
|
| 94 |
-
|
|
|
|
| 95 |
|
| 96 |
-
article.update({
|
| 97 |
-
"extractions":res
|
| 98 |
-
})
|
| 99 |
-
|
| 100 |
-
detail_views = create_detail_views(res)
|
| 101 |
-
overview = create_overview(res)
|
| 102 |
update_article(article=article)
|
| 103 |
|
| 104 |
return overview, detail_views
|
| 105 |
|
|
|
|
| 106 |
def create_overview(article):
|
| 107 |
-
md_text = ""
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
return gr.update(value=md_text)
|
| 113 |
|
|
|
|
| 114 |
def create_detail_views(article):
|
| 115 |
-
md_text = ""
|
|
|
|
| 116 |
|
| 117 |
# add performance
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
for content in safeties:
|
| 129 |
-
md_text += f"#### {content['assessment']} - {content['template_name']}\n\n"
|
| 130 |
-
md_text += content["content"] + "\n\n"
|
| 131 |
-
|
| 132 |
-
# add other
|
| 133 |
-
others = [v for _,v in article.items() if v["assessment"] == "other"]
|
| 134 |
-
|
| 135 |
-
md_text += f"### Other\n\n"
|
| 136 |
-
for title,content in others:
|
| 137 |
-
md_text += f"#### {content['assessment']} - {content['template_name']}\n\n"
|
| 138 |
-
md_text += content["content"] + "\n\n"
|
| 139 |
|
| 140 |
return gr.update(value=md_text)
|
| 141 |
|
| 142 |
-
|
|
|
|
| 143 |
'''
|
| 144 |
this function extract the content between start and end
|
| 145 |
-
and return the content in between.
|
| 146 |
-
|
| 147 |
-
and find all the end and keep the last one showing up in the
|
| 148 |
-
text. If no start or end is found, the function will return
|
| 149 |
-
the no text.
|
| 150 |
|
| 151 |
Parameters
|
| 152 |
----------
|
|
@@ -162,19 +233,32 @@ def get_key_content(text,case_sensitive=False):
|
|
| 162 |
str
|
| 163 |
content between start and end
|
| 164 |
'''
|
| 165 |
-
if not case_sensitive:
|
| 166 |
-
|
|
|
|
| 167 |
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
|
|
|
| 178 |
def get_articles(update_local=True):
|
| 179 |
'''
|
| 180 |
this function return the list of articles
|
|
@@ -191,10 +275,11 @@ def get_articles(update_local=True):
|
|
| 191 |
'''
|
| 192 |
articles = get_table("articles")
|
| 193 |
if update_local:
|
| 194 |
-
app_data["articles"] = articles
|
| 195 |
|
| 196 |
return articles
|
| 197 |
|
|
|
|
| 198 |
def get_article(domain,name):
|
| 199 |
'''
|
| 200 |
this function return the article object
|
|
@@ -215,6 +300,7 @@ def get_article(domain,name):
|
|
| 215 |
|
| 216 |
return article
|
| 217 |
|
|
|
|
| 218 |
def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True):
|
| 219 |
'''
|
| 220 |
this function receive the domain name and file obj
|
|
@@ -236,29 +322,29 @@ def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True)
|
|
| 236 |
dict
|
| 237 |
article object
|
| 238 |
'''
|
| 239 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
content, _ = read_pdf(file)
|
| 241 |
filename = file.name.split("\\")[-1]
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
|
| 248 |
article ={
|
| 249 |
"domain":domain,
|
| 250 |
"name":filename,
|
| 251 |
-
"
|
| 252 |
-
"raw":content
|
| 253 |
-
},
|
| 254 |
"upload_time":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
| 255 |
}
|
| 256 |
-
|
| 257 |
-
if add_to_s3 and file_object:
|
| 258 |
-
upload_fileobj(file,default_s3_bucket,filename)
|
| 259 |
|
| 260 |
if add_to_local:
|
| 261 |
-
app_data["articles"]
|
| 262 |
|
| 263 |
res = post_item("articles",article)
|
| 264 |
if "Error" in res:
|
|
@@ -267,6 +353,7 @@ def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True)
|
|
| 267 |
|
| 268 |
return article
|
| 269 |
|
|
|
|
| 270 |
def remove_article(domain,name,remove_from_s3=True, remove_from_local=True):
|
| 271 |
'''
|
| 272 |
this function remove the article from the cloud, s3 and local memory
|
|
@@ -291,12 +378,13 @@ def remove_article(domain,name,remove_from_s3=True, remove_from_local=True):
|
|
| 291 |
if remove_from_s3:
|
| 292 |
delete_file(domain,name)
|
| 293 |
if remove_from_local:
|
| 294 |
-
|
| 295 |
pass
|
| 296 |
delete_item("articles",{"domain":domain,"name":name})
|
| 297 |
|
| 298 |
return True
|
| 299 |
|
|
|
|
| 300 |
def update_article(article,file_obj=None,update_local=True):
|
| 301 |
'''
|
| 302 |
this function receive the article object and update the article
|
|
@@ -320,118 +408,206 @@ def update_article(article,file_obj=None,update_local=True):
|
|
| 320 |
upload_fileobj(file_obj,article["domain"],article["name"])
|
| 321 |
|
| 322 |
if update_local:
|
| 323 |
-
app_data["articles"]
|
| 324 |
|
| 325 |
post_item("articles",article)
|
| 326 |
|
| 327 |
return article
|
| 328 |
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
|
|
|
| 332 |
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
#
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
# if "Error" in res:
|
| 353 |
-
# print(res)
|
| 354 |
-
# return False
|
| 355 |
-
# return output
|
| 356 |
-
|
| 357 |
-
# def remove_output(domain,name):
|
| 358 |
-
# res = delete_item("outputs",{"domain":domain,"name":name})
|
| 359 |
-
# if "Error" in res:
|
| 360 |
-
# print(res)
|
| 361 |
-
# return False
|
| 362 |
-
# return True
|
| 363 |
-
|
| 364 |
-
# def update_output(output):
|
| 365 |
-
# res = put_item("outputs",output)
|
| 366 |
-
# if "Error" in res:
|
| 367 |
-
# print(res)
|
| 368 |
-
# return False
|
| 369 |
-
# return True
|
| 370 |
-
|
| 371 |
-
# identify article state
|
| 372 |
-
def identify_logic(text):
|
| 373 |
-
article_logic = [
|
| 374 |
-
"groups",
|
| 375 |
-
"levels",
|
| 376 |
-
"preoperatives"
|
| 377 |
-
]
|
| 378 |
|
| 379 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
|
| 387 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
|
| 395 |
-
|
|
|
|
|
|
|
| 396 |
|
|
|
|
| 397 |
def keyword_search(keywords,full_text):
|
| 398 |
keywords_result = {}
|
| 399 |
for k in keywords:
|
| 400 |
-
if type(k) is tuple:
|
| 401 |
-
keywords_result[k]=
|
| 402 |
else:
|
| 403 |
-
keywords_result[k]=
|
| 404 |
return keywords_result
|
| 405 |
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
text : str
|
| 413 |
-
text of the article
|
| 414 |
-
prompts : list
|
| 415 |
-
list of prompts
|
| 416 |
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
'''
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
}
|
| 436 |
-
|
| 437 |
-
return res
|
|
|
|
| 1 |
# language default packages
|
| 2 |
from datetime import datetime
|
|
|
|
| 3 |
|
| 4 |
# external packages
|
| 5 |
import gradio as gr
|
| 6 |
+
import asyncio
|
| 7 |
+
|
| 8 |
+
from langchain.llms import OpenAI
|
| 9 |
+
from langchain.prompts import PromptTemplate
|
| 10 |
+
from langchain.chains import LLMChain
|
| 11 |
|
| 12 |
# internal packages
|
| 13 |
+
from chains import *
|
| 14 |
from cloud_db import *
|
| 15 |
from cloud_storage import *
|
| 16 |
+
from cloud_textract import *
|
| 17 |
from supplier import *
|
| 18 |
+
from utility import list_dict_to_dict
|
|
|
|
| 19 |
|
| 20 |
# get prompts, terms, outputs from the cloud
|
| 21 |
+
@terminal_print
def init_app_data():
    '''
    load the application data from the cloud backend into app_data

    The "prompts", "articles" and "summary" tables are stored as
    dictionaries keyed by "prompt_name", "name" and "term" respectively;
    the "terms" table is stored as returned by get_table.

    Parameters
    ----------
    None

    Returns
    -------
    None
    '''
    prompts_table = get_table("prompts")
    app_data["prompts"] = list_dict_to_dict(prompts_table,key="prompt_name")

    app_data["terms"] = get_table("terms")

    for table_name,key_field in (("articles","name"),("summary","term")):
        app_data[table_name] = list_dict_to_dict(get_table(table_name),key=key_field)
|
| 39 |
|
| 40 |
+
@terminal_print
def get_existing_article(
        article_name,
    ):
    '''
    select an article already held in app_data and render its views

    The article is looked up by name in app_data["articles"] and becomes
    app_data["current_article"].

    Parameters
    ----------
    article_name : str
        name of the article

    Returns
    -------
    tuple
        the overview update and the detail views update of the article
    '''
    selected = app_data["articles"][article_name]
    app_data["current_article"] = selected

    overview = create_overview(selected)
    detail_views = create_detail_views(selected)
    return overview, detail_views
|
| 61 |
+
|
| 62 |
+
@terminal_print
|
| 63 |
+
def process_study( # need revision
|
| 64 |
domain,
|
| 65 |
study_file_obj,
|
| 66 |
study_content,
|
|
|
|
| 76 |
else:
|
| 77 |
return "No file or content provided","No file or content provided","No file or content provided"
|
| 78 |
|
| 79 |
+
# update the common article segment from its existing attributes.
|
| 80 |
+
update_article_segment(article)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
+
# perform pathway logic and content extraction
|
| 83 |
+
process_prompts(article=article)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
+
# set the current article to the completed article object
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
app_data["current_article"] = article
|
| 87 |
+
|
| 88 |
+
# update the article to the cloud
|
| 89 |
try:
|
| 90 |
update_article(article)
|
| 91 |
except Exception as e:
|
| 92 |
print(e)
|
| 93 |
# return overview, detail_views
|
| 94 |
|
| 95 |
+
# create overview and detail markdown views for the article
|
| 96 |
+
detail_views = create_detail_views(article)
|
| 97 |
+
overview = create_overview(article)
|
| 98 |
+
|
| 99 |
return overview, detail_views
|
| 100 |
|
| 101 |
+
@terminal_print
def update_article_segment(article):
    '''
    split the raw text of the article into its common sections and store
    them on the article object

    The article is changed in place, so the caller does not need to
    re-assign it. The keys written are "Abstract", "Introduction",
    "Material and Methods", "Results", "Meta Content", "tables",
    "key_content", the result of identify_logic, and one entry per prompt
    of article_prompts filled by get_segments.

    Parameters
    ----------
    article : dict
        article object; reads article["raw"] and article["name"]

    Returns
    -------
    None
    '''
    raw_content = article["raw"]

    # the meta data is everything before the first "discussion" (any case),
    # or the whole text when the word does not appear
    index_discussion = raw_content.lower().find("discussion")
    if index_discussion == -1:
        index_discussion = len(raw_content)
    meta_content = raw_content[:index_discussion]

    # walk through the text section by section, each call continues on the
    # text left over by the previous one
    # article Liu does not have objective and key but has introduction.
    abstract, next_content = get_key_content(raw_content,"objective","key")
    introduction, next_content = get_key_content(next_content,"key","methods")
    materials_and_methods, next_content = get_key_content(next_content,"methods","results")
    results, _ = get_key_content(next_content,"results","discussion")

    # update the article object
    article.update({
        "Abstract": abstract,
        "Introduction": introduction,
        "Material and Methods": materials_and_methods,
        "Results": results,
        "Meta Content": meta_content,
        "tables": get_tables(article["name"]),
    })

    # add the key content as an aggregation of the other sections
    article.update({
        "key_content": article["Abstract"] + article["Introduction"] + article["Material and Methods"] + article["Results"],
    })

    # add the recognized logic to the article
    article.update(identify_logic(article["key_content"]))

    # run the article level prompts on a dedicated event loop; the finally
    # clause makes sure the loop is closed even when a prompt call fails
    pre_loop = asyncio.new_event_loop()
    try:
        pre_loop.run_until_complete(get_segments(article,article_prompts))
    finally:
        pre_loop.close()
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
@terminal_print # need to review this.
async def gen_segment(article,name,chain):
    '''
    run one chain with an empty "term" input and store the content of the
    answer on the article under name

    Parameters
    ----------
    article : dict
        article object, updated in place
    name : str
        key under which the answer content is stored
    chain : runnable
        chain to invoke

    Returns
    -------
    None
    '''
    response = await chain.ainvoke({"term":""})
    article[name] = response.content
|
| 145 |
+
|
| 146 |
+
@terminal_print # need to review this.
async def get_segments(article,prompts):
    '''
    run every prompt against the meta content of the article concurrently

    Each prompt is prefixed with "From the text above " and its answer is
    stored on the article under the prompt name.

    Parameters
    ----------
    article : dict
        article object, updated in place; reads article["Meta Content"]
    prompts : dict
        prompt name -> prompt text

    Returns
    -------
    None
    '''
    llm = ChatOpenAI(temperature=0.0,model_name="gpt-3.5-turbo-16k")

    # the article text is data, not a template: double the braces so the
    # prompt template does not read "{...}" in the text as input variables
    meta_content = article["Meta Content"].replace("{","{{").replace("}","}}")

    tasks = []
    for name,p in prompts.items():
        prompt = ChatPromptTemplate.from_messages([
            ("human",meta_content),
            ("system","From the text above "+p),
        ])
        chain = prompt | llm
        tasks.append(gen_segment(article,name,chain))

    await asyncio.gather(*tasks)
|
| 160 |
+
|
| 161 |
+
@terminal_print
|
| 162 |
def refresh():
|
| 163 |
'''
|
| 164 |
this function refresh the application data from the cloud backend
|
|
|
|
| 168 |
article = app_data["current_article"]
|
| 169 |
if not article:
|
| 170 |
return "No file or content provided"
|
| 171 |
+
process_prompts(article)
|
| 172 |
|
| 173 |
+
detail_views = create_detail_views(article)
|
| 174 |
+
overview = create_overview(article)
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
update_article(article=article)
|
| 177 |
|
| 178 |
return overview, detail_views
|
| 179 |
|
| 180 |
+
@terminal_print
def create_overview(article):
    '''
    build the markdown overview of the article

    Every component listed in article["extraction"]["overview"] gets a
    heading followed by the content stored on the article under the
    component name, or "No content found" when the article has no such key.

    Parameters
    ----------
    article : dict
        article object

    Returns
    -------
    gradio update
        update carrying the markdown text
    '''
    assessment = "overview"

    # an article without an "extraction" or "overview" entry renders the
    # header only instead of raising KeyError
    overview_components = article.get("extraction",{}).get(assessment,[])

    parts = ["## Overview\n\n"]
    for component in overview_components:
        parts.append(f"#### {assessment} - {component}\n\n")
        if component in article:
            parts.append(article[component] + "\n\n")
        else:
            parts.append("No content found\n\n")

    return gr.update(value="".join(parts))
|
| 195 |
|
| 196 |
+
@terminal_print
def create_detail_views(article):
    '''
    build the markdown detail view of the article

    For each of the assessments clinical, radiologic, safety and other that
    is present in article["extraction"], every listed component gets a
    heading followed by the content stored on the article under the
    component name, or "No content found" when the article has no such key.

    Parameters
    ----------
    article : dict
        article object

    Returns
    -------
    gradio update
        update carrying the markdown text
    '''
    assessments = ["clinical","radiologic","safety","other"]

    # an article without an "extraction" entry renders the header only
    # instead of raising KeyError
    extraction = article.get("extraction",{})

    parts = ["## Performance\n\n"]

    # add performance
    for a in assessments:
        if a not in extraction:
            continue
        parts.append(f"### {a.capitalize()}\n\n")
        for component in extraction[a]:
            parts.append(f"#### {a} - {component}\n\n")
            if component in article:
                parts.append(article[component] + "\n\n")
            else:
                parts.append("No content found\n\n")

    return gr.update(value="".join(parts))
|
| 214 |
|
| 215 |
+
@terminal_print
|
| 216 |
+
def get_key_content(text:str,start,end:str,case_sensitive:bool=False):
    '''
    this function extract the content between start and end
    and return the content in between together with the remaining text.
    The original casing of the text is preserved in both results.

    Parameters
    ----------
    text : str
        text to search
    start : str or int
        keyword marking the beginning of the content, or an index into
        text. When the keyword is not found (or the index is -1) the
        content starts at the beginning of the text.
    end : str
        keyword marking the end of the content; only an occurrence at or
        after the start is considered
    case_sensitive : bool
        match the keywords exactly when True, ignore case when False

    Returns
    -------
    tuple of (str, str)
        the content between start and end, and the remaining text
        beginning at end. When end is not found both values are the text
        from start to the end of the text.
    '''
    import re  # local import: only this helper needs it

    flags = 0 if case_sensitive else re.IGNORECASE

    def locate(keyword,begin=0):
        # search the original text (not a lowered copy) so the index is
        # valid for slicing it and the casing of the result is untouched
        found = re.compile(re.escape(keyword),flags).search(text,begin)
        return found.start() if found else -1

    if isinstance(start,str):
        start_index = locate(start)
    else:
        start_index = start

    # if the start is not found, set the start as the beginning of the text
    if start_index == -1:
        start_index = 0

    # look for the end only from the start onwards, an earlier occurrence
    # would produce an empty content
    end_index = locate(end,max(start_index,0))

    # if the end is not found, return from the start to the end of the text
    # for both the searched text and the remaining text
    if end_index == -1:
        return text[start_index:],text[start_index:]

    # return the searched text and the remaining text
    return text[start_index:end_index],text[end_index:]
|
| 260 |
|
| 261 |
+
@terminal_print
|
| 262 |
def get_articles(update_local=True):
|
| 263 |
'''
|
| 264 |
this function return the list of articles
|
|
|
|
| 275 |
'''
|
| 276 |
articles = get_table("articles")
|
| 277 |
if update_local:
|
| 278 |
+
app_data["articles"] = list_dict_to_dict(articles)
|
| 279 |
|
| 280 |
return articles
|
| 281 |
|
| 282 |
+
@terminal_print
|
| 283 |
def get_article(domain,name):
|
| 284 |
'''
|
| 285 |
this function return the article object
|
|
|
|
| 300 |
|
| 301 |
return article
|
| 302 |
|
| 303 |
+
@terminal_print
|
| 304 |
def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True):
|
| 305 |
'''
|
| 306 |
this function receive the domain name and file obj
|
|
|
|
| 322 |
dict
|
| 323 |
article object
|
| 324 |
'''
|
| 325 |
+
if type(file) is str:
|
| 326 |
+
content = file
|
| 327 |
+
filename = file
|
| 328 |
+
upload_file(file,default_s3_bucket,filename)
|
| 329 |
+
else:
|
| 330 |
+
# extract the content from the pdf file
|
| 331 |
content, _ = read_pdf(file)
|
| 332 |
filename = file.name.split("\\")[-1]
|
| 333 |
+
|
| 334 |
+
# upload the article to s3
|
| 335 |
+
pdf_obj = open(file.name, 'rb')
|
| 336 |
+
upload_fileobj(pdf_obj,default_s3_bucket,filename)
|
| 337 |
+
pdf_obj.close()
|
| 338 |
|
| 339 |
article ={
|
| 340 |
"domain":domain,
|
| 341 |
"name":filename,
|
| 342 |
+
"raw":content,
|
|
|
|
|
|
|
| 343 |
"upload_time":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
| 344 |
}
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
if add_to_local:
|
| 347 |
+
app_data["articles"][article["name"]]=article
|
| 348 |
|
| 349 |
res = post_item("articles",article)
|
| 350 |
if "Error" in res:
|
|
|
|
| 353 |
|
| 354 |
return article
|
| 355 |
|
| 356 |
+
@terminal_print
|
| 357 |
def remove_article(domain,name,remove_from_s3=True, remove_from_local=True):
|
| 358 |
'''
|
| 359 |
this function remove the article from the cloud, s3 and local memory
|
|
|
|
| 378 |
if remove_from_s3:
|
| 379 |
delete_file(domain,name)
|
| 380 |
if remove_from_local:
|
| 381 |
+
del app_data["articles"][name]
|
| 382 |
pass
|
| 383 |
delete_item("articles",{"domain":domain,"name":name})
|
| 384 |
|
| 385 |
return True
|
| 386 |
|
| 387 |
+
@terminal_print
|
| 388 |
def update_article(article,file_obj=None,update_local=True):
|
| 389 |
'''
|
| 390 |
this function receive the article object and update the article
|
|
|
|
| 408 |
upload_fileobj(file_obj,article["domain"],article["name"])
|
| 409 |
|
| 410 |
if update_local:
|
| 411 |
+
app_data["articles"][article["name"]] = article
|
| 412 |
|
| 413 |
post_item("articles",article)
|
| 414 |
|
| 415 |
return article
|
| 416 |
|
| 417 |
+
@terminal_print
|
| 418 |
+
def identify_logic(text,logic_keywords=logic_keywords,case_sensitive=False):
    '''
    Classify an article by how often "groups" and "preoperatives" keywords
    occur in its text.

    Parameters
    ----------
    text : str
        text of the article
    logic_keywords : dict
        keyword collections stored under the keys "groups" and "preoperatives"
    case_sensitive : bool
        when False (default) the text is lower-cased before counting

    Returns
    -------
    dict
        {"logic": name} where name is one of "prompt_p_g", "prompt_np_g",
        "prompt_p_ng" or "prompt_np_ng"
    '''
    haystack = text if case_sensitive else text.lower()

    def occurrences(kind):
        # total number of hits over every keyword of one kind
        return sum(haystack.count(word) for word in logic_keywords[kind])

    # the thresholds are asymmetric: more than 3 group hits,
    # but at least 3 preoperative hits
    has_groups = occurrences("groups") > 3
    has_preoperatives = occurrences("preoperatives") >= 3

    if has_groups:
        logic = "prompt_p_g" if has_preoperatives else "prompt_np_g"
    else:
        logic = "prompt_p_ng" if has_preoperatives else "prompt_np_ng"

    return {"logic":logic}
|
| 448 |
+
|
| 449 |
+
# lets do it one by one
|
| 450 |
+
@terminal_print
|
| 451 |
+
def select_overview_prompts(article):
    '''
    Gather the prompts of every term that validates for the "overview" step.

    Parameters
    ----------
    article : dict
        article object; article["extraction"]["overview"] receives a copy of
        the selected prompt names

    Returns
    -------
    dict
        selected prompt names mapped to their entries in app_data["prompts"]
    '''
    chosen = set()
    for term in app_data["terms"]:
        if not validate_term(article,term,"overview"):
            continue
        # remember the prompts of this term ...
        chosen.update(term["prompts_list"])
        # ... and keep an independent snapshot on the article
        article["extraction"]["overview"] = chosen.copy()

    return {name:app_data["prompts"][name] for name in chosen}
|
| 461 |
+
|
| 462 |
+
@terminal_print
|
| 463 |
+
def select_performance_prompts(article,performance_assessment):
    '''
    Select the prompts that apply to the article for one performance assessment.

    Every term in app_data["terms"] that validates for the assessment
    contributes the prompts named in its "prompts_list". Each prompt is
    selected once; when several terms name the same prompt, the first
    validating term is the one recorded with it.

    Parameters
    ----------
    article : dict
        article object; the names of the selected prompts are appended to
        article["extraction"][performance_assessment]
    performance_assessment : str
        assessment step the terms are validated against

    Returns
    -------
    dict
        prompt key -> copy of the prompt definition with the matching term
        stored under "term"
    '''
    valid_terms = [
        t for t in app_data["terms"]
        if validate_term(article,t,performance_assessment)
    ]

    valid_prompts = {}
    for t in valid_terms:
        for p in t["prompts_list"]:
            if p in valid_prompts:
                # already selected through an earlier term; do not list it twice
                continue
            # work on a shallow copy so the shared definition in
            # app_data["prompts"] is not tagged with an article-specific term
            prompt = dict(app_data["prompts"][p])
            prompt["term"] = t
            valid_prompts[p] = prompt
            article["extraction"].setdefault(performance_assessment,[]).append(prompt["prompt_name"])

    return valid_prompts
|
| 486 |
+
|
| 487 |
+
@terminal_print
|
| 488 |
+
def process_prompts(article): # function overly complicated. need to be simplified.
    '''
    Select the prompts that apply to the article and execute them.

    The overview prompts and the prompts of each performance assessment
    ("clinical", "radiologic", "safety", "other") are selected first. The
    batches are then run one after another through execute_concurrent,
    each batch on its own event loop.

    Parameters
    ----------
    article : dict
        article object; article["extraction"] is reset to an empty dict
        before the prompts are selected

    Returns
    -------
    None
    '''
    def _run_batch(prompts):
        # run one batch to completion and always release its event loop,
        # even when the batch raises
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(execute_concurrent(article,prompts))
        finally:
            loop.close()

    article["extraction"] = {}

    overview_prompts = select_overview_prompts(article)
    performance_assessments = ["clinical","radiologic","safety","other"]

    # select everything up front, then execute
    performance_prompts = {
        assessment:select_performance_prompts(article,assessment)
        for assessment in performance_assessments
    }

    _run_batch(overview_prompts)
    for assessment in performance_assessments:
        _run_batch(performance_prompts[assessment])
|
| 524 |
+
|
| 525 |
|
| 526 |
+
def validate_term(article,term,assessment_step):
    '''
    Decide whether a term applies to the article for the given assessment step.

    Parameters
    ----------
    article : dict
        article object; reads "domain" and, for non-overview steps,
        "key_content", "Authors", "Acceptance Month", "Acceptance Year"
        and "tables"
    term : dict
        term definition; reads "region", "assessment_step" and "term"
        (a comma separated list of keywords)
    assessment_step : str
        step being selected, e.g. "overview"

    Returns
    -------
    bool
        True when the term should be used for the article
    '''
    # the term must target every region or the region of the article
    if term["region"] != "all" and term["region"] != article["domain"].lower():
        return False

    # the term must belong to the requested step
    if term["assessment_step"] != assessment_step:
        return False

    # overview terms need no keyword evidence
    if assessment_step == "overview":
        return True

    key_text = (article["key_content"]+article["Authors"]+article["Acceptance Month"]+article["Acceptance Year"]+"\n".join(article["tables"])).lower()

    # the text is lower-cased, so the keywords must be lower-cased as well
    keywords = [kw.strip().lower() for kw in term["term"].split(",")]

    # a stray comma yields an empty keyword that would match any text;
    # ignore such blanks as long as the term has at least one real keyword
    real_keywords = [kw for kw in keywords if kw]
    if real_keywords:
        keywords = real_keywords

    return any(kw in key_text for kw in keywords)
|
| 543 |
|
| 544 |
+
@terminal_print
|
| 545 |
def keyword_search(keywords,full_text):
    '''
    Report which keywords occur in the text.

    Parameters
    ----------
    keywords : iterable
        each entry is either a single keyword (str) or a group of
        alternatives (tuple, list or set); a group counts as found when
        any of its members is found
    full_text : str
        text to search in

    Returns
    -------
    dict
        keyword -> bool. A tuple group is keyed by itself; a list group is
        keyed by the equivalent tuple and a set group by the equivalent
        frozenset, because lists and sets cannot be dict keys.
    '''
    def _found(item):
        # groups may be nested; a group matches when any member matches
        if isinstance(item,(tuple,list,set,frozenset)):
            return any(_found(member) for member in item)
        return item in full_text

    keywords_result = {}
    for k in keywords:
        if isinstance(k,list):
            key = tuple(k)
        elif isinstance(k,set):
            key = frozenset(k)
        else:
            key = k
        keywords_result[key] = _found(k)
    return keywords_result
|
| 553 |
|
| 554 |
+
@terminal_print
|
| 555 |
+
def execute_prompts(article,prompt):
    '''
    Execute a prompt after making sure every input it needs exists on the article.

    Parameters
    ----------
    article : dict
        article object that holds the inputs and receives the results
    prompt : dict
        prompt definition; "input_list" names the article entries it reads
    '''
    for raw_name in prompt["input_list"]:
        needed = raw_name.strip()
        if needed in article:
            continue
        # the input has not been produced yet: run the prompt that creates it first
        execute_prompts(article,app_data["prompts"][needed]) # it might be a good idea to add level here.

    # every input is available now, hand over to the executor
    run_executor(article,prompt)
|
| 563 |
+
|
| 564 |
+
@terminal_print
|
| 565 |
+
def run_gpt(article,prompt):
    '''
    Run a prompt through the chat model and store the answer on the article.

    Parameters
    ----------
    article : dict
        article object; article["logic"] names the prompt variant to use and
        the answer is stored under prompt["prompt_name"]
    prompt : dict
        prompt definition; reads the variant named by article["logic"],
        "reformat_inst", "input_list" and "prompt_name"
    '''
    # the variant of the prompt that fits the article, followed by the reformat instruction
    variant = prompt[article["logic"]]
    instructions = [variant,prompt["reformat_inst"]]

    # the article segments the prompt reads, one per line
    segments = []
    for name in prompt["input_list"]:
        segments.append(article[name.strip()])
    text_in = "\n".join(segments)

    messages = create_inst(text_in,instructions)
    print(prompt["prompt_name"])

    # send the instruction stream to the openai api and keep the answer on the article
    article[prompt["prompt_name"]] = send_inst(messages)
|
| 580 |
+
|
| 581 |
+
|
| 582 |
+
@terminal_print
|
| 583 |
+
def f_replacement_term(article,prompt):
    '''
    Rewrite an article segment by swapping every known term for its replacement.

    Parameters
    ----------
    article : dict
        article object; the rewritten text is stored under prompt["prompt_name"]
    prompt : dict
        prompt definition; the first entry of "input_list" names the article
        segment to read

    Returns
    -------
    None
    '''
    # names in input_list may carry stray whitespace; run_gpt and
    # execute_prompts strip them as well
    source_key = prompt["input_list"][0].strip()
    result = article[source_key]

    # apply the replacements cumulatively so that every term in
    # app_data["summary"] is rewritten, not only the last one; with an
    # empty summary the text is passed through unchanged
    for t in app_data["summary"]:
        result = result.replace(t["term"],t["term_replacement"])

    article[prompt["prompt_name"]] = result
|
| 589 |
+
|
| 590 |
+
@terminal_print
|
| 591 |
+
def f_summary_term(article,prompt):
    '''
    Rewrite an article segment by swapping every known term for its summary form.

    Parameters
    ----------
    article : dict
        article object; the rewritten text is stored under prompt["prompt_name"]
    prompt : dict
        prompt definition; the first entry of "input_list" names the article
        segment to read

    Returns
    -------
    None
    '''
    # names in input_list may carry stray whitespace; run_gpt and
    # execute_prompts strip them as well
    source_key = prompt["input_list"][0].strip()
    result = article[source_key]

    # apply the substitutions cumulatively so that every term in
    # app_data["summary"] is rewritten, not only the last one; with an
    # empty summary the text is passed through unchanged
    for t in app_data["summary"]:
        result = result.replace(t["term"],t["term_summary"])

    article[prompt["prompt_name"]] = result
|
| 597 |
+
|
| 598 |
+
@terminal_print
|
| 599 |
+
def run_executor(article,prompt):
    '''
    Dispatch the prompt to the executor named in prompt["executed by"].

    Known executors are "gpt-3.5-turbo-16k", "f_replacement_term" and
    "f_summary_term"; any other value is ignored.
    '''
    executor = prompt["executed by"]

    if executor == "gpt-3.5-turbo-16k":
        run_gpt(article,prompt)
    elif executor == "f_replacement_term":
        f_replacement_term(article,prompt)
    elif executor == "f_summary_term":
        f_summary_term(article,prompt)
|
| 610 |
+
|
| 611 |
+
|
| 612 |
+
def add_inst(instructions,prompt):
    '''
    Return the instructions with the prompt appended using the + operator.
    '''
    combined = instructions + prompt
    return combined
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -5,4 +5,5 @@ boto3
|
|
| 5 |
requests
|
| 6 |
openai
|
| 7 |
pdfminer.six
|
| 8 |
-
tiktoken
|
|
|
|
|
|
| 5 |
requests
|
| 6 |
openai
|
| 7 |
pdfminer.six
|
| 8 |
+
tiktoken
|
| 9 |
+
langchain
|
supplier.py
CHANGED
|
@@ -7,27 +7,26 @@ from utility import terminal_print
|
|
| 7 |
openai.api_key = openai_api_key
|
| 8 |
token_encoder = tiktoken.get_encoding("cl100k_base")
|
| 9 |
|
| 10 |
-
|
|
|
|
| 11 |
max_retry = 5
|
| 12 |
-
def
|
| 13 |
import time
|
| 14 |
count = 0
|
|
|
|
| 15 |
|
| 16 |
while(count < max_retry):
|
| 17 |
try:
|
| 18 |
return func(*args,**kwargs)
|
| 19 |
except Exception as e:
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
count += 1
|
| 24 |
-
else:
|
| 25 |
-
raise e
|
| 26 |
|
| 27 |
-
return
|
| 28 |
|
| 29 |
@terminal_print
|
| 30 |
-
def execute_prompt(prompt):
|
| 31 |
'''
|
| 32 |
execute_prompt function takes two arguments: text and prompt
|
| 33 |
|
|
@@ -49,14 +48,14 @@ def execute_prompt(prompt):
|
|
| 49 |
return res.choices[0]["text"] if res.choices else "<error> failed to generate text</error>"
|
| 50 |
|
| 51 |
@terminal_print
|
| 52 |
-
def format(**kwargs):
|
| 53 |
if "format" in kwargs:
|
| 54 |
return kwargs["format"]
|
| 55 |
return kwargs
|
| 56 |
|
| 57 |
|
| 58 |
@terminal_print
|
| 59 |
-
def execute_instruction(article, instruction,model="gpt-3.5-turbo-16k",format="markdown"):
|
| 60 |
'''
|
| 61 |
execute_instruction function takes three arguments: article, instruction and model
|
| 62 |
|
|
@@ -96,7 +95,7 @@ def execute_instruction(article, instruction,model="gpt-3.5-turbo-16k",format="m
|
|
| 96 |
return res["choices"][0]["message"]["content"]
|
| 97 |
|
| 98 |
@terminal_print
|
| 99 |
-
def create_inst(article, instructions):
|
| 100 |
msg_stream = [
|
| 101 |
{
|
| 102 |
"role":"system",
|
|
@@ -113,7 +112,7 @@ def create_inst(article, instructions):
|
|
| 113 |
|
| 114 |
@terminal_print
|
| 115 |
@request_retry
|
| 116 |
-
def send_inst(stream, model="gpt-3.5-turbo-16k",temperature=0):
|
| 117 |
res= openai.ChatCompletion.create(
|
| 118 |
model=model,
|
| 119 |
messages=stream,
|
|
|
|
| 7 |
openai.api_key = openai_api_key
|
| 8 |
token_encoder = tiktoken.get_encoding("cl100k_base")
|
| 9 |
|
| 10 |
+
|
| 11 |
+
def request_retry(func): # need revision
    '''
    Decorator that retries the wrapped function when it raises.

    The function is attempted up to 5 times with a 5 second pause between
    attempts. When every attempt fails the wrapper returns None instead of
    raising, so callers must be prepared for a None result.

    Parameters
    ----------
    func : callable
        function to wrap

    Returns
    -------
    callable
        wrapper with the same signature and metadata as func
    '''
    import functools
    import time

    max_retry = 5
    retry_delay = 5 # seconds between two attempts

    @functools.wraps(func) # keep __name__/__doc__ of the wrapped function
    def deco_retry(*args,**kwargs):
        for attempt in range(1,max_retry + 1):
            try:
                return func(*args,**kwargs)
            except Exception as e:
                if attempt == max_retry:
                    # nothing left to retry: do not waste another pause
                    print(f"Error: {e.__class__.__name__}, giving up after {max_retry} attempts.")
                    break
                print(f"Error: {e.__class__.__name__}, retrying in 5 seconds...")
                time.sleep(retry_delay)

        # every attempt failed
        return None

    return deco_retry
|
| 27 |
|
| 28 |
@terminal_print
|
| 29 |
+
def execute_prompt(prompt): # need revision
|
| 30 |
'''
|
| 31 |
execute_prompt function takes two arguments: text and prompt
|
| 32 |
|
|
|
|
| 48 |
return res.choices[0]["text"] if res.choices else "<error> failed to generate text</error>"
|
| 49 |
|
| 50 |
@terminal_print
|
| 51 |
+
def format(**kwargs): # need revision
|
| 52 |
if "format" in kwargs:
|
| 53 |
return kwargs["format"]
|
| 54 |
return kwargs
|
| 55 |
|
| 56 |
|
| 57 |
@terminal_print
|
| 58 |
+
def execute_instruction(article, instruction,model="gpt-3.5-turbo-16k",format="markdown"): # need revision
|
| 59 |
'''
|
| 60 |
execute_instruction function takes three arguments: article, instruction and model
|
| 61 |
|
|
|
|
| 95 |
return res["choices"][0]["message"]["content"]
|
| 96 |
|
| 97 |
@terminal_print
|
| 98 |
+
def create_inst(article, instructions): # need revision
|
| 99 |
msg_stream = [
|
| 100 |
{
|
| 101 |
"role":"system",
|
|
|
|
| 112 |
|
| 113 |
@terminal_print
|
| 114 |
@request_retry
|
| 115 |
+
def send_inst(stream, model="gpt-3.5-turbo-16k",temperature=0): # need revision to change to async method
|
| 116 |
res= openai.ChatCompletion.create(
|
| 117 |
model=model,
|
| 118 |
messages=stream,
|
ui_studies.py
CHANGED
|
@@ -2,6 +2,7 @@ import gradio as gr
|
|
| 2 |
|
| 3 |
from application import *
|
| 4 |
from features import init_app_data
|
|
|
|
| 5 |
|
| 6 |
def refresh():
|
| 7 |
init_app_data()
|
|
@@ -19,20 +20,23 @@ def create_md_tables(articles):
|
|
| 19 |
md_text += "| Domain | File Name | Upload Time | Device |\n| --- | --- | --- | --- |\n"
|
| 20 |
|
| 21 |
for article in articles:
|
| 22 |
-
md_table = f"| {article['domain']} | {article['name']} | {article['upload_time']} | {
|
| 23 |
md_text += md_table
|
| 24 |
|
| 25 |
return md_text
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
from application import *
|
| 4 |
from features import init_app_data
|
| 5 |
+
from utility import terminal_print
|
| 6 |
|
| 7 |
def refresh():
|
| 8 |
init_app_data()
|
|
|
|
| 20 |
md_text += "| Domain | File Name | Upload Time | Device |\n| --- | --- | --- | --- |\n"
|
| 21 |
|
| 22 |
for article in articles:
|
| 23 |
+
md_table = f"| {article['domain']} | {article['name']} | {article['upload_time']} | {default_region} |\n"
|
| 24 |
md_text += md_table
|
| 25 |
|
| 26 |
return md_text
|
| 27 |
|
| 28 |
+
@terminal_print
|
| 29 |
+
def init_studies_page():
|
| 30 |
+
with gr.Blocks() as studies_page:
|
| 31 |
+
with gr.Row():
|
| 32 |
+
gr.Markdown("## Article Lists")
|
| 33 |
+
btn_refresh = gr.Button(value="Refresh",variant="primary")
|
| 34 |
+
gr.HTML("<hr>")
|
| 35 |
+
|
| 36 |
+
article_list = gr.Markdown("")
|
| 37 |
+
|
| 38 |
+
btn_refresh.click(
|
| 39 |
+
fn=refresh,
|
| 40 |
+
outputs=[article_list]
|
| 41 |
+
)
|
| 42 |
+
return studies_page
|
ui_study.py
CHANGED
|
@@ -17,67 +17,89 @@ def reset():
|
|
| 17 |
)
|
| 18 |
|
| 19 |
# complete user interfaces
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
with gr.
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
gr.
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
btn_refresh = gr.Button(value="Refresh",variant="primary")
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
# control element definition
|
| 52 |
-
btn_reset.click(
|
| 53 |
-
reset,
|
| 54 |
-
outputs=[
|
| 55 |
-
domain,
|
| 56 |
-
upload_study,
|
| 57 |
-
input_study,
|
| 58 |
-
overview,
|
| 59 |
-
detail_views,
|
| 60 |
-
]
|
| 61 |
-
)
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
| 17 |
)
|
| 18 |
|
| 19 |
# complete user interfaces
|
| 20 |
+
@terminal_print
|
| 21 |
+
def init_study_page():
|
| 22 |
+
with gr.Blocks() as study_page:
|
| 23 |
+
# user control panel
|
| 24 |
+
with gr.Row(equal_height=False):
|
| 25 |
+
with gr.Column():
|
| 26 |
+
gr.Markdown("## Studies")
|
| 27 |
+
gr.HTML("<hr>")
|
| 28 |
|
| 29 |
+
upload_study = gr.File(label="Upload a clinical study report",type="file",file_count="multiple")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
with gr.Column():
|
| 33 |
+
domain = gr.Radio(label="Anatomical Region",choices=anatomic_domains,value=default_region)
|
| 34 |
+
input_study = gr.TextArea(label="Or paste a clinical study report content",placeholder="Paste content here...",lines=5)
|
| 35 |
+
with gr.Row():
|
| 36 |
+
btn_reset = gr.Button(value="Reset",variant="stop")
|
| 37 |
+
btn_add_study = gr.Button(value="Add",variant="primary")
|
| 38 |
|
| 39 |
+
gr.HTML("<hr>")
|
| 40 |
+
with gr.Row():
|
| 41 |
+
gr.Markdown("## Literature Report")
|
|
|
|
| 42 |
|
| 43 |
+
gr.HTML("<hr>")
|
| 44 |
+
with gr.Row(equal_height=False):
|
| 45 |
+
with gr.Column():
|
| 46 |
+
dropdown = gr.Dropdown(label="Select a literature report",choices=app_data["articles"].keys())
|
| 47 |
+
with gr.Column():
|
| 48 |
+
with gr.Row():
|
| 49 |
+
btn_get_article = gr.Button(value="Get",variant="primary")
|
| 50 |
+
btn_refresh = gr.Button(value="Refresh",variant="primary")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
gr.HTML("<hr>")
|
| 53 |
+
# extraction outcome panel
|
| 54 |
+
with gr.Row(equal_height=False):
|
| 55 |
+
with gr.Column():
|
| 56 |
+
overview = gr.Markdown("")
|
| 57 |
+
with gr.Column():
|
| 58 |
+
# tables = gr.Markdown("")
|
| 59 |
+
detail_views = gr.Markdown("")
|
| 60 |
+
|
| 61 |
+
# control element definition
|
| 62 |
+
btn_get_article.click(
|
| 63 |
+
get_existing_article,
|
| 64 |
+
inputs=[
|
| 65 |
+
dropdown,
|
| 66 |
+
],
|
| 67 |
+
outputs=[
|
| 68 |
+
overview,
|
| 69 |
+
detail_views,
|
| 70 |
+
]
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
btn_reset.click(
|
| 74 |
+
reset,
|
| 75 |
+
outputs=[
|
| 76 |
+
domain,
|
| 77 |
+
upload_study,
|
| 78 |
+
input_study,
|
| 79 |
+
overview,
|
| 80 |
+
detail_views,
|
| 81 |
+
]
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
+
btn_add_study.click(
|
| 85 |
+
process_study,
|
| 86 |
+
inputs=[
|
| 87 |
+
domain,
|
| 88 |
+
upload_study,
|
| 89 |
+
input_study,
|
| 90 |
+
],
|
| 91 |
+
outputs=[
|
| 92 |
+
overview,
|
| 93 |
+
detail_views,
|
| 94 |
+
# tables
|
| 95 |
+
],
|
| 96 |
+
)
|
| 97 |
|
| 98 |
+
btn_refresh.click(
|
| 99 |
+
refresh,
|
| 100 |
+
outputs=[
|
| 101 |
+
overview,
|
| 102 |
+
detail_views,
|
| 103 |
+
],
|
| 104 |
+
)
|
| 105 |
+
return study_page
|