import streamlit as st
import pandas as pd
import os
from glob import glob
from pymongo import MongoClient
import pandas as pd
from glob import glob
def ibis_ngs_db_connection():
    """Connect to the IBIS NGS MongoDB and return the database handle.

    NOTE(review): the username/password/host/database values are placeholders;
    load real credentials from environment variables or a secrets store rather
    than hard-coding them in source.
    """
    username_ibis = ''
    # BUG FIX: this line previously read `password_ibis = #in url format`,
    # which is a SyntaxError. The password must be URL-encoded (percent-escaped)
    # before being embedded in the connection URI.
    password_ibis = ''
    connection_string_ibis = f'database://{username_ibis}:{password_ibis}@ipaddress/'
    client_ibis = MongoClient(connection_string_ibis)
    ibis_ngs_db = client_ibis['database name']
    return ibis_ngs_db
def common_uat_db_connection():
    """Connect to the common UAT MongoDB and return the database handle.

    NOTE(review): placeholder credentials/host/database name — move to
    environment variables or a secrets store before use.
    """
    username_ibis = ''
    # BUG FIX: was `password_ibis = #in url format` (SyntaxError). The value
    # must be URL-encoded for the connection URI.
    password_ibis = ''
    connection_string_ibis = f'database://{username_ibis}:{password_ibis}@ipaddress/'
    client_ibis = MongoClient(connection_string_ibis)
    ibis_ngs_db = client_ibis['database name']
    return ibis_ngs_db
def aon_ngs_db_connection():
    """Connect to the AON NGS MongoDB and return the database handle.

    NOTE(review): placeholder credentials/host/database name — move to
    environment variables or a secrets store before use.
    """
    username_aon = ''
    # BUG FIX: was `password_aon = #in url format` (SyntaxError). The value
    # must be URL-encoded for the connection URI.
    password_aon = ''
    connection_string_aon = f'database://{username_aon}:{password_aon}@ipaddress/'
    client_aon = MongoClient(connection_string_aon)
    aon_ngs_db = client_aon['database name']
    return aon_ngs_db
def doc_textNLP(docID, db):
    """Fetch the 'docTextNLP' entries for one document and return them as a DataFrame.

    docID is coerced to a string before querying the `documents` collection.
    """
    record = db.documents.find_one({'docID': {'$in': [f"{docID}"]}})
    return pd.DataFrame(record['docTextNLP'])
def low_quality_report(docID, db):
    """Fetch the 'docTextNLPLowQuality' entries for one document as a DataFrame.

    docID is coerced to a string before querying the `documents` collection.
    """
    record = db.documents.find_one({'docID': {'$in': [f"{docID}"]}})
    return pd.DataFrame(record['docTextNLPLowQuality'])
def db_selection(doc_id):
    """Pick the database by document-id format: all-digit IDs live in the
    IBIS NGS DB, anything else in the AON NGS DB.

    Mirrors the inline selection logic in main().
    """
    if f'{doc_id}'.isdigit():
        return ibis_ngs_db_connection()
    # BUG FIX: the else branch previously also returned the IBIS connection,
    # while main() routes non-numeric IDs to the AON database.
    return aon_ngs_db_connection()
# Function to get list of document IDs
def get_document_ids(folder='abc'):
    """Return the unique document IDs found in *folder*.

    An ID is the text before the first '_' in each '.png' filename.
    The folder is now a parameter (default 'abc' for backward compatibility)
    instead of a hard-coded path.
    """
    ids = {name.split('_')[0] for name in os.listdir(folder) if name.endswith('.png')}
    return list(ids)
# Function to load image based on selected document ID and page number
def load_image(image_path, document_id, page_number):
    """Return the image file path for page *page_number* (1-based) of
    *document_id*, or None when no such file exists on disk."""
    candidate = f"{image_path}{document_id}-{page_number-1}.png"
    return candidate if os.path.exists(candidate) else None
# Function to load dataframe based on selected document ID
def load_dataframe(auto_csv_path, document_id, page_number):
    """Load the first '*<document_id>*auto.csv' match under *auto_csv_path*
    and return only the rows whose 'Page#' equals *page_number*.

    Returns None when no CSV matches the pattern.
    """
    matches = glob(f'{auto_csv_path}*{document_id}*auto.csv')
    print(matches)  # debug trace of the candidate files, kept from original
    if not matches:
        return None
    frame = pd.read_csv(matches[0])
    return frame[frame['Page#'] == page_number]
def path_setting(PhaseData_path, Inbound_CSV_path):
    """Derive the pipeline-CSV and image directories from the phase-data root
    and read the 'pif_key' column from the inbound CSV.

    Returns (pif_list, image_path, auto_csv_path).
    """
    auto_csv_path = f'{PhaseData_path}/Batch1/NLP_batch/'
    image_path = f'{PhaseData_path}/Data/output/images/'
    inbound = pd.read_csv(Inbound_CSV_path)
    pif_list = list(inbound.pif_key.values)
    return pif_list, image_path, auto_csv_path
def main():
    """Streamlit reviewer UI.

    Shows, side by side: document selection + DB biomarker stats, the page
    image, and the pipeline's auto-extracted CSV rows for that page, and
    appends reviewer comments to comments.csv.
    """
    st.set_page_config(layout="wide")
    st.write("### Input Paths")
    PhaseData_path = st.text_input("Enter PhaseData Path:")
    Inbound_CSV_path = st.text_input("Enter Inbound_CSV Path:")
    if not PhaseData_path or not Inbound_CSV_path:
        st.warning("Please enter both PhaseData Path and Inbound_CSV Path.")
        return
    pif_list, image_path, auto_csv_path = path_setting(PhaseData_path, Inbound_CSV_path)
    # Sidebar sliders let the reviewer rebalance the three panes.
    col1_width = st.sidebar.slider("Width of First Column", 0.1, 10.0, 2.0, 0.1)
    col2_width = st.sidebar.slider("Width of Second Column", 0.1, 10.0, 6.5, 0.1)
    col3_width = st.sidebar.slider("Width of Third Column", 0.1, 10.0, 5.0, 0.1)
    col1, col2, col3 = st.columns([col1_width, col2_width, col3_width])
    with col1:
        st.write("### Document Selection")
        # BUG FIX: max_value was len(pif_list)+1, which allowed doc_index-1 to
        # index one past the end of pif_list (IndexError at the last value).
        doc_index = st.number_input("Select Document Index", min_value=1, max_value=len(pif_list), step=1, value=1)
        document_id = pif_list[doc_index-1]
        st.write("Current Document ID: ", document_id)
        pages = [int(i.split('-')[-1].split('.')[0]) for i in glob(f"{image_path}{document_id}*.png")]
        page_number = st.number_input("Page Number", min_value=1, max_value=len(pages), step=1, value=1)
        # Numeric IDs live in the IBIS DB; everything else in the AON DB.
        if f'{document_id}'.isdigit():
            db = ibis_ngs_db_connection()
        else:
            db = aon_ngs_db_connection()
        db_df = doc_textNLP(f'{document_id}', db)
        df_tmp = pd.read_csv(glob(f"{auto_csv_path}*{document_id}*auto.csv")[0])
        reason_for_onhold = st.text_area("Reason for On-hold: ", value="Add a reason for onhold column into inbound CSV")
        # Per-page and total biomarker row counts, DB vs pipeline CSV.
        comment_pipeline_db = f"#BM DB Page:{db_df[db_df['Page#']==page_number].shape[0]}\n#BM Pipeline Page:{df_tmp[df_tmp['Page#']==page_number].shape[0]}\n-----------------------------\n#BM DB Total:{db_df.shape[0]}\n#BM Pipeline Total:{df_tmp.shape[0]}"
        pipeline_db_stats = st.text_area("Biomarker Stats#: ", value=comment_pipeline_db, height=150)
    with col2:
        st.write("### Display Image")
        im_path = load_image(image_path, document_id, page_number)
        if im_path:
            st.image(im_path)
        else:
            st.write("Image not found")
    with col3:
        st.write("### Display DataFrame")
        df = load_dataframe(auto_csv_path, document_id, page_number)
        if df is not None:
            columns_to_display = st.multiselect("Select Columns to Display", df.columns)
            if len(columns_to_display) > 0:
                st.write(df[columns_to_display])
                st.subheader("Add Comments on NLP Output")
                reco_list = ['Regular NLP working', 'Wrong Report type', 'Report not Found',
                             'Dev work required', 'NLP not supported (non 5 labs)',
                             'Limited Regular NLP Support and Manual NLP (ROI) is working',
                             'Poor quality report- NLP not supported']
                comment = st.selectbox("Select Comment", options=reco_list)
                add_comment = st.text_area("Add your additional comment here")
                if st.button("Submit"):
                    # BUG FIX: previously appended to an undefined global
                    # `comments_df` (NameError on first Submit; DataFrame.append
                    # is also removed in pandas >= 2.0) and then re-wrote the
                    # accumulated frame with mode="a", duplicating earlier rows.
                    # Write only the new row.
                    new_row = pd.DataFrame({
                        "Document ID": [document_id],
                        "Page": [page_number],
                        "Comment": [comment],
                        "Additional Comment": [add_comment],
                    })
                    comments_csv = "comments.csv"
                    if os.path.exists(comments_csv):
                        new_row.to_csv(comments_csv, mode="a", header=False, index=False)
                    else:
                        new_row.to_csv(comments_csv, index=False)
            else:
                st.write("No columns selected")
        else:
            st.write("DataFrame not found")
# Script entry point: launch the Streamlit app when run directly.
if __name__ == "__main__":
    main()