# streamlit/streamlit_UI_code.py
# Author: jkushwaha — "Update streamlit_UI_code.py" (commit e462bfe, verified)
import streamlit as st
import pandas as pd
import os
from glob import glob
from pymongo import MongoClient
import pandas as pd
from glob import glob
def ibis_ngs_db_connection():
    """Open a connection to the IBIS NGS MongoDB and return the database handle.

    Returns:
        pymongo.database.Database: handle for the IBIS NGS database.
    """
    # BUGFIX: the original had `password_ibis = #...` (no right-hand side),
    # which is a SyntaxError. Credentials now come from the environment;
    # the password must be URL-encoded because it is embedded in the URI.
    username_ibis = os.getenv('IBIS_DB_USERNAME', '')
    password_ibis = os.getenv('IBIS_DB_PASSWORD', '')
    # NOTE(review): 'database://', 'ipaddress' and 'database name' look like
    # scrubbed placeholders — fill in the real scheme/host/db before use.
    connection_string_ibis = f'database://{username_ibis}:{password_ibis}@ipaddress/'
    client_ibis = MongoClient(connection_string_ibis)
    ibis_ngs_db = client_ibis['database name']
    return ibis_ngs_db
def common_uat_db_connection():
    """Open a connection to the common UAT MongoDB and return the database handle.

    Returns:
        pymongo.database.Database: handle for the common UAT database.
    """
    # BUGFIX: the original had `password_ibis = #...` (no right-hand side),
    # which is a SyntaxError. Credentials now come from the environment;
    # the password must be URL-encoded because it is embedded in the URI.
    username_ibis = os.getenv('COMMON_UAT_DB_USERNAME', '')
    password_ibis = os.getenv('COMMON_UAT_DB_PASSWORD', '')
    # NOTE(review): 'database://', 'ipaddress' and 'database name' look like
    # scrubbed placeholders — fill in the real scheme/host/db before use.
    connection_string_ibis = f'database://{username_ibis}:{password_ibis}@ipaddress/'
    client_ibis = MongoClient(connection_string_ibis)
    ibis_ngs_db = client_ibis['database name']
    return ibis_ngs_db
def aon_ngs_db_connection():
    """Open a connection to the AON NGS MongoDB and return the database handle.

    Returns:
        pymongo.database.Database: handle for the AON NGS database.
    """
    # BUGFIX: the original had `password_aon = #...` (no right-hand side),
    # which is a SyntaxError. Credentials now come from the environment;
    # the password must be URL-encoded because it is embedded in the URI.
    username_aon = os.getenv('AON_DB_USERNAME', '')
    password_aon = os.getenv('AON_DB_PASSWORD', '')
    # NOTE(review): 'database://', 'ipaddress' and 'database name' look like
    # scrubbed placeholders — fill in the real scheme/host/db before use.
    connection_string_aon = f'database://{username_aon}:{password_aon}@ipaddress/'
    client_aon = MongoClient(connection_string_aon)
    aon_ngs_db = client_aon['database name']
    return aon_ngs_db
def doc_textNLP(docID, db):
    """Fetch the 'docTextNLP' payload for one document and return it as a DataFrame.

    Args:
        docID: document identifier; coerced to a string for the query.
        db: MongoDB database handle exposing a ``documents`` collection.

    Returns:
        pandas.DataFrame built from the document's ``docTextNLP`` field.
    """
    record = db.documents.find_one({'docID': {'$in': [f"{docID}"]}})
    return pd.DataFrame(record['docTextNLP'])
def low_quality_report(docID, db):
    """Fetch the 'docTextNLPLowQuality' payload for one document as a DataFrame.

    Args:
        docID: document identifier; coerced to a string for the query.
        db: MongoDB database handle exposing a ``documents`` collection.

    Returns:
        pandas.DataFrame built from the document's ``docTextNLPLowQuality`` field.
    """
    record = db.documents.find_one({'docID': {'$in': [f"{docID}"]}})
    return pd.DataFrame(record['docTextNLPLowQuality'])
def db_selection(doc_id):
    """Choose the database by document-id format.

    All-digit ids live in the IBIS NGS database; everything else lives in the
    AON NGS database (the same rule main() applies inline).

    Args:
        doc_id: document identifier; coerced to a string before the digit test.

    Returns:
        The connected database handle.
    """
    if f'{doc_id}'.isdigit():
        return ibis_ngs_db_connection()
    # BUGFIX: the original returned the IBIS connection on BOTH branches,
    # making the if/else pointless; non-numeric ids belong to AON (see the
    # identical check in main()).
    return aon_ngs_db_connection()
# Function to get list of document IDs
def get_document_ids(folder='abc'):
    """Return the unique document ids found in *folder*.

    A document id is the text before the first underscore of each ``*.png``
    filename; duplicates (one file per page) are collapsed.

    Args:
        folder: directory to scan. Defaults to 'abc' for backward
            compatibility with the original hard-coded location.

    Returns:
        list of unique id strings (order unspecified, as before).
    """
    unique_ids = {name.split('_')[0] for name in os.listdir(folder) if name.endswith('.png')}
    return list(unique_ids)
# Function to load image based on selected document ID and page number
def load_image(image_path, document_id, page_number):
    """Resolve the on-disk path of a page image, or None if it is missing.

    Page images are stored zero-indexed as
    ``<image_path><document_id>-<page_number-1>.png``.

    Args:
        image_path: directory prefix (expected to end with a separator).
        document_id: id of the document whose page is wanted.
        page_number: 1-based page number.

    Returns:
        The image path string if the file exists, otherwise None.
    """
    candidate = f"{image_path}{document_id}-{page_number-1}.png"
    return candidate if os.path.exists(candidate) else None
# Function to load dataframe based on selected document ID
def load_dataframe(auto_csv_path, document_id, page_number):
    """Load the auto-extraction CSV for a document and return one page's rows.

    Searches for a file matching ``<auto_csv_path>*<document_id>*auto.csv``;
    when found, reads the first match and filters to rows whose ``Page#``
    column equals *page_number*.

    Args:
        auto_csv_path: directory prefix for the glob (should end with a separator).
        document_id: id used inside the filename pattern.
        page_number: 1-based page number to filter on.

    Returns:
        pandas.DataFrame of the page's rows, or None when no CSV matches.
    """
    # Removed a leftover debug print of the glob result.
    matches = glob(f'{auto_csv_path}*{document_id}*auto.csv')
    if not matches:
        return None
    auto_df = pd.read_csv(matches[0])
    return auto_df[auto_df['Page#'] == page_number]
def path_setting(PhaseData_path, Inbound_CSV_path):
    """Build the derived data paths and read the inbound pif keys.

    Args:
        PhaseData_path: root folder of the phase data.
        Inbound_CSV_path: CSV file containing a ``pif_key`` column.

    Returns:
        tuple ``(pif_list, image_path, auto_csv_path)`` where *pif_list* is the
        ``pif_key`` column as a list, and the two paths are fixed
        subdirectories under *PhaseData_path*.
    """
    auto_csv_path = f'{PhaseData_path}/Batch1/NLP_batch/'
    image_path = f'{PhaseData_path}/Data/output/images/'
    pif_list = list(pd.read_csv(Inbound_CSV_path)['pif_key'].values)
    return pif_list, image_path, auto_csv_path
def main():
    """Streamlit review UI.

    Lets a reviewer pick a document and page, shows the page image, the NLP
    auto-extraction DataFrame, a DB-vs-pipeline biomarker-count summary, and
    appends reviewer comments to ``comments.csv``.
    """
    st.set_page_config(layout="wide")
    st.write("### Input Paths")
    PhaseData_path = st.text_input("Enter PhaseData Path:")
    Inbound_CSV_path = st.text_input("Enter Inbound_CSV Path:")
    if not PhaseData_path or not Inbound_CSV_path:
        st.warning("Please enter both PhaseData Path and Inbound_CSV Path.")
        return
    pif_list, image_path, auto_csv_path = path_setting(PhaseData_path, Inbound_CSV_path)
    # Column widths are user-tunable from the sidebar.
    col1_width = st.sidebar.slider("Width of First Column", 0.1, 10.0, 2.0, 0.1)
    col2_width = st.sidebar.slider("Width of Second Column", 0.1, 10.0, 6.5, 0.1)
    col3_width = st.sidebar.slider("Width of Third Column", 0.1, 10.0, 5.0, 0.1)
    col1, col2, col3 = st.columns([col1_width, col2_width, col3_width])
    with col1:
        st.write("### Document Selection")
        # BUGFIX: max_value was len(pif_list)+1, so the last selectable index
        # raised IndexError at pif_list[doc_index-1]. Valid 1-based range is
        # 1..len(pif_list).
        doc_index = st.number_input("Select Document Index", min_value=1, max_value=len(pif_list), step=1, value=1)
        document_id = pif_list[doc_index-1]
        st.write("Current Document ID: ", document_id)
        # Page numbers are encoded zero-indexed in the image filenames
        # (<doc_id>-<page>.png).
        pages = [int(i.split('-')[-1].split('.')[0]) for i in glob(f"{image_path}{document_id}*.png")]
        page_number = st.number_input("Page Number", min_value=1, max_value=len(pages), step=1, value=1)
        # Numeric ids live in the IBIS DB, everything else in the AON DB.
        if f'{document_id}'.isdigit():
            db = ibis_ngs_db_connection()
        else:
            db = aon_ngs_db_connection()
        db_df = doc_textNLP(f'{document_id}', db)
        df_tmp = pd.read_csv(glob(f"{auto_csv_path}*{document_id}*auto.csv")[0])
        reason_for_onhold = st.text_area("Reason for On-hold: ", value="Add a reason for onhold column into inbound CSV")
        # Compare biomarker row counts between the DB copy and the pipeline CSV.
        comment_pipeline_db = f"#BM DB Page:{db_df[db_df['Page#']==page_number].shape[0]}\n#BM Pipeline Page:{df_tmp[df_tmp['Page#']==page_number].shape[0]}\n-----------------------------\n#BM DB Total:{db_df.shape[0]}\n#BM Pipeline Total:{df_tmp.shape[0]}"
        pipeline_db_stats = st.text_area("Biomarker Stats#: ", value=comment_pipeline_db, height=150)
    with col2:
        st.write("### Display Image")
        im_path = load_image(image_path, document_id, page_number)
        if im_path:
            st.image(im_path)
        else:
            st.write("Image not found")
    with col3:
        st.write("### Display DataFrame")
        df = load_dataframe(auto_csv_path, document_id, page_number)
        if df is not None:
            columns_to_display = st.multiselect("Select Columns to Display", df.columns)
            if len(columns_to_display) > 0:
                st.write(df[columns_to_display])
                st.subheader("Add Comments on NLP Output")
                reco_list = ['Regular NLP working', 'Wrong Report type', 'Report not Found',
                             'Dev work required', 'NLP not supported (non 5 labs)',
                             'Limited Regular NLP Support and Manual NLP (ROI) is working',
                             'Poor quality report- NLP not supported']
                comment = st.selectbox("Select Comment", options=reco_list)
                add_comment = st.text_area("Add your additional comment here")
                if st.button("Submit"):
                    data = {
                        "Document ID": [document_id],
                        "Page": [page_number],
                        "Comment": [comment],
                        "Additional Comment": [add_comment]
                    }
                    # BUGFIX: the original appended to an undefined global
                    # `comments_df` (NameError; DataFrame.append is also gone
                    # in pandas >= 2.0) and then re-wrote the whole accumulated
                    # frame in append mode, duplicating rows on every submit.
                    # Append only the new row; write the header just once.
                    comments_csv = "comments.csv"
                    pd.DataFrame(data).to_csv(
                        comments_csv,
                        mode="a",
                        header=not os.path.exists(comments_csv),
                        index=False,
                    )
            else:
                st.write("No columns selected")
        else:
            st.write("DataFrame not found")
# Standard script entry point; launch via `streamlit run streamlit_UI_code.py`.
if __name__ == "__main__":
    main()