import os
from glob import glob

import pandas as pd
import streamlit as st
from pymongo import MongoClient


def ibis_ngs_db_connection():
    """Open a connection to the IBIS NGS MongoDB and return the database handle.

    Returns:
        pymongo.database.Database: handle to the IBIS NGS database.
    """
    # NOTE(review): placeholder credentials/host/db-name — fill in real values.
    # Password must be URL-encoded (e.g. urllib.parse.quote_plus) before use.
    username_ibis = ''
    password_ibis = ''  # in url format
    connection_string_ibis = f'database://{username_ibis}:{password_ibis}@ipaddress/'
    client_ibis = MongoClient(connection_string_ibis)
    ibis_ngs_db = client_ibis['database name']
    return ibis_ngs_db


def common_uat_db_connection():
    """Open a connection to the common UAT MongoDB and return the database handle.

    Returns:
        pymongo.database.Database: handle to the UAT database.
    """
    # NOTE(review): placeholder credentials/host/db-name — fill in real values.
    username_ibis = ''
    password_ibis = ''  # in url format
    connection_string_ibis = f'database://{username_ibis}:{password_ibis}@ipaddress/'
    client_ibis = MongoClient(connection_string_ibis)
    ibis_ngs_db = client_ibis['database name']
    return ibis_ngs_db


def aon_ngs_db_connection():
    """Open a connection to the AON NGS MongoDB and return the database handle.

    Returns:
        pymongo.database.Database: handle to the AON NGS database.
    """
    # NOTE(review): placeholder credentials/host/db-name — fill in real values.
    username_aon = ''
    password_aon = ''  # in url format
    connection_string_aon = f'database://{username_aon}:{password_aon}@ipaddress/'
    client_aon = MongoClient(connection_string_aon)
    aon_ngs_db = client_aon['database name']
    return aon_ngs_db


def doc_textNLP(docID, db):
    """Fetch the ``docTextNLP`` payload for *docID* from *db* as a DataFrame.

    Args:
        docID: document identifier; stringified before the query.
        db: MongoDB database handle with a ``documents`` collection.

    Returns:
        pandas.DataFrame built from the document's ``docTextNLP`` field.

    Raises:
        TypeError: if no document matches *docID* (``find_one`` returns None).
    """
    temp_entity = db.documents.find_one({'docID': {'$in': [f"{docID}"]}})
    doc_text_df = pd.DataFrame(temp_entity['docTextNLP'])
    return doc_text_df


def low_quality_report(docID, db):
    """Fetch the ``docTextNLPLowQuality`` payload for *docID* as a DataFrame.

    Args:
        docID: document identifier; stringified before the query.
        db: MongoDB database handle with a ``documents`` collection.

    Returns:
        pandas.DataFrame built from the document's ``docTextNLPLowQuality`` field.

    Raises:
        TypeError: if no document matches *docID* (``find_one`` returns None).
    """
    temp_entity = db.documents.find_one({'docID': {'$in': [f"{docID}"]}})
    low_quality_df = pd.DataFrame(temp_entity['docTextNLPLowQuality'])
    return low_quality_df


def db_selection(doc_id):
    """Select the database connection appropriate for *doc_id*.

    Numeric document IDs live in the IBIS NGS database, all others in AON —
    the same rule applied inline in ``main()``.  (The previous version
    returned the IBIS connection on both branches; that looked like a
    copy/paste bug, fixed here to mirror ``main()``.)

    Args:
        doc_id: document identifier (any type; stringified for the check).

    Returns:
        pymongo.database.Database: the selected database handle.
    """
    if f'{doc_id}'.isdigit():
        db = ibis_ngs_db_connection()
    else:
        db = aon_ngs_db_connection()
    return db


# Function to get list of document IDs
def get_document_ids():
    """Return the unique document-ID prefixes of the PNG files under ``abc/``.

    Assumes files are named ``<docID>_... .png``; duplicates are collapsed.
    Order of the returned list is unspecified (set-based).
    """
    # Assuming 'abc' is the folder containing documents
    document_ids = [file.split('_')[0] for file in os.listdir('abc') if file.endswith('.png')]
    return list(set(document_ids))


# Function to load image based on selected document ID and page number
def load_image(image_path, document_id, page_number):
    """Build the page-image path and return it, or None if the file is absent.

    Args:
        image_path: directory prefix (must already end with a path separator).
        document_id: document identifier used in the file name.
        page_number: 1-based page number; images on disk are 0-based.

    Returns:
        str path to the PNG if it exists, otherwise None.
    """
    im_path = f"{image_path}{document_id}-{page_number-1}.png"
    if os.path.exists(im_path):
        return im_path
    return None


# Function to load dataframe based on selected document ID
def load_dataframe(auto_csv_path, document_id, page_number):
    """Load the auto-NLP CSV matching *document_id* and return its rows for one page.

    Args:
        auto_csv_path: directory prefix (must already end with a path separator).
        document_id: document identifier; the CSV is located via the glob
            pattern ``*<document_id>*auto.csv``.
        page_number: value matched against the CSV's ``Page#`` column.

    Returns:
        pandas.DataFrame with the rows for *page_number*, or None if no
        matching CSV file exists.
    """
    csv_matches = glob(f'{auto_csv_path}*{document_id}*auto.csv')
    if not csv_matches:
        return None
    # If several CSVs match, keep the historical behavior of using the first.
    auto_df = pd.read_csv(csv_matches[0])
    return auto_df[auto_df['Page#'] == page_number]


def path_setting(PhaseData_path, Inbound_CSV_path):
    """Derive the working paths and the PIF list from the user-supplied roots.

    Args:
        PhaseData_path: root folder of the phase data (no trailing slash).
        Inbound_CSV_path: path to the inbound CSV; must contain a ``pif_key``
            column.

    Returns:
        tuple ``(pif_list, image_path, auto_csv_path)`` where *pif_list* is
        the list of ``pif_key`` values, and the two paths end with ``/``.
    """
    auto_csv_path = f'{PhaseData_path}/Batch1/NLP_batch/'
    image_path = f'{PhaseData_path}/Data/output/images/'
    inbound_df = pd.read_csv(Inbound_CSV_path)
    pif_list = list(inbound_df.pif_key.values)
    return pif_list, image_path, auto_csv_path


def main():
    """Streamlit review UI: browse document pages, compare DB vs pipeline
    biomarker counts, and collect reviewer comments into ``comments.csv``."""
    st.set_page_config(layout="wide")

    st.write("### Input Paths")
    PhaseData_path = st.text_input("Enter PhaseData Path:")
    Inbound_CSV_path = st.text_input("Enter Inbound_CSV Path:")
    if not PhaseData_path or not Inbound_CSV_path:
        st.warning("Please enter both PhaseData Path and Inbound_CSV Path.")
        return

    pif_list, image_path, auto_csv_path = path_setting(PhaseData_path, Inbound_CSV_path)

    # Adjustable column layout.
    col1_width = st.sidebar.slider("Width of First Column", 0.1, 10.0, 2.0, 0.1)
    col2_width = st.sidebar.slider("Width of Second Column", 0.1, 10.0, 6.5, 0.1)
    col3_width = st.sidebar.slider("Width of Third Column", 0.1, 10.0, 5.0, 0.1)
    col1, col2, col3 = st.columns([col1_width, col2_width, col3_width])

    with col1:
        st.write("### Document Selection")
        # max_value is len(pif_list): the previous +1 allowed an index one
        # past the end of pif_list, raising IndexError at the maximum.
        doc_index = st.number_input("Select Document Index", min_value=1,
                                    max_value=len(pif_list), step=1, value=1)
        document_id = pif_list[doc_index - 1]
        st.write("Current Document ID: ", document_id)

        # Page numbers are parsed from image file names "<docID>-<page>.png".
        pages = [int(i.split('-')[-1].split('.')[0])
                 for i in glob(f"{image_path}{document_id}*.png")]
        page_number = st.number_input("Page Number", min_value=1,
                                      max_value=len(pages), step=1, value=1)

        # Numeric document IDs live in IBIS, all others in AON.
        if f'{document_id}'.isdigit():
            db = ibis_ngs_db_connection()
        else:
            db = aon_ngs_db_connection()
        db_df = doc_textNLP(f'{document_id}', db)
        df_tmp = pd.read_csv(glob(f"{auto_csv_path}*{document_id}*auto.csv")[0])

        reason_for_onhold = st.text_area(
            "Reason for On-hold: ",
            value="Add a reason for onhold column into inbound CSV")
        comment_pipeline_db = f"#BM DB Page:{db_df[db_df['Page#']==page_number].shape[0]}\n#BM Pipeline Page:{df_tmp[df_tmp['Page#']==page_number].shape[0]}\n-----------------------------\n#BM DB Total:{db_df.shape[0]}\n#BM Pipeline Total:{df_tmp.shape[0]}"
        pipeline_db_stats = st.text_area("Biomarker Stats#: ",
                                         value=comment_pipeline_db, height=150)

    with col2:
        st.write("### Display Image")
        im_path = load_image(image_path, document_id, page_number)
        if im_path:
            st.image(im_path)
        else:
            st.write("Image not found")

    with col3:
        st.write("### Display DataFrame")
        df = load_dataframe(auto_csv_path, document_id, page_number)
        if df is not None:
            columns_to_display = st.multiselect("Select Columns to Display", df.columns)
            if len(columns_to_display) > 0:
                st.write(df[columns_to_display])

                st.subheader("Add Comments on NLP Output")
                reco_list = ['Regular NLP working', 'Wrong Report type',
                             'Report not Found', 'Dev work required',
                             'NLP not supported (non 5 labs)',
                             'Limited Regular NLP Support and Manual NLP (ROI) is working',
                             'Poor quality report- NLP not supported']
                comment = st.selectbox("Select Comment", options=reco_list)
                add_comment = st.text_area("Add your additional comment here")

                if st.button("Submit"):
                    # Append just this submission to comments.csv.  The old
                    # code declared "global comments_df" without ever creating
                    # it (NameError on first submit), used DataFrame.append
                    # (removed in pandas 2.0), and re-wrote the accumulated
                    # frame with mode="a", duplicating earlier rows.
                    new_row = pd.DataFrame({
                        "Document ID": [document_id],
                        "Page": [page_number],
                        "Comment": [comment],
                        "Additional Comment": [add_comment],
                    })
                    comments_csv = "comments.csv"
                    new_row.to_csv(comments_csv, mode="a",
                                   header=not os.path.exists(comments_csv),
                                   index=False)
            else:
                st.write("No columns selected")
        else:
            st.write("DataFrame not found")


if __name__ == "__main__":
    main()