| import streamlit as st |
| import pandas as pd |
| import os |
| from glob import glob |
|
|
|
|
| from pymongo import MongoClient |
| import pandas as pd |
| from glob import glob |
|
|
def ibis_ngs_db_connection():
    """Create a ``MongoClient`` and return the database handle used for IBIS NGS.

    Credentials are read from the ``IBIS_NGS_DB_USERNAME`` and
    ``IBIS_NGS_DB_PASSWORD`` environment variables (empty string when unset)
    instead of being written into the source, and are percent-escaped before
    being embedded in the connection URI.

    Returns:
        The object obtained by indexing the client with the database name.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    # Reserved characters such as '@', ':' or '/' in a credential would
    # otherwise corrupt the URI.
    username_ibis = quote_plus(os.environ.get('IBIS_NGS_DB_USERNAME', ''))
    password_ibis = quote_plus(os.environ.get('IBIS_NGS_DB_PASSWORD', ''))

    # NOTE(review): the scheme, host and database name below look like
    # redacted placeholders -- confirm the real values before deploying.
    connection_string_ibis = f'database://{username_ibis}:{password_ibis}@ipaddress/'
    client_ibis = MongoClient(connection_string_ibis)
    return client_ibis['database name']
|
|
def common_uat_db_connection():
    """Create a ``MongoClient`` and return the database handle used for common UAT.

    Credentials are read from the ``COMMON_UAT_DB_USERNAME`` and
    ``COMMON_UAT_DB_PASSWORD`` environment variables (empty string when unset)
    instead of being written into the source, and are percent-escaped before
    being embedded in the connection URI.

    Returns:
        The object obtained by indexing the client with the database name.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    # Reserved characters such as '@', ':' or '/' in a credential would
    # otherwise corrupt the URI.
    username_uat = quote_plus(os.environ.get('COMMON_UAT_DB_USERNAME', ''))
    password_uat = quote_plus(os.environ.get('COMMON_UAT_DB_PASSWORD', ''))

    # NOTE(review): the scheme, host and database name below look like
    # redacted placeholders -- confirm the real values before deploying.
    connection_string_uat = f'database://{username_uat}:{password_uat}@ipaddress/'
    client_uat = MongoClient(connection_string_uat)
    return client_uat['database name']
|
|
|
|
def aon_ngs_db_connection():
    """Create a ``MongoClient`` and return the database handle used for AON NGS.

    Credentials are read from the ``AON_NGS_DB_USERNAME`` and
    ``AON_NGS_DB_PASSWORD`` environment variables (empty string when unset)
    instead of being written into the source, and are percent-escaped before
    being embedded in the connection URI.

    Returns:
        The object obtained by indexing the client with the database name.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    # Reserved characters such as '@', ':' or '/' in a credential would
    # otherwise corrupt the URI.
    username_aon = quote_plus(os.environ.get('AON_NGS_DB_USERNAME', ''))
    password_aon = quote_plus(os.environ.get('AON_NGS_DB_PASSWORD', ''))

    # NOTE(review): the scheme, host and database name below look like
    # redacted placeholders -- confirm the real values before deploying.
    connection_string_aon = f'database://{username_aon}:{password_aon}@ipaddress/'
    client_aon = MongoClient(connection_string_aon)
    return client_aon['database name']
|
|
|
|
def doc_textNLP(docID, db):
    """Return the ``docTextNLP`` entries of one document as a DataFrame.

    Args:
        docID: Document identifier; it is formatted to a string before being
            used in the query.
        db: Object exposing ``documents.find_one`` (a database handle).

    Returns:
        ``pd.DataFrame`` built from the document's ``docTextNLP`` field.

    Raises:
        ValueError: If no document matches ``docID``.
        KeyError: If the matched document has no ``docTextNLP`` field.
    """
    temp_entity = db.documents.find_one({'docID': {'$in': [f"{docID}"]}})
    # find_one yields None when nothing matches; fail with a message that
    # names the document instead of an opaque "NoneType is not subscriptable".
    if temp_entity is None:
        raise ValueError(f"No document found for docID {docID!r}")
    return pd.DataFrame(temp_entity['docTextNLP'])
|
|
def low_quality_report(docID, db):
    """Return the ``docTextNLPLowQuality`` entries of one document as a DataFrame.

    Args:
        docID: Document identifier; it is formatted to a string before being
            used in the query.
        db: Object exposing ``documents.find_one`` (a database handle).

    Returns:
        ``pd.DataFrame`` built from the document's ``docTextNLPLowQuality``
        field.

    Raises:
        ValueError: If no document matches ``docID``.
        KeyError: If the matched document has no ``docTextNLPLowQuality`` field.
    """
    temp_entity = db.documents.find_one({'docID': {'$in': [f"{docID}"]}})
    # find_one yields None when nothing matches; fail with a message that
    # names the document instead of an opaque "NoneType is not subscriptable".
    if temp_entity is None:
        raise ValueError(f"No document found for docID {docID!r}")
    return pd.DataFrame(temp_entity['docTextNLPLowQuality'])
|
|
def db_selection(doc_id):
    """Pick the database connection that matches the shape of ``doc_id``.

    IDs whose string form consists only of digits use
    ``ibis_ngs_db_connection()``; every other ID uses
    ``aon_ngs_db_connection()``. This is the same routing ``main`` applies
    inline; previously both branches returned the IBIS connection.

    Args:
        doc_id: Document identifier of any type; it is formatted to a string
            before the digit check.

    Returns:
        The database handle returned by the selected connection function.
    """
    if f'{doc_id}'.isdigit():
        return ibis_ngs_db_connection()
    return aon_ngs_db_connection()
|
|
|
|
| |
def get_document_ids(image_dir='abc'):
    """List the distinct document IDs that have ``.png`` files in a directory.

    The document ID is the part of each file name before the first ``_``.

    Args:
        image_dir: Directory to scan. Defaults to ``'abc'``, the location that
            used to be hard-coded, so existing zero-argument calls behave as
            before.

    Returns:
        A list of unique IDs in no particular order.

    Raises:
        FileNotFoundError: If ``image_dir`` does not exist.
    """
    return list({
        file_name.split('_')[0]
        for file_name in os.listdir(image_dir)
        if file_name.endswith('.png')
    })
|
|
| |
def load_image(image_path, document_id, page_number):
    """Resolve the image file for one page of a document.

    The file name is ``<document_id>-<page_number - 1>.png`` appended directly
    to ``image_path`` (no separator is inserted).

    Returns:
        The path string when that file exists, otherwise ``None``.
    """
    # Image files are numbered one below the page number passed in.
    file_index = page_number - 1
    candidate = f"{image_path}{document_id}-{file_index}.png"
    return candidate if os.path.exists(candidate) else None
|
|
| |
def load_dataframe(auto_csv_path, document_id, page_number):
    """Load the rows of a document's ``*auto.csv`` that belong to one page.

    Files are located with the glob ``<auto_csv_path>*<document_id>*auto.csv``
    and the first match is read.

    Returns:
        The rows whose ``Page#`` equals ``page_number``, or ``None`` when no
        file matches the pattern.
    """
    matches = glob(f'{auto_csv_path}*{document_id}*auto.csv')
    print(matches)
    if not matches:
        return None
    full_frame = pd.read_csv(matches[0])
    on_requested_page = full_frame['Page#'] == page_number
    return full_frame[on_requested_page]
| |
def path_setting(PhaseData_path, Inbound_CSV_path):
    """Derive the working paths and read the document keys from the inbound CSV.

    Args:
        PhaseData_path: Root directory under which the image and NLP batch
            folders are built.
        Inbound_CSV_path: CSV file that has a ``pif_key`` column.

    Returns:
        Tuple ``(pif_list, image_path, auto_csv_path)``: the ``pif_key`` values
        as a list, then the image directory and the NLP batch directory, both
        ending in ``/``.
    """
    inbound = pd.read_csv(Inbound_CSV_path)
    pif_keys = list(inbound.pif_key.values)
    images_dir = f'{PhaseData_path}/Data/output/images/'
    nlp_batch_dir = f'{PhaseData_path}/Batch1/NLP_batch/'
    return pif_keys, images_dir, nlp_batch_dir
|
|
def _append_comment_row(comments_csv, row):
    """Append one comment row to ``comments_csv``, adding the header only for a new file."""
    write_header = not os.path.exists(comments_csv)
    pd.DataFrame(row).to_csv(comments_csv, mode="a", header=write_header, index=False)


def main():
    """Run the Streamlit review page.

    The page asks for the PhaseData and inbound-CSV paths, lets the reviewer
    pick a document and page, shows the page image next to the pipeline rows
    for that page, and appends the reviewer's comment to ``comments.csv``.

    Fixes relative to the previous version:
      * the document index can no longer run one past the end of the list;
      * an empty inbound list or a document without page images produces a
        warning instead of an invalid ``number_input`` range;
      * page images are counted with the ``<document_id>-*.png`` pattern that
        ``load_image`` uses, so documents sharing an ID prefix are not counted;
      * a missing pipeline CSV no longer raises ``IndexError`` while building
        the stats, so the "DataFrame not found" message can be reached;
      * submitting a comment no longer depends on an undefined global or on
        ``DataFrame.append`` (removed in pandas 2.0), and writes only the new
        row instead of re-appending earlier ones.
    """
    st.set_page_config(layout="wide")
    st.write("### Input Paths")
    PhaseData_path = st.text_input("Enter PhaseData Path:")
    Inbound_CSV_path = st.text_input("Enter Inbound_CSV Path:")

    if not PhaseData_path or not Inbound_CSV_path:
        st.warning("Please enter both PhaseData Path and Inbound_CSV Path.")
        return

    pif_list, image_path, auto_csv_path = path_setting(PhaseData_path, Inbound_CSV_path)
    if not pif_list:
        # number_input below needs max_value >= min_value.
        st.warning("The inbound CSV contains no documents.")
        return

    col1_width = st.sidebar.slider("Width of First Column", 0.1, 10.0, 2.0, 0.1)
    col2_width = st.sidebar.slider("Width of Second Column", 0.1, 10.0, 6.5, 0.1)
    col3_width = st.sidebar.slider("Width of Third Column", 0.1, 10.0, 5.0, 0.1)

    col1, col2, col3 = st.columns([col1_width, col2_width, col3_width])

    with col1:
        st.write("### Document Selection")
        # The index is 1-based, so the largest valid value is len(pif_list).
        doc_index = st.number_input(
            "Select Document Index", min_value=1, max_value=len(pif_list), step=1, value=1
        )
        document_id = pif_list[doc_index - 1]
        st.write("Current Document ID: ", document_id)

        # Same naming scheme load_image() relies on: "<document_id>-<n>.png".
        page_count = len(glob(f"{image_path}{document_id}-*.png"))
        if page_count == 0:
            st.warning("No page images found for this document.")
            return
        page_number = st.number_input(
            "Page Number", min_value=1, max_value=page_count, step=1, value=1
        )

        # All-digit IDs are looked up in the IBIS database, the rest in AON.
        if f'{document_id}'.isdigit():
            db = ibis_ngs_db_connection()
        else:
            db = aon_ngs_db_connection()
        db_df = doc_textNLP(f'{document_id}', db)

        pipeline_csvs = glob(f"{auto_csv_path}*{document_id}*auto.csv")
        if pipeline_csvs:
            df_tmp = pd.read_csv(pipeline_csvs[0])
            pipeline_page_rows = df_tmp[df_tmp['Page#'] == page_number].shape[0]
            pipeline_total_rows = df_tmp.shape[0]
        else:
            # No pipeline output yet: still show the DB side of the stats.
            pipeline_page_rows = pipeline_total_rows = "N/A"

        st.text_area(
            "Reason for On-hold: ",
            value="Add a reason for onhold column into inbound CSV",
        )
        db_page_rows = db_df[db_df['Page#'] == page_number].shape[0]
        comment_pipeline_db = (
            f"#BM DB Page:{db_page_rows}\n"
            f"#BM Pipeline Page:{pipeline_page_rows}\n"
            "-----------------------------\n"
            f"#BM DB Total:{db_df.shape[0]}\n"
            f"#BM Pipeline Total:{pipeline_total_rows}"
        )
        st.text_area("Biomarker Stats#: ", value=comment_pipeline_db, height=150)

    with col2:
        st.write("### Display Image")
        im_path = load_image(image_path, document_id, page_number)
        if im_path:
            st.image(im_path)
        else:
            st.write("Image not found")

    with col3:
        st.write("### Display DataFrame")
        df = load_dataframe(auto_csv_path, document_id, page_number)
        if df is not None:
            columns_to_display = st.multiselect("Select Columns to Display", df.columns)
            if len(columns_to_display) > 0:
                st.write(df[columns_to_display])
                st.subheader("Add Comments on NLP Output")
                reco_list = ['Regular NLP working', 'Wrong Report type', 'Report not Found',
                             'Dev work required', 'NLP not supported (non 5 labs)',
                             'Limited Regular NLP Support and Manual NLP (ROI) is working',
                             'Poor quality report- NLP not supported']
                comment = st.selectbox("Select Comment", options=reco_list)
                add_comment = st.text_area("Add your additional comment here")
                if st.button("Submit"):
                    data = {
                        "Document ID": [document_id],
                        "Page": [page_number],
                        "Comment": [comment],
                        "Additional Comment": [add_comment],
                    }
                    # Persist only the new row; earlier rows are already on disk.
                    _append_comment_row("comments.csv", data)
            else:
                st.write("No columns selected")
        else:
            st.write("DataFrame not found")


if __name__ == "__main__":
    main()