Spaces:
Sleeping
Sleeping
File size: 5,054 Bytes
4ff22c7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | from datetime import datetime
import gradio as gr
import gspread
import validators
from validators import ValidationFailure
from google.oauth2.service_account import Credentials
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import etl
# test sheet - https://docs.google.com/spreadsheets/d/1iJ0-882HsWkAth0e0P_kf21aL6583Z7m1LwloaRCVEc/edit#gid=0
week_list = [f"week_{i}" for i in range(1, 51)]
# credentials file
cred_file_path = 'heliumhealth-a05d595e5991.json'
# google auth scopes
scopes = ['https://www.googleapis.com/auth/spreadsheets',
'https://www.googleapis.com/auth/drive']
# create credentials
credentials = Credentials.from_service_account_file(
cred_file_path, scopes=scopes )
# authorize google spreadsheet
gc = gspread.authorize(credentials)
gauth = GoogleAuth()
drive = GoogleDrive(gauth)
def overlap_matching(row, d_type):
"""
matching overlap
"""
match_cols = {
"prescription": ['RX Norm [Super Generic]_x', 'RX Norm [Super Generic]_y'],
"diagnosis": ['ICD10 Diagnosis_x','ICD10 Diagnosis_y']
}
cols = match_cols[d_type]
row['match_status'] = 'match' if row[cols[0]] == row[cols[1]] else 'no match'
return row
def overlap_check(gs, start_date, end_date, week_no, d_type):
"""
load, preprocess, check overlap, postprocess and output data to google sheet
Args:
gs (GSheet instance): Gsheet instance access to google sheet
start_date (str): Date str format(YYYY-mm-dd)
end_date (_type_): Date str format(YYYY-mm-dd)
d_type (_type_): sheet type (prescription or diagnosis)
"""
# load data
all_data = etl.load_data(gs, start_date, end_date, d_type)
# preprocess and return overlap data
overlap_data = etl.preprocess_data(all_data, d_type)
# do overlap matching
overlap_data = overlap_data.apply(overlap_matching, axis=1, args=[d_type])
# post process
overlap_data = etl.post_process(overlap_data, d_type)
# write to sheet
etl.output_data(gc, overlap_data, week_no, d_type)
def overlap_check_main(sheet_type, start_date_str, end_date_str, week_str, sheet_url):
"""
overlap check main function
Args:
sheet_type (str): sheet type (prescription or diagnosis)
start_date_str (str): start date string: e.g. 2023-03-21
end_date_str (str): end date string: e.g. 2023-03-24
week_str (str): week string e.g. week_1
sheet_url (url): _description_
Raises:
gr.exceptions.Error: Date Format Error - either start_date or end date are bad format
gr.exceptions.Error: Date Error - when start_date is greater than end date
gr.exceptions.Error: URL Error - Bad url format
"""
# format date from string
try:
start_date = datetime.strptime(start_date_str.strip(), "%Y-%m-%d").date()
end_date = datetime.strptime(end_date_str.strip(), "%Y-%m-%d").date()
except:
raise gr.Error(message="Wrong date format")
# raise error when start date is greater end date
if start_date >= end_date:
raise gr.Error(message="Start date cannot be greater end date")
# Check if the input is valid url
# ToDO: Error message displayed is not explanatory - Fix it
url_check = validators.url(sheet_url)
if isinstance(url_check, ValidationFailure):
raise gr.Error(message="Please enter a valid URL")
# open the google sheet for reading
gs = gc.open_by_url(sheet_url)
# if sheet_type == 'prescription':
try:
overlap_check(gs, start_date_str, end_date_str, week_str, sheet_type)
except:
gr.Error(message="Permission denied. Please add IAM user to the sheet and try again")
return f"Successfully ran {sheet_type} overlap check for {week_str.replace('_', ' ').title()} ({start_date_str} - {end_date_str})"
with gr.Blocks() as demo:
gr.Markdown(
"""
## Overlap Check App
* Add IAM User to sheet you want to test
* Gsheet tabs required for diagnosis:
* Diagnosis
* Gsheet tabs required for prescription:
* Prescriptions
* Data headers required for diagnosis
* Unstructured Name, ICD10 Diagnosis, Intern
* Data headers required for prescription
* Unstructured Name, RX Norm [Super Generic], Intern
"""
)
# inputs
sheet_type = gr.Dropdown(['prescription', 'diagnosis'], label="QA Type")
start_date = gr.Textbox(label="Start Date", placeholder="YYYY-MM-DD")
end_date = gr.Textbox(label="End Date", placeholder="YYYY-MM-DD")
week_input = gr.Dropdown(week_list, label="Week")
url = gr.Textbox(label="URL", placeholder="Enter sheet url ...")
# outputs
output = gr.Textbox(label="Output Box")
run_btn = gr.Button("Run")
run_btn.click(
fn=overlap_check_main,
inputs=[
sheet_type,
start_date,
end_date,
week_input,
url
],
outputs=output,
api_name="Overlap_check"
)
demo.launch() |