qa-app-overlap / app.py
tiesan's picture
Upload 5 files
4ff22c7
from datetime import datetime
import gradio as gr
import gspread
import validators
from validators import ValidationFailure
from google.oauth2.service_account import Credentials
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import etl
# test sheet - https://docs.google.com/spreadsheets/d/1iJ0-882HsWkAth0e0P_kf21aL6583Z7m1LwloaRCVEc/edit#gid=0
week_list = [f"week_{i}" for i in range(1, 51)]
# credentials file
cred_file_path = 'heliumhealth-a05d595e5991.json'
# google auth scopes
scopes = ['https://www.googleapis.com/auth/spreadsheets',
'https://www.googleapis.com/auth/drive']
# create credentials
credentials = Credentials.from_service_account_file(
cred_file_path, scopes=scopes )
# authorize google spreadsheet
gc = gspread.authorize(credentials)
gauth = GoogleAuth()
drive = GoogleDrive(gauth)
def overlap_matching(row, d_type):
"""
matching overlap
"""
match_cols = {
"prescription": ['RX Norm [Super Generic]_x', 'RX Norm [Super Generic]_y'],
"diagnosis": ['ICD10 Diagnosis_x','ICD10 Diagnosis_y']
}
cols = match_cols[d_type]
row['match_status'] = 'match' if row[cols[0]] == row[cols[1]] else 'no match'
return row
def overlap_check(gs, start_date, end_date, week_no, d_type):
"""
load, preprocess, check overlap, postprocess and output data to google sheet
Args:
gs (GSheet instance): Gsheet instance access to google sheet
start_date (str): Date str format(YYYY-mm-dd)
end_date (_type_): Date str format(YYYY-mm-dd)
d_type (_type_): sheet type (prescription or diagnosis)
"""
# load data
all_data = etl.load_data(gs, start_date, end_date, d_type)
# preprocess and return overlap data
overlap_data = etl.preprocess_data(all_data, d_type)
# do overlap matching
overlap_data = overlap_data.apply(overlap_matching, axis=1, args=[d_type])
# post process
overlap_data = etl.post_process(overlap_data, d_type)
# write to sheet
etl.output_data(gc, overlap_data, week_no, d_type)
def overlap_check_main(sheet_type, start_date_str, end_date_str, week_str, sheet_url):
"""
overlap check main function
Args:
sheet_type (str): sheet type (prescription or diagnosis)
start_date_str (str): start date string: e.g. 2023-03-21
end_date_str (str): end date string: e.g. 2023-03-24
week_str (str): week string e.g. week_1
sheet_url (url): _description_
Raises:
gr.exceptions.Error: Date Format Error - either start_date or end date are bad format
gr.exceptions.Error: Date Error - when start_date is greater than end date
gr.exceptions.Error: URL Error - Bad url format
"""
# format date from string
try:
start_date = datetime.strptime(start_date_str.strip(), "%Y-%m-%d").date()
end_date = datetime.strptime(end_date_str.strip(), "%Y-%m-%d").date()
except:
raise gr.Error(message="Wrong date format")
# raise error when start date is greater end date
if start_date >= end_date:
raise gr.Error(message="Start date cannot be greater end date")
# Check if the input is valid url
# ToDO: Error message displayed is not explanatory - Fix it
url_check = validators.url(sheet_url)
if isinstance(url_check, ValidationFailure):
raise gr.Error(message="Please enter a valid URL")
# open the google sheet for reading
gs = gc.open_by_url(sheet_url)
# if sheet_type == 'prescription':
try:
overlap_check(gs, start_date_str, end_date_str, week_str, sheet_type)
except:
gr.Error(message="Permission denied. Please add IAM user to the sheet and try again")
return f"Successfully ran {sheet_type} overlap check for {week_str.replace('_', ' ').title()} ({start_date_str} - {end_date_str})"
with gr.Blocks() as demo:
gr.Markdown(
"""
## Overlap Check App
* Add IAM User to sheet you want to test
* Gsheet tabs required for diagnosis:
* Diagnosis
* Gsheet tabs required for prescription:
* Prescriptions
* Data headers required for diagnosis
* Unstructured Name, ICD10 Diagnosis, Intern
* Data headers required for prescription
* Unstructured Name, RX Norm [Super Generic], Intern
"""
)
# inputs
sheet_type = gr.Dropdown(['prescription', 'diagnosis'], label="QA Type")
start_date = gr.Textbox(label="Start Date", placeholder="YYYY-MM-DD")
end_date = gr.Textbox(label="End Date", placeholder="YYYY-MM-DD")
week_input = gr.Dropdown(week_list, label="Week")
url = gr.Textbox(label="URL", placeholder="Enter sheet url ...")
# outputs
output = gr.Textbox(label="Output Box")
run_btn = gr.Button("Run")
run_btn.click(
fn=overlap_check_main,
inputs=[
sheet_type,
start_date,
end_date,
week_input,
url
],
outputs=output,
api_name="Overlap_check"
)
demo.launch()