File size: 5,054 Bytes
4ff22c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
from datetime import datetime

import gradio as gr
import gspread
import validators
from validators import ValidationFailure
from google.oauth2.service_account import Credentials
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive 

import etl

# test sheet - https://docs.google.com/spreadsheets/d/1iJ0-882HsWkAth0e0P_kf21aL6583Z7m1LwloaRCVEc/edit#gid=0
week_list = [f"week_{i}" for i in range(1, 51)]

# credentials file 
cred_file_path = 'heliumhealth-a05d595e5991.json'

# google auth scopes
scopes = ['https://www.googleapis.com/auth/spreadsheets',
          'https://www.googleapis.com/auth/drive']

# create credentials 
credentials = Credentials.from_service_account_file(
    cred_file_path, scopes=scopes )

# authorize google spreadsheet
gc = gspread.authorize(credentials)

gauth = GoogleAuth()
drive = GoogleDrive(gauth)

def overlap_matching(row, d_type):
  """
  matching overlap 
  """
  match_cols = {
      "prescription": ['RX Norm [Super Generic]_x', 'RX Norm [Super Generic]_y'],
      "diagnosis": ['ICD10 Diagnosis_x','ICD10 Diagnosis_y']
  }
  cols = match_cols[d_type]
  row['match_status'] = 'match' if row[cols[0]] == row[cols[1]] else 'no match'
  return row

def overlap_check(gs, start_date, end_date, week_no, d_type):
    """
   load, preprocess, check overlap, postprocess and output data to google sheet 

    Args:
        gs (GSheet instance): Gsheet instance access to google sheet 
        start_date (str): Date str format(YYYY-mm-dd)
        end_date (_type_): Date str format(YYYY-mm-dd)
        d_type (_type_):  sheet type (prescription or diagnosis)
    """
    # load data
    all_data = etl.load_data(gs, start_date, end_date, d_type)

    # preprocess and return overlap data
    overlap_data = etl.preprocess_data(all_data, d_type)

    # do overlap matching 
    overlap_data = overlap_data.apply(overlap_matching, axis=1, args=[d_type])

    # post process 
    overlap_data = etl.post_process(overlap_data, d_type)

    # write to sheet 
    etl.output_data(gc, overlap_data, week_no, d_type)


def overlap_check_main(sheet_type, start_date_str, end_date_str, week_str, sheet_url):
    """
    overlap check main function

    Args:
        sheet_type (str): sheet type (prescription or diagnosis)
        start_date_str (str): start date string: e.g. 2023-03-21 
        end_date_str (str): end date string: e.g. 2023-03-24
        week_str (str): week string e.g. week_1
        sheet_url (url): _description_

    Raises:
        gr.exceptions.Error: Date Format Error - either start_date or end date are bad format
        gr.exceptions.Error: Date Error - when start_date is greater than end date
        gr.exceptions.Error: URL Error - Bad url format
    """
    # format date from string
    try: 
        start_date = datetime.strptime(start_date_str.strip(), "%Y-%m-%d").date()
        end_date = datetime.strptime(end_date_str.strip(), "%Y-%m-%d").date()
    except:
        raise gr.Error(message="Wrong date format")

    # raise error when start date is greater end date
    if start_date >= end_date:
        raise gr.Error(message="Start date cannot be greater end date")
    
    # Check if the input is valid url
    # ToDO: Error message displayed is not explanatory - Fix it 
    url_check = validators.url(sheet_url)
    if isinstance(url_check, ValidationFailure):
        raise gr.Error(message="Please enter a valid URL")
    
    
    # open the google sheet for reading 
    
    gs = gc.open_by_url(sheet_url)
    

    # if sheet_type == 'prescription':
    try:
        overlap_check(gs, start_date_str, end_date_str, week_str,  sheet_type)
    except:
        gr.Error(message="Permission denied. Please add IAM user to the sheet and try again")


    return f"Successfully ran {sheet_type} overlap check for {week_str.replace('_', ' ').title()} ({start_date_str} - {end_date_str})"

with gr.Blocks() as demo:
  gr.Markdown(
      """
      ## Overlap Check App

      * Add IAM User to sheet you want to test
      * Gsheet tabs required for diagnosis:  
        * Diagnosis
      * Gsheet tabs required for prescription:  
        * Prescriptions
      * Data headers required for diagnosis
        * Unstructured Name, ICD10 Diagnosis, Intern
      * Data headers required for prescription
        * Unstructured Name, RX Norm [Super Generic], Intern 
      """
  )
  # inputs
  sheet_type = gr.Dropdown(['prescription', 'diagnosis'], label="QA Type")
  start_date = gr.Textbox(label="Start Date", placeholder="YYYY-MM-DD")
  end_date = gr.Textbox(label="End Date", placeholder="YYYY-MM-DD")
  week_input = gr.Dropdown(week_list, label="Week")
  url = gr.Textbox(label="URL", placeholder="Enter sheet url ...")
  
  # outputs
  output = gr.Textbox(label="Output Box")
  
  run_btn = gr.Button("Run")
  run_btn.click(
      fn=overlap_check_main,
      inputs=[
          sheet_type,
          start_date,
          end_date, 
          week_input, 
          url
          ],
      outputs=output, 
      api_name="Overlap_check"
  )

demo.launch()