tiesan commited on
Commit
ca302ac
·
1 Parent(s): cfd4655

Upload 7 files

Browse files
Files changed (7) hide show
  1. Dockerfile +11 -0
  2. README.md +5 -6
  3. app.py +164 -0
  4. etl.py +105 -0
  5. generic.py +22 -0
  6. heliumhealth-a05d595e5991.json +12 -0
  7. requirements.txt +87 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Pinned base image so builds are reproducible.
FROM python:3.11.3

WORKDIR /app

# Copy the dependency list on its own first so the pip layer stays cached
# when only application code changes.
COPY requirements.txt /app/

RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt

COPY . /app/

# Gradio binds to 127.0.0.1 by default, which is unreachable from outside the
# container; GRADIO_SERVER_NAME makes demo.launch() listen on all interfaces.
ENV GRADIO_SERVER_NAME=0.0.0.0
EXPOSE 7860

# Exec form takes one array element per argument. "Gradio app.py" was a single
# element naming an executable that does not exist, so the container could not
# start; run the app with the Python interpreter instead.
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,13 +1,12 @@
1
  ---
2
- title: Quality Check App
3
- emoji: 👁
4
- colorFrom: yellow
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.1.2
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Intern Qa Check
3
+ emoji: 🏢
4
+ colorFrom: pink
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 3.28.1
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # standard libraries
2
+ import os
3
+
4
+ # third party libraries
5
+ import gradio as gr
6
+ import validators
7
+ from validators import ValidationFailure
8
+ import gspread
9
+ from google.oauth2.service_account import Credentials
10
+ from pydrive.auth import GoogleAuth
11
+ from pydrive.drive import GoogleDrive
12
+
13
+ import etl
14
+ from generic import matched_super_generic
15
+
16
# Service-account key file used to authenticate against the Google APIs.
# GOOGLE_APPLICATION_CREDENTIALS (the conventional Google variable) takes
# precedence so a deployment can mount the key as a secret instead of shipping
# it with the code; the bundled filename remains the fallback.
cred_filename = os.environ.get(
    'GOOGLE_APPLICATION_CREDENTIALS',
    'heliumhealth-a05d595e5991.json',
)

# google auth scopes
scopes = [
    'https://www.googleapis.com/auth/spreadsheets',
    'https://www.googleapis.com/auth/drive',
]

# create credentials
credentials = Credentials.from_service_account_file(cred_filename, scopes=scopes)

# authorize google spreadsheet
gc = gspread.authorize(credentials)

# PyDrive handles; not referenced by the functions visible in this file, kept
# because other code may import them from this module.
gauth = GoogleAuth()
drive = GoogleDrive(gauth)
32
+
33
+
34
def prescription_quality_check(gs, sheet_type):
    """
    Run the prescription Q/A pipeline against one spreadsheet.

    - Loads and preprocesses the prescription data and its reference data
    - Marks every row whose 'RX Norm [Super Generic]' value is found in the
      reference 'SUPER_GENERIC' column
    - Writes the selected report columns back through etl.output_data

    Args:
        gs (Object): authenticated google sheet instance
        sheet_type (str): sheet type, always: prescription
    """
    # fetch the raw tabs, then clean them
    raw_rows, raw_reference = etl.load_data(gs, sheet_type)
    rows, reference = etl.preprocess_data(raw_rows, raw_reference, sheet_type)

    # row-wise lookup against the reference list
    lookup_args = (reference, 'RX Norm [Super Generic]', 'SUPER_GENERIC', 'match')
    checked = rows.apply(matched_super_generic, axis=1, args=lookup_args)

    # keep only the columns that belong in the report and write them out
    report_columns = ['Unstructured Name', 'RX Norm [Super Generic]', 'match', 'Number']
    etl.output_data(gs, checked[report_columns], sheet_type)
63
+
64
+
65
def diagnosis_quality_check(gs, sheet_type):
    """
    Run the diagnosis Q/A pipeline against one spreadsheet.

    - Loads and preprocesses the diagnosis data and its reference data
    - Marks every row whose 'ICD10 Diagnosis' value is found in the
      reference 'Name' column
    - Writes the selected report columns back through etl.output_data

    Args:
        gs (Object): google-sheet instance
        sheet_type (str): sheet type, always: diagnosis
    """
    # fetch the raw tabs, then clean them
    raw_rows, raw_reference = etl.load_data(gs, sheet_type)
    rows, reference = etl.preprocess_data(raw_rows, raw_reference, sheet_type)

    # row-wise lookup against the reference list
    lookup_args = (reference, 'ICD10 Diagnosis', 'Name', 'match')
    checked = rows.apply(matched_super_generic, axis=1, args=lookup_args)

    # keep only the columns that belong in the report and write them out
    report_columns = ['Unstructured Name', 'ICD10 Diagnosis', 'match', 'Number']
    etl.output_data(gs, checked[report_columns], sheet_type)
94
+
95
def quality_check_main(sheet_type, sheet_url):
    """
    Validate the inputs, open the sheet and run the requested Q/A check.

    Args:
        sheet_type (str): sheet type either: prescription or diagnosis
        sheet_url (url): url to data sheet

    Raises:
        gr.Error: if no supported sheet type is selected, the URL is missing
            or malformed, or the spreadsheet cannot be opened.

    Returns:
        message (str): success message pointing the user at the result sheet.
    """
    checks = {
        'diagnosis': diagnosis_quality_check,
        'prescription': prescription_quality_check,
    }
    # An unselected dropdown arrives as None; previously that fell through
    # both branches and reported success without checking anything.
    run_check = checks.get(sheet_type)
    if run_check is None:
        raise gr.Error(message="Please select a QA type: prescription or diagnosis")

    # Validate BEFORE calling the Google API: opening the sheet first meant a
    # bad URL surfaced as a gspread error and this message was never shown.
    # validators.url() returns a falsy failure object on invalid input.
    if not sheet_url or not validators.url(sheet_url):
        raise gr.Error(message="Please enter a valid URL")

    # open the google sheet for reading
    try:
        gs = gc.open_by_url(sheet_url)
    except (gspread.exceptions.GSpreadException, PermissionError) as err:
        raise gr.Error(
            message="Could not open the sheet. Check the URL and make sure "
                    "the IAM user has been added to the sheet"
        ) from err

    run_check(gs, sheet_type)

    return f"{sheet_type} Q/A successfully. Please check the result sheet for output"
126
+
127
+
128
+
129
# Build the single-page UI: instructions, two inputs, one output, one button.
with gr.Blocks() as demo:
    # NOTE(review): the indentation inside this literal (including the nesting
    # of the sub-bullets) is reconstructed; confirm against the original file.
    gr.Markdown(
        """
        # Quality Assurance App

        ## Instructions

        * Add IAM User to sheet you want to test
        * Gsheet tabs required for diagnosis:
            * Diagnosis, ICD10 Diagnosis, resultd
        * Gsheet tabs required for prescription:
            * Prescriptions, Super_generic_Prescriptions, result
        * Data headers required for diagnosis
            * Unstructured Name, ICD10 Diagnosis, Number
        * Data headers required for prescription
            * Unstructured Name, RX Norm [Super Generic], Number
        """
    )

    # inputs
    sheet_type = gr.Dropdown(['prescription', 'diagnosis'], label="QA Type")
    url = gr.Textbox(label="URL", placeholder="Enter sheet url ...")

    # outputs
    output = gr.Textbox(label="Output Box")

    # wire the button to the check; argument order matches quality_check_main
    run_btn = gr.Button("Run")
    run_btn.click(
        fn=quality_check_main,
        inputs=[sheet_type, url],
        outputs=output,
        api_name="quality_check",
    )

demo.launch()
etl.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from gspread_dataframe import set_with_dataframe
4
+
5
def load_data(gs, d_type):
    """Read the data tab and its reference tab into DataFrames.

    Args:
        gs: opened spreadsheet object exposing ``worksheet(title)``.
        d_type (str): "diagnosis" or "prescription"; selects which pair of
            tabs is read.

    Raises:
        gr.Error: if ``d_type`` is not a known type or either tab cannot be
            opened.

    Returns:
        tuple: ``(data, super_data)`` DataFrames built from the records of the
        data tab and the reference tab respectively.
    """
    in_sheets_dict = {
        "diagnosis": ["Diagnosis", "ICD10 Diagnosis"],
        "prescription": ["Prescriptions", "Super_generic_Prescriptions"]
    }

    # open data and super sheet
    try:
        data_title, super_title = in_sheets_dict[d_type]
        data_sheet = gs.worksheet(data_title)
        super_sheet = gs.worksheet(super_title)
    except Exception as err:
        # This module does not import the sheet library's exception classes,
        # so every failure is translated here. `gr.exception` (singular) does
        # not exist and raised AttributeError instead of showing the message;
        # gr.Error is the exception Gradio displays to the user.
        raise gr.Error(
            message='Permission denied. Please add IAM user to the sheet and try again'
        ) from err

    # read data from sheets
    data = pd.DataFrame(data_sheet.get_all_records())
    super_data = pd.DataFrame(super_sheet.get_all_records())

    return data, super_data
35
+
36
def preprocess_data(data, super_data, d_type):
    """
    Normalise headers and values of the data and reference frames.

    Header names of both frames are stripped of surrounding whitespace, the
    data frame is reduced to the columns required for ``d_type``, and the two
    columns that are compared later (the coded column of the data frame and
    the reference column of the super frame) have their values stripped.
    The caller's frames are not modified; cleaned copies are returned.

    Args:
        data (pd.DataFrame): rows to be checked.
        super_data (pd.DataFrame): reference rows.
        d_type (str): "diagnosis" or "prescription".

    Raises:
        gr.Error: if ``d_type`` is unknown, a required column is missing, or a
            column that must hold text does not.

    Returns:
        tuple: ``(data, super_data)`` cleaned DataFrames.
    """
    cols_dict = {
        "diagnosis": ['Unstructured Name', 'ICD10 Diagnosis', 'Number'],
        "prescription": ['Unstructured Name', 'RX Norm [Super Generic]', 'Number']
    }

    super_dict = {
        "diagnosis": "Name",
        "prescription": "SUPER_GENERIC"
    }
    try:
        wanted_cols = cols_dict[d_type]
        super_col = super_dict[d_type]
        coded_col = wanted_cols[1]

        # work on copies so the caller's frames keep their original headers
        data = data.copy()
        super_data = super_data.copy()

        # remove white spaces from columns
        data.columns = data.columns.str.strip()
        super_data.columns = super_data.columns.str.strip()

        # select relevant columns; .copy() makes the selection an independent
        # frame so the assignment below does not write into a slice
        data = data[wanted_cols].copy()

        # remove all white spaces from column data
        super_data[super_col] = super_data[super_col].str.strip()
        data[coded_col] = data[coded_col].str.strip()
    except (KeyError, AttributeError) as err:
        # KeyError: unknown d_type or missing column; AttributeError: the .str
        # accessor on non-text headers/values. `gr.exception` (singular) does
        # not exist, so the original handler itself crashed; gr.Error is the
        # exception Gradio displays to the user.
        raise gr.Error(message='Unmatched named columns') from err

    return data, super_data
74
+
75
def output_data(gs, data, dtype):
    """Replace the contents of the result tab with ``data``.

    Args:
        gs: opened spreadsheet object exposing ``worksheet(title)``.
        data (pd.DataFrame): frame to write, header row included, index
            excluded.
        dtype (str): "diagnosis" (writes to tab "resultd") or "prescription"
            (writes to tab "result").

    Raises:
        gr.Error: if ``dtype`` is unknown or the result tab cannot be opened
            or cleared.
    """
    out_sheet_dict = {
        "diagnosis": "resultd",
        "prescription": "result"
    }

    # open the result tab and clear any previous output
    try:
        out_worksheet = gs.worksheet(out_sheet_dict[dtype])
        out_worksheet.clear()
    except Exception as err:
        # This module does not import the sheet library's exception classes,
        # so every failure is translated here. `gr.exception` (singular) does
        # not exist and raised AttributeError instead of showing the message;
        # gr.Error is the exception Gradio displays to the user.
        raise gr.Error("Result sheet does not exist") from err

    # write dataframe to work sheet
    set_with_dataframe(
        worksheet=out_worksheet,
        dataframe=data,
        include_index=False,
        include_column_header=True,
        resize=True)
generic.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def matched_super_generic(row,
                          generic_data,
                          colname,
                          super_colname,
                          match_col):
    """Look up one row's value in the reference data and record the result.

    Intended for ``DataFrame.apply(..., axis=1)``.

    Args:
        row (pd.Series): the row being checked.
        generic_data (pd.DataFrame): reference data.
        colname (str): label in ``row`` holding the value to look up.
        super_colname (str): column of ``generic_data`` to search.
        match_col (str): label under which the result is stored.

    Returns:
        pd.Series: a copy of ``row`` with ``match_col`` set to the first
        matching reference value, or to ``None`` when nothing matches.
    """
    # copy so the object handed over by apply() is never mutated
    row = row.copy()

    value = row[colname]
    reference = generic_data[super_colname]
    hits = reference[reference == value]

    # Always set match_col: if it were written only on a hit, a run with no
    # matches at all would produce a frame without the column and the
    # caller's column selection would raise KeyError.
    row[match_col] = hits.iloc[0] if not hits.empty else None
    return row
+ return row
heliumhealth-a05d595e5991.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "service_account",
3
+ "project_id": "heliumhealth",
4
+ "private_key_id": "a05d595e59911b601cf718b32e967a0f4e1f463d",
5
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEugIBADANBgkqhkiG9w0BAQEFAASCBKQwggSgAgEAAoIBAQCzJJ2dCd32LwAc\njoTiCB2shYwGRFvApmF3/RUkT9BZDg5S+imfuXSsHBgueKIEAW7E0UBGVbioKQC0\nEzKh/+aLVdZEYdqyNdrx4g5HmuDn4sQyd/zP0J3cUa/LlF/cND6HsExJcdkjL9rV\nVZEQZhpVrgB9J6Ui5dZsrN9RWHguP8awI6JFqsfF2aKWJ4Z+Gl9YlxMhCqcwBalF\nf4MUOwnXSK1t0QMXdEAg9zA+wwySYjCz/nVOIrEBB1lHp1mEiwpv2KtuX3f2GGRt\nSGwOThDWJPn6EuLcX2mlxEs7ylEs1hYcKfROPzBMSRX5g/m5lKkRuZLsBpqKZj5G\nrB62FasXAgMBAAECggEADIMutRTG2Zaf0d7MfkMl23J6fMeShwq3/RsCuoYOneHC\n813AwYTBFvJhaAGKA3CcLYgWDrOXEbvHbkx2+WS31BybOiyBi3gCgmmRKMQzGE84\n9/N0h4lhNU3JUxVaerV1dqHEOk8G5RPHG6Z92uDezwQ3yYYW3TCjgPS0JmjhAA3b\nAnAgATuiY4ivscR28xSTDsexQhDnVGdM0ms4hrAUV0nM+sGLbXil6hTBoRi/iEmq\nGESeFsDOcdJKhIAvs94FwhHGLljCniP+2/jN6Wg8+wTFwUpaH0NpV6tfHYvGe8Bp\natfVZwCqwP5+J3dXHuaNUwe/cUK6HSVcGARXp5vubQKBgQD7xXVg2WoMONpGHWnD\nxk3T085JiC+MgBK+y5MzNLTtVxCNzsv3I70lu8ilv7ytfy35bnQ2dx8Ych1Jjc7z\n5kQs9rRxtY5iZvyTRaBB4Zfyr2uGxrR/lMJizsH+M0my48zPiHWLdqJSTupBmufr\n2kFeRIbphEl6u5fir9s5c/+EGwKBgQC2JuCQXrYHryn2vs6qf+Zf/RgzK5+3irHy\nEeYbedEpe6dCy5dWjlh7sXR23938XTXtnwmKWMuovdnLybBXvuWacOsqz5wYuXAn\nK6V8oBPpV9Aj+nVSft26E8nMzNX1lXbvxBgq1GvOUk7Mbi9p4nm6gjKNKYo1/U9P\nH+S9VHQMtQKBgAxqkH9WOYSgySLsEGs3PF9V8rZtoOqs5j/Cil/cGZAa2xYjPKvT\nd2CFAkAqVIO54eqLJ/AHr+Dkv80A0VP15ybQg9WXvo3bxlj89gpJtdSiEgtzgTNJ\ngsycpbSDkv7ffRo/AI0ALMEiYysZGJbpDJA5kO2zOGx1E+h6A7WxoshBAn8UolLO\nB+yW8kDOss62gcaXGRSkt0xgflWqFlz6v9Hx4RARgP6jz3w2huOqk7GR5P027c0m\n3ugzNU52x2Iyjm10EVaSgvIr2tXZmhglBf07cbciXDzuG3ECozs49/tE1qmif5Q9\nRdLwjGJgxhqY5A7mEdmoJAyEES3qyLIgwHBlAoGAS0sA8A5Ay+BbaAGufqSaBnxt\nQgMGNYUgyeG6edksXMRPMqTWbQ5xdjEmv38nQ8AT9RVMZ9TfZqCts1fCD+FgTcAU\niAasrJMvd4mCBhWZWPwGJN+yaeZHvdbrXsSEo3kkyiwjFip03CCJtDJO3MwyowOb\nIxC34Nn4A6XUGjaF7Io=\n-----END PRIVATE KEY-----\n",
6
+ "client_email": "test-gs@heliumhealth.iam.gserviceaccount.com",
7
+ "client_id": "102772078802762265627",
8
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9
+ "token_uri": "https://oauth2.googleapis.com/token",
10
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test-gs%40heliumhealth.iam.gserviceaccount.com"
12
+ }
requirements.txt ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.1.0
2
+ aiohttp==3.8.4
3
+ aiosignal==1.3.1
4
+ altair==4.2.2
5
+ anyio==3.6.2
6
+ async-timeout==4.0.2
7
+ attrs==23.1.0
8
+ beautifulsoup4==4.12.2
9
+ cachetools==5.3.0
10
+ certifi==2022.12.7
11
+ charset-normalizer==3.1.0
12
+ click==8.1.3
13
+ contourpy==1.0.7
14
+ cycler==0.11.0
15
+ decorator==5.1.1
16
+ entrypoints==0.4
17
+ fastapi==0.95.1
18
+ ffmpy==0.3.0
19
+ filelock==3.12.0
20
+ fonttools==4.39.3
21
+ frozenlist==1.3.3
22
+ fsspec==2023.4.0
23
+ google==3.0.0
24
+ google-api-core==2.11.0
25
+ google-api-python-client==2.86.0
26
+ google-auth==2.17.3
27
+ google-auth-httplib2==0.1.0
28
+ google-auth-oauthlib==1.0.0
29
+ googleapis-common-protos==1.59.0
30
+ gradio==3.28.1
31
+ gradio_client==0.1.4
32
+ gspread==5.8.0
33
+ gspread-dataframe==3.3.0
34
+ h11==0.14.0
35
+ httpcore==0.17.0
36
+ httplib2==0.22.0
37
+ httpx==0.24.0
38
+ huggingface-hub==0.14.1
39
+ idna==3.4
40
+ Jinja2==3.1.2
41
+ jsonschema==4.17.3
42
+ kiwisolver==1.4.4
43
+ linkify-it-py==2.0.2
44
+ markdown-it-py==2.2.0
45
+ MarkupSafe==2.1.2
46
+ matplotlib==3.7.1
47
+ mdit-py-plugins==0.3.3
48
+ mdurl==0.1.2
49
+ multidict==6.0.4
50
+ numpy==1.24.3
51
+ oauth2client==4.1.3
52
+ oauthlib==3.2.2
53
+ orjson==3.8.11
54
+ packaging==23.1
55
+ pandas==2.0.1
56
+ Pillow==9.5.0
57
+ protobuf==4.22.3
58
+ pyasn1==0.5.0
59
+ pyasn1-modules==0.3.0
60
+ pydantic==1.10.7
61
+ PyDrive==1.3.1
62
+ pydub==0.25.1
63
+ pyparsing==3.0.9
64
+ pyrsistent==0.19.3
65
+ python-dateutil==2.8.2
66
+ python-multipart==0.0.6
67
+ pytz==2023.3
68
+ PyYAML==6.0
69
+ requests==2.29.0
70
+ requests-oauthlib==1.3.1
71
+ rsa==4.9
72
+ semantic-version==2.10.0
73
+ six==1.16.0
74
+ sniffio==1.3.0
75
+ soupsieve==2.4.1
76
+ starlette==0.26.1
77
+ toolz==0.12.0
78
+ tqdm==4.65.0
79
+ typing_extensions==4.5.0
80
+ tzdata==2023.3
81
+ uc-micro-py==1.0.2
82
+ uritemplate==4.1.1
83
+ urllib3==1.26.15
84
+ uvicorn==0.22.0
85
+ validators==0.20.0
86
+ websockets==11.0.2
87
+ yarl==1.9.2