cloud-sean committed on
Commit
ad56c9a
·
verified ·
1 Parent(s): 6883fbe

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +710 -0
app.py ADDED
@@ -0,0 +1,710 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import streamlit as st
3
+ import os
4
+ import json
5
+ import time
6
+ import requests
7
+ from html import escape
8
+ from random import choice
9
+ import plotly.graph_objects as go
10
+ import networkx as nx
11
+ from openai import AzureOpenAI
12
+ import re
13
+ import pandas as pd
14
+
15
# Shared Azure OpenAI client used by the GPT helper functions in this file.
# The API key and endpoint are read from the AOAI_API_KEY and
# AOAI_AZURE_ENDPOINT environment variables.
client = AzureOpenAI(
    azure_endpoint=os.getenv("AOAI_AZURE_ENDPOINT"),
    azure_deployment="gpt-4o",
    api_key=os.getenv("AOAI_API_KEY"),
    api_version="2024-05-01-preview",
)

# Render the app with Streamlit's wide page layout.
st.set_page_config(layout="wide")
26
+
27
def analyze_healthcare_text(text):
    """Run Azure Text Analytics for Health on ``text`` and return the job result.

    Submits an asynchronous "Healthcare" analysis job, then polls the job
    endpoint once per second until the job reports ``succeeded``.

    Args:
        text: The English free-text note to analyze.

    Returns:
        The decoded JSON body of the final job-status response (a dict).

    Raises:
        RuntimeError: If the ``TA4H_SUBSCRIPTION_KEY`` environment variable
            is not set, if the service does not return an
            ``operation-location`` header, or if the job ends in any state
            other than ``succeeded``.
        TimeoutError: If the job has not finished within the polling budget.
        requests.HTTPError: If the service answers with an HTTP error status.
    """
    base_url = "https://ta4h-endpoint.cognitiveservices.azure.com/language/analyze-text/jobs"
    api_version = "2022-10-01-preview"
    request_timeout = 30   # seconds allowed for each individual HTTP call
    max_wait_seconds = 300  # overall budget for the polling loop

    # SECURITY: the subscription key must come from the environment and never
    # be committed to source control. Any key that was previously hard-coded
    # in this file should be treated as leaked and rotated.
    subscription_key = os.environ.get("TA4H_SUBSCRIPTION_KEY")
    if not subscription_key:
        raise RuntimeError(
            "TA4H_SUBSCRIPTION_KEY environment variable is not set; "
            "cannot call Text Analytics for Health."
        )

    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": subscription_key,
    }

    # Body of the initial POST request that creates the analysis job.
    payload = {
        "tasks": [{"kind": "Healthcare"}],
        "analysisInput": {
            "documents": [
                {
                    "id": "documentId",
                    "text": text,
                    "language": "en",
                }
            ]
        },
    }

    response = requests.post(
        f"{base_url}?api-version={api_version}",
        headers=headers,
        json=payload,
        timeout=request_timeout,
    )
    response.raise_for_status()

    # The job URL is returned in a response header; its last path segment
    # (before any query string) is the job id.
    operation_location = response.headers.get("operation-location")
    if not operation_location:
        raise RuntimeError(
            "Text Analytics for Health did not return an operation-location header."
        )
    job_id = operation_location.split("/")[-1].split("?")[0]

    # NOTE(review): pending status names follow the Azure Language async job
    # API ("notStarted", "running") - confirm against the service version used.
    pending_statuses = {"notStarted", "running"}
    deadline = time.monotonic() + max_wait_seconds

    while True:
        result_response = requests.get(
            f"{base_url}/{job_id}?api-version={api_version}",
            headers=headers,
            timeout=request_timeout,
        )
        result_response.raise_for_status()
        result = result_response.json()
        status = result.get("status")

        if status == "succeeded":
            return result
        # Any terminal state other than success would previously have kept
        # the loop spinning forever; surface it to the caller instead.
        if status not in pending_statuses:
            raise RuntimeError(
                f"Text Analytics for Health job {job_id} ended with status {status!r}."
            )
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Text Analytics for Health job {job_id} did not finish "
                f"within {max_wait_seconds} seconds."
            )
        time.sleep(1)
72
+
73
def annotate_text_with_entities(original_text, entities_data):
    """Render ``original_text`` as HTML with each recognised entity highlighted.

    Every entity becomes a coloured inline "pill" that shows the entity text
    and its category; entities that carry ``links`` additionally get a
    drop-down listing the linked codes. A colour key is prepended to the
    result.

    All text that originates from the note or from the service response is
    HTML-escaped, because callers render the result with
    ``unsafe_allow_html=True``.

    Args:
        original_text: The note that was analysed. Entity ``offset`` and
            ``length`` values are applied to this string as Python string
            indices.
        entities_data: Result dict with the entities under
            ``entities_data['documents'][0]['entities']``. Each entity needs
            ``text``, ``category``, ``offset`` and ``length``; ``links`` is
            optional (items with ``id`` and ``dataSource``).

    Returns:
        A ``(html, category_to_color)`` tuple: the annotated HTML string and
        a dict mapping each category to its ``#rrggbb`` colour.
    """
    palette = [
        "#ff4b4b",
        "#ffa421",
        "#ffe312",
        "#21c354",
        "#00d4b1",
        "#00c0f2",
        "#1c83e1",
        "#803df5",
        "#808495",
    ]
    # Two alpha suffixes, alternated by entity offset, so that neighbouring
    # entities of the same category remain visually distinguishable.
    opacities = ["33", "66"]
    separator_html = (
        '<span style="border-left: 1px solid; opacity: 0.1; '
        'margin-left: 0.5rem; align-self: stretch;"></span>'
    )

    entities = entities_data['documents'][0]['entities']

    # First-appearance order (instead of set order) keeps the colour
    # assignment identical from one run to the next.
    unique_categories = list(dict.fromkeys(entity['category'] for entity in entities))

    category_to_color = {}
    for position, category in enumerate(unique_categories):
        if position < len(palette):
            category_to_color[category] = palette[position]
        else:
            # Palette exhausted: fall back to a random colour.
            category_to_color[category] = '#' + ''.join(
                choice('0123456789ABCDEF') for _ in range(6)
            )

    def create_entity_html(entity, entity_id):
        """Build the HTML pill for a single entity."""
        color = category_to_color[entity["category"]] + opacities[entity["offset"] % len(opacities)]
        category_label = escape(entity["category"])

        parts = [
            f'<span id="entity-{entity_id}"><span style="display: inline-flex; flex-direction: row; align-items: center; background: {color}; border-radius: 0.5rem; padding: 0.25rem 0.5rem; overflow: hidden; line-height: 1;">{escape(entity["text"])}',
            separator_html,
        ]

        if entity.get("links"):
            # Linked codes are offered in a drop-down above the category label.
            options = "".join(
                f'<option value="{escape(str(link["id"]))}">'
                f'{escape(str(link["dataSource"]))} Code {escape(str(link["id"]))}</option>'
                for link in entity["links"]
            )
            # Kept on one line: indented multi-line HTML can be interpreted
            # as a code block by the Markdown renderer.
            parts.append(
                '<span style="margin-left: 0.5rem; display: flex; flex-direction: column; align-items: flex-start;">'
                f'<select style="font-size: 0.75rem; opacity: 0.5;">{options}</select>'
                f'<label style="font-size: 0.6rem; margin-top: 0.25rem;">{category_label}</label>'
                '</span>'
            )
        else:
            parts.append(
                f'<span style="margin-left: 0.5rem; font-size: 0.75rem; opacity: 0.5;">{category_label}</span>'
            )

        parts.append('</span></span>')
        return "".join(parts)

    # Walk the text front to back, escaping the plain text between entities
    # and substituting each entity span with its pill.
    pieces = []
    cursor = 0
    ordered = sorted(enumerate(entities), key=lambda pair: pair[1]['offset'])
    for entity_id, entity in ordered:
        start = entity['offset']
        end = start + entity['length']
        if start < cursor:
            # Overlaps an entity that was already rendered; splicing it in
            # would corrupt the markup, so it is left out of the text (its
            # category still appears in the colour key).
            continue
        pieces.append(escape(original_text[cursor:start]))
        pieces.append(create_entity_html(entity, entity_id))
        cursor = end
    pieces.append(escape(original_text[cursor:]))

    color_key_section = "<strong>Color Key:</strong><br>" + "".join(
        f'<span style="display: inline-block; background: {color}; width: 1em; height: 1em; margin-right: 0.5em; vertical-align: middle;"></span>{escape(category)}<br>'
        for category, color in category_to_color.items()
    )

    return color_key_section + "".join(pieces), category_to_color
161
+
162
def create_interactive_graph_from_json(json_data, category_to_color):
    """Build a Plotly figure of entities (nodes) and their relations (edges).

    The figure carries two alternative node traces - every entity, or only
    the entities that take part in at least one relation - and two buttons
    that switch between them.

    Args:
        json_data: Result dict with ``json_data['documents'][0]['entities']``
            and, optionally, ``['relations']``. Each relation is expected to
            reference its entities through ``ref`` strings whose last path
            segment is the entity index.
        category_to_color: Dict mapping every entity category to a colour.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    document = json_data['documents'][0]
    entities = document['entities']
    relations = document.get('relations', [])

    graph = nx.DiGraph()

    # One node per entity, keyed by its index in the entity list.
    for index, entity in enumerate(entities):
        graph.add_node(index, label=entity['text'], category=entity['category'])

    # One edge per relation, from its first referenced entity to its second.
    for relation in relations:
        source_index = int(relation['entities'][0]['ref'].split('/')[-1])
        target_index = int(relation['entities'][1]['ref'].split('/')[-1])
        # add_edge() would silently create attribute-less nodes for unknown
        # indices, which later fails on the label/category lookups.
        if source_index in graph and target_index in graph:
            graph.add_edge(source_index, target_index, label=relation['relationType'])

    # Fixed seed so the layout does not reshuffle on every rerun.
    pos = nx.spring_layout(graph, seed=42)

    def node_trace_for(node_ids, visible):
        """Scatter trace for the given nodes, coloured by their own category."""
        return go.Scatter(
            x=[pos[i][0] for i in node_ids],
            y=[pos[i][1] for i in node_ids],
            text=[graph.nodes[i]['label'] for i in node_ids],
            mode='markers+text',
            hoverinfo='text',
            visible=visible,
            marker=dict(
                color=[category_to_color[graph.nodes[i]['category']] for i in node_ids],
                size=10,
            ),
        )

    # Edge segments are separated by None so Plotly draws them as
    # disconnected lines within a single trace.
    x_edges = []
    y_edges = []
    for source, target in graph.edges:
        x_edges += [pos[source][0], pos[target][0], None]
        y_edges += [pos[source][1], pos[target][1], None]

    edge_trace = go.Scatter(
        x=x_edges,
        y=y_edges,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines',
    )

    all_nodes = list(graph.nodes)
    connected_nodes = sorted({node for edge in graph.edges for node in edge})

    # The initial view corresponds to the "All Entities" button, so the
    # subset trace starts hidden rather than being drawn on top.
    node_trace_all = node_trace_for(all_nodes, True)
    node_trace_with_edges = node_trace_for(connected_nodes, False)

    fig = go.Figure(
        data=[edge_trace, node_trace_all, node_trace_with_edges],
        layout=go.Layout(
            title=dict(
                text='Entities and Relationships in Patient Notes',
                font=dict(size=16),
            ),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            updatemenus=[dict(
                type="buttons",
                x=1.15,
                y=1.2,
                buttons=[
                    dict(label="All Entities",
                         method="update",
                         args=[{"visible": [True, True, False]}]),
                    dict(label="Entities with Relationships",
                         method="update",
                         args=[{"visible": [True, False, True]}]),
                ],
            )],
        ),
    )

    return fig
234
+
235
def format_sdoh_entities_as_list(json_data):
    """Format the social-determinants-of-health entities as a Markdown list.

    Args:
        json_data: Result dict with a ``documents`` list; each document has
            an ``entities`` list whose items carry ``category`` and ``text``.

    Returns:
        A string with one ``- **CATEGORY** : 'text'`` bullet per matching
        entity, or an empty string when nothing matches.
    """
    # Categories are compared upper-cased with underscores removed, so both
    # "LivingStatus" and "LIVING_STATUS" spellings are recognised.
    relevant_categories = frozenset({
        'EMPLOYMENT',
        'LIVINGSTATUS',
        'SUBSTANCEUSE',
        'SUBSTANCEUSEAMOUNT',
        'ETHNICITY',
    })

    formatted_result = []
    for document in json_data['documents']:
        for entity in document['entities']:
            category = entity['category'].upper()
            if category.replace('_', '') in relevant_categories:
                formatted_result.append(f"- **{category}** : '{entity['text']}' \n")

    return '\n'.join(formatted_result)
248
+
249
+ table_arr = [
250
+ """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
251
+ A Access to Care Access to health care is the timely use of personal health services to achieve the best possible health outcomes and pertains to a person's ability to access needed health or social services.
252
+ Access to Care Availability - Hospitals & Clinics A1 FQHCs, Rate Per Low-Income Population Hospitals and Clinics are very over crowded. They are a long way from where I live. Services not provided in my area. Urgent Care closed when I am sick.
253
+ A2 Hospital Beds Per Capita
254
+ A3 Proximity to Hospitals with ER This refers to comments about not having enough doctors and nurses in a given geographic context.
255
+ Availability - Mental Health Care A4 Mental Health Professional Shortage Areas "This refers to comments about not haveing enough Doctors and nurses to service the need of the mental needs of the community. Long wait times to get an appointment. Doctors not taking new patients
256
+ Having to go to ER for mental health issues"
257
+ A5 Mental Health Providers Comments related to not having enough mental health providers
258
+ Availability - Primary Care A6 Primary Care Providers "This refers to comments about not having enough Doctors and nurses to service the need of the mental needs of the community. Long wait times to get an appointment. Doctors not taking new patients
259
+ Having to go to ER for mental health issues"
260
+ A7 Primary Care Shortage Areas This refers to comments about not having enough doctors and nurses in a given geographic context.
261
+ Availability - Specialty Care A8 Maternal Care Providers
262
+ A9 Dental Care Providers
263
+ Barriers - Health Literacy A10 Educational Attainment Appointments on-line and do not know how to make appointment. They do not speak my language. I do not know what or who to ask a question. Do not understand what they did or what I need to do now.
264
+ A11 Limited English Proficiency
265
+ Barriers - Medical Insurance A12 Health Insurance Disparities I can't get insurance from my job. I am not eligible for medicaid or do not know how to sign up. Can not afford Insurance (see Financial Stability / Income)
266
+ A13 Population without Medical Insurance
267
+ Barriers - Transportation A14 Distance to Public Transit I don't have a way to get there. I don't have a car. We only have one car. Can't afford a car (Financial Stability / Income)
268
+ A15 Households with No Vehicle 7
269
+ Notable Comments - Access to Care A16 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
270
+ """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
271
+ Health Conditions B Health Conditions According to the World Health Organization (WHO), health is a state of complete physical, mental, and social well-being and not merely the absence of disease. Chronic diseases are defined broadly as conditions that last 1 year or more and require ongoing medical attention or limit daily activities.
272
+ Asthma & COPD B1 Lung Disease Mortality Comments related to Asthma & COPD
273
+ B2 Lung Disease Prevalence
274
+ Cancers B3 Cancer Prevalence Comments related to Cancers
275
+ B4 Cancer Mortality*
276
+ Chronic Brain Disorders B5 Alzheimer's Disease Mortality* Comments related to Chronic Brain Disorders
277
+ B6 Alzheimer's Disease Prevalence
278
+ Heart Disease & Stroke B7 Heart Disease & Stroke Mortality* Comments related to Heart Disease & Stroke
279
+ B8 Heart Disease Prevalence
280
+ Kidney & Liver Diseases B9 Kidney Disease Prevalence Comments related to Chronic Kidney & Liver Diseases
281
+ B10 Liver Disease Mortality*
282
+ Obesity & Diabetes B11 Diabetes Comments related to Obesity & Diabetes
283
+ B12 Obesity
284
+ Preventable Death B13 Premature Death Disparities*
285
+ Aging Conditions B14
286
+ Notable Comments - Health Conditions B15 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
287
+ """""",
288
+ """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
289
+ Mental Health Health Outcomes - Anxiety & Depression D1 Mental Health Diagnoses* COVID, economy, etc cause stress and depression
290
+ D2 Poor Mental Health
291
+ Health Outcomes - Deaths of Despair D3 Deaths of Despair* Suicide, and drug overdose. Pill mills. Big Pharma
292
+ D4 Suicide Mortality*
293
+ Risk Factors - Access to Care D5 Access to Mental Health Providers Lack of services for the mental ill.
294
+ D6 Medical Insurance
295
+ Risk Factors - Drugs & Alcohol D7 Binge Drinking Lack of treatment options
296
+ D8 Substance Use Disorder*
297
+ Risk Factors - Stress & Trauma D9 Unemployment Domestic violance. Child abuse.
298
+ D10 Violent Crime Rate*
299
+ Notable Comments - Mental Health D11 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
300
+ """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
301
+ Food Security E Food Security Food security means that all people, at all times, have physical, social, and economic access to sufficient, safe, and nutritious food that meets their food preferences and dietary needs for an active and healthy life.
302
+ Economic Security E1 Free/Reduced Price Lunch Groceries cost too much. No money for good food.
303
+ E2 Poverty (100% FPL)
304
+ Food Access E3 Access to Healthy Food No supermarkets in the area. Bad food choice selection.
305
+ E4 Healthy Food Access Disparities
306
+ E5 Local Food Outlets
307
+ E6 SNAP-Authorized Retailers No supermarkets accept SNAP
308
+ Notable Comments - Food Security E7 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
309
+ """""",
310
+ """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
311
+ Education F Education Educational attainment is a well-established social determinant of health. It affects health through many mechanisms such as neural development, biological aging, health literacy and health behaviors, sense of control and empowerment, and life chances.
312
+ Achievement F1 Chronic Absenteeism Kids do not do well in school. Skipping class
313
+ F2 English Language Arts Proficiency
314
+ Attainment F3 Associate's Degree or Higher Students dropping out of school or not going to college
315
+ F4 Educational Attainment Disparities
316
+ F5 High School Graduation Rate
317
+ Early Childhood F6 Childcare Scarcity Lack of affordable or convient pre-schools.
318
+ F7 Preschool Enrollment
319
+ Notable Comments - Education F8 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
320
+ """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
321
+ Financial Stability G Financial Stability Financial stability is the ability for someone to withstand financial emergencies and sustain themselves and their family. Financial stability enables families to access safe housing, healthy foods, and other necessities, to engage fully in their communities, and to plan for the future.
322
+ Employment G1 Labor Force Participation Rate No jobs or well paying jobs. Only min wage jobs
323
+ G2 Unemployment
324
+ Income G3 Childhood Poverty Rate Do not make enough to pay the bills. Just getting by. Pay check to pay check
325
+ G4 Senior Poverty Rate
326
+ G5 Income Inequality
327
+ G6 Median Household Income
328
+ Security G7 Housing Cost Burden (30%) Rent takes up all my income. Can't afford to but or rent a house.
329
+ G8 Population with Debt*
330
+ Notable Comments - Financial Stability G9 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
331
+ """""",
332
+ """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
333
+ H Housing Homelessness can take many forms, with people living on the streets, in encampments or shelters, in transitional housing programs, or doubled up with family and friends.
334
+ Housing Homelessness H1 Evictions* Homelessness status or mention of increase
335
+ H2 Homeless Population*
336
+ H3 Homeless Students
337
+ Housing Costs H4 Low-Income Housing Appartments too expensive. Can not afford to buy a house. Rent take up my budget
338
+ H5 Housing + Transportation Affordability Index
339
+ H6 Median Household Income
340
+ H7 Severe Housing Cost Burden (50%)
341
+ Housing Quality H8 Overcrowded Housing Housing conditions are not desirable for overall health and well-being.
342
+ H9 Owner Occupied Households
343
+ H10 Renter Occupied Households
344
+ H11 Incomplete Facilities (Plumbing, Kitchen)
345
+ Notable Comments - Housing H12 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
346
+ """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
347
+ Climate & Natural Environment I Climate & Natural Environment Climate and the natural environment are about the impacts of climate change on livelihoods in the community.
348
+ Physical Environment - Air & Water I1 Air Toxics Risk Do not trust the water. Chemicals in the air. Too close to the fields with pesticides
349
+ I2 Particulate Matter (PM 2.5) Air quality from particulate matter (PM 2.5) and toxics risks are harmful to short-term and long-term health and has been associated with chronic health conditions such as lung disease, respiratory infections and heart disease among others.
350
+ Physical Environment - Heat & Climate I3 Disaster Risk Index Global warming. Droughts. Floods. Wild fires
351
+ I4 Extreme Heat Days* Extreme heat is a climate-driven hazard that affects the capacity of individuals, workers and communities.
352
+ Notable Comments - Climate & Natural Environment I5 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
353
+ """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
354
+ Community Safety J Community Safety Community safety is both about people's sense of safety and about the community's crime rate.
355
+ Injuries J1 Injury Mortality (Falls, Firearms, Drowning)* Injuries related to Community Safety
356
+ J2 Motor Vehicle Crash Fatality
357
+ Public Safety J3 Property Crime* Comments that the community is not safe. Unsafe neighborhoods
358
+ J4 Violent Crime*
359
+ Risk Factors J5 Disengaged Youth Comments about risk factors
360
+ J6 School Suspensions + Expulsions
361
+ Notable Comments - Community Safety J7 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
362
+ """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
363
+ Community Infrastructure K Community Infrastructure Having optimal environment and infrastructure for well-being means that streets are designed to be safe and enjoyable for those who are walking or biking. It means that air and water are safe to breathe and drink. It means that technical access infrastructure like broadband internet are available, to connect people to opportunities.
364
+ Access to Childcare K1 Childcare Access Disparities Lack of affordable or convient pre-schools.
365
+ K2 Childcare Scarcity
366
+ Community Amenities K3 Walkability No walking paths or parks in the area
367
+ Internet & Technology K4 Cellular Plan Only Bad or no Internet for people to connect to. Few daycare centers. Have to take the bus everywhere.
368
+ K5 Internet Access Disparities
369
+ K6 No Computer
370
+ K7 No High-Speed Internet
371
+ Transportation K8 Tansportation Access Inadequate transportation modes and access
372
+ Notable Comments - Built Environment K9 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
373
+ """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
374
+ Social & Economic Context L Social & Economic Context Social & Economic Context is about whether people feel a sense of belonging to the place where they live and whether they feel connected to one another. Do they trust their leaders? Do they engage in community life and participate in decision-making?
375
+ Civic Engagement L1 Census Response Rate People are not engaged or aware of community or city programs. Not able or desire to vote
376
+ L2 Voter Participation Rate*
377
+ Economic Vitality L3 Business Vacancy Rate Lack of jobs in the community. Unemployment. Closed businesses
378
+ L4 Funding for Public Works & Welfare*
379
+ Place Attachment L5 Home Ownership Too many rentals. People move around a lot
380
+ L6 Net Migration (Population Loss)*
381
+ Social Inclusion L7 501c3 organizations People and seniors who live alone. Feelings of isolation and loniliness
382
+ L8 Neighborhood Segregation
383
+ L9 Older Adults Living Alone
384
+ Socioeconomic Disadvantage L10 Area Deprivation Index Comments related to socioeconomic disadvantage
385
+ Notable Comments - Social Environment L11 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)"""
386
+ ]
387
+
388
def chunk_by_speaker(transcript):
    """
    Group the lines of a transcript by the speaker who said them.

    A line that begins with "Speaker <number>" switches the active speaker;
    that line and every following line, up to the next such marker, are
    credited to that speaker. Lines that appear before the first marker are
    discarded.

    Args:
        transcript: The transcript string.

    Returns:
        A dictionary mapping each speaker number (int) to all of that
        speaker's lines, each terminated by a newline, in transcript order.
    """
    collected = {}
    active = None

    for raw_line in transcript.splitlines():
        header = re.match(r"Speaker (\d+)", raw_line)
        if header is not None:
            # A new speaker marker: make that speaker the active one.
            active = int(header.group(1))
            collected.setdefault(active, [])

        # Nothing to attribute until the first marker has been seen.
        if active is None:
            continue

        collected[active].append(raw_line + "\n")

    return {speaker: "".join(parts) for speaker, parts in collected.items()}
426
+
427
+
428
+
429
def _chat_completion(system_message, input_text, **request_options):
    """Send one system + user exchange to the chat model and return its reply.

    Args:
        system_message: Instructions sent with the "system" role.
        input_text: Content sent with the "user" role.
        **request_options: Extra keyword arguments forwarded unchanged to
            ``client.chat.completions.create``.

    Returns:
        The text of the first choice, or a string starting with
        "Error generating response:" if the call raised.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": input_text},
            ],
            **request_options,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberately broad: callers display the returned text directly, so
        # a failure is reported as a message instead of aborting the page.
        return f"Error generating response: {e}"


def generate_response(system_message, input_text):
    """
    Generates a JSON-formatted response from the chat model.

    The request is made with ``response_format={"type": "json_object"}``.
    NOTE(review): JSON mode is documented as requiring the word "JSON" to
    appear somewhere in the messages - confirm that the prompts passed in
    here satisfy that.

    Args:
        system_message: The system prompt describing the task.
        input_text: The input text to be analyzed.

    Returns:
        The model's reply as a string, or an "Error generating response: ..."
        string if the request failed.
    """
    return _chat_completion(
        system_message,
        input_text,
        response_format={"type": "json_object"},
    )


def generate_response_summary(system_message, input_text):
    """
    Generates a free-text response from the chat model.

    Args:
        system_message: The system prompt describing the task.
        input_text: The input text to be analyzed.

    Returns:
        The model's reply as a string, or an "Error generating response: ..."
        string if the request failed.
    """
    return _chat_completion(system_message, input_text)
475
+
476
+
477
# Divide the page into two columns with a 2:5 width ratio.
col1, col2 = st.columns([2, 5])

# Seed the session-state slots with the placeholder string 'value'.
# r_annotated is compared against this placeholder further down to decide
# whether stored results exist, so the placeholder must not change.
for state_key in ('r', 'r_annotated', 'colour_to_category'):
    if state_key not in st.session_state:
        st.session_state[state_key] = 'value'

with col1:
    col1.subheader("Patient Note Input")

    st.text("Enter your text input below:")

    # The widget carries a real label (hidden visually) instead of an empty
    # string; the prompt shown to the user is the st.text line above.
    dax_input = st.text_area("Patient note", height=500, label_visibility="collapsed")
    analyze_btn = st.button("Analyze")
496
+
497
+
498
+
499
+ with col2:
500
+ col2.subheader("Text Analytics for Health Output")
501
+ if st.session_state.r_annotated != 'value':
502
+ with st.expander("Entity Mappings"):
503
+ st.markdown(st.session_state.r_annotated, unsafe_allow_html=True)
504
+ with st.expander("Show Relationships"):
505
+ st.plotly_chart(create_interactive_graph_from_json(st.session_state.r, st.session_state.colour_to_category), use_container_width=True)
506
+ with st.expander("Show JSON"):
507
+ st.json(st.session_state.r)
508
+ with st.expander("Show SDOH"):
509
+ st.write(format_sdoh_entities_as_list(st.session_state.r))
510
+
511
+ if analyze_btn:
512
+
513
+ st.session_state.r = analyze_healthcare_text(dax_input)["tasks"]["items"][0]["results"]
514
+ r_annotated, category_to_color = annotate_text_with_entities(dax_input, st.session_state.r)
515
+ st.session_state.r_annotated = r_annotated
516
+ st.session_state.colour_to_category = category_to_color
517
+ with st.expander("Entity Mappings"):
518
+ st.markdown(r_annotated, unsafe_allow_html=True)
519
+ with st.expander("Show Relationships"):
520
+ st.plotly_chart(create_interactive_graph_from_json(st.session_state.r, category_to_color), use_container_width=True)
521
+ with st.expander("Show JSON"):
522
+ st.json(st.session_state.r)
523
+ with st.expander("Show SDOH"):
524
+ st.write("Social Determinants of Health (SDOH) Entities")
525
+ st.write(format_sdoh_entities_as_list(st.session_state.r))
526
+ col2.subheader("GPT-4o for Health Output")
527
+ with st.expander("General Themes"):
528
+ st.write(generate_response_summary("Create a overal report of the SDOH themes mentioned in the text", dax_input))
529
# "Faster" SDOH labelling: every speaker chunk is sent to the model once, with
# the complete reference table embedded in a single system prompt. The results
# of all chunks are merged and shown as one table.
with st.expander("Specific Themes (faster but less detailed)"):
    # This prompt is passed to the model as-is (it is never run through
    # str.format), so the JSON example uses single braces. Doubled braces
    # would reach the model literally.
    system_message_template_total = """
You will get snippets from a transcript with a patient. Your job is to label the transcript with SDOH. You should reference the table below only when assigning values.

Reference Table:
Food Security E Food Security Food security means that all people, at all times, have physical, social, and economic access to sufficient, safe, and nutritious food that meets their food preferences and dietary needs for an active and healthy life.
Economic Security E1 Free/Reduced Price Lunch Groceries cost too much. No money for good food.
E2 Poverty (100% FPL)
Food Access E3 Access to Healthy Food No supermarkets in the area. Bad food choice selection.
E4 Healthy Food Access Disparities
E5 Local Food Outlets
E6 SNAP-Authorized Retailers No supermarkets accept SNAP
Notable Comments - Food Security E7 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Education F Education Educational attainment is a well-established social determinant of health. It affects health through many mechanisms such as neural development, biological aging, health literacy and health behaviors, sense of control and empowerment, and life chances.
Achievement F1 Chronic Absenteeism Kids do not do well in school. Skipping class
F2 English Language Arts Proficiency
Attainment F3 Associate's Degree or Higher Students dropping out of school or not going to college
F4 Educational Attainment Disparities
F5 High School Graduation Rate
Early Childhood F6 Childcare Scarcity Lack of affordable or convient pre-schools.
F7 Preschool Enrollment
Notable Comments - Education F8 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Financial Stability G Financial Stability Financial stability is the ability for someone to withstand financial emergencies and sustain themselves and their family. Financial stability enables families to access safe housing, healthy foods, and other necessities, to engage fully in their communities, and to plan for the future.
Employment G1 Labor Force Participation Rate No jobs or well paying jobs. Only min wage jobs
G2 Unemployment
Income G3 Childhood Poverty Rate Do not make enough to pay the bills. Just getting by. Pay check to pay check
G4 Senior Poverty Rate
G5 Income Inequality
G6 Median Household Income
Security G7 Housing Cost Burden (30%) Rent takes up all my income. Can't afford to but or rent a house.
G8 Population with Debt*
Notable Comments - Financial Stability G9 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
H Housing Homelessness can take many forms, with people living on the streets, in encampments or shelters, in transitional housing programs, or doubled up with family and friends.
Housing Homelessness H1 Evictions* Homelessness status or mention of increase
H2 Homeless Population*
H3 Homeless Students
Housing Costs H4 Low-Income Housing Appartments too expensive. Can not afford to buy a house. Rent take up my budget
H5 Housing + Transportation Affordability Index
H6 Median Household Income
H7 Severe Housing Cost Burden (50%)
Housing Quality H8 Overcrowded Housing Housing conditions are not desirable for overall health and well-being.
H9 Owner Occupied Households
H10 Renter Occupied Households
H11 Incomplete Facilities (Plumbing, Kitchen)
Notable Comments - Housing H12 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Climate & Natural Environment I Climate & Natural Environment Climate and the natural environment are about the impacts of climate change on livelihoods in the community.
Physical Environment - Air & Water I1 Air Toxics Risk Do not trust the water. Chemicals in the air. Too close to the fields with pesticides
I2 Particulate Matter (PM 2.5) Air quality from particulate matter (PM 2.5) and toxics risks are harmful to short-term and long-term health and has been associated with chronic health conditions such as lung disease, respiratory infections and heart disease among others.
Physical Environment - Heat & Climate I3 Disaster Risk Index Global warming. Droughts. Floods. Wild fires
I4 Extreme Heat Days* Extreme heat is a climate-driven hazard that affects the capacity of individuals, workers and communities.
Notable Comments - Climate & Natural Environment I5 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Community Safety J Community Safety Community safety is both about people's sense of safety and about the community's crime rate.
Injuries J1 Injury Mortality (Falls, Firearms, Drowning)* Injuries related to Community Safety
J2 Motor Vehicle Crash Fatality
Public Safety J3 Property Crime* Comments that the community is not safe. Unsafe neighborhoods
J4 Violent Crime*
Risk Factors J5 Disengaged Youth Comments about risk factors
J6 School Suspensions + Expulsions
Notable Comments - Community Safety J7 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Community Infrastructure K Community Infrastructure Having optimal environment and infrastructure for well-being means that streets are designed to be safe and enjoyable for those who are walking or biking. It means that air and water are safe to breathe and drink. It means that technical access infrastructure like broadband internet are available, to connect people to opportunities.
Access to Childcare K1 Childcare Access Disparities Lack of affordable or convient pre-schools.
K2 Childcare Scarcity
Community Amenities K3 Walkability No walking paths or parks in the area
Internet & Technology K4 Cellular Plan Only Bad or no Internet for people to connect to. Few daycare centers. Have to take the bus everywhere.
K5 Internet Access Disparities
K6 No Computer
K7 No High-Speed Internet
Transportation K8 Tansportation Access Inadequate transportation modes and access
Notable Comments - Built Environment K9 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Social & Economic Context L Social & Economic Context Social & Economic Context is about whether people feel a sense of belonging to the place where they live and whether they feel connected to one another. Do they trust their leaders? Do they engage in community life and participate in decision-making?
Civic Engagement L1 Census Response Rate People are not engaged or aware of community or city programs. Not able or desire to vote
L2 Voter Participation Rate*
Economic Vitality L3 Business Vacancy Rate Lack of jobs in the community. Unemployment. Closed businesses
L4 Funding for Public Works & Welfare*
Place Attachment L5 Home Ownership Too many rentals. People move around a lot
L6 Net Migration (Population Loss)*
Social Inclusion L7 501c3 organizations People and seniors who live alone. Feelings of isolation and loniliness
L8 Neighborhood Segregation
L9 Older Adults Living Alone
Socioeconomic Disadvantage L10 Area Deprivation Index Comments related to socioeconomic disadvantage
Notable Comments - Social Environment L11 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)

There can be multiple excerpts for the same category, include all of them. If there are none, just return an empty values array.
make sure to give the full context of the excerpt, so it is clear without having to go back to the original transcript.
You should return your result in JSON as follows (example):

{
"values" : [
{
"category" : "Availability - Primary Care",
"subcategory": "A6",
"codename" : "Primary Care Providers",
"excerpt_from_text_evidence" : "xxxx",
"speaker" : "1"
},
...n]
}
"""

    # chunk_by_speaker yields a mapping whose items are (speaker number, text);
    # flatten it into a list of records so the chunks can be counted and iterated.
    chunks = chunk_by_speaker(dax_input)
    speaker_list = [
        {"speaker_number": speaker_number, "text": text.strip()}
        for speaker_number, text in chunks.items()
    ]
    progress_text = "Processing chunks. Please wait."
    my_bar = st.progress(0, text=progress_text)

    total_chunks = len(speaker_list)

    all_outputs_faster = []

    # The transcript chunk is deliberately not printed: it is patient text and
    # should not end up in the server's stdout/logs.
    for chunk_counter, chunk in enumerate(speaker_list, start=1):
        response = generate_response(system_message_template_total, chunk["text"])

        # The model's reply is untrusted text. A malformed reply skips this
        # chunk with a visible warning instead of aborting the whole run.
        try:
            response_json = json.loads(response)
        except (TypeError, json.JSONDecodeError):
            st.warning(
                f"Skipped speaker {chunk['speaker_number']}: the model did not return valid JSON."
            )
        else:
            values = response_json.get("values") if isinstance(response_json, dict) else None
            if isinstance(values, list):
                all_outputs_faster.extend(values)

        # Update progress bar (the loop only runs when total_chunks > 0).
        my_bar.progress(chunk_counter / total_chunks, text=progress_text)

    df_faster = pd.DataFrame(all_outputs_faster)
    st.dataframe(df_faster)
650
+
651
+
652
+
653
+
654
+
655
+
656
# "Slower" SDOH labelling: every speaker chunk is sent to the model once per
# entry in table_arr, with only that entry substituted into the system prompt.
# This makes len(table_arr) * len(speaker_list) model calls in total.
with st.expander("Specific Themes (slower but more detailed)"):
    # chunk_by_speaker yields a mapping whose items are (speaker number, text);
    # flatten it into a list of records so the chunks can be counted and iterated.
    chunks = chunk_by_speaker(dax_input)
    speaker_list = [
        {"speaker_number": speaker_number, "text": text.strip()}
        for speaker_number, text in chunks.items()
    ]

    # Rendered with str.format below: {reference_table} is the only
    # placeholder, and the doubled braces become literal JSON braces.
    system_message_template = """
You will get snippets from a transcript with a patient. Your job is to label the transcript with SDOH. You should reference the table below only when assigning values.

Reference Table: {reference_table}
There can be multiple excerpts for the same category, include all of them. If there are none, just return an empty values array.
make sure to give the full context of the excerpt, so it is clear without having to go back to the original transcript.
You should return your result in JSON as follows (example):

{{
"values" : [
{{
"category" : "Availability - Primary Care",
"subcategory": "A6",
"codename" : "Primary Care Providers",
"excerpt_from_text_evidence" : "xxxx",
"speaker" : "1"
}},
...n]
}}
"""

    progress_text = "Processing chunks. Please wait."
    my_bar = st.progress(0, text=progress_text)

    # One unit of progress per (table, chunk) pair.
    total_chunks = len(table_arr) * len(speaker_list)
    chunk_counter = 0

    all_outputs = []

    for table in table_arr:
        # The prompt only depends on the table, so build it once per table.
        system_message = system_message_template.format(reference_table=table)
        for chunk in speaker_list:
            response = generate_response(system_message, chunk["text"])

            # The model's reply is untrusted text. A malformed reply skips
            # this call with a visible warning instead of aborting the run.
            try:
                response_json = json.loads(response)
            except (TypeError, json.JSONDecodeError):
                st.warning(
                    f"Skipped speaker {chunk['speaker_number']}: the model did not return valid JSON."
                )
            else:
                values = response_json.get("values") if isinstance(response_json, dict) else None
                if isinstance(values, list):
                    all_outputs.extend(values)

            # Update progress bar after every model call so it reaches 100%
            # exactly when the last (table, chunk) pair is done.
            chunk_counter += 1
            my_bar.progress(chunk_counter / total_chunks, text=progress_text)

    df = pd.DataFrame(all_outputs)
    st.dataframe(df)
706
+
707
+
708
+
709
+
710
+