# chna_ai_space / app.py
# cloud-sean's picture
# Create app.py
# ad56c9a verified
import openai
import streamlit as st
import os
import json
import time
import requests
from html import escape
from random import choice
import plotly.graph_objects as go
import networkx as nx
from openai import AzureOpenAI
import re
import pandas as pd
# Initialize the AzureOpenAI client
# The API key and endpoint are read from the AOAI_API_KEY and
# AOAI_AZURE_ENDPOINT environment variables; os.environ.get returns None when
# a variable is unset, so both must be configured before the app starts.
client = AzureOpenAI(
api_key=os.environ.get("AOAI_API_KEY"),
api_version="2024-05-01-preview",
azure_endpoint=os.environ.get("AOAI_AZURE_ENDPOINT"),
azure_deployment="gpt-4o"
)
# Configure the Streamlit page with the "wide" layout.
st.set_page_config(layout="wide")
def analyze_healthcare_text(text, poll_interval=1.0, max_wait=300.0, request_timeout=30.0):
    """Submit ``text`` to the Text Analytics for Health job API and wait for it.

    A "Healthcare" analysis job is created with a POST request, then the job
    status endpoint is polled until the job reports ``succeeded``.

    Args:
        text: The English-language document to analyse.
        poll_interval: Seconds to sleep between status polls.
        max_wait: Maximum number of seconds to keep polling before giving up.
        request_timeout: Per-request timeout (seconds) passed to ``requests``.

    Returns:
        The decoded JSON body of the final status response (a dict).

    Raises:
        requests.HTTPError: If the service answers with an HTTP error status.
        RuntimeError: If no ``operation-location`` header is returned, or the
            job finishes as ``failed`` or ``cancelled``.
        TimeoutError: If the job has not succeeded within ``max_wait`` seconds.
    """
    # Endpoint, headers and subscription key
    base_url = "https://ta4h-endpoint.cognitiveservices.azure.com/language/analyze-text/jobs"
    api_version = "2022-10-01-preview"
    headers = {
        "Content-Type": "application/json",
        # SECURITY / NOTE(review): this key was committed in source. Set
        # TA4H_API_KEY in the environment, rotate the key, and then delete the
        # literal fallback below. It is kept only so existing deployments that
        # have not set the variable keep working.
        "Ocp-Apim-Subscription-Key": os.environ.get(
            "TA4H_API_KEY", "00667ce9381d46a3a279c4799dd698d0"
        ),
    }
    # Data to be sent in the initial POST request
    data = {
        "tasks": [{"kind": "Healthcare"}],
        "analysisInput": {
            "documents": [
                {
                    "id": "documentId",
                    "text": text,
                    "language": "en",
                }
            ]
        },
    }

    # Making the initial POST request; fail loudly on 4xx/5xx instead of
    # crashing later on a missing header.
    response = requests.post(
        f"{base_url}?api-version={api_version}",
        headers=headers,
        json=data,
        timeout=request_timeout,
    )
    response.raise_for_status()

    # Get the operation-location from the response header
    operation_location = response.headers.get('operation-location')
    if not operation_location:
        raise RuntimeError(
            "Text Analytics for Health did not return an operation-location header"
        )
    # Extract JOB-ID from the operation-location
    job_id = operation_location.split('/')[-1].split('?')[0]

    # Poll the job until it succeeds, ends in a terminal failure state, or the
    # deadline passes (previously a failed job made this loop spin forever).
    deadline = time.monotonic() + max_wait
    while True:
        result_response = requests.get(
            f"{base_url}/{job_id}?api-version={api_version}",
            headers=headers,
            timeout=request_timeout,
        )
        result_response.raise_for_status()
        result = result_response.json()
        status = result.get('status')
        if status == 'succeeded':
            return result
        if status in ('failed', 'cancelled'):
            raise RuntimeError(
                f"Text Analytics for Health job {job_id} ended with status {status!r}"
            )
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Text Analytics for Health job {job_id} did not finish within {max_wait} seconds"
            )
        time.sleep(poll_interval)
def _entity_html(entity, entity_id, color):
    """Render one entity as an inline, colour-coded HTML chip."""
    category = escape(str(entity["category"]))
    chip = f'<span id="entity-{entity_id}"><span style="display: inline-flex; flex-direction: row; align-items: center; background: {color}; border-radius: 0.5rem; padding: 0.25rem 0.5rem; overflow: hidden; line-height: 1;">{escape(entity["text"])}'
    links = entity.get("links")
    if links:
        # If there are links, create a dropdown menu with the links
        options = "".join(
            f'<option value="{escape(str(link["id"]))}">{escape(str(link["dataSource"]))} Code {escape(str(link["id"]))}</option>'
            for link in links
        )
        # The markup is emitted without leading whitespace on any line.
        chip += (
            '\n'
            '<span style="border-left: 1px solid; opacity: 0.1; margin-left: 0.5rem; align-self: stretch;"></span>\n'
            '<span style="margin-left: 0.5rem; display: flex; flex-direction: column; align-items: flex-start;">\n'
            '<select style="font-size: 0.75rem; opacity: 0.5;">\n'
            f'{options}\n'
            '</select>\n'
            f'<label style="font-size: 0.6rem; margin-top: 0.25rem;">{category}</label>\n'
            '</span>\n'
        )
    else:
        # If there are no links, just display the category label
        chip += f'<span style="border-left: 1px solid; opacity: 0.1; margin-left: 0.5rem; align-self: stretch;"></span><span style="margin-left: 0.5rem; font-size: 0.75rem; opacity: 0.5;">{category}</span>'
    # Close the main span elements
    return chip + '</span></span>'


def annotate_text_with_entities(original_text, entities_data, debug_dump_path=None):
    """Return ``original_text`` as HTML with every entity highlighted.

    Args:
        original_text: The text that was analysed.
        entities_data: Dict whose ``['documents'][0]['entities']`` is a list of
            entity dicts with ``text``, ``category``, ``offset`` and ``length``
            keys and an optional ``links`` list (``id`` / ``dataSource``).
        debug_dump_path: Optional file path; when given, ``entities_data`` is
            written there as JSON. Defaults to ``None`` (nothing is written)
            so analysis results are not persisted to disk on every call.

    Returns:
        A ``(html, category_to_color)`` tuple: the colour key followed by the
        annotated text, and the mapping from category name to hex colour.
    """
    if debug_dump_path is not None:
        with open(debug_dump_path, 'w', encoding='utf-8') as f:
            json.dump(entities_data, f)

    # Color palette for different categories
    PALETTE = [
        "#ff4b4b",
        "#ffa421",
        "#ffe312",
        "#21c354",
        "#00d4b1",
        "#00c0f2",
        "#1c83e1",
        "#803df5",
        "#808495",
    ]
    # Opacities
    OPACITIES = [
        "33", "66",
    ]

    # Extract entities from the JSON data
    entities = entities_data['documents'][0]['entities']

    # Unique categories in order of first appearance, so the colour assigned
    # to a category is the same on every run (a set would reorder them).
    unique_categories = list(dict.fromkeys(entity['category'] for entity in entities))
    category_to_color = {}
    for i, category in enumerate(unique_categories):
        if i < len(PALETTE):
            category_to_color[category] = PALETTE[i]
        else:
            # Palette exhausted: fall back to a random colour.
            category_to_color[category] = '#' + ''.join(
                choice('0123456789ABCDEF') for _ in range(6)
            )

    # Walk the entities left to right, escaping the plain text between them.
    # The result is rendered as raw HTML by the caller, so un-escaped user
    # text would be interpreted as markup.
    pieces = []
    cursor = 0
    ordered = sorted(enumerate(entities), key=lambda pair: pair[1]['offset'])
    for entity_id, entity in ordered:
        start = entity['offset']
        end = start + entity['length']
        if start < cursor:
            # Overlaps an entity that was already rendered; replacing it too
            # would corrupt the surrounding markup, so leave it out.
            continue
        pieces.append(escape(original_text[cursor:start], quote=False))
        color = category_to_color[entity['category']] + OPACITIES[start % len(OPACITIES)]
        pieces.append(_entity_html(entity, entity_id, color))
        cursor = end
    pieces.append(escape(original_text[cursor:], quote=False))

    # Create a color key section
    color_key_section = "<strong>Color Key:</strong><br>" + "".join(
        f'<span style="display: inline-block; background: {color}; width: 1em; height: 1em; margin-right: 0.5em; vertical-align: middle;"></span>{escape(str(category))}<br>'
        for category, color in category_to_color.items()
    )
    return color_key_section + "".join(pieces), category_to_color
def create_interactive_graph_from_json(json_data, category_to_color):
    """Build a Plotly figure of the entities and relations in an analysis result.

    Every entity in ``json_data['documents'][0]['entities']`` becomes a node
    and every relation becomes a directed edge between the first two entities
    it references. Two buttons switch between showing all entities and only
    those that take part in at least one relation.

    Args:
        json_data: Result dict with a ``documents`` list; the first document
            supplies ``entities`` and, optionally, ``relations``.
        category_to_color: Mapping from entity category to a colour string.
            Categories missing from the mapping are drawn in grey.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    document = json_data['documents'][0]
    entities = document['entities']
    relations = document.get('relations', [])

    # Create a new directed graph with one node per entity index
    graph = nx.DiGraph()
    for i, entity in enumerate(entities):
        graph.add_node(i, label=entity['text'], category=entity['category'])

    # Add edges; the last path segment of each "ref" is the entity index
    for relation in relations:
        source_index = int(relation['entities'][0]['ref'].split('/')[-1])
        target_index = int(relation['entities'][1]['ref'].split('/')[-1])
        # Skip references to unknown entities: add_edge would otherwise create
        # a node with no label/category and break the lookups below.
        if source_index in graph and target_index in graph:
            graph.add_edge(source_index, target_index, label=relation['relationType'])

    # Fixed seed so the layout does not reshuffle each time the figure is rebuilt
    pos = nx.spring_layout(graph, seed=42)

    def node_trace(node_ids, visible):
        """Scatter trace for ``node_ids``; colours are computed per listed node."""
        return go.Scatter(
            x=[pos[i][0] for i in node_ids],
            y=[pos[i][1] for i in node_ids],
            text=[graph.nodes[i]['label'] for i in node_ids],
            mode='markers+text',
            hoverinfo='text',
            marker=dict(
                color=[category_to_color.get(graph.nodes[i]['category'], '#808495')
                       for i in node_ids],
                size=10,
            ),
            visible=visible,
        )

    # Get edge positions; None separates the individual line segments
    x_edges = []
    y_edges = []
    for source, target in graph.edges:
        x_edges += [pos[source][0], pos[target][0], None]
        y_edges += [pos[source][1], pos[target][1], None]
    edge_trace = go.Scatter(x=x_edges, y=y_edges, line=dict(width=0.5, color='#888'),
                            hoverinfo='none', mode='lines')

    all_nodes = list(graph.nodes)
    linked_nodes = sorted({node for edge in graph.edges for node in edge})

    # The "linked only" trace starts hidden so the initial view matches the
    # first button ("All Entities") instead of overlaying both node traces.
    fig = go.Figure(
        data=[edge_trace, node_trace(all_nodes, True), node_trace(linked_nodes, False)],
        layout=go.Layout(
            # title.font replaces the deprecated ``titlefont`` layout attribute
            title=dict(text='Entities and Relationships in Patient Notes',
                       font=dict(size=16)),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            updatemenus=[dict(
                type="buttons",
                x=1.15,
                y=1.2,
                buttons=[
                    dict(label="All Entities",
                         method="update",
                         args=[{"visible": [True, True, False]}]),
                    dict(label="Entities with Relationships",
                         method="update",
                         args=[{"visible": [True, False, True]}]),
                ],
            )],
        ),
    )
    return fig
def format_sdoh_entities_as_list(json_data):
    """Format the SDOH-related entities of an analysis result as a Markdown list.

    Args:
        json_data: Result dict with a ``documents`` list; each document has an
            ``entities`` list of dicts with ``category`` and ``text`` keys.

    Returns:
        A string with one Markdown bullet per matching entity (employment,
        living status, substance use, substance use amount, ethnicity), or an
        empty string when nothing matches.
    """
    # Categories are compared upper-cased with underscores removed, so both
    # "LivingStatus" and "LIVING_STATUS" spellings are recognised. The previous
    # 'LIVING_STATUS' entry could never equal 'LivingStatus'.upper().
    relevant_categories = {'EMPLOYMENT', 'LIVINGSTATUS', 'SUBSTANCEUSE', 'SUBSTANCEUSEAMOUNT', 'ETHNICITY'}
    formatted_result = []
    for document in json_data['documents']:
        for entity in document['entities']:
            category = entity['category'].upper()
            if category.replace('_', '') in relevant_categories:
                formatted_result.append(f"- **{category}** : '{entity['text']}' \n")
    return '\n'.join(formatted_result)
table_arr = [
"""Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
A Access to Care Access to health care is the timely use of personal health services to achieve the best possible health outcomes and pertains to a person's ability to access needed health or social services.
Access to Care Availability - Hospitals & Clinics A1 FQHCs, Rate Per Low-Income Population Hospitals and Clinics are very over crowded. They are a long way from where I live. Services not provided in my area. Urgent Care closed when I am sick.
A2 Hospital Beds Per Capita
A3 Proximity to Hospitals with ER This refers to comments about not having enough doctors and nurses in a given geographic context.
Availability - Mental Health Care A4 Mental Health Professional Shortage Areas "This refers to comments about not haveing enough Doctors and nurses to service the need of the mental needs of the community. Long wait times to get an appointment. Doctors not taking new patients
Having to go to ER for mental health issues"
A5 Mental Health Providers Comments related to not having enough mental health providers
Availability - Primary Care A6 Primary Care Providers "This refers to comments about not having enough Doctors and nurses to service the need of the mental needs of the community. Long wait times to get an appointment. Doctors not taking new patients
Having to go to ER for mental health issues"
A7 Primary Care Shortage Areas This refers to comments about not having enough doctors and nurses in a given geographic context.
Availability - Specialty Care A8 Maternal Care Providers
A9 Dental Care Providers
Barriers - Health Literacy A10 Educational Attainment Appointments on-line and do not know how to make appointment. They do not speak my language. I do not know what or who to ask a question. Do not understand what they did or what I need to do now.
A11 Limited English Proficiency
Barriers - Medical Insurance A12 Health Insurance Disparities I can't get insurance from my job. I am not eligible for medicaid or do not know how to sign up. Can not afford Insurance (see Financial Stability / Income)
A13 Population without Medical Insurance
Barriers - Transportation A14 Distance to Public Transit I don't have a way to get there. I don't have a car. We only have one car. Can't afford a car (Financial Stability / Income)
A15 Households with No Vehicle 7
Notable Comments - Access to Care A16 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
"""Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
Health Conditions B Health Conditions According to the World Health Organization (WHO), health is a state of complete physical, mental, and social well-being and not merely the absence of disease. Chronic diseases are defined broadly as conditions that last 1 year or more and require ongoing medical attention or limit daily activities.
Asthma & COPD B1 Lung Disease Mortality Comments related to Asthma & COPD
B2 Lung Disease Prevalence
Cancers B3 Cancer Prevalence Comments related to Cancers
B4 Cancer Mortality*
Chronic Brain Disorders B5 Alzheimer's Disease Mortality* Comments related to Chronic Brain Disorders
B6 Alzheimer's Disease Prevalence
Heart Disease & Stroke B7 Heart Disease & Stroke Mortality* Comments related to Heart Disease & Stroke
B8 Heart Disease Prevalence
Kidney & Liver Diseases B9 Kidney Disease Prevalence Comments related to Chronic Kidney & Liver Diseases
B10 Liver Disease Mortality*
Obesity & Diabetes B11 Diabetes Comments related to Obesity & Diabetes
B12 Obesity
Preventable Death B13 Premature Death Disparities*
Aging Conditions B14
Notable Comments - Health Conditions B15 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
"""""",
"""Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
Mental Health Health Outcomes - Anxiety & Depression D1 Mental Health Diagnoses* COVID, economy, etc cause stress and depression
D2 Poor Mental Health
Health Outcomes - Deaths of Despair D3 Deaths of Despair* Suicide, and drug overdose. Pill mills. Big Pharma
D4 Suicide Mortality*
Risk Factors - Access to Care D5 Access to Mental Health Providers Lack of services for the mental ill.
D6 Medical Insurance
Risk Factors - Drugs & Alcohol D7 Binge Drinking Lack of treatment options
D8 Substance Use Disorder*
Risk Factors - Stress & Trauma D9 Unemployment Domestic violance. Child abuse.
D10 Violent Crime Rate*
Notable Comments - Mental Health D11 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
"""Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
Food Security E Food Security Food security means that all people, at all times, have physical, social, and economic access to sufficient, safe, and nutritious food that meets their food preferences and dietary needs for an active and healthy life.
Economic Security E1 Free/Reduced Price Lunch Groceries cost too much. No money for good food.
E2 Poverty (100% FPL)
Food Access E3 Access to Healthy Food No supermarkets in the area. Bad food choice selection.
E4 Healthy Food Access Disparities
E5 Local Food Outlets
E6 SNAP-Authorized Retailers No supermarkets accept SNAP
Notable Comments - Food Security E7 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
"""""",
"""Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
Education F Education Educational attainment is a well-established social determinant of health. It affects health through many mechanisms such as neural development, biological aging, health literacy and health behaviors, sense of control and empowerment, and life chances.
Achievement F1 Chronic Absenteeism Kids do not do well in school. Skipping class
F2 English Language Arts Proficiency
Attainment F3 Associate's Degree or Higher Students dropping out of school or not going to college
F4 Educational Attainment Disparities
F5 High School Graduation Rate
Early Childhood F6 Childcare Scarcity Lack of affordable or convient pre-schools.
F7 Preschool Enrollment
Notable Comments - Education F8 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
"""Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
Financial Stability G Financial Stability Financial stability is the ability for someone to withstand financial emergencies and sustain themselves and their family. Financial stability enables families to access safe housing, healthy foods, and other necessities, to engage fully in their communities, and to plan for the future.
Employment G1 Labor Force Participation Rate No jobs or well paying jobs. Only min wage jobs
G2 Unemployment
Income G3 Childhood Poverty Rate Do not make enough to pay the bills. Just getting by. Pay check to pay check
G4 Senior Poverty Rate
G5 Income Inequality
G6 Median Household Income
Security G7 Housing Cost Burden (30%) Rent takes up all my income. Can't afford to but or rent a house.
G8 Population with Debt*
Notable Comments - Financial Stability G9 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
"""""",
"""Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
H Housing Homelessness can take many forms, with people living on the streets, in encampments or shelters, in transitional housing programs, or doubled up with family and friends.
Housing Homelessness H1 Evictions* Homelessness status or mention of increase
H2 Homeless Population*
H3 Homeless Students
Housing Costs H4 Low-Income Housing Appartments too expensive. Can not afford to buy a house. Rent take up my budget
H5 Housing + Transportation Affordability Index
H6 Median Household Income
H7 Severe Housing Cost Burden (50%)
Housing Quality H8 Overcrowded Housing Housing conditions are not desirable for overall health and well-being.
H9 Owner Occupied Households
H10 Renter Occupied Households
H11 Incomplete Facilities (Plumbing, Kitchen)
Notable Comments - Housing H12 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
"""Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
Climate & Natural Environment I Climate & Natural Environment Climate and the natural environment are about the impacts of climate change on livelihoods in the community.
Physical Environment - Air & Water I1 Air Toxics Risk Do not trust the water. Chemicals in the air. Too close to the fields with pesticides
I2 Particulate Matter (PM 2.5) Air quality from particulate matter (PM 2.5) and toxics risks are harmful to short-term and long-term health and has been associated with chronic health conditions such as lung disease, respiratory infections and heart disease among others.
Physical Environment - Heat & Climate I3 Disaster Risk Index Global warming. Droughts. Floods. Wild fires
I4 Extreme Heat Days* Extreme heat is a climate-driven hazard that affects the capacity of individuals, workers and communities.
Notable Comments - Climate & Natural Environment I5 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
"""Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
Community Safety J Community Safety Community safety is both about people's sense of safety and about the community's crime rate.
Injuries J1 Injury Mortality (Falls, Firearms, Drowning)* Injuries related to Community Safety
J2 Motor Vehicle Crash Fatality
Public Safety J3 Property Crime* Comments that the community is not safe. Unsafe neighborhoods
J4 Violent Crime*
Risk Factors J5 Disengaged Youth Comments about risk factors
J6 School Suspensions + Expulsions
Notable Comments - Community Safety J7 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
"""Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
Community Infrastructure K Community Infrastructure Having optimal environment and infrastructure for well-being means that streets are designed to be safe and enjoyable for those who are walking or biking. It means that air and water are safe to breathe and drink. It means that technical access infrastructure like broadband internet are available, to connect people to opportunities.
Access to Childcare K1 Childcare Access Disparities Lack of affordable or convient pre-schools.
K2 Childcare Scarcity
Community Amenities K3 Walkability No walking paths or parks in the area
Internet & Technology K4 Cellular Plan Only Bad or no Internet for people to connect to. Few daycare centers. Have to take the bus everywhere.
K5 Internet Access Disparities
K6 No Computer
K7 No High-Speed Internet
Transportation K8 Tansportation Access Inadequate transportation modes and access
Notable Comments - Built Environment K9 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
"""Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples
Social & Economic Context L Social & Economic Context Social & Economic Context is about whether people feel a sense of belonging to the place where they live and whether they feel connected to one another. Do they trust their leaders? Do they engage in community life and participate in decision-making?
Civic Engagement L1 Census Response Rate People are not engaged or aware of community or city programs. Not able or desire to vote
L2 Voter Participation Rate*
Economic Vitality L3 Business Vacancy Rate Lack of jobs in the community. Unemployment. Closed businesses
L4 Funding for Public Works & Welfare*
Place Attachment L5 Home Ownership Too many rentals. People move around a lot
L6 Net Migration (Population Loss)*
Social Inclusion L7 501c3 organizations People and seniors who live alone. Feelings of isolation and loniliness
L8 Neighborhood Segregation
L9 Older Adults Living Alone
Socioeconomic Disadvantage L10 Area Deprivation Index Comments related to socioeconomic disadvantage
Notable Comments - Social Environment L11 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)"""
]
def chunk_by_speaker(transcript):
    """
    Group the lines of a transcript by the speaker who said them.

    A line that begins with ``Speaker <number>`` switches the active speaker;
    that line and every following line are credited to that speaker until the
    next speaker marker. Lines before the first marker are dropped.

    Args:
        transcript: The transcript string.
    Returns:
        A dictionary mapping each speaker number (int) to all of that
        speaker's lines, each terminated by a newline, in transcript order.
    """
    lines_by_speaker = {}
    active = None
    for row in transcript.splitlines():
        marker = re.match(r"Speaker (\d+)", row)
        if marker is not None:
            # A new speaker marker: switch speaker and make sure it has a bucket.
            active = int(marker.group(1))
            lines_by_speaker.setdefault(active, [])
        if active is None:
            # Nothing has been attributed to a speaker yet.
            continue
        lines_by_speaker[active].append(row + "\n")
    return {speaker: "".join(rows) for speaker, rows in lines_by_speaker.items()}
def generate_response(system_message, input_text):
    """
    Ask the chat model for a JSON-object reply.

    Args:
        system_message: Content of the system message.
        input_text: Content of the user message.
    Returns:
        The content of the first choice's message, or a string starting with
        "Error generating response: " if the call raised an exception.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": input_text},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=conversation,
            response_format={"type": "json_object"},
        )
        reply = completion.choices[0].message.content
    except Exception as exc:
        # Failures are reported to the caller as text rather than raised.
        reply = f"Error generating response: {exc}"
    return reply
def generate_response_summary(system_message, input_text):
    """
    Ask the chat model for a free-form reply (no response format is requested).

    Args:
        system_message: Content of the system message.
        input_text: Content of the user message.
    Returns:
        The content of the first choice's message, or a string starting with
        "Error generating response: " if the call raised an exception.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": input_text},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=conversation,
        )
        reply = completion.choices[0].message.content
    except Exception as exc:
        # Failures are reported to the caller as text rather than raised.
        reply = f"Error generating response: {exc}"
    return reply
# Split the page into a narrow input column and a wide output column (2:5).
col1, col2 = st.columns([2, 5])

# Seed the session-state slots with the placeholder 'value' on first run;
# later code compares against this placeholder to detect "no analysis yet".
for _state_key in ('r', 'r_annotated', 'colour_to_category'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = 'value'

# Left column: the patient-note text box and the button that starts analysis.
with col1:
    col1.subheader("Patient Note Input")
    st.text("Enter your text input below:")
    dax_input = st.text_area("", height=500)
    analyze_btn = st.button("Analyze")
with col2:
col2.subheader("Text Analytics for Health Output")
if st.session_state.r_annotated != 'value':
with st.expander("Entity Mappings"):
st.markdown(st.session_state.r_annotated, unsafe_allow_html=True)
with st.expander("Show Relationships"):
st.plotly_chart(create_interactive_graph_from_json(st.session_state.r, st.session_state.colour_to_category), use_container_width=True)
with st.expander("Show JSON"):
st.json(st.session_state.r)
with st.expander("Show SDOH"):
st.write(format_sdoh_entities_as_list(st.session_state.r))
if analyze_btn:
st.session_state.r = analyze_healthcare_text(dax_input)["tasks"]["items"][0]["results"]
r_annotated, category_to_color = annotate_text_with_entities(dax_input, st.session_state.r)
st.session_state.r_annotated = r_annotated
st.session_state.colour_to_category = category_to_color
with st.expander("Entity Mappings"):
st.markdown(r_annotated, unsafe_allow_html=True)
with st.expander("Show Relationships"):
st.plotly_chart(create_interactive_graph_from_json(st.session_state.r, category_to_color), use_container_width=True)
with st.expander("Show JSON"):
st.json(st.session_state.r)
with st.expander("Show SDOH"):
st.write("Social Determinants of Health (SDOH) Entities")
st.write(format_sdoh_entities_as_list(st.session_state.r))
col2.subheader("GPT-4o for Health Output")
with st.expander("General Themes"):
st.write(generate_response_summary("Create a overal report of the SDOH themes mentioned in the text", dax_input))
with st.expander("Specific Themes (faster but less detailed)"):
system_message_template_total = """
You will get snippets from a transcript with a patient. Your job is to label the transcript with SDOH. You should reference the table below only when assigning values.
Reference Table:
Food Security E Food Security Food security means that all people, at all times, have physical, social, and economic access to sufficient, safe, and nutritious food that meets their food preferences and dietary needs for an active and healthy life.
Economic Security E1 Free/Reduced Price Lunch Groceries cost too much. No money for good food.
E2 Poverty (100% FPL)
Food Access E3 Access to Healthy Food No supermarkets in the area. Bad food choice selection.
E4 Healthy Food Access Disparities
E5 Local Food Outlets
E6 SNAP-Authorized Retailers No supermarkets accept SNAP
Notable Comments - Food Security E7 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Education F Education Educational attainment is a well-established social determinant of health. It affects health through many mechanisms such as neural development, biological aging, health literacy and health behaviors, sense of control and empowerment, and life chances.
Achievement F1 Chronic Absenteeism Kids do not do well in school. Skipping class
F2 English Language Arts Proficiency
Attainment F3 Associate's Degree or Higher Students dropping out of school or not going to college
F4 Educational Attainment Disparities
F5 High School Graduation Rate
Early Childhood F6 Childcare Scarcity Lack of affordable or convient pre-schools.
F7 Preschool Enrollment
Notable Comments - Education F8 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Financial Stability G Financial Stability Financial stability is the ability for someone to withstand financial emergencies and sustain themselves and their family. Financial stability enables families to access safe housing, healthy foods, and other necessities, to engage fully in their communities, and to plan for the future.
Employment G1 Labor Force Participation Rate No jobs or well paying jobs. Only min wage jobs
G2 Unemployment
Income G3 Childhood Poverty Rate Do not make enough to pay the bills. Just getting by. Pay check to pay check
G4 Senior Poverty Rate
G5 Income Inequality
G6 Median Household Income
Security G7 Housing Cost Burden (30%) Rent takes up all my income. Can't afford to but or rent a house.
G8 Population with Debt*
Notable Comments - Financial Stability G9 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
H Housing Homelessness can take many forms, with people living on the streets, in encampments or shelters, in transitional housing programs, or doubled up with family and friends.
Housing Homelessness H1 Evictions* Homelessness status or mention of increase
H2 Homeless Population*
H3 Homeless Students
Housing Costs H4 Low-Income Housing Appartments too expensive. Can not afford to buy a house. Rent take up my budget
H5 Housing + Transportation Affordability Index
H6 Median Household Income
H7 Severe Housing Cost Burden (50%)
Housing Quality H8 Overcrowded Housing Housing conditions are not desirable for overall health and well-being.
H9 Owner Occupied Households
H10 Renter Occupied Households
H11 Incomplete Facilities (Plumbing, Kitchen)
Notable Comments - Housing H12 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Climate & Natural Environment I Climate & Natural Environment Climate and the natural environment are about the impacts of climate change on livelihoods in the community.
Physical Environment - Air & Water I1 Air Toxics Risk Do not trust the water. Chemicals in the air. Too close to the fields with pesticides
I2 Particulate Matter (PM 2.5) Air quality from particulate matter (PM 2.5) and toxics risks are harmful to short-term and long-term health and has been associated with chronic health conditions such as lung disease, respiratory infections and heart disease among others.
Physical Environment - Heat & Climate I3 Disaster Risk Index Global warming. Droughts. Floods. Wild fires
I4 Extreme Heat Days* Extreme heat is a climate-driven hazard that affects the capacity of individuals, workers and communities.
Notable Comments - Climate & Natural Environment I5 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Community Safety J Community Safety Community safety is both about people's sense of safety and about the community's crime rate.
Injuries J1 Injury Mortality (Falls, Firearms, Drowning)* Injuries related to Community Safety
J2 Motor Vehicle Crash Fatality
Public Safety J3 Property Crime* Comments that the community is not safe. Unsafe neighborhoods
J4 Violent Crime*
Risk Factors J5 Disengaged Youth Comments about risk factors
J6 School Suspensions + Expulsions
Notable Comments - Community Safety J7 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Community Infrastructure K Community Infrastructure Having optimal environment and infrastructure for well-being means that streets are designed to be safe and enjoyable for those who are walking or biking. It means that air and water are safe to breathe and drink. It means that technical access infrastructure like broadband internet are available, to connect people to opportunities.
Access to Childcare K1 Childcare Access Disparities Lack of affordable or convient pre-schools.
K2 Childcare Scarcity
Community Amenities K3 Walkability No walking paths or parks in the area
Internet & Technology K4 Cellular Plan Only Bad or no Internet for people to connect to. Few daycare centers. Have to take the bus everywhere.
K5 Internet Access Disparities
K6 No Computer
K7 No High-Speed Internet
Transportation K8 Tansportation Access Inadequate transportation modes and access
Notable Comments - Built Environment K9 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Social & Economic Context L Social & Economic Context Social & Economic Context is about whether people feel a sense of belonging to the place where they live and whether they feel connected to one another. Do they trust their leaders? Do they engage in community life and participate in decision-making?
Civic Engagement L1 Census Response Rate People are not engaged or aware of community or city programs. Not able or desire to vote
L2 Voter Participation Rate*
Economic Vitality L3 Business Vacancy Rate Lack of jobs in the community. Unemployment. Closed businesses
L4 Funding for Public Works & Welfare*
Place Attachment L5 Home Ownership Too many rentals. People move around a lot
L6 Net Migration (Population Loss)*
Social Inclusion L7 501c3 organizations People and seniors who live alone. Feelings of isolation and loniliness
L8 Neighborhood Segregation
L9 Older Adults Living Alone
Socioeconomic Disadvantage L10 Area Deprivation Index Comments related to socioeconomic disadvantage
Notable Comments - Social Environment L11 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
There can be multiple excerpts for the same category, include all of them. If there are none, just return an empty values array.
make sure to give the full context of the excerpt, so it is clear without having to go back to the original transcript.
You should return your result in JSON as follows (example):
{{
"values" : [
{{
"category" : "Availability - Primary Care",
"subcategory": "A6",
"codename" : "Primary Care Providers",
"excerpt_from_text_evidence" : "xxxx",
"speaker" : "1"
}},
...n]
}}
"""
# "Faster" pass: every speaker chunk is labelled with ONE model call whose
# system prompt is system_message_template_total.
chunks = chunk_by_speaker(dax_input)
speaker_list = [
    {"speaker_number": speaker_number, "text": text.strip()}
    for speaker_number, text in chunks.items()
]

progress_text = "Processing chunks. Please wait."
my_bar = st.progress(0, text=progress_text)
total_chunks = len(speaker_list)

# Flat list of the label dicts taken from each response's "values" array.
all_outputs_faster = []
for chunk_counter, chunk in enumerate(speaker_list, start=1):
    # presumably generate_response returns the model's reply as a JSON string
    # -- verify against its definition.
    response = generate_response(system_message_template_total, chunk["text"])
    try:
        values = json.loads(response)["values"]
    except (TypeError, ValueError, KeyError) as err:
        # One unparsable reply (invalid JSON, wrong top-level shape, missing
        # "values") should not discard the chunks already processed; report
        # it in the UI and carry on with the next chunk.
        st.warning(
            f"Skipping speaker {chunk['speaker_number']}: "
            f"model response could not be parsed ({err!r})."
        )
    else:
        all_outputs_faster.extend(values)
    # Advance the bar even for skipped chunks so it still reaches 100%.
    my_bar.progress(chunk_counter / total_chunks, text=progress_text)

df_faster = pd.DataFrame(all_outputs_faster)
st.dataframe(df_faster)
# "Slower" pass: the prompt is built once per entry of table_arr and every
# speaker chunk is sent against each of them, i.e.
# len(table_arr) * len(speaker_list) model calls in total.
with st.expander("Specific Themes (slower but more detailed)"):
    chunks = chunk_by_speaker(dax_input)
    speaker_list = [
        {"speaker_number": speaker_number, "text": text.strip()}
        for speaker_number, text in chunks.items()
    ]

    # Doubled braces are str.format escapes; {reference_table} is the only
    # placeholder that gets substituted below.
    system_message_template = """
You will get snippets from a transcript with a patient. Your job is to label the transcript with SDOH. You should reference the table below only when assigning values.
Reference Table: {reference_table}
There can be multiple excerpts for the same category, include all of them. If there are none, just return an empty values array.
make sure to give the full context of the excerpt, so it is clear without having to go back to the original transcript.
You should return your result in JSON as follows (example):
{{
"values" : [
{{
"category" : "Availability - Primary Care",
"subcategory": "A6",
"codename" : "Primary Care Providers",
"excerpt_from_text_evidence" : "xxxx",
"speaker" : "1"
}},
...n]
}}
"""

    progress_text = "Processing chunks. Please wait."
    my_bar = st.progress(0, text=progress_text)
    total_chunks = len(table_arr) * len(speaker_list)
    chunk_counter = 0

    # Flat list of the label dicts taken from each response's "values" array.
    all_outputs = []
    for table_index, table in enumerate(table_arr):
        # The system prompt only depends on the table, so build it once per
        # table rather than once per chunk.
        system_message = system_message_template.format(reference_table=table)
        for chunk in speaker_list:
            # presumably generate_response returns the model's reply as a
            # JSON string -- verify against its definition.
            response = generate_response(system_message, chunk["text"])
            try:
                values = json.loads(response)["values"]
            except (TypeError, ValueError, KeyError) as err:
                # One unparsable reply (invalid JSON, wrong top-level shape,
                # missing "values") should not throw away the results of the
                # calls already made; report it and keep going.
                st.warning(
                    f"Skipping speaker {chunk['speaker_number']} "
                    f"for reference table {table_index}: "
                    f"model response could not be parsed ({err!r})."
                )
            else:
                all_outputs.extend(values)
            # Advance the bar even for skipped calls so it still reaches 100%.
            chunk_counter += 1
            my_bar.progress(chunk_counter / total_chunks, text=progress_text)

    df = pd.DataFrame(all_outputs)
    st.dataframe(df)