Spaces:
Build error
Build error
| import openai | |
| import streamlit as st | |
| import os | |
| import json | |
| import time | |
| import requests | |
| from html import escape | |
| from random import choice | |
| import plotly.graph_objects as go | |
| import networkx as nx | |
| from openai import AzureOpenAI | |
| import re | |
| import pandas as pd | |
# Azure OpenAI connection settings; the key and endpoint are read from the
# environment, the API version and deployment name are fixed here.
_aoai_settings = {
    "api_key": os.environ.get("AOAI_API_KEY"),
    "api_version": "2024-05-01-preview",
    "azure_endpoint": os.environ.get("AOAI_AZURE_ENDPOINT"),
    "azure_deployment": "gpt-4o",
}

# Module-level client used by the chat-completion helpers in this file.
client = AzureOpenAI(**_aoai_settings)

# Lay the page out across the full browser width.
st.set_page_config(layout="wide")
def analyze_healthcare_text(text, *, poll_interval=1.0, max_wait=300.0, request_timeout=30.0):
    """Submit ``text`` to the Text Analytics for Health job API and return the result.

    The function posts an asynchronous "Healthcare" analysis job, reads the job
    id from the ``operation-location`` response header, and polls the job URL
    until the service reports ``succeeded``.

    Args:
        text: The note to analyse (sent as a single English document).
        poll_interval: Seconds to sleep between status polls.
        max_wait: Maximum number of seconds to keep polling before giving up.
        request_timeout: Per-request timeout, in seconds, for each HTTP call.

    Returns:
        The decoded JSON body of the final (succeeded) job-status response.

    Raises:
        requests.HTTPError: If the service answers with an HTTP error status.
        RuntimeError: If the submit response carries no ``operation-location``
            header, or the job ends as ``failed`` / ``cancelled``.
        TimeoutError: If the job has not succeeded within ``max_wait`` seconds.
    """
    base_url = "https://ta4h-endpoint.cognitiveservices.azure.com/language/analyze-text/jobs"
    api_version = "2022-10-01-preview"

    # SECURITY: a subscription key was committed to this file. Prefer the
    # TA4H_SUBSCRIPTION_KEY environment variable; the literal below is kept
    # only so existing deployments keep working and should be rotated and
    # removed.
    subscription_key = os.environ.get(
        "TA4H_SUBSCRIPTION_KEY", "00667ce9381d46a3a279c4799dd698d0"
    )
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": subscription_key,
    }

    # Body of the initial POST request: one Healthcare task over one document.
    data = {
        "tasks": [{"kind": "Healthcare"}],
        "analysisInput": {
            "documents": [
                {
                    "id": "documentId",
                    "text": text,
                    "language": "en",
                }
            ]
        },
    }

    response = requests.post(
        f"{base_url}?api-version={api_version}",
        headers=headers,
        json=data,
        timeout=request_timeout,
    )
    # Surface auth/quota/validation problems here instead of failing later on
    # a missing header.
    response.raise_for_status()

    operation_location = response.headers.get("operation-location")
    if not operation_location:
        raise RuntimeError(
            "Text Analytics for Health did not return an operation-location header"
        )

    # The job id is the last path segment of the operation-location URL.
    job_id = operation_location.split("/")[-1].split("?")[0]
    status_url = f"{base_url}/{job_id}?api-version={api_version}"

    deadline = time.monotonic() + max_wait
    while True:
        result_response = requests.get(
            status_url, headers=headers, timeout=request_timeout
        )
        result_response.raise_for_status()
        result = result_response.json()
        status = result.get("status")

        if status == "succeeded":
            return result
        # Terminal failure states would otherwise be polled forever.
        if status in ("failed", "cancelled"):
            raise RuntimeError(
                f"Text Analytics for Health job {job_id} ended with status {status!r}"
            )
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Text Analytics for Health job {job_id} did not finish within {max_wait} seconds"
            )
        time.sleep(poll_interval)
def annotate_text_with_entities(original_text, entities_data):
    """Render ``original_text`` as HTML with every recognised entity highlighted.

    Args:
        original_text: The analysed note; entity ``offset``/``length`` values
            index into this string.
        entities_data: Result payload whose ``documents[0]['entities']`` items
            carry ``offset``, ``length``, ``text``, ``category`` and,
            optionally, ``links`` (each with ``dataSource`` and ``id``).

    Returns:
        A ``(html, category_to_color)`` tuple. ``html`` is a colour-key header
        followed by the note with each entity wrapped in a styled ``<span>``;
        ``category_to_color`` maps each category name to its base colour.

    Side effects:
        Overwrites ``entities_data.json`` in the working directory with the
        payload on every call.
    """
    # Snapshot of the payload, kept from the original implementation.
    with open('entities_data.json', 'w') as f:
        json.dump(entities_data, f)

    # Colour palette for the different categories.
    PALETTE = [
        "#ff4b4b",
        "#ffa421",
        "#ffe312",
        "#21c354",
        "#00d4b1",
        "#00c0f2",
        "#1c83e1",
        "#803df5",
        "#808495",
    ]
    # Two-digit hex alpha suffixes appended to the base colour.
    OPACITIES = [
        "33", "66",
    ]

    entities = entities_data['documents'][0]['entities']

    # First-appearance order (instead of set order) keeps the colour
    # assignment stable between runs.
    unique_categories = list(dict.fromkeys(entity['category'] for entity in entities))

    # Palette colours first; any categories beyond the palette get a random colour.
    category_to_color = {}
    for index, category in enumerate(unique_categories):
        if index < len(PALETTE):
            category_to_color[category] = PALETTE[index]
        else:
            category_to_color[category] = '#' + ''.join(
                choice('0123456789ABCDEF') for _ in range(6)
            )

    def create_entity_html(entity, entity_id):
        """Build the highlighted ``<span>`` for one entity."""
        color = category_to_color[entity["category"]] + OPACITIES[entity["offset"] % len(OPACITIES)]
        category_label = escape(str(entity["category"]))
        entity_html = (
            f'<span id="entity-{entity_id}"><span style="display: inline-flex; '
            f'flex-direction: row; align-items: center; background: {color}; '
            f'border-radius: 0.5rem; padding: 0.25rem 0.5rem; overflow: hidden; '
            f'line-height: 1;">{escape(entity["text"])}'
        )
        divider = (
            '<span style="border-left: 1px solid; opacity: 0.1; '
            'margin-left: 0.5rem; align-self: stretch;"></span>'
        )
        if entity.get("links"):
            # Linked codes are offered in a dropdown; every value coming from
            # the payload is escaped before it is placed in the markup.
            options = "".join(
                f'<option value="{escape(str(link["id"]))}">'
                f'{escape(str(link["dataSource"]))} Code {escape(str(link["id"]))}</option>'
                for link in entity["links"]
            )
            # Built on one line so the Markdown renderer cannot treat indented
            # HTML as a code block.
            entity_html += (
                divider
                + '<span style="margin-left: 0.5rem; display: flex; '
                'flex-direction: column; align-items: flex-start;">'
                + f'<select style="font-size: 0.75rem; opacity: 0.5;">{options}</select>'
                + f'<label style="font-size: 0.6rem; margin-top: 0.25rem;">{category_label}</label>'
                + '</span>'
            )
        else:
            # No links: show only the category label.
            entity_html += (
                divider
                + f'<span style="margin-left: 0.5rem; font-size: 0.75rem; opacity: 0.5;">{category_label}</span>'
            )
        # Close the two wrapping spans.
        entity_html += '</span></span>'
        return entity_html

    # Walk the entities left to right, escaping the plain text between them so
    # characters such as "<" or "&" in the note cannot inject markup.
    pieces = []
    cursor = 0
    for entity_id, entity in sorted(enumerate(entities), key=lambda pair: pair[1]['offset']):
        start = entity['offset']
        end = start + entity['length']
        if start < cursor:
            # Overlaps an entity that was already rendered; splicing it in
            # would corrupt the surrounding markup, so it is left out.
            continue
        pieces.append(escape(original_text[cursor:start]))
        pieces.append(create_entity_html(entity, entity_id))
        cursor = end
    pieces.append(escape(original_text[cursor:]))

    # Colour key shown above the annotated note.
    color_key_section = "<strong>Color Key:</strong><br>"
    for category, color in category_to_color.items():
        color_key_section += (
            f'<span style="display: inline-block; background: {color}; width: 1em; '
            f'height: 1em; margin-right: 0.5em; vertical-align: middle;"></span>'
            f'{escape(str(category))}<br>'
        )

    return color_key_section + "".join(pieces), category_to_color
def create_interactive_graph_from_json(json_data, category_to_color):
    """Build a Plotly figure of the entities and relations in ``json_data``.

    Args:
        json_data: Result payload; ``documents[0]['entities']`` supplies the
            nodes (``text``, ``category``) and ``documents[0]['relations']``
            (optional) supplies the edges. Each relation's two ``ref`` strings
            end in the index of the entity they point at.
        category_to_color: Mapping from entity category to a colour string.

    Returns:
        A ``plotly.graph_objects.Figure`` with one edge trace and two node
        traces (all entities / only entities that take part in a relation),
        switchable through two buttons.
    """
    document = json_data['documents'][0]
    entities = document['entities']
    relations = document.get('relations', [])

    # One node per entity, keyed by its position in the entity list.
    graph = nx.DiGraph()
    for i, entity in enumerate(entities):
        graph.add_node(i, label=entity['text'], category=entity['category'])

    # One edge per relation; the entity index is the last segment of each ref.
    for relation in relations:
        source_index = int(relation['entities'][0]['ref'].split('/')[-1])
        target_index = int(relation['entities'][1]['ref'].split('/')[-1])
        graph.add_edge(source_index, target_index, label=relation['relationType'])

    pos = nx.spring_layout(graph)

    # Edge segments, separated by None so Plotly draws them as distinct lines.
    x_edges = []
    y_edges = []
    for source, target in graph.edges:
        x_edges += [pos[source][0], pos[target][0], None]
        y_edges += [pos[source][1], pos[target][1], None]
    edge_trace = go.Scatter(x=x_edges, y=y_edges, line=dict(width=0.5, color='#888'),
                            hoverinfo='none', mode='lines')

    def build_node_trace(node_ids, visible):
        """Create a marker+text trace whose colours line up with ``node_ids``."""
        return go.Scatter(
            x=[pos[n][0] for n in node_ids],
            y=[pos[n][1] for n in node_ids],
            text=[graph.nodes[n]['label'] for n in node_ids],
            mode='markers+text',
            hoverinfo='text',
            # Colours are derived from the same node list as x/y/text; reusing
            # the all-nodes colour list for a subset would mis-colour markers.
            marker=dict(
                color=[category_to_color[graph.nodes[n]['category']] for n in node_ids],
                size=10,
            ),
            visible=visible,
        )

    all_nodes = list(graph.nodes)
    nodes_with_edges = [n for n in graph.nodes if graph.degree(n) > 0]

    node_trace_all = build_node_trace(all_nodes, True)
    # Hidden at start so the initial view matches the "All Entities" button
    # instead of drawing connected nodes twice.
    node_trace_with_edges = build_node_trace(nodes_with_edges, False)

    fig = go.Figure(
        data=[edge_trace, node_trace_all, node_trace_with_edges],
        layout=go.Layout(
            # title.font replaces the deprecated ``titlefont`` attribute.
            title=dict(text='Entities and Relationships in Patient Notes',
                       font=dict(size=16)),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            updatemenus=[dict(
                type="buttons",
                x=1.15,
                y=1.2,
                buttons=[
                    dict(label="All Entities",
                         method="update",
                         args=[{"visible": [True, True, False]}]),
                    dict(label="Entities with Relationships",
                         method="update",
                         args=[{"visible": [True, False, True]}]),
                ],
            )],
        ),
    )
    return fig
def format_sdoh_entities_as_list(json_data):
    """Format the social-determinants-of-health entities as a Markdown list.

    Args:
        json_data: Result payload with a ``documents`` list; each document has
            an ``entities`` list whose items carry ``category`` and ``text``.

    Returns:
        One Markdown bullet per matching entity, joined with newlines, or an
        empty string when nothing matches.
    """
    relevant_categories = ['EMPLOYMENT', 'LIVING_STATUS', 'SUBSTANCEUSE', 'SUBSTANCEUSEAMOUNT', 'ETHNICITY']
    # Compare with underscores removed: an upper-cased "LivingStatus" category
    # is "LIVINGSTATUS", which never equalled the 'LIVING_STATUS' entry, so
    # those entities were silently dropped. The original label is still shown.
    label_by_key = {label.replace('_', ''): label for label in relevant_categories}

    formatted_result = []
    for document in json_data['documents']:
        for entity in document['entities']:
            key = entity['category'].upper().replace('_', '')
            label = label_by_key.get(key)
            if label is not None:
                formatted_result.append(f"- **{label}** : '{entity['text']}' \n")
    return '\n'.join(formatted_result)
# Reference tables for labelling text with SDOH codes, one string per
# category. Each non-empty entry starts with the same header row
# ("Category / Sub Category / Codes / Code Name / Exerpt Examples") followed
# by the rows for one code letter: A, B, D, E, F, G, H, I, J, K, L (there is
# no table for letter C). The entries at indexes 2, 5 and 8 are empty strings.
# NOTE(review): how this list is consumed is not visible in this part of the
# file; confirm before reordering or removing the empty entries.
| table_arr = [ | |
| """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples | |
| A Access to Care Access to health care is the timely use of personal health services to achieve the best possible health outcomes and pertains to a person's ability to access needed health or social services. | |
| Access to Care Availability - Hospitals & Clinics A1 FQHCs, Rate Per Low-Income Population Hospitals and Clinics are very over crowded. They are a long way from where I live. Services not provided in my area. Urgent Care closed when I am sick. | |
| A2 Hospital Beds Per Capita | |
| A3 Proximity to Hospitals with ER This refers to comments about not having enough doctors and nurses in a given geographic context. | |
| Availability - Mental Health Care A4 Mental Health Professional Shortage Areas "This refers to comments about not haveing enough Doctors and nurses to service the need of the mental needs of the community. Long wait times to get an appointment. Doctors not taking new patients | |
| Having to go to ER for mental health issues" | |
| A5 Mental Health Providers Comments related to not having enough mental health providers | |
| Availability - Primary Care A6 Primary Care Providers "This refers to comments about not having enough Doctors and nurses to service the need of the mental needs of the community. Long wait times to get an appointment. Doctors not taking new patients | |
| Having to go to ER for mental health issues" | |
| A7 Primary Care Shortage Areas This refers to comments about not having enough doctors and nurses in a given geographic context. | |
| Availability - Specialty Care A8 Maternal Care Providers | |
| A9 Dental Care Providers | |
| Barriers - Health Literacy A10 Educational Attainment Appointments on-line and do not know how to make appointment. They do not speak my language. I do not know what or who to ask a question. Do not understand what they did or what I need to do now. | |
| A11 Limited English Proficiency | |
| Barriers - Medical Insurance A12 Health Insurance Disparities I can't get insurance from my job. I am not eligible for medicaid or do not know how to sign up. Can not afford Insurance (see Financial Stability / Income) | |
| A13 Population without Medical Insurance | |
| Barriers - Transportation A14 Distance to Public Transit I don't have a way to get there. I don't have a car. We only have one car. Can't afford a car (Financial Stability / Income) | |
| A15 Households with No Vehicle 7 | |
| Notable Comments - Access to Care A16 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""", | |
| """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples | |
| Health Conditions B Health Conditions According to the World Health Organization (WHO), health is a state of complete physical, mental, and social well-being and not merely the absence of disease. Chronic diseases are defined broadly as conditions that last 1 year or more and require ongoing medical attention or limit daily activities. | |
| Asthma & COPD B1 Lung Disease Mortality Comments related to Asthma & COPD | |
| B2 Lung Disease Prevalence | |
| Cancers B3 Cancer Prevalence Comments related to Cancers | |
| B4 Cancer Mortality* | |
| Chronic Brain Disorders B5 Alzheimer's Disease Mortality* Comments related to Chronic Brain Disorders | |
| B6 Alzheimer's Disease Prevalence | |
| Heart Disease & Stroke B7 Heart Disease & Stroke Mortality* Comments related to Heart Disease & Stroke | |
| B8 Heart Disease Prevalence | |
| Kidney & Liver Diseases B9 Kidney Disease Prevalence Comments related to Chronic Kidney & Liver Diseases | |
| B10 Liver Disease Mortality* | |
| Obesity & Diabetes B11 Diabetes Comments related to Obesity & Diabetes | |
| B12 Obesity | |
| Preventable Death B13 Premature Death Disparities* | |
| Aging Conditions B14 | |
| Notable Comments - Health Conditions B15 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""", | |
| """""", | |
| """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples | |
| Mental Health Health Outcomes - Anxiety & Depression D1 Mental Health Diagnoses* COVID, economy, etc cause stress and depression | |
| D2 Poor Mental Health | |
| Health Outcomes - Deaths of Despair D3 Deaths of Despair* Suicide, and drug overdose. Pill mills. Big Pharma | |
| D4 Suicide Mortality* | |
| Risk Factors - Access to Care D5 Access to Mental Health Providers Lack of services for the mental ill. | |
| D6 Medical Insurance | |
| Risk Factors - Drugs & Alcohol D7 Binge Drinking Lack of treatment options | |
| D8 Substance Use Disorder* | |
| Risk Factors - Stress & Trauma D9 Unemployment Domestic violance. Child abuse. | |
| D10 Violent Crime Rate* | |
| Notable Comments - Mental Health D11 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""", | |
| """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples | |
| Food Security E Food Security Food security means that all people, at all times, have physical, social, and economic access to sufficient, safe, and nutritious food that meets their food preferences and dietary needs for an active and healthy life. | |
| Economic Security E1 Free/Reduced Price Lunch Groceries cost too much. No money for good food. | |
| E2 Poverty (100% FPL) | |
| Food Access E3 Access to Healthy Food No supermarkets in the area. Bad food choice selection. | |
| E4 Healthy Food Access Disparities | |
| E5 Local Food Outlets | |
| E6 SNAP-Authorized Retailers No supermarkets accept SNAP | |
| Notable Comments - Food Security E7 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""", | |
| """""", | |
| """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples | |
| Education F Education Educational attainment is a well-established social determinant of health. It affects health through many mechanisms such as neural development, biological aging, health literacy and health behaviors, sense of control and empowerment, and life chances. | |
| Achievement F1 Chronic Absenteeism Kids do not do well in school. Skipping class | |
| F2 English Language Arts Proficiency | |
| Attainment F3 Associate's Degree or Higher Students dropping out of school or not going to college | |
| F4 Educational Attainment Disparities | |
| F5 High School Graduation Rate | |
| Early Childhood F6 Childcare Scarcity Lack of affordable or convient pre-schools. | |
| F7 Preschool Enrollment | |
| Notable Comments - Education F8 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""", | |
| """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples | |
| Financial Stability G Financial Stability Financial stability is the ability for someone to withstand financial emergencies and sustain themselves and their family. Financial stability enables families to access safe housing, healthy foods, and other necessities, to engage fully in their communities, and to plan for the future. | |
| Employment G1 Labor Force Participation Rate No jobs or well paying jobs. Only min wage jobs | |
| G2 Unemployment | |
| Income G3 Childhood Poverty Rate Do not make enough to pay the bills. Just getting by. Pay check to pay check | |
| G4 Senior Poverty Rate | |
| G5 Income Inequality | |
| G6 Median Household Income | |
| Security G7 Housing Cost Burden (30%) Rent takes up all my income. Can't afford to but or rent a house. | |
| G8 Population with Debt* | |
| Notable Comments - Financial Stability G9 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""", | |
| """""", | |
| """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples | |
| H Housing Homelessness can take many forms, with people living on the streets, in encampments or shelters, in transitional housing programs, or doubled up with family and friends. | |
| Housing Homelessness H1 Evictions* Homelessness status or mention of increase | |
| H2 Homeless Population* | |
| H3 Homeless Students | |
| Housing Costs H4 Low-Income Housing Appartments too expensive. Can not afford to buy a house. Rent take up my budget | |
| H5 Housing + Transportation Affordability Index | |
| H6 Median Household Income | |
| H7 Severe Housing Cost Burden (50%) | |
| Housing Quality H8 Overcrowded Housing Housing conditions are not desirable for overall health and well-being. | |
| H9 Owner Occupied Households | |
| H10 Renter Occupied Households | |
| H11 Incomplete Facilities (Plumbing, Kitchen) | |
| Notable Comments - Housing H12 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""", | |
| """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples | |
| Climate & Natural Environment I Climate & Natural Environment Climate and the natural environment are about the impacts of climate change on livelihoods in the community. | |
| Physical Environment - Air & Water I1 Air Toxics Risk Do not trust the water. Chemicals in the air. Too close to the fields with pesticides | |
| I2 Particulate Matter (PM 2.5) Air quality from particulate matter (PM 2.5) and toxics risks are harmful to short-term and long-term health and has been associated with chronic health conditions such as lung disease, respiratory infections and heart disease among others. | |
| Physical Environment - Heat & Climate I3 Disaster Risk Index Global warming. Droughts. Floods. Wild fires | |
| I4 Extreme Heat Days* Extreme heat is a climate-driven hazard that affects the capacity of individuals, workers and communities. | |
| Notable Comments - Climate & Natural Environment I5 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""", | |
| """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples | |
| Community Safety J Community Safety Community safety is both about people's sense of safety and about the community's crime rate. | |
| Injuries J1 Injury Mortality (Falls, Firearms, Drowning)* Injuries related to Community Safety | |
| J2 Motor Vehicle Crash Fatality | |
| Public Safety J3 Property Crime* Comments that the community is not safe. Unsafe neighborhoods | |
| J4 Violent Crime* | |
| Risk Factors J5 Disengaged Youth Comments about risk factors | |
| J6 School Suspensions + Expulsions | |
| Notable Comments - Community Safety J7 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""", | |
| """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples | |
| Community Infrastructure K Community Infrastructure Having optimal environment and infrastructure for well-being means that streets are designed to be safe and enjoyable for those who are walking or biking. It means that air and water are safe to breathe and drink. It means that technical access infrastructure like broadband internet are available, to connect people to opportunities. | |
| Access to Childcare K1 Childcare Access Disparities Lack of affordable or convient pre-schools. | |
| K2 Childcare Scarcity | |
| Community Amenities K3 Walkability No walking paths or parks in the area | |
| Internet & Technology K4 Cellular Plan Only Bad or no Internet for people to connect to. Few daycare centers. Have to take the bus everywhere. | |
| K5 Internet Access Disparities | |
| K6 No Computer | |
| K7 No High-Speed Internet | |
| Transportation K8 Tansportation Access Inadequate transportation modes and access | |
| Notable Comments - Built Environment K9 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""", | |
| """Category Sub Category (aka "Themes") Codes Code Name (Green= Indicator, Yellow= concept) Exerpt Examples | |
| Social & Economic Context L Social & Economic Context Social & Economic Context is about whether people feel a sense of belonging to the place where they live and whether they feel connected to one another. Do they trust their leaders? Do they engage in community life and participate in decision-making? | |
| Civic Engagement L1 Census Response Rate People are not engaged or aware of community or city programs. Not able or desire to vote | |
| L2 Voter Participation Rate* | |
| Economic Vitality L3 Business Vacancy Rate Lack of jobs in the community. Unemployment. Closed businesses | |
| L4 Funding for Public Works & Welfare* | |
| Place Attachment L5 Home Ownership Too many rentals. People move around a lot | |
| L6 Net Migration (Population Loss)* | |
| Social Inclusion L7 501c3 organizations People and seniors who live alone. Feelings of isolation and loniliness | |
| L8 Neighborhood Segregation | |
| L9 Older Adults Living Alone | |
| Socioeconomic Disadvantage L10 Area Deprivation Index Comments related to socioeconomic disadvantage | |
| Notable Comments - Social Environment L11 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""" | |
| ] | |
def chunk_by_speaker(transcript):
    """
    Group the lines of a transcript by the speaker who said them.

    A line that begins with "Speaker <number>" switches the active speaker;
    that line and every following line belong to the active speaker until the
    next switch. Lines before the first speaker marker are discarded.

    Args:
        transcript: The transcript string.
    Returns:
        A dictionary mapping each speaker number (int) to all of that
        speaker's lines, each terminated by a newline, in transcript order.
    """
    collected = {}
    active = None
    for row in transcript.splitlines():
        header = re.match(r"Speaker (\d+)", row)
        if header is not None:
            # A new speaker marker: switch and make sure the bucket exists.
            active = int(header.group(1))
            collected.setdefault(active, [])
        if active is None:
            # Nothing has been attributed to a speaker yet.
            continue
        collected[active].append(row + "\n")
    return {speaker: "".join(rows) for speaker, rows in collected.items()}
def generate_response(system_message, input_text):
    """
    Ask the chat model for a JSON-object reply to ``input_text``.

    Args:
        system_message: The system prompt sent ahead of the user text.
        input_text: The input text to be analyzed.
    Returns:
        The content of the first choice in the reply, or a string starting
        with "Error generating response:" if anything raised along the way.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": input_text},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=conversation,
            response_format={"type": "json_object"},
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as error:
        # Failures are reported to the caller as text rather than raised.
        return f"Error generating response: {error}"
def generate_response_summary(system_message, input_text):
    """
    Ask the chat model for a free-text reply to ``input_text``.

    Args:
        system_message: The system prompt sent ahead of the user text.
        input_text: The input text to be analyzed.
    Returns:
        The content of the first choice in the reply, or a string starting
        with "Error generating response:" if anything raised along the way.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": input_text},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=conversation,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as error:
        # Failures are reported to the caller as text rather than raised.
        return f"Error generating response: {error}"
# Two-column page: input on the left, results on the right (2:5 width ratio).
col1, col2 = st.columns([2, 5])

# Seed each session-state slot with the 'value' placeholder when it is absent.
for _state_key in ('r', 'r_annotated', 'colour_to_category'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = 'value'

# Left column: the note entry box and the button that triggers the analysis.
with col1:
    col1.subheader("Patient Note Input")
    st.text("Enter your text input below:")
    dax_input = st.text_area("", height=500)
    analyze_btn = st.button("Analyze")
| with col2: | |
| col2.subheader("Text Analytics for Health Output") | |
| if st.session_state.r_annotated != 'value': | |
| with st.expander("Entity Mappings"): | |
| st.markdown(st.session_state.r_annotated, unsafe_allow_html=True) | |
| with st.expander("Show Relationships"): | |
| st.plotly_chart(create_interactive_graph_from_json(st.session_state.r, st.session_state.colour_to_category), use_container_width=True) | |
| with st.expander("Show JSON"): | |
| st.json(st.session_state.r) | |
| with st.expander("Show SDOH"): | |
| st.write(format_sdoh_entities_as_list(st.session_state.r)) | |
| if analyze_btn: | |
| st.session_state.r = analyze_healthcare_text(dax_input)["tasks"]["items"][0]["results"] | |
| r_annotated, category_to_color = annotate_text_with_entities(dax_input, st.session_state.r) | |
| st.session_state.r_annotated = r_annotated | |
| st.session_state.colour_to_category = category_to_color | |
| with st.expander("Entity Mappings"): | |
| st.markdown(r_annotated, unsafe_allow_html=True) | |
| with st.expander("Show Relationships"): | |
| st.plotly_chart(create_interactive_graph_from_json(st.session_state.r, category_to_color), use_container_width=True) | |
| with st.expander("Show JSON"): | |
| st.json(st.session_state.r) | |
| with st.expander("Show SDOH"): | |
| st.write("Social Determinants of Health (SDOH) Entities") | |
| st.write(format_sdoh_entities_as_list(st.session_state.r)) | |
| col2.subheader("GPT-4o for Health Output") | |
| with st.expander("General Themes"): | |
| st.write(generate_response_summary("Create a overal report of the SDOH themes mentioned in the text", dax_input)) | |
| with st.expander("Specific Themes (faster but less detailed)"): | |
| system_message_template_total = """ | |
| You will get snippets from a transcript with a patient. Your job is to label the transcript with SDOH. You should reference the table below only when assigning values. | |
| Reference Table: | |
| Food Security E Food Security Food security means that all people, at all times, have physical, social, and economic access to sufficient, safe, and nutritious food that meets their food preferences and dietary needs for an active and healthy life. | |
| Economic Security E1 Free/Reduced Price Lunch Groceries cost too much. No money for good food. | |
| E2 Poverty (100% FPL) | |
| Food Access E3 Access to Healthy Food No supermarkets in the area. Bad food choice selection. | |
| E4 Healthy Food Access Disparities | |
| E5 Local Food Outlets | |
| E6 SNAP-Authorized Retailers No supermarkets accept SNAP | |
| Notable Comments - Food Security E7 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) | |
| Education F Education Educational attainment is a well-established social determinant of health. It affects health through many mechanisms such as neural development, biological aging, health literacy and health behaviors, sense of control and empowerment, and life chances. | |
| Achievement F1 Chronic Absenteeism Kids do not do well in school. Skipping class | |
| F2 English Language Arts Proficiency | |
| Attainment F3 Associate's Degree or Higher Students dropping out of school or not going to college | |
| F4 Educational Attainment Disparities | |
| F5 High School Graduation Rate | |
| Early Childhood F6 Childcare Scarcity Lack of affordable or convient pre-schools. | |
| F7 Preschool Enrollment | |
| Notable Comments - Education F8 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) | |
| Financial Stability G Financial Stability Financial stability is the ability for someone to withstand financial emergencies and sustain themselves and their family. Financial stability enables families to access safe housing, healthy foods, and other necessities, to engage fully in their communities, and to plan for the future. | |
| Employment G1 Labor Force Participation Rate No jobs or well paying jobs. Only min wage jobs | |
| G2 Unemployment | |
| Income G3 Childhood Poverty Rate Do not make enough to pay the bills. Just getting by. Pay check to pay check | |
| G4 Senior Poverty Rate | |
| G5 Income Inequality | |
| G6 Median Household Income | |
| Security G7 Housing Cost Burden (30%) Rent takes up all my income. Can't afford to but or rent a house. | |
| G8 Population with Debt* | |
| Notable Comments - Financial Stability G9 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) | |
| H Housing Homelessness can take many forms, with people living on the streets, in encampments or shelters, in transitional housing programs, or doubled up with family and friends. | |
| Housing Homelessness H1 Evictions* Homelessness status or mention of increase | |
| H2 Homeless Population* | |
| H3 Homeless Students | |
| Housing Costs H4 Low-Income Housing Appartments too expensive. Can not afford to buy a house. Rent take up my budget | |
| H5 Housing + Transportation Affordability Index | |
| H6 Median Household Income | |
| H7 Severe Housing Cost Burden (50%) | |
| Housing Quality H8 Overcrowded Housing Housing conditions are not desirable for overall health and well-being. | |
| H9 Owner Occupied Households | |
| H10 Renter Occupied Households | |
| H11 Incomplete Facilities (Plumbing, Kitchen) | |
| Notable Comments - Housing H12 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) | |
| Climate & Natural Environment I Climate & Natural Environment Climate and the natural environment are about the impacts of climate change on livelihoods in the community. | |
| Physical Environment - Air & Water I1 Air Toxics Risk Do not trust the water. Chemicals in the air. Too close to the fields with pesticides | |
| I2 Particulate Matter (PM 2.5) Air quality from particulate matter (PM 2.5) and toxics risks are harmful to short-term and long-term health and has been associated with chronic health conditions such as lung disease, respiratory infections and heart disease among others. | |
| Physical Environment - Heat & Climate I3 Disaster Risk Index Global warming. Droughts. Floods. Wild fires | |
| I4 Extreme Heat Days* Extreme heat is a climate-driven hazard that affects the capacity of individuals, workers and communities. | |
| Notable Comments - Climate & Natural Environment I5 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) | |
| Community Safety J Community Safety Community safety is both about people's sense of safety and about the community's crime rate. | |
| Injuries J1 Injury Mortality (Falls, Firearms, Drowning)* Injuries related to Community Safety | |
| J2 Motor Vehicle Crash Fatality | |
| Public Safety J3 Property Crime* Comments that the community is not safe. Unsafe neighborhoods | |
| J4 Violent Crime* | |
| Risk Factors J5 Disengaged Youth Comments about risk factors | |
| J6 School Suspensions + Expulsions | |
| Notable Comments - Community Safety J7 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) | |
| Community Infrastructure K Community Infrastructure Having optimal environment and infrastructure for well-being means that streets are designed to be safe and enjoyable for those who are walking or biking. It means that air and water are safe to breathe and drink. It means that technical access infrastructure like broadband internet are available, to connect people to opportunities. | |
| Access to Childcare K1 Childcare Access Disparities Lack of affordable or convient pre-schools. | |
| K2 Childcare Scarcity | |
| Community Amenities K3 Walkability No walking paths or parks in the area | |
| Internet & Technology K4 Cellular Plan Only Bad or no Internet for people to connect to. Few daycare centers. Have to take the bus everywhere. | |
| K5 Internet Access Disparities | |
| K6 No Computer | |
| K7 No High-Speed Internet | |
| Transportation K8 Tansportation Access Inadequate transportation modes and access | |
| Notable Comments - Built Environment K9 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) | |
| Social & Economic Context L Social & Economic Context Social & Economic Context is about whether people feel a sense of belonging to the place where they live and whether they feel connected to one another. Do they trust their leaders? Do they engage in community life and participate in decision-making? | |
| Civic Engagement L1 Census Response Rate People are not engaged or aware of community or city programs. Not able or desire to vote | |
| L2 Voter Participation Rate* | |
| Economic Vitality L3 Business Vacancy Rate Lack of jobs in the community. Unemployment. Closed businesses | |
| L4 Funding for Public Works & Welfare* | |
| Place Attachment L5 Home Ownership Too many rentals. People move around a lot | |
| L6 Net Migration (Population Loss)* | |
| Social Inclusion L7 501c3 organizations People and seniors who live alone. Feelings of isolation and loniliness | |
| L8 Neighborhood Segregation | |
| L9 Older Adults Living Alone | |
| Socioeconomic Disadvantage L10 Area Deprivation Index Comments related to socioeconomic disadvantage | |
| Notable Comments - Social Environment L11 Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1) | |
| There can be multiple excerpts for the same category, include all of them. If there are none, just return an empty values array. | |
| make sure to give the full context of the excerpt, so it is clear without having to go back to the original transcript. | |
| You should return your result in JSON as follows (example): | |
| {{ | |
| "values" : [ | |
| {{ | |
| "category" : "Availability - Primary Care", | |
| "subcategory": "A6", | |
| "codename" : "Primary Care Providers", | |
| "excerpt_from_text_evidence" : "xxxx", | |
| "speaker" : "1" | |
| }}, | |
| ...n] | |
| }} | |
| """ | |
# "Faster" pass: each speaker's text is sent to the model once, together with
# the single combined reference-table prompt, and every labelled excerpt that
# comes back is collected into one table.
# NOTE(review): chunk_by_speaker is defined elsewhere; it is assumed to return
# a mapping of speaker number -> text, as the unpacking below implies.
chunks = chunk_by_speaker(dax_input)
speaker_list = [
    {"speaker_number": speaker_number, "text": text.strip()}
    for speaker_number, text in chunks.items()
]

progress_text = "Processing chunks. Please wait."
my_bar = st.progress(0, text=progress_text)
total_chunks = len(speaker_list)
all_outputs_faster = []

for chunk_counter, chunk in enumerate(speaker_list, start=1):
    # The chunk is intentionally not printed: it holds transcript text and
    # should not be written to the server's stdout/logs.
    response = generate_response(system_message_template_total, chunk["text"])

    # The model is asked for {"values": [...]}, but nothing guarantees it
    # complies. A bad reply for one speaker should not abort the whole run
    # and throw away the results already gathered.
    try:
        values = json.loads(response)["values"]
    except (json.JSONDecodeError, KeyError, TypeError):
        st.warning(
            f"Skipping speaker {chunk['speaker_number']}: the model response "
            "was not valid JSON with a 'values' list."
        )
        values = []
    all_outputs_faster.extend(values)

    # Update progress bar
    my_bar.progress(chunk_counter / total_chunks, text=progress_text)

df_faster = pd.DataFrame(all_outputs_faster)
st.dataframe(df_faster)
with st.expander("Specific Themes (slower but more detailed)"):
    # "Slower" pass: every speaker chunk is labelled once per reference table
    # in table_arr, so the model sees a smaller table per call at the cost of
    # len(table_arr) * len(speaker_list) requests.
    # NOTE(review): chunk_by_speaker is defined elsewhere; it is assumed to
    # return a mapping of speaker number -> text, as the unpacking implies.
    chunks = chunk_by_speaker(dax_input)
    speaker_list = [
        {"speaker_number": speaker_number, "text": text.strip()}
        for speaker_number, text in chunks.items()
    ]

    # Doubled braces are literal braces; only {reference_table} is filled in
    # by str.format below.
    system_message_template = """
You will get snippets from a transcript with a patient. Your job is to label the transcript with SDOH. You should reference the table below only when assigning values.
Reference Table: {reference_table}
There can be multiple excerpts for the same category, include all of them. If there are none, just return an empty values array.
make sure to give the full context of the excerpt, so it is clear without having to go back to the original transcript.
You should return your result in JSON as follows (example):
{{
"values" : [
{{
"category" : "Availability - Primary Care",
"subcategory": "A6",
"codename" : "Primary Care Providers",
"excerpt_from_text_evidence" : "xxxx",
"speaker" : "1"
}},
...n]
}}
"""

    progress_text = "Processing chunks. Please wait."
    my_bar = st.progress(0, text=progress_text)
    total_chunks = len(table_arr) * len(speaker_list)
    chunk_counter = 0
    # Results are accumulated as a flat list of row dicts for the DataFrame.
    all_outputs = []

    for table in table_arr:
        system_message = system_message_template.format(reference_table=table)
        for chunk in speaker_list:
            response = generate_response(system_message, chunk["text"])

            # The model is asked for {"values": [...]}, but nothing guarantees
            # it complies. One bad reply should not abort a run made of many
            # model calls and discard the rows already collected.
            try:
                values = json.loads(response)["values"]
            except (json.JSONDecodeError, KeyError, TypeError):
                st.warning(
                    f"Skipping speaker {chunk['speaker_number']} for one "
                    "reference table: the model response was not valid JSON "
                    "with a 'values' list."
                )
                values = []
            all_outputs.extend(values)

            # Update progress bar
            chunk_counter += 1
            my_bar.progress(chunk_counter / total_chunks, text=progress_text)

    df = pd.DataFrame(all_outputs)
    st.dataframe(df)