File size: 41,710 Bytes
ad56c9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
import openai
import streamlit as st
import os
import json
import time
import requests
from html import escape
from random import choice
import plotly.graph_objects as go
import networkx as nx
from openai import AzureOpenAI
import re
import pandas as pd

# Module-level Azure OpenAI client used by the generate_response* helpers below.
# The key and endpoint are read from the AOAI_API_KEY / AOAI_AZURE_ENDPOINT
# environment variables; os.getenv yields None for an unset variable.
_aoai_key = os.getenv("AOAI_API_KEY")
_aoai_endpoint = os.getenv("AOAI_AZURE_ENDPOINT")
client = AzureOpenAI(
    azure_endpoint=_aoai_endpoint,
    azure_deployment="gpt-4o",
    api_version="2024-05-01-preview",
    api_key=_aoai_key,
)

# Let the page use the full browser width.
st.set_page_config(layout="wide")

def analyze_healthcare_text(text, poll_interval=1.0, max_wait=300.0, request_timeout=30.0):
    """Run a Text Analytics for Health job on ``text`` and return the finished job.

    The text is submitted as a single English document to the asynchronous
    ``analyze-text/jobs`` endpoint, then the job is polled until it completes.

    Args:
        text: The document text to analyse.
        poll_interval: Seconds to sleep between status polls.
        max_wait: Maximum number of seconds to keep polling before giving up.
        request_timeout: Per-request timeout, in seconds, for each HTTP call.

    Returns:
        The decoded JSON body of the job once its ``status`` is ``'succeeded'``.

    Raises:
        requests.HTTPError: If the submit or a poll request returns an HTTP error.
        RuntimeError: If the service returns no ``operation-location`` header, or
            the job ends in the ``'failed'`` or ``'cancelled'`` state.
        TimeoutError: If the job has not succeeded within ``max_wait`` seconds.
    """
    base_url = "https://ta4h-endpoint.cognitiveservices.azure.com/language/analyze-text/jobs"
    api_version = "2022-10-01-preview"
    headers = {
        "Content-Type": "application/json",
        # SECURITY(review): a subscription key is committed in source. It can now be
        # overridden with the TA4H_SUBSCRIPTION_KEY environment variable; the literal
        # is kept only as a backward-compatible fallback and should be rotated/removed.
        "Ocp-Apim-Subscription-Key": os.environ.get(
            "TA4H_SUBSCRIPTION_KEY", "00667ce9381d46a3a279c4799dd698d0"
        ),
    }

    # Body of the initial POST: one Healthcare task over one document.
    data = {
        "tasks": [{"kind": "Healthcare"}],
        "analysisInput": {
            "documents": [
                {
                    "id": "documentId",
                    "text": text,
                    "language": "en"
                }
            ]
        }
    }

    # Submit the job; fail loudly on an HTTP error instead of crashing later.
    response = requests.post(
        f"{base_url}?api-version={api_version}",
        headers=headers,
        json=data,
        timeout=request_timeout,
    )
    response.raise_for_status()

    # The job URL is returned in the operation-location response header.
    operation_location = response.headers.get('operation-location')
    if not operation_location:
        raise RuntimeError("Text Analytics for Health did not return an operation-location header")

    # The job id is the last path segment, without the query string.
    job_id = operation_location.split('/')[-1].split('?')[0]

    # Poll until the job succeeds; stop on terminal failure states or on timeout
    # so a failed job can no longer spin forever.
    deadline = time.monotonic() + max_wait
    while True:
        result_response = requests.get(
            f"{base_url}/{job_id}?api-version={api_version}",
            headers=headers,
            timeout=request_timeout,
        )
        result_response.raise_for_status()
        result = result_response.json()
        status = result.get('status')

        if status == 'succeeded':
            return result
        if status in ('failed', 'cancelled'):
            raise RuntimeError(
                f"Text Analytics for Health job {job_id} ended with status "
                f"'{status}': {result.get('errors')}"
            )
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Text Analytics for Health job {job_id} did not finish within {max_wait} seconds"
            )
        time.sleep(poll_interval)

def annotate_text_with_entities(original_text, entities_data):
    """Build an HTML rendering of ``original_text`` with each entity highlighted.

    Args:
        original_text: The plain text that was analysed.
        entities_data: Result payload whose ``documents[0]['entities']`` list holds
            dicts with ``offset``, ``length``, ``text``, ``category`` and, optionally,
            ``links`` (each link a dict with ``id`` and ``dataSource``).

    Returns:
        A ``(html, category_to_color)`` tuple. ``html`` is a colour-key header
        followed by the text with every entity wrapped in a coloured badge;
        ``category_to_color`` maps each category name to its ``#rrggbb`` colour.

    Side effects:
        Writes ``entities_data`` as JSON to ``entities_data.json`` in the current
        working directory.
    """
    # Dump of the raw payload to disk (existing behaviour, kept as-is).
    with open('entities_data.json', 'w') as f:
        json.dump(entities_data, f)

    # Colour palette for the different categories.
    PALETTE = [
        "#ff4b4b",
        "#ffa421",
        "#ffe312",
        "#21c354",
        "#00d4b1",
        "#00c0f2",
        "#1c83e1",
        "#803df5",
        "#808495",
    ]

    # Alpha suffixes appended to the colour; chosen by offset parity below.
    OPACITIES = [
        "33", "66",
    ]

    entities = entities_data['documents'][0]['entities']

    # dict.fromkeys de-duplicates while keeping first-seen order, so a category
    # gets the same colour on every run (a set would vary with hash seeding).
    unique_categories = list(dict.fromkeys(entity['category'] for entity in entities))

    # Palette colours first; categories beyond the palette get a random colour.
    category_to_color = {}
    for i, category in enumerate(unique_categories):
        if i < len(PALETTE):
            category_to_color[category] = PALETTE[i]
        else:
            category_to_color[category] = '#' + ''.join(choice('0123456789ABCDEF') for _ in range(6))

    def create_entity_html(entity, entity_id):
        """Return the badge HTML for one entity; all payload text is HTML-escaped."""
        color = category_to_color[entity["category"]] + OPACITIES[entity["offset"] % len(OPACITIES)]
        category_label = escape(str(entity["category"]))

        entity_html = f'<span id="entity-{entity_id}"><span style="display: inline-flex; flex-direction: row; align-items: center; background: {color}; border-radius: 0.5rem; padding: 0.25rem 0.5rem; overflow: hidden; line-height: 1;">{escape(entity["text"])}'

        if entity.get("links"):
            # Linked entities get a dropdown listing every linked code.
            options = "".join(
                f'<option value="{escape(str(link["id"]))}">{escape(str(link["dataSource"]))} Code {escape(str(link["id"]))}</option>'
                for link in entity["links"]
            )
            dropdown_html = f'''
            <span style="border-left: 1px solid; opacity: 0.1; margin-left: 0.5rem; align-self: stretch;"></span>
            <span style="margin-left: 0.5rem; display: flex; flex-direction: column; align-items: flex-start;">
                <select style="font-size: 0.75rem; opacity: 0.5;">
                    {options}
                </select>
                <label style="font-size: 0.6rem; margin-top: 0.25rem;">{category_label}</label>
            </span>
            '''
            entity_html += dropdown_html
        else:
            # Without links, only the category label is shown.
            entity_html += f'<span style="border-left: 1px solid; opacity: 0.1; margin-left: 0.5rem; align-self: stretch;"></span><span style="margin-left: 0.5rem; font-size: 0.75rem; opacity: 0.5;">{category_label}</span>'

        # Close the two wrapping spans.
        entity_html += '</span></span>'
        return entity_html

    # Walk the text left to right, escaping the plain segments between entities.
    # The input is user-supplied and the result is rendered as raw HTML, so any
    # markup typed by the user must not reach the page unescaped.
    pieces = []
    cursor = 0
    for entity_id, entity in sorted(enumerate(entities), key=lambda pair: pair[1]['offset']):
        start = entity['offset']
        end = start + entity['length']
        if start < cursor:
            # Overlaps a span that was already rendered; badges cannot nest, so skip it.
            continue
        pieces.append(escape(original_text[cursor:start], quote=False))
        pieces.append(create_entity_html(entity, entity_id))
        cursor = end
    pieces.append(escape(original_text[cursor:], quote=False))

    # Colour key listing every category with its swatch.
    color_key_section = "<strong>Color Key:</strong><br>"
    for category, color in category_to_color.items():
        color_key_section += f'<span style="display: inline-block; background: {color}; width: 1em; height: 1em; margin-right: 0.5em; vertical-align: middle;"></span>{escape(str(category))}<br>'

    return color_key_section + ''.join(pieces), category_to_color

def create_interactive_graph_from_json(json_data, category_to_color):
    """Build a Plotly figure of the entities and relations of the first document.

    Args:
        json_data: Result payload; ``documents[0]['entities']`` supplies the nodes
            and the optional ``documents[0]['relations']`` supplies the edges. Each
            relation's first two ``entities[*]['ref']`` values must end in the
            index of the entity they refer to.
        category_to_color: Mapping from entity category to marker colour.

    Returns:
        A ``plotly.graph_objects.Figure`` with one edge trace and two node traces
        (all entities / only entities that take part in a relation), plus two
        buttons that switch between the node traces.
    """
    document = json_data['documents'][0]
    entities = document['entities']
    relations = document.get('relations', [])

    # Directed graph: one node per entity, keyed by its index in the entity list.
    graph = nx.DiGraph()
    for i, entity in enumerate(entities):
        graph.add_node(i, label=entity['text'], category=entity['category'])

    # One edge per relation, from its first referenced entity to its second.
    for relation in relations:
        source_index = int(relation['entities'][0]['ref'].split('/')[-1])
        target_index = int(relation['entities'][1]['ref'].split('/')[-1])
        graph.add_edge(source_index, target_index, label=relation['relationType'])

    # Node coordinates from a spring layout.
    pos = nx.spring_layout(graph)

    def build_node_trace(node_ids, visible):
        """Scatter trace for ``node_ids``; colours are derived from the same ids."""
        return go.Scatter(
            x=[pos[n][0] for n in node_ids],
            y=[pos[n][1] for n in node_ids],
            text=[graph.nodes[n]['label'] for n in node_ids],
            mode='markers+text',
            hoverinfo='text',
            # Computing the colours per trace keeps them aligned with its own
            # nodes (the subset trace used to reuse the all-nodes colour list).
            marker=dict(color=[category_to_color[graph.nodes[n]['category']] for n in node_ids], size=10),
            visible=visible,
        )

    # Edge segments; None separates consecutive segments within one trace.
    x_edges = []
    y_edges = []
    for source, target in graph.edges:
        x_edges += [pos[source][0], pos[target][0], None]
        y_edges += [pos[source][1], pos[target][1], None]

    edge_trace = go.Scatter(x=x_edges, y=y_edges, line=dict(width=0.5, color='#888'), hoverinfo='none', mode='lines')

    all_nodes = list(graph.nodes)
    nodes_with_edges = sorted({node for edge in graph.edges for node in edge})

    # Start in the "All Entities" view: the subset trace is hidden until its
    # button is pressed, so markers and labels are not drawn twice.
    node_trace_all = build_node_trace(all_nodes, True)
    node_trace_with_edges = build_node_trace(nodes_with_edges, False)

    fig = go.Figure(data=[edge_trace, node_trace_all, node_trace_with_edges],
                    layout=go.Layout(
                        # title.font replaces the deprecated ``titlefont_size`` attribute.
                        title=dict(text='Entities and Relationships in Patient Notes', font=dict(size=16)),
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=20, l=5, r=5, t=40),
                        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                        updatemenus=[dict(type="buttons",
                                          x=1.15,
                                          y=1.2,
                                          buttons=[dict(label="All Entities",
                                                        method="update",
                                                        args=[{"visible": [True, True, False]}]),
                                                   dict(label="Entities with Relationships",
                                                        method="update",
                                                        args=[{"visible": [True, False, True]}])])]))

    return fig

def format_sdoh_entities_as_list(json_data):
    """Format the social-determinants-of-health entities as a Markdown bullet list.

    Args:
        json_data: Result payload with a ``documents`` list; each document has an
            ``entities`` list of dicts carrying ``category`` and ``text``.

    Returns:
        One ``- **CATEGORY** : 'text'`` bullet per matching entity, joined with
        newlines; an empty string when nothing matches.
    """
    # Categories are compared after upper-casing. The service reports the living
    # status category as "LivingStatus", which upper-cases to "LIVINGSTATUS"; the
    # older underscore spelling is kept so existing inputs still match.
    relevant_categories = frozenset({
        'EMPLOYMENT',
        'LIVING_STATUS',
        'LIVINGSTATUS',
        'SUBSTANCEUSE',
        'SUBSTANCEUSEAMOUNT',
        'ETHNICITY',
    })

    formatted_result = []
    for document in json_data['documents']:
        for entity in document['entities']:
            category = entity['category'].upper()
            if category in relevant_categories:
                formatted_result.append(f"- **{category}** : '{entity['text']}' \n")

    # The former debug print of the list was dropped: it echoed patient text to stdout.
    return '\n'.join(formatted_result)

# Coding codebook stored as a list of tab-separated tables, one string per category.
# Every non-empty table starts with the same header row: Category,
# Sub Category (aka "Themes"), Codes, Code Name, Exerpt Examples.
# Three entries are empty strings. The code letters skip "C", so the first empty
# entry presumably stands in for a removed category -- TODO confirm, and confirm
# what the other two empty entries represent.
# NOTE(review): no reader of table_arr is visible in this part of the file. The
# strings contain spelling slips (e.g. "Exerpt", "Tansportation"); they are runtime
# data, so check with whatever consumes them before correcting the wording.
table_arr = [
    """Category	Sub Category (aka "Themes")	Codes	Code Name (Green= Indicator, Yellow= concept) 	Exerpt Examples
		A	Access to Care	Access to health care is the timely use of personal health services to achieve the best possible health outcomes and pertains to a person's ability to access needed health or social services.
Access to Care	Availability - Hospitals & Clinics	A1	FQHCs, Rate Per Low-Income Population	Hospitals and Clinics are very over crowded.  They are a long way from where I live.  Services not provided in my area. Urgent Care closed when I am sick.  
		A2	Hospital Beds Per Capita	
		A3	Proximity to Hospitals with ER	This refers to comments about not having enough doctors and nurses in a given geographic context.
	Availability - Mental Health Care	A4	Mental Health Professional Shortage Areas	"This refers to comments about not haveing enough Doctors and nurses to service the need of the mental needs of the community.  Long wait times to get an appointment. Doctors not taking new patients
Having to go to ER for mental health issues"
		A5	Mental Health Providers	Comments related to not having enough mental health providers
	Availability - Primary Care	A6	Primary Care Providers	"This refers to comments about not having enough Doctors and nurses to service the need of the mental needs of the community.  Long wait times to get an appointment. Doctors not taking new patients
Having to go to ER for mental health issues"
		A7	Primary Care Shortage Areas	This refers to comments about not having enough doctors and nurses in a given geographic context.
	Availability - Specialty Care	A8	Maternal Care Providers	
		A9	Dental Care Providers	
	Barriers - Health Literacy	A10	Educational Attainment	Appointments on-line and do not know how to make appointment.  They do not speak my language.  I do not know what or who to ask a question.  Do not understand what they did or what I need to do now. 
		A11	Limited English Proficiency	
	Barriers - Medical Insurance	A12	Health Insurance Disparities	I can't get insurance from my job.  I am not eligible for medicaid or do not know how to sign up.  Can not afford Insurance (see Financial Stability / Income)
		A13	Population without Medical Insurance	
	Barriers - Transportation	A14	Distance to Public Transit	I don't have a way to get there. I don't have a car.  We only have one car.  Can't afford a car (Financial Stability / Income)
		A15	Households with No Vehicle	7
	Notable Comments - Access to Care	A16	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
    """Category	Sub Category (aka "Themes")	Codes	Code Name (Green= Indicator, Yellow= concept) 	Exerpt Examples
Health Conditions		B	Health Conditions	According to the World Health Organization (WHO), health is a state of complete physical, mental, and social well-being and not merely the absence of disease. Chronic diseases are defined broadly as conditions that last 1 year or more and require ongoing medical attention or limit daily activities.
	Asthma & COPD	B1	Lung Disease Mortality	Comments related to Asthma & COPD
		B2	Lung Disease Prevalence	
	Cancers	B3	Cancer Prevalence	Comments related to Cancers
		B4	Cancer Mortality*	
	Chronic Brain Disorders	B5	Alzheimer's Disease Mortality*	Comments related to Chronic Brain Disorders
		B6	Alzheimer's Disease Prevalence	
	Heart Disease & Stroke	B7	Heart Disease & Stroke Mortality*	Comments related to Heart Disease & Stroke
		B8	Heart Disease Prevalence	
	Kidney & Liver Diseases	B9	Kidney Disease Prevalence	Comments related to Chronic Kidney & Liver Diseases
		B10	Liver Disease Mortality*	
	Obesity & Diabetes	B11	Diabetes	Comments related to Obesity & Diabetes
		B12	Obesity 	
	Preventable Death	B13	Premature Death Disparities*	
	Aging Conditions	B14		
	Notable Comments - Health Conditions	B15	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
    """""",
    """Category	Sub Category (aka "Themes")	Codes	Code Name (Green= Indicator, Yellow= concept) 	Exerpt Examples
Mental Health	Health Outcomes - Anxiety & Depression	D1	Mental Health Diagnoses*	COVID, economy, etc cause stress and depression
		D2	Poor Mental Health	
	Health Outcomes - Deaths of Despair	D3	Deaths of Despair*	Suicide, and drug overdose.  Pill mills.  Big Pharma 
		D4	Suicide Mortality*	
	Risk Factors - Access to Care	D5	Access to Mental Health Providers	Lack of services for the mental ill.  
		D6	Medical Insurance	
	Risk Factors - Drugs & Alcohol	D7	Binge Drinking	Lack of treatment options
		D8	Substance Use Disorder*	
	Risk Factors - Stress & Trauma	D9	Unemployment	Domestic violance.  Child abuse.  
		D10	Violent Crime Rate*	
	Notable Comments - Mental Health	D11	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
    """Category	Sub Category (aka "Themes")	Codes	Code Name (Green= Indicator, Yellow= concept) 	Exerpt Examples
Food Security		E	Food Security	Food security means that all people, at all times, have physical, social, and economic access to sufficient, safe, and nutritious food that meets their food preferences and dietary needs for an active and healthy life.
	Economic Security	E1	Free/Reduced Price Lunch	Groceries cost too much.  No money for good food.  
		E2	Poverty (100% FPL)	
	Food Access	E3	Access to Healthy Food	No supermarkets in the area.  Bad food choice selection.
		E4	Healthy Food Access Disparities	
		E5	Local Food Outlets	
		E6	SNAP-Authorized Retailers	No supermarkets accept SNAP
	Notable Comments - Food Security	E7	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
    """""",
    """Category	Sub Category (aka "Themes")	Codes	Code Name (Green= Indicator, Yellow= concept) 	Exerpt Examples
Education		F	Education	Educational attainment is a well-established social determinant of health. It affects health through many mechanisms such as neural development, biological aging, health literacy and health behaviors, sense of control and empowerment, and life chances.
	Achievement	F1	Chronic Absenteeism	Kids do not do well in school.  Skipping class
		F2	English Language Arts Proficiency	
	Attainment	F3	Associate's Degree or Higher	Students dropping out of school or not going to college
		F4	Educational Attainment Disparities	
		F5	High School Graduation Rate	
	Early Childhood	F6	Childcare Scarcity	Lack of affordable or convient pre-schools.
		F7	Preschool Enrollment	
	Notable Comments - Education	F8	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
    """Category	Sub Category (aka "Themes")	Codes	Code Name (Green= Indicator, Yellow= concept) 	Exerpt Examples
Financial Stability		G	Financial Stability	Financial stability is the ability for someone to withstand financial emergencies and sustain themselves and their family. Financial stability enables families to access safe housing, healthy foods, and other necessities, to engage fully in their communities, and to plan for the future. 
	Employment	G1	Labor Force Participation Rate	No jobs or well paying jobs.  Only min wage jobs
		G2	Unemployment	
	Income	G3	Childhood Poverty Rate	Do not make enough to pay the bills.  Just getting by.  Pay check to pay check
		G4	Senior Poverty Rate	
		G5	Income Inequality	
		G6	Median Household Income	
	Security	G7	Housing Cost Burden (30%)	Rent takes up all my income.  Can't afford to but or rent a house.
		G8	Population with Debt*	
	Notable Comments - Financial Stability	G9	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
    """""",
    """Category	Sub Category (aka "Themes")	Codes	Code Name (Green= Indicator, Yellow= concept) 	Exerpt Examples
		H	Housing	Homelessness can take many forms, with people living on the streets, in encampments or shelters, in transitional housing programs, or doubled up with family and friends.
Housing	Homelessness	H1	Evictions*	Homelessness status or mention of increase
		H2	Homeless Population*	
		H3	Homeless Students	
	Housing Costs	H4	Low-Income Housing	Appartments too expensive.  Can not afford to buy a house.  Rent take up my budget
		H5	Housing + Transportation Affordability Index	
		H6	Median Household Income	
		H7	Severe Housing Cost Burden (50%)	
	Housing Quality	H8	Overcrowded Housing	Housing conditions are not desirable for overall health and well-being.
		H9	Owner Occupied Households	
		H10	Renter Occupied Households	
		H11	Incomplete Facilities (Plumbing, Kitchen)	
	Notable Comments - Housing	H12	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
    """Category	Sub Category (aka "Themes")	Codes	Code Name (Green= Indicator, Yellow= concept) 	Exerpt Examples
Climate & Natural Environment		I	Climate & Natural Environment	Climate and the natural environment are about the impacts of climate change on livelihoods in the community. 
	Physical Environment - Air & Water	I1	Air Toxics Risk	Do not trust the water.  Chemicals in the air.  Too close to the fields with pesticides
		I2	Particulate Matter (PM 2.5)	Air quality from particulate matter (PM 2.5) and toxics risks are harmful to short-term and long-term health and has been associated with chronic health conditions such as lung disease, respiratory infections and heart disease among others. 
	Physical Environment - Heat & Climate	I3	Disaster Risk Index	Global warming.  Droughts.  Floods.  Wild fires
		I4	Extreme Heat Days*	Extreme heat is a climate-driven hazard that affects the capacity of individuals, workers and communities.
	Notable Comments - Climate & Natural Environment	I5	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
    """Category	Sub Category (aka "Themes")	Codes	Code Name (Green= Indicator, Yellow= concept) 	Exerpt Examples
Community Safety		J	Community Safety	Community safety is both about people's sense of safety and about the community's crime rate. 
	Injuries	J1	Injury Mortality (Falls, Firearms, Drowning)*	Injuries related to Community Safety
		J2	Motor Vehicle Crash Fatality	
	Public Safety	J3	Property Crime*	Comments that the community is not safe.  Unsafe neighborhoods
		J4	Violent Crime*	
	Risk Factors	J5	Disengaged Youth	Comments about risk factors
		J6	School Suspensions + Expulsions	
	Notable Comments - Community Safety	J7	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
    """Category	Sub Category (aka "Themes")	Codes	Code Name (Green= Indicator, Yellow= concept) 	Exerpt Examples
Community Infrastructure		K	Community Infrastructure	Having optimal environment and infrastructure for well-being means that streets are designed to be safe and enjoyable for those who are walking or biking. It means that air and water are safe to breathe and drink. It means that technical access infrastructure like broadband internet are available, to connect people to opportunities.
	Access to Childcare	K1	Childcare Access Disparities	Lack of affordable or convient pre-schools.
		K2	Childcare Scarcity	
	Community Amenities	K3	Walkability	No walking paths or parks in the area
	Internet & Technology	K4	Cellular Plan Only	Bad or no Internet for people to connect to.  Few daycare centers.  Have to take the bus everywhere.
		K5	Internet Access Disparities	
		K6	No Computer	
		K7	No High-Speed Internet	
	Transportation	K8	Tansportation Access	Inadequate transportation modes and access
	Notable Comments - Built Environment	K9	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)""",
    """Category	Sub Category (aka "Themes")	Codes	Code Name (Green= Indicator, Yellow= concept) 	Exerpt Examples
Social & Economic Context		L	Social & Economic Context	Social & Economic Context is about whether people feel a sense of belonging to the place where they live and whether they feel connected to one another. Do they trust their leaders? Do they engage in community life and participate in decision-making? 
	Civic Engagement	L1	Census Response Rate	People are not engaged or aware of community or city programs.  Not able or desire to vote
		L2	Voter Participation Rate*	
	Economic Vitality	L3	Business Vacancy Rate	Lack of jobs in the community.  Unemployment.  Closed businesses
		L4	Funding for Public Works & Welfare*	
	Place Attachment	L5	Home Ownership	Too many rentals.  People move around a lot
		L6	Net Migration (Population Loss)*	
	Social Inclusion	L7	501c3 organizations	People and seniors who live alone.  Feelings of isolation and loniliness
		L8	Neighborhood Segregation	
		L9	Older Adults Living Alone	
	Socioeconomic Disadvantage	L10	Area Deprivation Index	Comments related to socioeconomic disadvantage
	Notable Comments - Social Environment	L11	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)"""
]

def chunk_by_speaker(transcript):
    """
    Group the lines of a transcript by speaker.

    A line that begins with "Speaker <number>" switches the current speaker; that
    line and every following line (up to the next such line) are credited to that
    speaker. Lines that come before the first speaker line are discarded.

    Args:
        transcript: The transcript string.

    Returns:
        A dictionary mapping each speaker number (int) to all of that speaker's
        lines, each terminated by a newline, in order of appearance. Keys are in
        the order the speakers first appear.
    """
    speaker_header = re.compile(r"Speaker (\d+)")

    collected = {}   # speaker number -> list of that speaker's lines
    active = None    # speaker currently talking; None until the first header

    for raw_line in transcript.splitlines():
        header = speaker_header.match(raw_line)
        if header is not None:
            active = int(header.group(1))
            collected.setdefault(active, [])

        # Nothing is recorded until a speaker has been seen.
        if active is None:
            continue
        collected[active].append(raw_line + "\n")

    return {speaker: "".join(chunks) for speaker, chunks in collected.items()}



def generate_response(system_message, input_text):
    """
    Request a JSON-object chat completion from the module-level client.

    Args:
        system_message: Content of the system message.
        input_text: Content of the user message.

    Returns:
        The message content of the first returned choice. If anything raises
        while making the call, the string "Error generating response: <error>"
        is returned instead of propagating the exception.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": input_text},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=conversation,
            response_format={"type": "json_object"},
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        return f"Error generating response: {err}"
    
def generate_response_summary(system_message, input_text):
    """
    Request a free-form chat completion for the given text.

    The request goes through the module-level ``client`` to the "gpt-4o"
    model. No ``response_format`` is sent, so the reply is not constrained
    to JSON.

    Args:
        system_message: Instructions sent as the system turn.
        input_text: The input text to be analyzed, sent as the user turn.

    Returns:
        The message content of the first choice in the response. If anything
        raises along the way the exception is not propagated; a string of the
        form "Error generating response: <exception>" is returned instead.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": input_text},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=conversation,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        return f"Error generating response: {err}"
    

# Page layout: two columns with relative widths 2 and 5
col1, col2 = st.columns([2, 5])

# Seed each session-state slot with the placeholder 'value' the first time the
# script runs in a session; slots that already hold something are left alone.
for _state_key in ('r', 'r_annotated', 'colour_to_category'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = 'value'

# Input column: the transcript text box and the button that triggers analysis.
with col1:
    col1.subheader("Patient Note Input")

    st.text("Enter your text input below:")

    # Streamlit discourages empty widget labels (accessibility; it warns about
    # them), so the text area gets a real label that is hidden from view. The
    # st.text line above remains the visible prompt.
    dax_input = st.text_area("Patient note", height=500, label_visibility="collapsed")
    analyze_btn = st.button("Analyze")



def _sdoh_values_or_none(raw_response):
    """
    Pull the "values" list out of one generate_response() result.

    Args:
        raw_response: What generate_response() returned; normally a JSON
            object serialized as a string.

    Returns:
        The list stored under "values" (an empty list when the key is absent),
        or None when raw_response is not a JSON object with a list there.
        That is the case when generate_response() hands back its
        "Error generating response: ..." text instead of JSON.
    """
    try:
        payload = json.loads(raw_response)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a non-string result such as None
        return None
    if not isinstance(payload, dict):
        return None
    found = payload.get("values", [])
    return found if isinstance(found, list) else None


# Output column: Text Analytics results (cached in session state so they
# survive reruns) followed, on an Analyze click, by the GPT-4o theme coding.
with col2:
    col2.subheader("Text Analytics for Health Output")

    if analyze_btn:
        # Analyze and cache first, so the results are rendered exactly once by
        # the section below. Rendering the cached copy before re-analysing
        # would show the previous results and the new ones on the same page.
        st.session_state.r = analyze_healthcare_text(dax_input)["tasks"]["items"][0]["results"]
        r_annotated, category_to_color = annotate_text_with_entities(dax_input, st.session_state.r)
        st.session_state.r_annotated = r_annotated
        st.session_state.colour_to_category = category_to_color

    # 'value' is the placeholder meaning "nothing has been analysed yet"
    if st.session_state.r_annotated != 'value':
        with st.expander("Entity Mappings"):
            # NOTE(review): rendered as raw HTML; presumably
            # annotate_text_with_entities escapes the user's text - confirm.
            st.markdown(st.session_state.r_annotated, unsafe_allow_html=True)
        with st.expander("Show Relationships"):
            st.plotly_chart(create_interactive_graph_from_json(st.session_state.r, st.session_state.colour_to_category), use_container_width=True)
        with st.expander("Show JSON"):
            st.json(st.session_state.r)
        with st.expander("Show SDOH"):
            st.write("Social Determinants of Health (SDOH) Entities")
            st.write(format_sdoh_entities_as_list(st.session_state.r))

    if analyze_btn:
        col2.subheader("GPT-4o for Health Output")
        with st.expander("General Themes"):
            st.write(generate_response_summary("Create a overal report of the SDOH themes mentioned in the text", dax_input))

        # Split the transcript once; both "Specific Themes" passes below work
        # on the same per-speaker chunks.
        chunks = chunk_by_speaker(dax_input)
        speaker_list = [{"speaker_number": speaker_number, "text": text.strip()} for speaker_number, text in chunks.items()]
        progress_text = "Processing chunks. Please wait."

        with st.expander("Specific Themes (faster but less detailed)"):
            # One request per speaker, with the whole reference table in the prompt.
            # This prompt is sent verbatim (it is never passed through str.format),
            # so the JSON example is written with single braces.
            system_message_template_total = """
                You will get snippets from a transcript with a patient. Your job is to label the transcript with SDOH. You should reference the table below only when assigning values. 

                Reference Table: 
                Food Security		E	Food Security	Food security means that all people, at all times, have physical, social, and economic access to sufficient, safe, and nutritious food that meets their food preferences and dietary needs for an active and healthy life.
	Economic Security	E1	Free/Reduced Price Lunch	Groceries cost too much.  No money for good food.  
		E2	Poverty (100% FPL)	
	Food Access	E3	Access to Healthy Food	No supermarkets in the area.  Bad food choice selection.
		E4	Healthy Food Access Disparities	
		E5	Local Food Outlets	
		E6	SNAP-Authorized Retailers	No supermarkets accept SNAP
	Notable Comments - Food Security	E7	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Education		F	Education	Educational attainment is a well-established social determinant of health. It affects health through many mechanisms such as neural development, biological aging, health literacy and health behaviors, sense of control and empowerment, and life chances.
	Achievement	F1	Chronic Absenteeism	Kids do not do well in school.  Skipping class
		F2	English Language Arts Proficiency	
	Attainment	F3	Associate's Degree or Higher	Students dropping out of school or not going to college
		F4	Educational Attainment Disparities	
		F5	High School Graduation Rate	
	Early Childhood	F6	Childcare Scarcity	Lack of affordable or convient pre-schools.
		F7	Preschool Enrollment	
	Notable Comments - Education	F8	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Financial Stability		G	Financial Stability	Financial stability is the ability for someone to withstand financial emergencies and sustain themselves and their family. Financial stability enables families to access safe housing, healthy foods, and other necessities, to engage fully in their communities, and to plan for the future. 
	Employment	G1	Labor Force Participation Rate	No jobs or well paying jobs.  Only min wage jobs
		G2	Unemployment	
	Income	G3	Childhood Poverty Rate	Do not make enough to pay the bills.  Just getting by.  Pay check to pay check
		G4	Senior Poverty Rate	
		G5	Income Inequality	
		G6	Median Household Income	
	Security	G7	Housing Cost Burden (30%)	Rent takes up all my income.  Can't afford to but or rent a house.
		G8	Population with Debt*	
	Notable Comments - Financial Stability	G9	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
		H	Housing	Homelessness can take many forms, with people living on the streets, in encampments or shelters, in transitional housing programs, or doubled up with family and friends.
Housing	Homelessness	H1	Evictions*	Homelessness status or mention of increase
		H2	Homeless Population*	
		H3	Homeless Students	
	Housing Costs	H4	Low-Income Housing	Appartments too expensive.  Can not afford to buy a house.  Rent take up my budget
		H5	Housing + Transportation Affordability Index	
		H6	Median Household Income	
		H7	Severe Housing Cost Burden (50%)	
	Housing Quality	H8	Overcrowded Housing	Housing conditions are not desirable for overall health and well-being.
		H9	Owner Occupied Households	
		H10	Renter Occupied Households	
		H11	Incomplete Facilities (Plumbing, Kitchen)	
	Notable Comments - Housing	H12	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Climate & Natural Environment		I	Climate & Natural Environment	Climate and the natural environment are about the impacts of climate change on livelihoods in the community. 
	Physical Environment - Air & Water	I1	Air Toxics Risk	Do not trust the water.  Chemicals in the air.  Too close to the fields with pesticides
		I2	Particulate Matter (PM 2.5)	Air quality from particulate matter (PM 2.5) and toxics risks are harmful to short-term and long-term health and has been associated with chronic health conditions such as lung disease, respiratory infections and heart disease among others. 
	Physical Environment - Heat & Climate	I3	Disaster Risk Index	Global warming.  Droughts.  Floods.  Wild fires
		I4	Extreme Heat Days*	Extreme heat is a climate-driven hazard that affects the capacity of individuals, workers and communities.
	Notable Comments - Climate & Natural Environment	I5	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Community Safety		J	Community Safety	Community safety is both about people's sense of safety and about the community's crime rate. 
	Injuries	J1	Injury Mortality (Falls, Firearms, Drowning)*	Injuries related to Community Safety
		J2	Motor Vehicle Crash Fatality	
	Public Safety	J3	Property Crime*	Comments that the community is not safe.  Unsafe neighborhoods
		J4	Violent Crime*	
	Risk Factors	J5	Disengaged Youth	Comments about risk factors
		J6	School Suspensions + Expulsions	
	Notable Comments - Community Safety	J7	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Community Infrastructure		K	Community Infrastructure	Having optimal environment and infrastructure for well-being means that streets are designed to be safe and enjoyable for those who are walking or biking. It means that air and water are safe to breathe and drink. It means that technical access infrastructure like broadband internet are available, to connect people to opportunities.
	Access to Childcare	K1	Childcare Access Disparities	Lack of affordable or convient pre-schools.
		K2	Childcare Scarcity	
	Community Amenities	K3	Walkability	No walking paths or parks in the area
	Internet & Technology	K4	Cellular Plan Only	Bad or no Internet for people to connect to.  Few daycare centers.  Have to take the bus everywhere.
		K5	Internet Access Disparities	
		K6	No Computer	
		K7	No High-Speed Internet	
	Transportation	K8	Tansportation Access	Inadequate transportation modes and access
	Notable Comments - Built Environment	K9	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)
Social & Economic Context		L	Social & Economic Context	Social & Economic Context is about whether people feel a sense of belonging to the place where they live and whether they feel connected to one another. Do they trust their leaders? Do they engage in community life and participate in decision-making? 
	Civic Engagement	L1	Census Response Rate	People are not engaged or aware of community or city programs.  Not able or desire to vote
		L2	Voter Participation Rate*	
	Economic Vitality	L3	Business Vacancy Rate	Lack of jobs in the community.  Unemployment.  Closed businesses
		L4	Funding for Public Works & Welfare*	
	Place Attachment	L5	Home Ownership	Too many rentals.  People move around a lot
		L6	Net Migration (Population Loss)*	
	Social Inclusion	L7	501c3 organizations	People and seniors who live alone.  Feelings of isolation and loniliness
		L8	Neighborhood Segregation	
		L9	Older Adults Living Alone	
	Socioeconomic Disadvantage	L10	Area Deprivation Index	Comments related to socioeconomic disadvantage
	Notable Comments - Social Environment	L11	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)	Notable comments should be coded here and in the appropriate category it belongs in. (ex: A7 & A1)

                There can be multiple excerpts for the same category, include all of them. If there are none, just return an empty values array.
                make sure to give the full context of the excerpt, so it is clear without having to go back to the original transcript.
                You should return your result in JSON as follows (example):

                {
                "values" : [
                {
                "category" : "Availability - Primary Care",
                "subcategory": "A6",
                "codename" : "Primary Care Providers",
                "excerpt_from_text_evidence" : "xxxx",
                "speaker" : "1"
                },
                ...n]
                }
        """

            my_bar = st.progress(0, text=progress_text)
            total_chunks = len(speaker_list)

            all_outputs_faster = []

            # The transcript chunk is deliberately not printed: it is patient
            # text and would end up in the server's console output.
            for chunk_counter, chunk in enumerate(speaker_list, start=1):
                response = generate_response(system_message_template_total, chunk["text"])
                values = _sdoh_values_or_none(response)
                if values is None:
                    # A failed or malformed reply costs this chunk only, not the whole run
                    st.warning(f"Speaker {chunk['speaker_number']}: no usable model response, chunk skipped ({response})")
                else:
                    all_outputs_faster.extend(values)

                # Update progress bar
                my_bar.progress(chunk_counter / total_chunks, text=progress_text)

            df_faster = pd.DataFrame(all_outputs_faster)
            st.dataframe(df_faster)

        with st.expander("Specific Themes (slower but more detailed)"):
            # One request per (reference table, speaker) pair: each entry of
            # table_arr is substituted into the prompt in turn.
            system_message_template = """
                You will get snippets from a transcript with a patient. Your job is to label the transcript with SDOH. You should reference the table below only when assigning values. 

                Reference Table: {reference_table}
                There can be multiple excerpts for the same category, include all of them. If there are none, just return an empty values array.
                make sure to give the full context of the excerpt, so it is clear without having to go back to the original transcript.
                You should return your result in JSON as follows (example):

                {{
                "values" : [
                {{
                "category" : "Availability - Primary Care",
                "subcategory": "A6",
                "codename" : "Primary Care Providers",
                "excerpt_from_text_evidence" : "xxxx",
                "speaker" : "1"
                }},
                ...n]
                }}
        """

            my_bar = st.progress(0, text=progress_text)

            total_chunks = len(table_arr) * len(speaker_list)
            chunk_counter = 0

            all_outputs = []

            for table in table_arr:
                system_message = system_message_template.format(reference_table=table)
                for chunk in speaker_list:
                    response = generate_response(system_message, chunk["text"])
                    values = _sdoh_values_or_none(response)
                    if values is None:
                        # A failed or malformed reply costs this request only, not the whole run
                        st.warning(f"Speaker {chunk['speaker_number']}: no usable model response, chunk skipped ({response})")
                    else:
                        all_outputs.extend(values)

                    # Update progress bar
                    chunk_counter += 1
                    my_bar.progress(chunk_counter / total_chunks, text=progress_text)

            df = pd.DataFrame(all_outputs)
            st.dataframe(df)