File size: 2,955 Bytes
9bde11f
 
 
 
 
 
 
 
 
 
 
 
 
 
dfc9b9e
9bde11f
 
 
dfc9b9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9bde11f
 
 
dfc9b9e
9bde11f
dfc9b9e
9bde11f
 
 
 
 
 
 
 
 
dfc9b9e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pandas as pd

def date_dict(df_list, pif_key):
    """Return the latest encounter date recorded for ``pif_key``.

    Every frame in ``df_list`` is filtered to the rows whose ``pif_key``
    column equals ``pif_key`` (both sides compared as strings, so ``1`` and
    ``'1'`` match) and the ``encounter_date`` values of those rows are pooled.

    Args:
        df_list: Iterable of DataFrames, each with ``pif_key`` and
            ``encounter_date`` columns.
        pif_key: Key to look up; compared via its ``str()`` form.

    Returns:
        ``str()`` of the largest pooled date, or ``''`` when no frame holds a
        non-missing date for the key.

    Raises:
        KeyError: If a frame lacks the ``pif_key`` or ``encounter_date`` column.
    """
    wanted = str(pif_key)
    encounter_dates = []
    for df in df_list:
        matches = df.loc[df['pif_key'].astype(str) == wanted, 'encounter_date']
        # Missing dates (None/NaN/NaT) must not reach max(): mixed with strings
        # they raise TypeError, and NaT never compares greater or smaller, so
        # it could be returned as the "latest" date.
        encounter_dates.extend(matches.dropna().values)
    if not encounter_dates:
        return ''
    # NOTE(review): max() uses the values' native ordering; for string dates
    # this is lexicographic, which presumably relies on an ISO-like format.
    return str(max(encounter_dates))

def report_date_check(dict_list, df_list, logging_df):
    """Log, per attribute entry, whether a report date is present.

    Each dict in ``dict_list`` is inspected; entries whose
    ``attribute_prediction`` is missing or ``None`` are skipped. The value of
    ``attribute_prediction`` is used as the row's ``pif_key``.

    * Entries whose ``attribute_name`` is not ``'report_date'`` produce one row
      each (``report_date_missing=True``) whose ``latest_date`` comes from
      ``date_dict(df_list, pif_key)``.
    * Entries whose ``attribute_name`` is ``'report_date'`` produce one row per
      distinct key (``report_date_exists=True``), added after all the
      "missing" rows, in first-seen order.

    Args:
        dict_list: Iterable of dicts with an ``attribute_name`` key and an
            optional ``attribute_prediction`` key.
        df_list: DataFrames forwarded to ``date_dict``.
        logging_df: DataFrame the new rows are added to; it is not modified.

    Returns:
        A DataFrame holding the rows of ``logging_df`` followed by the new
        rows, with a fresh 0..n-1 index. ``logging_df`` itself is returned
        when there is nothing to add.

    Raises:
        KeyError: If an entry with a prediction has no ``attribute_name``.
    """
    new_rows = []
    # A dict de-duplicates like a set but keeps insertion order, which makes
    # the order of the "exists" rows deterministic.
    keys_with_report_date = {}

    for entry in dict_list:
        pif_key = entry.get('attribute_prediction')
        if pif_key is None:
            continue
        if entry['attribute_name'] == 'report_date':
            keys_with_report_date[pif_key] = None
            continue
        new_rows.append({
            'pif_key': pif_key,
            'report_date_exists': False,
            'report_date_missing': True,
            'encounter_dates': None,
            # date_dict already returns '' when no date is found.
            'latest_date': date_dict(df_list, pif_key),
        })

    new_rows.extend(
        {
            'pif_key': pif_key,
            'report_date_exists': True,
            'report_date_missing': False,
            'encounter_dates': None,
            'latest_date': '',
        }
        for pif_key in keys_with_report_date
    )

    if not new_rows:
        return logging_df

    # DataFrame.append was removed in pandas 2.0; build the new rows in one
    # frame and concatenate once instead.
    new_df = pd.DataFrame(new_rows)
    if len(logging_df) == 0:
        # Nothing to concatenate: keep the log's column order (plus any new
        # columns) without involving an empty frame in dtype resolution.
        columns = list(logging_df.columns)
        columns += [name for name in new_df.columns if name not in columns]
        return new_df.reindex(columns=columns)
    return pd.concat([logging_df, new_df], ignore_index=True)

def json_report_date_insertion(json_data, df_list):
    """Build the report-date log for every biomarker attribute in ``json_data``.

    Walks ``json_data['patient_level']['biomarkers']['details']``, and for each
    item of every detail's ``attribute`` list passes its ``attribute_details``
    to ``report_date_check``, accumulating the rows it returns. ``json_data``
    is only read here.

    Args:
        json_data: Nested dict with the path described above.
        df_list: DataFrames forwarded to ``report_date_check``.

    Returns:
        DataFrame with columns ``pif_key``, ``report_date_exists``,
        ``report_date_missing``, ``encounter_dates`` and ``latest_date``.
    """
    log_columns = [
        'pif_key',
        'report_date_exists',
        'report_date_missing',
        'encounter_dates',
        'latest_date',
    ]
    report_log = pd.DataFrame(columns=log_columns)

    details = json_data['patient_level']['biomarkers']['details']
    # Lazily flatten details -> attributes so entries are visited in the same
    # order as a nested loop would visit them.
    attributes = (item for detail in details for item in detail['attribute'])
    for item in attributes:
        report_log = report_date_check(item['attribute_details'], df_list, report_log)

    return report_log

# Usage
# Load dataframes df2022, df2023, df2024
# df2022 = pd.read_csv('df2022.csv')
# df2023 = pd.read_csv('df2023.csv')
# df2024 = pd.read_csv('df2024.csv')

# json_data = {}  # Load JSON data

# logging_df = json_report_date_insertion(json_data, [df2022, df2023, df2024])
# print(logging_df)