import pandas as pd


def date_dict(df_list, pif_key):
    """Return the latest ``encounter_date`` recorded for ``pif_key``.

    Every frame in ``df_list`` is searched for rows whose ``pif_key`` column,
    compared as a string, equals ``str(pif_key)``.

    Args:
        df_list: Iterable of DataFrames, each with ``pif_key`` and
            ``encounter_date`` columns.
        pif_key: Key to look up; compared by its string form.

    Returns:
        ``str()`` of the maximum matching non-null ``encounter_date``, or
        ``''`` when no frame holds a non-null date for the key.
    """
    target = str(pif_key)
    encounter_dates = []
    for df in df_list:
        matches = df.loc[df['pif_key'].astype(str) == target, 'encounter_date']
        # Null dates (None/NaN/NaT) cannot be ordered reliably by max():
        # they raise TypeError against strings and give order-dependent
        # results against floats/datetimes, so they are dropped up front.
        encounter_dates.extend(matches.dropna().values)

    if not encounter_dates:
        return ''
    return str(max(encounter_dates))


def _report_date_rows(dict_list, df_list):
    """Build the log rows (as dicts) for one list of attribute details."""
    missing_rows = []
    # A dict is used as an insertion-ordered set so the "report date exists"
    # rows come out in first-seen order rather than arbitrary set order.
    keys_with_report_date = {}

    for col in dict_list:
        # NOTE(review): the value stored under 'attribute_prediction' is used
        # as the pif_key here - confirm this matches the JSON schema.
        pif_key = col.get('attribute_prediction', None)
        if pif_key is None:
            continue
        if col['attribute_name'] == 'report_date':
            keys_with_report_date[pif_key] = None
            continue
        # One row per non-report_date attribute; date_dict already yields ''
        # when no encounter date is available.
        missing_rows.append({
            'pif_key': pif_key,
            'report_date_exists': False,
            'report_date_missing': True,
            'encounter_dates': None,
            'latest_date': date_dict(df_list, pif_key),
        })

    existing_rows = [
        {
            'pif_key': pif_key,
            'report_date_exists': True,
            'report_date_missing': False,
            'encounter_dates': None,
            'latest_date': '',
        }
        for pif_key in keys_with_report_date
    ]
    return missing_rows + existing_rows


def report_date_check(dict_list, df_list, logging_df):
    """Append report-date log rows for ``dict_list`` to ``logging_df``.

    Entries whose ``attribute_prediction`` is missing or ``None`` are skipped.
    Every other entry that is not a ``report_date`` attribute yields a
    "missing" row carrying the latest encounter date found in ``df_list``;
    each distinct key seen on a ``report_date`` attribute then yields one
    "exists" row. A key may therefore appear in both kinds of row.

    Args:
        dict_list: Iterable of attribute-detail dicts; each must contain
            ``attribute_name`` when ``attribute_prediction`` is set.
        df_list: DataFrames searched by ``date_dict``.
        logging_df: Existing log frame; it is not modified in place.

    Returns:
        A new DataFrame holding the previous rows followed by the new ones,
        or ``logging_df`` itself when there is nothing to add.

    Raises:
        KeyError: If an entry with a prediction lacks ``attribute_name``.
    """
    rows = _report_date_rows(dict_list, df_list)
    if not rows:
        return logging_df

    new_rows = pd.DataFrame(rows)
    if len(logging_df.index) == 0:
        # Nothing to concatenate with: keep the log's column order (plus any
        # columns it lacked) without concatenating against an empty frame.
        columns = list(logging_df.columns)
        columns += [name for name in new_rows.columns if name not in columns]
        return new_rows.reindex(columns=columns)

    # DataFrame.append was removed in pandas 2.0; concat is its replacement.
    return pd.concat([logging_df, new_rows], ignore_index=True)


def json_report_date_insertion(json_data, df_list):
    """Build the report-date log for every biomarker attribute in ``json_data``.

    Args:
        json_data: Mapping whose
            ``['patient_level']['biomarkers']['details']`` is a list of
            biomarker entries, each with an ``attribute`` list whose items
            carry ``attribute_details``.
        df_list: DataFrames searched for encounter dates.

    Returns:
        DataFrame with columns ``pif_key``, ``report_date_exists``,
        ``report_date_missing``, ``encounter_dates`` and ``latest_date``.
    """
    columns = ['pif_key', 'report_date_exists', 'report_date_missing',
               'encounter_dates', 'latest_date']
    rows = []
    for biomarker_detail in json_data['patient_level']['biomarkers']['details']:
        for attribute in biomarker_detail['attribute']:
            rows.extend(
                _report_date_rows(attribute['attribute_details'], df_list)
            )
    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=columns)


# Usage
# Load dataframes df2022, df2023, df2024
# df2022 = pd.read_csv('df2022.csv')
# df2023 = pd.read_csv('df2023.csv')
# df2024 = pd.read_csv('df2024.csv')
# json_data = {}  # Load JSON data
# logging_df = json_report_date_insertion(json_data, [df2022, df2023, df2024])
# print(logging_df)