jkushwaha
/

code

jkushwaha committed on Apr 22, 2024

Commit

2015edf

verified ·

1 Parent(s): c9a5fe2

Create MSI_issue_check.py

Files changed (1) hide show

MSI_issue_check.py ADDED Viewed

+from glob import glob
+import pandas as pd
+import json
def dicts_to_dataframe(json_data: dict) -> pd.DataFrame:
    """Flatten the biomarker attribute details of one JSON document into a table.

    Walks ``json_data['patient_level']['biomarkers']['details']``. Every entry
    of a biomarker's ``'attribute'`` list becomes one row holding:

    * ``'BM'`` - the biomarker's ``'prediction'`` value,
    * ``'pif_key'`` - the ``'attribute_prediction'`` of the detail whose
      ``'attribute_name'`` is ``'pif_key'`` (blank when the row has none),
    * one column per remaining ``'attribute_name'``, filled with its
      ``'attribute_prediction'``.

    Args:
        json_data: Parsed JSON document with the nested layout described above.

    Returns:
        A DataFrame with one row per attribute entry; missing values are
        replaced by the empty string. Duplicate rows are not removed. The
        frame is empty when there are no attribute entries.

    Raises:
        KeyError: If any of the expected keys is absent from ``json_data``.
    """
    rows = []
    for biomarker in json_data['patient_level']['biomarkers']['details']:
        bm = biomarker['prediction']
        for attribute in biomarker['attribute']:
            details = attribute['attribute_details']
            row = {}
            if details:
                row['BM'] = bm
                # Per-row placeholder: keeps 'pif_key' as the second column and
                # guarantees a row without its own pif_key ends up blank rather
                # than picking up the value of an earlier row.
                row['pif_key'] = None
            for detail in details:
                # A 'pif_key' detail overwrites the placeholder; every other
                # detail becomes its own column (last occurrence wins).
                row[detail['attribute_name']] = detail['attribute_prediction']
            rows.append(row)
    # Build the frame in a single pass instead of concatenating one-row frames.
    df = pd.DataFrame(rows)
    return df.fillna('')
# Scan every patient-level JSON file and collect the rows whose biomarker
# ('BM') is 'MSI', for the files whose base prediction lists 'msi'.
msi_list = []
# sorted() makes the row order of msi_df reproducible; glob order is arbitrary.
for json_f in sorted(glob('/nlp_efs/pat_level_json_delivery_lungca_2024.tar.gz/*.json')):
    filename = json_f.split('/')[-1]
    # The profile key is the file name up to its first dot.
    profile = filename.split('.')[0]
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_f, encoding='utf-8') as of:
        json_data = json.load(of)
    # base_prediction is read as a ';'-separated list of biomarker names.
    tmp = json_data['patient_level']['biomarkers']['base_prediction']
    if 'msi' not in tmp.split(';'):
        continue
    kdf = dicts_to_dataframe(json_data)
    # A document without any attribute rows yields a frame with no 'BM'
    # column; skip it instead of failing on the lookup below.
    if 'BM' not in kdf.columns:
        continue
    kdf.insert(0, 'profile_key', profile)
    # NOTE(review): the file filter matches lower-case 'msi' while the row
    # filter matches upper-case 'MSI' - confirm both spellings are intended.
    msi_list.append(kdf[kdf['BM'] == 'MSI'])
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no file qualified.
msi_df = pd.concat(msi_list) if msi_list else pd.DataFrame()