jkushwaha committed on
Commit
2015edf
·
verified ·
1 Parent(s): c9a5fe2

Create MSI_issue_check.py

Browse files
Files changed (1) hide show
  1. MSI_issue_check.py +40 -0
MSI_issue_check.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from glob import glob
2
+ import pandas as pd
3
+ import json
4
+
5
def dicts_to_dataframe(json_data: dict) -> pd.DataFrame:
    """Flatten the biomarker details of one patient-level JSON into a table.

    Every entry of ``json_data['patient_level']['biomarkers']['details']``
    provides a ``'prediction'`` (stored in the ``BM`` column) and a list of
    ``'attribute'`` groups.  Each group becomes one row: its
    ``'attribute_details'`` items contribute one column per
    ``'attribute_name'`` holding the matching ``'attribute_prediction'``
    (this includes the ``pif_key`` attribute).

    Args:
        json_data: Parsed JSON document with the nesting described above.

    Returns:
        A DataFrame with one row per attribute group, columns in order of
        first appearance, and missing cells filled with ``''``.  An empty
        DataFrame (no columns) is returned when there are no groups.

    Raises:
        KeyError: If one of the expected keys is absent from ``json_data``.
    """
    rows = []
    for biomarker in json_data['patient_level']['biomarkers']['details']:
        bm = biomarker['prediction']
        for group in biomarker['attribute']:
            row = {}
            for detail in group['attribute_details']:
                # Assigned per detail (not once per group) so that a group
                # without details still produces an empty row, as before.
                row['BM'] = bm
                row[detail['attribute_name']] = detail['attribute_prediction']
            rows.append(row)

    # Build the frame in one pass: concatenating row by row is quadratic and
    # concatenating onto an empty frame is deprecated in recent pandas.
    df = pd.DataFrame(rows)
    df = df.fillna('')
    # Duplicate rows are intentionally kept (de-duplication is disabled).
    return df
28
+
29
# Collect, for every patient-level JSON whose base prediction lists 'msi',
# the flattened rows whose BM column equals 'MSI', tagged with the profile key.
# NOTE(review): the pre-filter matches lowercase 'msi' while rows are selected
# on BM == 'MSI' -- confirm that both spellings are intended.
msi_list = []
for json_f in glob('/nlp_efs/pat_level_json_delivery_lungca_2024.tar.gz/*.json'):
    filename = json_f.split('/')[-1]
    # Profile key is the file name up to its first dot.
    profile = filename.split('.')[0]
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(json_f, encoding='utf-8') as of:
        json_data = json.load(of)
    tmp = json_data['patient_level']['biomarkers']['base_prediction']
    if 'msi' not in tmp.split(';'):
        continue
    kdf = dicts_to_dataframe(json_data)
    kdf.insert(0, 'profile_key', profile)
    # A file without any attribute rows yields a frame with no 'BM' column;
    # skip it instead of failing with KeyError.
    if 'BM' not in kdf.columns:
        continue
    msi_list.append(kdf[kdf['BM'] == 'MSI'])

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no file contributed any rows.
msi_df = pd.concat(msi_list) if msi_list else pd.DataFrame()