Spaces:
Sleeping
Sleeping
Commit
·
ad9add7
1
Parent(s):
f44aa18
Update code/modbaseModelAdd.py
Browse files
- code/modbaseModelAdd.py +16 -6
code/modbaseModelAdd.py
CHANGED
|
@@ -10,7 +10,6 @@ def addModbaseModels(dataframe, path_to_input_files, path_to_output_files):
|
|
| 10 |
# GET MODBASE MODELS
|
| 11 |
# Get IDs from data to retrieve only their models from MODBASE
|
| 12 |
dataframe.reset_index(inplace=True, drop=True)
|
| 13 |
-
|
| 14 |
existing_modbase_models = list(Path(path_to_output_files / 'modbase_structures').glob("*"))
|
| 15 |
existing_modbase_models = [str(i) for i in existing_modbase_models]
|
| 16 |
existing_modbase_models = [i.split('/')[-1].split('.')[0] for i in existing_modbase_models]
|
|
@@ -32,11 +31,15 @@ def addModbaseModels(dataframe, path_to_input_files, path_to_output_files):
|
|
| 32 |
existing_free_sasa = list(Path(path_to_output_files / 'freesasa_files').glob("*"))
|
| 33 |
existing_free_sasa = [str(i) for i in existing_free_sasa]
|
| 34 |
existing_free_sasa = [i.split('/')[-1].split('.')[0] for i in existing_free_sasa]
|
|
|
|
| 35 |
for i in dataframe.index:
|
| 36 |
coordDict = {}
|
| 37 |
protein = dataframe.at[i, 'uniprotID']
|
| 38 |
varPos = int(dataframe.at[i, 'pos'])
|
| 39 |
wt = dataframe.at[i, 'wt']
|
|
|
|
|
|
|
|
|
|
| 40 |
if protein not in existing_modbase_models:
|
| 41 |
print('Downloading Modbase models for ', protein)
|
| 42 |
url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
|
|
@@ -104,7 +107,7 @@ def addModbaseModels(dataframe, path_to_input_files, path_to_output_files):
|
|
| 104 |
'model_id': model_id, 'coordinates': coordDict,
|
| 105 |
'AAonPDB': AAonPDB, 'coordVAR': coordVAR}
|
| 106 |
modbase_reduced = modbase_reduced.append(new_row, ignore_index=True)
|
| 107 |
-
modbase_reduced = modbase_reduced[['uniprotID', 'quality_score', 'model_id', 'coordinates', 'AAonPDB', 'coordVAR']]
|
| 108 |
modbase = dataframe.merge(modbase_reduced, on='uniprotID', how='left')
|
| 109 |
modbase.quality_score = modbase.quality_score.astype(float)
|
| 110 |
modbase = modbase.sort_values(by=['datapoint', 'quality_score'], ascending=False)
|
|
@@ -119,13 +122,20 @@ def addModbaseModels(dataframe, path_to_input_files, path_to_output_files):
|
|
| 119 |
'': np.NaN}, inplace=True)
|
| 120 |
except NameError:
|
| 121 |
print('This file doesnt have Quality Score. Replacer: -999', model_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
else:
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
no_modbase = no_modbase.append(
|
|
|
|
|
|
|
| 126 |
|
| 127 |
no_modbase_no_Coord = modbase[pd.isna(modbase['coordVAR'])]
|
| 128 |
no_modbase = pd.concat([no_modbase, no_modbase_no_Coord])
|
| 129 |
modbase = modbase[~pd.isna(modbase['coordVAR'])]
|
| 130 |
-
|
| 131 |
return modbase, no_modbase
|
|
|
|
| 10 |
# GET MODBASE MODELS
|
| 11 |
# Get IDs from data to retrieve only their models from MODBASE
|
| 12 |
dataframe.reset_index(inplace=True, drop=True)
|
|
|
|
| 13 |
existing_modbase_models = list(Path(path_to_output_files / 'modbase_structures').glob("*"))
|
| 14 |
existing_modbase_models = [str(i) for i in existing_modbase_models]
|
| 15 |
existing_modbase_models = [i.split('/')[-1].split('.')[0] for i in existing_modbase_models]
|
|
|
|
| 31 |
existing_free_sasa = list(Path(path_to_output_files / 'freesasa_files').glob("*"))
|
| 32 |
existing_free_sasa = [str(i) for i in existing_free_sasa]
|
| 33 |
existing_free_sasa = [i.split('/')[-1].split('.')[0] for i in existing_free_sasa]
|
| 34 |
+
keep_cols = dataframe.columns
|
| 35 |
for i in dataframe.index:
|
| 36 |
coordDict = {}
|
| 37 |
protein = dataframe.at[i, 'uniprotID']
|
| 38 |
varPos = int(dataframe.at[i, 'pos'])
|
| 39 |
wt = dataframe.at[i, 'wt']
|
| 40 |
+
mut = dataframe.at[i, 'mut']
|
| 41 |
+
datapoint = dataframe.at[i, 'datapoint']
|
| 42 |
+
|
| 43 |
if protein not in existing_modbase_models:
|
| 44 |
print('Downloading Modbase models for ', protein)
|
| 45 |
url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
|
|
|
|
| 107 |
'model_id': model_id, 'coordinates': coordDict,
|
| 108 |
'AAonPDB': AAonPDB, 'coordVAR': coordVAR}
|
| 109 |
modbase_reduced = modbase_reduced.append(new_row, ignore_index=True)
|
| 110 |
+
modbase_reduced = modbase_reduced[['uniprotID', 'quality_score', 'model_id', 'coordinates', 'AAonPDB', 'coordVAR']]
|
| 111 |
modbase = dataframe.merge(modbase_reduced, on='uniprotID', how='left')
|
| 112 |
modbase.quality_score = modbase.quality_score.astype(float)
|
| 113 |
modbase = modbase.sort_values(by=['datapoint', 'quality_score'], ascending=False)
|
|
|
|
| 122 |
'': np.NaN}, inplace=True)
|
| 123 |
except NameError:
|
| 124 |
print('This file doesnt have Quality Score. Replacer: -999', model_id)
|
| 125 |
+
else:
|
| 126 |
+
new_row = {'uniprotID': uniprot_id, 'wt': wt,
|
| 127 |
+
'pos': varPos, 'mut': mut, 'datapoint': datapoint }
|
| 128 |
+
no_modbase = no_modbase.append(new_row, ignore_index=True)
|
| 129 |
+
|
| 130 |
else:
|
| 131 |
+
new_row = {'uniprotID': uniprot_id, 'wt': wt,
|
| 132 |
+
'pos': varPos, 'mut': mut, 'datapoint': datapoint }
|
| 133 |
+
no_modbase = no_modbase.append(new_row, ignore_index=True)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
|
| 137 |
no_modbase_no_Coord = modbase[pd.isna(modbase['coordVAR'])]
|
| 138 |
no_modbase = pd.concat([no_modbase, no_modbase_no_Coord])
|
| 139 |
modbase = modbase[~pd.isna(modbase['coordVAR'])]
|
| 140 |
+
no_modbase = no_modbase[keep_cols]
|
| 141 |
return modbase, no_modbase
|