Spaces:
Sleeping
Sleeping
Commit
·
a1c7054
1
Parent(s):
35bdb15
Update code/pdb_featureVector.py
Browse files
code/pdb_featureVector.py
CHANGED
|
@@ -221,6 +221,7 @@ def pdb(input_set, mode, impute):
|
|
| 221 |
|
| 222 |
cnt = 0
|
| 223 |
for search in pdbs:
|
|
|
|
| 224 |
try:
|
| 225 |
if search.lower() not in existing_pdb:
|
| 226 |
|
|
@@ -228,6 +229,7 @@ def pdb(input_set, mode, impute):
|
|
| 228 |
pdb_url = f"https://files.rcsb.org/download/{search}.pdb"
|
| 229 |
# Set the path within your Hugging Face space where you want to store the PDB files
|
| 230 |
pdb_folder_path = Path(path_to_output_files / 'pdb_structures')
|
|
|
|
| 231 |
# Extract the PDB filename from the URL
|
| 232 |
pdb_filename = pdb_url.split("/")[-1]
|
| 233 |
|
|
@@ -281,6 +283,8 @@ def pdb(input_set, mode, impute):
|
|
| 281 |
pdb_info.at[index, 'resolution'] = 'nan'
|
| 282 |
index += 1
|
| 283 |
cnt += 1
|
|
|
|
|
|
|
| 284 |
st.write(pdb_info)
|
| 285 |
print('PDB file processing finished..')
|
| 286 |
for filename in list(Path(path_to_output_files / 'pdb_structures').glob("*")):
|
|
@@ -304,7 +308,8 @@ def pdb(input_set, mode, impute):
|
|
| 304 |
|
| 305 |
uniprot_matched = uniprot_matched.merge(pdb_fasta, on=['pdbID', 'chain'], how='left')
|
| 306 |
uniprot_matched = uniprot_matched.astype(str)
|
| 307 |
-
|
|
|
|
| 308 |
with_pdb = uniprot_matched[(uniprot_matched.pdbID != 'nan') & (
|
| 309 |
(uniprot_matched.resolution != 'nan') & (uniprot_matched.resolution != 'OT') & (
|
| 310 |
uniprot_matched.resolution != 'None'))].drop_duplicates()
|
|
@@ -313,6 +318,7 @@ def pdb(input_set, mode, impute):
|
|
| 313 |
uniprot_matched.resolution == 'None'))]
|
| 314 |
no_pdb = no_pdb[~no_pdb.datapoint.isin(with_pdb.datapoint.to_list())]
|
| 315 |
no_pdb.drop(columns=['chain', 'pdbID', 'pdbSequence', 'resolution'], inplace=True)
|
|
|
|
| 316 |
st.write(with_pdb)
|
| 317 |
print(
|
| 318 |
'PDB Information successfully added...\nPDB structures are found for %d of %d.\n%d of %d failed to match with PDB structure.\n'
|
|
|
|
| 221 |
|
| 222 |
cnt = 0
|
| 223 |
for search in pdbs:
|
| 224 |
+
st.write('pdb',pdb)
|
| 225 |
try:
|
| 226 |
if search.lower() not in existing_pdb:
|
| 227 |
|
|
|
|
| 229 |
pdb_url = f"https://files.rcsb.org/download/{search}.pdb"
|
| 230 |
# Set the path within your Hugging Face space where you want to store the PDB files
|
| 231 |
pdb_folder_path = Path(path_to_output_files / 'pdb_structures')
|
| 232 |
+
st.write(pdb_folder_path)
|
| 233 |
# Extract the PDB filename from the URL
|
| 234 |
pdb_filename = pdb_url.split("/")[-1]
|
| 235 |
|
|
|
|
| 283 |
pdb_info.at[index, 'resolution'] = 'nan'
|
| 284 |
index += 1
|
| 285 |
cnt += 1
|
| 286 |
+
st.write('pdb_info')
|
| 287 |
+
|
| 288 |
st.write(pdb_info)
|
| 289 |
print('PDB file processing finished..')
|
| 290 |
for filename in list(Path(path_to_output_files / 'pdb_structures').glob("*")):
|
|
|
|
| 308 |
|
| 309 |
uniprot_matched = uniprot_matched.merge(pdb_fasta, on=['pdbID', 'chain'], how='left')
|
| 310 |
uniprot_matched = uniprot_matched.astype(str)
|
| 311 |
+
st.write('uniprot_matched')
|
| 312 |
+
st.write(uniprot_matched)
|
| 313 |
with_pdb = uniprot_matched[(uniprot_matched.pdbID != 'nan') & (
|
| 314 |
(uniprot_matched.resolution != 'nan') & (uniprot_matched.resolution != 'OT') & (
|
| 315 |
uniprot_matched.resolution != 'None'))].drop_duplicates()
|
|
|
|
| 318 |
uniprot_matched.resolution == 'None'))]
|
| 319 |
no_pdb = no_pdb[~no_pdb.datapoint.isin(with_pdb.datapoint.to_list())]
|
| 320 |
no_pdb.drop(columns=['chain', 'pdbID', 'pdbSequence', 'resolution'], inplace=True)
|
| 321 |
+
st.write('with_pdb')
|
| 322 |
st.write(with_pdb)
|
| 323 |
print(
|
| 324 |
'PDB Information successfully added...\nPDB structures are found for %d of %d.\n%d of %d failed to match with PDB structure.\n'
|