Spaces:
Sleeping
Sleeping
Commit
·
b441d1f
1
Parent(s):
89cdd80
Update code/pdb_featureVector.py
Browse files- code/pdb_featureVector.py +51 -79
code/pdb_featureVector.py
CHANGED
|
@@ -203,14 +203,6 @@ def pdb(input_set, mode, impute):
|
|
| 203 |
print('Processing PDB structures...\n')
|
| 204 |
if pdbs == []:
|
| 205 |
print('No PDB structure found for the query. ')
|
| 206 |
-
"""
|
| 207 |
-
try:
|
| 208 |
-
pdbs = [j.strip('[').strip(']').strip().strip('\'').strip('\"') for j in
|
| 209 |
-
((',').join([str(item) for item in pdbs])).split(',')]
|
| 210 |
-
except IndexError:
|
| 211 |
-
pdbs = []
|
| 212 |
-
print('No PDB structure found for the query. ')
|
| 213 |
-
"""
|
| 214 |
print('Starting PDB structures download...\n')
|
| 215 |
pdbs = list(filter(None, pdbs))
|
| 216 |
pdbs = (set(pdbs))
|
|
@@ -223,82 +215,61 @@ def pdb(input_set, mode, impute):
|
|
| 223 |
shutil.rmtree('obsolete')
|
| 224 |
except OSError as e:
|
| 225 |
pass
|
| 226 |
-
|
| 227 |
-
#st.write('existing_pdb')
|
| 228 |
-
#st.write(existing_pdb)
|
| 229 |
-
#existing_pdb = [str(i) for i in existing_pdb]
|
| 230 |
-
#existing_pdb = [i.split('/')[-1].split('.')[0].lower() for i in existing_pdb]
|
| 231 |
cnt = 0
|
| 232 |
st.write('this is the pdbs', pdbs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
for search in pdbs:
|
| 234 |
-
|
|
|
|
| 235 |
try:
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
st.write('file: ', file)
|
| 243 |
-
|
| 244 |
-
existing_pdb = list(Path(path_to_output_files / 'pdb_structures').glob("*"))
|
| 245 |
-
st.write('after download:', existing_pdb)
|
| 246 |
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
#
|
| 264 |
-
#
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
st.write(f"PDB file {search}.pdb downloaded successfully.")
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
# Aug 23
|
| 284 |
-
content = response.content.decode("utf-8") # Decode the content if it's not already a string
|
| 285 |
-
st.write('Content')
|
| 286 |
-
st.write(content)
|
| 287 |
-
parsed_records = list(SeqIO.parse(content, "fasta"))
|
| 288 |
-
st.write('parsed_records')
|
| 289 |
-
st.write(parsed_records)
|
| 290 |
-
for rec in parsed_records:
|
| 291 |
-
st.write(rec)
|
| 292 |
-
st.write(rec.id)
|
| 293 |
-
st.write(rec.pdbSequence)
|
| 294 |
-
else:
|
| 295 |
-
st.write('Here2')
|
| 296 |
-
st.write(f"Failed to retrieve PDB file for {search}.")
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
st.write('what')
|
| 300 |
-
existing_pdb = list(Path(path_to_output_files / 'pdb_structures').glob("*"))
|
| 301 |
-
st.write('existing_pdb3', existing_pdb)
|
| 302 |
|
| 303 |
"""
|
| 304 |
|
|
@@ -324,6 +295,7 @@ def pdb(input_set, mode, impute):
|
|
| 324 |
pdb_info.at[index, 'chain'] = 'nan'
|
| 325 |
pdb_info.at[index, 'resolution'] = 'nan'
|
| 326 |
cnt += 1
|
|
|
|
| 327 |
print()
|
| 328 |
st.write()
|
| 329 |
st.write(pdb_info)
|
|
|
|
| 203 |
print('Processing PDB structures...\n')
|
| 204 |
if pdbs == []:
|
| 205 |
print('No PDB structure found for the query. ')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
print('Starting PDB structures download...\n')
|
| 207 |
pdbs = list(filter(None, pdbs))
|
| 208 |
pdbs = (set(pdbs))
|
|
|
|
| 215 |
shutil.rmtree('obsolete')
|
| 216 |
except OSError as e:
|
| 217 |
pass
|
| 218 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
cnt = 0
|
| 220 |
st.write('this is the pdbs', pdbs)
|
| 221 |
+
def fetch_uniprot_ids(pdb_code):
    """Return the UniProt accessions that PDBe maps to *pdb_code*.

    Queries the PDBe ``mappings/uniprot`` endpoint and returns the keys of
    the first mapping table found under the first entry of the JSON reply
    (the same positional lookup the original code performed).

    Args:
        pdb_code: PDB identifier interpolated into the request URL.

    Returns:
        A list of the mapping keys, or an empty list when the request
        fails, the body is not valid JSON, or the payload does not have
        the expected nested-dict shape.
    """
    url = f"https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{pdb_code}"
    try:
        # A timeout keeps one stalled connection from blocking the whole run.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Check for a successful response
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve UniProt data for PDB code {pdb_code}: {e}")
        return []
    except ValueError as e:
        # Older requests releases raise a plain ValueError for a non-JSON body.
        print(f"Failed to retrieve UniProt data for PDB code {pdb_code}: {e}")
        return []

    try:
        # Positional lookup, but tolerant of empty levels: an empty reply
        # yields [] instead of raising IndexError.
        entry = next(iter(data.values()), {})
        mappings = next(iter(entry.values()), {})
        return list(mappings.keys())
    except (AttributeError, TypeError) as e:
        # Payload was not the expected dict-of-dict-of-dict structure.
        print(f"Failed to retrieve UniProt data for PDB code {pdb_code}: {e}")
        return []
|
| 230 |
for search in pdbs:
|
| 231 |
+
# Step 1: Fetch the PDB file
|
| 232 |
+
pdb_url = f"https://files.rcsb.org/download/{search}.pdb"
|
| 233 |
try:
|
| 234 |
+
response = requests.get(pdb_url)
|
| 235 |
+
response.raise_for_status() # Check for a successful response
|
| 236 |
+
except requests.exceptions.RequestException as e:
|
| 237 |
+
print(f"Failed to retrieve data for PDB code {search}: {e}")
|
| 238 |
+
continue # Skip to the next PDB code if fetching fails
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
+
# Step 2: Parse the PDB file from memory
|
| 241 |
+
pdb_data = response.text
|
| 242 |
+
pdb_parser = PDBParser(QUIET=True) # QUIET=True suppresses warnings
|
| 243 |
+
pdb_file_content = StringIO(pdb_data)
|
| 244 |
+
structure = pdb_parser.get_structure(pdb_code, pdb_file_content)
|
| 245 |
+
ppb = PPBuilder()
|
| 246 |
+
for model in structure:
|
| 247 |
+
for pp in ppb.build_peptides(model):
|
| 248 |
+
sequence = pp.get_sequence()
|
| 249 |
+
for chain in model:
|
| 250 |
+
chain_id = chain.get_id()
|
| 251 |
+
# Extract UniProt ID if available in the chain's annotations
|
| 252 |
+
uniprot_ids = fetch_uniprot_ids(search)
|
| 253 |
+
# Get the resolution from the PDB header
|
| 254 |
+
header = structure.header
|
| 255 |
+
resolution = header.get('resolution', 'N/A')
|
| 256 |
+
# Print UniProt IDs, chain ID, and resolution for the current model
|
| 257 |
+
#for i, chain in enumerate(model, start=1):
|
| 258 |
+
chain_id = chain.get_id()
|
| 259 |
+
st.write(f"---- Information for Chain {chain_id} in Model {i} ----")
|
| 260 |
+
st.write(f"UniProt IDs: {', '.join(uniprot_ids)}")
|
| 261 |
+
st.write(f"Chain ID: {chain_id}")
|
| 262 |
+
st.write(f"PDB ID: {search.upper()}")
|
| 263 |
+
st.write(f"Resolution: {resolution}")
|
| 264 |
+
st.write(f"Sequence: {sequence}")
|
| 265 |
+
pdb_fasta.at[index, 'pdbID'] = search
|
| 266 |
+
pdb_fasta.at[index, 'chain'] = chain_id
|
| 267 |
+
pdb_fasta.at[index, 'pdbSequence'] = str(sequence)
|
| 268 |
+
pdb_info.at[index, 'uniprotID'] = record.dbxrefs[0].split(':')[1]
|
| 269 |
+
pdb_info.at[index, 'pdbID'] = search
|
| 270 |
+
pdb_info.at[index, 'chain'] = chain_id
|
| 271 |
+
pdb_info.at[index, 'resolution'] = resolution
|
| 272 |
+
index += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
|
| 274 |
"""
|
| 275 |
|
|
|
|
| 295 |
pdb_info.at[index, 'chain'] = 'nan'
|
| 296 |
pdb_info.at[index, 'resolution'] = 'nan'
|
| 297 |
cnt += 1
|
| 298 |
+
"""
|
| 299 |
print()
|
| 300 |
st.write()
|
| 301 |
st.write(pdb_info)
|