Spaces:

HUBioDataLab
/

ASCARIS

Sleeping

App Files Community

fatmacankara committed on Aug 24, 2023

Commit

b441d1f

1 Parent(s): 89cdd80

Update code/pdb_featureVector.py

Browse files

Files changed (1) hide show

code/pdb_featureVector.py +51 -79

code/pdb_featureVector.py CHANGED Viewed

@@ -203,14 +203,6 @@ def pdb(input_set, mode, impute):
         print('Processing PDB structures...\n')
         if pdbs == []:
             print('No PDB structure found for the query. ')
-        """
-        try:
-            pdbs = [j.strip('[').strip(']').strip().strip('\'').strip('\"') for j in
-                    ((',').join([str(item) for item in pdbs])).split(',')]
-        except IndexError:
-            pdbs = []
-            print('No PDB structure found for the query. ')
-        """
         print('Starting PDB structures download...\n')
         pdbs = list(filter(None, pdbs))
         pdbs = (set(pdbs))
@@ -223,82 +215,61 @@ def pdb(input_set, mode, impute):
             shutil.rmtree('obsolete')
         except OSError as e:
             pass
-        #existing_pdb = list(Path(path_to_output_files / 'pdb_structures').glob("*"))
-        #st.write('existing_pdb')
-        #st.write(existing_pdb)
-        #existing_pdb = [str(i) for i in existing_pdb]
-        #existing_pdb = [i.split('/')[-1].split('.')[0].lower() for i in existing_pdb]
         cnt = 0
         st.write('this is the pdbs', pdbs)
         for search in pdbs:
-            st.write('searching for pdb:', search)
             try:
-                file = pdbl.retrieve_pdb_file(search,  file_format="pdb")
-                """
-                path_pdb = 'out_files/pdb/pdb_structures'
-                st.write('path for pdb: ', path_pdb)
-                file = pdbl.retrieve_pdb_file(search, pdir=path_pdb, file_format="pdb")
-                st.write('file: ', file)
-                existing_pdb = list(Path(path_to_output_files / 'pdb_structures').glob("*"))
-                st.write('after download:', existing_pdb)
-                existing_pdb = list(glob.glob(f"{path_pdb}/*"))
-                st.write('after download 2:', existing_pdb)
-                st.write('NEW METHOD')
-                # Define the URL to retrieve the PDB file
-                url = f"https://files.rcsb.org/download/{search}.pdb"
-                st.write('url', url)
-                # Send an HTTP GET request to the PDB website to download the PDB file
-                response = requests.get(url)
-                st.write('response', response)
-                # Check if the request was successful
-                if response.status_code == 200:
-                    st.write('here1')
-                    # Save the PDB file to a local file
-                    #st.write(f'out_files/pdb/pdb_structures/{search}.pdb')
-                    #try:
-                    #    with open(f'out_files/pdb/pdb_structures/{search}.pdb', "wb") as f:
-                    #        st.write('WRITING TO FILE')
-                    #except:
-                    #    st.write('ERROR')
-                    from huggingface_hub import Hf
-                    api = HfApi()
-                    st.write('api', API)
-                    api.upload_file(
-                        path_or_fileobj=response.content,
-                        path_in_repo="out_files/pdb/pdb_structures/",
-                        repo_id="HUBioDataLab/ASCARIS",
-                        repo_type="space")
-                    st.write(f"PDB file {search}.pdb downloaded successfully.")
-                    # Aug 23
-                    content = response.content.decode("utf-8")  # Decode the content if it's not already a string
-                    st.write('Content')
-                    st.write(content)
-                    parsed_records = list(SeqIO.parse(content, "fasta"))
-                    st.write('parsed_records')
-                    st.write(parsed_records)
-                    for rec in parsed_records:
-                        st.write(rec)
-                        st.write(rec.id)
-                        st.write(rec.pdbSequence)
-                else:
-                    st.write('Here2')
-                    st.write(f"Failed to retrieve PDB file for {search}.")
-                st.write('what')
-                existing_pdb = list(Path(path_to_output_files / 'pdb_structures').glob("*"))
-                st.write('existing_pdb3', existing_pdb)
                 """
@@ -324,6 +295,7 @@ def pdb(input_set, mode, impute):
                 pdb_info.at[index, 'chain'] = 'nan'
                 pdb_info.at[index, 'resolution'] = 'nan'
             cnt += 1
         print()
         st.write()
         st.write(pdb_info)

         print('Processing PDB structures...\n')
         if pdbs == []:
             print('No PDB structure found for the query. ')
         print('Starting PDB structures download...\n')
         pdbs = list(filter(None, pdbs))
         pdbs = (set(pdbs))
             shutil.rmtree('obsolete')
         except OSError as e:
             pass
         cnt = 0
         st.write('this is the pdbs', pdbs)
+        def fetch_uniprot_ids(pdb_code):
+            try:
+                response = requests.get(f"https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{pdb_code}")
+                response.raise_for_status()  # Check for a successful response
+                data = response.json()
+                return list(list(list(data.values())[0].values())[0].keys())
+            except requests.exceptions.RequestException as e:
+                print(f"Failed to retrieve UniProt data for PDB code {pdb_code}: {e}")
+                return []
         for search in pdbs:
+            # Step 1: Fetch the PDB file
+            pdb_url = f"https://files.rcsb.org/download/{search}.pdb"
             try:
+                response = requests.get(pdb_url)
+                response.raise_for_status()  # Check for a successful response
+            except requests.exceptions.RequestException as e:
+                print(f"Failed to retrieve data for PDB code {search}: {e}")
+                continue  # Skip to the next PDB code if fetching fails
+            # Step 2: Parse the PDB file from memory
+            pdb_data = response.text
+            pdb_parser = PDBParser(QUIET=True)  # QUIET=True suppresses warnings
+            pdb_file_content = StringIO(pdb_data)
+            structure = pdb_parser.get_structure(pdb_code, pdb_file_content)
+            ppb = PPBuilder()
+            for model in structure:
+                for pp in ppb.build_peptides(model):
+                    sequence = pp.get_sequence()
+                for chain in model:
+                    chain_id = chain.get_id()
+                    # Extract UniProt ID if available in the chain's annotations
+                    uniprot_ids = fetch_uniprot_ids(search)
+                    # Get the resolution from the PDB header
+                    header = structure.header
+                    resolution = header.get('resolution', 'N/A')
+                    # Print UniProt IDs, chain ID, and resolution for the current model
+                    #for i, chain in enumerate(model, start=1):
+                    chain_id = chain.get_id()
+                    st.write(f"---- Information for Chain {chain_id} in Model {i} ----")
+                    st.write(f"UniProt IDs: {', '.join(uniprot_ids)}")
+                    st.write(f"Chain ID: {chain_id}")
+                    st.write(f"PDB ID: {search.upper()}")
+                    st.write(f"Resolution: {resolution}")
+                    st.write(f"Sequence: {sequence}")
+                    pdb_fasta.at[index, 'pdbID'] = search
+                    pdb_fasta.at[index, 'chain'] = chain_id
+                    pdb_fasta.at[index, 'pdbSequence'] = str(sequence)
+                    pdb_info.at[index, 'uniprotID'] = record.dbxrefs[0].split(':')[1]
+                    pdb_info.at[index, 'pdbID'] = search
+                    pdb_info.at[index, 'chain'] = chain_id
+                    pdb_info.at[index, 'resolution'] = resolution
+                index += 1
                 """
                 pdb_info.at[index, 'chain'] = 'nan'
                 pdb_info.at[index, 'resolution'] = 'nan'
             cnt += 1
+            """
         print()
         st.write()
         st.write(pdb_info)