Spaces:

ronig
/

protein_binding_search

Running

App Files Files Community

roni committed on Mar 23, 2023

Commit

1694358

1 Parent(s): 5e7a3eb

using metadata instead of fetching it from the internet

Browse files

Files changed (3) hide show

app.py +8 -32
concurrency.py +0 -22
protein_viz.py +0 -56

app.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import gradio as gr
-from concurrency import execute_multithread
 from get_index import get_engines
-from protein_viz import get_gene_name, get_protein_name, render_html
 index_repo = "ronig/siamese_protein_index"
 model_repo = "ronig/protein_search_engine"
@@ -15,6 +14,7 @@ You can use it to search the full [PDB](https://www.rcsb.org/) database or in a
 """
 max_results = 100
 def search_and_display(seq, n_res, index_selection):
     n_res = int(limit_n_results(n_res))
     engine = engines[index_selection]
@@ -47,48 +47,24 @@ def update_dropdown_menu(search_res):
 def format_search_results(raw_search_results):
     formatted_search_results = {}
-    for key, value in execute_multithread(
-        func=format_search_result,
-        inputs=({"raw_result": res} for res in raw_search_results),
-        n_workers=len(raw_search_results),
-    ):
         formatted_search_results[key] = value
     return formatted_search_results
-def format_search_result(raw_result):
-    is_pdb = "pdb_name" in raw_result
-    if is_pdb:
-        key, value = parse_pdb_search_result(raw_result)
-    else:
-        key, value = parse_fasta_search_result(raw_result)
-    return key, value
-def parse_fasta_search_result(raw_result):
-    gene = parse_gene_from_fasta_entry(raw_result["description"])
-    key = f"Gene: {gene}"
-    value = raw_result["score"]
-    return key, value
 def parse_pdb_search_result(raw_result):
     prot = raw_result["pdb_name"]
     chain = raw_result["chain_id"]
     value = raw_result["score"]
-    gene_name, species = get_gene_name(pdb_id=prot, chain_id=chain)
     key = f"PDB: {prot}.{chain}"
-    if gene_name is not None:
-        key += f" | Gene: {gene_name} | Organism: {species}"
     return key, value
-def parse_gene_from_fasta_entry(description):
-    after = description.split("GN=")[1]
-    gene = after.split(" ")[0]
-    return gene
 def switch_viz(new_choice):
     if new_choice is None:
         html = ""

 import gradio as gr
 from get_index import get_engines
+from protein_viz import get_protein_name, render_html
 index_repo = "ronig/siamese_protein_index"
 model_repo = "ronig/protein_search_engine"
 """
 max_results = 100
 def search_and_display(seq, n_res, index_selection):
     n_res = int(limit_n_results(n_res))
     engine = engines[index_selection]
 def format_search_results(raw_search_results):
     formatted_search_results = {}
+    for res in raw_search_results:
+        key, value = parse_pdb_search_result(res)
         formatted_search_results[key] = value
     return formatted_search_results
 def parse_pdb_search_result(raw_result):
     prot = raw_result["pdb_name"]
     chain = raw_result["chain_id"]
     value = raw_result["score"]
+    gene_names = raw_result["genes"]
+    species = raw_result["organism"]
     key = f"PDB: {prot}.{chain}"
+    if gene_names is not None:
+        key += f" | Genes: {gene_names} | Organism: {species}"
     return key, value
 def switch_viz(new_choice):
     if new_choice is None:
         html = ""

concurrency.py DELETED Viewed

@@ -1,22 +0,0 @@
-import concurrent.futures
-import itertools
-from typing import Callable, Iterable
-def execute_multithread(func: Callable, inputs: Iterable, n_workers):
-    with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
-        futures = {
-            executor.submit(func, **task)
-            for task in itertools.islice(inputs, n_workers)
-        }
-        while futures:
-            done, futures = concurrent.futures.wait(
-                futures, return_when=concurrent.futures.FIRST_COMPLETED
-            )
-            for future in done:
-                yield future.result()
-            for task in itertools.islice(inputs, len(done)):
-                futures.add(executor.submit(func, **task))

protein_viz.py CHANGED Viewed

@@ -30,62 +30,6 @@ def render_html(pdb_id, chain):
     return iframe
-def get_gene_name(pdb_id, chain_id):
-    entity_id = get_polymer_entity_id(chain_id, pdb_id)
-    gene_name, species = get_gene_name_from_polymer_entity(
-        pdb_id=pdb_id, entity_id=entity_id
-    )
-    return gene_name, species
-def get_polymer_entity_id(chain_id, pdb_id):
-    url = (
-        f"https://data.rcsb.org/rest/v1/core/"
-        f"polymer_entity_instance/{pdb_id}/{chain_id}"
-    )
-    response = requests.get(url, timeout=1)
-    if response.ok:
-        res_data = response.json()
-        entity_id = int(
-            res_data["rcsb_polymer_entity_instance_container_identifiers"]["entity_id"]
-        )
-    else:
-        entity_id = None
-    return entity_id
-def get_gene_name_from_polymer_entity(pdb_id, entity_id):
-    gene_name, species = None, None
-    if entity_id:
-        url = f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}"
-        response = requests.get(url, timeout=1)
-        if response.ok:
-            res_data = response.json()
-            uniprot_id = _extract_uniprot_id(res_data)
-            source_organism = res_data.get("rcsb_entity_source_organism", [{}])[0]
-            gene_name = source_organism.get("rcsb_gene_name", [{}])[0].get("value")
-            species = source_organism.get("scientific_name")
-            if gene_name is None and uniprot_id is not None:
-                gene_name = get_gene_name_from_uniprot(uniprot_id)
-    return gene_name, species
-def get_gene_name_from_uniprot(uniprot_id):
-    gene_name = None
-    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}"
-    response = requests.get(url, timeout=1.0)
-    if response.ok:
-        uniprot_data = response.json()
-        gene_name = uniprot_data.get("genes", [{}])[0].get("geneName", {}).get("value")
-    return gene_name
-def _extract_uniprot_id(res_data):
-    ids = res_data.get("rcsb_polymer_entity_container_identifiers", {})
-    uniprot_id = ids.get("uniprot_ids", [None])[0]
-    return uniprot_id
 def get_protein_name(pdb_id: str):
     url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
     response = requests.get(url, timeout=1)

     return iframe
 def get_protein_name(pdb_id: str):
     url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
     response = requests.get(url, timeout=1)