Spaces:
Running
Running
roni
committed on
Commit
·
6f068fd
1
Parent(s):
9ec67f0
deduplicating on uniprot id
Browse files
- app.py +21 -15
- get_index.py +2 -1
app.py
CHANGED
|
@@ -42,20 +42,19 @@ def limit_n_results(n):
|
|
| 42 |
|
| 43 |
|
| 44 |
def aggregate_search_results(raw_results: List[dict], max_res: int) -> Dict[str, dict]:
|
| 45 |
-
|
| 46 |
for raw_result in raw_results:
|
| 47 |
entry = select_keys(
|
| 48 |
raw_result,
|
| 49 |
-
keys=["pdb_name", "chain_id", "score", "organism", "uniprot_id"]
|
| 50 |
)
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
return dict(aggregated_by_gene)
|
| 59 |
|
| 60 |
|
| 61 |
def select_keys(d: dict, keys: List[str]):
|
|
@@ -64,21 +63,28 @@ def select_keys(d: dict, keys: List[str]):
|
|
| 64 |
|
| 65 |
def format_search_results(agg_search_results):
|
| 66 |
formatted_search_results = {}
|
| 67 |
-
for
|
| 68 |
entry = entries[0]
|
| 69 |
organism = entry["organism"]
|
| 70 |
score = entry["score"]
|
| 71 |
-
|
| 72 |
-
key = f"
|
| 73 |
formatted_search_results[key] = score
|
| 74 |
return formatted_search_results
|
| 75 |
|
| 76 |
|
| 77 |
def update_dropdown_menu(agg_search_res):
|
| 78 |
choices = []
|
| 79 |
-
for
|
| 80 |
for entry in entries:
|
| 81 |
-
choice = choice_sep.join(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
choices.append(choice)
|
| 83 |
|
| 84 |
if choices:
|
|
|
|
| 42 |
|
| 43 |
|
| 44 |
def aggregate_search_results(raw_results: List[dict], max_res: int) -> Dict[str, dict]:
|
| 45 |
+
aggregated_by_uniprot = collections.defaultdict(list)
|
| 46 |
for raw_result in raw_results:
|
| 47 |
entry = select_keys(
|
| 48 |
raw_result,
|
| 49 |
+
keys=["pdb_name", "chain_id", "score", "organism", "uniprot_id", "genes"],
|
| 50 |
)
|
| 51 |
+
uniprot_id = raw_result["uniprot_id"]
|
| 52 |
+
|
| 53 |
+
if uniprot_id is not None:
|
| 54 |
+
aggregated_by_uniprot[uniprot_id].append(entry)
|
| 55 |
+
if len(aggregated_by_uniprot) >= max_res:
|
| 56 |
+
return dict(aggregated_by_uniprot)
|
| 57 |
+
return dict(aggregated_by_uniprot)
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
def select_keys(d: dict, keys: List[str]):
|
|
|
|
| 63 |
|
| 64 |
def format_search_results(agg_search_results):
|
| 65 |
formatted_search_results = {}
|
| 66 |
+
for uniprot_id, entries in agg_search_results.items():
|
| 67 |
entry = entries[0]
|
| 68 |
organism = entry["organism"]
|
| 69 |
score = entry["score"]
|
| 70 |
+
genes = entry["genes"]
|
| 71 |
+
key = f"Uniprot ID: {uniprot_id} | Organism: {organism} | Gene Names: {genes}"
|
| 72 |
formatted_search_results[key] = score
|
| 73 |
return formatted_search_results
|
| 74 |
|
| 75 |
|
| 76 |
def update_dropdown_menu(agg_search_res):
|
| 77 |
choices = []
|
| 78 |
+
for uniprot_id, entries in agg_search_res.items():
|
| 79 |
for entry in entries:
|
| 80 |
+
choice = choice_sep.join(
|
| 81 |
+
[
|
| 82 |
+
uniprot_id,
|
| 83 |
+
entry["pdb_name"],
|
| 84 |
+
entry["chain_id"],
|
| 85 |
+
entry["genes"] or "",
|
| 86 |
+
]
|
| 87 |
+
)
|
| 88 |
choices.append(choice)
|
| 89 |
|
| 90 |
if choices:
|
get_index.py
CHANGED
|
@@ -18,7 +18,8 @@ def get_engines(index_repo: str, model_repo: str):
|
|
| 18 |
)
|
| 19 |
sys.path.append(str(local_arch_path))
|
| 20 |
from protein_index import ( # pylint: disable=import-error,import-outside-toplevel
|
| 21 |
-
ProteinSearchEngine,
|
|
|
|
| 22 |
)
|
| 23 |
|
| 24 |
subindex_paths = glob(str(index_path / "*/"))
|
|
|
|
| 18 |
)
|
| 19 |
sys.path.append(str(local_arch_path))
|
| 20 |
from protein_index import ( # pylint: disable=import-error,import-outside-toplevel
|
| 21 |
+
ProteinSearchEngine,
|
| 22 |
+
ProteinIndexError,
|
| 23 |
)
|
| 24 |
|
| 25 |
subindex_paths = glob(str(index_path / "*/"))
|