# BioMCP-explorer / tabs/search.py
# singhankit16's picture
# Deploy BioMCP Explorer
# f103ad7
"""
Search tab — entity-based discovery across all 13+ entity types.
Dynamic forms show/hide filters based on selected entity.
"""
import gradio as gr
from core import config, runner
from core.formatter import format_result
# Values offered by the "Entity Type" dropdown; the selected value is passed
# straight through as the second CLI argument (``search <entity>``).
ENTITY_CHOICES = [
    "all",
    "gene",
    "disease",
    "variant",
    "article",
    "trial",
    "drug",
    "pathway",
    "protein",
    "adverse-event",
    "pgx",
    "gwas",
    "phenotype",
]

# In every filter list below the leading "" is the "no filter" choice: the
# search handler only emits a flag when the selected value is non-empty.

# Choices for the trial ``--status`` filter.
TRIAL_STATUS = [
    "",
    "recruiting",
    "not yet recruiting",
    "active, not recruiting",
    "completed",
    "terminated",
    "suspended",
    "withdrawn",
]

# Choices for the trial ``--phase`` filter.
TRIAL_PHASES = [
    "",
    "1",
    "2",
    "3",
    "4",
]

# Choices for the variant ``--significance`` filter.
VARIANT_SIGNIFICANCE = [
    "",
    "pathogenic",
    "likely_pathogenic",
    "uncertain_significance",
    "likely_benign",
    "benign",
    "conflicting_interpretations",
    "risk_factor",
]

# Choices for the variant ``--consequence`` filter.
VARIANT_CONSEQUENCE = [
    "",
    "missense_variant",
    "nonsense_variant",
    "synonymous_variant",
    "frameshift_variant",
    "splice_acceptor_variant",
    "splice_donor_variant",
    "inframe_deletion",
    "inframe_insertion",
    "stop_lost",
    "start_lost",
]

# Choices for the per-entity ``--source`` filters.
ARTICLE_SOURCE = [
    "",
    "all",
    "pubtator",
    "europepmc",
    "pubmed",
]
TRIAL_SOURCE = [
    "",
    "ctgov",
    "nci",
]
DISEASE_SOURCE = [
    "",
    "mondo",
]
# Per-entity example shown under the filter forms.
# Maps entity name -> (CLI argument hint, human-readable description); the
# hint is rendered after ``biomcp search <entity>`` in the example line.
SEARCH_EXAMPLES = {
    "all": (
        "--gene BRAF --disease melanoma",
        "Cross-entity overview for BRAF + melanoma",
    ),
    "gene": (
        "-q BRAF",
        "Search genes matching BRAF",
    ),
    "disease": (
        "-q melanoma",
        "Search diseases matching melanoma",
    ),
    "variant": (
        "-g BRAF --hgvsp V600E",
        "Search BRAF V600E variants",
    ),
    "article": (
        "-g BRAF -d melanoma --since 2024-01-01",
        "BRAF melanoma articles since 2024",
    ),
    "trial": (
        "-c melanoma --status recruiting",
        "Recruiting melanoma trials",
    ),
    "drug": (
        "-q pembrolizumab",
        "Search drug pembrolizumab",
    ),
    "pathway": (
        '-q "MAPK signaling"',
        "Search MAPK signaling pathways",
    ),
    "protein": (
        "-q kinase",
        "Search kinase proteins",
    ),
    "adverse-event": (
        "--drug pembrolizumab --serious",
        "Serious adverse events for pembrolizumab",
    ),
    "pgx": (
        "-g CYP2D6",
        "PGx data for CYP2D6",
    ),
    "gwas": (
        '--trait "type 2 diabetes"',
        "GWAS for type 2 diabetes",
    ),
    "phenotype": (
        '"HP:0001250 HP:0001263"',
        "Phenotype similarity search",
    ),
}
def _clean_text(value):
    """Return ``value`` as a string without surrounding whitespace ("" for None)."""
    return "" if value is None else str(value).strip()


def create_search_tab(session_keys):
    """Build the Search tab with dynamic entity forms.

    Lays out one filter group per entity type, shows only the group that
    matches the selected entity, displays a CLI example for that entity, and
    wires the Search button to a handler that assembles a
    ``search <entity> ...`` argument list and passes it to ``runner.run``.

    Args:
        session_keys: Gradio component listed among the search handler's
            inputs; its runtime value is forwarded to
            ``config.build_env_overrides``.
            NOTE(review): presumably a ``gr.State`` holding per-session API
            keys -- confirm against the caller.

    Returns:
        None. Components and event handlers are registered as a side effect.
    """
    # Single source of truth for the initially selected entity, so the
    # dropdown and the example hint cannot drift apart.
    default_entity = "gene"

    with gr.Tab("🔎 Search"):
        gr.Markdown(
            "## Entity Search\n"
            "Discovery across 13 biomedical entity types. Select an entity and fill in the relevant filters."
        )
        with gr.Row():
            entity = gr.Dropdown(
                choices=ENTITY_CHOICES,
                value=default_entity,
                label="Entity Type",
                scale=1,
            )
            limit = gr.Number(label="Limit", value=10, minimum=1, maximum=100, scale=1)
            offset = gr.Number(label="Offset", value=0, minimum=0, scale=1)
        with gr.Row():
            no_cache = gr.Checkbox(label="Bypass cache", value=False)

        # === All (cross-entity) filters ===
        with gr.Group(visible=False) as all_group:
            gr.Markdown("### Cross-Entity Search Filters")
            with gr.Row():
                all_gene = gr.Textbox(label="Gene", placeholder="e.g., BRAF")
                all_disease = gr.Textbox(label="Disease", placeholder="e.g., melanoma")
                all_keyword = gr.Textbox(label="Keyword", placeholder="e.g., immunotherapy resistance")
            with gr.Row():
                all_since = gr.Textbox(label="Since (date)", placeholder="e.g., 2024-01-01")
                all_counts_only = gr.Checkbox(label="Counts only")
                all_debug_plan = gr.Checkbox(label="Debug plan")

        # === Gene filters === (visible on load: matches the default entity)
        with gr.Group(visible=True) as gene_group:
            gr.Markdown("### Gene Search")
            gene_query = gr.Textbox(label="Query", placeholder="e.g., BRAF, TP53, EGFR")

        # === Disease filters ===
        with gr.Group(visible=False) as disease_group:
            gr.Markdown("### Disease Search")
            with gr.Row():
                disease_query = gr.Textbox(label="Query", placeholder="e.g., melanoma, Lynch syndrome")
                disease_source = gr.Dropdown(choices=DISEASE_SOURCE, value="", label="Source")

        # === Variant filters ===
        with gr.Group(visible=False) as variant_group:
            gr.Markdown("### Variant Search")
            with gr.Row():
                variant_gene = gr.Textbox(label="Gene (-g)", placeholder="e.g., BRAF")
                variant_hgvsp = gr.Textbox(label="HGVSp", placeholder="e.g., V600E")
            with gr.Row():
                variant_sig = gr.Dropdown(choices=VARIANT_SIGNIFICANCE, value="", label="Significance")
                variant_consequence = gr.Dropdown(choices=VARIANT_CONSEQUENCE, value="", label="Consequence")

        # === Article filters ===
        with gr.Group(visible=False) as article_group:
            gr.Markdown("### Article Search")
            with gr.Row():
                article_gene = gr.Textbox(label="Gene (-g)", placeholder="e.g., BRAF")
                article_disease = gr.Textbox(label="Disease (-d)", placeholder="e.g., melanoma")
            with gr.Row():
                article_since = gr.Textbox(label="Since", placeholder="e.g., 2024-01-01")
                article_source = gr.Dropdown(choices=ARTICLE_SOURCE, value="", label="Source")

        # === Trial filters ===
        with gr.Group(visible=False) as trial_group:
            gr.Markdown("### Trial Search")
            with gr.Row():
                trial_condition = gr.Textbox(label="Condition (-c)", placeholder="e.g., melanoma")
                trial_status = gr.Dropdown(choices=TRIAL_STATUS, value="", label="Status")
                trial_phase = gr.Dropdown(choices=TRIAL_PHASES, value="", label="Phase")
            with gr.Row():
                trial_source = gr.Dropdown(choices=TRIAL_SOURCE, value="", label="Source")
                trial_lat = gr.Textbox(label="Latitude", placeholder="e.g., 42.3601")
                trial_lon = gr.Textbox(label="Longitude", placeholder="e.g., -71.0589")
                trial_distance = gr.Textbox(label="Distance (mi)", placeholder="e.g., 50")

        # === Drug filters ===
        with gr.Group(visible=False) as drug_group:
            gr.Markdown("### Drug Search")
            with gr.Row():
                drug_query = gr.Textbox(label="Query", placeholder="e.g., pembrolizumab, kinase inhibitor")
                drug_region = gr.Dropdown(
                    choices=[
                        ("— Default (US + EU auto)", ""),
                        ("🇪🇺 EU (European Medicines Agency)", "eu"),
                    ],
                    value="",
                    label="Region",
                )

        # === Pathway filters ===
        with gr.Group(visible=False) as pathway_group:
            gr.Markdown("### Pathway Search")
            pathway_query = gr.Textbox(label="Query", placeholder='e.g., "MAPK signaling", "Pathways in cancer"')

        # === Protein filters ===
        with gr.Group(visible=False) as protein_group:
            gr.Markdown("### Protein Search")
            with gr.Row():
                protein_query = gr.Textbox(label="Query", placeholder="e.g., kinase")
                protein_all_species = gr.Checkbox(label="All species")

        # === Adverse Event filters ===
        with gr.Group(visible=False) as ae_group:
            gr.Markdown("### Adverse Event Search")
            with gr.Row():
                ae_drug = gr.Textbox(label="Drug", placeholder="e.g., pembrolizumab")
                ae_serious = gr.Checkbox(label="Serious only")
            with gr.Row():
                ae_type = gr.Dropdown(choices=["", "device"], value="", label="Type")
                ae_manufacturer = gr.Textbox(label="Manufacturer", placeholder="e.g., Medtronic")
                ae_product_code = gr.Textbox(label="Product code", placeholder="e.g., PQP")

        # === PGx filters ===
        with gr.Group(visible=False) as pgx_group:
            gr.Markdown("### PGx Search")
            with gr.Row():
                pgx_gene = gr.Textbox(label="Gene (-g)", placeholder="e.g., CYP2D6")
                pgx_drug = gr.Textbox(label="Drug (-d)", placeholder="e.g., warfarin")

        # === GWAS filters ===
        with gr.Group(visible=False) as gwas_group:
            gr.Markdown("### GWAS Search")
            with gr.Row():
                gwas_gene = gr.Textbox(label="Gene (-g)", placeholder="e.g., TCF7L2")
                gwas_trait = gr.Textbox(label="Trait", placeholder='e.g., "type 2 diabetes"')

        # === Phenotype filters ===
        with gr.Group(visible=False) as phenotype_group:
            gr.Markdown("### Phenotype Search (Monarch Semsim)")
            phenotype_terms = gr.Textbox(label="HPO Terms", placeholder="e.g., HP:0001250 HP:0001263")

        # Dynamic visibility: entity name -> the group holding its filters.
        entity_groups = {
            "all": all_group, "gene": gene_group, "disease": disease_group,
            "variant": variant_group, "article": article_group, "trial": trial_group,
            "drug": drug_group, "pathway": pathway_group, "protein": protein_group,
            "adverse-event": ae_group, "pgx": pgx_group, "gwas": gwas_group,
            "phenotype": phenotype_group,
        }

        def toggle_visibility(selected):
            """Return one update per group, visible only for ``selected``."""
            # Iterates the dict in insertion order, which is the same order as
            # ``entity_groups.values()`` used for ``outputs`` below.
            return [gr.Group(visible=(k == selected)) for k in entity_groups]

        entity.change(
            fn=toggle_visibility,
            inputs=[entity],
            outputs=list(entity_groups.values()),
        )

        def show_example(ent):
            """Return the Markdown example line for ``ent`` ("" if unknown)."""
            example = SEARCH_EXAMPLES.get(ent)
            if example is None:
                return ""
            hint, desc = example
            return f"**Example:** `biomcp search {ent} {hint}` — {desc}"

        # Example hint. Seeded from the default entity so it is shown on first
        # render rather than only after the user changes the dropdown.
        example_display = gr.Markdown(show_example(default_entity))
        entity.change(fn=show_example, inputs=[entity], outputs=[example_display])

        # Run button
        run_btn = gr.Button("🔎 Search", variant="primary")
        output_md = gr.Markdown(label="Results")
        with gr.Accordion("Raw JSON", open=False):
            output_json = gr.Code(language="json")

        def run_search(
            ent, lim, off, skip_cache, keys,
            # all
            a_gene, a_disease, a_keyword, a_since, a_counts, a_debug,
            # gene
            g_query,
            # disease
            d_query, d_source,
            # variant
            v_gene, v_hgvsp, v_sig, v_cons,
            # article
            ar_gene, ar_disease, ar_since, ar_source,
            # trial
            t_cond, t_status, t_phase, t_source, t_lat, t_lon, t_dist,
            # drug
            dr_query, dr_region,
            # pathway
            pw_query,
            # protein
            pr_query, pr_all_species,
            # adverse event
            ae_d, ae_s, ae_t, ae_m, ae_pc,
            # pgx
            pgx_g, pgx_d,
            # gwas
            gw_g, gw_t,
            # phenotype
            ph_terms,
        ):
            """Assemble the CLI arguments for the selected entity and run them.

            The positional parameters mirror ``all_inputs`` one-to-one; the
            two lists must be kept in the same order. Only the filters that
            belong to ``ent`` are used; values of the other groups are ignored.

            Returns:
                The ``(markdown, json)`` pair produced by ``format_result``.

            Raises:
                gr.Error: when the runner result reports ``success`` as falsy.
            """
            args = ["search", ent]

            def opt(flag, value):
                # Trim BEFORE testing for emptiness so a whitespace-only field
                # is treated as "not set" instead of sending ``flag ""``.
                text = _clean_text(value)
                if text:
                    args.extend([flag, text])

            # Fall back to the form defaults when a number field is cleared.
            lim = int(lim) if lim else 10
            off = int(off) if off else 0

            if ent == "all":
                opt("--gene", a_gene)
                opt("--disease", a_disease)
                opt("--keyword", a_keyword)
                opt("--since", a_since)
                if a_counts:
                    args.append("--counts-only")
                if a_debug:
                    args.append("--debug-plan")
            elif ent == "gene":
                opt("-q", g_query)
            elif ent == "disease":
                opt("-q", d_query)
                opt("--source", d_source)
            elif ent == "variant":
                opt("-g", v_gene)
                opt("--hgvsp", v_hgvsp)
                opt("--significance", v_sig)
                opt("--consequence", v_cons)
            elif ent == "article":
                opt("-g", ar_gene)
                opt("-d", ar_disease)
                opt("--since", ar_since)
                opt("--source", ar_source)
            elif ent == "trial":
                opt("-c", t_cond)
                opt("--status", t_status)
                opt("--phase", t_phase)
                opt("--source", t_source)
                opt("--lat", t_lat)
                opt("--lon", t_lon)
                opt("--distance", t_dist)
            elif ent == "drug":
                opt("-q", dr_query)
                opt("--region", dr_region)
            elif ent == "pathway":
                opt("-q", pw_query)
            elif ent == "protein":
                opt("-q", pr_query)
                if pr_all_species:
                    args.append("--all-species")
            elif ent == "adverse-event":
                opt("--drug", ae_d)
                if ae_s:
                    args.append("--serious")
                opt("--type", ae_t)
                opt("--manufacturer", ae_m)
                opt("--product-code", ae_pc)
            elif ent == "pgx":
                opt("-g", pgx_g)
                opt("-d", pgx_d)
            elif ent == "gwas":
                opt("-g", gw_g)
                opt("--trait", gw_t)
            elif ent == "phenotype":
                # Phenotype terms are passed as a single positional argument.
                terms = _clean_text(ph_terms)
                if terms:
                    args.append(terms)

            # Add limit/offset (offset is only sent when it is positive).
            args.extend(["--limit", str(lim)])
            if off > 0:
                args.extend(["--offset", str(off)])

            env = config.build_env_overrides(keys)
            result = runner.run(args, json_mode=True, no_cache=skip_cache, env_overrides=env)
            if not result["success"]:
                raise gr.Error(f"BioMCP error: {result['error']}")
            md, js = format_result(result)
            return md, js

        # Order must match the positional parameters of ``run_search``.
        all_inputs = [
            entity, limit, offset, no_cache, session_keys,
            # all
            all_gene, all_disease, all_keyword, all_since, all_counts_only, all_debug_plan,
            # gene
            gene_query,
            # disease
            disease_query, disease_source,
            # variant
            variant_gene, variant_hgvsp, variant_sig, variant_consequence,
            # article
            article_gene, article_disease, article_since, article_source,
            # trial
            trial_condition, trial_status, trial_phase, trial_source,
            trial_lat, trial_lon, trial_distance,
            # drug
            drug_query, drug_region,
            # pathway
            pathway_query,
            # protein
            protein_query, protein_all_species,
            # adverse-event
            ae_drug, ae_serious, ae_type, ae_manufacturer, ae_product_code,
            # pgx
            pgx_gene, pgx_drug,
            # gwas
            gwas_gene, gwas_trait,
            # phenotype
            phenotype_terms,
        ]
        run_btn.click(
            fn=run_search,
            inputs=all_inputs,
            outputs=[output_md, output_json],
        )