Spaces:

hemantn
/

antibody-database

Running

File size: 6,339 Bytes

import gradio as gr
import sqlite3
import pandas as pd
import plotly.express as px
from utils import *
from huggingface_hub import hf_hub_download

db_path = hf_hub_download(
    repo_id="hemantn/antibody-paired-sequences",
    filename="antibody_data_year.db",
    repo_type="dataset"
)


conn = sqlite3.connect(db_path, check_same_thread=False)

#conn = sqlite3.connect('/data/hn533621/OAS/Paired_Data_Analysis/antibody_data_year.db', check_same_thread=False)
all_df = pd.read_sql("SELECT * FROM antibody_data", conn)

# Compute min / max for each column
vh_min, vh_max = int(all_df["vh_length"].min()), int(all_df["vh_length"].max())
vl_min, vl_max = int(all_df["vl_length"].min()), int(all_df["vl_length"].max())

def apply_filters(dropdown_1, dropdown_2, dropdown_3, dropdown_4, slider_vh, slider_vl):
    """
    Build a query to filter the data based on the user's input
    """
    clauses = []
    params = {}
    if dropdown_1: clauses.append('v_call_heavy_first = :vh_germline');  params['vh_germline'] = dropdown_1
    if dropdown_2: clauses.append('v_call_light_first = :vl_germline');  params['vl_germline'] = dropdown_2
    if dropdown_3: clauses.append('BType = :btype');  params['btype'] = dropdown_3
    if dropdown_4: clauses.append('Disease = :disease');  params['disease'] = dropdown_4
    if slider_vh:
       clauses.append("vh_length BETWEEN :vh_min AND :vh_max")
       params["vh_min"] = 80
       params["vh_max"] = slider_vh
    if slider_vl:
        clauses.append("vl_length BETWEEN :vl_min AND :vl_max")
        params["vl_min"] = 80
        params["vl_max"] = slider_vl

    sql = "SELECT * FROM antibody_data"
    if clauses:
        sql += " WHERE " + " AND ".join(clauses)

    df = pd.read_sql_query(sql, conn, params=params)
    #print(df.columns)
        # ---- Reorder and rename columns ----
    # 1. Rename to your desired display names
    rename_map = {
        "BType": "BType",
        "Disease": "Disease",
        "v_call_heavy_first": "vcall_VH",
        "v_call_light_first": "vcall_VL",
        "vh_length": "VH_length",
        "vl_length": "VL_length",
        "sequence_alignment_aa_heavy": "VH",
        "sequence_alignment_aa_light": "VL",
        "Year": "Year",
    }
    df = df.rename(columns=rename_map)

    # 2. Reorder the columns
    desired_order = [
        "BType",
        "Disease",
        "vcall_VH",
        "vcall_VL",
        "VH_length",
        "VL_length",
        "Year",
        "VH",
        "VL",
    ]
    # Only keep columns that actually exist (avoids errors if some missing)
    df = df[[c for c in desired_order if c in df.columns]]
    
    total_rows = conn.execute("SELECT COUNT(*) FROM antibody_data").fetchone()[0]
    #vcall_vh_bar = bar_vcall_vh(df, total_rows, dropdown_1)
    year_bar = bar_year_count(df)
    #vcall_vl_bar = bar_vcall_vl(df, total_rows, dropdown_2)
    combined_vh_vl_bar = bar_vh_vl_combined(df, total_rows, dropdown_1, dropdown_2)
    #disease_bar  = bar_disease_count(df, total_rows, dropdown_4)
    #btype_bar = bar_btype_count(df, total_rows, dropdown_3)
    vh_fig, vl_fig = hist_vh_vl_separate(df)
    fasta_file = make_fasta_file(df)
    total_sequences = 2*len(df)
    return df.head(5), combined_vh_vl_bar, year_bar, vh_fig, vl_fig, fasta_file, total_sequences

with gr.Blocks(theme=gr.themes.Ocean()) as demo:
    gr.Markdown(
        """
        <h1 style='text-align:center; color:#3A7; margin-bottom:0.5em;'>🔎 Antibody Database Dashboard</h1>
        <p style='text-align:center; color:gray;'>
        Filter sequences, explore counts, and download custom FASTA files.
        </p>
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            dropdown_1 = gr.Dropdown(choices = [""] + sorted(all_df['v_call_heavy_first'].unique()), \
                value = None, label = 'VH germline')
            dropdown_2 = gr.Dropdown(choices = [""] + sorted(all_df['v_call_light_first'].unique()), \
                value = None, label = 'VL germline')
            dropdown_3 = gr.Dropdown(choices = [""] + sorted(all_df['BType'].unique()), \
                value = None, label = 'B-Type')
            dropdown_4 = gr.Dropdown(choices = [""] + sorted(all_df['Disease'].unique()), \
                value = None, label = 'Disase')

            slider_vh = gr.Slider(value=vh_max, minimum=vh_min, maximum=vh_max, step=1, label = 'VH length') 
            slider_vl = gr.Slider(value=vl_max, minimum=vl_min, maximum=vl_max, step=1, label = 'VL length')  

            button = gr.Button('Apply Filters', variant='primary')

        with gr.Column(scale=2):
            with gr.Row():
                with gr.Column(scale=1):
                      combined_vh_vl_bar = gr.Plot(label = 'VH and VL Germline')
                with gr.Column(scale=1):
                      vh_fig = gr.Plot(label = 'VH Length Distribution')
            with gr.Row():
                with gr.Column(scale=1):
                      year_bar = gr.Plot(label = 'Year Wise Distribution')
                with gr.Column(scale=1):
                      vl_fig = gr.Plot(label = 'VL Length Distribution')

    with gr.Row():
        #with gr.Column(scale=0.3):
            #gr.Plot(label= 'Yearwise distribution of antibodies')
        with gr.Column(scale=1):
            with gr.Row():
                 with gr.Column(scale=2):
                     df_out = gr.Dataframe()
                 with gr.Column(scale=1):
                     fasta_file = gr.File(label= 'Download Antibody Data Fasta file', interactive=False)

                     with gr.Row():
                        total_sequences = gr.Textbox(label= 'No of Sequences in Fasta file', value=0, interactive=False)
                    
    
    slider_vh.input(update_vh, inputs=slider_vh, outputs=slider_vh, queue=False)
    slider_vl.input(update_vl, inputs=slider_vl, outputs=slider_vl, queue=False)

    inputs = [dropdown_1, dropdown_2, dropdown_3, dropdown_4, slider_vh, slider_vl]
    #outputs = df,
    #outputs = [plot_vh_germline, plot_vl_germline, plot_disease_count, plot_btype_count, \
    #                plot_year_data, dataframe, download_fasta]
    
    button.click(
        fn=apply_filters,
        inputs = inputs,
        outputs = [df_out, combined_vh_vl_bar, year_bar, vh_fig, vl_fig, fasta_file, total_sequences]
    )

demo.launch()