import gradio as gr import sqlite3 import pandas as pd import plotly.express as px from utils import * from huggingface_hub import hf_hub_download db_path = hf_hub_download( repo_id="hemantn/antibody-paired-sequences", filename="antibody_data_year.db", repo_type="dataset" ) conn = sqlite3.connect(db_path, check_same_thread=False) #conn = sqlite3.connect('/data/hn533621/OAS/Paired_Data_Analysis/antibody_data_year.db', check_same_thread=False) all_df = pd.read_sql("SELECT * FROM antibody_data", conn) # Compute min / max for each column vh_min, vh_max = int(all_df["vh_length"].min()), int(all_df["vh_length"].max()) vl_min, vl_max = int(all_df["vl_length"].min()), int(all_df["vl_length"].max()) def apply_filters(dropdown_1, dropdown_2, dropdown_3, dropdown_4, slider_vh, slider_vl): """ Build a query to filter the data based on the user's input """ clauses = [] params = {} if dropdown_1: clauses.append('v_call_heavy_first = :vh_germline'); params['vh_germline'] = dropdown_1 if dropdown_2: clauses.append('v_call_light_first = :vl_germline'); params['vl_germline'] = dropdown_2 if dropdown_3: clauses.append('BType = :btype'); params['btype'] = dropdown_3 if dropdown_4: clauses.append('Disease = :disease'); params['disease'] = dropdown_4 if slider_vh: clauses.append("vh_length BETWEEN :vh_min AND :vh_max") params["vh_min"] = 80 params["vh_max"] = slider_vh if slider_vl: clauses.append("vl_length BETWEEN :vl_min AND :vl_max") params["vl_min"] = 80 params["vl_max"] = slider_vl sql = "SELECT * FROM antibody_data" if clauses: sql += " WHERE " + " AND ".join(clauses) df = pd.read_sql_query(sql, conn, params=params) #print(df.columns) # ---- Reorder and rename columns ---- # 1. Rename to your desired display names rename_map = { "BType": "BType", "Disease": "Disease", "v_call_heavy_first": "vcall_VH", "v_call_light_first": "vcall_VL", "vh_length": "VH_length", "vl_length": "VL_length", "sequence_alignment_aa_heavy": "VH", "sequence_alignment_aa_light": "VL", "Year": "Year", } df = df.rename(columns=rename_map) # 2. Reorder the columns desired_order = [ "BType", "Disease", "vcall_VH", "vcall_VL", "VH_length", "VL_length", "Year", "VH", "VL", ] # Only keep columns that actually exist (avoids errors if some missing) df = df[[c for c in desired_order if c in df.columns]] total_rows = conn.execute("SELECT COUNT(*) FROM antibody_data").fetchone()[0] #vcall_vh_bar = bar_vcall_vh(df, total_rows, dropdown_1) year_bar = bar_year_count(df) #vcall_vl_bar = bar_vcall_vl(df, total_rows, dropdown_2) combined_vh_vl_bar = bar_vh_vl_combined(df, total_rows, dropdown_1, dropdown_2) #disease_bar = bar_disease_count(df, total_rows, dropdown_4) #btype_bar = bar_btype_count(df, total_rows, dropdown_3) vh_fig, vl_fig = hist_vh_vl_separate(df) fasta_file = make_fasta_file(df) total_sequences = 2*len(df) return df.head(5), combined_vh_vl_bar, year_bar, vh_fig, vl_fig, fasta_file, total_sequences with gr.Blocks(theme=gr.themes.Ocean()) as demo: gr.Markdown( """

🔎 Antibody Database Dashboard

Filter sequences, explore counts, and download custom FASTA files.

""" ) with gr.Row(): with gr.Column(scale=1): dropdown_1 = gr.Dropdown(choices = [""] + sorted(all_df['v_call_heavy_first'].unique()), \ value = None, label = 'VH germline') dropdown_2 = gr.Dropdown(choices = [""] + sorted(all_df['v_call_light_first'].unique()), \ value = None, label = 'VL germline') dropdown_3 = gr.Dropdown(choices = [""] + sorted(all_df['BType'].unique()), \ value = None, label = 'B-Type') dropdown_4 = gr.Dropdown(choices = [""] + sorted(all_df['Disease'].unique()), \ value = None, label = 'Disase') slider_vh = gr.Slider(value=vh_max, minimum=vh_min, maximum=vh_max, step=1, label = 'VH length') slider_vl = gr.Slider(value=vl_max, minimum=vl_min, maximum=vl_max, step=1, label = 'VL length') button = gr.Button('Apply Filters', variant='primary') with gr.Column(scale=2): with gr.Row(): with gr.Column(scale=1): combined_vh_vl_bar = gr.Plot(label = 'VH and VL Germline') with gr.Column(scale=1): vh_fig = gr.Plot(label = 'VH Length Distribution') with gr.Row(): with gr.Column(scale=1): year_bar = gr.Plot(label = 'Year Wise Distribution') with gr.Column(scale=1): vl_fig = gr.Plot(label = 'VL Length Distribution') with gr.Row(): #with gr.Column(scale=0.3): #gr.Plot(label= 'Yearwise distribution of antibodies') with gr.Column(scale=1): with gr.Row(): with gr.Column(scale=2): df_out = gr.Dataframe() with gr.Column(scale=1): fasta_file = gr.File(label= 'Download Antibody Data Fasta file', interactive=False) with gr.Row(): total_sequences = gr.Textbox(label= 'No of Sequences in Fasta file', value=0, interactive=False) slider_vh.input(update_vh, inputs=slider_vh, outputs=slider_vh, queue=False) slider_vl.input(update_vl, inputs=slider_vl, outputs=slider_vl, queue=False) inputs = [dropdown_1, dropdown_2, dropdown_3, dropdown_4, slider_vh, slider_vl] #outputs = df, #outputs = [plot_vh_germline, plot_vl_germline, plot_disease_count, plot_btype_count, \ # plot_year_data, dataframe, download_fasta] button.click( fn=apply_filters, inputs = inputs, outputs = [df_out, combined_vh_vl_bar, year_bar, vh_fig, vl_fig, fasta_file, total_sequences] ) demo.launch()