File size: 6,339 Bytes
fc56c31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
afc8e3b
fc56c31
7d9ee3f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import gradio as gr
import sqlite3
import pandas as pd
import plotly.express as px
from utils import *
from huggingface_hub import hf_hub_download

db_path = hf_hub_download(
    repo_id="hemantn/antibody-paired-sequences",
    filename="antibody_data_year.db",
    repo_type="dataset"
)


conn = sqlite3.connect(db_path, check_same_thread=False)

#conn = sqlite3.connect('/data/hn533621/OAS/Paired_Data_Analysis/antibody_data_year.db', check_same_thread=False)
all_df = pd.read_sql("SELECT * FROM antibody_data", conn)

# Compute min / max for each column
vh_min, vh_max = int(all_df["vh_length"].min()), int(all_df["vh_length"].max())
vl_min, vl_max = int(all_df["vl_length"].min()), int(all_df["vl_length"].max())

def apply_filters(dropdown_1, dropdown_2, dropdown_3, dropdown_4, slider_vh, slider_vl):
    """
    Build a query to filter the data based on the user's input
    """
    clauses = []
    params = {}
    if dropdown_1: clauses.append('v_call_heavy_first = :vh_germline');  params['vh_germline'] = dropdown_1
    if dropdown_2: clauses.append('v_call_light_first = :vl_germline');  params['vl_germline'] = dropdown_2
    if dropdown_3: clauses.append('BType = :btype');  params['btype'] = dropdown_3
    if dropdown_4: clauses.append('Disease = :disease');  params['disease'] = dropdown_4
    if slider_vh:
       clauses.append("vh_length BETWEEN :vh_min AND :vh_max")
       params["vh_min"] = 80
       params["vh_max"] = slider_vh
    if slider_vl:
        clauses.append("vl_length BETWEEN :vl_min AND :vl_max")
        params["vl_min"] = 80
        params["vl_max"] = slider_vl

    sql = "SELECT * FROM antibody_data"
    if clauses:
        sql += " WHERE " + " AND ".join(clauses)

    df = pd.read_sql_query(sql, conn, params=params)
    #print(df.columns)
        # ---- Reorder and rename columns ----
    # 1. Rename to your desired display names
    rename_map = {
        "BType": "BType",
        "Disease": "Disease",
        "v_call_heavy_first": "vcall_VH",
        "v_call_light_first": "vcall_VL",
        "vh_length": "VH_length",
        "vl_length": "VL_length",
        "sequence_alignment_aa_heavy": "VH",
        "sequence_alignment_aa_light": "VL",
        "Year": "Year",
    }
    df = df.rename(columns=rename_map)

    # 2. Reorder the columns
    desired_order = [
        "BType",
        "Disease",
        "vcall_VH",
        "vcall_VL",
        "VH_length",
        "VL_length",
        "Year",
        "VH",
        "VL",
    ]
    # Only keep columns that actually exist (avoids errors if some missing)
    df = df[[c for c in desired_order if c in df.columns]]
    
    total_rows = conn.execute("SELECT COUNT(*) FROM antibody_data").fetchone()[0]
    #vcall_vh_bar = bar_vcall_vh(df, total_rows, dropdown_1)
    year_bar = bar_year_count(df)
    #vcall_vl_bar = bar_vcall_vl(df, total_rows, dropdown_2)
    combined_vh_vl_bar = bar_vh_vl_combined(df, total_rows, dropdown_1, dropdown_2)
    #disease_bar  = bar_disease_count(df, total_rows, dropdown_4)
    #btype_bar = bar_btype_count(df, total_rows, dropdown_3)
    vh_fig, vl_fig = hist_vh_vl_separate(df)
    fasta_file = make_fasta_file(df)
    total_sequences = 2*len(df)
    return df.head(5), combined_vh_vl_bar, year_bar, vh_fig, vl_fig, fasta_file, total_sequences

with gr.Blocks(theme=gr.themes.Ocean()) as demo:
    gr.Markdown(
        """
        <h1 style='text-align:center; color:#3A7; margin-bottom:0.5em;'>🔎 Antibody Database Dashboard</h1>
        <p style='text-align:center; color:gray;'>
        Filter sequences, explore counts, and download custom FASTA files.
        </p>
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            dropdown_1 = gr.Dropdown(choices = [""] + sorted(all_df['v_call_heavy_first'].unique()), \
                value = None, label = 'VH germline')
            dropdown_2 = gr.Dropdown(choices = [""] + sorted(all_df['v_call_light_first'].unique()), \
                value = None, label = 'VL germline')
            dropdown_3 = gr.Dropdown(choices = [""] + sorted(all_df['BType'].unique()), \
                value = None, label = 'B-Type')
            dropdown_4 = gr.Dropdown(choices = [""] + sorted(all_df['Disease'].unique()), \
                value = None, label = 'Disase')

            slider_vh = gr.Slider(value=vh_max, minimum=vh_min, maximum=vh_max, step=1, label = 'VH length') 
            slider_vl = gr.Slider(value=vl_max, minimum=vl_min, maximum=vl_max, step=1, label = 'VL length')  

            button = gr.Button('Apply Filters', variant='primary')

        with gr.Column(scale=2):
            with gr.Row():
                with gr.Column(scale=1):
                      combined_vh_vl_bar = gr.Plot(label = 'VH and VL Germline')
                with gr.Column(scale=1):
                      vh_fig = gr.Plot(label = 'VH Length Distribution')
            with gr.Row():
                with gr.Column(scale=1):
                      year_bar = gr.Plot(label = 'Year Wise Distribution')
                with gr.Column(scale=1):
                      vl_fig = gr.Plot(label = 'VL Length Distribution')

    with gr.Row():
        #with gr.Column(scale=0.3):
            #gr.Plot(label= 'Yearwise distribution of antibodies')
        with gr.Column(scale=1):
            with gr.Row():
                 with gr.Column(scale=2):
                     df_out = gr.Dataframe()
                 with gr.Column(scale=1):
                     fasta_file = gr.File(label= 'Download Antibody Data Fasta file', interactive=False)

                     with gr.Row():
                        total_sequences = gr.Textbox(label= 'No of Sequences in Fasta file', value=0, interactive=False)
                    
    
    slider_vh.input(update_vh, inputs=slider_vh, outputs=slider_vh, queue=False)
    slider_vl.input(update_vl, inputs=slider_vl, outputs=slider_vl, queue=False)

    inputs = [dropdown_1, dropdown_2, dropdown_3, dropdown_4, slider_vh, slider_vl]
    #outputs = df,
    #outputs = [plot_vh_germline, plot_vl_germline, plot_disease_count, plot_btype_count, \
    #                plot_year_data, dataframe, download_fasta]
    
    button.click(
        fn=apply_filters,
        inputs = inputs,
        outputs = [df_out, combined_vh_vl_bar, year_bar, vh_fig, vl_fig, fasta_file, total_sequences]
    )

demo.launch()