RobinChu committed on
Commit
e42f219
·
1 Parent(s): 7536e13

initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
# Data files are not tracked, except the CSV result files the dashboard reads.
files/*
!files/*.csv

# Local virtual environment.
.venv/

# Streamlit prototype kept out of the Space build.
app_st.py

# Python bytecode — the initial commit accidentally tracked
# nist_cda_dashboard/__pycache__/*.pyc; keep bytecode out of the repo.
__pycache__/
*.pyc
README.md CHANGED
@@ -1,10 +1,11 @@
1
  ---
2
- title: Test1
3
- emoji: 📈
4
- colorFrom: red
5
- colorTo: indigo
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: NIST CDA Dashboard
3
+ emoji: 📑
4
+ # colorFrom: red
5
+ # colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
+ app_port: 7860
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio entry point for the NIST CDA dashboard.

Loads the pre-computed QC / gating-analysis CSVs listed in ``config.yml``,
builds the STEP 1 filter sidebar plus the "Quality Check" and
"Gating Result Analysis" tabs, and wires every widget to the shared
``ComponentHelper`` update callbacks.

NOTE(review): this module was reconstructed from a rendering that lost
indentation — the nesting (sidebar holds STEP 1, tabs at top level) is the
conventional reading of the original; confirm against the deployed Space.
"""
import yaml
import gradio as gr
import pandas as pd
from functools import partial
from nist_cda_dashboard.component import ComponentHelper
from nist_cda_dashboard.utils import DownloadHelper


with open("./config.yml", "r") as file:
    config = yaml.safe_load(file)

# One helper instance owns the dataframes, the component factories
# (``creator``) and the event callbacks (``updater``).
component_helper = ComponentHelper(
    file_qc_results=pd.read_csv(config["file_path"]["file_qc_results"]),
    dataset_qc_results=pd.read_csv(config["file_path"]["dataset_qc_results"]),
    analyzed_gating_results=pd.read_csv(
        config["file_path"]["analyzed_gating_results"]),
    analyzed_gating_results_wCDAResults=pd.read_csv(
        config["file_path"]["analyzed_gating_results_wCDAResults"]),
    config=config)

# Horizontal scrollbar for Plotly figures wider than their container.
custom_css = """
#plot_wScrollBar {
    overflow-x: auto !important;
    text-align: center !important;
}

#plot_wScrollBar > .js-plotly-plot,
#plot_wScrollBar > .plotly,
#plot_wScrollBar > div:first-child {
    display: inline-block !important;
    text-align: left !important;
}
"""


def _wire_download(button, source, converter, file_stem, elem_id):
    """Route *button* through a hidden ``gr.DownloadButton``.

    Clicking *button* runs ``converter(value, file_stem)`` on the current
    value of *source* (a table or figure component), stores the produced
    file on the hidden button, then a JS callback clicks the hidden button
    so the browser download starts immediately.  Returns the hidden button.
    """
    hidden = gr.DownloadButton(visible=False, elem_id=elem_id)
    button.click(fn=(lambda value: converter(value, file_stem)),
                 inputs=[source],
                 outputs=[hidden]).then(
        fn=None,
        inputs=None,
        outputs=None,
        # DownloadButton only triggers the browser "save" on a real click,
        # hence the client-side click on the hidden element.
        js=f"() => document.querySelector('#{elem_id}').click()")
    return hidden


with gr.Blocks(css=custom_css) as demo:
    with gr.Sidebar(position="left", width=700):
        gr.Markdown("# STEP 1: Filter Dataset")
        (clear_all_models_button, select_all_models_button, model_filter,
         clear_all_datasets_button, select_all_datasets_button, dataset_filter,
         main_apply_filters_button) = component_helper.creator.main_filter_components()

        # `clean_all_choices` takes no extra arguments, so no partial needed.
        clear_all_models_button.click(fn=component_helper.updater.clean_all_choices,
                                      outputs=model_filter)
        select_all_models_button.click(
            fn=partial(component_helper.updater.select_all_choices, "model_filter"),
            outputs=model_filter)

        clear_all_datasets_button.click(fn=component_helper.updater.clean_all_choices,
                                        outputs=dataset_filter)
        select_all_datasets_button.click(
            fn=partial(component_helper.updater.select_all_choices, "dataset_filter"),
            outputs=dataset_filter)

        # Narrow the dataset choices whenever the model selection changes.
        model_filter.change(fn=component_helper.updater.update_dataset_filter,
                            inputs=[model_filter, dataset_filter],
                            outputs=dataset_filter)

    gr.Markdown("# STEP 2: Choose QC or Analysis Tab")

    with gr.Tab(label="Quality Check"):
        gr.Markdown("# STEP 3: Filter File")
        (clear_all_sop_exps_qc_tab_button, select_all_sop_exps_qc_tab_button,
         sop_exp_qc_tab_filter,
         clear_all_materials_button, select_all_materials_button, material_filter,
         clear_all_issues_button, select_all_issues_button, issue_filter,
         qc_tab_apply_filters_button) = component_helper.creator.qc_tab_filter_components()

        clear_all_sop_exps_qc_tab_button.click(
            fn=component_helper.updater.clean_all_choices,
            outputs=sop_exp_qc_tab_filter)
        select_all_sop_exps_qc_tab_button.click(
            fn=partial(component_helper.updater.select_all_choices,
                       "sop_exp_qc_tab_filter"),
            outputs=sop_exp_qc_tab_filter)

        clear_all_materials_button.click(fn=component_helper.updater.clean_all_choices,
                                         outputs=material_filter)
        select_all_materials_button.click(
            fn=partial(component_helper.updater.select_all_choices, "material_filter"),
            outputs=material_filter)

        clear_all_issues_button.click(fn=component_helper.updater.clean_all_choices,
                                      outputs=issue_filter)
        select_all_issues_button.click(
            fn=partial(component_helper.updater.select_all_choices, "issue_filter"),
            outputs=issue_filter)

        # Material choices depend on the selected SOP-Exp values.
        sop_exp_qc_tab_filter.change(fn=component_helper.updater.update_material_filter,
                                     inputs=[sop_exp_qc_tab_filter, material_filter],
                                     outputs=material_filter)

        gr.Markdown("<br><br>")
        gr.Markdown("# STEP 4: Results")
        qc_tab_dataset_qc_status_filter = \
            component_helper.creator.qc_tab_dataset_qc_status_filter_component()

        with gr.Row():
            gr.Markdown("## File QC Results Table")
            download_file_qc_button = gr.Button("Download File QC Results")

        file_qc_status_filter, file_qc_table, file_qc_table_no_file_msg = \
            component_helper.creator.file_qc_result_components()

        _wire_download(download_file_qc_button, file_qc_table,
                       DownloadHelper.df2csv, "file_qc_table",
                       "download_file_qc_button_hidden")

        gr.Markdown("<br><br>")
        with gr.Row():
            gr.Markdown("## Dataset QC Results Table")
            download_dataset_qc_button = gr.Button("Download Dataset QC Results")
        dataset_qc_table, dataset_qc_table_no_dataset_msg = \
            component_helper.creator.dataset_qc_result_components()

        _wire_download(download_dataset_qc_button, dataset_qc_table,
                       DownloadHelper.df2csv, "dataset_qc_table",
                       "download_dataset_qc_button_hidden")

        gr.Markdown("<br><br>")
        with gr.Row():
            gr.Markdown("## QC Results Visualization")
            download_qc_fig_button = gr.Button("Download QC Visualization Figure")
        qc_fig, qc_fig_no_file_msg = \
            component_helper.creator.qc_result_visual_components()

        _wire_download(download_qc_fig_button, qc_fig,
                       DownloadHelper.fig2png, "qc_visual_fig",
                       "download_qc_fig_button_hidden")

        # Both QC tables refresh together on any relevant filter event.
        gr.on(fn=component_helper.updater.update_file_qc_table,
              triggers=[main_apply_filters_button.click,
                        qc_tab_apply_filters_button.click,
                        qc_tab_dataset_qc_status_filter.change,
                        file_qc_status_filter.change],
              inputs=[dataset_filter, sop_exp_qc_tab_filter, material_filter,
                      issue_filter, qc_tab_dataset_qc_status_filter,
                      file_qc_status_filter],
              outputs=[file_qc_table, file_qc_table_no_file_msg,
                       dataset_qc_table, dataset_qc_table_no_dataset_msg])

        gr.on(fn=component_helper.updater.update_qc_fig,
              triggers=[main_apply_filters_button.click,
                        qc_tab_apply_filters_button.click,
                        qc_tab_dataset_qc_status_filter.change],
              inputs=[dataset_filter, sop_exp_qc_tab_filter, material_filter,
                      issue_filter, qc_tab_dataset_qc_status_filter],
              outputs=[qc_fig, qc_fig_no_file_msg])

    with gr.Tab(label="Gating Result Analysis"):
        gr.Markdown("# STEP 3: Filter File")
        (sample_filter, sop_exp_analysis_tab_filter, compensation_control_filter,
         gating_control_filter, pop_pheno_parent_filter,
         clear_gating_tab_filters_button,
         analysis_tab_apply_filters_button) = \
            component_helper.creator.analysis_tab_filter_components()

        gr.Markdown("<br><br>")
        gr.Markdown("# STEP 4: Results")
        gr.Markdown("## Analysis Visualization")
        analysis_tab_dataset_qc_status_filter, analyzed_result_filter = \
            component_helper.creator.analyzed_result_filter_component()

        with gr.Tab(label="Single Result Barplot"):
            download_barplot_fig_button = gr.Button("Download Barplot Figure")
            analysis_barplot_fig, barplot_not_reportable_msg = \
                component_helper.creator.analysis_barplot_components()

            _wire_download(download_barplot_fig_button, analysis_barplot_fig,
                           DownloadHelper.fig2png, "analysis_barplot_fig",
                           "download_barplot_fig_button_hidden")

        with gr.Tab(label="Multiple Results Heatmap"):
            download_heatmap_fig_button = gr.Button("Download Heatmap Figure")
            (compared_protocol_filter, include_CDA_results_checkbox,
             analysis_heatmap_exp_info_table, analysis_heatmap_exp_comparison_table,
             analysis_heatmap_fig, heatmap_not_reportable_msg) = \
                component_helper.creator.analysis_heatmap_components()

            _wire_download(download_heatmap_fig_button, analysis_heatmap_fig,
                           DownloadHelper.fig2png, "analysis_heatmap_fig",
                           "download_heatmap_fig_button_hidden")

        gr.on(fn=component_helper.updater.update_barplot_fig,
              triggers=[main_apply_filters_button.click,
                        analysis_tab_apply_filters_button.click,
                        analysis_tab_dataset_qc_status_filter.change,
                        analyzed_result_filter.change],
              inputs=[dataset_filter, analysis_tab_dataset_qc_status_filter,
                      sample_filter, sop_exp_analysis_tab_filter,
                      compensation_control_filter,
                      gating_control_filter, pop_pheno_parent_filter,
                      analyzed_result_filter],
              outputs=[analysis_barplot_fig, barplot_not_reportable_msg])

        gr.on(fn=component_helper.updater.update_heatmap_fig,
              triggers=[main_apply_filters_button.click,
                        analysis_tab_apply_filters_button.click,
                        analysis_tab_dataset_qc_status_filter.change,
                        analyzed_result_filter.change,
                        compared_protocol_filter.change,
                        include_CDA_results_checkbox.change],
              inputs=[dataset_filter, analysis_tab_dataset_qc_status_filter,
                      sample_filter, sop_exp_analysis_tab_filter,
                      compensation_control_filter,
                      gating_control_filter, pop_pheno_parent_filter,
                      analyzed_result_filter, compared_protocol_filter,
                      include_CDA_results_checkbox],
              outputs=[analysis_heatmap_exp_info_table,
                       analysis_heatmap_exp_comparison_table,
                       analysis_heatmap_fig, heatmap_not_reportable_msg])

        # The five analysis filters are interdependent: changing any one of
        # them re-derives the valid choices for all of them.
        gr.on(fn=component_helper.updater.update_analysis_tab_filters,
              triggers=[sample_filter.change,
                        sop_exp_analysis_tab_filter.change,
                        compensation_control_filter.change,
                        gating_control_filter.change,
                        pop_pheno_parent_filter.change],
              inputs=[sample_filter,
                      sop_exp_analysis_tab_filter, compensation_control_filter,
                      gating_control_filter, pop_pheno_parent_filter],
              outputs=[sample_filter,
                       sop_exp_analysis_tab_filter, compensation_control_filter,
                       gating_control_filter, pop_pheno_parent_filter])

        clear_gating_tab_filters_button.click(
            fn=component_helper.updater.clear_analysis_tab_filters,
            outputs=[sample_filter,
                     sop_exp_analysis_tab_filter, compensation_control_filter,
                     gating_control_filter, pop_pheno_parent_filter])


if __name__ == "__main__":
    # Bind on 0.0.0.0:7860 so the app is reachable from outside the Docker
    # container (the dockerfile EXPOSEs 7860 and the README sets app_port:
    # 7860).  The original `launch(inbrowser=True)` only bound localhost,
    # leaving the Space unreachable.
    demo.launch(server_name="0.0.0.0", server_port=7860)
config.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Configuration for the NIST CDA dashboard.
# Loaded once at startup by app.py via yaml.safe_load("./config.yml").

# Pre-computed CSV result files, each read into a pandas DataFrame and
# handed to ComponentHelper (see app.py).
file_path:
  file_qc_results: './files/file_qc_results.csv'
  dataset_qc_results: './files/site_ins_qc_results.csv'
  analyzed_gating_results: './files/analyzed_results_compilation.csv'
  analyzed_gating_results_wCDAResults: './files/analyzed_results_compilation_wCDAResults.csv'

# "Quality Check" tab settings.
#   file_infos: column labels describing each FCS file.
#   issues: choices for the issue filter dropdown
#           (ComponentHelper wires these into `issue_filter`).
#   file_sets: labels of the required file sets per dataset — these match
#              the last two columns of site_ins_qc_results.csv.
qc_results_tab:
  file_infos: ['Dataset', 'Site (anonymized)', 'Instrument model', 'File', 'SOP-Exp', 'Material', 'Repeat times', 'QC status']
  issues: ['Missing file', 'Insufficient event', 'Missing time', 'Missing FSCA/SSCA', 'Missing FSCH/SSCH', 'Missing fluorescence', 'Voltage flipped']
  file_sets: ['Rainbow bead and FC beads (SOP1-e1)', 'All cryoPBMCs (SOP3-e1, except FMO)']

# "Gating Result Analysis" tab settings.
#   exp_infos: experiment metadata column labels.
#   results: reportable result types — ComponentHelper uses these as the
#            choices for `analyzed_result_filter` (first entry is the default).
gating_results_tab:
  exp_infos: ['Dataset', 'Site (anonymized)', 'Instrument model', 'Result ID', 'Sample', 'SOP-Exp', 'Repetition', 'Compensation control', 'Gating control', 'Population', 'Phenotype', 'Parent gate']
  results: ['Cell population (%)', 'Abs. cell count (volume)', 'Abs. cell count (TruCount)', 'MedFI', 'rSD', 'ERF']
dockerfile ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# ====================
# Base image
# ====================
# Slim Python image, pinned to an exact version to avoid surprise updates.
FROM python:3.12.7-slim

# ====================
# Environment
# ====================
# PYTHONUNBUFFERED=1: flush Python output immediately (visible in `docker logs`)
# PYTHONDONTWRITEBYTECODE=1: no .pyc files, smaller image
# PIP_NO_CACHE_DIR=1: no pip cache layer, smaller image
# PIP_DISABLE_PIP_VERSION_CHECK=1: skip the pip self-version check, faster installs
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# ====================
# User and group
# ====================
# Dedicated non-root user (created early for security).
# -r: system user/group
# -u 1001: fixed UID, avoids clashing with host users
# -d /app: home directory
# -s /sbin/nologin: no shell login allowed
RUN groupadd -r appgroup && \
    useradd -r -g appgroup -u 1001 -d /app -s /sbin/nologin appuser

# ====================
# System packages
# ====================
# Single apt layer (the original had two duplicate install blocks, which
# bloated the image).  `curl` is added because HEALTHCHECK below uses it —
# it was missing in the original, so every health probe failed.
# --no-install-recommends keeps the image minimal; apt caches and temp files
# are removed in the same layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        libglib2.0-0 \
        libgl1-mesa-glx && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# ====================
# Working directory
# ====================
WORKDIR /app

# ====================
# Python dependencies
# ====================
# Copy requirements.txt first to exploit Docker layer caching; --chown sets
# ownership at copy time, avoiding an extra chown layer.
COPY --chown=appuser:appgroup requirements.txt .

RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# ====================
# Application code
# ====================
COPY --chown=appuser:appgroup . .

# ====================
# Cleanup
# ====================
# Remove any committed bytecode to shrink the image.
# `2>/dev/null || true` ignores races while deleting __pycache__ dirs.
RUN find /app -name "*.pyc" -delete && \
    find /app -name "__pycache__" -type d -exec rm -rf {} + 2>/dev/null || true

# ====================
# Drop privileges
# ====================
# Run the app as the non-root user (important security measure).
USER appuser

# ====================
# Network
# ====================
# Gradio port expected by the Hugging Face Space (README `app_port: 7860`).
EXPOSE 7860

# ====================
# Health check
# ====================
# Probe the Gradio root page.  The original probed /_stcore/health, which is
# a Streamlit endpoint — this is a Gradio app, so it always returned 404.
# --interval: time between probes; --timeout: per-probe limit;
# --start-period: startup grace; --retries: failures before "unhealthy".
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:7860/ || exit 1

# ====================
# Entrypoint
# ====================
# Exec-form CMD.  The original was `["gradio", "app.py", ]` after comment
# stripping — the trailing comma is invalid JSON, so Docker fell back to
# shell form and the container failed to start.  `python app.py` is the
# production entry point (`gradio` is the auto-reload dev CLI); app.py
# itself calls demo.launch().
CMD ["python", "app.py"]
files/analyzed_results_compilation.csv ADDED
The diff for this file is too large to render. See raw diff
 
files/analyzed_results_compilation_wCDAResults.csv ADDED
The diff for this file is too large to render. See raw diff
 
files/file_qc_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
files/refactored_results_compilation.csv ADDED
The diff for this file is too large to render. See raw diff
 
files/site_ins_qc_results.csv ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Dataset,Dataset code,Site (anonymized),Instrument model,QC status,Rainbow bead and FC beads (SOP1-e1),"All cryoPBMCs (SOP3-e1, except FMO)"
2
+ Site01-Quanteon,1,Site01,Quanteon,Pass QC,Completed,Completed
3
+ Site02-Penteon,2,Site02,Penteon,Fail QC,Completed,Incomplete
4
+ Site03-Aurora,3,Site03,Aurora,Pass QC,Completed,Completed
5
+ Site03-SymphonyA3,4,Site03,SymphonyA3,Pass QC,Completed,Completed
6
+ Site03-SymphonyA5,5,Site03,SymphonyA5,Pass QC,Completed,Completed
7
+ Site04-Aurora,6,Site04,Aurora,Pass QC,Completed,Completed
8
+ Site05-Cytoflex,7,Site05,Cytoflex,Fail QC,Completed,Incomplete
9
+ Site05-CytoflexLX,8,Site05,CytoflexLX,Fail QC,Completed,Incomplete
10
+ Site05-CytoflexLXNUV,9,Site05,CytoflexLXNUV,Fail QC,Completed,Incomplete
11
+ Site06-Lyric-1,10,Site06,Lyric,Fail QC,Completed,Incomplete
12
+ Site06-Lyric-2,11,Site06,Lyric,Pass QC,Completed,Completed
13
+ Site07-CantoSORP-1,12,Site07,CantoSORP,Pass QC,Completed,Completed
14
+ Site07-CantoSORP-2,13,Site07,CantoSORP,Fail QC,Completed,Incomplete
15
+ Site08-CantoSORP,14,Site08,CantoSORP,Pass QC,Completed,Completed
16
+ Site08-MQA10-1,15,Site08,MQA10,Pass QC,Completed,Completed
17
+ Site08-MQA10-2,16,Site08,MQA10,Pass QC,Completed,Completed
18
+ Site09-CellStream,17,Site09,CellStream,Fail QC,Completed,Incomplete
19
+ Site09-CytoflexS,18,Site09,CytoflexS,Fail QC,Completed,Incomplete
20
+ Site10-AriaIII,19,Site10,AriaIII,Pass QC,Completed,Completed
21
+ Site10-Canto10,20,Site10,Canto10,Pass QC,Completed,Completed
22
+ Site10-CantoII,21,Site10,CantoII,Pass QC,Completed,Completed
23
+ Site10-Fortessa,22,Site10,Fortessa,Pass QC,Completed,Completed
24
+ Site11-BRZE5,23,Site11,BRZE5,Pass QC,Completed,Completed
25
+ Site11-CantoII,24,Site11,CantoII,Pass QC,Completed,Completed
26
+ Site11-Lyric,25,Site11,Lyric,Pass QC,Completed,Completed
27
+ Site12-CytoflexS,26,Site12,CytoflexS,Pass QC,Completed,Completed
28
+ Site13-CellStream-1,27,Site13,CellStream,Pass QC,Completed,Completed
29
+ Site13-CellStream-2,28,Site13,CellStream,Pass QC,Completed,Completed
30
+ Site13-ImageStreamX,29,Site13,ImageStreamX,Pass QC,Completed,Completed
31
+ Site14-Aurora,30,Site14,Aurora,Pass QC,Completed,Completed
32
+ Site15-AriaIII,31,Site15,AriaIII,Pass QC,Completed,Completed
33
+ Site15-CantoII-1,32,Site15,CantoII,Pass QC,Completed,Completed
34
+ Site15-CantoII-2,33,Site15,CantoII,Pass QC,Completed,Completed
35
+ Site16-AttuneNXT,34,Site16,AttuneNXT,Pass QC,Completed,Completed
36
+ Site16-CytoflexLX,35,Site16,CytoflexLX,Pass QC,Completed,Completed
37
+ Site17-Aurora-1,36,Site17,Aurora,Pass QC,Completed,Completed
38
+ Site17-Aurora-2,37,Site17,Aurora,Pass QC,Completed,Completed
39
+ Site18-CytoflexS,38,Site18,CytoflexS,Fail QC,Incomplete,Incomplete
40
+ Site18-NorthernL,39,Site18,NorthernL,Fail QC,Incomplete,Incomplete
41
+ Site19-Fusion-1,40,Site19,Fusion,Pass QC,Completed,Completed
42
+ Site19-Fusion-2,41,Site19,Fusion,Pass QC,Completed,Completed
43
+ Site20-Fortessa,42,Site20,Fortessa,Pass QC,Completed,Completed
nist_cda_dashboard/__init__.py ADDED
File without changes
nist_cda_dashboard/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (161 Bytes). View file
 
nist_cda_dashboard/__pycache__/component.cpython-312.pyc ADDED
Binary file (34.9 kB). View file
 
nist_cda_dashboard/__pycache__/utils.cpython-312.pyc ADDED
Binary file (1.91 kB). View file
 
nist_cda_dashboard/__pycache__/visualization.cpython-312.pyc ADDED
Binary file (32.4 kB). View file
 
nist_cda_dashboard/component.py ADDED
@@ -0,0 +1,517 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ from .visualization import QCVisualizer, AnalysisVisualizer
4
+
5
+
6
+ class ComponentHelper:
7
+ def __init__(self,
8
+ file_qc_results: pd.DataFrame,
9
+ dataset_qc_results: pd.DataFrame,
10
+ analyzed_gating_results: pd.DataFrame,
11
+ analyzed_gating_results_wCDAResults: pd.DataFrame,
12
+ config: dict):
13
+ self.file_qc_results = file_qc_results
14
+ self.dataset_qc_results = dataset_qc_results
15
+ self.analyzed_gating_results = analyzed_gating_results
16
+ self.analyzed_gating_results_wCDAResults = analyzed_gating_results_wCDAResults
17
+ self.config = config
18
+
19
+ self.choices = {"dataset_filter": list(file_qc_results["Dataset"].unique()),
20
+ "model_filter": sorted(list(file_qc_results["Instrument model"].unique())),
21
+ "sop_exp_qc_tab_filter": list(file_qc_results["SOP-Exp"].unique()),
22
+ "material_filter": list((file_qc_results["SOP-Exp"] + " " + file_qc_results["Material"]).unique()),
23
+ "issue_filter": config["qc_results_tab"]["issues"],
24
+ "sample_filter": list(analyzed_gating_results["Sample"].unique()),
25
+ "sop_exp_analysis_tab_filter": list(analyzed_gating_results["SOP-Exp"].unique()),
26
+ "compensation_control_filter": list(analyzed_gating_results["Compensation control"].unique()),
27
+ "gating_control_filter": list(analyzed_gating_results["Gating control"].unique()),
28
+ "pop_pheno_parent_filter": list(analyzed_gating_results["Population; Phenotype (Parent gate)"].unique()),
29
+ "analyzed_result_filter": config["gating_results_tab"]["results"],
30
+ }
31
+
32
+ self.creator = self._Creator(self)
33
+ self.updater = self._Updater(self)
34
+
35
+ class _Creator:
36
+ def __init__(self,
37
+ helper_instance):
38
+ self.helper = helper_instance
39
+ self.initialization_values = {}
40
+
41
+ def main_filter_components(self):
42
+ with gr.Row():
43
+ gr.Markdown("Instrument model")
44
+ clear_all_models_button = gr.Button("Clear All", size="sm")
45
+ select_all_models_button = gr.Button("Select All", size="sm")
46
+ model_filter = gr.Dropdown(label='', multiselect=True,
47
+ choices=self.helper.choices["model_filter"],
48
+ value=self.helper.choices["model_filter"])
49
+ self.initialization_values["model_filter"] = self.helper.choices["model_filter"]
50
+
51
+ with gr.Row():
52
+ gr.Markdown("Dataset")
53
+ clear_all_datasets_button = gr.Button("Clear All", size="sm")
54
+ select_all_datasets_button = gr.Button(
55
+ "Select All", size="sm")
56
+ dataset_filter = gr.Dropdown(label='', multiselect=True,
57
+ choices=self.helper.choices["dataset_filter"],
58
+ value=self.helper.choices["dataset_filter"])
59
+ self.initialization_values["dataset_filter"] = self.helper.choices["dataset_filter"]
60
+
61
+ main_apply_filters_button = gr.Button(value="Apply filters")
62
+
63
+ return clear_all_models_button, select_all_models_button, model_filter, \
64
+ clear_all_datasets_button, select_all_datasets_button, dataset_filter, \
65
+ main_apply_filters_button
66
+
67
+ def qc_tab_filter_components(self):
68
+ with gr.Row():
69
+ gr.Markdown("SOP-Exp")
70
+ clear_all_sop_exps_qc_tab_button = gr.Button(
71
+ "Clear All", size="sm")
72
+ select_all_sop_exps_qc_tab_button = gr.Button(
73
+ "Select All", size="sm")
74
+ sop_exp_qc_tab_filter = gr.Dropdown(label='', multiselect=True,
75
+ choices=self.helper.choices["sop_exp_qc_tab_filter"],
76
+ value=self.helper.choices["sop_exp_qc_tab_filter"])
77
+ self.initialization_values["sop_exp_qc_tab_filter"] = self.helper.choices["sop_exp_qc_tab_filter"]
78
+
79
+ with gr.Row():
80
+ gr.Markdown("Material")
81
+ clear_all_materials_button = gr.Button("Clear All", size="sm")
82
+ select_all_materials_button = gr.Button(
83
+ "Select All", size="sm")
84
+ material_filter = gr.Dropdown(label='', multiselect=True,
85
+ choices=self.helper.choices["material_filter"],
86
+ value=self.helper.choices["material_filter"])
87
+ self.initialization_values["material_filter"] = self.helper.choices["material_filter"]
88
+
89
+ with gr.Row():
90
+ gr.Markdown("Issues")
91
+ clear_all_issues_button = gr.Button("Clear All", size="sm")
92
+ select_all_issues_button = gr.Button("Select All", size="sm")
93
+ issue_filter = gr.Dropdown(label='', multiselect=True,
94
+ choices=self.helper.choices["issue_filter"],
95
+ value=self.helper.choices["issue_filter"])
96
+ self.initialization_values["issue_filter"] = self.helper.choices["issue_filter"]
97
+
98
+ qc_tab_apply_filters_button = gr.Button(value="Apply filters")
99
+
100
+ return clear_all_sop_exps_qc_tab_button, select_all_sop_exps_qc_tab_button, sop_exp_qc_tab_filter, \
101
+ clear_all_materials_button, select_all_materials_button, material_filter, \
102
+ clear_all_issues_button, select_all_issues_button, issue_filter, \
103
+ qc_tab_apply_filters_button
104
+
105
+ def qc_tab_dataset_qc_status_filter_component(self):
106
+ gr.Markdown("Dataset QC status")
107
+ qc_tab_dataset_qc_status_filter = gr.CheckboxGroup(label='',
108
+ choices=[
109
+ "Pass QC", "Fail QC"],
110
+ value=["Pass QC", "Fail QC"])
111
+ self.initialization_values["qc_tab_dataset_qc_status_filter"] = [
112
+ "Pass QC", "Fail QC"]
113
+
114
+ return qc_tab_dataset_qc_status_filter
115
+
116
+ def file_qc_result_components(self):
117
+ gr.Markdown("File QC status")
118
+ file_qc_status_filter = gr.CheckboxGroup(label='',
119
+ choices=[
120
+ "Pass QC", "Fail QC"],
121
+ value=["Fail QC"])
122
+ self.initialization_values["file_qc_status_filter"] = ["Fail QC"]
123
+
124
+ updater = self.helper.updater
125
+ updater.update_file_qc_table(self.initialization_values["dataset_filter"],
126
+ self.initialization_values["sop_exp_qc_tab_filter"],
127
+ self.initialization_values["material_filter"],
128
+ self.initialization_values["issue_filter"],
129
+ self.initialization_values["qc_tab_dataset_qc_status_filter"],
130
+ self.initialization_values["file_qc_status_filter"])
131
+ file_qc_table = gr.Dataframe(
132
+ type="pandas",
133
+ show_copy_button=True, show_row_numbers=True,
134
+ value=updater.filtered_file_qc_results)
135
+
136
+ file_qc_table_no_file_msg = gr.Markdown(
137
+ value="All files are filtered", visible=False)
138
+
139
+ return file_qc_status_filter, file_qc_table, file_qc_table_no_file_msg
140
+
141
+ def dataset_qc_result_components(self):
142
+ updater = self.helper.updater
143
+ updater.update_file_qc_table(self.initialization_values["dataset_filter"],
144
+ self.initialization_values["sop_exp_qc_tab_filter"],
145
+ self.initialization_values["material_filter"],
146
+ self.initialization_values["issue_filter"],
147
+ self.initialization_values["qc_tab_dataset_qc_status_filter"],
148
+ self.initialization_values["file_qc_status_filter"])
149
+ dataset_qc_table = gr.Dataframe(
150
+ type="pandas",
151
+ show_copy_button=True, show_row_numbers=True,
152
+ value=updater.filtered_dataset_qc_results)
153
+
154
+ dataset_qc_table_no_dataset_msg = gr.Markdown(
155
+ value="All datasets are filtered", visible=False)
156
+
157
+ return dataset_qc_table, dataset_qc_table_no_dataset_msg
158
+
159
+ def qc_result_visual_components(self):
160
+ updater = self.helper.updater
161
+ updater.update_qc_fig(self.initialization_values["dataset_filter"],
162
+ self.initialization_values["sop_exp_qc_tab_filter"],
163
+ self.initialization_values["material_filter"],
164
+ self.initialization_values["issue_filter"],
165
+ self.initialization_values["qc_tab_dataset_qc_status_filter"])
166
+ qc_fig = gr.Plot(elem_id="plot_wScrollBar",
167
+ value=updater.qc_visual)
168
+ qc_fig_no_file_msg = gr.Markdown(
169
+ value="All files are filtered", visible=False)
170
+
171
+ return qc_fig, qc_fig_no_file_msg
172
+
173
+ def analysis_tab_filter_components(self):
174
+ gr.Markdown("Sample")
175
+ sample_filter = gr.Dropdown(label='',
176
+ choices=self.helper.choices["sample_filter"],
177
+ value=self.helper.choices["sample_filter"][0])
178
+ self.initialization_values["sample_filter"] = self.helper.choices["sample_filter"][0]
179
+
180
+ gr.Markdown("SOP-Exp")
181
+ sop_exp_analysis_tab_filter = gr.Dropdown(label='',
182
+ choices=self.helper.choices["sop_exp_analysis_tab_filter"],
183
+ value=self.helper.choices["sop_exp_analysis_tab_filter"][0])
184
+ self.initialization_values["sop_exp_analysis_tab_filter"] = self.helper.choices["sop_exp_analysis_tab_filter"][0]
185
+
186
+ gr.Markdown("Compensation control")
187
+ compensation_control_filter = gr.Dropdown(label='',
188
+ choices=self.helper.choices["compensation_control_filter"],
189
+ value=self.helper.choices["compensation_control_filter"][0])
190
+ self.initialization_values["compensation_control_filter"] = self.helper.choices["compensation_control_filter"][0]
191
+
192
+ gr.Markdown("Gating control")
193
+ gating_control_filter = gr.Dropdown(label='',
194
+ choices=self.helper.choices["gating_control_filter"],
195
+ value=self.helper.choices["gating_control_filter"][0])
196
+ self.initialization_values["gating_control_filter"] = self.helper.choices["gating_control_filter"][0]
197
+
198
+ gr.Markdown("Population; Phenotype (Parent gate)")
199
+ pop_pheno_parent_filter = gr.Dropdown(label='',
200
+ choices=self.helper.choices["pop_pheno_parent_filter"],
201
+ value=self.helper.choices["pop_pheno_parent_filter"][3])
202
+ self.initialization_values["pop_pheno_parent_filter"] = self.helper.choices["pop_pheno_parent_filter"][3]
203
+
204
+ clear_gating_tab_filters_button = gr.Button(
205
+ value="Clear selections")
206
+ analysis_tab_apply_filters_button = gr.Button(
207
+ value="Apply filters")
208
+
209
+ return sample_filter, sop_exp_analysis_tab_filter, compensation_control_filter, \
210
+ gating_control_filter, pop_pheno_parent_filter, clear_gating_tab_filters_button, analysis_tab_apply_filters_button
211
+
212
+ def analyzed_result_filter_component(self):
213
+ gr.Markdown("Dataset QC status")
214
+ analysis_tab_dataset_qc_status_filter = gr.CheckboxGroup(label='',
215
+ choices=[
216
+ "Pass QC", "Fail QC"],
217
+ value=["Pass QC"])
218
+ self.initialization_values["analysis_tab_dataset_qc_status_filter"] = [
219
+ "Pass QC"]
220
+
221
+ gr.Markdown("Analyzed result")
222
+ analyzed_result_filter = gr.Dropdown(label='',
223
+ multiselect=True,
224
+ choices=self.helper.config["gating_results_tab"]["results"],
225
+ value=[self.helper.config["gating_results_tab"]["results"][0]])
226
+ self.initialization_values["analyzed_result_filter"] = [
227
+ self.helper.config["gating_results_tab"]["results"][0]]
228
+
229
+ return analysis_tab_dataset_qc_status_filter, analyzed_result_filter
230
+
231
+ def analysis_barplot_components(self):
232
+ updater = self.helper.updater
233
+ updater.update_barplot_fig(self.initialization_values["dataset_filter"],
234
+ self.initialization_values["analysis_tab_dataset_qc_status_filter"],
235
+ self.initialization_values["sample_filter"],
236
+ self.initialization_values["sop_exp_analysis_tab_filter"],
237
+ self.initialization_values["compensation_control_filter"],
238
+ self.initialization_values["gating_control_filter"],
239
+ self.initialization_values["pop_pheno_parent_filter"],
240
+ self.initialization_values["analyzed_result_filter"])
241
+ analysis_barplot_fig = gr.Plot(elem_id="plot_wScrollBar",
242
+ value=updater.barplot)
243
+ barplot_not_reportable_msg = gr.Markdown(
244
+ value="The assigned result type of the experiment is not reportable.", visible=False)
245
+
246
+ return analysis_barplot_fig, barplot_not_reportable_msg
247
+
248
+ def analysis_heatmap_components(self):
249
+ gr.Markdown("Protocol for multi-exps comparison")
250
+ compared_protocol_filter = gr.Radio(label='',
251
+ choices=[
252
+ "Compensation control", "Population; Phenotype (Parent gate)"],
253
+ value="Compensation control")
254
+
255
+ include_CDA_results_checkbox = gr.Checkbox(label="Include available CDA results",
256
+ value=False)
257
+
258
+ updater = self.helper.updater
259
+ updater.update_heatmap_fig(self.initialization_values["dataset_filter"],
260
+ self.initialization_values["analysis_tab_dataset_qc_status_filter"],
261
+ self.initialization_values["sample_filter"],
262
+ self.initialization_values["sop_exp_analysis_tab_filter"],
263
+ self.initialization_values["compensation_control_filter"],
264
+ self.initialization_values["gating_control_filter"],
265
+ self.initialization_values["pop_pheno_parent_filter"],
266
+ self.initialization_values["analyzed_result_filter"],
267
+ "Compensation control",
268
+ False)
269
+ with gr.Row():
270
+ analysis_heatmap_exp_info_table = gr.Dataframe(
271
+ show_copy_button=True, value=updater.exp_info_table)
272
+ analysis_heatmap_exp_comparison_table = gr.Dataframe(
273
+ show_copy_button=True, value=updater.exp_comparison_table)
274
+ analysis_heatmap_fig = gr.Plot(elem_id="plot_wScrollBar",
275
+ value=updater.heatmap)
276
+ heatmap_not_reportable_msg = gr.Markdown(
277
+ value="The assigned result type of all compared experiments are not reportable.", visible=False)
278
+
279
+ return compared_protocol_filter, include_CDA_results_checkbox, analysis_heatmap_exp_info_table, analysis_heatmap_exp_comparison_table, \
280
+ analysis_heatmap_fig, heatmap_not_reportable_msg
281
+
282
class _Updater:
    """Callback logic behind the dashboard's filters, tables, and figures.

    Holds a back-reference to the component helper so callbacks can reach
    the loaded QC/analysis tables, the current filter choices, and the
    config. Methods return ``gr.update(...)`` payloads (or lists of them)
    matching the output components they are wired to.
    """

    # Columns driving the cascading single-select filters on the analysis tab.
    _ANALYSIS_FILTER_COLS = ["Sample", "SOP-Exp", "Compensation control",
                             "Gating control",
                             "Population; Phenotype (Parent gate)"]

    def __init__(self, helper_instance):
        # The ComponentHelper that owns this updater.
        self.helper = helper_instance

    def select_all_choices(self, filter_name: str):
        """Return an update selecting every available choice of *filter_name*."""
        return gr.update(value=self.helper.choices[filter_name])

    def clean_all_choices(self):
        """Return an update clearing a multiselect filter."""
        return gr.update(value=[])

    def update_dataset_filter(self, selected_models: list[str], selected_datasets: list[str]):
        """Restrict the dataset filter to datasets of the selected instrument
        models, keeping any still-valid previous selections."""
        model_dataset_mapping = self.helper.file_qc_results.groupby(
            "Instrument model")["Dataset"].apply(
                lambda x: sorted(x.unique().tolist())).to_dict()
        updated_dataset_choices = set()
        if selected_models:
            for selected_model in selected_models:
                if selected_model in model_dataset_mapping:
                    updated_dataset_choices.update(
                        model_dataset_mapping[selected_model])
        updated_dataset_choices = sorted(updated_dataset_choices)
        self.helper.choices["dataset_filter"] = updated_dataset_choices
        # Drop previously-selected datasets that are no longer offered.
        updated_dataset_values = [
            item for item in selected_datasets if item in updated_dataset_choices]
        return gr.update(choices=updated_dataset_choices,
                         value=updated_dataset_values)

    def update_material_filter(self, selected_sop_exps: list[str], selected_materials: list[str]):
        """Restrict the material filter to materials of the selected SOP-Exps.

        Material choices are displayed prefixed with their SOP-Exp as
        ``"<SOP-Exp> <Material>"``; :meth:`_strip_sop_exp_prefix` is the
        inverse used when filtering the QC tables.
        """
        df = self.helper.file_qc_results.copy()
        df["Material"] = df["SOP-Exp"] + " " + df["Material"]
        sop_exp_material_mapping = df.groupby("SOP-Exp")["Material"].apply(
            lambda x: sorted(x.unique().tolist())).to_dict()
        updated_material_choices = set()
        if selected_sop_exps:
            for selected_sop_exp in selected_sop_exps:
                if selected_sop_exp in sop_exp_material_mapping:
                    updated_material_choices.update(
                        sop_exp_material_mapping[selected_sop_exp])
        updated_material_choices = sorted(updated_material_choices)
        self.helper.choices["material_filter"] = updated_material_choices
        updated_material_values = [
            item for item in selected_materials if item in updated_material_choices]
        return gr.update(choices=updated_material_choices,
                         value=updated_material_values)

    @staticmethod
    def _strip_sop_exp_prefix(selected_materials: list[str]) -> list[str]:
        """Drop the ``"<SOP-Exp> "`` prefix added by :meth:`update_material_filter`.

        BUG FIX: uses ``split(" ", 1)`` (single split) instead of
        ``split(" ")[1]`` so material names that themselves contain spaces
        are not truncated to their first word.
        """
        return [material.split(" ", 1)[1] for material in selected_materials]

    def _filter_dataset_qc_result(self, selected_datasets, selected_dataset_qc_status):
        """Rows of the dataset QC table matching the selected datasets and statuses."""
        dataset_qc = self.helper.dataset_qc_results
        return dataset_qc[(dataset_qc["Dataset"].isin(selected_datasets)) &
                          (dataset_qc["QC status"].isin(selected_dataset_qc_status))]

    def update_file_qc_table(self,
                             selected_datasets: list[str], selected_sop_exps: list[str],
                             selected_materials: list[str], selected_qc_issues: list[str],
                             selected_dataset_qc_status: list[str],
                             selected_file_qc_status: list[str]):
        """Refresh the file-level and dataset-level QC tables.

        Returns four updates: file table, its "no file" message, dataset
        table, and its "no dataset" message; empty results hide the table
        and show the message instead.
        """
        self.filtered_dataset_qc_results = self._filter_dataset_qc_result(
            selected_datasets, selected_dataset_qc_status)
        selected_materials = self._strip_sop_exp_prefix(selected_materials)
        file_qc = self.helper.file_qc_results
        self.filtered_file_qc_results = file_qc.loc[
            (file_qc["Dataset"].isin(self.filtered_dataset_qc_results["Dataset"])) &
            (file_qc["SOP-Exp"].isin(selected_sop_exps)) &
            (file_qc["Material"].isin(selected_materials)) &
            (file_qc["QC status"].isin(selected_file_qc_status)),
            self.helper.config["qc_results_tab"]["file_infos"] + selected_qc_issues]

        if len(self.filtered_file_qc_results) == 0:
            file_qc_table_update = {"visible": False}
            file_qc_table_no_file_msg_update = {"visible": True}
        else:
            file_qc_table_update = {
                "value": {"data": self.filtered_file_qc_results.values.tolist(),
                          "headers": self.filtered_file_qc_results.columns.to_list()},
                "visible": True}
            file_qc_table_no_file_msg_update = {"visible": False}
        if len(self.filtered_dataset_qc_results) == 0:
            dataset_qc_table_update = {"visible": False}
            dataset_qc_table_no_dataset_msg_update = {"visible": True}
        else:
            dataset_qc_table_update = {
                "value": {"data": self.filtered_dataset_qc_results.values.tolist(),
                          "headers": self.filtered_dataset_qc_results.columns.to_list()},
                "visible": True}
            dataset_qc_table_no_dataset_msg_update = {"visible": False}
        return [gr.update(**file_qc_table_update),
                gr.update(**file_qc_table_no_file_msg_update),
                gr.update(**dataset_qc_table_update),
                gr.update(**dataset_qc_table_no_dataset_msg_update)]

    def update_qc_fig(self,
                      selected_datasets: list[str], selected_sop_exps: list[str],
                      selected_materials: list[str], selected_qc_issues: list[str],
                      selected_dataset_qc_status: list[str]):
        """Re-render the QC figure for the current filter state.

        Returns updates for (figure, "all files filtered" message).
        """
        filtered_dataset_qc_results = self.filtered_dataset_qc_results = \
            self._filter_dataset_qc_result(selected_datasets,
                                           selected_dataset_qc_status)
        selected_materials = self._strip_sop_exp_prefix(selected_materials)
        file_qc = self.helper.file_qc_results
        filtered_file_qc_results = file_qc.loc[
            (file_qc["Dataset"].isin(filtered_dataset_qc_results["Dataset"])) &
            (file_qc["SOP-Exp"].isin(selected_sop_exps)) &
            (file_qc["Material"].isin(selected_materials)),
            self.helper.config["qc_results_tab"]["file_infos"] + selected_qc_issues]
        if len(filtered_file_qc_results) == 0:
            # Nothing left to plot: hide the figure, show the message.
            return [gr.update(visible=False), gr.update(visible=True)]
        self.qc_visual = QCVisualizer.visualize(
            filtered_dataset_qc_results,
            self.helper.config["qc_results_tab"]["file_sets"],
            filtered_file_qc_results,
            selected_qc_issues)
        return [gr.update(value=self.qc_visual, visible=True),
                gr.update(visible=False)]

    def update_barplot_fig(self,
                           selected_datasets: list[str], selected_dataset_qc_status: list[str],
                           selected_sample, selected_sop_exp, selected_comp,
                           selected_fmo, selected_pop_pheno_parent,
                           selected_results: list[str]):
        """Re-render the analysis bar plot for a single, fully-specified protocol.

        Returns updates for (figure, "not reportable" message). If any of
        the five protocol dropdowns is unset, both are hidden.
        """
        protocol = [selected_sample, selected_sop_exp, selected_comp,
                    selected_fmo, selected_pop_pheno_parent]
        if any(s is None for s in protocol):
            return [gr.update(visible=False), gr.update(visible=False)]

        filtered_dataset_qc_results = self.filtered_dataset_qc_results = \
            self._filter_dataset_qc_result(selected_datasets,
                                           selected_dataset_qc_status)
        gating = self.helper.analyzed_gating_results
        # Keep every column whose name contains one of the selected result types.
        result_cols = [c for c in gating.columns
                       if any(r in c for r in selected_results)]
        filtered_analyzed_gating_results = gating.loc[
            (gating["Dataset"].isin(filtered_dataset_qc_results["Dataset"])) &
            (gating["Sample"] == selected_sample) &
            (gating["SOP-Exp"] == selected_sop_exp) &
            (gating["Compensation control"] == selected_comp) &
            (gating["Gating control"] == selected_fmo) &
            (gating["Population; Phenotype (Parent gate)"] == selected_pop_pheno_parent),
            self.helper.config["gating_results_tab"]["exp_infos"] + result_cols]

        # TODO(review): a "Not reportable"-only short-circuit was sketched
        # here but never enabled; the message component is currently always
        # hidden by this callback.
        self.barplot = AnalysisVisualizer.visualize_barplot(
            filtered_analyzed_gating_results, selected_results)
        return [gr.update(value=self.barplot, visible=True),
                gr.update(visible=False)]

    def update_heatmap_fig(self,
                           selected_datasets: list[str], selected_dataset_qc_status: list[str],
                           selected_sample, selected_sop_exp, selected_comp,
                           selected_fmo, selected_pop_pheno_parent,
                           selected_results: list[str], selected_comparison,
                           include_CDA):
        """Re-render the multi-experiment comparison tables and heatmap.

        The protocol named by *selected_comparison* is left unconstrained so
        its values spread across the heatmap; the other four protocol
        selections must all be set. Returns updates for (exp-info table,
        exp-comparison table, heatmap, "not reportable" message).
        """
        # CDA results live in a parallel table that is swapped in on demand.
        if include_CDA:
            analyzed_results = self.helper.analyzed_gating_results_wCDAResults
        else:
            analyzed_results = self.helper.analyzed_gating_results

        if any(s is None for s in [selected_sample, selected_sop_exp,
                                   selected_comp, selected_fmo,
                                   selected_pop_pheno_parent]):
            return [gr.update(visible=False), gr.update(visible=False),
                    gr.update(visible=False), gr.update(visible=False)]
        masks = {"Sample": analyzed_results["Sample"] == selected_sample,
                 "SOP-Exp": analyzed_results["SOP-Exp"] == selected_sop_exp,
                 "Compensation control": analyzed_results["Compensation control"] == selected_comp,
                 "Gating control": analyzed_results["Gating control"] == selected_fmo,
                 "Population; Phenotype (Parent gate)":
                     analyzed_results["Population; Phenotype (Parent gate)"] == selected_pop_pheno_parent}
        filtering_mask = pd.DataFrame(pd.concat(
            [mask for mask_name, mask in masks.items()
             if mask_name != selected_comparison], axis=1)).all(axis=1)

        filtered_dataset_qc_results = self.filtered_dataset_qc_results = \
            self._filter_dataset_qc_result(selected_datasets,
                                           selected_dataset_qc_status)
        result_cols = [c for c in analyzed_results.columns
                       if any(r in c for r in selected_results)]
        filtered_analyzed_gating_results = analyzed_results.loc[
            analyzed_results["Dataset"].isin(filtered_dataset_qc_results["Dataset"]) &
            filtering_mask,
            self.helper.config["gating_results_tab"]["exp_infos"] + result_cols]

        self.exp_info_table = filtered_analyzed_gating_results[[
            e for e in self.helper.config["gating_results_tab"]["exp_infos"]
            if e not in ["Dataset", "Site (anonymized)", "Instrument model",
                         "Result ID"]]]

        if selected_comparison == "Population; Phenotype (Parent gate)":
            shown_exp_comparison = ["Population", "Phenotype", "Parent gate"]
        else:
            shown_exp_comparison = [selected_comparison]

        self.exp_info_table = pd.DataFrame(self.exp_info_table).drop(
            columns=shown_exp_comparison)
        # Collapse to the unique shared protocol settings and pivot into a
        # two-column (Protocol, Content) summary.
        self.exp_info_table = self.exp_info_table.value_counts().reset_index().drop(
            columns="count").T.reset_index().set_axis(["Protocol", "Content"], axis=1)

        self.exp_comparison_table = filtered_analyzed_gating_results[
            ["Result ID"] + shown_exp_comparison].value_counts(
        ).reset_index().drop(columns="count")

        # Annotate each Result ID with the varying protocol value so the
        # heatmap axis labels are distinguishable.
        if selected_comparison == "Population; Phenotype (Parent gate)":
            filtered_analyzed_gating_results["Result ID"] = \
                filtered_analyzed_gating_results["Result ID"].astype(str) + \
                " (" + filtered_analyzed_gating_results["Population"] + ")"
        else:
            filtered_analyzed_gating_results["Result ID"] = \
                filtered_analyzed_gating_results["Result ID"].astype(str) + \
                " (" + filtered_analyzed_gating_results[selected_comparison] + ")"

        self.heatmap = AnalysisVisualizer.visualize_heatmap(
            filtered_analyzed_gating_results, selected_results)

        return [gr.update(value=self.exp_info_table, visible=True),
                gr.update(value=self.exp_comparison_table, visible=True),
                gr.update(value=self.heatmap, visible=True),
                gr.update(visible=False)]

    def update_analysis_tab_filters(self, selected_sample, selected_sop_exp,
                                    selected_comp, selected_fmo,
                                    selected_pop_pheno_parent):
        """Cascade the analysis-tab dropdowns: restrict each one's choices to
        values co-occurring with the currently-set selections, clearing any
        selection that is no longer compatible."""
        col_selection_mapping = dict(zip(
            self._ANALYSIS_FILTER_COLS,
            [selected_sample, selected_sop_exp, selected_comp,
             selected_fmo, selected_pop_pheno_parent]))
        masks = {col: self.helper.analyzed_gating_results[col] == selection
                 for col, selection in col_selection_mapping.items()
                 if selection is not None}
        if len(masks) == 0:
            return self.clear_analysis_tab_filters()

        filtering_mask = pd.DataFrame(
            pd.concat(list(masks.values()), axis=1)).all(axis=1)
        filtered_analyzed_gating_results = self.helper.analyzed_gating_results[
            filtering_mask]

        updated_choices = {}
        updated_values = {}
        for col in self._ANALYSIS_FILTER_COLS:
            updated_choices[col] = sorted(
                list(filtered_analyzed_gating_results[col].unique()))
            # Keep the current selection only while it is still compatible.
            updated_values[col] = (col_selection_mapping[col]
                                   if col_selection_mapping[col] in updated_choices[col]
                                   else None)

        return [gr.update(choices=updated_choices[col], value=updated_values[col])
                for col in self._ANALYSIS_FILTER_COLS]

    def clear_analysis_tab_filters(self):
        """Reset every analysis-tab dropdown to its full choice list, unselected."""
        return [gr.update(choices=self.helper.choices[key], value=None)
                for key in ["sample_filter", "sop_exp_analysis_tab_filter",
                            "compensation_control_filter",
                            "gating_control_filter",
                            "pop_pheno_parent_filter"]]
nist_cda_dashboard/utils.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import pandas as pd
3
+ import gradio as gr
4
+ import json
5
+ import plotly.io as pio
6
+
7
+
8
class DownloadHelper:
    """Static helpers that materialize dashboard artifacts as temporary
    files whose paths Gradio can serve for download."""

    @staticmethod
    def df2csv(df: pd.DataFrame, prefix: str) -> str:
        """Write *df* to a named temporary CSV file and return its path.

        BUG FIX: writes through the already-open handle instead of
        re-opening ``tmpfile.name`` — the original double-open fails on
        platforms (e.g. Windows) where an open NamedTemporaryFile cannot
        be opened a second time. ``newline=""`` follows the pandas
        recommendation for handle-based ``to_csv``. ``delete=False``
        keeps the file alive for Gradio to serve afterwards.
        """
        with tempfile.NamedTemporaryFile(mode="w", delete=False, prefix=f"{prefix}_",
                                         suffix=".csv", encoding="utf-8",
                                         newline="") as tmpfile:
            df.to_csv(tmpfile, index=False)
            return tmpfile.name

    @staticmethod
    def fig2png(fig: "gr.components.plot.PlotData", prefix: str) -> str:
        """Render a Gradio plot payload to a temporary PNG and return its path.

        ``fig.plot`` holds the Plotly figure serialized as JSON. The
        annotation is quoted so importing this module does not require
        gradio to be resolvable at class-definition time.
        """
        with tempfile.NamedTemporaryFile(mode="wb", delete=False, prefix=f"{prefix}_",
                                         suffix=".png") as tmpfile:
            pio.write_image(json.loads(fig.plot), tmpfile, format="png")
            return tmpfile.name
nist_cda_dashboard/visualization.py ADDED
@@ -0,0 +1,743 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import numpy as np
3
+ import pandas as pd
4
+ import plotly.graph_objects as go
5
+ from plotly.express import colors
6
+ from plotly.subplots import make_subplots
7
+ import base64
8
+
9
+
10
+ class _CustomizedMarker:
11
+ def regular_polygon_coords(self, corners: int) -> list[np.array]:
12
+ if corners < 3:
13
+ raise ValueError("A polygon must have at least 3 corners.")
14
+
15
+ radius = 0.4
16
+ angle_step = 2 * math.pi / corners
17
+ polygon_coordinates = []
18
+ for i in range(corners):
19
+ angle = i * angle_step
20
+ x = radius * math.sin(angle)
21
+ y = radius * math.cos(angle)
22
+ polygon_coordinates.append(np.array([x, y]))
23
+ polygon_coordinates.append(polygon_coordinates[0])
24
+ return polygon_coordinates
25
+
26
+ def clock_marker_coords(self, corners: int, target_corner: int) -> list[np.array]:
27
+ if target_corner > corners or target_corner <= 0:
28
+ raise ValueError("Target corner outside available value range.")
29
+
30
+ target_corner -= 1
31
+ polygon_coords = self.regular_polygon_coords(corners)
32
+ corner_coord = polygon_coords[target_corner]
33
+ left_coord = (
34
+ corner_coord + polygon_coords[(target_corner+1) % corners]) / 2
35
+ right_coord = (
36
+ corner_coord + polygon_coords[(target_corner-1) % corners]) / 2
37
+ center_coord = np.array([0, 0])
38
+
39
+ return [corner_coord, right_coord, center_coord, left_coord, corner_coord]
40
+
41
+ def marker_to_scatter_line_coords(self, clock_marker_coords, x_coords, y_coords):
42
+ scatter_marker_x_coords = []
43
+ scatter_marker_y_coords = []
44
+ for x_coord, y_coord in zip(x_coords, y_coords):
45
+ scatter_marker_x_coords.extend([marker_coord[0] + x_coord
46
+ for marker_coord in clock_marker_coords])
47
+ scatter_marker_x_coords.append(None)
48
+ scatter_marker_y_coords.extend([marker_coord[1] + y_coord
49
+ for marker_coord in clock_marker_coords])
50
+ scatter_marker_y_coords.append(None)
51
+ return scatter_marker_x_coords, scatter_marker_y_coords
52
+
53
+
54
+ class QCVisualizer:
55
+ @staticmethod
56
+ def visualize(site_ins_qc_results: pd.DataFrame,
57
+ file_sets: list[str],
58
+ file_qc_results: pd.DataFrame,
59
+ qc_issues: list[str]
60
+ ) -> go.Figure:
61
+
62
+ site_ins_coord_mapping = dict(
63
+ zip(list(file_qc_results["Dataset"].unique()), list(range(len(file_qc_results["Dataset"].unique())))))
64
+ file_coord_mapping = dict(
65
+ zip(file_qc_results["File"].unique(), list(range(len(file_qc_results["File"].unique())))))
66
+
67
+ fig = make_subplots(rows=1, cols=2,
68
+ shared_yaxes=True)
69
+
70
+ # Figure 1. site-ins QC results
71
+ site_ins_plot_table = pd.melt(site_ins_qc_results,
72
+ id_vars=[
73
+ "Dataset", "Site (anonymized)", "Instrument model"],
74
+ value_vars=file_sets,
75
+ var_name="File set",
76
+ value_name="status")
77
+ status_marker_mapping = {"Completed": "circle", "Incomplete": "x"}
78
+
79
+ default_colors = colors.qualitative.Set1
80
+ status_marker_color_mapping = {
81
+ "Completed": default_colors[1], "Incomplete": default_colors[0]}
82
+ default_colors = [default_colors[i]
83
+ for i in range(len(default_colors)) if i not in [0, 1]]
84
+ for status in ["Completed", "Incomplete"]:
85
+ sub_site_ins_plot_table = site_ins_plot_table[site_ins_plot_table["status"] == status]
86
+ sub_site_ins_plot_table.loc[:, "Dataset"] = sub_site_ins_plot_table["Dataset"].map(
87
+ site_ins_coord_mapping)
88
+ if status == "Incomplete":
89
+ for y in sub_site_ins_plot_table["Dataset"].unique():
90
+ fig.add_shape(
91
+ type="line",
92
+ y0=y, y1=y,
93
+ x0=-0.5, x1=1.5,
94
+ line=dict(color="red", width=1),
95
+ layer="between",
96
+ row=1, col=1
97
+ )
98
+ fig.add_shape(
99
+ type="line",
100
+ y0=y, y1=y,
101
+ x0=-2, x1=(len(file_qc_results["File"].unique())-1)+2,
102
+ line=dict(color="red", width=1),
103
+ layer="between",
104
+ row=1, col=2
105
+ )
106
+ if len(sub_site_ins_plot_table) == 0:
107
+ fig.add_trace(go.Scatter(x=[None],
108
+ y=[None],
109
+ mode="markers",
110
+ marker=dict(symbol=status_marker_mapping[status],
111
+ color=status_marker_color_mapping[status],
112
+ size=12,
113
+ ),
114
+ name=f"File set {status.lower()}",
115
+ visible="legendonly"
116
+ ),
117
+ row=1, col=1)
118
+ else:
119
+ hover_info = []
120
+ for _, row in sub_site_ins_plot_table.iterrows():
121
+ hover_info.append(
122
+ [row["Site (anonymized)"], row["Instrument model"], row["status"]])
123
+ fig.add_trace(go.Scatter(x=sub_site_ins_plot_table["File set"],
124
+ y=sub_site_ins_plot_table["Dataset"],
125
+ mode="markers",
126
+ marker=dict(symbol=status_marker_mapping[status],
127
+ color=status_marker_color_mapping[status],
128
+ size=12,
129
+ ),
130
+ name=f"File set {status.lower()}",
131
+ customdata=hover_info,
132
+ hovertemplate=("Dataset: %{y}<br>" +
133
+ "Site (anonymized): %{customdata[0]}<br>" +
134
+ "Instrument model: %{customdata[1]}<br>" +
135
+ "File set: %{x}<br>" +
136
+ "Status: %{customdata[2]}" +
137
+ "<extra></extra>"
138
+ )
139
+ ),
140
+ row=1, col=1)
141
+ fig.update_xaxes(title_text="File set",
142
+ range=[
143
+ 0-0.5, (len(site_ins_plot_table["File set"].unique())-1)+0.5],
144
+ tickangle=90,
145
+ gridcolor="lightgray",
146
+ zeroline=False,
147
+ showline=False,
148
+ row=1, col=1)
149
+
150
+ # Figure 2. file QC results
151
+ file_plot_table = file_qc_results[file_qc_results[qc_issues].any(
152
+ axis=1)]
153
+ clock_marker_index = 1
154
+ hover_information = pd.DataFrame(
155
+ columns=["x", "y", "Dataset", "Site (anonymized)", "Instrument model", "File", "Issues"])
156
+ for issue_index, qc_issue in enumerate(qc_issues):
157
+ sub_file_plot_table = file_plot_table[file_plot_table[qc_issue]]
158
+ if len(sub_file_plot_table) == 0:
159
+ fig.add_trace(go.Scatter(x=[None],
160
+ y=[None],
161
+ mode="lines",
162
+ fill="toself",
163
+ fillcolor=default_colors[issue_index],
164
+ line=dict(color="black", width=0.5),
165
+ name=qc_issue,
166
+ visible="legendonly"
167
+ ),
168
+ row=1, col=2)
169
+ if qc_issue != "Missing file":
170
+ clock_marker_index += 1
171
+ else:
172
+ x_coords = [file_coord_mapping[file_code]
173
+ for file_code in sub_file_plot_table["File"]]
174
+ y_coords = [site_ins_coord_mapping[site_ins_code]
175
+ for site_ins_code in sub_file_plot_table["Dataset"]]
176
+ hover_information = pd.concat(
177
+ [hover_information, pd.DataFrame({"x": x_coords,
178
+ "y": y_coords,
179
+ "Dataset": sub_file_plot_table["Dataset"],
180
+ "Site (anonymized)": sub_file_plot_table["Site (anonymized)"],
181
+ "Instrument model": sub_file_plot_table["Instrument model"],
182
+ "File": sub_file_plot_table["File"],
183
+ "Issues": qc_issue})])
184
+ if qc_issue == "Missing file":
185
+ marker_coords = _CustomizedMarker().regular_polygon_coords(len(qc_issues)-1)
186
+ else:
187
+ marker_coords = _CustomizedMarker().clock_marker_coords(
188
+ len(qc_issues)-1, clock_marker_index)
189
+ clock_marker_index += 1
190
+ issue_marker_x_coords, issue_marker_y_coords = \
191
+ _CustomizedMarker().marker_to_scatter_line_coords(marker_coords,
192
+ x_coords,
193
+ y_coords)
194
+ fig.add_trace(go.Scatter(x=issue_marker_x_coords,
195
+ y=issue_marker_y_coords,
196
+ mode="lines",
197
+ fill="toself",
198
+ fillcolor=default_colors[issue_index],
199
+ line=dict(color="black", width=0.5),
200
+ name=qc_issue,
201
+ hoverinfo="skip"
202
+ ),
203
+ row=1, col=2)
204
+
205
+ qc_issues_woMissingFile = [
206
+ issue for issue in qc_issues if issue != "Missing file"]
207
+ for issue_index, qc_issue in enumerate(qc_issues_woMissingFile):
208
+ sub_file_plot_table = file_plot_table[(~file_plot_table[qc_issue]) & (
209
+ file_plot_table[[issue for issue in qc_issues_woMissingFile if issue != qc_issue]].any(axis=1))]
210
+ x_coords = [file_coord_mapping[file_code]
211
+ for file_code in sub_file_plot_table["File"]]
212
+ y_coords = [site_ins_coord_mapping[site_ins_code]
213
+ for site_ins_code in sub_file_plot_table["Dataset"]]
214
+ marker_coords = _CustomizedMarker().clock_marker_coords(
215
+ len(qc_issues)-1, issue_index+1)
216
+ issue_marker_x_coords, issue_marker_y_coords = \
217
+ _CustomizedMarker().marker_to_scatter_line_coords(marker_coords,
218
+ x_coords,
219
+ y_coords)
220
+ fig.add_trace(go.Scatter(x=issue_marker_x_coords,
221
+ y=issue_marker_y_coords,
222
+ mode="lines",
223
+ fill="toself",
224
+ fillcolor="rgba(0,0,0,0)",
225
+ line=dict(color="black", width=0.5),
226
+ showlegend=False,
227
+ hoverinfo="skip"
228
+ ),
229
+ row=1, col=2)
230
+ pass
231
+
232
+ hover_information = hover_information.groupby(["x", "y", "Dataset", "Site (anonymized)", "Instrument model", "File"], dropna=False)[
233
+ "Issues"].apply(lambda x: ", ".join(x.astype(str))).reset_index()
234
+
235
+ marker_coords = _CustomizedMarker().regular_polygon_coords(len(qc_issues)-1)
236
+ hover_marker_x_coords = []
237
+ hover_marker_y_coords = []
238
+ hover_marker_customdata = []
239
+ for row_index, row in hover_information.iterrows():
240
+ hover_marker_x_coords.extend([marker_coord[0] + row["x"]
241
+ for marker_coord in marker_coords])
242
+ hover_marker_x_coords.append(None)
243
+ hover_marker_y_coords.extend([marker_coord[1] + row["y"]
244
+ for marker_coord in marker_coords])
245
+ hover_marker_y_coords.append(None)
246
+ for marker_coord in marker_coords:
247
+ hover_marker_customdata.append(
248
+ [row["Dataset"], row["Site (anonymized)"], row["Instrument model"], row["File"], row["Issues"]])
249
+ hover_marker_customdata.append(None)
250
+ fig.add_trace(go.Scatter(x=hover_marker_x_coords,
251
+ y=hover_marker_y_coords,
252
+ showlegend=False,
253
+ mode="none",
254
+ customdata=hover_marker_customdata,
255
+ hovertemplate=("Dataset: %{customdata[0]}<br>" +
256
+ "Site (anonymized): %{customdata[1]}<br>" +
257
+ "Instrument model: %{customdata[2]}<br>" +
258
+ "File: %{customdata[3]}<br>" +
259
+ "Issues: %{customdata[4]}" +
260
+ "<extra></extra>"
261
+ )
262
+ ),
263
+ row=1, col=2)
264
+
265
+ fig.update_xaxes(title_text="File",
266
+ range=[
267
+ 0-2, (len(file_qc_results["File"].unique())-1)+2],
268
+ tickvals=list(file_coord_mapping.values()),
269
+ ticktext=list(file_coord_mapping.keys()),
270
+ tickangle=90,
271
+ gridcolor="lightgray",
272
+ zeroline=False,
273
+ showline=False,
274
+ row=1, col=2)
275
+
276
+ # figure 3. legend
277
+ legend_fig = go.Figure()
278
+ legend_list = ["Completed", "Incomplete"] + qc_issues
279
+ for status_index, status in enumerate(["Completed", "Incomplete"]):
280
+ legend_fig.add_trace(go.Scatter(x=[0], y=[status_index],
281
+ marker=dict(symbol=status_marker_mapping[status],
282
+ color=status_marker_color_mapping[status],
283
+ size=12,
284
+ ),
285
+ hoverinfo="skip"))
286
+ legend_fig.add_annotation(x=0, y=status_index,
287
+ text=status,
288
+ xanchor="left",
289
+ yanchor="middle",
290
+ showarrow=False,
291
+ xshift=15)
292
+ clock_marker_count = 1
293
+ indexes = [i for i in range(len(legend_list)) if legend_list[i] not in [
294
+ "Completed", "Incomplete", "Missing file"]]
295
+ for issue_index, qc_issue in enumerate(qc_issues):
296
+ if qc_issue == "Missing file":
297
+ marker_coords = _CustomizedMarker().regular_polygon_coords(len(qc_issues)-1)
298
+ else:
299
+ marker_coords = _CustomizedMarker().clock_marker_coords(len(qc_issues)-1,
300
+ clock_marker_count)
301
+ clock_marker_count += 1
302
+
303
+ x, y = _CustomizedMarker().marker_to_scatter_line_coords(marker_coords,
304
+ [0], [legend_list.index(qc_issue)])
305
+ legend_fig.add_trace(go.Scatter(x=x, y=y,
306
+ mode="lines",
307
+ fill="toself",
308
+ fillcolor=default_colors[issue_index],
309
+ line=dict(
310
+ color="black", width=0.5),
311
+ hoverinfo="skip"))
312
+ if qc_issue != "Miising file":
313
+ x, y = _CustomizedMarker().marker_to_scatter_line_coords(marker_coords,
314
+ [0] *
315
+ (len(
316
+ indexes)-1),
317
+ [i for i in indexes
318
+ if i != issue_index+2])
319
+ legend_fig.add_trace(go.Scatter(x=x, y=y,
320
+ mode="lines",
321
+ fill="toself",
322
+ fillcolor="rgba(0,0,0,0)",
323
+ line=dict(
324
+ color="black", width=0.5),
325
+ hoverinfo="skip"))
326
+ legend_fig.add_annotation(x=0, y=issue_index+2,
327
+ text=qc_issue,
328
+ xanchor="left",
329
+ yanchor="middle",
330
+ showarrow=False,
331
+ xshift=15)
332
+ legend_fig.update_xaxes(visible=False,
333
+ range=[0-1, 0+7])
334
+ legend_fig.update_yaxes(visible=False,
335
+ autorange="reversed")
336
+ legend_fig_aspect = [25*8, 25*(2+len(qc_issues)+2)]
337
+ legend_fig.update_layout(width=legend_fig_aspect[0],
338
+ height=legend_fig_aspect[1],
339
+ title="Marker Legend",
340
+ margin=dict(l=0, r=0, t=50, b=0),
341
+ showlegend=False,
342
+ plot_bgcolor="white")
343
+
344
+ image_bytes = legend_fig.to_image(format="png", scale=2)
345
+ base64_image_string = base64.b64encode(image_bytes).decode("utf-8")
346
+ image_data_uri = f"data:image/png;base64,{base64_image_string}"
347
+
348
+ file_counts = len(file_qc_results["File"].unique())
349
+
350
+ fig.update_yaxes(title_text="Dataset",
351
+ showticklabels=True,
352
+ range=[0-2, (len(site_ins_coord_mapping)-1)+2],
353
+ tickvals=list(site_ins_coord_mapping.values()),
354
+ ticktext=list(site_ins_coord_mapping.keys()),
355
+ gridcolor="lightgray",
356
+ zeroline=False,
357
+ showline=False)
358
+
359
+ def _normalize(values: list, value_range: list):
360
+ return [(value-min(value_range))/(max(value_range)-min(value_range)) for value in values]
361
+
362
+ margin_left = 200
363
+ subplot1_width = 85
364
+ space12 = 200
365
+ subplot2_width = 25*(file_counts+4)
366
+ margin_right = 250
367
+
368
+ center_plots_width = subplot1_width + space12 + subplot2_width
369
+ figure_width = margin_left + center_plots_width + margin_right
370
+
371
+ subplot1_x_domain = _normalize([0,
372
+ subplot1_width],
373
+ [0, center_plots_width])
374
+ subplot2_x_domain = _normalize([subplot1_width + space12,
375
+ subplot1_width + space12 + subplot2_width],
376
+ [0, center_plots_width])
377
+
378
+ margin_top = 30
379
+ center_plots_height = subplot12_height = 25 * \
380
+ (len(site_ins_coord_mapping)+4)
381
+ margin_bottom = 300
382
+ figure_height = margin_top + subplot12_height + margin_bottom
383
+
384
+ legend_image_x = (center_plots_width + 20) / center_plots_width
385
+ legend_image_sizex = legend_fig_aspect[0]*1.2
386
+ legend_image_sizey = legend_image_sizex*(legend_fig_aspect[1]/legend_fig_aspect[0])
387
+ fig.add_layout_image(dict(source=image_data_uri,
388
+ xref="paper", yref="paper",
389
+ x=legend_image_x, y=0.98,
390
+ sizex=legend_image_sizex/center_plots_width,
391
+ sizey=legend_image_sizey/center_plots_height,
392
+ sizing="contain",
393
+ xanchor="left", yanchor="top",
394
+ layer="below"
395
+ ))
396
+
397
+ fig.add_annotation(x=np.mean(subplot1_x_domain), y=1, yshift=5,
398
+ xref="paper", yref="paper",
399
+ text="Dataset QC",
400
+ showarrow=False,
401
+ xanchor="center",
402
+ yanchor="bottom",
403
+ font=dict(size=16))
404
+ fig.add_annotation(x=np.mean(subplot2_x_domain), y=1, yshift=5,
405
+ xref="paper", yref="paper",
406
+ text="File QC",
407
+ showarrow=False,
408
+ xanchor="center",
409
+ yanchor="bottom",
410
+ font=dict(size=16))
411
+
412
+ fig.update_layout(height=figure_height, width=figure_width,
413
+ xaxis=dict(domain=subplot1_x_domain),
414
+ xaxis2=dict(domain=subplot2_x_domain),
415
+ yaxis=dict(automargin=False),
416
+ margin=dict(
417
+ t=margin_top, b=margin_bottom, l=margin_left, r=margin_right),
418
+ autosize=False,
419
+ showlegend=False,
420
+ hoverlabel=dict(bgcolor="white",
421
+ font_color="black",
422
+ align="left"),
423
+ plot_bgcolor="white"
424
+ )
425
+
426
+ return fig
427
+
428
+
429
class AnalysisVisualizer:
    """Plotly visualizations for the analyzed gating-results table.

    Both methods consume the analyzed gating results DataFrame and a list of
    result names (e.g. ``"Cell population (%)"``) and return a ``go.Figure``.
    They read columns of the form ``"<result>_mean"``, ``"<result>_std"`` and
    ``"<result>_count"``, plus ``"Dataset"``, ``"Site (anonymized)"``,
    ``"Instrument model"`` and (heatmap only) ``"Result ID"``.
    """

    @staticmethod
    def visualize_barplot(analyzed_gating_results: pd.DataFrame,
                          visualized_results: list[str]
                          ) -> go.Figure:
        """Horizontal bar plot of per-dataset means (error bar = STD).

        One subplot per entry of ``visualized_results``.  Bars are colored by
        their position relative to the Tukey fences of the mean distribution
        (normal / outlier / extreme outlier), and vertical reference lines are
        drawn at Q1/Q2/Q3 and the (extreme) fences.

        Args:
            analyzed_gating_results: Analysis table (see class docstring).
            visualized_results: Result names to plot, one subplot each.

        Returns:
            The assembled ``go.Figure``.
        """
        fig = make_subplots(rows=1, cols=len(visualized_results),
                            horizontal_spacing=0.4 / len(visualized_results),
                            subplot_titles=visualized_results)

        dataset_counts = []
        # Plotly subplot columns are 1-based, hence start=1.
        for result_index, visualized_result in enumerate(visualized_results,
                                                         start=1):
            df = analyzed_gating_results.copy()

            df[f"{visualized_result}_count"] = \
                df[f"{visualized_result}_count"].astype(int)
            # y tick labels look like "Dataset (count)".
            df["Dataset_wCount"] = (
                df["Dataset"].astype(str) + " ("
                + df[f"{visualized_result}_count"].astype(str) + ")")
            dataset_wCount = df["Dataset_wCount"].to_list()
            # Reverse so the first dataset ends up at the top of the plot.
            dataset_wCount_mapping = dict(
                zip(dataset_wCount[::-1], range(len(dataset_wCount))))
            df["Dataset_wCount_index"] = df["Dataset_wCount"].map(
                dataset_wCount_mapping)

            if (df[[f"{visualized_result}_mean", f"{visualized_result}_std"]]
                    == "Not reportable").all(axis=None):
                # Nothing numeric to plot for this result: show a placeholder
                # label centered vertically in the subplot.
                fig.add_annotation(
                    x=0,
                    y=np.percentile(list(dataset_wCount_mapping.values()), 50),
                    text="Not reportable",
                    xanchor="center",
                    yanchor="middle",
                    font=dict(size=24),
                    showarrow=False,
                    xshift=0,
                    row=1, col=result_index)
                fig.update_xaxes(range=[-1, 1],
                                 showticklabels=False,
                                 row=1, col=result_index)
            else:
                df = df[df[f"{visualized_result}_count"] != 0] \
                    .reset_index(drop=True)

                # A dataset with a single value has no STD; drop its error bar.
                df[f"{visualized_result}_std"] = \
                    df[f"{visualized_result}_std"].replace(
                        {"Only one data": None})

                for analysis in ("mean", "std"):
                    df[f"{visualized_result}_{analysis}"] = (
                        df[f"{visualized_result}_{analysis}"]
                        .astype(float).round(2))

                # Tukey fences computed from the per-dataset means.
                mean_series = df[f"{visualized_result}_mean"]
                statistic = {
                    "Q1": mean_series.quantile(0.25),
                    "Q2": mean_series.quantile(0.5),
                    "Q3": mean_series.quantile(0.75),
                }
                statistic["IQR"] = statistic["Q3"] - statistic["Q1"]
                statistic["Lower fence"] = \
                    statistic["Q1"] - 1.5 * statistic["IQR"]
                statistic["Upper fence"] = \
                    statistic["Q3"] + 1.5 * statistic["IQR"]
                statistic["Extreme lower fence"] = \
                    statistic["Q1"] - 3 * statistic["IQR"]
                statistic["Extreme upper fence"] = \
                    statistic["Q3"] + 3 * statistic["IQR"]

                # Classify each dataset and pick its bar color.
                for index in df.index.to_list():
                    mean_value = df.loc[index, f"{visualized_result}_mean"]
                    if statistic["Lower fence"] <= mean_value \
                            <= statistic["Upper fence"]:
                        df.loc[index, "distribution"] = "Normal"
                        df.loc[index, "bar_color"] = "#636EFA"
                    elif statistic["Extreme lower fence"] <= mean_value \
                            <= statistic["Extreme upper fence"]:
                        df.loc[index, "distribution"] = "Outlier"
                        df.loc[index, "bar_color"] = "#FFA15A"
                    else:
                        df.loc[index, "distribution"] = "Extreme outlier"
                        df.loc[index, "bar_color"] = "#EF553B"

                if visualized_result == "Cell population (%)":
                    # Percentages have a fixed natural range.
                    xmin = 0
                    xmax = 100
                else:
                    xmin = df[f"{visualized_result}_mean"].min()
                    xmax = df[f"{visualized_result}_mean"].max()
                # 2% padding on each side.
                xrange = [xmin - 0.02 * (xmax - xmin),
                          xmax + 0.02 * (xmax - xmin)]

                for distribution in ("Normal", "Outlier", "Extreme outlier"):
                    sub_df = df[df["distribution"] == distribution]
                    hover_info = [[row["Dataset"],
                                   row["Site (anonymized)"],
                                   row["Instrument model"],
                                   row[f"{visualized_result}_count"],
                                   row[f"{visualized_result}_mean"],
                                   row[f"{visualized_result}_std"],
                                   row["distribution"]]
                                  for _, row in sub_df.iterrows()]
                    fig.add_trace(
                        go.Bar(x=sub_df[f"{visualized_result}_mean"],
                               y=sub_df["Dataset_wCount_index"],
                               # Bars start at the padded left edge, not at 0.
                               base=[xrange[0]] * len(sub_df),
                               orientation="h",
                               error_x=dict(
                                   type="data",
                                   array=sub_df[f"{visualized_result}_std"],
                                   visible=True),
                               marker_color=sub_df["bar_color"],
                               width=0.5,
                               name=distribution,
                               customdata=hover_info,
                               hovertemplate=(
                                   "Dataset: %{customdata[0]}<br>"
                                   "Site (anonymized): %{customdata[1]}<br>"
                                   "Instrument model: %{customdata[2]}<br>"
                                   "Data count: %{customdata[3]}<br>"
                                   "Mean (bar): %{customdata[4]}<br>"
                                   "STD (error bar): %{customdata[5]}<br>"
                                   "Distribution: %{customdata[6]}"
                                   "<extra></extra>")),
                        row=1, col=result_index)

                # Vertical reference lines for quartiles and fences (IQR is a
                # width, not a position, so it gets no line).
                for statistic_key, statistic_value in statistic.items():
                    if statistic_key == "IQR":
                        continue
                    if not (xrange[0] <= statistic_value <= xrange[1]):
                        continue
                    if statistic_key in ("Q1", "Q2", "Q3"):
                        line_color = "blue"
                    elif statistic_key in ("Lower fence", "Upper fence"):
                        line_color = "orange"
                    else:  # extreme fences
                        line_color = "red"
                    # Half a bar-slot of overhang above and below.
                    y_values = (
                        [min(dataset_wCount_mapping.values()) - 0.5]
                        + list(dataset_wCount_mapping.values())
                        + [max(dataset_wCount_mapping.values()) + 0.5])
                    fig.add_trace(
                        go.Scatter(x=[statistic_value] * len(y_values),
                                   y=y_values,
                                   mode="lines",
                                   line=dict(color=line_color, width=2),
                                   showlegend=False,
                                   hoverinfo="text",
                                   hovertext=(f"{statistic_key} "
                                              f"({statistic_value})")),
                        row=1, col=result_index)

                fig.update_xaxes(title_text=visualized_result,
                                 range=xrange,
                                 automargin=False,
                                 row=1, col=result_index)

            fig.update_yaxes(title_text="Dataset (Result counts)",
                             range=[min(dataset_wCount_mapping.values()) - 1,
                                    max(dataset_wCount_mapping.values()) + 1],
                             tickvals=list(dataset_wCount_mapping.values()),
                             ticktext=list(dataset_wCount_mapping.keys()),
                             autorange=False,
                             automargin=False,
                             row=1, col=result_index)

            dataset_counts.append(len(dataset_wCount_mapping))

        # Fixed margins; height scales with the largest dataset count but
        # never drops below 250 px.
        margin_top = 30
        margin_bottom = 50
        margin_left = 200
        plot_height = 40 * max(dataset_counts)
        fig.update_layout(height=max(margin_top + plot_height + margin_bottom,
                                     250),
                          width=800 * len(visualized_results),
                          margin=dict(t=margin_top, b=margin_bottom,
                                      l=margin_left))

        return fig

    @staticmethod
    def visualize_heatmap(analyzed_gating_results: pd.DataFrame,
                          visualized_results: list[str]
                          ) -> go.Figure:
        """Heatmap of per-dataset, per-Result-ID STD values.

        One subplot per entry of ``visualized_results``; every cell is
        annotated with its (rounded or scientific-notation) STD value.  When a
        subplot contains no numeric STD at all, a transparent placeholder
        heatmap is drawn so the annotations and hover labels still render.

        Args:
            analyzed_gating_results: Analysis table (see class docstring).
            visualized_results: Result names to plot, one subplot each.

        Returns:
            The assembled ``go.Figure``.
        """
        space_between_ratio = 0.4 / len(visualized_results)
        fig = make_subplots(rows=1, cols=len(visualized_results),
                            subplot_titles=visualized_results,
                            horizontal_spacing=space_between_ratio,
                            shared_xaxes=True, shared_yaxes=True)

        def scientific_anno(x):
            """Format one cell label; pass non-numeric markers through."""
            try:
                x = float(x)
            except (TypeError, ValueError):
                # e.g. "Not reportable" / "Only one data" marker strings.
                return x
            return f"{x:.1e}" if x >= 1000 else f"{round(x, 2)}"

        # Fraction of the figure width each subplot occupies (loop-invariant).
        subplot_ratio = (1 - space_between_ratio
                         * (len(visualized_results) - 1)) \
            / len(visualized_results)

        for result_index, visualized_result in enumerate(visualized_results,
                                                         start=1):
            std_table = analyzed_gating_results.pivot(
                index=["Dataset", "Site (anonymized)", "Instrument model"],
                columns="Result ID", values=f"{visualized_result}_std")

            count_table = analyzed_gating_results.pivot(
                index=["Dataset", "Site (anonymized)", "Instrument model"],
                columns="Result ID", values=f"{visualized_result}_count")

            # Order columns by the numeric prefix of the Result ID
            # (e.g. "10 ..." after "2 ...").
            ordered_columns = sorted(std_table.columns.to_list(),
                                     key=lambda s: int(s.split(" ")[0]))
            std_table = std_table[ordered_columns]
            count_table = count_table[ordered_columns]

            numeric_values = pd.to_numeric(std_table.values.flatten(),
                                           errors="coerce")
            numeric_values = numeric_values[~np.isnan(numeric_values)]

            annotation_table = pd.DataFrame()
            for column in std_table.columns:
                annotation_table[column] = \
                    std_table[column].apply(scientific_anno)
            # Break the long marker strings across two lines inside the cells.
            annotation_table = annotation_table.replace(
                {"Not reportable": "Not<br>reportable",
                 "Only one data": "Only<br>one data"})

            hover_info = [[[row["Site (anonymized)"],
                            row["Instrument model"],
                            count_table.iloc[row_index, col_index]]
                           for col_index in range(len(std_table.columns))]
                          for row_index, row
                          in std_table.reset_index().iterrows()]

            if len(numeric_values) > 0:
                fig.add_trace(
                    go.Heatmap(z=std_table.values,
                               text=annotation_table.values,
                               texttemplate="%{text}",
                               x=std_table.columns,
                               y=std_table.reset_index()["Dataset"],
                               xgap=2, ygap=2,
                               # Park each colorbar just right of its subplot.
                               colorbar_x=(
                                   (subplot_ratio + space_between_ratio)
                                   * result_index
                                   - space_between_ratio * (8 / 9)),
                               colorscale="Magma",
                               zauto=False,
                               # Clip the color range to the 2nd-98th
                               # percentile so outliers don't wash it out.
                               zmin=np.percentile(numeric_values, 2),
                               zmax=np.percentile(numeric_values, 98),
                               customdata=hover_info,
                               hovertemplate=(
                                   "Dataset: %{y}<br>"
                                   "Site (anonymized): %{customdata[0]}<br>"
                                   "Instrument model: %{customdata[1]}<br>"
                                   "Result ID: %{x}<br>"
                                   "Data count: %{customdata[2]}<br>"
                                   "STD value: %{z}"
                                   "<extra></extra>")),
                    row=1, col=result_index)
            else:
                # No numeric STD anywhere: draw an invisible mid-scale heatmap
                # so the text annotations and hover labels still appear.
                fig.add_trace(
                    go.Heatmap(z=pd.DataFrame(0.5,
                                              index=std_table.index,
                                              columns=std_table.columns),
                               text=annotation_table.values,
                               texttemplate="%{text}",
                               x=std_table.columns,
                               y=std_table.reset_index()["Dataset"],
                               xgap=2, ygap=2,
                               showscale=False,
                               colorscale=[[0, "blue"],
                                           [0.5, "rgba(0,0,0,0)"],
                                           [1, "red"]],
                               zauto=False, zmin=0, zmax=1,
                               customdata=hover_info,
                               hovertemplate=(
                                   "Dataset: %{y}<br>"
                                   "Site (anonymized): %{customdata[0]}<br>"
                                   "Instrument model: %{customdata[1]}<br>"
                                   "Result ID: %{x}<br>"
                                   "Data count: %{customdata[2]}<br>"
                                   "STD value: %{z}"
                                   "<extra></extra>")),
                    row=1, col=result_index)

        # NOTE(review): the axis/layout code below reuses the *last* loop
        # iteration's std_table for tick values and sizing — this assumes every
        # visualized result pivots to the same Result IDs and datasets; confirm
        # against the upstream analysis step.
        fig.update_xaxes(title_text="Result ID",
                         automargin=False,
                         showticklabels=True,
                         type="category",
                         tickmode="array",
                         tickvals=std_table.columns.to_list(),
                         side="top")

        # Mirror the x tick labels onto overlaid axes so every subplot shows
        # them even with shared_xaxes=True.
        for col_index in range(1, len(visualized_results) + 1):
            fig.layout[f"xaxis{len(visualized_results) + col_index}"] = {
                "title_text": None,
                "automargin": False,
                "showticklabels": True,
                "type": "category",
                "tickmode": "array",
                "tickvals": std_table.columns.to_list(),
                "mirror": "allticks",
                "overlaying": f"x{col_index}",
                "anchor": f"y{col_index}",
                "side": "top",
            }

        fig.update_yaxes(title_text="Dataset",
                         autorange="reversed",
                         automargin=False,
                         showticklabels=True,
                         type="category",
                         tickmode="array",
                         tickvals=std_table.reset_index()["Dataset"].to_list())

        # Large top margin leaves room for the top-side tick labels; height
        # scales with the dataset count but never drops below 200 px.
        margin_top = 200
        margin_bottom = 30
        margin_left = 200
        margin_right = 30
        plot_height = 50 * len(std_table.reset_index()["Dataset"].to_list())
        plot_width = 900 * len(visualized_results)
        fig.update_layout(height=max(margin_top + plot_height + margin_bottom,
                                     200),
                          width=plot_width,
                          margin=dict(t=margin_top, b=margin_bottom,
                                      l=margin_left, r=margin_right))

        # Nudge the subplot titles up-left so they sit clear of the y labels.
        for annotation in fig.layout.annotations:
            annotation.xshift = -400
            annotation.yshift = 5

        return fig
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project] # This PEP 621 section is good for metadata, but Poetry will use its own for active dependency management.
2
+ name = "nist-cda-dashboard"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = [
6
+ {name = "robinchu", email = "robin.chu@aheadmedicine.com"}
7
+ ]
8
+ readme = "README.md"
9
+ requires-python = ">=3.12"
10
+
11
+ [tool.poetry]
12
+ name = "nist-cda-dashboard"
13
+ version = "0.1.0"
14
+ description = ""
15
+ authors = ["robinchu <robin.chu@aheadmedicine.com>"] # Poetry's preferred author format
16
+ readme = "README.md"
17
+
18
+ [tool.poetry.dependencies]
19
+ python = ">=3.12"
20
+ rapidfuzz = ">=3.13.0,<4.0.0" # Add these here
21
+ openpyxl = ">=3.1.5,<4.0.0" # Add these here
22
+ gradio = "^5.29.0"
23
+ plotly = "^6.0.1"
24
+ pandas = "^2.2.3"
25
+ kaleido = "0.2.1"
26
+
27
+ [tool.poetry.group.dev.dependencies]
28
+
29
+ [build-system]
30
+ requires = ["poetry-core>=1.0.0"]
31
+ build-backend = "poetry.core.masonry.api"
requirements.txt ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==24.1.0 ; python_version >= "3.12"
2
+ annotated-types==0.7.0 ; python_version >= "3.12"
3
+ anyio==4.9.0 ; python_version >= "3.12"
4
+ audioop-lts==0.2.1 ; python_version >= "3.13"
5
+ certifi==2025.4.26 ; python_version >= "3.12"
6
+ charset-normalizer==3.4.2 ; python_version >= "3.12"
7
+ click==8.1.8 ; sys_platform != "emscripten" and python_version >= "3.12"
8
+ colorama==0.4.6 ; python_version >= "3.12" and platform_system == "Windows"
9
+ et-xmlfile==2.0.0 ; python_version >= "3.12"
10
+ fastapi==0.115.12 ; python_version >= "3.12"
11
+ ffmpy==0.3.2 ; python_version >= "3.12"
12
+ filelock==3.18.0 ; python_version >= "3.12"
13
+ fsspec==2025.3.2 ; python_version >= "3.12"
14
+ gradio-client==1.10.1 ; python_version >= "3.12"
15
+ gradio==5.29.1 ; python_version >= "3.12"
16
+ groovy==0.1.2 ; python_version >= "3.12"
17
+ h11==0.16.0 ; python_version >= "3.12"
18
+ httpcore==1.0.9 ; python_version >= "3.12"
19
+ httpx==0.28.1 ; python_version >= "3.12"
20
+ huggingface-hub==0.31.2 ; python_version >= "3.12"
21
+ idna==3.10 ; python_version >= "3.12"
22
+ jinja2==3.1.6 ; python_version >= "3.12"
23
+ kaleido==0.2.1 ; python_version >= "3.12"
24
+ markdown-it-py==3.0.0 ; sys_platform != "emscripten" and python_version >= "3.12"
25
+ markupsafe==3.0.2 ; python_version >= "3.12"
26
+ mdurl==0.1.2 ; sys_platform != "emscripten" and python_version >= "3.12"
27
+ narwhals==1.38.0 ; python_version >= "3.12"
28
+ numpy==2.2.5 ; python_version >= "3.12"
29
+ openpyxl==3.1.5 ; python_version >= "3.12"
30
+ orjson==3.10.18 ; python_version >= "3.12"
31
+ packaging==24.2 ; python_version >= "3.12"
32
+ pandas==2.2.3 ; python_version >= "3.12"
33
+ pillow==11.2.1 ; python_version >= "3.12"
34
+ plotly==6.1.0 ; python_version >= "3.12"
35
+ pydantic-core==2.33.2 ; python_version >= "3.12"
36
+ pydantic==2.11.4 ; python_version >= "3.12"
37
+ pydub==0.25.1 ; python_version >= "3.12"
38
+ pygments==2.19.1 ; sys_platform != "emscripten" and python_version >= "3.12"
39
+ python-dateutil==2.9.0.post0 ; python_version >= "3.12"
40
+ python-multipart==0.0.20 ; python_version >= "3.12"
41
+ pytz==2025.2 ; python_version >= "3.12"
42
+ pyyaml==6.0.2 ; python_version >= "3.12"
43
+ rapidfuzz==3.13.0 ; python_version >= "3.12"
44
+ requests==2.32.3 ; python_version >= "3.12"
45
+ rich==14.0.0 ; sys_platform != "emscripten" and python_version >= "3.12"
46
+ ruff==0.11.10 ; sys_platform != "emscripten" and python_version >= "3.12"
47
+ safehttpx==0.1.6 ; python_version >= "3.12"
48
+ semantic-version==2.10.0 ; python_version >= "3.12"
49
+ shellingham==1.5.4 ; sys_platform != "emscripten" and python_version >= "3.12"
50
+ six==1.17.0 ; python_version >= "3.12"
51
+ sniffio==1.3.1 ; python_version >= "3.12"
52
+ starlette==0.46.2 ; python_version >= "3.12"
53
+ tomlkit==0.13.2 ; python_version >= "3.12"
54
+ tqdm==4.67.1 ; python_version >= "3.12"
55
+ typer==0.15.4 ; sys_platform != "emscripten" and python_version >= "3.12"
56
+ typing-extensions==4.13.2 ; python_version >= "3.12"
57
+ typing-inspection==0.4.0 ; python_version >= "3.12"
58
+ tzdata==2025.2 ; python_version >= "3.12"
59
+ urllib3==2.4.0 ; python_version >= "3.12"
60
+ uvicorn==0.34.2 ; sys_platform != "emscripten" and python_version >= "3.12"
61
+ websockets==15.0.1 ; python_version >= "3.12"