Spaces:

PortPy-Project
/

portpy_dataset_visualization

Running

App Files Files Community

gourav3017 committed on Apr 30, 2025

Commit

a2cf83f

1 Parent(s): 8de5f4e

Add app.py

Browse files

Files changed (1) hide show

app.py +291 -0

app.py ADDED Viewed

	@@ -0,0 +1,291 @@

+import streamlit as st
+import pandas as pd
+import json
+import os
+import posixpath
+from huggingface_hub import hf_hub_download
+from huggingface_hub import list_repo_files
+# Replace this with your actual Hugging Face repo ID
+REPO_ID = "PortPy-Project/PortPy_Dataset"
+@st.cache_data
+def get_patient_ids():
+    """Return a DataFrame listing every patient ID and its disease site.
+
+    Downloads ``data_info.jsonl`` from the dataset repo into ``./temp`` and
+    parses it as JSON Lines (one JSON object per line), keeping the
+    ``patient_id`` field of each record.
+
+    Returns:
+        A pandas DataFrame with two columns: ``patient_id`` and
+        ``disease_site``. The disease site is the text before the first
+        underscore of the ID (e.g. ``Lung`` for ``Lung_Patient_1``); IDs
+        without an underscore get a missing value.
+    """
+    file = hf_hub_download(REPO_ID, repo_type="dataset", filename="data_info.jsonl", local_dir="./temp")
+    with open(file, encoding="utf-8") as f:
+        # Skip blank lines so a stray or trailing empty line does not make
+        # json.loads raise.
+        data_info = [json.loads(line) for line in f if line.strip()]
+    patient_ids = [pat["patient_id"] for pat in data_info]
+    df = pd.DataFrame(patient_ids, columns=["patient_id"])
+    # expand=False returns a Series for the single capture group, which is
+    # the natural value for a column assignment.
+    df["disease_site"] = df["patient_id"].str.extract(r"^(.*?)_", expand=False)
+    return df
+@st.cache_data
+def load_all_metadata(disease_site):
+    """Gather structure, beam and planner-beam metadata for one disease site.
+
+    Args:
+        disease_site: Value matched against the ``disease_site`` column of
+            the table returned by ``get_patient_ids``.
+
+    Returns:
+        A dict keyed by patient ID. Each value is a dict with the keys
+        ``structures``, ``beams`` and ``planner_beam_ids`` (the ``IDs`` entry
+        of the patient's ``PlannerBeams.json``, or ``[]`` when absent).
+    """
+    all_patients = get_patient_ids()
+    site_mask = all_patients["disease_site"] == disease_site
+    site_patient_ids = all_patients.loc[site_mask, "patient_id"]
+    collected = {}
+    for pid in site_patient_ids:
+        # Structures first, then beams, then the planner file.
+        entry = {
+            "structures": load_structure_metadata(pid),
+            "beams": load_beam_metadata(pid),
+        }
+        planner_path = hf_hub_download(REPO_ID, repo_type="dataset", filename=f"data/{pid}/PlannerBeams.json", local_dir="./temp")
+        with open(planner_path) as fh:
+            entry["planner_beam_ids"] = json.load(fh).get("IDs", [])
+        collected[pid] = entry
+    return collected
+@st.cache_data
+def load_structure_metadata(patient_id):
+    """Download and parse ``StructureSet_MetaData.json`` for one patient.
+
+    The file is fetched from the dataset repo into ``./temp`` and the parsed
+    JSON content is returned as-is.
+    """
+    remote_name = f"data/{patient_id}/StructureSet_MetaData.json"
+    local_path = hf_hub_download(REPO_ID, repo_type="dataset", filename=remote_name, local_dir="./temp")
+    with open(local_path) as fh:
+        parsed = json.load(fh)
+    return parsed
+@st.cache_data
+def _list_dataset_files():
+    """Return every file path in the dataset repo; cached so the listing is requested once, not per patient."""
+    return list_repo_files(repo_id=REPO_ID, repo_type="dataset")
+@st.cache_data
+def load_beam_metadata(patient_id):
+    """Load the metadata JSON of every beam stored for one patient.
+
+    Beam metadata files are discovered from the repo file listing: any path
+    starting with ``data/<patient_id>/Beams/Beam_`` and ending with
+    ``_MetaData.json``. Each match is downloaded into ``./temp`` and parsed.
+
+    Args:
+        patient_id: Patient folder name under ``data/`` in the dataset repo.
+
+    Returns:
+        A list with one parsed JSON object per beam metadata file, in the
+        order the files appear in the repo listing.
+    """
+    prefix = f"data/{patient_id}/Beams/Beam_"
+    beam_meta_paths = [
+        path for path in _list_dataset_files()
+        if path.startswith(prefix) and path.endswith("_MetaData.json")
+    ]
+    beam_meta = []
+    for path in beam_meta_paths:
+        file = hf_hub_download(REPO_ID, repo_type="dataset", filename=path, local_dir="./temp")
+        with open(file, encoding="utf-8") as f:
+            beam_meta.append(json.load(f))
+    return beam_meta
+def get_patient_summary_from_cached_data(patient_id, all_metadata):
+    """Summarise one patient from the already-loaded metadata dict.
+
+    Args:
+        patient_id: Key into ``all_metadata``.
+        all_metadata: Mapping of patient ID to a dict holding ``structures``
+            and ``beams`` lists.
+
+    Returns:
+        A dict with ``ptv_volume`` (the ``volume_cc`` of the first structure
+        whose name contains "PTV", case-insensitively, or ``None`` when no
+        such structure exists or it has no ``volume_cc``), ``num_beams`` and
+        the ``beams`` list itself.
+    """
+    record = all_metadata[patient_id]
+    structure_list = record["structures"]
+    beam_list = record["beams"]
+    # Only the first PTV-like structure is considered, even if its volume is missing.
+    first_ptv_volume = next(
+        (item.get("volume_cc") for item in structure_list if "PTV" in item["name"].upper()),
+        None,
+    )
+    return {"ptv_volume": first_ptv_volume, "num_beams": len(beam_list), "beams": beam_list}
+def _parse_number_filter(text):
+    """Parse a comma-separated string such as ``"0, 30,"`` into a set of numbers; return None when it holds no values."""
+    if not text:
+        return None
+    # Blank tokens (trailing or doubled commas) are ignored. A non-numeric
+    # token still raises ValueError.
+    values = {float(token) for token in text.split(",") if token.strip()}
+    return values or None
+def filter_matched_data(filtered_patients, query_ptv_vol, beam_gantry_filter,
+                            beam_collimator_filter, beam_energy_filter, beam_couch_filter,
+                            only_planner, all_metadata):
+    """Select patients and beams that satisfy the PTV-volume and beam filters.
+
+    Args:
+        filtered_patients: DataFrame with a ``patient_id`` column.
+        query_ptv_vol: Minimum PTV volume; patients with no PTV volume or a
+            smaller one are skipped.
+        beam_gantry_filter: Comma-separated gantry angles, or empty for no filter.
+        beam_collimator_filter: Comma-separated collimator angles, or empty.
+        beam_energy_filter: Comma-separated beam energies, or empty. Spaces
+            are removed before splitting.
+        beam_couch_filter: Comma-separated couch angles, or empty.
+        only_planner: When true, keep only beams whose ID is listed in the
+            patient's ``planner_beam_ids``.
+        all_metadata: Mapping of patient ID to cached metadata.
+
+    Returns:
+        A DataFrame with the columns ``patient_id``, ``num_beams``,
+        ``ptv_volume`` and ``selected_beam_ids``. The columns are present
+        even when no patient matches.
+
+    Raises:
+        ValueError: If an angle filter contains a non-numeric token.
+    """
+    columns = ["patient_id", "num_beams", "ptv_volume", "selected_beam_ids"]
+    gantry_angles = _parse_number_filter(beam_gantry_filter)
+    collimator_angles = _parse_number_filter(beam_collimator_filter)
+    couch_angles = _parse_number_filter(beam_couch_filter)
+    energies = None
+    if beam_energy_filter:
+        energies = {tok for tok in beam_energy_filter.replace(" ", "").split(",") if tok} or None
+    matched = []
+    for pid in filtered_patients["patient_id"]:
+        # Retrieve metadata for the patient from the pre-cached all_metadata
+        summary = get_patient_summary_from_cached_data(pid, all_metadata)
+        if summary["ptv_volume"] is None or summary["ptv_volume"] < query_ptv_vol:
+            continue
+        # Apply every active beam filter in turn
+        selected_beams = summary["beams"]
+        if gantry_angles is not None:
+            selected_beams = [b for b in selected_beams if b["gantry_angle"] in gantry_angles]
+        if collimator_angles is not None:
+            selected_beams = [b for b in selected_beams if b["collimator_angle"] in collimator_angles]
+        if couch_angles is not None:
+            selected_beams = [b for b in selected_beams if b["couch_angle"] in couch_angles]
+        if energies is not None:
+            # NOTE(review): energy_MV is compared as text so the filter also
+            # works if the metadata stores it as a number - confirm the
+            # actual type in the dataset.
+            selected_beams = [b for b in selected_beams if str(b["energy_MV"]) in energies]
+        selected_beam_ids = [b["ID"] for b in selected_beams]
+        if not selected_beam_ids:
+            continue
+        if only_planner:
+            planner_beam_ids = set(all_metadata[pid]["planner_beam_ids"])
+            # Keep the beams in metadata order (de-duplicated) rather than
+            # the arbitrary order a set intersection would give.
+            selected_beam_ids = list(dict.fromkeys(
+                bid for bid in selected_beam_ids if bid in planner_beam_ids
+            ))
+            if not selected_beam_ids:
+                continue
+        matched.append({
+            "patient_id": pid,
+            "num_beams": len(selected_beam_ids),
+            "ptv_volume": summary["ptv_volume"],
+            "selected_beam_ids": selected_beam_ids,
+        })
+    # Explicit columns keep the schema stable for callers when nothing matches.
+    return pd.DataFrame(matched, columns=columns)
+def _download_with_retries(repo_id, hf_path, local_dir, max_retries):
+    """Download one dataset file, retrying on failure; return its local path, or None if every attempt failed."""
+    for attempt in range(max_retries):
+        try:
+            return hf_hub_download(
+                repo_id=repo_id,
+                repo_type="dataset",
+                filename=hf_path,
+                local_dir=local_dir
+            )
+        except Exception as e:
+            # Best-effort download: report only once the last attempt has failed.
+            if attempt == max_retries - 1:
+                st.error(f"Failed to download {hf_path}: {e}")
+    return None
+def download_data(repo_id, patient_ids, beam_ids=None, planner_beam_ids=True, max_retries=2, local_dir='./'):
+    """Download the data files of the given patients from a dataset repo.
+
+    For every patient a fixed list of CT, structure-set, optimization-voxel,
+    planner and DICOM files is fetched, followed by the ``_Data.h5`` and
+    ``_MetaData.json`` files of each selected beam. Failures are reported
+    with ``st.error`` and do not abort the remaining downloads.
+
+    Args:
+        repo_id: Hugging Face dataset repo ID.
+        patient_ids: Iterable of patient folder names under ``data/``.
+        beam_ids: Beam IDs to download for every patient. Ignored when
+            ``planner_beam_ids`` is true. ``None`` downloads no beams.
+        planner_beam_ids: When true, the beam IDs are read from each
+            patient's downloaded ``PlannerBeams.json`` instead.
+        max_retries: Number of attempts per file.
+        local_dir: Directory the files are downloaded into.
+
+    Returns:
+        List of local paths of the files that were downloaded successfully.
+    """
+    static_files = (
+        "CT_Data.h5", "CT_MetaData.json",
+        "StructureSet_Data.h5", "StructureSet_MetaData.json",
+        "OptimizationVoxels_Data.h5", "OptimizationVoxels_MetaData.json",
+        "PlannerBeams.json",
+        "rt_dose_echo_imrt.dcm", "rt_plan_echo_imrt.dcm",
+    )
+    downloaded_files = []
+    for patient_id in patient_ids:
+        for filename in static_files:
+            hf_path = posixpath.join("data", patient_id, filename)
+            local_path = _download_with_retries(repo_id, hf_path, local_dir, max_retries)
+            if local_path is not None:
+                downloaded_files.append(local_path)
+        # Per-patient beam list; the caller's beam_ids argument is left untouched.
+        patient_beam_ids = beam_ids
+        if planner_beam_ids:
+            planner_file = os.path.join(local_dir, 'data', patient_id, "PlannerBeams.json")
+            try:
+                with open(planner_file, "r", encoding="utf-8") as f:
+                    patient_beam_ids = json.load(f).get("IDs", [])
+            except Exception as e:
+                st.error(f"Error reading PlannerBeams.json: {e}")
+                patient_beam_ids = []
+        if patient_beam_ids is None:
+            continue
+        for bid in patient_beam_ids:
+            for beam_file in (f"Beams/Beam_{bid}_Data.h5", f"Beams/Beam_{bid}_MetaData.json"):
+                hf_path = posixpath.join("data", patient_id, beam_file)
+                local_path = _download_with_retries(repo_id, hf_path, local_dir, max_retries)
+                if local_path is not None:
+                    downloaded_files.append(local_path)
+    return downloaded_files
+from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
+def show_aggrid_table(df):
+    """Render ``df`` as an AgGrid table with multi-row checkbox selection.
+
+    Args:
+        df: DataFrame to display; a ``patient_id`` column is configured to
+            carry the selection checkbox.
+
+    Returns:
+        The value returned by ``AgGrid``; the grid is set to update on
+        selection changes.
+    """
+    builder = GridOptionsBuilder.from_dataframe(df)
+    default_column_settings = dict(groupable=True, value=True, enableRowGroup=True, aggFunc='sum', editable=False)
+    builder.configure_default_column(**default_column_settings)
+    builder.configure_grid_options(domLayout='normal')
+    # Checkbox-driven selection of several rows at once
+    builder.configure_selection('multiple', use_checkbox=True)
+    builder.configure_column("patient_id", checkboxSelection=True)
+    return AgGrid(
+        df,
+        gridOptions=builder.build(),
+        enable_enterprise_modules=False,
+        allow_unsafe_jscode=True,
+        fit_columns_on_grid_load=True,
+        theme='balham',
+        update_mode=GridUpdateMode.SELECTION_CHANGED
+    )
+def main():
+    """Streamlit entry point: filter patients by metadata, inspect them and download their data.
+
+    The sidebar holds the disease-site selector, the beam and PTV filters and
+    the download controls. The main area shows the matching patients in a
+    selectable grid, followed by the structure and beam tables of every
+    selected row.
+    """
+    st.set_page_config(page_title="PortPy Metadata Explorer", layout="wide")
+    st.title("📊 PortPy Metadata Explorer & Downloader")
+    patient_df = get_patient_ids()
+    disease_site = st.sidebar.selectbox("Select Disease Site", patient_df["disease_site"].unique())
+    all_metadata = load_all_metadata(disease_site)  # Load and cache all metadata for selected disease site
+    filtered_patients = pd.DataFrame(list(all_metadata), columns=["patient_id"])
+    beam_gantry_filter = st.sidebar.text_input("Gantry Angles (comma-separated)", "")
+    beam_collimator_filter = st.sidebar.text_input("Collimator Angles (comma-separated)", "")
+    beam_energy_filter = st.sidebar.text_input("Beam Energies (comma-separated)", "")
+    beam_couch_filter = st.sidebar.text_input("Couch Angles (comma-separated)", "")
+    query_ptv_vol = st.sidebar.number_input("Minimum PTV volume (cc):", value=0)
+    # Checkbox: Only planner beams
+    only_planner = st.sidebar.checkbox("Show only planner beams", value=True)
+    try:
+        results_df = filter_matched_data(
+            filtered_patients, query_ptv_vol, beam_gantry_filter,
+            beam_collimator_filter, beam_energy_filter, beam_couch_filter,
+            only_planner, all_metadata
+        )
+    except ValueError as e:
+        # A non-numeric angle typed into a filter box should be reported to
+        # the user, not shown as a traceback.
+        st.error(f"Invalid filter value: {e}")
+        st.stop()
+    if results_df.empty:
+        # An empty result may have no columns at all, so skip the grid.
+        st.info("No patients match the current filters.")
+        patient_options = []
+    else:
+        patient_options = results_df["patient_id"].tolist()
+        grid_response = show_aggrid_table(results_df)
+        selected_rows = grid_response.get("selected_rows", pd.DataFrame())
+        # NOTE(review): some st_aggrid releases appear to return a list of
+        # row dicts here instead of a DataFrame - confirm against the
+        # installed version.
+        if isinstance(selected_rows, list):
+            selected_rows = pd.DataFrame(selected_rows)
+        if isinstance(selected_rows, pd.DataFrame) and not selected_rows.empty:
+            for _, row in selected_rows.iterrows():
+                pid = row["patient_id"]
+                st.markdown(f"### Patient: {pid}")
+                st.markdown("#### Structures")
+                st.dataframe(pd.DataFrame(all_metadata[pid]["structures"]))
+                st.markdown("#### Beams")
+                st.dataframe(pd.DataFrame(all_metadata[pid]["beams"]))
+    # NOTE(review): the widgets below are created through st.sidebar, so they
+    # render in the sidebar and this expander stays empty - decide where the
+    # download controls should live.
+    with st.expander("Download matched patients"):
+        # Multi-select and download
+        to_download = st.sidebar.multiselect("Select Patients to Download", patient_options)
+        local_dir = st.sidebar.text_input("Enter local directory to download data:", value="./downloaded")
+        if st.sidebar.button("Download Selected Patients"):
+            if to_download:
+                # Match on the patient_id column (not the integer row index)
+                # and read the beam list from the column the filter produces.
+                chosen = results_df[results_df["patient_id"].isin(to_download)]
+                for pid, beam_ids in zip(chosen["patient_id"], chosen["selected_beam_ids"]):
+                    download_data(REPO_ID, [pid], beam_ids=beam_ids, planner_beam_ids=False, local_dir=local_dir)
+                st.success("Download complete!")
+            else:
+                st.warning("No patients selected.")
+if __name__ == "__main__":
+    main()