gourav3017 commited on
Commit
a2cf83f
·
1 Parent(s): 8de5f4e

Add app.py

Browse files
Files changed (1) hide show
  1. app.py +291 -0
app.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import json
4
+ import os
5
+ import posixpath
6
+ from huggingface_hub import hf_hub_download
7
+ from huggingface_hub import list_repo_files
8
+
9
+ # Replace this with your actual Hugging Face repo ID
10
+ REPO_ID = "PortPy-Project/PortPy_Dataset"
11
+
12
@st.cache_data
def get_patient_ids():
    """Return a DataFrame of all patient IDs with their disease site.

    Downloads ``data_info.jsonl`` from the dataset repo and derives the
    disease site from the patient-ID prefix (e.g. ``Lung_Patient_1`` -> ``Lung``).
    """
    info_path = hf_hub_download(
        REPO_ID, repo_type="dataset", filename="data_info.jsonl", local_dir="./temp"
    )
    with open(info_path) as fh:
        records = [json.loads(line) for line in fh]

    frame = pd.DataFrame([rec["patient_id"] for rec in records], columns=["patient_id"])
    # Everything before the first underscore is the disease site.
    frame["disease_site"] = frame["patient_id"].str.extract(r"^(.*?)_")
    return frame
23
+
24
+
25
@st.cache_data
def load_all_metadata(disease_site):
    """Collect structure, beam, and planner-beam metadata for every patient
    belonging to *disease_site*, keyed by patient ID."""
    patients = get_patient_ids()
    site_patients = patients[patients["disease_site"] == disease_site]

    collected = {}
    for pid in site_patients["patient_id"]:
        planner_path = hf_hub_download(
            REPO_ID,
            repo_type="dataset",
            filename=f"data/{pid}/PlannerBeams.json",
            local_dir="./temp",
        )
        with open(planner_path) as fh:
            planner_ids = json.load(fh).get("IDs", [])

        collected[pid] = {
            "structures": load_structure_metadata(pid),
            "beams": load_beam_metadata(pid),
            "planner_beam_ids": planner_ids,
        }

    return collected
48
+
49
@st.cache_data
def load_structure_metadata(patient_id):
    """Download and parse the structure-set metadata JSON for one patient."""
    meta_path = hf_hub_download(
        REPO_ID,
        repo_type="dataset",
        filename=f"data/{patient_id}/StructureSet_MetaData.json",
        local_dir="./temp",
    )
    with open(meta_path) as fh:
        return json.load(fh)
54
+
55
@st.cache_data
def load_beam_metadata(patient_id):
    """Download every per-beam metadata JSON for *patient_id* and return the
    parsed dicts as a list (order follows the repo file listing)."""
    prefix = f"data/{patient_id}/Beams/Beam_"
    repo_files = list_repo_files(repo_id=REPO_ID, repo_type="dataset")
    meta_paths = [
        name for name in repo_files
        if name.startswith(prefix) and name.endswith("_MetaData.json")
    ]

    beams = []
    for path in meta_paths:
        local_path = hf_hub_download(
            REPO_ID, repo_type="dataset", filename=path, local_dir="./temp"
        )
        with open(local_path) as fh:
            beams.append(json.load(fh))
    return beams
73
+
74
def get_patient_summary_from_cached_data(patient_id, all_metadata):
    """Summarize one patient's cached metadata.

    Returns a dict with the volume of the first structure whose name
    contains "PTV" (case-insensitive, None if absent), the beam count,
    and the beam metadata list itself.
    """
    entry = all_metadata[patient_id]
    beams = entry["beams"]

    # First PTV-named structure wins, mirroring the planner convention.
    ptv_volume = next(
        (s.get("volume_cc") for s in entry["structures"] if "PTV" in s["name"].upper()),
        None,
    )

    return {
        "ptv_volume": ptv_volume,
        "num_beams": len(beams),
        "beams": beams,
    }
89
+
90
def _parse_angle_set(csv_text):
    """Parse a comma-separated string of integer angles into a set.

    Blank tokens (stray/trailing commas, pure whitespace) are skipped so a
    user typing "0, 90," in the sidebar does not crash the app with
    ValueError. Returns None when nothing usable was entered, which the
    caller treats as "no filter".
    """
    if not csv_text:
        return None
    parsed = {int(tok) for tok in csv_text.split(",") if tok.strip()}
    return parsed or None


def filter_matched_data(filtered_patients, query_ptv_vol, beam_gantry_filter,
                        beam_collimator_filter, beam_energy_filter, beam_couch_filter,
                        only_planner, all_metadata):
    """Return a DataFrame of patients whose PTV volume and beams match the filters.

    Parameters
    ----------
    filtered_patients : DataFrame with a "patient_id" column.
    query_ptv_vol : minimum PTV volume (cc); patients below it (or without a
        PTV structure) are dropped.
    beam_gantry_filter / beam_collimator_filter / beam_couch_filter :
        comma-separated integer angles; empty string disables the filter.
    beam_energy_filter : comma-separated energy labels (e.g. "6, 10"),
        matched as strings against the beam's "energy_MV" field.
    only_planner : if True, keep only beam IDs also listed in the patient's
        planner-selected beams.
    all_metadata : dict produced by load_all_metadata().
    """
    gantry_angles = _parse_angle_set(beam_gantry_filter)
    collimator_angles = _parse_angle_set(beam_collimator_filter)
    couch_angles = _parse_angle_set(beam_couch_filter)
    energies = None
    if beam_energy_filter:
        # Energies stay strings ("6", "10FFF", ...); blanks are ignored.
        energies = {tok for tok in beam_energy_filter.replace(" ", "").split(",") if tok} or None

    matched = []
    for pid in filtered_patients["patient_id"]:
        # Metadata comes from the pre-cached all_metadata dict — no I/O here.
        summary = get_patient_summary_from_cached_data(pid, all_metadata)
        if summary["ptv_volume"] is None or summary["ptv_volume"] < query_ptv_vol:
            continue

        # Apply each beam-level filter in turn.
        selected_beams = summary["beams"]
        if gantry_angles:
            selected_beams = [b for b in selected_beams if b["gantry_angle"] in gantry_angles]
        if collimator_angles:
            selected_beams = [b for b in selected_beams if b["collimator_angle"] in collimator_angles]
        if couch_angles:
            selected_beams = [b for b in selected_beams if b["couch_angle"] in couch_angles]
        if energies:
            selected_beams = [b for b in selected_beams if b['energy_MV'] in energies]

        selected_beam_ids = [b["ID"] for b in selected_beams]
        if not selected_beam_ids:
            continue

        if only_planner:
            planner_beam_ids = set(all_metadata[pid]["planner_beam_ids"])
            selected_beam_ids = list(planner_beam_ids.intersection(selected_beam_ids))
            if not selected_beam_ids:
                continue

        matched.append({
            "patient_id": pid,
            "num_beams": len(selected_beam_ids),
            "ptv_volume": summary["ptv_volume"],
            "selected_beam_ids": selected_beam_ids
        })

    return pd.DataFrame(matched)
134
+
135
def _hub_download_with_retries(repo_id, hf_path, local_dir, max_retries):
    """Attempt hf_hub_download up to *max_retries* times.

    Returns the local file path on success, or None after the final failure
    (reported via st.error).
    """
    for attempt in range(max_retries):
        try:
            return hf_hub_download(
                repo_id=repo_id,
                repo_type="dataset",
                filename=hf_path,
                local_dir=local_dir
            )
        except Exception as e:
            if attempt == max_retries - 1:
                st.error(f"Failed to download {hf_path}: {e}")
    return None


def download_data(repo_id, patient_ids, beam_ids=None, planner_beam_ids=True, max_retries=2, local_dir='./'):
    """Download a patient's dataset files from the Hugging Face Hub.

    For each patient: the static CT/structure/voxel/plan files are fetched
    first; then, if planner_beam_ids is True, the beam IDs are read from the
    just-downloaded PlannerBeams.json (overriding any *beam_ids* argument),
    and each beam's data + metadata files are fetched.

    Returns the list of successfully downloaded local file paths.
    """
    # NOTE: the original local `from huggingface_hub import hf_hub_download`
    # was removed — it shadowed the module-level import redundantly.
    downloaded_files = []
    for patient_id in patient_ids:
        static_files = [
            "CT_Data.h5", "CT_MetaData.json",
            "StructureSet_Data.h5", "StructureSet_MetaData.json",
            "OptimizationVoxels_Data.h5", "OptimizationVoxels_MetaData.json",
            "PlannerBeams.json",
            "rt_dose_echo_imrt.dcm", "rt_plan_echo_imrt.dcm"
        ]
        for filename in static_files:
            hf_path = posixpath.join("data", patient_id, filename)
            local_path = _hub_download_with_retries(repo_id, hf_path, local_dir, max_retries)
            if local_path is not None:
                downloaded_files.append(local_path)

        if planner_beam_ids:
            planner_file = os.path.join(local_dir, 'data', patient_id, "PlannerBeams.json")
            try:
                with open(planner_file, "r") as f:
                    planner_data = json.load(f)
                beam_ids = planner_data.get("IDs", [])
            except Exception as e:
                st.error(f"Error reading PlannerBeams.json: {e}")
                beam_ids = []

        if beam_ids is not None:
            for bid in beam_ids:
                for beam_file in (f"Beams/Beam_{bid}_Data.h5", f"Beams/Beam_{bid}_MetaData.json"):
                    hf_path = posixpath.join("data", patient_id, beam_file)
                    local_path = _hub_download_with_retries(repo_id, hf_path, local_dir, max_retries)
                    if local_path is not None:
                        downloaded_files.append(local_path)
    return downloaded_files
193
+
194
+ from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
195
+
196
def show_aggrid_table(df):
    """Render *df* as an interactive Ag-Grid table with checkbox multi-select
    on the patient_id column, and return the grid response."""
    builder = GridOptionsBuilder.from_dataframe(df)
    builder.configure_default_column(
        groupable=True, value=True, enableRowGroup=True, aggFunc='sum', editable=False
    )
    builder.configure_grid_options(domLayout='normal')

    # Checkbox-based multiple row selection.
    builder.configure_selection('multiple', use_checkbox=True)
    builder.configure_column("patient_id", checkboxSelection=True)

    return AgGrid(
        df,
        gridOptions=builder.build(),
        enable_enterprise_modules=False,
        allow_unsafe_jscode=True,
        fit_columns_on_grid_load=True,
        theme='balham',
        update_mode=GridUpdateMode.SELECTION_CHANGED,
    )
218
+
219
def main():
    """Streamlit entry point: browse PortPy patient metadata by disease site,
    filter by PTV volume / beam geometry, and download matched patients."""
    st.set_page_config(page_title="PortPy Metadata Explorer", layout="wide")
    st.title("📊 PortPy Metadata Explorer & Downloader")

    patient_df = get_patient_ids()
    disease_site = st.sidebar.selectbox("Select Disease Site", patient_df["disease_site"].unique())
    # Load and cache all metadata for the selected disease site.
    all_metadata = load_all_metadata(disease_site)

    filtered_patients = pd.DataFrame(all_metadata.keys(), columns=["patient_id"])

    beam_gantry_filter = st.sidebar.text_input("Gantry Angles (comma-separated)", "")
    beam_collimator_filter = st.sidebar.text_input("Collimator Angles (comma-separated)", "")
    beam_energy_filter = st.sidebar.text_input("Beam Energies (comma-separated)", "")
    beam_couch_filter = st.sidebar.text_input("Couch Angles (comma-separated)", "")
    query_ptv_vol = st.sidebar.number_input("Minimum PTV volume (cc):", value=0)

    # Checkbox: restrict matches to planner-selected beams.
    only_planner = st.sidebar.checkbox("Show only planner beams", value=True)

    results_df = filter_matched_data(
        filtered_patients, query_ptv_vol, beam_gantry_filter,
        beam_collimator_filter, beam_energy_filter, beam_couch_filter,
        only_planner, all_metadata
    )

    # Summary table with checkbox selection.
    grid_response = show_aggrid_table(results_df)

    selected_rows = grid_response.get("selected_rows", pd.DataFrame())
    if isinstance(selected_rows, pd.DataFrame) and not selected_rows.empty:
        # Detailed per-patient view for each checked row.
        for _, row in selected_rows.iterrows():
            pid = row["patient_id"]
            st.markdown(f"### Patient: {pid}")
            st.markdown("#### Structures")
            st.dataframe(pd.DataFrame(all_metadata[pid]["structures"]))
            st.markdown("#### Beams")
            st.dataframe(pd.DataFrame(all_metadata[pid]["beams"]))

    with st.expander("Download matched patients"):
        # BUG FIX: an empty results_df has no columns at all, so guard before
        # touching results_df["patient_id"].
        patient_choices = results_df["patient_id"].tolist() if not results_df.empty else []
        to_download = st.sidebar.multiselect("Select Patients to Download", patient_choices)
        local_dir = st.sidebar.text_input("Enter local directory to download data:", value="./downloaded")
        if st.sidebar.button("Download Selected Patients"):
            if to_download:
                # BUG FIX: match rows by patient_id (the original compared the
                # positional index against patient-ID strings, so nothing ever
                # matched) and read the "selected_beam_ids" column that
                # filter_matched_data actually emits (not "beam_ids").
                patient_to_beams = {
                    row["patient_id"]: row["selected_beam_ids"]
                    for _, row in results_df.iterrows()
                    if row["patient_id"] in to_download
                }
                for pid, beam_ids in patient_to_beams.items():
                    download_data(REPO_ID, [pid], beam_ids=beam_ids, planner_beam_ids=False, local_dir=local_dir)
                st.success("Download complete!")
            else:
                st.warning("No patients selected.")


if __name__ == "__main__":
    main()