gourav3017 committed on
Commit
c8e0302
·
1 Parent(s): 4ab9818

Update app.py for new hub version and download to local

Browse files
Files changed (1) hide show
  1. app.py +68 -37
app.py CHANGED
@@ -8,6 +8,7 @@ from huggingface_hub import list_repo_files
8
  import io
9
  import zipfile
10
  import shutil
 
11
 
12
  # Replace this with your actual Hugging Face repo ID
13
  REPO_ID = "PortPy-Project/PortPy_Dataset"
@@ -140,7 +141,7 @@ def filter_matched_data(filtered_patients, query_ptv_vol, beam_gantry_filter,
140
 
141
  return pd.DataFrame(matched)
142
 
143
- def download_data(repo_id, patient_ids, beam_ids=None, planner_beam_ids=True, max_retries=2, local_dir='./'):
144
  from huggingface_hub import hf_hub_download
145
 
146
  downloaded_files = []
@@ -171,29 +172,30 @@ def download_data(repo_id, patient_ids, beam_ids=None, planner_beam_ids=True, ma
171
  # ---------------------------------------------------------------
172
  # 2. Download all DICOM files under data/<patient_id>/DicomFiles/
173
  # ---------------------------------------------------------------
174
- try:
175
- all_files = list_repo_files(repo_id, repo_type="dataset")
176
- dicom_prefix = f"data/{patient_id}/DicomFiles/"
177
- dicom_files = [f for f in all_files if f.startswith(dicom_prefix)]
178
-
179
- for hf_path in dicom_files:
180
- for attempt in range(max_retries):
181
- try:
182
- local_path = hf_hub_download(
183
- repo_id=repo_id,
184
- repo_type="dataset",
185
- filename=hf_path,
186
- local_dir=local_dir,
187
- token=token
188
- )
189
- downloaded_files.append(local_path)
190
- break
191
- except Exception as e:
192
- if attempt == max_retries - 1:
193
- st.error(f"Failed to download {hf_path}: {e}")
194
-
195
- except Exception as e:
196
- st.error(f"Error listing DICOM files for {patient_id}: {e}")
 
197
  if planner_beam_ids:
198
  planner_file = os.path.join(local_dir, 'data', patient_id, "PlannerBeams.json")
199
  try:
@@ -346,31 +348,60 @@ def main():
346
 
347
  download_data(REPO_ID, [pid], beam_ids=beam_ids,
348
  planner_beam_ids=only_planner,
349
- local_dir=local_dir)
350
 
351
  progress.progress(i / total)
352
 
353
 
354
  status.success("All downloads complete. Preparing zip…")
355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
 
357
- # Create zip in memory
358
- buf = io.BytesIO()
359
- with zipfile.ZipFile(buf, "w", zipfile.ZIP_STORED) as zf:
360
  for root, _, files in os.walk(local_dir):
361
  for f in files:
362
  full_path = os.path.join(root, f)
363
  rel_path = os.path.relpath(full_path, local_dir)
364
  zf.write(full_path, rel_path)
365
- buf.seek(0)
366
-
367
- # Trigger file download automatically from the SAME BUTTON CLICK
368
- st.download_button(
369
- label="Your download is ready! Click to save.",
370
- data=buf,
371
- file_name="portpy_patients.zip",
372
- mime="application/zip",
373
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
  # if st.button("Download Data"):
376
  # patients_to_download = results_df["patient_id"].tolist()
 
8
  import io
9
  import zipfile
10
  import shutil
11
+ import tempfile, uuid
12
 
13
  # Replace this with your actual Hugging Face repo ID
14
  REPO_ID = "PortPy-Project/PortPy_Dataset"
 
141
 
142
  return pd.DataFrame(matched)
143
 
144
+ def download_data(repo_id, patient_ids, beam_ids=None, planner_beam_ids=True, max_retries=2, local_dir='./', download_dicom=True):
145
  from huggingface_hub import hf_hub_download
146
 
147
  downloaded_files = []
 
172
  # ---------------------------------------------------------------
173
  # 2. Download all DICOM files under data/<patient_id>/DicomFiles/
174
  # ---------------------------------------------------------------
175
+ if download_dicom:
176
+ try:
177
+ all_files = list_repo_files(repo_id, repo_type="dataset")
178
+ dicom_prefix = f"data/{patient_id}/DicomFiles/"
179
+ dicom_files = [f for f in all_files if f.startswith(dicom_prefix)]
180
+
181
+ for hf_path in dicom_files:
182
+ for attempt in range(max_retries):
183
+ try:
184
+ local_path = hf_hub_download(
185
+ repo_id=repo_id,
186
+ repo_type="dataset",
187
+ filename=hf_path,
188
+ local_dir=local_dir,
189
+ token=token
190
+ )
191
+ downloaded_files.append(local_path)
192
+ break
193
+ except Exception as e:
194
+ if attempt == max_retries - 1:
195
+ st.error(f"Failed to download {hf_path}: {e}")
196
+
197
+ except Exception as e:
198
+ st.error(f"Error listing DICOM files for {patient_id}: {e}")
199
  if planner_beam_ids:
200
  planner_file = os.path.join(local_dir, 'data', patient_id, "PlannerBeams.json")
201
  try:
 
348
 
349
  download_data(REPO_ID, [pid], beam_ids=beam_ids,
350
  planner_beam_ids=only_planner,
351
+ local_dir=local_dir, download_dicom=False)
352
 
353
  progress.progress(i / total)
354
 
355
 
356
  status.success("All downloads complete. Preparing zip…")
357
 
358
+ zip_path = os.path.join(tempfile.gettempdir(), f"portpy_patients_{uuid.uuid4().hex}.zip")
359
+
360
+ # optional: guard size to avoid crashes
361
+ total_bytes = 0
362
+ for root, _, files in os.walk(local_dir):
363
+ for f in files:
364
+ total_bytes += os.path.getsize(os.path.join(root, f))
365
+ total_gb = total_bytes / (1024 ** 3)
366
+ status.write(f"Preparing zip (~{total_gb:.2f} GB)…")
367
+ if total_gb > 40.0:
368
+ st.error("Selection too large for a single zip. Please download fewer patients.")
369
+ st.stop()
370
+
371
+ if os.path.exists(zip_path):
372
+ os.remove(zip_path)
373
 
374
+ with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_STORED, allowZip64=True) as zf:
 
 
375
  for root, _, files in os.walk(local_dir):
376
  for f in files:
377
  full_path = os.path.join(root, f)
378
  rel_path = os.path.relpath(full_path, local_dir)
379
  zf.write(full_path, rel_path)
380
+
381
+ with open(zip_path, "rb") as fp:
382
+ st.download_button(
383
+ label="Your download is ready! Click to save.",
384
+ data=fp,
385
+ file_name="portpy_patients.zip",
386
+ mime="application/zip",
387
+ )
388
+ # # Create zip in memory
389
+ # buf = io.BytesIO()
390
+ # with zipfile.ZipFile(buf, "w", zipfile.ZIP_STORED) as zf:
391
+ # for root, _, files in os.walk(local_dir):
392
+ # for f in files:
393
+ # full_path = os.path.join(root, f)
394
+ # rel_path = os.path.relpath(full_path, local_dir)
395
+ # zf.write(full_path, rel_path)
396
+ # buf.seek(0)
397
+ #
398
+ # # Trigger file download automatically from the SAME BUTTON CLICK
399
+ # st.download_button(
400
+ # label="Your download is ready! Click to save.",
401
+ # data=buf,
402
+ # file_name="portpy_patients.zip",
403
+ # mime="application/zip",
404
+ # )
405
 
406
  # if st.button("Download Data"):
407
  # patients_to_download = results_df["patient_id"].tolist()