| | import json |
| | import os |
| | from pathlib import Path |
| | import requests |
| | from tqdm import tqdm |
| |
|
| |
|
def download_eurorad_figures(metadata_path: str, output_dir: str) -> None:
    """
    Download figures from Eurorad dataset and save them organized by case_id.

    Args:
        metadata_path: Path to the eurorad_metadata.json file. The file is
            expected to hold a JSON object whose values are case records
            with "case_id" and "figures" keys; each figure has a
            "subfigures" list whose items carry "number" and "url".
        output_dir: Base directory where figures will be saved. It is
            created, together with any missing parent directories, if it
            does not exist yet.

    The figures will be saved as:
        {output_dir}/{case_id}/{figure_number}.jpg
    where figure_number is the subfigure's "number" value, stripped,
    lower-cased and with spaces replaced by underscores.
    Example:
        figures/189/figure_1a.jpg
    """
    output_path = Path(output_dir)
    # parents=True so a nested, not-yet-existing output_dir does not fail.
    output_path.mkdir(parents=True, exist_ok=True)

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(metadata_path, encoding="utf-8") as f:
        metadata = json.load(f)

    # Only the case records are needed; the directory name comes from the
    # record's own "case_id" field rather than from the mapping key.
    for case in tqdm(metadata.values(), desc="Downloading cases", unit="case"):
        case_dir = output_path / str(case["case_id"])
        case_dir.mkdir(exist_ok=True)

        for figure in case["figures"]:
            for subfig in figure["subfigures"]:
                # e.g. " Figure 1a " -> "figure_1a.jpg"
                subfig_name = f"{subfig['number'].strip().replace(' ', '_').lower()}.jpg"
                save_figure(
                    url=subfig["url"],
                    output_path=case_dir / subfig_name,
                )
| |
|
| |
|
def save_figure(url: str, output_path: Path) -> None:
    """
    Download and save a single figure.

    A figure that already exists at output_path is left untouched, so the
    download can be re-run to fetch only what is missing. Network and
    filesystem failures are reported on stdout and do not raise, so one bad
    figure does not abort a batch.

    Args:
        url: URL of the figure to download
        output_path: Path where the figure should be saved
    """
    if output_path.exists():
        return

    # Stage the bytes in a sibling ".part" file and rename it into place only
    # once fully written. Writing straight to output_path could leave a
    # truncated image behind that the exists() check above would then skip
    # forever on later runs.
    partial_path = output_path.with_name(output_path.name + ".part")
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        partial_path.write_bytes(response.content)
        partial_path.replace(output_path)
    except (requests.RequestException, OSError) as e:
        print(f"Error downloading {url}: {e}")
        # Best-effort cleanup of a half-written staging file.
        try:
            partial_path.unlink()
        except OSError:
            pass
| |
|
| |
|
if __name__ == "__main__":
    # Both the metadata file and the figures directory live next to this
    # script, so derive them from the script's own absolute location.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    metadata_file = os.path.join(script_dir, "eurorad_metadata.json")
    figures_dir = os.path.join(script_dir, "figures")
    download_eurorad_figures(metadata_path=metadata_file, output_dir=figures_dir)
| |
|