# Spaces: Runtime error (status banner captured from the hosting page; not part of the script)
| import json | |
| import os | |
| from pathlib import Path | |
| import requests | |
| from tqdm import tqdm | |
def download_eurorad_figures(metadata_path: str, output_dir: str) -> None:
    """
    Download figures from Eurorad dataset and save them organized by case_id.

    Args:
        metadata_path: Path to the eurorad_metadata.json file
        output_dir: Base directory where figures will be saved. It is created
            (including any missing parent directories) if it does not exist.

    The figures will be saved as:
        {output_dir}/{case_id}/{figure_number}.jpg

    where ``figure_number`` is the subfigure's "number" field with surrounding
    whitespace stripped, spaces replaced by underscores, and lowercased.

    Example:
        figures/189/figure_1a.jpg
    """
    # Create the output directory; parents=True so a nested output_dir whose
    # parent does not exist yet does not raise FileNotFoundError.
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Load metadata. JSON is UTF-8 by specification, so do not rely on the
    # platform's default text encoding.
    with open(metadata_path, encoding="utf-8") as f:
        metadata = json.load(f)

    # Iterate through all cases with a progress bar. Only the case records are
    # needed; the directory name comes from the record's own "case_id" field.
    for case in tqdm(metadata.values(), desc="Downloading cases", unit="case"):
        case_dir = output_path / str(case["case_id"])
        case_dir.mkdir(exist_ok=True)

        # Process all figures and their subfigures
        for figure in case["figures"]:
            for subfig in figure["subfigures"]:
                # Normalize the label into a file name: strip surrounding
                # whitespace, replace spaces with underscores, lowercase.
                subfig_name = f"{subfig['number'].strip().replace(' ', '_').lower()}.jpg"
                save_figure(
                    url=subfig["url"],
                    output_path=case_dir / subfig_name,
                )
def save_figure(url: str, output_path: Path) -> None:
    """
    Download and save a single figure.

    If ``output_path`` already exists the download is skipped. Failures are
    reported on stdout and swallowed so that one bad figure does not abort a
    whole batch; in that case no file is left at ``output_path``.

    Args:
        url: URL of the figure to download
        output_path: Path where the figure should be saved
    """
    if output_path.exists():
        return

    # Write to a sibling temp file first and move it into place afterwards.
    # Writing straight to output_path could leave a truncated file behind on
    # an interrupted write, and the exists() check above would then skip it
    # on every later run.
    tmp_path = output_path.with_name(output_path.name + ".part")
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        tmp_path.write_bytes(response.content)
        tmp_path.replace(output_path)
    except Exception as e:
        # Deliberately broad: this is a best-effort download step.
        print(f"Error downloading {url}: {e}")
        # Do not leave a partial temp file lying around.
        try:
            tmp_path.unlink()
        except OSError:
            pass
if __name__ == "__main__":
    # Resolve the metadata file and the figures directory relative to the
    # directory that contains this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    metadata_file = os.path.join(script_dir, "eurorad_metadata.json")
    figures_dir = os.path.join(script_dir, "figures")
    download_eurorad_figures(metadata_path=metadata_file, output_dir=figures_dir)