| import os | |
| import io | |
| from PIL import Image | |
| import pandas as pd | |
| from datasets import load_dataset | |
def _coerce_to_image(payload):
    """Return an object exposing ``save`` for a row's ``BYTES`` value.

    Raw ``bytes``/``bytearray`` payloads, and dicts carrying a non-``None``
    ``'bytes'`` entry, are decoded with PIL. Any other value is returned
    unchanged and is presumed to already be a decoded image
    (NOTE(review): confirm the column's feature type against the dataset).
    """
    if isinstance(payload, dict) and payload.get('bytes') is not None:
        payload = payload['bytes']
    if isinstance(payload, (bytes, bytearray)):
        return Image.open(io.BytesIO(payload))
    return payload


def save_images_from_parquet(parquet_path, target_folder):
    """
    Export every image of the dataset's 'train' split into a folder.

    The dataset is loaded with ``datasets.load_dataset`` from the
    'loooooong/Any2anyTryon_vitonhd_test' repository, restricted to
    ``parquet_path`` via ``data_files``. For each row, the ``BYTES`` value is
    saved under ``target_folder`` using the row's ``FILENAME`` as file name.
    Rows that fail are reported on stdout and skipped, so one bad row does
    not stop the export.

    Args:
        parquet_path: Parquet file passed to ``load_dataset`` as ``data_files``
        target_folder: Folder to save images into (created if missing)
    """
    # Load the parquet file
    dataset = load_dataset(
        'loooooong/Any2anyTryon_vitonhd_test', data_files=parquet_path
    )['train']

    # Ensure target folder exists
    os.makedirs(target_folder, exist_ok=True)

    # Process each row
    for row in dataset:
        # Bound before the try so the error report below can never raise
        # itself (the original re-read row['FILENAME'] inside the handler).
        filename = None
        try:
            # Read the filename first so it is available for error reporting
            # even when the image payload is the part that is broken.
            filename = row['FILENAME']
            image = _coerce_to_image(row['BYTES'])

            # Save image to target folder
            image.save(os.path.join(target_folder, filename))
        except Exception as e:
            # Deliberately broad: this is a best-effort export, keep going.
            print(f"Failed to process row with filename {filename}: {e}")
if __name__ == "__main__":
    # Script entry point: export the images of the test parquet into the
    # image_synthesis folder.
    save_images_from_parquet(
        "data/vitonhd_test.parquet",
        "data/zalando-hd-resized/test/image_synthesis",
    )