Spaces:
Sleeping
Sleeping
| import cv2 | |
| import pandas as pd | |
| from pathlib import Path | |
# Resize every image listed in the raw metadata CSV to a fixed size, write the
# resized copies to OUT_IMG, and write a metadata CSV that contains only the
# rows whose image was actually read and written.

IN_IMG = Path("data_merged/images")
OUT_IMG = Path("data_processed/images")
IN_CSV = Path("data_merged/metadata_raw.csv")
OUT_CSV = Path("data_processed/metadata_resized.csv")

# (width, height) handed to cv2.resize for every output image.
TARGET_SIZE = (224, 224)


def _resize_image(src: Path, dst: Path, size: tuple = TARGET_SIZE) -> bool:
    """Resize the image at *src* to *size* and write the result to *dst*.

    Returns:
        True only if the source image was decoded and the resized copy was
        written; False if the image could not be read or the write failed.
    """
    img = cv2.imread(str(src))
    if img is None:
        # imread signals an unreadable/missing file with None, not an exception.
        return False
    # A filename may include sub-directories; make sure the target one exists
    # before writing, otherwise the write cannot succeed.
    dst.parent.mkdir(parents=True, exist_ok=True)
    # imwrite reports failure through its return value, so propagate it
    # instead of assuming the file landed on disk.
    return bool(cv2.imwrite(str(dst), cv2.resize(img, size)))


OUT_IMG.mkdir(parents=True, exist_ok=True)
OUT_CSV.parent.mkdir(parents=True, exist_ok=True)

df = pd.read_csv(IN_CSV)

# One boolean per metadata row, in row order: True when the image was processed.
keep_mask = []
for name in df["filename"]:
    # A blank filename cell is read as NaN (a float), which cannot be joined
    # onto a path; treat such rows like unreadable images and skip them.
    processed = isinstance(name, str) and _resize_image(
        IN_IMG / name, OUT_IMG / name
    )
    keep_mask.append(processed)

# Select from the original frame rather than rebuilding one from row objects:
# this keeps the column order, dtypes and header even when no row survives.
kept_rows = df.loc[keep_mask]
kept_rows.to_csv(OUT_CSV, index=False)
print("Images kept:", len(kept_rows))