File size: 3,097 Bytes
2ae0027
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3fca531-0f68-4951-b168-db8ad2d25971",
   "metadata": {},
   "outputs": [],
   "source": [
    "from PIL import Image, UnidentifiedImageError\n",
    "import os\n",
    "import pillow_avif  # AVIF support for Pillow\n",
    "\n",
    "# Define paths to folders\n",
    "data_dir = \"./data\"\n",
    "folders = [\"comic\", \"not-comic\"]  # Both folders to process\n",
    "output_format = \"png\"  # Target image format\n",
    "\n",
    "# Function to clean, convert, and rename images\n",
    "def process_images(data_dir, folders, output_format):\n",
    "    \"\"\"Convert the images in each folder to ``output_format`` and renumber them.\n",
    "\n",
    "    Every readable image is converted to RGB, written to its folder as\n",
    "    ``NNNN.<output_format>`` (numbered from 0001 in sorted filename order) and\n",
    "    the original file is deleted. Files that Pillow cannot open or decode are\n",
    "    deleted. Names containing ``test_sample.png`` and entries that are not\n",
    "    regular files are left untouched.\n",
    "\n",
    "    Args:\n",
    "        data_dir: Directory that contains the folders to process.\n",
    "        folders: Names of the sub-folders of ``data_dir`` to process.\n",
    "        output_format: Target file extension / image format, e.g. ``png``.\n",
    "\n",
    "    Returns:\n",
    "        None. Progress is reported with ``print``.\n",
    "\n",
    "    Raises:\n",
    "        Errors raised while writing a converted image propagate. The source\n",
    "        file is kept in that case, and images converted so far remain in the\n",
    "        ``.converting_*`` staging directory inside the folder.\n",
    "    \"\"\"\n",
    "    import tempfile\n",
    "\n",
    "    # Pillow registers JPEG under the name JPEG; JPG is not a valid format name.\n",
    "    save_format = \"JPEG\" if output_format.lower() == \"jpg\" else output_format.upper()\n",
    "\n",
    "    for folder in folders:\n",
    "        folder_path = os.path.join(data_dir, folder)\n",
    "        print(f\"Processing folder: {folder_path}\")\n",
    "\n",
    "        # Ensure the folder exists (and really is a directory)\n",
    "        if not os.path.isdir(folder_path):\n",
    "            print(f\"Folder {folder_path} does not exist. Skipping.\")\n",
    "            continue\n",
    "\n",
    "        # Snapshot the listing before the staging directory is created.\n",
    "        filenames = sorted(os.listdir(folder_path))\n",
    "\n",
    "        # Converted files are staged first and moved to their final NNNN names\n",
    "        # only after all originals are gone. Writing them straight into the\n",
    "        # folder could overwrite a source that has not been processed yet\n",
    "        # (converting 0001.jpg would clobber an existing 0001.png).\n",
    "        staging_dir = tempfile.mkdtemp(prefix=\".converting_\", dir=folder_path)\n",
    "        staged = []  # (staged_path, new_filename) pairs, in output order\n",
    "\n",
    "        for filename in filenames:\n",
    "            file_path = os.path.join(folder_path, filename)\n",
    "            if \"test_sample.png\" in filename:  # Ignore test_sample.png\n",
    "                continue\n",
    "            if not os.path.isfile(file_path):  # os.remove() cannot delete directories\n",
    "                continue\n",
    "\n",
    "            # Only read/decode errors mark a file as invalid; a failed save\n",
    "            # (e.g. disk full) must never delete the source.\n",
    "            try:\n",
    "                with Image.open(file_path) as img:\n",
    "                    rgb_img = img.convert(\"RGB\")  # Decoded copy, independent of the file\n",
    "            except (UnidentifiedImageError, OSError) as e:\n",
    "                print(f\"Invalid or unreadable file: {filename} ({e}). Deleting.\")\n",
    "                os.remove(file_path)  # Delete invalid files\n",
    "                continue\n",
    "\n",
    "            new_filename = f\"{len(staged) + 1:04d}.{output_format}\"\n",
    "            staged_path = os.path.join(staging_dir, new_filename)\n",
    "            rgb_img.save(staged_path, format=save_format)\n",
    "\n",
    "            # The source handle is closed by now, so removal is safe.\n",
    "            os.remove(file_path)\n",
    "            staged.append((staged_path, new_filename))\n",
    "            print(f\"Converted: {filename} -> {new_filename}\")\n",
    "\n",
    "        # All originals are gone: move the staged files to their final names.\n",
    "        for staged_path, new_filename in staged:\n",
    "            os.replace(staged_path, os.path.join(folder_path, new_filename))\n",
    "        os.rmdir(staging_dir)\n",
    "\n",
    "# Run the processing function (destructive: originals are replaced in place and unreadable files are deleted)\n",
    "process_images(data_dir, folders, output_format)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}