File size: 3,896 Bytes
2ee0ba5
 
 
 
 
 
c3d3f5a
 
2ee0ba5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3d3f5a
2ee0ba5
c3d3f5a
2ee0ba5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python3
"""
Script to create a sample_images directory with representative test images
from the MVTecAD dataset for use in the Gradio web UI.

For each category, it copies:
- 1 good (normal) image
- 2 images from each anomaly type (up to 4 anomaly types per category)
"""

import os
import shutil
from pathlib import Path

# Filesystem layout. BASE_DIR is the parent of the directory that contains
# this script; the dataset and the generated samples both live beneath it.
BASE_DIR = Path(__file__).parent.parent
DATASET_DIR = BASE_DIR.joinpath("data", "MVTecAD")
SAMPLE_DIR = BASE_DIR.joinpath("sample_images")

# Category sub-directories looked up under DATASET_DIR, in processing order.
CATEGORIES = [
    "bottle",
    "cable",
    "capsule",
    "carpet",
    "grid",
    "hazelnut",
    "leather",
    "metal_nut",
    "pill",
    "screw",
    "tile",
    "toothbrush",
    "transistor",
    "wood",
    "zipper",
]

# Sampling limits applied to every category.
NUM_GOOD_SAMPLES = 1              # images taken from test/good
NUM_ANOMALY_SAMPLES_PER_TYPE = 2  # images taken from each anomaly folder
MAX_ANOMALY_TYPES = 4             # anomaly folders used per category


def get_anomaly_types(category_path: Path) -> list:
    """Return the sorted anomaly folder names found under ``<category>/test``.

    Every sub-directory of ``category_path / "test"`` counts as an anomaly
    type except the one named ``good``. Plain files are ignored. An empty
    list is returned when the ``test`` directory does not exist.
    """
    test_root = category_path / "test"
    if not test_root.exists():
        return []

    names = []
    for entry in test_root.iterdir():
        # Skip loose files and the folder holding the normal images.
        if not entry.is_dir() or entry.name == "good":
            continue
        names.append(entry.name)

    names.sort()
    return names


def copy_samples(src_dir: Path, dst_dir: Path, num_samples: int) -> int:
    """Copy a limited number of image samples from source to destination.

    The image files directly inside ``src_dir`` (``.png``, ``.jpg``,
    ``.jpeg`` or ``.bmp``, matched case-insensitively) are sorted by path and
    the first ``num_samples`` of them are copied into ``dst_dir`` with
    ``shutil.copy2``. ``dst_dir`` is created if needed, including parents.

    Args:
        src_dir: Existing directory to read images from.
        dst_dir: Directory to copy into.
        num_samples: Maximum number of images to copy. Values below zero are
            treated as zero.

    Returns:
        The number of files copied by this call. Files that were already in
        ``dst_dir`` are not counted.
    """
    dst_dir.mkdir(parents=True, exist_ok=True)

    image_extensions = {'.png', '.jpg', '.jpeg', '.bmp'}
    # Sorted so repeated runs always pick the same files.
    image_files = sorted(
        f for f in src_dir.iterdir()
        if f.is_file() and f.suffix.lower() in image_extensions
    )

    # Clamp the limit: a negative slice bound would mean "all but the last
    # k files", which is never what a sample count is meant to express.
    selected = image_files[:max(num_samples, 0)]
    for img_file in selected:
        shutil.copy2(img_file, dst_dir / img_file.name)

    return len(selected)


def create_sample_images():
    """Create the sample images directory structure.

    For every entry of ``CATEGORIES`` that exists under ``DATASET_DIR``,
    copies ``NUM_GOOD_SAMPLES`` images from ``test/good`` and
    ``NUM_ANOMALY_SAMPLES_PER_TYPE`` images from each of the first
    ``MAX_ANOMALY_TYPES`` anomaly folders into ``SAMPLE_DIR/<category>/``.
    Progress and a final summary are printed to stdout. Categories missing
    from the dataset are reported and skipped.
    """
    print(f"Creating sample images directory: {SAMPLE_DIR}")

    # Create main sample directory
    SAMPLE_DIR.mkdir(parents=True, exist_ok=True)

    total_images = 0

    for category in CATEGORIES:
        category_src = DATASET_DIR / category
        category_dst = SAMPLE_DIR / category

        if not category_src.exists():
            print(f"⚠️  Skipping {category}: source directory not found")
            continue

        print(f"\n📁 Processing {category}...")

        # Copy good (normal) samples
        good_src = category_src / "test" / "good"
        if good_src.exists():
            # Use the count reported by copy_samples rather than listing the
            # destination, which may hold files from earlier runs.
            num_good = copy_samples(
                good_src, category_dst / "good", NUM_GOOD_SAMPLES
            )
            print(f"   ✓ Copied {num_good} good samples")
            total_images += num_good

        # Anomaly folders arrive sorted; only the first MAX_ANOMALY_TYPES
        # are sampled and the rest are reported below.
        anomaly_types = get_anomaly_types(category_src)
        selected_anomalies = anomaly_types[:MAX_ANOMALY_TYPES]
        skipped_anomalies = anomaly_types[MAX_ANOMALY_TYPES:]

        # get_anomaly_types only yields directories found under test/, so no
        # further existence check is needed here.
        for anomaly_type in selected_anomalies:
            num_anomaly = copy_samples(
                category_src / "test" / anomaly_type,
                category_dst / anomaly_type,
                NUM_ANOMALY_SAMPLES_PER_TYPE,
            )
            print(f"   ✓ Copied {num_anomaly} {anomaly_type} samples")
            total_images += num_anomaly

        if skipped_anomalies:
            print(
                f"   ℹ️  Skipped {len(skipped_anomalies)} additional anomaly "
                f"types: {', '.join(skipped_anomalies)}"
            )

    print("\n✅ Sample images directory created successfully!")
    print(f"📊 Total images copied: {total_images}")
    print(f"📂 Location: {SAMPLE_DIR}")


# Build the sample directory only when this file is run as a script, so that
# importing the module has no filesystem side effects.
if __name__ == "__main__":
    create_sample_images()