#!/usr/bin/env python3
"""
Test script to verify that the prepared data for classification problems can be loaded correctly
"""

import numpy as np
import os

# Location the prepared arrays are read from when no explicit path is given.
_DEFAULT_BASE_PATH = "Data-intensive-systems/A13/classification_problems/prepared_data/"

# Building blocks of the file naming scheme: <problem>_<model>_<split>_<X|y>.npy
# Each entry pairs the token used in file names with the text used in reports.
_PROBLEMS = (("A", "3D - Kinect"), ("B", "2D - PoseNet"))
_MODELS = ("Dense", "CNN")
_SPLITS = (
    ("train", "Train"),
    ("test", "Test"),
    ("train_aug", "Train Aug"),
    ("test_aug", "Test Aug"),
)


def _expected_files():
    """Return every expected .npy file name, grouped by problem and model."""
    names = []
    for problem, _ in _PROBLEMS:
        for model in _MODELS:
            for split, _ in _SPLITS:
                names.append(f"{problem}_{model}_{split}_X.npy")
                names.append(f"{problem}_{model}_{split}_y.npy")
    return names


def _print_shape_summary(loaded_data_info):
    """Print the shape of every feature (X) array, one section per problem."""
    for problem, description in _PROBLEMS:
        print(f"\nProblem {problem} ({description}):")
        for model in _MODELS:
            for split, split_label in _SPLITS:
                shape = loaded_data_info[f"{problem}_{model}_{split}_X.npy"]['shape']
                print(f"  {problem}{model} - {split_label}: {shape}")


def test_data_loading(base_path=_DEFAULT_BASE_PATH):
    """Test that all prepared data files can be loaded correctly.

    Every expected ``.npy`` file is loaded with ``np.load`` and its shape,
    dtype and in-memory size are printed. If all files load, a shape summary
    and the unique label values of the Dense train/test label files are
    printed as well.

    Args:
        base_path: Directory containing the prepared ``.npy`` files. Defaults
            to the repository-relative location of the prepared data.

    Returns:
        True if every expected file was loaded, False if at least one file
        is missing or could not be loaded.
    """
    print("Testing data loading for classification problems...")

    expected_files = _expected_files()
    print(f"\nChecking {len(expected_files)} expected files...")

    # Label files reported in the consistency section. Their arrays are kept
    # from the single loading pass so they are not read from disk twice.
    label_files = {
        f"{problem}_Dense_{split}_y.npy"
        for problem, _ in _PROBLEMS
        for split in ("train", "test")
    }

    failed_files = []
    loaded_data_info = {}
    label_arrays = {}

    for name in expected_files:
        file_path = os.path.join(base_path, name)
        try:
            data = np.load(file_path)
        except FileNotFoundError:
            print(f"✗ Missing {name}")
            failed_files.append(name)
            continue
        except Exception as e:
            # Deliberately broad: this is a diagnostic pass, so any load
            # failure is reported and counted instead of aborting the run.
            print(f"✗ Error loading {name}: {e}")
            failed_files.append(name)
            continue

        size_mb = data.nbytes / (1024 * 1024)
        loaded_data_info[name] = {
            'shape': data.shape,
            'dtype': data.dtype,
            'size_mb': size_mb,
        }
        if name in label_files:
            label_arrays[name] = data
        print(f"✓ Loaded {name}: shape={data.shape}, dtype={data.dtype}, size={size_mb:.2f}MB")

    if failed_files:
        print(f"\n❌ {len(failed_files)} files could not be loaded:")
        for name in failed_files:
            print(f"  - {name}")
        return False

    print(f"\n✅ All {len(expected_files)} files loaded successfully!")

    # Print summary statistics
    print("\n" + "=" * 60)
    print("DATA SUMMARY")
    print("=" * 60)
    _print_shape_summary(loaded_data_info)

    # Report the label values so the problems/splits can be compared by eye.
    print("\nLabel consistency check:")
    for split in ("train", "test"):
        for problem, _ in _PROBLEMS:
            labels = label_arrays[f"{problem}_Dense_{split}_y.npy"]
            print(f"  {problem} {split} labels: {labels.shape}, unique values: {np.unique(labels)}")

    print("\n✅ All prepared data is ready for machine learning experiments!")
    return True

if __name__ == "__main__":
    # Report the outcome in text and through the process exit status, so
    # shells and CI jobs can detect a failed verification without parsing
    # the printed output.
    if test_data_loading():
        print("\n🎉 Classification problems data preparation verified successfully!")
    else:
        print("\n❌ Issues found with data preparation.")
        raise SystemExit(1)