File size: 14,095 Bytes
b94b2ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
#!/usr/bin/env python3
"""
Script to prepare data for 2 classification problems:
- Problem A (3D): Kinect frame sequence: 13 joints x 3 dimensions = 39 features per frame
- Problem B (2D): PoseNet frame sequence: 13 joints x 2 dimensions = 26 features per frame

Each problem will have two approaches:
- Dense: Flattened features for dense neural networks
- CNN: Structured features for convolutional neural networks
"""

import pandas as pd
import numpy as np
from pathlib import Path
import os


def load_processed_data(csv_path):
    """Read a processed-sequence CSV and split it into features, labels and names.

    Args:
        csv_path: Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns:
        Tuple ``(X, y, filenames)``: ``X`` holds every column other than
        ``filename`` and ``label`` (in file order), ``y`` is the ``label``
        column and ``filenames`` is the ``filename`` column.
    """
    frame = pd.read_csv(csv_path)

    # Anything that is not one of the two bookkeeping columns is a feature.
    is_feature = ~frame.columns.isin(['filename', 'label'])
    features = frame.loc[:, is_feature].values

    labels = frame['label'].values
    names = frame['filename'].values
    return features, labels, names


def reshape_for_3d_problem(X, frames_per_seq=10, joints_per_frame=13, dims=3):
    """
    Reshape data for 3D problem (Kinect: 13 joints x 3 dimensions = 39 features per frame).

    When the column count is exactly ``frames_per_seq * joints_per_frame * dims``
    the input is simply reshaped. Otherwise the columns are split into
    ``frames_per_seq`` equally sized frames and the first
    ``joints_per_frame * dims`` values of every frame are used as coordinates.

    Args:
        X: Input data of shape (samples, total_features)
        frames_per_seq: Number of frames per sequence (default 10)
        joints_per_frame: Number of joints per frame (default 13)
        dims: Number of dimensions (default 3 for 3D)

    Returns:
        Reshaped data of shape (samples, frames_per_seq, joints_per_frame, dims).
        The exact-match path keeps the dtype of ``X``; the extraction path
        returns a new float64 array.

    Raises:
        ValueError: If a frame holds fewer than ``joints_per_frame * dims`` values.
    """
    coords_per_frame = joints_per_frame * dims
    total_features = frames_per_seq * coords_per_frame
    samples = X.shape[0]

    # Exact layout: nothing to extract, a plain reshape is enough.
    if X.shape[1] == total_features:
        return X.reshape(samples, frames_per_seq, joints_per_frame, dims)

    print(f"Warning: Expected {total_features} features per sample, got {X.shape[1]}")
    # Report the real slice width instead of a hard-coded 39 so the message
    # stays truthful when joints_per_frame / dims are overridden.
    print(f"Attempting to extract {dims}D features by taking first {coords_per_frame} per frame...")

    features_per_frame = X.shape[1] // frames_per_seq
    if features_per_frame < coords_per_frame:
        raise ValueError(f"Insufficient features per frame for {dims}D interpretation: {features_per_frame}")

    # Columns beyond the last whole frame cannot belong to any frame; they were
    # already dropped silently before, now the caller is told about it.
    used_columns = frames_per_seq * features_per_frame
    leftover = X.shape[1] - used_columns
    if leftover:
        print(f"Warning: ignoring {leftover} trailing feature column(s) that do not fill a whole frame")

    # NOTE(review): assumes the leading values of each frame are the joint
    # coordinates in joint-major order - confirm against the CSV layout.
    per_frame = X[:, :used_columns].reshape(samples, frames_per_seq, features_per_frame)
    coords = per_frame[:, :, :coords_per_frame]
    # astype keeps the float64 result the extraction path has always produced.
    return coords.reshape(samples, frames_per_seq, joints_per_frame, dims).astype(np.float64)


def reshape_for_2d_problem(X, frames_per_seq=10, joints_per_frame=13, dims=2):
    """
    Reshape data for 2D problem (PoseNet: 13 joints x 2 dimensions = 26 features per frame).

    When the column count is exactly ``frames_per_seq * joints_per_frame * dims``
    the input is simply reshaped. Otherwise the columns are split into
    ``frames_per_seq`` equally sized frames and the first
    ``joints_per_frame * dims`` values of every frame are used as coordinates.

    Args:
        X: Input data of shape (samples, total_features)
        frames_per_seq: Number of frames per sequence (default 10)
        joints_per_frame: Number of joints per frame (default 13)
        dims: Number of dimensions (default 2 for 2D)

    Returns:
        Reshaped data of shape (samples, frames_per_seq, joints_per_frame, dims).
        The exact-match path keeps the dtype of ``X``; the extraction path
        returns a new float64 array.

    Raises:
        ValueError: If a frame holds fewer than ``joints_per_frame * dims`` values.
    """
    coords_per_frame = joints_per_frame * dims
    total_features = frames_per_seq * coords_per_frame
    samples = X.shape[0]

    # Exact layout: nothing to extract, a plain reshape is enough.
    if X.shape[1] == total_features:
        return X.reshape(samples, frames_per_seq, joints_per_frame, dims)

    print(f"Warning: Expected {total_features} features per sample, got {X.shape[1]}")
    # Report the real slice width instead of a hard-coded 26 so the message
    # stays truthful when joints_per_frame / dims are overridden.
    print(f"Attempting to extract {dims}D features by taking first {coords_per_frame} per frame...")

    features_per_frame = X.shape[1] // frames_per_seq
    if features_per_frame < coords_per_frame:
        raise ValueError(f"Insufficient features per frame for {dims}D interpretation: {features_per_frame}")

    # Columns beyond the last whole frame cannot belong to any frame; they were
    # already dropped silently before, now the caller is told about it.
    used_columns = frames_per_seq * features_per_frame
    leftover = X.shape[1] - used_columns
    if leftover:
        print(f"Warning: ignoring {leftover} trailing feature column(s) that do not fill a whole frame")

    # NOTE(review): this takes the FIRST joints*dims values of each frame. If a
    # frame is stored as interleaved (x, y, z) per joint, those values are not
    # the (x, y) pairs of all joints - confirm the column layout of the CSV.
    per_frame = X[:, :used_columns].reshape(samples, frames_per_seq, features_per_frame)
    coords = per_frame[:, :, :coords_per_frame]
    # astype keeps the float64 result the extraction path has always produced.
    return coords.reshape(samples, frames_per_seq, joints_per_frame, dims).astype(np.float64)


def prepare_adense_data(X_3d):
    """
    Flatten structured 3D-problem data into one feature vector per sample (ADense input).

    Args:
        X_3d: Array of shape (samples, frames, joints, dims); any other rank
            fails when the shape is unpacked.

    Returns:
        Array of shape (samples, frames*joints*dims)
    """
    n_samples, n_frames, n_joints, n_dims = X_3d.shape
    flat_width = n_frames * n_joints * n_dims
    return X_3d.reshape((n_samples, flat_width))


def prepare_acnn_data(X_3d):
    """
    Provide the ACNN (3D convolutional network) input for the given structured data.

    Args:
        X_3d: Structured data, expected as (samples, frames, joints, dims)

    Returns:
        The very same object that was passed in; no channel axis is added and
        no copy is made.
    """
    # Deliberate pass-through: the (samples, frames, joints, dims) layout is
    # handed over as-is. Inserting a channel axis for a
    # (samples, channels, depth, height, width) style network is left to the
    # consumer.
    structured = X_3d
    return structured


def prepare_bdense_data(X_2d):
    """
    Flatten structured 2D-problem data into one feature vector per sample (BDense input).

    Args:
        X_2d: Array of shape (samples, frames, joints, dims); any other rank
            fails when the shape is unpacked.

    Returns:
        Array of shape (samples, frames*joints*dims)
    """
    n_samples, n_frames, n_joints, n_dims = X_2d.shape
    flat_width = n_frames * n_joints * n_dims
    return X_2d.reshape((n_samples, flat_width))


def prepare_bcnn_data(X_2d):
    """
    Provide the BCNN (2D convolutional network) input for the given structured data.

    Args:
        X_2d: Structured data, expected as (samples, frames, joints, dims)

    Returns:
        The very same object that was passed in; no channel axis is added and
        no copy is made.
    """
    # Deliberate pass-through: the temporal (samples, frames, joints, dims)
    # layout is already what gets saved for the CNN.
    structured = X_2d
    return structured


def save_data(X, y, filenames, output_dir, prefix):
    """Write one dataset's arrays into ``output_dir`` as ``.npy`` files.

    Creates ``output_dir`` if needed, then stores ``<prefix>_X.npy``,
    ``<prefix>_y.npy`` and ``<prefix>_filenames.npy`` and prints the shapes of
    ``X`` and ``y``.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Same three files, written in the same order: features, labels, names.
    for suffix, array in (('X', X), ('y', y), ('filenames', filenames)):
        target = os.path.join(output_dir, f'{prefix}_{suffix}.npy')
        np.save(target, array)

    print(f"Saved {prefix} data: X shape {X.shape}, y shape {y.shape}")


def _load_splits(data_dir):
    """Load the four Good-vs-Bad CSV splits, printing each split's feature shape.

    Returns:
        Dict keyed by split name (``train``, ``test``, ``train_aug``,
        ``test_aug``, in that order) whose values are
        ``(description, X, y, filenames)`` tuples.
    """
    plan = (
        ("train", "processed_sequences_Good_vs_Bad_train.csv", "training"),
        ("test", "processed_sequences_Good_vs_Bad_test.csv", "test"),
        ("train_aug", "processed_sequences_Good_vs_Bad_train_augmented.csv", "augmented training"),
        ("test_aug", "processed_sequences_Good_vs_Bad_test_augmented.csv", "augmented test"),
    )
    splits = {}
    for position, (key, csv_name, description) in enumerate(plan):
        # Only the first progress line is preceded by a blank line.
        lead = "\n" if position == 0 else ""
        print(f"{lead}Loading {description} data...")
        X, y, filenames = load_processed_data(data_dir / csv_name)
        print(f"{description.capitalize()} data shape: {X.shape}")
        splits[key] = (description, X, y, filenames)
    return splits


def _prepare_problem(letter, dims, reshape_fn, dense_fn, cnn_fn, splits, output_dir):
    """Reshape every split for one problem and save its Dense and CNN variants."""
    # Structure all splits first so a layout problem surfaces before any file
    # of this problem is written.
    structured = {
        key: reshape_fn(X, frames_per_seq=10, joints_per_frame=13, dims=dims)
        for key, (_, X, _, _) in splits.items()
    }
    for key, data in structured.items():
        print(f"{dims}D {splits[key][0]} data shape: {data.shape}")

    variants = (("Dense", "flattened", dense_fn), ("CNN", "structured", cnn_fn))
    for variant, layout, prepare_fn in variants:
        print(f"\nPreparing {letter}{variant} data ({layout})...")
        prepared = {key: prepare_fn(data) for key, data in structured.items()}

        print(f"{letter}-{variant} training shape: {prepared['train'].shape}")
        print(f"{letter}-{variant} test shape: {prepared['test'].shape}")

        for key, data in prepared.items():
            _, _, y, filenames = splits[key]
            save_data(data, y, filenames, output_dir, f"{letter}_{variant}_{key}")

    print(f"\nProblem {letter} ({dims}D) data preparation completed!")


def main():
    """Prepare and save the Dense and CNN datasets for Problems A (3D) and B (2D).

    Reads the four processed CSV splits from
    ``Data-intensive-systems/A13/Processed_Data`` (relative to the current
    working directory) and writes ``.npy`` files to
    ``Data-intensive-systems/A13/classification_problems/prepared_data``.

    Each problem is prepared on a best-effort basis: an error in one problem is
    reported and that problem is skipped, the other one still runs. The final
    summary states which problems failed instead of always claiming success.
    """
    print("Preparing data for classification problems A and B...")

    # Load the processed data
    data_dir = Path("Data-intensive-systems/A13/Processed_Data")
    splits = _load_splits(data_dir)

    # Prepare output directory
    output_dir = Path("Data-intensive-systems/A13/classification_problems/prepared_data")
    os.makedirs(output_dir, exist_ok=True)

    problems = (
        ("A", 3, "Kinect", reshape_for_3d_problem, prepare_adense_data, prepare_acnn_data),
        ("B", 2, "PoseNet", reshape_for_2d_problem, prepare_bdense_data, prepare_bcnn_data),
    )

    failed = []
    for letter, dims, sensor, reshape_fn, dense_fn, cnn_fn in problems:
        print("\n" + "="*60)
        print(f"PREPARING PROBLEM {letter} ({dims}D - {sensor}: 13 joints x {dims} dims)")
        print("="*60)

        try:
            _prepare_problem(letter, dims, reshape_fn, dense_fn, cnn_fn, splits, output_dir)
        except Exception as e:
            # Deliberate best-effort boundary: keep going with the next problem,
            # but remember the failure and show the exception type, since
            # str(e) alone is often just a bare key or number.
            print(f"Error preparing Problem {letter} data: {type(e).__name__}: {e}")
            print(f"Skipping Problem {letter}...")
            failed.append(letter)

    print("\n" + "="*60)
    print("CLASSIFICATION PROBLEMS DATA PREPARATION SUMMARY")
    print("="*60)
    for letter, dims, sensor, _, _, _ in problems:
        status = " [FAILED - see error above]" if letter in failed else ""
        print(f"Problem {letter} ({dims}D - {sensor}): 13 joints x {dims} dimensions per frame{status}")
        print(f"  - {letter}Dense: Flattened features for dense networks")
        print(f"  - {letter}CNN: Structured features for convolutional networks")
        print("")

    if failed:
        # A failed problem may already have written some of its files.
        print("WARNING: data preparation failed for Problem(s): " + ", ".join(failed))
        print("Output directory (may hold only a partial set of datasets):", output_dir)
    else:
        print("All prepared datasets saved to:", output_dir)
        print("Both original and augmented versions are available")


# Run the preparation pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()