pose-deep-learning / A13 /classification_problems /prepare_classification_problems.py
Bachstelze
init A13 data
b94b2ad
#!/usr/bin/env python3
"""
Script to prepare data for 2 classification problems:
- Problem A (3D): Kinect frame sequence: 13 joints x 3 dimensions = 39 features per frame
- Problem B (2D): PoseNet frame sequence: 13 joints x 2 dimensions = 26 features per frame
Each problem will have two approaches:
- Dense: Flattened features for dense neural networks
- CNN: Structured features for convolutional neural networks
"""
import pandas as pd
import numpy as np
from pathlib import Path
import os
def load_processed_data(csv_path):
    """
    Read a processed-sequence CSV and split it into features, labels and names.

    Args:
        csv_path: Path of the CSV file; it must contain 'filename' and
            'label' columns, every other column is treated as a feature.

    Returns:
        Tuple (X, y, filenames) of NumPy arrays: the feature matrix in the
        CSV's column order, the 'label' column and the 'filename' column.
    """
    table = pd.read_csv(csv_path)

    # Everything that is not bookkeeping metadata is a feature column.
    metadata_columns = ('filename', 'label')
    feature_names = [name for name in table.columns if name not in metadata_columns]

    return (
        table[feature_names].values,
        table['label'].values,
        table['filename'].values,
    )
def reshape_for_3d_problem(X, frames_per_seq=10, joints_per_frame=13, dims=3):
    """
    Reshape data for 3D problem (Kinect: 13 joints x 3 dimensions = 39 features per frame).

    When a sample holds exactly frames_per_seq * joints_per_frame * dims
    features it is reshaped directly. Otherwise each frame is assumed to
    occupy ``X.shape[1] // frames_per_seq`` consecutive columns and only the
    first ``joints_per_frame * dims`` columns of every frame are kept; columns
    left over by the floor division are ignored.

    Args:
        X: Input data of shape (samples, total_features)
        frames_per_seq: Number of frames per sequence (default 10)
        joints_per_frame: Number of joints per frame (default 13)
        dims: Number of dimensions (default 3 for 3D)

    Returns:
        Reshaped data of shape (samples, frames_per_seq, joints_per_frame, dims).
        The fallback (extraction) path always returns float64.

    Raises:
        ValueError: If X is not two-dimensional, if any size argument is not
            positive, or if a frame holds fewer than joints_per_frame * dims
            features.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            f"Expected input of shape (samples, total_features), got {X.ndim} dimension(s)"
        )
    if frames_per_seq <= 0 or joints_per_frame <= 0 or dims <= 0:
        raise ValueError("frames_per_seq, joints_per_frame and dims must all be positive")

    samples, n_features = X.shape
    coords_per_frame = joints_per_frame * dims
    total_features = frames_per_seq * coords_per_frame

    # Exact match: a plain reshape is enough.
    if n_features == total_features:
        return X.reshape(samples, frames_per_seq, joints_per_frame, dims)

    print(f"Warning: Expected {total_features} features per sample, got {n_features}")
    print(f"Attempting to extract {dims}D features by taking first {coords_per_frame} per frame...")

    features_per_frame = n_features // frames_per_seq
    if features_per_frame < coords_per_frame:
        raise ValueError(
            f"Insufficient features per frame for {dims}D interpretation: {features_per_frame}"
        )

    # View the sample as (frames, features_per_frame) and keep the leading
    # coordinate columns of every frame; this replaces a per-frame copy loop.
    framed = X[:, :frames_per_seq * features_per_frame].reshape(
        samples, frames_per_seq, features_per_frame
    )
    # float64 matches the zero-initialised buffer this path historically filled.
    coords = framed[:, :, :coords_per_frame].astype(np.float64)
    return coords.reshape(samples, frames_per_seq, joints_per_frame, dims)
def reshape_for_2d_problem(X, frames_per_seq=10, joints_per_frame=13, dims=2):
    """
    Reshape data for 2D problem (PoseNet: 13 joints x 2 dimensions = 26 features per frame).

    When a sample holds exactly frames_per_seq * joints_per_frame * dims
    features it is reshaped directly. Otherwise each frame is assumed to
    occupy ``X.shape[1] // frames_per_seq`` consecutive columns and only the
    first ``joints_per_frame * dims`` columns of every frame are kept; columns
    left over by the floor division are ignored.

    Args:
        X: Input data of shape (samples, total_features)
        frames_per_seq: Number of frames per sequence (default 10)
        joints_per_frame: Number of joints per frame (default 13)
        dims: Number of dimensions (default 2 for 2D)

    Returns:
        Reshaped data of shape (samples, frames_per_seq, joints_per_frame, dims).
        The fallback (extraction) path always returns float64.

    Raises:
        ValueError: If X is not two-dimensional, if any size argument is not
            positive, or if a frame holds fewer than joints_per_frame * dims
            features.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            f"Expected input of shape (samples, total_features), got {X.ndim} dimension(s)"
        )
    if frames_per_seq <= 0 or joints_per_frame <= 0 or dims <= 0:
        raise ValueError("frames_per_seq, joints_per_frame and dims must all be positive")

    samples, n_features = X.shape
    coords_per_frame = joints_per_frame * dims
    total_features = frames_per_seq * coords_per_frame

    # Exact match: a plain reshape is enough.
    if n_features == total_features:
        return X.reshape(samples, frames_per_seq, joints_per_frame, dims)

    print(f"Warning: Expected {total_features} features per sample, got {n_features}")
    print(f"Attempting to extract {dims}D features by taking first {coords_per_frame} per frame...")

    features_per_frame = n_features // frames_per_seq
    if features_per_frame < coords_per_frame:
        raise ValueError(
            f"Insufficient features per frame for {dims}D interpretation: {features_per_frame}"
        )

    # View the sample as (frames, features_per_frame) and keep the leading
    # coordinate columns of every frame; this replaces a per-frame copy loop.
    framed = X[:, :frames_per_seq * features_per_frame].reshape(
        samples, frames_per_seq, features_per_frame
    )
    # float64 matches the zero-initialised buffer this path historically filled.
    coords = framed[:, :, :coords_per_frame].astype(np.float64)
    return coords.reshape(samples, frames_per_seq, joints_per_frame, dims)
def prepare_adense_data(X_3d):
    """
    Flatten structured 3D-problem data for the ADense (dense network) model.

    Args:
        X_3d: Array of shape (samples, frames, joints, dims)

    Returns:
        Array of shape (samples, frames*joints*dims); one row per sample.
    """
    # Unpacking four names also rejects inputs that are not 4-dimensional.
    n_samples, n_frames, n_joints, n_dims = X_3d.shape
    row_width = n_frames * n_joints * n_dims
    return X_3d.reshape(n_samples, row_width)
def prepare_acnn_data(X_3d):
    """
    Hand the structured 3D-problem data to the ACNN (convolutional) model.

    Args:
        X_3d: Array of shape (samples, frames, joints, dims)

    Returns:
        The very same array, untouched: (samples, frames, joints, dims).
    """
    # The 4D layout is kept as-is (time steps x joints x coordinates); no
    # channel axis is inserted here, so a model that needs one must add it.
    structured = X_3d
    return structured
def prepare_bdense_data(X_2d):
    """
    Flatten structured 2D-problem data for the BDense (dense network) model.

    Args:
        X_2d: Array of shape (samples, frames, joints, dims)

    Returns:
        Array of shape (samples, frames*joints*dims); one row per sample.
    """
    # Unpacking four names also rejects inputs that are not 4-dimensional.
    n_samples, n_frames, n_joints, n_dims = X_2d.shape
    row_width = n_frames * n_joints * n_dims
    return X_2d.reshape(n_samples, row_width)
def prepare_bcnn_data(X_2d):
    """
    Hand the structured 2D-problem data to the BCNN (convolutional) model.

    Args:
        X_2d: Array of shape (samples, frames, joints, dims)

    Returns:
        The very same array, untouched: (samples, frames, joints, dims).
    """
    # The layout is already suitable for temporal processing; no channel
    # axis is inserted here.
    structured = X_2d
    return structured
def save_data(X, y, filenames, output_dir, prefix):
    """
    Save prepared data to the specified directory as three .npy files.

    Writes ``{prefix}_X.npy``, ``{prefix}_y.npy`` and
    ``{prefix}_filenames.npy`` into output_dir, creating the directory (and
    any missing parents) first, then prints the saved shapes.

    Args:
        X: Feature array (must expose ``.shape``).
        y: Label array (must expose ``.shape``); stored as given.
        filenames: Per-sample source file names.
        output_dir: Target directory (str or path-like).
        prefix: File-name prefix identifying the dataset, e.g. "A_Dense_train".
    """
    os.makedirs(output_dir, exist_ok=True)

    names = np.asarray(filenames)
    if names.dtype == object:
        # An object array can only be written by pickling, and such a file is
        # rejected by np.load unless allow_pickle=True is passed. A fixed-width
        # string array round-trips without pickle.
        names = names.astype(str)

    for suffix, array in (("X", X), ("y", y), ("filenames", names)):
        np.save(os.path.join(output_dir, f"{prefix}_{suffix}.npy"), array)

    print(f"Saved {prefix} data: X shape {X.shape}, y shape {y.shape}")
def _prepare_problem(letter, tag, dims, reshape_fn, dense_fn, cnn_fn, splits, output_dir):
    """Reshape, flatten and save every data split for one classification problem.

    Args:
        letter: Problem letter used in messages and file prefixes ("A" or "B").
        tag: Dimensionality tag used in messages ("3D" or "2D").
        dims: Number of coordinates per joint passed to reshape_fn.
        reshape_fn: Callable turning flat features into (samples, frames, joints, dims).
        dense_fn: Callable producing the dense-network layout.
        cnn_fn: Callable producing the CNN layout.
        splits: Sequence of (key, label, X, y, filenames) tuples; it must
            contain the keys "train" and "test".
        output_dir: Directory the .npy files are written to.
    """
    # Reshape every split first so a malformed split aborts before anything is saved.
    shaped = {
        key: reshape_fn(X, frames_per_seq=10, joints_per_frame=13, dims=dims)
        for key, _label, X, _y, _names in splits
    }
    for key, label, _X, _y, _names in splits:
        print(f"{tag} {label} data shape: {shaped[key].shape}")

    variants = (
        ("Dense", dense_fn, "flattened"),
        ("CNN", cnn_fn, "structured"),
    )
    for kind, prepare_fn, note in variants:
        print(f"\nPreparing {letter}{kind} data ({note})...")
        prepared = {key: prepare_fn(array) for key, array in shaped.items()}
        print(f"{letter}-{kind} training shape: {prepared['train'].shape}")
        print(f"{letter}-{kind} test shape: {prepared['test'].shape}")
        for key, _label, _X, y, names in splits:
            save_data(prepared[key], y, names, output_dir, f"{letter}_{kind}_{key}")

    print(f"\nProblem {letter} ({tag}) data preparation completed!")


def main():
    """
    Load the processed CSV splits and write the Dense/CNN datasets for problems A and B.

    Reads the four "Good_vs_Bad" CSV files (train, test and their augmented
    versions) from the processed-data directory, prepares each of them for
    Problem A (3D) and Problem B (2D), and saves the results as .npy files.
    Both directories are relative to the current working directory.

    A failure while preparing one problem is reported and does not stop the
    other problem; the final summary lists any problem that failed.
    """
    print("Preparing data for classification problems A and B...")

    data_dir = Path("Data-intensive-systems/A13/Processed_Data")

    # (key used in output file prefixes, human-readable label, CSV file name)
    split_specs = (
        ("train", "training", "processed_sequences_Good_vs_Bad_train.csv"),
        ("test", "test", "processed_sequences_Good_vs_Bad_test.csv"),
        ("train_aug", "augmented training", "processed_sequences_Good_vs_Bad_train_augmented.csv"),
        ("test_aug", "augmented test", "processed_sequences_Good_vs_Bad_test_augmented.csv"),
    )

    print()  # blank line before the loading messages
    splits = []
    for key, label, csv_name in split_specs:
        print(f"Loading {label} data...")
        X, y, names = load_processed_data(data_dir / csv_name)
        print(f"{label.capitalize()} data shape: {X.shape}")
        splits.append((key, label, X, y, names))

    # Prepare output directory
    output_dir = Path("Data-intensive-systems/A13/classification_problems/prepared_data")
    os.makedirs(output_dir, exist_ok=True)

    # (letter, tag, sensor, dims, reshape function, dense function, CNN function)
    problems = (
        ("A", "3D", "Kinect", 3, reshape_for_3d_problem, prepare_adense_data, prepare_acnn_data),
        ("B", "2D", "PoseNet", 2, reshape_for_2d_problem, prepare_bdense_data, prepare_bcnn_data),
    )

    failed = []
    for letter, tag, sensor, dims, reshape_fn, dense_fn, cnn_fn in problems:
        print("\n" + "=" * 60)
        print(f"PREPARING PROBLEM {letter} ({tag} - {sensor}: 13 joints x {dims} dims)")
        print("=" * 60)
        try:
            _prepare_problem(letter, tag, dims, reshape_fn, dense_fn, cnn_fn, splits, output_dir)
        except Exception as e:
            # Best effort: keep going with the next problem, but remember the
            # failure so the summary does not claim success.
            print(f"Error preparing Problem {letter} data: {e}")
            print(f"Skipping Problem {letter}...")
            failed.append(letter)

    print("\n" + "=" * 60)
    print("CLASSIFICATION PROBLEMS DATA PREPARATION SUMMARY")
    print("=" * 60)
    print("Problem A (3D - Kinect): 13 joints x 3 dimensions per frame")
    print(" - ADense: Flattened features for dense networks")
    print(" - ACNN: Structured features for convolutional networks")
    print("")
    print("Problem B (2D - PoseNet): 13 joints x 2 dimensions per frame")
    print(" - BDense: Flattened features for dense networks")
    print(" - BCNN: Structured features for convolutional networks")
    print("")
    if failed:
        print("WARNING: preparation failed for problem(s):", ", ".join(failed))
        print("Datasets for those problems are missing or incomplete in:", output_dir)
    else:
        print("All prepared datasets saved to:", output_dir)
        print("Both original and augmented versions are available")
# Run the preparation pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()