Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Script to prepare data for 2 classification problems: | |
| - Problem A (3D): Kinect frame sequence: 13 joints x 3 dimensions = 39 features per frame | |
| - Problem B (2D): PoseNet frame sequence: 13 joints x 2 dimensions = 26 features per frame | |
| Each problem will have two approaches: | |
| - Dense: Flattened features for dense neural networks | |
| - CNN: Structured features for convolutional neural networks | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from pathlib import Path | |
| import os | |
def load_processed_data(csv_path):
    """
    Read a processed-sequence CSV and split it into features, labels and names.

    Args:
        csv_path: Anything accepted by ``pandas.read_csv`` (path or file-like).

    Returns:
        Tuple ``(X, y, filenames)`` of NumPy arrays: ``X`` holds every column
        other than 'filename' and 'label' (in file order), ``y`` holds the
        'label' column and ``filenames`` the 'filename' column.

    Raises:
        KeyError: If the 'label' or 'filename' column is absent.
    """
    table = pd.read_csv(csv_path)

    # Everything that is not bookkeeping metadata is treated as a feature.
    metadata_columns = ('filename', 'label')
    feature_names = [name for name in table.columns if name not in metadata_columns]

    return (
        table[feature_names].values,
        table['label'].values,
        table['filename'].values,
    )
def reshape_for_3d_problem(X, frames_per_seq=10, joints_per_frame=13, dims=3):
    """
    Reshape data for 3D problem (Kinect: 13 joints x 3 dimensions = 39 features per frame).

    When the row width matches ``frames_per_seq * joints_per_frame * dims`` the
    data is reshaped directly. Otherwise each row is treated as
    ``frames_per_seq`` equally sized frames (width ``X.shape[1] // frames_per_seq``;
    any remainder columns at the end of the row are ignored) and only the
    leading ``joints_per_frame * dims`` values of every frame are kept.

    Args:
        X: Input data of shape (samples, total_features)
        frames_per_seq: Number of frames per sequence (default 10)
        joints_per_frame: Number of joints per frame (default 13)
        dims: Number of dimensions (default 3 for 3D)

    Returns:
        Reshaped data of shape (samples, frames_per_seq, joints_per_frame, dims).
        The fallback path always yields float64; the exact-match path keeps
        the dtype of ``X``.

    Raises:
        ValueError: If a frame holds fewer than ``joints_per_frame * dims`` values.
    """
    samples = X.shape[0]
    coords_per_frame = joints_per_frame * dims
    total_features = frames_per_seq * coords_per_frame
    target_shape = (samples, frames_per_seq, joints_per_frame, dims)

    # Exact match: the row is already frames x joints x dims laid end to end.
    if X.shape[1] == total_features:
        return X.reshape(target_shape)

    print(f"Warning: Expected {total_features} features per sample, got {X.shape[1]}")
    # Report the real slice width instead of a hard-coded 39, so the message
    # stays correct when joints_per_frame or dims are overridden.
    print(f"Attempting to extract 3D features by taking first {coords_per_frame} per frame...")

    features_per_frame = X.shape[1] // frames_per_seq
    if features_per_frame < coords_per_frame:
        raise ValueError(f"Insufficient features per frame for 3D interpretation: {features_per_frame}")

    # NOTE(review): this assumes the wanted coordinates are the leading
    # columns of every frame - confirm against the CSV column layout.
    per_frame = X[:, :frames_per_seq * features_per_frame].reshape(
        samples, frames_per_seq, features_per_frame
    )
    leading = per_frame[:, :, :coords_per_frame]
    return leading.astype(np.float64).reshape(target_shape)
def reshape_for_2d_problem(X, frames_per_seq=10, joints_per_frame=13, dims=2):
    """
    Reshape data for 2D problem (PoseNet: 13 joints x 2 dimensions = 26 features per frame).

    When the row width matches ``frames_per_seq * joints_per_frame * dims`` the
    data is reshaped directly. Otherwise each row is treated as
    ``frames_per_seq`` equally sized frames (width ``X.shape[1] // frames_per_seq``;
    any remainder columns at the end of the row are ignored) and only the
    leading ``joints_per_frame * dims`` values of every frame are kept.

    Args:
        X: Input data of shape (samples, total_features)
        frames_per_seq: Number of frames per sequence (default 10)
        joints_per_frame: Number of joints per frame (default 13)
        dims: Number of dimensions (default 2 for 2D)

    Returns:
        Reshaped data of shape (samples, frames_per_seq, joints_per_frame, dims).
        The fallback path always yields float64; the exact-match path keeps
        the dtype of ``X``.

    Raises:
        ValueError: If a frame holds fewer than ``joints_per_frame * dims`` values.
    """
    samples = X.shape[0]
    coords_per_frame = joints_per_frame * dims
    total_features = frames_per_seq * coords_per_frame
    target_shape = (samples, frames_per_seq, joints_per_frame, dims)

    # Exact match: the row is already frames x joints x dims laid end to end.
    if X.shape[1] == total_features:
        return X.reshape(target_shape)

    print(f"Warning: Expected {total_features} features per sample, got {X.shape[1]}")
    # Report the real slice width instead of a hard-coded 26, so the message
    # stays correct when joints_per_frame or dims are overridden.
    print(f"Attempting to extract 2D features by taking first {coords_per_frame} per frame...")

    features_per_frame = X.shape[1] // frames_per_seq
    if features_per_frame < coords_per_frame:
        raise ValueError(f"Insufficient features per frame for 2D interpretation: {features_per_frame}")

    # NOTE(review): this keeps the LEADING columns of each frame. If a frame
    # is stored as interleaved x, y, z per joint, those columns are not
    # (x, y) pairs for every joint - confirm the CSV column layout before
    # relying on this fallback.
    per_frame = X[:, :frames_per_seq * features_per_frame].reshape(
        samples, frames_per_seq, features_per_frame
    )
    leading = per_frame[:, :, :coords_per_frame]
    return leading.astype(np.float64).reshape(target_shape)
def prepare_adense_data(X_3d):
    """
    Prepare data for ADense (3D Dense network).

    Collapses every axis after the first into a single feature axis.

    Args:
        X_3d: 4-axis array of shape (samples, frames, joints, dims)

    Returns:
        Array of shape (samples, frames*joints*dims)
    """
    # Unpacking also enforces that the input has exactly four axes.
    n_samples, n_frames, n_joints, n_dims = X_3d.shape
    flat_width = n_frames * n_joints * n_dims
    return X_3d.reshape((n_samples, flat_width))
def prepare_acnn_data(X_3d):
    """
    Prepare data for ACNN (3D Convolutional network).

    This step is a pass-through: the input is handed back as-is, with no
    copy and no channel axis added.

    Args:
        X_3d: 3D data of shape (samples, frames, joints, dims)

    Returns:
        The same object that was passed in.
    """
    # The (samples, frames, joints, dims) layout is kept unchanged; a channel
    # axis would have to be added by the caller if a model requires one.
    acnn_ready = X_3d
    return acnn_ready
def prepare_bdense_data(X_2d):
    """
    Prepare data for BDense (2D Dense network).

    Collapses every axis after the first into a single feature axis.

    Args:
        X_2d: 4-axis array of shape (samples, frames, joints, dims)

    Returns:
        Array of shape (samples, frames*joints*dims)
    """
    # Unpacking also enforces that the input has exactly four axes.
    n_samples, n_frames, n_joints, n_dims = X_2d.shape
    flat_width = n_frames * n_joints * n_dims
    return X_2d.reshape((n_samples, flat_width))
def prepare_bcnn_data(X_2d):
    """
    Prepare data for BCNN (2D Convolutional network).

    This step is a pass-through: the input is handed back as-is, with no
    copy and no channel axis added.

    Args:
        X_2d: 2D data of shape (samples, frames, joints, dims)

    Returns:
        The same object that was passed in.
    """
    # The (samples, frames, joints, dims) layout is kept unchanged for
    # temporal processing.
    bcnn_ready = X_2d
    return bcnn_ready
def _as_plain_array(values):
    """Return ``values`` as an array, turning all-string object arrays into unicode arrays."""
    arr = np.asarray(values)
    if arr.dtype == object and all(isinstance(item, str) for item in arr.ravel()):
        # Object arrays are pickled by np.save and rejected by a default
        # np.load (allow_pickle=False); a fixed-width unicode array is not.
        return arr.astype(str)
    return arr


def save_data(X, y, filenames, output_dir, prefix):
    """
    Save prepared data to the specified directory.

    Writes ``{prefix}_X.npy``, ``{prefix}_y.npy`` and ``{prefix}_filenames.npy``
    into ``output_dir`` (created if missing) and prints the saved shapes.

    Object-dtype arrays that contain only strings (e.g. a filename column taken
    from a DataFrame) are stored as native unicode arrays so the files load
    with plain ``np.load`` without ``allow_pickle=True``. Other arrays are
    written unchanged.

    Args:
        X: Feature array.
        y: Label array.
        filenames: Array of source file names, one per sample.
        output_dir: Target directory (str or path-like).
        prefix: File name prefix shared by the three output files.
    """
    os.makedirs(output_dir, exist_ok=True)

    for suffix, values in (("X", X), ("y", y), ("filenames", filenames)):
        target = os.path.join(output_dir, f'{prefix}_{suffix}.npy')
        np.save(target, _as_plain_array(values))

    print(f"Saved {prefix} data: X shape {X.shape}, y shape {y.shape}")
def _prepare_problem(letter, tag, sensor, dims, reshape_fn, dense_fn, cnn_fn, splits, output_dir):
    """Reshape, convert and save every split for one problem; return True on success."""
    print("\n" + "=" * 60)
    print(f"PREPARING PROBLEM {letter} ({tag} - {sensor}: 13 joints x {dims} dims)")
    print("=" * 60)
    try:
        # Convert to (samples, frames, joints, dimensions) for every split first,
        # so a malformed split aborts the problem before anything is written.
        structured = [
            reshape_fn(X, frames_per_seq=10, joints_per_frame=13, dims=dims)
            for (_label, _suffix, X, _y, _names) in splits
        ]
        for (label, _suffix, _X, _y, _names), data in zip(splits, structured):
            print(f"{tag} {label} data shape: {data.shape}")

        variants = (
            ("Dense", "flattened", dense_fn),
            ("CNN", "structured", cnn_fn),
        )
        for net, layout, prepare_fn in variants:
            print(f"\nPreparing {letter}{net} data ({layout})...")
            prepared = [prepare_fn(data) for data in structured]
            # Only the non-augmented train/test shapes are reported; they are
            # the first two entries of `splits`.
            print(f"{letter}-{net} training shape: {prepared[0].shape}")
            print(f"{letter}-{net} test shape: {prepared[1].shape}")
            for (_label, suffix, _X, y, names), ready in zip(splits, prepared):
                save_data(ready, y, names, output_dir, f"{letter}_{net}_{suffix}")

        print(f"\nProblem {letter} ({tag}) data preparation completed!")
    except Exception as e:
        # Best-effort boundary: one failing problem must not stop the other,
        # but the failure is reported to the caller for the final summary.
        print(f"Error preparing Problem {letter} data: {e}")
        print(f"Skipping Problem {letter}...")
        return False
    return True


def main():
    """
    Load the four processed CSV splits and write the Dense/CNN datasets for
    Problem A (3D) and Problem B (2D).

    Input and output locations are hard-coded paths relative to the current
    working directory. A failure while preparing one problem is reported and
    that problem is skipped; the closing summary states which problems failed
    instead of claiming that everything was saved.
    """
    print("Preparing data for classification problems A and B...")

    # Load the processed data
    data_dir = Path("Data-intensive-systems/A13/Processed_Data")

    # (label used in messages, suffix used in output names, CSV file name).
    # Order matters: train and test come first (see _prepare_problem).
    split_specs = (
        ("training", "train", "processed_sequences_Good_vs_Bad_train.csv"),
        ("test", "test", "processed_sequences_Good_vs_Bad_test.csv"),
        ("augmented training", "train_aug", "processed_sequences_Good_vs_Bad_train_augmented.csv"),
        ("augmented test", "test_aug", "processed_sequences_Good_vs_Bad_test_augmented.csv"),
    )
    splits = []
    for index, (label, suffix, csv_name) in enumerate(split_specs):
        leading_newline = "\n" if index == 0 else ""
        print(f"{leading_newline}Loading {label} data...")
        X, y, names = load_processed_data(data_dir / csv_name)
        print(f"{label.capitalize()} data shape: {X.shape}")
        splits.append((label, suffix, X, y, names))

    # Prepare output directory
    output_dir = Path("Data-intensive-systems/A13/classification_problems/prepared_data")
    os.makedirs(output_dir, exist_ok=True)

    # (letter, tag, sensor, dims, reshape function, dense function, CNN function)
    problems = (
        ("A", "3D", "Kinect", 3, reshape_for_3d_problem, prepare_adense_data, prepare_acnn_data),
        ("B", "2D", "PoseNet", 2, reshape_for_2d_problem, prepare_bdense_data, prepare_bcnn_data),
    )
    failed = []
    for spec in problems:
        if not _prepare_problem(*spec, splits, output_dir):
            failed.append(spec[0])

    print("\n" + "=" * 60)
    print("CLASSIFICATION PROBLEMS DATA PREPARATION SUMMARY")
    print("=" * 60)
    for letter, tag, sensor, dims, *_functions in problems:
        print(f"Problem {letter} ({tag} - {sensor}): 13 joints x {dims} dimensions per frame")
        print(f" - {letter}Dense: Flattened features for dense networks")
        print(f" - {letter}CNN: Structured features for convolutional networks")
        print("")

    if not failed:
        print("All prepared datasets saved to:", output_dir)
        print("Both original and augmented versions are available")
        return

    # Do not claim success for problems that were skipped above.
    print(f"WARNING: preparation failed for Problem(s): {', '.join(failed)}")
    print("Their datasets are missing or incomplete.")
    if len(failed) < len(problems):
        print("Datasets for the remaining problems saved to:", output_dir)


if __name__ == "__main__":
    main()