Spaces:

Bachstelze
/

pose-deep-learning

Running

pose-deep-learning / A13 /classification_problems /prepare_classification_problems.py

Bachstelze

init A13 data

b94b2ad 20 days ago

14.1 kB

	#!/usr/bin/env python3
	"""
	Script to prepare data for 2 classification problems:
	- Problem A (3D): Kinect frame sequence: 13 joints x 3 dimensions = 39 features per frame
	- Problem B (2D): PoseNet frame sequence: 13 joints x 2 dimensions = 26 features per frame

	Each problem will have two approaches:
	- Dense: Flattened features for dense neural networks
	- CNN: Structured features for convolutional neural networks
	"""

	import pandas as pd
	import numpy as np
	from pathlib import Path
	import os


	def load_processed_data(csv_path):
	    """
	    Read a processed-sequence CSV and split it into features, labels and file names.

	    Args:
	        csv_path: Path (or file-like object) handed to ``pd.read_csv``.

	    Returns:
	        Tuple ``(X, y, filenames)``: ``X`` holds the values of every column
	        except 'filename' and 'label' (in file order), ``y`` the 'label'
	        column and ``filenames`` the 'filename' column.

	    Raises:
	        KeyError: If the 'label' or 'filename' column is missing.
	    """
	    table = pd.read_csv(csv_path)

	    # Everything that is not bookkeeping metadata is treated as a feature.
	    metadata_columns = ('filename', 'label')
	    feature_names = [name for name in table.columns if name not in metadata_columns]

	    features = table[feature_names].values
	    labels = table['label'].values
	    source_files = table['filename'].values
	    return features, labels, source_files


	def reshape_for_3d_problem(X, frames_per_seq=10, joints_per_frame=13, dims=3):
	    """
	    Reshape data for 3D problem (Kinect: 13 joints x 3 dimensions = 39 features per frame).

	    When every row has exactly ``frames_per_seq * joints_per_frame * dims``
	    features the rows are reshaped directly. Otherwise the row is split into
	    ``frames_per_seq`` equally sized frames and the first
	    ``joints_per_frame * dims`` values of each frame are kept; a warning is
	    printed in that case.

	    Args:
	        X: Input data of shape (samples, total_features)
	        frames_per_seq: Number of frames per sequence (default 10)
	        joints_per_frame: Number of joints per frame (default 13)
	        dims: Number of dimensions (default 3 for 3D)

	    Returns:
	        Reshaped data of shape (samples, frames_per_seq, joints_per_frame, dims).
	        The extraction fallback always returns a new float64 array.

	    Raises:
	        ValueError: If X is not 2D, if the feature count cannot be split
	            evenly into ``frames_per_seq`` frames, or if a frame holds fewer
	            than ``joints_per_frame * dims`` values.
	    """
	    X = np.asarray(X)
	    if X.ndim != 2:
	        raise ValueError(f"Expected a 2D array of shape (samples, features), got {X.ndim}D input")

	    samples, n_features = X.shape
	    coords_per_frame = joints_per_frame * dims
	    total_features = frames_per_seq * coords_per_frame

	    # Exact match: the row already is frames x joints x dims in C order.
	    if n_features == total_features:
	        return X.reshape(samples, frames_per_seq, joints_per_frame, dims)

	    print(f"Warning: Expected {total_features} features per sample, got {n_features}")
	    print(f"Attempting to extract 3D features by taking first {coords_per_frame} per frame...")

	    # A row that does not divide evenly into frames cannot be aligned; the
	    # floor division used before would silently shift every later frame.
	    if frames_per_seq <= 0 or n_features % frames_per_seq != 0:
	        raise ValueError(
	            f"Cannot split {n_features} features evenly into {frames_per_seq} frames"
	        )

	    features_per_frame = n_features // frames_per_seq
	    if features_per_frame < coords_per_frame:
	        raise ValueError(f"Insufficient features per frame for 3D interpretation: {features_per_frame}")

	    # NOTE(review): this assumes the 3D coordinates are the FIRST
	    # joints_per_frame * dims values of every frame - confirm against the CSV layout.
	    per_frame = X.reshape(samples, frames_per_seq, features_per_frame)
	    leading = per_frame[:, :, :coords_per_frame]
	    # Copy into float64 so the result matches the historical np.zeros-based output.
	    X_3d = np.array(leading, dtype=np.float64)
	    return X_3d.reshape(samples, frames_per_seq, joints_per_frame, dims)


	def reshape_for_2d_problem(X, frames_per_seq=10, joints_per_frame=13, dims=2):
	    """
	    Reshape data for 2D problem (PoseNet: 13 joints x 2 dimensions = 26 features per frame).

	    When every row has exactly ``frames_per_seq * joints_per_frame * dims``
	    features the rows are reshaped directly. Otherwise the row is split into
	    ``frames_per_seq`` equally sized frames and the first
	    ``joints_per_frame * dims`` values of each frame are kept; a warning is
	    printed in that case.

	    Args:
	        X: Input data of shape (samples, total_features)
	        frames_per_seq: Number of frames per sequence (default 10)
	        joints_per_frame: Number of joints per frame (default 13)
	        dims: Number of dimensions (default 2 for 2D)

	    Returns:
	        Reshaped data of shape (samples, frames_per_seq, joints_per_frame, dims).
	        The extraction fallback always returns a new float64 array.

	    Raises:
	        ValueError: If X is not 2D, if the feature count cannot be split
	            evenly into ``frames_per_seq`` frames, or if a frame holds fewer
	            than ``joints_per_frame * dims`` values.
	    """
	    X = np.asarray(X)
	    if X.ndim != 2:
	        raise ValueError(f"Expected a 2D array of shape (samples, features), got {X.ndim}D input")

	    samples, n_features = X.shape
	    coords_per_frame = joints_per_frame * dims
	    total_features = frames_per_seq * coords_per_frame

	    # Exact match: the row already is frames x joints x dims in C order.
	    if n_features == total_features:
	        return X.reshape(samples, frames_per_seq, joints_per_frame, dims)

	    print(f"Warning: Expected {total_features} features per sample, got {n_features}")
	    print(f"Attempting to extract 2D features by taking first {coords_per_frame} per frame...")

	    # A row that does not divide evenly into frames cannot be aligned; the
	    # floor division used before would silently shift every later frame.
	    if frames_per_seq <= 0 or n_features % frames_per_seq != 0:
	        raise ValueError(
	            f"Cannot split {n_features} features evenly into {frames_per_seq} frames"
	        )

	    features_per_frame = n_features // frames_per_seq
	    if features_per_frame < coords_per_frame:
	        raise ValueError(f"Insufficient features per frame for 2D interpretation: {features_per_frame}")

	    # NOTE(review): this assumes the 2D coordinates are the FIRST
	    # joints_per_frame * dims values of every frame. If a frame stores other
	    # data first (for example x, y, z triples), this slice returns those
	    # values reinterpreted as (joint, 2) pairs - confirm against the CSV layout.
	    per_frame = X.reshape(samples, frames_per_seq, features_per_frame)
	    leading = per_frame[:, :, :coords_per_frame]
	    # Copy into float64 so the result matches the historical np.zeros-based output.
	    X_2d = np.array(leading, dtype=np.float64)
	    return X_2d.reshape(samples, frames_per_seq, joints_per_frame, dims)


	def prepare_adense_data(X_3d):
	    """
	    Flatten structured 3D sequences for ADense (3D Dense network).

	    Args:
	        X_3d: 3D data of shape (samples, frames, joints, dims)

	    Returns:
	        Flattened data of shape (samples, frames * joints * dims); values keep
	        the input's frame -> joint -> dim (C) order within each row.

	    Raises:
	        ValueError: If X_3d does not have exactly four axes.
	    """
	    n_samples, n_frames, n_joints, n_dims = X_3d.shape
	    row_width = n_frames * n_joints * n_dims
	    return X_3d.reshape((n_samples, row_width))


	def prepare_acnn_data(X_3d):
	    """
	    Prepare data for ACNN (3D Convolutional network).

	    The structured layout is kept as-is: the input is handed back unchanged
	    (same object, no copy, no channel axis added). A consumer that needs a
	    (samples, channels, depth, height, width) layout must add the channel
	    axis itself.

	    Args:
	        X_3d: 3D data of shape (samples, frames, joints, dims)

	    Returns:
	        The very same array that was passed in.
	    """
	    # Identity step: exists so the ACNN pipeline mirrors the ADense one.
	    cnn_ready = X_3d
	    return cnn_ready


	def prepare_bdense_data(X_2d):
	    """
	    Flatten structured 2D sequences for BDense (2D Dense network).

	    Args:
	        X_2d: 2D data of shape (samples, frames, joints, dims)

	    Returns:
	        Flattened data of shape (samples, frames * joints * dims); values keep
	        the input's frame -> joint -> dim (C) order within each row.

	    Raises:
	        ValueError: If X_2d does not have exactly four axes.
	    """
	    n_samples, n_frames, n_joints, n_dims = X_2d.shape
	    row_width = n_frames * n_joints * n_dims
	    return X_2d.reshape((n_samples, row_width))


	def prepare_bcnn_data(X_2d):
	    """
	    Prepare data for BCNN (2D Convolutional network).

	    The structured layout is kept as-is: the input is handed back unchanged
	    (same object, no copy, no channel axis added).

	    Args:
	        X_2d: 2D data of shape (samples, frames, joints, dims)

	    Returns:
	        The very same array that was passed in.
	    """
	    # Identity step: exists so the BCNN pipeline mirrors the BDense one.
	    cnn_ready = X_2d
	    return cnn_ready


	def save_data(X, y, filenames, output_dir, prefix):
	    """
	    Write one prepared dataset to ``output_dir`` as three ``.npy`` files.

	    Args:
	        X: Feature array, saved as '<prefix>_X.npy'.
	        y: Label array, saved as '<prefix>_y.npy'.
	        filenames: Source file names, saved as '<prefix>_filenames.npy'.
	        output_dir: Target directory; created (with parents) if missing.
	        prefix: File-name prefix identifying the dataset.
	    """
	    os.makedirs(output_dir, exist_ok=True)

	    # NOTE(review): np.save pickles object-dtype arrays; if y or filenames
	    # arrive as object arrays (depends on the caller), readers will need
	    # np.load(..., allow_pickle=True) - confirm this is intended.
	    outputs = {
	        f'{prefix}_X.npy': X,
	        f'{prefix}_y.npy': y,
	        f'{prefix}_filenames.npy': filenames,
	    }
	    for file_name, values in outputs.items():
	        np.save(os.path.join(output_dir, file_name), values)

	    print(f"Saved {prefix} data: X shape {X.shape}, y shape {y.shape}")


	# (split key, human-readable label, CSV file name) for every dataset variant,
	# in the order they are loaded, reshaped and saved.
	_DATA_SPLITS = (
	    ("train", "training", "processed_sequences_Good_vs_Bad_train.csv"),
	    ("test", "test", "processed_sequences_Good_vs_Bad_test.csv"),
	    ("train_aug", "augmented training", "processed_sequences_Good_vs_Bad_train_augmented.csv"),
	    ("test_aug", "augmented test", "processed_sequences_Good_vs_Bad_test_augmented.csv"),
	)


	def _prepare_problem(problem, tag, dims, reshape_fn, dense_fn, cnn_fn, splits, output_dir):
	    """Reshape every split for one problem, then build and save its Dense and CNN variants.

	    Args:
	        problem: Problem letter used in messages and file prefixes ("A" or "B").
	        tag: Dimensionality tag used in messages ("3D" or "2D").
	        dims: Coordinates per joint passed to ``reshape_fn``.
	        reshape_fn: Turns flat rows into (samples, frames, joints, dims).
	        dense_fn: Produces the flattened (Dense) variant.
	        cnn_fn: Produces the structured (CNN) variant.
	        splits: Mapping split key -> (label, X, y, filenames).
	        output_dir: Directory the ``.npy`` files are written to.
	    """
	    # Reshape all splits first so that nothing is written if any of them fails.
	    structured = {
	        key: reshape_fn(X, frames_per_seq=10, joints_per_frame=13, dims=dims)
	        for key, (_label, X, _y, _names) in splits.items()
	    }
	    for key, (label, _X, _y, _names) in splits.items():
	        print(f"{tag} {label} data shape: {structured[key].shape}")

	    stages = (("Dense", "flattened", dense_fn), ("CNN", "structured", cnn_fn))
	    for model, layout, prepare_fn in stages:
	        print(f"\nPreparing {problem}{model} data ({layout})...")
	        prepared = {key: prepare_fn(array) for key, array in structured.items()}

	        print(f"{problem}-{model} training shape: {prepared['train'].shape}")
	        print(f"{problem}-{model} test shape: {prepared['test'].shape}")

	        for key, (_label, _X, y, names) in splits.items():
	            save_data(prepared[key], y, names, output_dir, f"{problem}_{model}_{key}")

	    print(f"\nProblem {problem} ({tag}) data preparation completed!")


	def main():
	    """
	    Load the processed CSV splits and write the prepared datasets for problems A and B.

	    Four splits (train, test and their augmented versions) are read from
	    ``Data-intensive-systems/A13/Processed_Data`` and, for each problem, saved
	    in a Dense (flattened) and a CNN (structured) variant under
	    ``Data-intensive-systems/A13/classification_problems/prepared_data``.
	    Both paths are relative to the current working directory.

	    A failure while preparing one problem is reported and that problem is
	    skipped; the final summary states which problems failed instead of
	    unconditionally claiming that everything was saved.
	    """
	    print("Preparing data for classification problems A and B...")

	    # Load the processed data
	    data_dir = Path("Data-intensive-systems/A13/Processed_Data")

	    splits = {}
	    for index, (key, label, file_name) in enumerate(_DATA_SPLITS):
	        # Only the first loading message is preceded by a blank line.
	        print(("\n" if index == 0 else "") + f"Loading {label} data...")
	        X, y, names = load_processed_data(data_dir / file_name)
	        print(f"{label.capitalize()} data shape: {X.shape}")
	        splits[key] = (label, X, y, names)

	    # Prepare output directory
	    output_dir = Path("Data-intensive-systems/A13/classification_problems/prepared_data")
	    os.makedirs(output_dir, exist_ok=True)

	    # NOTE(review): both problems are derived from the SAME loaded arrays; each
	    # reshape function keeps the leading values of every frame. Confirm that
	    # this matches how 3D and 2D coordinates are laid out in the CSV files.
	    problems = (
	        ("A", "3D", "Kinect", 3, reshape_for_3d_problem, prepare_adense_data, prepare_acnn_data),
	        ("B", "2D", "PoseNet", 2, reshape_for_2d_problem, prepare_bdense_data, prepare_bcnn_data),
	    )

	    failed = []
	    for problem, tag, sensor, dims, reshape_fn, dense_fn, cnn_fn in problems:
	        print("\n" + "=" * 60)
	        print(f"PREPARING PROBLEM {problem} ({tag} - {sensor}: 13 joints x {dims} dims)")
	        print("=" * 60)

	        try:
	            _prepare_problem(problem, tag, dims, reshape_fn, dense_fn, cnn_fn, splits, output_dir)
	        except Exception as e:
	            # Deliberate best-effort boundary: one problem failing must not
	            # prevent the other from being prepared. The failure is recorded
	            # so the summary below can report it.
	            print(f"Error preparing Problem {problem} data: {e}")
	            print(f"Skipping Problem {problem}...")
	            failed.append(problem)

	    print("\n" + "=" * 60)
	    print("CLASSIFICATION PROBLEMS DATA PREPARATION SUMMARY")
	    print("=" * 60)
	    print("Problem A (3D - Kinect): 13 joints x 3 dimensions per frame")
	    print(" - ADense: Flattened features for dense networks")
	    print(" - ACNN: Structured features for convolutional networks")
	    print("")
	    print("Problem B (2D - PoseNet): 13 joints x 2 dimensions per frame")
	    print(" - BDense: Flattened features for dense networks")
	    print(" - BCNN: Structured features for convolutional networks")
	    print("")

	    if not failed:
	        print("All prepared datasets saved to:", output_dir)
	        print("Both original and augmented versions are available")
	        return

	    # At least one problem was skipped: do not claim a complete result.
	    print("WARNING: preparation failed for problem(s): " + ", ".join(failed))
	    print("Their datasets may be missing or incomplete in:", output_dir)
	    if len(failed) < len(problems):
	        print("Datasets for the remaining problem(s) saved to:", output_dir)


	# Run the preparation pipeline only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()