Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Script to verify the processed data | |
| """ | |
| import numpy as np | |
| import pandas as pd | |
| from pathlib import Path | |
def main(output_dir=None) -> None:
    """Print a summary of the processed dataset and run basic sanity checks.

    Loads ``sequences.npy``, ``labels.npy`` and
    ``processed_sequences_with_labels.csv`` from ``output_dir``, prints their
    shapes, label counts and a few sample values, and then reports whether
    the data passed the consistency checks below.

    Checks performed:
      * ``sequences`` is 3-D (samples, frames, features);
      * ``sequences`` and ``labels`` hold the same number of samples;
      * every label is either 0 (bad) or 1 (good);
      * the CSV file has a ``label`` column.

    Args:
        output_dir: Directory containing the processed files (``str`` or
            ``Path``). Defaults to ``Data-intensive-systems/A13/Processed_Data``,
            which is relative and therefore resolved against the current
            working directory.

    Raises:
        FileNotFoundError: If one of the expected files is missing (raised by
            ``np.load`` / ``pd.read_csv``).

    Failed checks are printed rather than raised, so the function still
    returns ``None`` in that case.
    """
    if output_dir is None:
        output_dir = Path("Data-intensive-systems/A13/Processed_Data")
    output_dir = Path(output_dir)

    # Load the processed data
    sequences = np.load(output_dir / "sequences.npy")
    labels = np.load(output_dir / "labels.npy")

    # Human-readable descriptions of every failed check.
    problems = []

    # ``len()`` raises on 0-d arrays, so derive the counts from the shape.
    n_samples = sequences.shape[0] if sequences.ndim else 0
    n_labels = labels.shape[0] if labels.ndim else 0
    is_3d = sequences.ndim == 3

    print("Verification of processed data:")
    print(f"- Sequences shape: {sequences.shape}")
    print(f"- Labels shape: {labels.shape}")
    print(f"- Number of samples: {n_samples}")
    if is_3d:
        print(f"- Number of frames per sequence: {sequences.shape[1]}")
        print(f"- Number of features per frame: {sequences.shape[2]}")
    else:
        problems.append(
            f"sequences has {sequences.ndim} dimension(s); "
            "expected 3 (samples, frames, features)"
        )

    n_good = np.sum(labels == 1)
    n_bad = np.sum(labels == 0)
    print(f"- Number of good sequences (label=1): {n_good}")
    print(f"- Number of bad sequences (label=0): {n_bad}")

    if n_labels != n_samples:
        problems.append(
            f"sample count mismatch: {n_samples} sequences vs {n_labels} labels"
        )
    n_other = labels.size - int(n_good) - int(n_bad)
    if n_other:
        problems.append(f"labels contains {n_other} value(s) other than 0 or 1")

    # Check if data looks reasonable. Indexing [0, 0] needs at least one
    # sample and one frame, so skip the preview when the array is empty.
    if is_3d and sequences.shape[0] > 0 and sequences.shape[1] > 0:
        print("\nSample from first sequence (first frame, first 10 features):")
        print(sequences[0, 0, :10])
        print("\nSample from last sequence (first frame, first 10 features):")
        print(sequences[-1, 0, :10])
    elif is_3d:
        problems.append("sequences is empty; there are no frames to sample")

    # Inspect the CSV export of the labels.
    df = pd.read_csv(output_dir / "processed_sequences_with_labels.csv")
    print(f"\nCSV file shape: {df.shape}")
    print("First few entries:")
    print(df.head())

    # Count good vs bad from CSV
    # NOTE(review): these counts are not cross-checked against labels.npy
    # because it is not visible here whether the CSV has one row per
    # sequence -- confirm and add the comparison if it does.
    if "label" in df.columns:
        good_count = int((df["label"] == 1).sum())
        bad_count = int((df["label"] == 0).sum())
        print(f"\nFrom CSV - Good: {good_count}, Bad: {bad_count}")
    else:
        problems.append("CSV file has no 'label' column")

    # Only claim success when every check above actually passed.
    if problems:
        print("\nVerification FAILED:")
        for problem in problems:
            print(f"- {problem}")
    else:
        print("\nProcessing completed successfully!")
# Run the verification only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()