Spaces:
Running
Running
File size: 1,583 Bytes
#!/usr/bin/env python3
"""
Script to verify the processed data
"""
import numpy as np
import pandas as pd
from pathlib import Path
def main(output_dir=Path("Data-intensive-systems/A13/Processed_Data")) -> None:
    """Print a summary of the processed dataset for manual inspection.

    Loads ``sequences.npy``, ``labels.npy`` and
    ``processed_sequences_with_labels.csv`` from *output_dir* and prints
    their shapes, the label counts and a few sample values. Nothing is
    asserted; the report is meant to be checked by eye.

    Args:
        output_dir: Directory holding the processed files (``str`` or
            ``Path``). The default is the location the script has always
            used; it is resolved relative to the current working
            directory.

    Raises:
        FileNotFoundError: If any of the three files is missing.
        IndexError: If ``sequences`` is empty or has fewer than three
            dimensions (the report reads ``shape[1]``, ``shape[2]`` and
            ``sequences[0, 0, :10]``).
        KeyError: If the CSV has no ``label`` column.
    """
    # Accept plain strings as well as Path objects.
    output_dir = Path(output_dir)

    # Load the processed data
    sequences = np.load(output_dir / "sequences.npy")
    labels = np.load(output_dir / "labels.npy")

    print("Verification of processed data:")
    print(f"- Sequences shape: {sequences.shape}")
    print(f"- Labels shape: {labels.shape}")
    print(f"- Number of samples: {len(sequences)}")
    print(f"- Number of frames per sequence: {sequences.shape[1]}")
    print(f"- Number of features per frame: {sequences.shape[2]}")
    print(f"- Number of good sequences (label=1): {np.sum(labels == 1)}")
    print(f"- Number of bad sequences (label=0): {np.sum(labels == 0)}")

    # Check if data looks reasonable
    print("\nSample from first sequence (first frame, first 10 features):")
    print(sequences[0, 0, :10])
    print("\nSample from last sequence (first frame, first 10 features):")
    print(sequences[-1, 0, :10])

    # Show the CSV export so it can be compared with the array counts above
    df = pd.read_csv(output_dir / "processed_sequences_with_labels.csv")
    print(f"\nCSV file shape: {df.shape}")
    print("First few entries:")
    print(df.head())

    # Count good vs bad from CSV; Series.sum() is vectorised, whereas the
    # builtin sum() would iterate the column element by element.
    csv_labels = df["label"]
    good_count = (csv_labels == 1).sum()
    bad_count = (csv_labels == 0).sum()
    print(f"\nFrom CSV - Good: {good_count}, Bad: {bad_count}")

    print("\nProcessing completed successfully!")
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|