| | import os |
| | import json |
| | import scipy.io |
| | import numpy as np |
| |
|
# Directory scanned for MATLAB .mat dataset files.
DATA_DIR = "data"
# Destination for the generated per-dataset summary JSON.
OUTPUT_FILE = "Webapp/dataset_metadata.json"
| |
|
def get_metadata(data_dir=None):
    """Scan a directory for MATLAB ``.mat`` files and summarize each one.

    For every ``.mat`` file, the largest :class:`numpy.ndarray` in the file
    is heuristically treated as the data matrix; any other array with one
    entry per sample is treated as the label vector.

    Args:
        data_dir: Directory to scan. Defaults to the module-level
            ``DATA_DIR`` when omitted (backward-compatible with the
            original zero-argument call).

    Returns:
        dict: Maps dataset name (filename without the ``.mat`` extension)
        to ``{"n_samples": int, "n_features": int, "n_classes": int}``.
        Fields are 0 when a value could not be determined. Unreadable
        files are skipped with a printed error message.
    """
    if data_dir is None:
        data_dir = DATA_DIR

    metadata = {}
    # isdir (not exists): a plain file at this path would make listdir raise.
    if not os.path.isdir(data_dir):
        print(f"Data directory {data_dir} not found.")
        return metadata

    for filename in os.listdir(data_dir):
        if not filename.endswith(".mat"):
            continue
        name = filename[:-4]
        path = os.path.join(data_dir, filename)
        try:
            mat = scipy.io.loadmat(path)

            n_samples = 0
            n_features = 0
            n_classes = 0

            # Ignore MATLAB bookkeeping entries (__header__, __version__, ...).
            keys = [k for k in mat if not k.startswith("__")]

            # Heuristic: the largest ndarray is assumed to be the data matrix.
            data_key = None
            max_size = 0
            for k in keys:
                value = mat[k]
                if isinstance(value, np.ndarray) and value.size > max_size:
                    max_size = value.size
                    data_key = k

            if data_key is not None:
                data = mat[data_key]
                if data.ndim == 2:
                    n_samples, n_features = data.shape

                # Heuristic: labels are any other array with one entry per
                # sample. Skip the search entirely when n_samples == 0 so a
                # zero-length dimension can't match arbitrary arrays.
                label_key = None
                if n_samples:
                    for k in keys:
                        if k == data_key or not isinstance(mat[k], np.ndarray):
                            continue
                        shape = mat[k].shape
                        # `shape and ...` guards 0-d arrays (shape == ()),
                        # where shape[0] would raise IndexError.
                        if shape and (
                            shape[0] == n_samples
                            or (len(shape) > 1 and shape[1] == n_samples)
                        ):
                            label_key = k
                            break

                if label_key is not None:
                    n_classes = len(np.unique(mat[label_key]))

            metadata[name] = {
                "n_samples": int(n_samples),
                "n_features": int(n_features),
                "n_classes": int(n_classes),
            }
            print(f"Processed {name}: {n_samples}x{n_features}, {n_classes} classes")

        except Exception as e:
            # Report the offending file by name (the original printed a
            # literal "(unknown)" placeholder here).
            print(f"Error processing {filename}: {e}")

    return metadata
| |
|
if __name__ == "__main__":
    meta = get_metadata()
    # Create the output directory first: open(..., "w") raises
    # FileNotFoundError if "Webapp/" does not exist yet (fresh checkout).
    out_dir = os.path.dirname(OUTPUT_FILE)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(OUTPUT_FILE, "w") as f:
        json.dump(meta, f, indent=2)
    print(f"Metadata saved to {OUTPUT_FILE}")
| |
|