"""Reduce periodogram peak features to two principal components.

Reads the periodograms stored in ``data/periodograms.h5``, describes each one
by its strongest peaks, projects those descriptions onto two principal
components, joins the result with ``data/targets_unc.csv`` on
``tic_id``/``sector`` and writes ``data/results.csv``.
"""

import h5py
import numpy as np
import polars as pl
from scipy.signal import find_peaks
from sklearn.decomposition import PCA


def _peak_features(p, freq, freq_max, n_peaks):
    """Return a fixed-length feature vector for one periodogram.

    Args:
        p: 1-D array of power values (``find_peaks`` only accepts 1-D input).
        freq: Frequency array indexed with the peak positions found in ``p``.
        freq_max: Divisor used to scale the peak frequencies.
        n_peaks: Number of strongest peaks to keep.

    Returns:
        A 1-D array of length ``2 * n_peaks`` laid out as
        ``[freq_0, power_0, freq_1, power_1, ...]``, with peaks ordered from
        weakest to strongest. Peak powers are expressed as
        ``(power - mean(p)) / std(p)``.
    """
    peaks, _ = find_peaks(p, distance=500)
    # argsort is ascending, so the tail holds the strongest peaks.
    top_peaks = peaks[np.argsort(p[peaks])[-n_peaks:]]
    peak_powers = (p[top_peaks] - np.mean(p)) / np.std(p)
    peak_freqs = freq[top_peaks] / freq_max
    pairs = np.column_stack((peak_freqs, peak_powers))

    # With fewer than n_peaks peaks the vector would be too short and the
    # stacked feature matrix ragged, which PCA cannot consume. Pad at the
    # front with zero rows so the strongest peak always sits in the last slot.
    missing = n_peaks - len(pairs)
    if missing > 0:
        padding = np.zeros((missing, 2), dtype=pairs.dtype)
        pairs = np.vstack((padding, pairs))
    return pairs.ravel()


with h5py.File("data/periodograms.h5", "r") as f:
    # [:] copies each dataset into memory so it stays usable after the file
    # is closed.
    tic = f["meta/tic_id"][:]
    sec = f["meta/sector"][:]
    freq = f["frequency"][:]
    power = f["power"][:]

# (tic, sector) pairs; not referenced again in the visible part of the script.
ids = list(zip(tic, sec))

n = 10  # number of strongest peaks kept per periodogram
freq_max = np.max(freq)  # loop-invariant, so computed once
results = [_peak_features(p, freq, freq_max, n) for p in power]

# One row per periodogram, 2 * n columns.
X = np.array(results)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

alfven_df = pl.read_csv("data/targets_unc.csv")
df = pl.DataFrame(
    {
        "tic_id": tic,
        "sector": sec,
        "PC1": X_pca[:, 0],
        "PC2": X_pca[:, 1],
    }
)
df = df.join(alfven_df, on=["tic_id", "sector"])
df.write_csv("data/results.csv")