# pca-plot / pca.py — ggrinberg35 · Add code · 6f27209
import h5py
import numpy as np
import polars as pl
from scipy.signal import find_peaks
from sklearn.decomposition import PCA
# Load the periodogram data set: the frequency grid, the power array and the
# per-row identifiers stored under the "meta" group.
with h5py.File("data/periodograms.h5", "r") as h5_file:
    # Slicing with [:] reads each dataset into an in-memory array, so the
    # values remain usable once the file has been closed.
    freq = h5_file["frequency"][:]
    power = h5_file["power"][:]
    tic = h5_file["meta/tic_id"][:]
    sec = h5_file["meta/sector"][:]

# (tic_id, sector) pairs, in the same order as `tic` and `sec`.
ids = list(zip(tic, sec))
# Build one fixed-length feature vector per periodogram from its strongest
# peaks. NOTE(review): assumes `power` is 2-D (one periodogram per row) and
# `freq` is the 1-D grid shared by every row — confirm against the HDF5 file.
n = 10  # number of strongest peaks kept per periodogram
peak_distance = 500  # minimum separation between detected peaks, in samples
freq_max = np.max(freq)  # loop-invariant frequency normaliser

results = []
for p in power:
    peaks, _ = find_peaks(p, distance=peak_distance)
    # argsort is ascending, so the last n entries are the n highest peaks,
    # ordered from weakest to strongest.
    top_peaks = peaks[np.argsort(p[peaks])[-n:]]
    # Peak heights expressed as z-scores against the whole periodogram.
    peak_powers = (p[top_peaks] - np.mean(p)) / np.std(p)
    # Peak positions as a fraction of the highest frequency on the grid.
    peak_freqs = freq[top_peaks] / freq_max
    pairs = np.column_stack((peak_freqs, peak_powers))
    missing = n - len(pairs)
    if missing > 0:
        # Fewer than n peaks were found. Zero-pad at the front so every vector
        # has 2 * n entries and the strongest peak always occupies the last
        # (frequency, power) pair; without this the rows are ragged and
        # cannot be stacked into a 2-D matrix.
        pairs = np.vstack((np.zeros((missing, 2), dtype=pairs.dtype), pairs))
    # Flatten to [freq_0, power_0, freq_1, power_1, ...].
    results.append(pairs.ravel())

# Feature matrix: one row per periodogram, 2 * n columns.
X = np.array(results)
# Project the peak-feature matrix onto its first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
pc1, pc2 = X_pca[:, 0], X_pca[:, 1]

# Table that the component scores are merged into.
alfven_df = pl.read_csv("data/targets_unc.csv")

# Inner join on (tic_id, sector): only rows present in both tables are kept.
join_keys = ["tic_id", "sector"]
df = pl.DataFrame(
    {"tic_id": tic, "sector": sec, "PC1": pc1, "PC2": pc2}
).join(alfven_df, on=join_keys, how="inner")
df.write_csv("data/results.csv")