# Hugging Face Space app source, as captured from the Space's file viewer
# (revision 6f27209, file size 4,945 bytes, 138 lines in the original file).
# The Space status shown at capture time was "Build error".
import dotenv
import numpy as np
import polars as pl
import huggingface_hub as hf
from bokeh.io import curdoc
from bokeh.events import Tap
from bokeh.plotting import figure
from bokeh.palettes import Viridis256
from bokeh.layouts import row, column
from bokeh.models import ColumnDataSource, LinearColorMapper, ColorBar, Div
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies Hugging Face credentials for the
# download below — confirm.
dotenv.load_dotenv()

# Fetch the "ggrinberg35/pca-results" dataset repository into /data/.
# This runs at import time, i.e. every time the app module is loaded.
hf.snapshot_download(
    repo_id="ggrinberg35/pca-results",
    repo_type="dataset",
    local_dir="/data/"
)

# Per-observation table read eagerly; the columns used later in this file are
# tic_id, sector, p_subalfvenic, PC1 and PC2.
df = pl.read_parquet("/data/results.parquet")

# Array returned unchanged as the frequency axis by get_periodogram().
freq = np.load("/data/frequency.npy")

# Lazy scans: nothing is read here; rows are filtered per (tic, sector) on demand.
# NOTE(review): this is the only path with a "pca-results/" component, while the
# other three files are read directly from /data/ — confirm it matches the
# layout produced by snapshot_download above.
lightcurves = pl.scan_parquet("/data/pca-results/lightcurves.parquet")
periodograms = pl.scan_parquet("/data/periodograms.parquet")
def get_lightcurve(tic, sector):
    """Return the (time, flux) arrays of one lightcurve.

    Filters the lazily scanned ``lightcurves`` table down to the rows whose
    ``tic`` and ``sector`` columns match the arguments and materialises only
    the ``time`` and ``flux`` columns.

    Args:
        tic: Star identifier; anything accepted by ``int()`` (NumPy integer
            scalars included).
        sector: Sector number; anything accepted by ``int()``.

    Returns:
        A ``(time, flux)`` tuple of NumPy arrays. Both are empty when no row
        matches.
    """
    lc = (
        lightcurves
        .filter((pl.col("tic") == pl.lit(int(tic))) & (pl.col("sector") == pl.lit(int(sector))))
        .select(["time", "flux"])
        # The scan is lazy: without collect() this is still a LazyFrame, which
        # has no get_column(), so the return statement below would raise.
        .collect()
    )
    return lc.get_column("time").to_numpy(), lc.get_column("flux").to_numpy()
def get_periodogram(tic, sector):
    """Return the (frequency, power) arrays of one periodogram.

    Filters the lazily scanned ``periodograms`` table down to the rows whose
    ``tic`` and ``sector`` columns match the arguments and materialises the
    ``power`` column.

    Args:
        tic: Star identifier; anything accepted by ``int()`` (NumPy integer
            scalars included).
        sector: Sector number; anything accepted by ``int()``.

    Returns:
        A ``(freq, power)`` tuple, where ``freq`` is the module-level
        frequency array (returned as-is, shared by all periodograms) and
        ``power`` is the matching ``power`` column as a NumPy array. ``power``
        is empty when no row matches.
        NOTE(review): assumes ``power`` has one value per entry of ``freq``
        — confirm against the parquet schema.
    """
    pg = (
        periodograms
        # Cast to plain Python ints, exactly as get_lightcurve() does, so the
        # literals do not depend on how the caller's NumPy scalars are handled.
        .filter((pl.col("tic") == pl.lit(int(tic))) & (pl.col("sector") == pl.lit(int(sector))))
        .select(["power"])
        .collect()
    )
    return freq, pg.get_column("power").to_numpy()
# Identifier columns of the results table, kept as module-level arrays.
tic_arr = df["tic_id"].to_numpy()
sec_arr = df["sector"].to_numpy()

# One row per observation in the PCA scatter plot: position (x, y) comes from
# the first two principal components, colour from p_sub.
source_pca = ColumnDataSource({
    "tic": tic_arr,
    "sector": sec_arr,
    "p_sub": df["p_subalfvenic"].to_numpy(),
    "x": df["PC1"].to_numpy(),
    "y": df["PC2"].to_numpy(),
})

# Detail-plot sources start empty. The column names must be "x" and "y":
# those are the fields the glyphs are bound to and the keys tap_callback()
# writes. Any other initial names leave the glyphs pointing at columns that
# do not exist until the first click.
source_lc = ColumnDataSource(data=dict(x=[], y=[]))   # lightcurve: time vs. flux
source_flc = ColumnDataSource(data=dict(x=[], y=[]))  # folded lightcurve: phase vs. flux
source_pg = ColumnDataSource(data=dict(x=[], y=[]))   # periodogram: frequency vs. power

# Text panel describing the currently selected star (filled in by the tap handler).
info = Div()

# Colour scale spanning the observed p_sub range. The NaN-ignoring NumPy
# reductions run vectorised and keep a stray NaN from poisoning the limits.
cmap = LinearColorMapper(
    palette=Viridis256,
    low=float(np.nanmin(source_pca.data["p_sub"])),
    high=float(np.nanmax(source_pca.data["p_sub"])),
)
# Side length in pixels of the square PCA panel. The three detail panels share
# its width and take a third of its height each, so the stacked column lines
# up with the PCA panel.
pca_dim = 600
fig_pca = figure(width=pca_dim, height=pca_dim)
fig_lc = figure(width=pca_dim, height=pca_dim // 3)
fig_flc = figure(width=pca_dim, height=pca_dim // 3)
fig_pg = figure(width=pca_dim, height=pca_dim // 3)

# PCA scatter, coloured by p_sub through the shared colour mapper.
fig_pca.scatter(
    source=source_pca,
    x="x",
    y="y",
    fill_color={"field": "p_sub", "transform": cmap},
    line_color=None,
    size=3,
)
cbar = ColorBar(color_mapper=cmap, label_standoff=8, location=(0, 0))
fig_pca.add_layout(cbar, "right")

# Hollow ring marking the clicked point. scatter() (default marker: circle) is
# used instead of circle(), whose screen-unit `size` argument is deprecated.
sel_source = ColumnDataSource(dict(x=[], y=[]))
fig_pca.scatter("x", "y", source=sel_source, size=10, fill_color=None, line_color="black", line_width=2)

# Red markers for every observation sharing the selected TIC.
source_highlight = ColumnDataSource(dict(x=[], y=[]))
fig_pca.scatter("x", "y", source=source_highlight, size=6, fill_color="red", line_color="black", line_width=1)

# Detail plots; all three read the "x"/"y" columns of their source.
fig_lc.scatter(source=source_lc, x="x", y="y", size=3)
fig_flc.scatter(source=source_flc, x="x", y="y", size=3)
# The stroke width of a line glyph is `line_width`; `width` is not a Line property.
fig_pg.line(source=source_pg, x="x", y="y", line_width=1)
def _clear_selection():
    """Empty every detail plot, the info text and all selection markers."""
    for src in (source_pg, source_lc, source_flc, source_highlight, sel_source):
        src.data = dict(x=[], y=[])
    info.text = ""
    source_pca.selected.indices = []


def _inside_pca_ranges(x, y):
    """Return True when (x, y) lies within the PCA figure's current axis ranges."""
    x0, x1 = fig_pca.x_range.start, fig_pca.x_range.end
    y0, y1 = fig_pca.y_range.start, fig_pca.y_range.end
    # A coordinate or range bound that is not available yet cannot be compared;
    # treat the click as outside instead of raising TypeError.
    if any(v is None for v in (x, y, x0, x1, y0, y1)):
        return False
    return x0 <= x <= x1 and y0 <= y <= y1


def tap_callback(event):
    """Handle a tap on the PCA figure.

    A tap outside the current axis ranges (e.g. on the colour bar or the axes)
    clears the selection. Otherwise the observation nearest to the tap in
    (x, y) data space is selected: it is ringed, every observation with the
    same TIC is highlighted, the info panel is updated, and the periodogram,
    lightcurve and folded lightcurve of that observation are loaded.

    Args:
        event: Bokeh tap event; its ``x`` and ``y`` attributes are read as the
            tap position in data coordinates.
    """
    x_click, y_click = event.x, event.y
    if not _inside_pca_ranges(x_click, y_click):
        _clear_selection()
        return

    xs = np.asarray(source_pca.data["x"])
    ys = np.asarray(source_pca.data["y"])
    tics = np.asarray(source_pca.data["tic"])
    if xs.size == 0:
        # Nothing plotted, so there is no nearest point to pick.
        _clear_selection()
        return

    # Nearest neighbour by squared Euclidean distance (the square root is not
    # needed to find the minimum).
    i = int(np.argmin((xs - x_click) ** 2 + (ys - y_click) ** 2))
    tic = source_pca.data["tic"][i]
    sector = source_pca.data["sector"][i]

    source_pca.selected.indices = [i]
    sel_source.data = dict(x=[xs[i]], y=[ys[i]])

    # Build the whole HTML snippet first so the Div is updated only once.
    info.text = (
        f"<strong>Currently selected star:</strong> TIC {tic}, sector {sector}"
        f"<br>View on: <a href=\"https://simbad.cfa.harvard.edu/simbad/sim-basic?Ident=TIC+{tic}&submit=SIMBAD+search\">Simbad</a> • <a href=\"https://exofop.ipac.caltech.edu/tess/target.php?id={tic}\">ExoFOP</a>"
    )

    # Every observation of the same star (this includes the selected one).
    same_star = tics == tic
    source_highlight.data = dict(x=xs[same_star], y=ys[same_star])

    # Local name chosen so the module-level `freq` array is not shadowed.
    pg_freq, power = get_periodogram(tic, sector)
    source_pg.data = dict(x=pg_freq, y=power)

    time, flux = get_lightcurve(tic, sector)
    source_lc.data = dict(x=time, y=flux)

    # Folding needs a periodogram peak and at least one lightcurve sample.
    if len(power) == 0 or len(time) == 0:
        source_flc.data = dict(x=[], y=[])
        return

    peak_freq = pg_freq[np.argmax(power)]
    if not np.isfinite(peak_freq) or peak_freq <= 0:
        # No usable period (1 / 0 or 1 / NaN); leave the folded plot empty.
        source_flc.data = dict(x=[], y=[])
        return

    # Fold on the period of the strongest peak, with phase measured from the
    # first sample, and sort by phase for display.
    period = 1.0 / peak_freq
    phase = ((time - time[0]) / period) % 1.0
    order = np.argsort(phase)
    source_flc.data = dict(x=phase[order], y=flux[order])
# Route taps on the PCA figure to the selection handler.
fig_pca.on_event(Tap, tap_callback)

# Static page text.
title = Div(text="<h2>PCA Lightcurve Explorer</h2>")
blurb = Div(text="""
<p>Click in the PCA plot to load the corresponding periodogram and lightcurve. Click on the color bar or axes to unselect the point. Red points indicate other observations of the selected star.</p>
""")

# Page layout: title on top, then the PCA panel next to the stacked
# periodogram / lightcurve / folded-lightcurve panels, then the help text and
# the selection info. (The stray "|" that followed the closing parentheses
# was not valid Python and has been dropped.)
curdoc().add_root(column(
    title,
    row(fig_pca, column(fig_pg, fig_lc, fig_flc)),
    blurb,
    info,
    sizing_mode="fixed",
    align="center",
))