"""Gradio app exposing an exoplanet tabular classifier.

Four endpoints are wired into the UI / API:

1. ``predict_toi_samples`` - fetch a few TCE/TOI rows from the NASA Exoplanet
   Archive and classify them.
2. ``predict_from_json``   - classify a single object given as a JSON object.
3. ``download_object_csv`` - download the archive row(s) of one object as CSV.
4. ``predict_from_csv``    - classify every row of an uploaded CSV.
"""
import io
import json
import os
from typing import Dict, List, Optional

import gradio as gr
import joblib
import numpy as np
import pandas as pd
import requests
import tensorflow as tf

# ===== Artifacts =====
MODEL_PATH = "modelo_tabular.h5"
SCALER_PATH = "scaler.pkl"
ENCODER_PATH = "label_encoder.pkl"
STATS_PATH = "feature_stats.json"

# Explicit check instead of `assert`: assertions are stripped under `python -O`.
for _artifact in (MODEL_PATH, SCALER_PATH, ENCODER_PATH, STATS_PATH):
    if not os.path.exists(_artifact):
        raise FileNotFoundError(f"Falta {_artifact}")

model = tf.keras.models.load_model(MODEL_PATH)
# NOTE: joblib.load unpickles arbitrary objects; these files must be trusted
# local artifacts, never user uploads.
scaler = joblib.load(SCALER_PATH)
label_encoder = joblib.load(ENCODER_PATH)
with open(STATS_PATH, encoding="utf-8") as f:
    stats = json.load(f)

FEATURE_COLUMNS: List[str] = stats["feature_columns"]
MEDIANS: Dict[str, float] = stats["medians"]
CLASSES = list(label_encoder.classes_)

TAP_URL = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync"
NSTED_URL = "https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI"


# ===== Helpers =====
def first_present(candidates, cols_set):
    """Return the column that best matches one of ``candidates``.

    An exact name match wins (in candidate order). Otherwise the first column
    whose name *contains* a candidate is returned. Columns are scanned in
    sorted order so the result does not depend on set iteration order.

    Args:
        candidates: Candidate column names / fragments, most preferred first.
        cols_set: Collection of available (string) column names.

    Returns:
        The matching column name, or ``None`` when nothing matches.
    """
    for cand in candidates:
        if cand in cols_set:
            return cand
    ordered_cols = sorted(cols_set)
    for cand in candidates:
        for col in ordered_cols:
            if cand in col:
                return col
    return None


CANDIDATES_MAP = {
    "koi_period": ["pl_orbper", "tce_period", "orbper", "period"],
    "koi_duration": ["pl_trandurh", "tce_duration", "trandur", "duration", "dur"],
    "koi_depth": ["pl_trandep", "tce_depth", "depth", "trandep"],
    "koi_prad": ["pl_rade", "prad", "rade", "planet_radius"],
    "koi_srad": ["st_rad", "srad", "stellar_radius", "star_radius"],
    "koi_teq": ["pl_eqt", "teq", "equilibrium_temp"],
    "koi_steff": ["st_teff", "teff", "stellar_teff", "effective_temp"],
    "koi_slogg": ["st_logg", "logg", "slogg"],
    "koi_smet": ["st_met", "feh", "metallicity", "smet"],
    "koi_kepmag": ["st_tmag", "tmag", "kepmag", "koi_kepmag"],
    "koi_model_snr": ["tce_model_snr", "model_snr", "snr"],
    "koi_num_transits": ["tce_num_transits", "num_transits", "ntransits", "tran_count"],
}


def _feature_frame(df: pd.DataFrame) -> pd.DataFrame:
    """Map an arbitrary table onto FEATURE_COLUMNS as a float64 frame.

    Column names are stripped and lower-cased. A column already carrying the
    feature name is used directly; otherwise the synonyms in CANDIDATES_MAP
    are tried. Features with no source column, and values that cannot be
    parsed as numbers, are left as NaN.
    """
    normalized = df.copy()
    normalized.columns = [str(c).strip().lower() for c in normalized.columns]
    # Names that collide after normalisation would make df[src] a DataFrame;
    # keep the first occurrence only.
    normalized = normalized.loc[:, ~normalized.columns.duplicated()]
    available = set(normalized.columns)

    cases = pd.DataFrame(index=normalized.index, columns=FEATURE_COLUMNS, dtype="float64")
    for feat in FEATURE_COLUMNS:
        if feat in available:
            src = feat
        else:
            src = first_present(CANDIDATES_MAP.get(feat, []), available)
        if src is not None:
            cases[feat] = pd.to_numeric(normalized[src], errors="coerce")
    return cases


def impute_and_scale(df: pd.DataFrame) -> np.ndarray:
    """Select FEATURE_COLUMNS, fill gaps with the stored medians and scale.

    Missing columns are created as NaN and NaNs are replaced by the per-column
    value in MEDIANS (0.0 when a column has no stored median). The input frame
    is not modified.

    Returns:
        The output of ``scaler.transform`` for the imputed feature matrix.
    """
    features = df.reindex(columns=FEATURE_COLUMNS)
    fill_values = {col: MEDIANS.get(col, 0.0) for col in FEATURE_COLUMNS}
    features = features.fillna(fill_values)
    return scaler.transform(features.values)


def predict_proba_from_df(df: pd.DataFrame):
    """Return ``(probs, classes)`` for the rows of ``df``.

    ``probs`` is the raw output of ``model.predict`` on the imputed and scaled
    features; ``classes`` is the list of labels known to the label encoder.
    """
    X = impute_and_scale(df)
    probs = model.predict(X, verbose=0)
    classes = list(label_encoder.classes_)
    return probs, classes


def _prediction_table(cases: pd.DataFrame, threshold: Optional[float] = None) -> pd.DataFrame:
    """Build the result table: ``prediction`` plus one ``P(<class>)`` column per class.

    When ``threshold`` is given, an ``is_exoplanet`` column is added using the
    same rule as the JSON endpoint: predicted label is CONFIRMED and
    P(CONFIRMED) >= threshold. An empty input yields an empty table with the
    same columns instead of being sent to the scaler/model.
    """
    columns = ["prediction"] + [f"P({cls})" for cls in CLASSES]
    if threshold is not None:
        columns.append("is_exoplanet")
    if len(cases) == 0:
        return pd.DataFrame(columns=columns)

    probs, classes = predict_proba_from_df(cases)
    probs = np.asarray(probs)
    preds = label_encoder.inverse_transform(np.argmax(probs, axis=1))

    res = pd.DataFrame({"prediction": preds})
    for j, cls in enumerate(classes):
        res[f"P({cls})"] = probs[:, j].astype(float)

    if threshold is not None:
        if "CONFIRMED" in classes:
            p_confirmed = probs[:, classes.index("CONFIRMED")].astype(float)
        else:
            p_confirmed = np.zeros(len(res))
        is_confirmed = np.array([str(p).upper() == "CONFIRMED" for p in preds], dtype=bool)
        res["is_exoplanet"] = is_confirmed & (p_confirmed >= float(threshold))
    return res


# ===== Endpoint 1: try the model on a few TOI/TCE rows from the API =====
def predict_toi_samples(n=2, table="tce"):
    """Fetch ``n`` objects from the archive and classify them.

    Args:
        n: Number of objects (coerced to an int, minimum 1).
        table: ``"tce"`` or ``"toi"``; anything else falls back to ``"tce"``.

    Returns:
        ``(results_dataframe, csv_path)``; the CSV is written to
        ``pred_toi_samples.csv``.

    Raises:
        requests.HTTPError: If the archive answers with an error status.
    """
    # UI sliders / API clients may send a float; pandas.sample needs an int.
    n = max(int(n), 1)
    if table not in {"tce", "toi"}:
        table = "tce"

    if table == "tce":
        query = f"""
        SELECT TOP {n} kepid, tce_plnt_num, tce_period, tce_duration, tce_depth, tce_model_snr
        FROM q1_q17_dr25_tce
        WHERE tce_period > 0 AND tce_duration > 0 AND tce_depth > 0
        ORDER BY tce_model_snr DESC
        """
        r = requests.get(TAP_URL, params={"query": query, "format": "csv"}, timeout=90)
    else:
        # NOTE(review): the original filter also tested `tce_period`, which
        # looks like a TCE-table column rather than a TOI-table one; it was
        # dropped here - confirm against the TOI column list.
        where = "(tfopwg_disp like 'PC' or tfopwg_disp like 'APC') and pl_orbper is not null"
        r = requests.get(
            NSTED_URL,
            params={"table": "toi", "where": where, "format": "csv"},
            timeout=90,
        )
    r.raise_for_status()

    df = pd.read_csv(io.StringIO(r.text))
    df = df.sample(min(n, len(df)), random_state=7).reset_index(drop=True)

    res = _prediction_table(_feature_frame(df))
    csv_path = "pred_toi_samples.csv"
    res.to_csv(csv_path, index=False)
    return res, csv_path


# ===== Endpoint 2: manual JSON (POST) =====
def predict_from_json(json_text: str, threshold: float = 0.5):
    """Classify one object described by a JSON object.

    Keys may be the ``koi_*`` feature names or any synonym listed in
    CANDIDATES_MAP (case-insensitive).

    Returns:
        A dict with ``prediction``, ``probabilities``, ``is_exoplanet`` and
        ``p_confirmed``; or ``{"error": ...}`` when the text is not valid JSON
        or is not a JSON object.
    """
    try:
        payload = json.loads(json_text)
    except (ValueError, TypeError) as e:
        return {"error": f"JSON inválido: {e}"}
    if not isinstance(payload, dict):
        # Arrays / scalars have no column names to map onto features.
        return {"error": "JSON inválido: se esperaba un objeto con pares columna/valor"}

    cases = _feature_frame(pd.DataFrame([payload]))
    probs, classes = predict_proba_from_df(cases)
    p = probs[0]
    idx = int(np.argmax(p))
    pred = label_encoder.inverse_transform([idx])[0]
    p_confirmed = float(p[classes.index("CONFIRMED")]) if "CONFIRMED" in classes else 0.0
    return {
        "prediction": pred,
        "probabilities": {cls: float(p[i]) for i, cls in enumerate(classes)},
        "is_exoplanet": bool(str(pred).upper() == "CONFIRMED" and p_confirmed >= float(threshold)),
        "p_confirmed": p_confirmed,
    }


# ===== Endpoint 3: download the CSV of one specific TOI/TCE =====
def download_object_csv(identifier: str, table: str = "toi"):
    """Download the archive row(s) for one object and save them to ``object.csv``.

    Args:
        identifier: For ``toi``, the pattern matched against the ``toi``
            column. For ``tce``, a Kepler id optionally followed by the planet
            number, e.g. ``"KIC 11446443 1"`` or ``"11446443"``.
        table: ``"toi"`` or ``"tce"``; anything else falls back to ``"toi"``.

    Returns:
        Path of the written CSV file.

    Raises:
        ValueError: If the identifier is empty or, for ``tce``, not numeric.
        requests.HTTPError: If the archive answers with an error status.
    """
    table = (table or "toi").lower()
    if table not in {"toi", "tce"}:
        table = "toi"
    identifier = (identifier or "").strip()
    if not identifier:
        raise ValueError("Identificador vacío")

    if table == "toi":
        # Double single quotes so user text cannot break out of the literal.
        # NOTE(review): the UI example is "TOI-1234.01"; if the archive stores
        # `toi` as a bare number the "TOI-" prefix will never match - confirm.
        safe_id = identifier.replace("'", "''")
        where = f"toi like '{safe_id}'"
        r = requests.get(
            NSTED_URL,
            params={"table": "toi", "where": where, "format": "csv"},
            timeout=60,
        )
    else:
        # TCE rows are looked up through TAP by kepid (+ tce_plnt_num).
        # Drop the optional "KIC" prefix *before* splitting, so that
        # "KIC 11446443 1" yields kepid=11446443 and planet number=1.
        tokens = identifier.upper().replace("KIC", " ").replace(",", " ").split()[:2]
        if not tokens or not all(t.isascii() and t.isdigit() for t in tokens):
            raise ValueError(
                f"Identificador TCE inválido: {identifier!r} (ej: 'KIC 11446443 1')"
            )
        # Digits only reach the query, which also rules out ADQL injection.
        kepid = str(int(tokens[0]))
        if len(tokens) == 2:
            plnt_num = str(int(tokens[1]))
            query = f"""
            SELECT * FROM q1_q17_dr25_tce
            WHERE CAST(kepid AS VARCHAR) like '{kepid}'
              AND CAST(tce_plnt_num AS VARCHAR) like '{plnt_num}'
            """
        else:
            query = (
                "SELECT TOP 1 * FROM q1_q17_dr25_tce "
                f"WHERE CAST(kepid AS VARCHAR) like '{kepid}'"
            )
        r = requests.get(TAP_URL, params={"query": query, "format": "csv"}, timeout=90)
    r.raise_for_status()

    path = "object.csv"
    with open(path, "w", encoding="utf-8") as f:
        f.write(r.text)
    return path


# ===== Endpoint 4: upload a CSV and predict =====
def predict_from_csv(file_obj, threshold: float = 0.5):
    """Classify every row of an uploaded CSV.

    Args:
        file_obj: Upload handed over by Gradio - either an object exposing the
            temp-file path as ``.name`` or the path itself. ``None`` yields an
            empty result.
        threshold: Minimum P(CONFIRMED) for a CONFIRMED prediction to be
            flagged in the ``is_exoplanet`` column.

    Returns:
        ``(results_dataframe, csv_path)``; the CSV is written to
        ``predicciones.csv``. ``(empty DataFrame, None)`` when no file is given.
    """
    if file_obj is None:
        return pd.DataFrame(), None
    csv_source = getattr(file_obj, "name", file_obj)
    df = pd.read_csv(csv_source)

    res = _prediction_table(_feature_frame(df), threshold=threshold)
    out_path = "predicciones.csv"
    res.to_csv(out_path, index=False)
    return res, out_path


# ===== Gradio UI =====
# NOTE(review): the original file's indentation was lost; sections 3 and 4 are
# placed below the two-column row here - confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# 🔭 Exoplanet Classifier — API + UI (Gradio)")
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 1) Probar con 2 objetos de la API (TOI o TCE)")
            table_dd = gr.Dropdown(choices=["toi", "tce"], value="tce", label="Tabla")
            n_objs = gr.Slider(1, 10, value=2, step=1, label="N objetos")
            out_df1 = gr.Dataframe(label="Resultados")
            out_file1 = gr.File(label="Descargar CSV")
            gr.Button("Probar API").click(
                predict_toi_samples,
                inputs=[n_objs, table_dd],
                outputs=[out_df1, out_file1],
                api_name="predict_toi_samples",
            )
        with gr.Column():
            gr.Markdown("### 2) JSON manual (POST)")
            jt = gr.Textbox(lines=12, label="JSON de entrada (TOI/TCE-like o koi_* )")
            thr_json = gr.Slider(0, 1, value=0.5, step=0.01, label="Umbral P(CONFIRMED)")
            out_json = gr.JSON(label="Respuesta")
            gr.Button("Predecir JSON").click(
                predict_from_json,
                inputs=[jt, thr_json],
                outputs=out_json,
                api_name="predict_json",
            )

    gr.Markdown("### 3) Descargar CSV de un objeto (por id)")
    ident = gr.Textbox(
        label="Identificador (ej: TOI-1234.01 o 'KIC 11446443 1')",
        placeholder="TOI-xxx.yy ó KIC ###### ",
    )
    table2 = gr.Dropdown(choices=["toi", "tce"], value="toi", label="Tabla")
    out_csv = gr.File(label="CSV del objeto")
    gr.Button("Descargar CSV").click(
        download_object_csv,
        inputs=[ident, table2],
        outputs=out_csv,
        api_name="toi_csv",
    )

    gr.Markdown("### 4) Subir CSV y clasificar")
    f_in = gr.File(label="CSV subida", file_types=[".csv"])
    thr = gr.Slider(0, 1, value=0.5, step=0.01, label="Umbral P(CONFIRMED)")
    out_df2 = gr.Dataframe(label="Resultados")
    out_file2 = gr.File(label="Descargar predicciones")
    gr.Button("Predecir CSV").click(
        predict_from_csv,
        inputs=[f_in, thr],
        outputs=[out_df2, out_file2],
        api_name="predict_csv",
    )

demo.queue().launch()