Spaces:
Sleeping
Sleeping
Upload app.py
#3
by
granamaa
- opened
app.py
CHANGED
|
@@ -1,321 +1,260 @@
|
|
| 1 |
-
import io
|
| 2 |
-
import
|
| 3 |
-
import pandas as pd
|
| 4 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
| 5 |
import gradio as gr
|
| 6 |
-
import json
|
| 7 |
-
import pickle
|
| 8 |
-
import os
|
| 9 |
-
from typing import Dict, List, Any
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
TENSORFLOW_AVAILABLE = True
|
| 22 |
-
except ImportError as e:
|
| 23 |
-
print(f"❌ TensorFlow no disponible: {e}")
|
| 24 |
-
TENSORFLOW_AVAILABLE = False
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
with open("feature_stats.json", "r") as f:
|
| 30 |
-
feature_stats = json.load(f)
|
| 31 |
-
print("✅ Feature stats cargado")
|
| 32 |
-
|
| 33 |
-
with open("scaler.pkl", "rb") as f:
|
| 34 |
-
scaler = pickle.load(f)
|
| 35 |
-
print("✅ Scaler cargado")
|
| 36 |
-
|
| 37 |
-
with open("label_encoder.pkl", "rb") as f:
|
| 38 |
-
label_encoder = pickle.load(f)
|
| 39 |
-
print("✅ Label encoder cargado")
|
| 40 |
-
|
| 41 |
-
model = None
|
| 42 |
-
if TENSORFLOW_AVAILABLE:
|
| 43 |
-
model = load_model("modulo_tabular.h5")
|
| 44 |
-
print("✅ Modelo cargado")
|
| 45 |
-
|
| 46 |
-
return model, scaler, label_encoder, feature_stats
|
| 47 |
-
|
| 48 |
-
except Exception as e:
|
| 49 |
-
print(f"❌ Error cargando recursos: {str(e)}")
|
| 50 |
-
feature_stats = {
|
| 51 |
-
"feature_columns": [
|
| 52 |
-
"koi_period", "koi_duration", "koi_depth", "koi_prad",
|
| 53 |
-
"koi_srad", "koi_teq", "koi_steff", "koi_slogg",
|
| 54 |
-
"koi_smet", "koi_kepmag", "koi_model_snr", "koi_num_transits"
|
| 55 |
-
],
|
| 56 |
-
"train_medians": {
|
| 57 |
-
"koi_period": 10.0, "koi_duration": 5.0, "koi_depth": 1000.0,
|
| 58 |
-
"koi_prad": 2.0, "koi_srad": 1.0, "koi_teq": 1000.0,
|
| 59 |
-
"koi_steff": 6000.0, "koi_slogg": 4.5, "koi_smet": 0.0,
|
| 60 |
-
"koi_kepmag": 12.0, "koi_model_snr": 10.0, "koi_num_transits": 3.0
|
| 61 |
-
}
|
| 62 |
-
}
|
| 63 |
-
return None, None, None, feature_stats
|
| 64 |
|
| 65 |
-
#
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
-
def
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
else:
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
pred_idx = np.argmax(probs)
|
| 96 |
-
pred_label = label_encoder.inverse_transform([pred_idx])[0]
|
| 97 |
-
|
| 98 |
-
return {
|
| 99 |
-
"prediction": pred_label,
|
| 100 |
-
"probabilities": {
|
| 101 |
-
"CONFIRMED": float(probs[0]),
|
| 102 |
-
"CANDIDATE": float(probs[1]),
|
| 103 |
-
"FALSE_POSITIVE": float(probs[2])
|
| 104 |
-
},
|
| 105 |
-
"input_features": dict(zip(feature_columns, input_features))
|
| 106 |
-
}
|
| 107 |
-
|
| 108 |
-
except Exception as e:
|
| 109 |
-
return {"error": str(e)}
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
koi_steff: float, koi_slogg: float, koi_smet: float,
|
| 115 |
-
koi_kepmag: float, koi_model_snr: float, koi_num_transits: float
|
| 116 |
-
) -> Dict:
|
| 117 |
-
"""Wrapper que toma parámetros individuales y los convierte a dict"""
|
| 118 |
-
features = {
|
| 119 |
-
"koi_period": koi_period,
|
| 120 |
-
"koi_duration": koi_duration,
|
| 121 |
-
"koi_depth": koi_depth,
|
| 122 |
-
"koi_prad": koi_prad,
|
| 123 |
-
"koi_srad": koi_srad,
|
| 124 |
-
"koi_teq": koi_teq,
|
| 125 |
-
"koi_steff": koi_steff,
|
| 126 |
-
"koi_slogg": koi_slogg,
|
| 127 |
-
"koi_smet": koi_smet,
|
| 128 |
-
"koi_kepmag": koi_kepmag,
|
| 129 |
-
"koi_model_snr": koi_model_snr,
|
| 130 |
-
"koi_num_transits": koi_num_transits
|
| 131 |
-
}
|
| 132 |
-
return predict_single(features)
|
| 133 |
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
resp.raise_for_status()
|
| 147 |
-
toi_df = pd.read_csv(io.StringIO(resp.text))
|
| 148 |
-
|
| 149 |
-
if toi_df.empty:
|
| 150 |
-
return "❌ No se encontraron objetos TOI"
|
| 151 |
-
|
| 152 |
-
# Tomar muestra
|
| 153 |
-
toi_sample = toi_df.sample(min(3, len(toi_df)), random_state=7)
|
| 154 |
-
toi_sample.columns = [c.strip().lower() for c in toi_sample.columns]
|
| 155 |
-
|
| 156 |
-
# Mapeo de sinónimos
|
| 157 |
-
candidates_map = {
|
| 158 |
-
"koi_period": ["pl_orbper", "tce_period", "orbper", "period"],
|
| 159 |
-
"koi_duration": ["pl_trandurh", "tce_duration", "tran_dur", "trandur", "duration", "dur"],
|
| 160 |
-
"koi_depth": ["pl_trandep", "tce_depth", "depth", "trandep"],
|
| 161 |
-
"koi_prad": ["pl_rade", "prad", "rade", "planet_radius"],
|
| 162 |
-
"koi_srad": ["st_rad", "srad", "stellar_radius", "star_radius"],
|
| 163 |
-
"koi_teq": ["pl_eqt", "teq", "equilibrium_temp"],
|
| 164 |
-
"koi_steff": ["st_teff", "teff", "stellar_teff", "effective_temp"],
|
| 165 |
-
"koi_slogg": ["st_logg", "logg", "slogg"],
|
| 166 |
-
"koi_smet": ["st_met", "feh", "metallicity", "smet"],
|
| 167 |
-
"koi_kepmag": ["st_tmag", "tmag", "kepmag", "koi_kepmag"],
|
| 168 |
-
"koi_model_snr": ["tce_model_snr", "model_snr", "snr"],
|
| 169 |
-
"koi_num_transits": ["tce_num_transits", "num_transits", "ntransits", "tran_count"]
|
| 170 |
-
}
|
| 171 |
-
|
| 172 |
-
def first_present(candidates, cols_set):
|
| 173 |
-
for name in candidates:
|
| 174 |
-
if name in cols_set:
|
| 175 |
-
return name
|
| 176 |
-
for name in candidates:
|
| 177 |
-
found = [c for c in cols_set if name in c]
|
| 178 |
-
if found:
|
| 179 |
-
return found[0]
|
| 180 |
-
return None
|
| 181 |
-
|
| 182 |
-
cols_set = set(toi_sample.columns)
|
| 183 |
-
results = []
|
| 184 |
-
|
| 185 |
-
for idx, row in toi_sample.iterrows():
|
| 186 |
-
# Preparar características
|
| 187 |
-
features = {}
|
| 188 |
-
for feat in feature_columns:
|
| 189 |
-
src = first_present(candidates_map.get(feat, []), cols_set)
|
| 190 |
-
if src and src in row and pd.notna(row[src]):
|
| 191 |
-
features[feat] = float(row[src])
|
| 192 |
-
else:
|
| 193 |
-
features[feat] = train_medians.get(feat, 0)
|
| 194 |
-
|
| 195 |
-
# Predecir
|
| 196 |
-
result = predict_single(features)
|
| 197 |
-
|
| 198 |
-
if "error" not in result:
|
| 199 |
-
results.append({
|
| 200 |
-
"TOI": row.get('toi', f"TOI-{idx}"),
|
| 201 |
-
"Disposición": row.get('tfopwg_disp', 'Unknown'),
|
| 202 |
-
"Predicción": result['prediction'],
|
| 203 |
-
"P(Confirmado)": f"{result['probabilities']['CONFIRMED']:.3f}",
|
| 204 |
-
"P(Candidato)": f"{result['probabilities']['CANDIDATE']:.3f}",
|
| 205 |
-
"P(Falso Positivo)": f"{result['probabilities']['FALSE_POSITIVE']:.3f}"
|
| 206 |
-
})
|
| 207 |
-
|
| 208 |
-
if not results:
|
| 209 |
-
return "❌ No se pudieron generar predicciones"
|
| 210 |
-
|
| 211 |
-
result_df = pd.DataFrame(results)
|
| 212 |
-
return f"**Predicciones TOI:**\n\n{result_df.to_markdown(index=False)}"
|
| 213 |
-
|
| 214 |
-
except Exception as e:
|
| 215 |
-
return f"❌ Error: {str(e)}"
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
):
|
| 220 |
-
"""Función para predicción manual en Gradio"""
|
| 221 |
try:
|
| 222 |
-
|
| 223 |
-
period, duration, depth, prad, srad, teq, steff, slogg, smet, kepmag, snr, num_transits
|
| 224 |
-
)
|
| 225 |
-
|
| 226 |
-
if "error" in result:
|
| 227 |
-
return f"❌ {result['error']}"
|
| 228 |
-
|
| 229 |
-
output = f"**Predicción:** {result['prediction']}\n\n**Probabilidades:**\n"
|
| 230 |
-
for clase, prob in result['probabilities'].items():
|
| 231 |
-
output += f"- {clase}: {prob:.3f}\n"
|
| 232 |
-
|
| 233 |
-
return output
|
| 234 |
-
|
| 235 |
except Exception as e:
|
| 236 |
-
return f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
|
| 238 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
-
|
| 241 |
-
gr.
|
| 242 |
-
gr.
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
gr.Markdown("### Endpoint para consumo desde frontend")
|
| 246 |
-
gr.Markdown("""
|
| 247 |
-
**URL:** `https://jarpalucas-echo-finder-api.hf.space/api/predict`
|
| 248 |
-
|
| 249 |
-
**Método:** POST
|
| 250 |
-
**Content-Type:** application/json
|
| 251 |
-
|
| 252 |
-
**Ejemplo de uso con curl:**
|
| 253 |
-
```bash
|
| 254 |
-
curl -X POST "https://jarpalucas-echo-finder-api.hf.space/api/predict" \\
|
| 255 |
-
-H "Content-Type: application/json" \\
|
| 256 |
-
-d '{
|
| 257 |
-
"koi_period": 10.0,
|
| 258 |
-
"koi_duration": 5.0,
|
| 259 |
-
"koi_depth": 1000.0,
|
| 260 |
-
"koi_prad": 2.0,
|
| 261 |
-
"koi_srad": 1.0,
|
| 262 |
-
"koi_teq": 1000.0,
|
| 263 |
-
"koi_steff": 6000.0,
|
| 264 |
-
"koi_slogg": 4.5,
|
| 265 |
-
"koi_smet": 0.0,
|
| 266 |
-
"koi_kepmag": 12.0,
|
| 267 |
-
"koi_model_snr": 10.0,
|
| 268 |
-
"koi_num_transits": 3.0
|
| 269 |
-
}'
|
| 270 |
-
```
|
| 271 |
-
""")
|
| 272 |
-
|
| 273 |
-
# Inputs para probar la API localmente
|
| 274 |
-
with gr.Row():
|
| 275 |
-
with gr.Column():
|
| 276 |
-
period = gr.Number(label="koi_period", value=10.0)
|
| 277 |
-
duration = gr.Number(label="koi_duration", value=5.0)
|
| 278 |
-
depth = gr.Number(label="koi_depth", value=1000.0)
|
| 279 |
-
prad = gr.Number(label="koi_prad", value=2.0)
|
| 280 |
-
with gr.Column():
|
| 281 |
-
srad = gr.Number(label="koi_srad", value=1.0)
|
| 282 |
-
teq = gr.Number(label="koi_teq", value=1000.0)
|
| 283 |
-
steff = gr.Number(label="koi_steff", value=6000.0)
|
| 284 |
-
slogg = gr.Number(label="koi_slogg", value=4.5)
|
| 285 |
-
with gr.Column():
|
| 286 |
-
smet = gr.Number(label="koi_smet", value=0.0)
|
| 287 |
-
kepmag = gr.Number(label="koi_kepmag", value=12.0)
|
| 288 |
-
snr = gr.Number(label="koi_model_snr", value=10.0)
|
| 289 |
-
num_transits = gr.Number(label="koi_num_transits", value=3.0)
|
| 290 |
-
|
| 291 |
-
api_btn = gr.Button("🚀 Probar Predicción")
|
| 292 |
-
api_output = gr.JSON()
|
| 293 |
-
|
| 294 |
-
api_btn.click(
|
| 295 |
-
fn=predict_from_dict,
|
| 296 |
-
inputs=[period, duration, depth, prad, srad, teq, steff, slogg, smet, kepmag, snr, num_transits],
|
| 297 |
-
outputs=api_output
|
| 298 |
-
)
|
| 299 |
-
|
| 300 |
-
with gr.Tab("🔭 TOI Tiempo Real"):
|
| 301 |
-
gr.Markdown("Predicciones de objetos TOI en tiempo real")
|
| 302 |
-
toi_btn = gr.Button("🔍 Analizar TOI")
|
| 303 |
-
toi_output = gr.Markdown()
|
| 304 |
-
toi_btn.click(predict_toi_realtime, outputs=toi_output)
|
| 305 |
-
|
| 306 |
-
with gr.Tab("📊 Interfaz Manual"):
|
| 307 |
-
gr.Markdown("Interfaz manual para predicciones")
|
| 308 |
-
manual_btn = gr.Button("🎯 Predecir")
|
| 309 |
-
manual_output = gr.Markdown()
|
| 310 |
-
manual_btn.click(
|
| 311 |
-
fn=predict_manual,
|
| 312 |
-
inputs=[period, duration, depth, prad, srad, teq, steff, slogg, smet, kepmag, snr, num_transits],
|
| 313 |
-
outputs=manual_output
|
| 314 |
-
)
|
| 315 |
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
-
|
| 321 |
-
demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
|
|
|
|
| 1 |
+
import os, io, json, requests
|
| 2 |
+
from typing import Optional, List, Dict
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import joblib
|
| 6 |
+
import tensorflow as tf
|
| 7 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
# ===== Artifacts =====
# Paths of the serialized model, preprocessing objects and feature statistics,
# all resolved relative to the working directory.
MODEL_PATH = "modelo_tabular.h5"
SCALER_PATH = "scaler.pkl"
ENCODER_PATH = "label_encoder.pkl"
STATS_PATH = "feature_stats.json"

# Fail fast with a real exception: `assert` statements are stripped when Python
# runs with -O, which would postpone the failure to a less readable loader error.
for _artifact_path in (MODEL_PATH, SCALER_PATH, ENCODER_PATH, STATS_PATH):
    if not os.path.exists(_artifact_path):
        raise FileNotFoundError(f"Falta {_artifact_path}")

model = tf.keras.models.load_model(MODEL_PATH)
# NOTE(review): joblib.load unpickles arbitrary objects; only ship artifact
# files that come from a trusted training run.
scaler = joblib.load(SCALER_PATH)
label_encoder = joblib.load(ENCODER_PATH)
with open(STATS_PATH, encoding="utf-8") as f:
    stats = json.load(f)

# Ordered list of feature names; this order is used when building the model input.
FEATURE_COLUMNS: List[str] = stats["feature_columns"]
# The previous revision of this app read the medians from a "train_medians"
# key, so accept either spelling instead of crashing on an older stats file.
_medians_key = "medians" if "medians" in stats else "train_medians"
MEDIANS: Dict[str, float] = stats[_medians_key]
# Class labels in the order stored by the label encoder.
CLASSES = list(label_encoder.classes_)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
+
# ===== Helpers =====
|
| 31 |
+
def first_present(candidates, cols_set):
    """Return the column name that best matches one of ``candidates``.

    Matching happens in two passes:

    1. Exact match: the first candidate (in the given priority order) that is
       itself a member of ``cols_set`` is returned.
    2. Substring match: for the first candidate contained in at least one
       column name, the shortest such column is returned, with ties broken
       alphabetically.

    Args:
        candidates: Iterable of candidate names, highest priority first.
        cols_set: Collection of available column names (strings).

    Returns:
        The matching column name, or ``None`` when nothing matches.
    """
    for name in candidates:
        if name in cols_set:
            return name
    for name in candidates:
        partial = [col for col in cols_set if name in col]
        if partial:
            # Set iteration order is arbitrary and changes between runs, so
            # choose deterministically. Preferring the shortest name keeps
            # e.g. "tce_period" ahead of "tce_period_err".
            return min(partial, key=lambda col: (len(col), col))
    return None
|
| 40 |
|
| 41 |
+
# Maps each model feature (koi_* name) to the alternative column names that
# are accepted for it, listed from most to least preferred.
CANDIDATES_MAP = {
    "koi_period": [
        "pl_orbper", "tce_period", "orbper", "period",
    ],
    "koi_duration": [
        "pl_trandurh", "tce_duration", "trandur", "duration", "dur",
    ],
    "koi_depth": [
        "pl_trandep", "tce_depth", "depth", "trandep",
    ],
    "koi_prad": [
        "pl_rade", "prad", "rade", "planet_radius",
    ],
    "koi_srad": [
        "st_rad", "srad", "stellar_radius", "star_radius",
    ],
    "koi_teq": [
        "pl_eqt", "teq", "equilibrium_temp",
    ],
    "koi_steff": [
        "st_teff", "teff", "stellar_teff", "effective_temp",
    ],
    "koi_slogg": [
        "st_logg", "logg", "slogg",
    ],
    "koi_smet": [
        "st_met", "feh", "metallicity", "smet",
    ],
    "koi_kepmag": [
        "st_tmag", "tmag", "kepmag", "koi_kepmag",
    ],
    "koi_model_snr": [
        "tce_model_snr", "model_snr", "snr",
    ],
    "koi_num_transits": [
        "tce_num_transits", "num_transits", "ntransits", "tran_count",
    ],
}
|
| 55 |
|
| 56 |
+
def impute_and_scale(df: pd.DataFrame) -> np.ndarray:
    """Build the scaled model input from a frame of raw feature values.

    The frame is projected onto ``FEATURE_COLUMNS`` (in that order). Columns
    that are absent are created as all-NaN, and every NaN is replaced by the
    column's entry in ``MEDIANS`` (0.0 when the column has no entry). The
    result is passed through ``scaler.transform``.

    Args:
        df: Frame holding any subset of ``FEATURE_COLUMNS``; extra columns
            are ignored. The frame is not modified.

    Returns:
        Whatever ``scaler.transform`` returns for the imputed values.
    """
    # reindex returns a new frame and adds missing columns as NaN, so the
    # caller's DataFrame is never mutated (the in-place `df[col] = np.nan`
    # approach leaked new columns into the argument).
    features = df.reindex(columns=FEATURE_COLUMNS)
    fill_values = {col: MEDIANS.get(col, 0.0) for col in FEATURE_COLUMNS}
    features = features.fillna(value=fill_values)
    return scaler.transform(features.values)
|
| 66 |
|
| 67 |
+
def predict_proba_from_df(df: pd.DataFrame):
    """Run the model on every row of ``df``.

    The frame is first imputed and scaled by ``impute_and_scale``.

    Returns:
        A ``(probs, classes)`` tuple: the output of ``model.predict`` for the
        scaled rows, and the label encoder's classes as a list.
    """
    scaled_inputs = impute_and_scale(df)
    scores = model.predict(scaled_inputs, verbose=0)
    class_names = list(label_encoder.classes_)
    return scores, class_names
|
| 72 |
+
|
| 73 |
+
# ===== Endpoint 1: Probar con 2 TOI/TCE de la API =====
|
| 74 |
+
def predict_toi_samples(n=2, table="tce"):
    """Fetch a few objects from the NASA Exoplanet Archive and classify them.

    Args:
        n: Number of objects to classify. Coerced to an integer and clamped
            to at least 1.
        table: ``"tce"`` to query the ``q1_q17_dr25_tce`` table through TAP,
            or ``"toi"`` to query the TOI table through the nstedAPI
            endpoint. Any other value falls back to ``"tce"``.

    Returns:
        ``(res, csv_path)``: a DataFrame with one row per object holding the
        predicted label and one ``P(<class>)`` column per class, and the path
        of the CSV file the same table was written to. When the archive
        returns no rows the result is ``(empty DataFrame, None)``.

    Raises:
        requests.HTTPError: If the archive answers with an error status.
    """
    # UI sliders may deliver the count as a float; DataFrame.sample and the
    # ADQL TOP clause both need a real integer.
    n = max(1, int(n))
    if table not in {"tce", "toi"}:
        table = "tce"

    if table == "tce":
        TAP_URL = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync"
        query = f"""
        SELECT TOP {n}
          kepid, tce_plnt_num, tce_period, tce_duration, tce_depth, tce_model_snr
        FROM q1_q17_dr25_tce
        WHERE tce_period > 0 AND tce_duration > 0 AND tce_depth > 0
        ORDER BY tce_model_snr DESC
        """
        r = requests.get(TAP_URL, params={"query": query, "format": "csv"}, timeout=90)
    else:
        BASE = "https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI"
        # NOTE(review): this filter references tce_period on the TOI table;
        # confirm that column exists there, otherwise the archive may reject
        # the request.
        where = ("(tfopwg_disp like 'PC' or tfopwg_disp like 'APC') and "
                 "(pl_orbper is not null or tce_period is not null)")
        r = requests.get(BASE, params={"table":"toi","where":where,"format":"csv"}, timeout=90)

    r.raise_for_status()
    df = pd.read_csv(io.StringIO(r.text))
    df.columns = [c.strip().lower() for c in df.columns]
    if df.empty:
        # Nothing to classify; skip the model call and the file write.
        return pd.DataFrame(), None
    # Fixed seed keeps the selection reproducible across calls.
    df = df.sample(min(n, len(df)), random_state=7).reset_index(drop=True)

    # Map whatever column names the archive returned onto FEATURE_COLUMNS.
    cols_set = set(df.columns)
    cases = pd.DataFrame(index=df.index, columns=FEATURE_COLUMNS, dtype="float64")
    for feat in FEATURE_COLUMNS:
        src = first_present(CANDIDATES_MAP.get(feat, []), cols_set)
        if src is not None:
            cases[feat] = pd.to_numeric(df[src], errors="coerce")
        else:
            # Left as NaN here; imputed downstream by predict_proba_from_df.
            cases[feat] = np.nan

    probs, classes = predict_proba_from_df(cases)
    preds = label_encoder.inverse_transform(np.argmax(probs, axis=1))

    # One output row per object: the label plus one probability per class.
    out = []
    for pred, row_probs in zip(preds, probs):
        d = {"prediction": pred}
        for cls, prob in zip(classes, row_probs):
            d[f"P({cls})"] = float(prob)
        out.append(d)
    res = pd.DataFrame(out)
    csv_path = "pred_toi_samples.csv"
    res.to_csv(csv_path, index=False)
    return res, csv_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
+
# ===== Endpoint 2: POST JSON manual =====
|
| 127 |
+
def predict_from_json(json_text: str, threshold: float = 0.5):
    """Classify a single object described by a JSON document.

    Args:
        json_text: JSON object whose keys are either model feature names
            (``koi_*``) or any synonym listed in ``CANDIDATES_MAP``. Keys are
            matched case-insensitively and ignoring surrounding whitespace.
            Features that are missing or non-numeric are imputed downstream.
        threshold: Minimum ``P(CONFIRMED)`` required for ``is_exoplanet``.

    Returns:
        A dict with ``prediction``, ``probabilities`` (one entry per class),
        ``is_exoplanet`` and ``p_confirmed``. When the input is not a valid
        JSON object, a dict with a single ``error`` key is returned instead.
    """
    try:
        payload = json.loads(json_text)
    except (TypeError, ValueError) as e:
        # json.JSONDecodeError is a ValueError; TypeError covers None input.
        return {"error": f"JSON inválido: {e}"}
    if not isinstance(payload, dict):
        # A list or scalar would otherwise reach the column normalisation
        # below and crash there instead of producing an error response.
        return {"error": "JSON inválido: se esperaba un objeto con pares clave/valor"}

    # Normalise the keys before building the frame so that keys differing only
    # in case/whitespace collapse into one column (the last one wins) instead
    # of producing duplicate column labels.
    normalized = {str(key).strip().lower(): value for key, value in payload.items()}
    df = pd.DataFrame([normalized])

    # Map the supplied columns onto FEATURE_COLUMNS.
    cols_set = set(df.columns)
    cases = pd.DataFrame(index=df.index, columns=FEATURE_COLUMNS, dtype="float64")
    for feat in FEATURE_COLUMNS:
        # Prefer the exact koi_* name; otherwise look for a synonym.
        if feat in cols_set:
            src = feat
        else:
            src = first_present(CANDIDATES_MAP.get(feat, []), cols_set)
        if src is not None:
            cases[feat] = pd.to_numeric(df[src], errors="coerce")
        else:
            cases[feat] = np.nan

    probs, classes = predict_proba_from_df(cases)
    p = probs[0]
    idx = int(np.argmax(p))
    # str() turns the encoder's numpy string into a plain str for the response.
    pred = str(label_encoder.inverse_transform([idx])[0])
    p_confirmed = float(p[classes.index("CONFIRMED")]) if "CONFIRMED" in classes else 0.0
    return {
        "prediction": pred,
        "probabilities": {cls: float(prob) for cls, prob in zip(classes, p)},
        "is_exoplanet": bool(pred.upper() == "CONFIRMED" and p_confirmed >= float(threshold)),
        "p_confirmed": p_confirmed,
    }
|
| 162 |
+
|
| 163 |
+
# ===== Endpoint 3: Descargar CSV de un TOI/TCE específico =====
|
| 164 |
+
def download_object_csv(identifier: str, table: str = "toi"):
    """Download the archive rows of one TOI or TCE object as a CSV file.

    Args:
        identifier: For ``table="toi"``, the value matched (with SQL ``like``)
            against the ``toi`` column. For ``table="tce"``, a Kepler id
            optionally followed by a planet number, e.g. ``"KIC 11446443 1"``,
            ``"KIC11446443,1"`` or ``"11446443"``. The ``like`` wildcards
            ``%`` and ``_`` are allowed.
        table: ``"toi"`` or ``"tce"`` (case-insensitive); anything else falls
            back to ``"toi"``.

    Returns:
        Path of the CSV file (``"object.csv"``) containing the response body.

    Raises:
        ValueError: If the identifier is empty or contains characters that
            are not allowed in the query.
        requests.HTTPError: If the archive answers with an error status.
    """
    import re  # local import: not among this module's top-level imports

    table = table.lower()
    if table not in {"toi", "tce"}:
        table = "toi"
    ident = (identifier or "").strip()

    if table == "toi":
        # The identifier is interpolated into the where clause, so only accept
        # a conservative character set (no quotes, parentheses or semicolons).
        if not re.fullmatch(r"[A-Za-z0-9 ._%-]+", ident):
            raise ValueError(f"Identificador TOI inválido: {identifier!r}")
        BASE = "https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI"
        # NOTE(review): the UI suggests ids such as "TOI-1234.01"; confirm the
        # `toi` column stores that prefixed form and not just "1234.01".
        where = f"toi like '{ident}'"
        r = requests.get(BASE, params={"table":"toi","where":where,"format":"csv"}, timeout=60)
    else:
        # TCE rows are looked up through TAP by kepid (+ optional tce_plnt_num).
        TAP_URL = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync"
        # Drop the "KIC" prefix whether it is its own token ("KIC 11446443 1")
        # or glued to the number ("KIC11446443 1"). Splitting first and
        # stripping the prefix afterwards used to leave an empty kepid for the
        # space-separated form.
        tokens = []
        for raw in ident.replace(",", " ").split():
            token = re.sub(r"^KIC", "", raw, flags=re.IGNORECASE)
            if token:
                tokens.append(token)
        used = tokens[:2]  # extra trailing tokens are ignored
        if not used or not all(re.fullmatch(r"[0-9%_]+", token) for token in used):
            raise ValueError(f"Identificador TCE inválido: {identifier!r}")
        if len(used) == 2:
            kep, num = used
            query = f"""
            SELECT *
            FROM q1_q17_dr25_tce
            WHERE CAST(kepid AS VARCHAR) like '{kep}'
              AND CAST(tce_plnt_num AS VARCHAR) like '{num}'
            """
        else:
            query = f"SELECT TOP 1 * FROM q1_q17_dr25_tce WHERE CAST(kepid AS VARCHAR) like '{used[0]}'"
        r = requests.get(TAP_URL, params={"query": query, "format": "csv"}, timeout=90)

    r.raise_for_status()
    path = "object.csv"
    # Explicit encoding so the output does not depend on the platform locale.
    with open(path, "w", encoding="utf-8") as f:
        f.write(r.text)
    return path
|
| 194 |
+
|
| 195 |
+
# ===== Endpoint 4: Subir CSV y predecir =====
|
| 196 |
+
def predict_from_csv(file_obj, threshold: float = 0.5):
    """Classify every row of an uploaded CSV file.

    Args:
        file_obj: Either a path to the CSV or an object whose ``name``
            attribute is that path. ``None`` means nothing was uploaded.
        threshold: Minimum ``P(CONFIRMED)`` for a row predicted as
            ``CONFIRMED`` to be flagged in the ``is_exoplanet`` column.

    Returns:
        ``(res, out_path)``: a DataFrame with the predicted label, one
        ``P(<class>)`` column per class and an ``is_exoplanet`` flag, plus the
        path of the CSV file it was written to. ``(empty DataFrame, None)``
        when no file was given or the file has no data rows.
    """
    if file_obj is None:
        return pd.DataFrame(), None
    # Accept both a plain path string and an upload wrapper exposing `.name`.
    csv_source = getattr(file_obj, "name", file_obj)
    df = pd.read_csv(csv_source)
    # Normalise column names so they can be matched against the synonyms.
    df.columns = [str(c).strip().lower() for c in df.columns]
    if df.empty:
        # Header-only file: nothing to classify, skip the model call.
        return pd.DataFrame(), None
    cols_set = set(df.columns)

    cases = pd.DataFrame(index=df.index, columns=FEATURE_COLUMNS, dtype="float64")
    for feat in FEATURE_COLUMNS:
        src = feat if feat in cols_set else first_present(CANDIDATES_MAP.get(feat, []), cols_set)
        if src is not None:
            cases[feat] = pd.to_numeric(df[src], errors="coerce")
        else:
            cases[feat] = np.nan

    probs, classes = predict_proba_from_df(cases)
    preds = label_encoder.inverse_transform(np.argmax(probs, axis=1))

    out = []
    for pred, row_probs in zip(preds, probs):
        row = {"prediction": pred}
        for cls, prob in zip(classes, row_probs):
            row[f"P({cls})"] = float(prob)
        out.append(row)
    res = pd.DataFrame(out)

    # Apply the threshold the same way the JSON endpoint does; previously the
    # parameter was accepted but never used.
    if "P(CONFIRMED)" in res.columns:
        predicted_confirmed = res["prediction"].astype(str).str.upper() == "CONFIRMED"
        res["is_exoplanet"] = predicted_confirmed & (res["P(CONFIRMED)"] >= float(threshold))
    else:
        res["is_exoplanet"] = False

    out_path = "predicciones.csv"
    res.to_csv(out_path, index=False)
    return res, out_path
|
| 226 |
+
|
| 227 |
+
# ===== Gradio UI =====
|
| 228 |
+
with gr.Blocks() as demo:
|
| 229 |
+
gr.Markdown("# 🔭 Exoplanet Classifier — API + UI (Gradio)")
|
| 230 |
+
|
| 231 |
+
with gr.Row():
|
| 232 |
+
with gr.Column():
|
| 233 |
+
gr.Markdown("### 1) Probar con 2 objetos de la API (TOI o TCE)")
|
| 234 |
+
table_dd = gr.Dropdown(choices=["toi","tce"], value="tce", label="Tabla")
|
| 235 |
+
n_objs = gr.Slider(1, 10, value=2, step=1, label="N objetos")
|
| 236 |
+
out_df1 = gr.Dataframe(label="Resultados")
|
| 237 |
+
out_file1 = gr.File(label="Descargar CSV")
|
| 238 |
+
gr.Button("Probar API").click(predict_toi_samples, inputs=[n_objs, table_dd], outputs=[out_df1, out_file1], api_name="predict_toi_samples")
|
| 239 |
|
| 240 |
+
with gr.Column():
|
| 241 |
+
gr.Markdown("### 2) JSON manual (POST)")
|
| 242 |
+
jt = gr.Textbox(lines=12, label="JSON de entrada (TOI/TCE-like o koi_* )")
|
| 243 |
+
thr_json = gr.Slider(0, 1, value=0.5, step=0.01, label="Umbral P(CONFIRMED)")
|
| 244 |
+
out_json = gr.JSON(label="Respuesta")
|
| 245 |
+
gr.Button("Predecir JSON").click(predict_from_json, inputs=[jt, thr_json], outputs=out_json, api_name="predict_json")
|
| 246 |
|
| 247 |
+
gr.Markdown("### 3) Descargar CSV de un objeto (por id)")
|
| 248 |
+
ident = gr.Textbox(label="Identificador (ej: TOI-1234.01 o 'KIC 11446443 1')", placeholder="TOI-xxx.yy ó KIC ###### <planet_num>")
|
| 249 |
+
table2 = gr.Dropdown(choices=["toi","tce"], value="toi", label="Tabla")
|
| 250 |
+
out_csv = gr.File(label="CSV del objeto")
|
| 251 |
+
gr.Button("Descargar CSV").click(download_object_csv, inputs=[ident, table2], outputs=out_csv, api_name="toi_csv")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
| 253 |
+
gr.Markdown("### 4) Subir CSV y clasificar")
|
| 254 |
+
f_in = gr.File(label="CSV subida", file_types=[".csv"])
|
| 255 |
+
thr = gr.Slider(0,1,value=0.5, step=0.01, label="Umbral P(CONFIRMED)")
|
| 256 |
+
out_df2 = gr.Dataframe(label="Resultados")
|
| 257 |
+
out_file2 = gr.File(label="Descargar predicciones")
|
| 258 |
+
gr.Button("Predecir CSV").click(predict_from_csv, inputs=[f_in, thr], outputs=[out_df2, out_file2], api_name="predict_csv")
|
| 259 |
|
| 260 |
+
demo.queue().launch()
|
|
|