| import json, random |
| import numpy as np, pandas as pd |
| from sklearn.metrics import roc_auc_score |
| from app.database import SessionLocal |
| from app.repository import list_objects |
| from app.feature_engineering import FEATURE_COLUMNS, combine_features |
| from app.graph_features import build_graph, pair_graph_features |
| from app.ml import train_models |
|
|
def _synthetic_risk_score(f, noise):
    """Blend selected pair features into one heuristic score plus additive noise."""
    # Terms are added strictly left to right so the floating-point result
    # matches a single flat `a + b + c + ...` expression.
    return (
        0.30 * f["close_approach_proxy"]
        + 0.16 * f["same_shell"]
        + 0.10 * min(1.0, f["shell_density_proxy"] / 12.0)
        + 0.10 * min(1.0, f["graph_local_density"] * 2.0)
        + 0.09 * min(1.0, f["graph_jaccard"])
        + 0.10 * min(1.0, f["recurrence_count"] / 5.0)
        + 0.08 * max(0.0, f["trend_delta_score"])
        + noise
    )


def run_training(
    *,
    max_objects: int = 5000,
    n_pairs: int = 4000,
    graph_pairs: int = 1000,
    label_threshold: float = 0.48,
    seed: "int | None" = None,
) -> tuple:
    """Train models on synthetically labelled random object pairs.

    Objects are read through ``list_objects``, random pairs are drawn from
    them, each pair is turned into a feature row via ``combine_features``, and
    a noisy weighted score thresholded at ``label_threshold`` supplies the
    binary label. The resulting table is handed to ``train_models``.

    Args:
        max_objects: Second argument passed to ``list_objects`` (presumably a
            row limit -- confirm against the repository module).
        n_pairs: Number of random object pairs (training rows) to generate.
        graph_pairs: How many of the leading pairs become edges of the graph
            passed to ``pair_graph_features``.
        label_threshold: A row is labelled 1 when its score exceeds this value.
        seed: When given, the stdlib and NumPy draws made here come from
            private generators seeded with it, leaving the global RNG state
            untouched. ``None`` keeps using the global ``random`` and
            ``np.random`` state. Only the data generation in this function is
            seeded; whether ``train_models`` is deterministic is not visible
            from here.

    Returns:
        A 3-tuple ``(path, row_count, metrics)`` where ``path`` is whatever
        ``train_models`` returns and ``metrics`` is
        ``{"pseudo_auc": float | None, "rows": int}``. ``pseudo_auc`` is
        ``None`` when the generated labels contain a single class.

    Raises:
        ValueError: If ``n_pairs`` is not positive or fewer than two objects
            are available to pair up.
    """
    if n_pairs < 1:
        raise ValueError("n_pairs must be a positive integer")

    db = SessionLocal()
    try:
        fields = (
            "object_id",
            "object_type",
            "mean_motion",
            "inclination",
            "eccentricity",
            "raan",
            "bstar",
            "launch_year",
        )
        # Copy the attributes into plain dicts while the session is still open.
        objects = [
            {name: getattr(o, name) for name in fields}
            for o in list_objects(db, max_objects)
        ]
    finally:
        db.close()

    if len(objects) < 2:
        # random.sample would fail with an opaque "sample larger than
        # population" error; report the real cause with the same exception type.
        raise ValueError(
            f"need at least 2 objects to build training pairs, got {len(objects)}"
        )

    # With no seed, fall back to the module-level generators so the default
    # call behaves as before.
    py_rng = random if seed is None else random.Random(seed)
    np_rng = np.random if seed is None else np.random.default_rng(seed)

    pairs = [tuple(py_rng.sample(objects, 2)) for _ in range(n_pairs)]

    # Only the leading `graph_pairs` pairs become graph edges; graph features
    # are still requested for every pair below.
    g = build_graph([(a["object_id"], b["object_id"]) for a, b in pairs[:graph_pairs]])

    rows = []
    raw_scores = []
    for a, b in pairs:
        trend = {
            "recurrence_count": float(py_rng.choice([0, 1, 2, 3, 4])),
            "trend_delta_score": float(py_rng.uniform(-0.1, 0.3)),
            "score_volatility_proxy": float(py_rng.uniform(0, 0.2)),
        }
        f = combine_features(
            a, b, trend, pair_graph_features(g, a["object_id"], b["object_id"])
        )
        score = _synthetic_risk_score(f, np_rng.normal(0, 0.05))
        rows.append({**f, "label": 1 if score > label_threshold else 0})
        raw_scores.append(score)

    df = pd.DataFrame(rows)
    labels = df["label"].values
    path = train_models(df[FEATURE_COLUMNS].values, labels)

    # NOTE(review): every label is a threshold on the very score ranked here,
    # so this value is 1.0 whenever both classes are present. It is a sanity
    # check on the generated data, not an evaluation of the trained model.
    if len(np.unique(labels)) < 2:
        # ROC AUC is undefined for a single class; report that explicitly
        # instead of failing after the model has already been trained.
        pseudo_auc = None
    else:
        pseudo_auc = round(float(roc_auc_score(labels, np.array(raw_scores))), 4)

    return path, len(df), {"pseudo_auc": pseudo_auc, "rows": int(len(df))}
|
|
if __name__ == "__main__":
    # Script entry point: run one training pass and print a JSON summary.
    model_path, row_count, metrics = run_training()
    summary = {"model_path": model_path, "rows": row_count, "metrics": metrics}
    print(json.dumps(summary, indent=2))
|
|