File size: 1,995 Bytes
9e6ee24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import json, random
import numpy as np, pandas as pd
from sklearn.metrics import roc_auc_score
from app.database import SessionLocal
from app.repository import list_objects
from app.feature_engineering import FEATURE_COLUMNS, combine_features
from app.graph_features import build_graph, pair_graph_features
from app.ml import train_models

# Attributes copied from each loaded object into a plain dict, so the rows
# stay usable after the database session has been closed.
_OBJECT_FIELDS = (
    "object_id",
    "object_type",
    "mean_motion",
    "inclination",
    "eccentricity",
    "raan",
    "bstar",
    "launch_year",
)

_MAX_OBJECTS = 5000       # second argument passed to list_objects
_N_PAIRS = 4000           # number of random object pairs (= training rows)
_N_GRAPH_PAIRS = 1000     # only the first pairs are handed to build_graph
_LABEL_THRESHOLD = 0.48   # noisy score above this => label 1
_NOISE_STD = 0.05         # std-dev of the Gaussian noise added to the score


def _heuristic_risk_score(f):
    """Return the noise-free weighted heuristic score for one feature dict."""
    return (
        0.30 * f["close_approach_proxy"]
        + 0.16 * f["same_shell"]
        + 0.10 * min(1.0, f["shell_density_proxy"] / 12.0)
        + 0.10 * min(1.0, f["graph_local_density"] * 2.0)
        + 0.09 * min(1.0, f["graph_jaccard"])
        + 0.10 * min(1.0, f["recurrence_count"] / 5.0)
        + 0.08 * max(0.0, f["trend_delta_score"])
    )


def run_training(*, seed=None):
    """Train models on randomly paired objects with heuristically derived labels.

    Objects are loaded with ``list_objects(db, 5000)`` and 4000 random pairs
    are drawn from them. A graph is built from the ids of the first 1000
    pairs. For every pair, random trend values are generated, features are
    produced by ``combine_features``, and a weighted heuristic score plus
    Gaussian noise is thresholded at 0.48 to obtain a binary label. The
    feature matrix (``FEATURE_COLUMNS``) and labels go to ``train_models``.

    Args:
        seed: Optional seed. When given, pair sampling, the random trend
            values and the label noise are drawn from private generators
            seeded with it instead of the global ``random`` / ``np.random``
            state. When ``None`` (the default) the global generators are
            used. Any randomness inside ``train_models`` is not controlled
            here.

    Returns:
        A tuple ``(path, n_rows, metrics)`` where ``path`` is the value
        returned by ``train_models``, ``n_rows`` is the number of generated
        rows and ``metrics`` is ``{"pseudo_auc": ..., "rows": ...}``.
        ``pseudo_auc`` is ``None`` when every generated label falls into a
        single class, because ROC AUC is undefined in that case.

    Raises:
        ValueError: If fewer than two objects were loaded, so no pair can
            be sampled.
    """
    db = SessionLocal()
    try:
        loaded = list_objects(db, _MAX_OBJECTS)
        objects = [
            {name: getattr(o, name) for name in _OBJECT_FIELDS} for o in loaded
        ]
    finally:
        db.close()

    if len(objects) < 2:
        raise ValueError(
            f"need at least 2 objects to sample training pairs, got {len(objects)}"
        )

    # The random module and random.Random share sample/choice/uniform, and
    # np.random and a Generator share normal, so either source fits below.
    if seed is None:
        py_rng, np_rng = random, np.random
    else:
        py_rng, np_rng = random.Random(seed), np.random.default_rng(seed)

    pairs = [tuple(py_rng.sample(objects, 2)) for _ in range(_N_PAIRS)]
    g = build_graph(
        [(a["object_id"], b["object_id"]) for a, b in pairs[:_N_GRAPH_PAIRS]]
    )

    rows = []
    raw_scores = []
    for a, b in pairs:
        # Trend inputs are random placeholders, not values read from the
        # loaded objects.
        trend = {
            "recurrence_count": float(py_rng.choice([0, 1, 2, 3, 4])),
            "trend_delta_score": float(py_rng.uniform(-0.1, 0.3)),
            "score_volatility_proxy": float(py_rng.uniform(0, 0.2)),
        }
        f = combine_features(
            a, b, trend, pair_graph_features(g, a["object_id"], b["object_id"])
        )
        score = _heuristic_risk_score(f) + np_rng.normal(0, _NOISE_STD)
        rows.append({**f, "label": 1 if score > _LABEL_THRESHOLD else 0})
        raw_scores.append(score)

    df = pd.DataFrame(rows)
    labels = df["label"].values
    path = train_models(df[FEATURE_COLUMNS].values, labels)

    # NOTE(review): the labels are a threshold of these same scores, so this
    # value is 1.0 whenever both classes occur; it does not measure the
    # trained model.
    if len(np.unique(labels)) < 2:
        pseudo_auc = None
    else:
        pseudo_auc = round(float(roc_auc_score(labels, np.array(raw_scores))), 4)

    return path, len(df), {"pseudo_auc": pseudo_auc, "rows": int(len(df))}

if __name__ == "__main__":
    # Script entry point: run one training pass and print a JSON summary.
    model_path, row_count, metrics = run_training()
    report = {"model_path": model_path, "rows": row_count, "metrics": metrics}
    print(json.dumps(report, indent=2))