|
|
from fastapi import FastAPI, HTTPException |
|
|
from huggingface_hub import hf_hub_download |
|
|
import pandas as pd |
|
|
|
|
|
# Hugging Face Hub dataset repository that hosts the scores file.
DATASET_REPO = "credi-net/CrediPred"

# Parquet file inside that repository; load_data() reads its "domain" and
# "pc1_score" columns.
FILENAME = "mlpInfer_dec2024_pc1_embeddinggemma-300m_GNN-RNI.parquet"

app = FastAPI(title="CrediNet API")

# Domain -> pc1_score table. Starts empty and is rebound by load_data() when
# the application starts.
lookup: dict = {}
|
|
|
|
|
# NOTE(review): FastAPI documents ``on_event`` as deprecated in favour of
# ``lifespan`` handlers. It is kept here because migrating would change how
# ``app`` is constructed.
@app.on_event("startup")
def load_data():
    """Fill the module-level ``lookup`` table when the application starts.

    Fetches ``FILENAME`` from the ``DATASET_REPO`` dataset repository via
    ``hf_hub_download``, reads its ``domain`` and ``pc1_score`` columns, and
    rebinds ``lookup`` to a dict mapping each domain to its score. The number
    of entries is printed afterwards.
    """
    global lookup

    parquet_path = hf_hub_download(
        repo_type="dataset",
        repo_id=DATASET_REPO,
        filename=FILENAME,
    )

    # Only the two columns the table needs are read from the file.
    frame = pd.read_parquet(parquet_path, columns=["domain", "pc1_score"])
    domains = frame["domain"]
    scores = frame["pc1_score"]

    # If a domain occurs in more than one row, the last row's score wins.
    lookup = {name: value for name, value in zip(domains, scores)}

    print(f"Loaded {len(lookup):,} rows")
|
|
|
|
|
@app.get("/health")
def health():
    """Report liveness together with the size of the ``lookup`` table.

    ``rows`` is the number of entries currently held in ``lookup``; it is 0
    for as long as the table has not been populated.
    """
    loaded_rows = len(lookup)
    return {"status": "ok", "rows": loaded_rows}
|
|
|
|
|
@app.get("/by_domain/{domain}")
def get_score_score(domain: str):
    """Return the ``pc1_score`` stored for ``domain``.

    The key is first looked up exactly as requested, so any request that
    already succeeded keeps returning the same response. If that misses, the
    lookup is retried with surrounding whitespace removed and the name
    lower-cased, because DNS names are case-insensitive and a request such as
    ``Example.COM`` should not fail purely on letter case.

    Args:
        domain: Domain name taken from the URL path.

    Returns:
        A dict with the ``lookup`` key that matched (``domain``) and its
        score converted to a built-in float (``pc1_score``).

    Raises:
        HTTPException: 404 when neither the exact nor the normalized form of
            ``domain`` has a score in ``lookup``.
    """
    # Exact match first: preserves behaviour for every previously valid request.
    matched_key = domain
    score = lookup.get(matched_key)

    if score is None:
        # Fallback: tolerate differences in letter case and stray whitespace.
        matched_key = domain.strip().lower()
        score = lookup.get(matched_key)

    if score is None:
        raise HTTPException(status_code=404, detail="Domain not found")

    return {"domain": matched_key, "pc1_score": float(score)}