# NOTE(review): the three lines above this file's docstring in the scrape
# ("Spaces:" / "Running" / "Running") are Hugging Face Spaces UI status text,
# not part of app.py — replaced with this comment so the file stays valid Python.
"""
FraudFoxAI Inference API - ONNX Runtime (low memory)
Uses ONNX Runtime instead of PyTorch to stay under 512MB memory limit.
The ONNX model files must already exist in the HF model repo
(pushed from Colab after training via optimum export).
"""
import gc

import numpy as np
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForSequenceClassification
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from scipy.special import softmax

app = FastAPI()

# HF Hub repo id that holds both the tokenizer files and the exported ONNX model.
MODEL_NAME = "xanderabim/fraudfoxai-phishing"

# Load tokenizer (lightweight, ~few MB)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load ONNX model directly (~80MB vs ~400MB for PyTorch)
# ONNX files are pre-exported and pushed from Colab - no torch needed here
model = ORTModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Force garbage collection after loading to drop any transient load-time
# allocations before the process settles at its steady-state footprint.
gc.collect()
class TextInput(BaseModel):
    """Request body for prediction.

    Field name follows the HF Inference-API convention of a single
    ``inputs`` string — presumably so existing HF API clients can call
    this Space unchanged (TODO confirm against callers).
    """

    # Raw text to classify (e.g. an email/message body).
    inputs: str
@app.post("/predict")  # was never registered on `app` — without this the API exposed no endpoint
async def predict(data: TextInput):
    """Classify ``data.inputs`` and return HF-pipeline-style scores.

    Returns:
        ``[[{"label": "LABEL_i", "score": float}, ...]]`` — one inner list
        of per-class scores for the single input text, mirroring the
        ``transformers`` pipeline output shape.

    Raises:
        HTTPException: 500 with the underlying error message on any failure.
    """
    try:
        encoded = tokenizer(
            data.inputs,
            return_tensors="np",  # numpy tensors for ONNX Runtime
            truncation=True,
            max_length=512,
        )
        # DistilBERT has no token_type embeddings; drop the key if present
        # so ORT does not receive an unexpected input.
        encoded.pop("token_type_ids", None)

        outputs = model(**encoded)
        logits = outputs.logits
        # Some runtimes hand back a framework tensor; normalize to numpy.
        if hasattr(logits, "numpy"):
            logits = logits.numpy()

        probs = softmax(logits, axis=1)
        # Generalized over the model's actual class count (identical output
        # for the original 2-class case).
        return [
            [
                {"label": f"LABEL_{i}", "score": float(probs[0][i])}
                for i in range(probs.shape[1])
            ]
        ]
    except HTTPException:
        # Don't wrap an already-built HTTP error into a generic 500.
        raise
    except Exception as e:
        # Chain the cause so server logs keep the original traceback.
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/health")  # was never registered on `app` — probe was unreachable
async def health():
    """Liveness/readiness probe.

    Returns:
        dict: static status payload identifying the served model repo and
        the inference runtime.
    """
    return {"status": "healthy", "model": MODEL_NAME, "runtime": "onnxruntime"}