Spaces:

VishalBhagat01
/

Fraud-API

Sleeping

App Files Files Community

VishalBhagat01 committed on Mar 20

Commit

20cbff3

verified ·

1 Parent(s): 6f68924

Upload 6 files

Browse files

Files changed (6) hide show

.gitignore +59 -0
Dockerfile +33 -0
README.md +35 -6
fraud_model.py +158 -0
main.py +127 -0
requirements.txt +11 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,59 @@

+# ========================
+# Environment & Secrets
+# ========================
+.env
+.env.*
+!.env.example
+# ========================
+# Python
+# ========================
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+*.egg-info/
+dist/
+build/
+*.egg
+# ========================
+# Virtual Environment
+# ========================
+venv/
+env/
+.venv/
+.ENV/
+# ========================
+# HuggingFace Cache
+# ========================
+# Model is baked into Docker image at build time — no need to push cache
+.cache/
+*.cache/
+huggingface/
+# ========================
+# Logs
+# ========================
+*.log
+logs/
+# ========================
+# OS Files
+# ========================
+# Windows
+Thumbs.db
+desktop.ini
+# macOS
+.DS_Store
+.AppleDouble
+# ========================
+# IDE / Editor
+# ========================
+.vscode/
+.idea/
+*.swp
+*.swo

Dockerfile ADDED Viewed

	@@ -0,0 +1,33 @@

+# HuggingFace Spaces runs containers as non-root user (uid=1000)
+FROM python:3.11-slim
+# Required by HuggingFace Spaces
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+# Install CPU-only torch first (saves ~1.5GB vs CUDA build)
+RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
+# Copy and install dependencies
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy source code
+COPY --chown=user fraud_model.py .
+COPY --chown=user main.py .
+# Pre-download model at build time for fast startup
+# HF_HOME points to a writable location for the non-root user
+ENV HF_HOME=/home/user/.cache/huggingface
+RUN python -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; \
+    AutoTokenizer.from_pretrained('austinb/fraud_text_detection'); \
+    AutoModelForSequenceClassification.from_pretrained('austinb/fraud_text_detection')"
+# HuggingFace Spaces exposes port 7860
+EXPOSE 7860
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,11 +1,40 @@
 ---
-title: Fraud API
-emoji: 🏃
-colorFrom: pink
-colorTo: indigo
 sdk: docker
 pinned: false
-license: mit
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Bank Fraud Detection API
+emoji: 🏦
+colorFrom: red
+colorTo: yellow
 sdk: docker
 pinned: false
 ---
+# 🏦 Bank Fraud Detection API
+A FastAPI-based fraud detection API powered by `austinb/fraud_text_detection` (DistilBERT).
+## Endpoints
+| Method | Endpoint | Description |
+|--------|----------|-------------|
+| GET | `/health` | Health check |
+| POST | `/predict` | Single fraud risk score + risk level |
+| POST | `/predict/batch` | Batch predictions |
+| POST | `/analyze` | Full analysis — score + risk + binary detection |
+## Example Usage
+```bash
+curl -X POST https://YOUR-SPACE-URL/analyze \
+  -H "Content-Type: application/json" \
+  -d '{"text": "User transferred 50000 to an unknown account at midnight"}'
+```
+## Response
+```json
+{
+  "text": "User transferred 50000 to an unknown account at midnight",
+  "fraud_score": 0.923,
+  "risk_level": "High Risk 🚨",
+  "is_fraud": true,
+  "detection": "Fraud Detected 🚨"
+}
+```

fraud_model.py ADDED Viewed

	@@ -0,0 +1,158 @@

+import os
+import torch
+import logging
+from dotenv import load_dotenv
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+# Configure logging
+# NOTE: basicConfig runs at import time, so importing this module installs the
+# root handler and format below unless the root logger already has handlers.
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+# Load variables from a local .env file (if present) into the environment
+# before FraudDetector reads its settings with os.getenv.
+load_dotenv()
+class FraudDetector:
+    def __init__(self, model_name=None, hf_token=None):
+        self.model_name = model_name or os.getenv("MODEL_NAME", "austinb/fraud_text_detection")
+        self.hf_token = hf_token or os.getenv("HUGGINGFACEHUB_API_TOKEN")
+        self.low_threshold = float(os.getenv("LOW_THRESHOLD", 0.3))
+        self.high_threshold = float(os.getenv("HIGH_THRESHOLD", 0.7))
+        self.max_length = int(os.getenv("MAX_LENGTH", 512))
+        self.tokenizer = None
+        self.model = None
+        self.fraud_index = None
+        if not self.model_name:
+            raise ValueError("MODEL_NAME not provided and not found in environment variables")
+        self._load_model()
+    def _load_model(self):
+        try:
+            logger.info(f"Loading model: {self.model_name}")
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.model_name,
+                token=self.hf_token
+            )
+            self.model = AutoModelForSequenceClassification.from_pretrained(
+                self.model_name,
+                token=self.hf_token
+            )
+            self.model.eval()
+            # Detect fraud label index from model config
+            id2label = self.model.config.id2label
+            logger.info(f"Model labels: {id2label}")
+            for idx, label in id2label.items():
+                if "fraud" in label.lower() or label == "LABEL_1":
+                    self.fraud_index = idx
+                    break
+            # Fallback: assume index 1 is fraud for binary classifiers
+            if self.fraud_index is None:
+                self.fraud_index = 1
+                logger.warning(
+                    f"Could not detect fraud label from {list(id2label.values())}. "
+                    f"Defaulting to index 1. Set FRAUD_LABEL_INDEX in .env to override."
+                )
+            # Allow manual override via env
+            env_override = os.getenv("FRAUD_LABEL_INDEX")
+            if env_override is not None:
+                self.fraud_index = int(env_override)
+                logger.info(f"Fraud label index overridden by env: {self.fraud_index}")
+            logger.info(
+                f"Model loaded. Fraud index: {self.fraud_index} "
+                f"(label: {id2label.get(self.fraud_index, 'unknown')})"
+            )
+        except Exception as e:
+            logger.error(f"Failed to load model: {str(e)}")
+            raise
+    def _tokenize(self, texts):
+        """Shared tokenizer call with consistent settings."""
+        return self.tokenizer(
+            texts,
+            return_tensors="pt",
+            truncation=True,
+            padding=True,
+            max_length=self.max_length
+        )
+    def get_fraud_score(self, text: str) -> float:
+        inputs = self._tokenize(text)
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+        probs = torch.softmax(outputs.logits, dim=1)
+        return probs[0][self.fraud_index].item()
+    def get_fraud_scores(self, texts: list) -> list:
+        inputs = self._tokenize(texts)
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+        probs = torch.softmax(outputs.logits, dim=1)
+        return probs[:, self.fraud_index].tolist()
+    def risk_label(self, score: float) -> str:
+        if score < self.low_threshold:
+            return "Low Risk"
+        elif score < self.high_threshold:
+            return "Medium Risk"
+        else:
+            return "High Risk 🚨"
+    def predict(self, text: str) -> dict:
+        score = self.get_fraud_score(text)
+        preview = text[:50] + ("..." if len(text) > 50 else "")
+        result = {
+            "text": text,
+            "fraud_score": round(score, 4),
+            "risk_level": self.risk_label(score)
+        }
+        logger.info(f"Prediction for '{preview}': {result['risk_level']} ({result['fraud_score']})")
+        return result
+    def analyze(self, text: str) -> dict:
+        """Returns fraud score + risk level + binary detection in one call."""
+        score = self.get_fraud_score(text)
+        is_fraud = score >= self.high_threshold
+        preview = text[:50] + ("..." if len(text) > 50 else "")
+        result = {
+            "text": text,
+            "fraud_score": round(score, 4),
+            "risk_level": self.risk_label(score),
+            "is_fraud": is_fraud,
+            "detection": "Fraud Detected 🚨" if is_fraud else "No Fraud Detected ✅"
+        }
+        logger.info(f"Analyze for '{preview}': {result['detection']} | {result['risk_level']} ({result['fraud_score']})")
+        return result
+    def predict_batch(self, texts: list) -> list:
+        """Batch predict with consistent logging."""
+        scores = self.get_fraud_scores(texts)
+        results = []
+        for text, score in zip(texts, scores):
+            preview = text[:50] + ("..." if len(text) > 50 else "")
+            risk = self.risk_label(score)
+            logger.info(f"Batch prediction for '{preview}': {risk} ({round(score, 4)})")
+            results.append({
+                "text": text,
+                "fraud_score": round(score, 4),
+                "risk_level": risk
+            })
+        return results
+# Example Usage
+if __name__ == "__main__":
+    try:
+        detector = FraudDetector()
+        sample_text = "User transferred ₹50,000 to an unknown account at midnight"
+        result = detector.predict(sample_text)
+        print("\nPrediction Result:")
+        print(result)
+    except Exception as e:
+        print(f"Error: {e}")

main.py ADDED Viewed

	@@ -0,0 +1,127 @@

+from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, field_validator
+from typing import List
+from fraud_model import FraudDetector
+import uvicorn
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Global detector instance
+# Assigned by lifespan() at startup and reset to None at shutdown; the route
+# handlers treat a falsy value as "model unavailable".
+detector: FraudDetector = None
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    global detector
+    try:
+        logger.info("Loading FraudDetector model...")
+        detector = FraudDetector()
+        logger.info("FraudDetector loaded successfully.")
+    except Exception as e:
+        logger.error(f"FATAL: Failed to initialize FraudDetector: {e}")
+        raise RuntimeError(f"Model failed to load: {e}")
+    yield
+    detector = None
+    logger.info("FraudDetector shut down.")
+app = FastAPI(
+    title="Bank Fraud Detection API",
+    description="API for detecting fraudulent bank transactions using AI.",
+    version="1.0.0",
+    lifespan=lifespan
+)
+# --- Request / Response Models ---
+class PredictionRequest(BaseModel):
+    text: str
+    @field_validator("text")
+    @classmethod
+    def text_must_not_be_empty(cls, v):
+        if not v or not v.strip():
+            raise ValueError("text must not be empty")
+        return v.strip()
+class BatchPredictionRequest(BaseModel):
+    texts: List[str]
+    @field_validator("texts")
+    @classmethod
+    def texts_must_not_be_empty(cls, v):
+        if not v:
+            raise ValueError("texts list must not be empty")
+        cleaned = [t.strip() for t in v if t and t.strip()]
+        if not cleaned:
+            raise ValueError("texts list contains only empty strings")
+        return cleaned
+class PredictionResponse(BaseModel):
+    """Response schema for /predict and for each item of /predict/batch."""
+    # The input text as scored (after request validation stripped it).
+    text: str
+    # Fraud-class probability reported by the detector.
+    fraud_score: float
+    # Risk band label produced by the detector for that score.
+    risk_level: str
+class AnalyzeResponse(BaseModel):
+    """Response schema for /analyze: score, risk band and a binary verdict."""
+    # The input text as scored (after request validation stripped it).
+    text: str
+    # Fraud-class probability reported by the detector.
+    fraud_score: float
+    # Risk band label produced by the detector for that score.
+    risk_level: str
+    # Binary verdict returned by the detector's analyze() call.
+    is_fraud: bool
+    # Human-readable form of the verdict.
+    detection: str
+# --- Routes ---
+@app.get("/health")
+def health_check():
+    if detector:
+        return {"status": "healthy", "model": detector.model_name}
+    return {"status": "unhealthy", "error": "Model not loaded"}
+@app.post("/predict", response_model=PredictionResponse)
+def predict_single(request: PredictionRequest):
+    if not detector:
+        raise HTTPException(status_code=503, detail="Model service unavailable")
+    try:
+        result = detector.predict(request.text)
+        return result
+    except Exception as e:
+        logger.error(f"Prediction error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/predict/batch", response_model=List[PredictionResponse])
+def predict_batch(request: BatchPredictionRequest):
+    if not detector:
+        raise HTTPException(status_code=503, detail="Model service unavailable")
+    try:
+        results = detector.predict_batch(request.texts)
+        return results
+    except Exception as e:
+        logger.error(f"Batch prediction error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/analyze", response_model=AnalyzeResponse)
+def analyze(request: PredictionRequest):
+    if not detector:
+        raise HTTPException(status_code=503, detail="Model service unavailable")
+    try:
+        result = detector.analyze(request.text)
+        return result
+    except Exception as e:
+        logger.error(f"Analyze error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+# Core ML - CPU only (no CUDA, keeps slug size small for free tier)
+# The CPU build is selected by the --index-url install step in the Dockerfile.
+# A local version label ("+cpu") is not permitted with ">=" under PEP 440,
+# so the specifier here stays a plain public version.
+torch>=2.0.0
+transformers>=4.36.0
+# API
+fastapi>=0.110.0
+uvicorn>=0.29.0
+pydantic>=2.0.0
+# Env / config
+python-dotenv>=1.0.0