VishalBhagat01 committed on
Commit
20cbff3
·
verified ·
1 Parent(s): 6f68924

Upload 6 files

Browse files
Files changed (6) hide show
  1. .gitignore +59 -0
  2. Dockerfile +33 -0
  3. README.md +35 -6
  4. fraud_model.py +158 -0
  5. main.py +127 -0
  6. requirements.txt +11 -0
.gitignore ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ========================
2
+ # Environment & Secrets
3
+ # ========================
4
+ .env
5
+ .env.*
6
+ !.env.example
7
+
8
+ # ========================
9
+ # Python
10
+ # ========================
11
+ __pycache__/
12
+ *.py[cod]
13
+ *.pyo
14
+ *.pyd
15
+ *.egg-info/
16
+ dist/
17
+ build/
18
+ *.egg
19
+
20
+ # ========================
21
+ # Virtual Environment
22
+ # ========================
23
+ venv/
24
+ env/
25
+ .venv/
26
+ .ENV/
27
+
28
+ # ========================
29
+ # HuggingFace Cache
30
+ # ========================
31
+ # Model is baked into Docker image at build time — no need to push cache
32
+ .cache/
33
+ *.cache/
34
+ huggingface/
35
+
36
+ # ========================
37
+ # Logs
38
+ # ========================
39
+ *.log
40
+ logs/
41
+
42
+ # ========================
43
+ # OS Files
44
+ # ========================
45
+ # Windows
46
+ Thumbs.db
47
+ desktop.ini
48
+
49
+ # macOS
50
+ .DS_Store
51
+ .AppleDouble
52
+
53
+ # ========================
54
+ # IDE / Editor
55
+ # ========================
56
+ .vscode/
57
+ .idea/
58
+ *.swp
59
+ *.swo
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HuggingFace Spaces runs containers as non-root user (uid=1000)
2
+ FROM python:3.11-slim
3
+
4
+ # Required by HuggingFace Spaces
5
+ RUN useradd -m -u 1000 user
6
+ USER user
7
+ ENV HOME=/home/user \
8
+ PATH=/home/user/.local/bin:$PATH
9
+
10
+ WORKDIR $HOME/app
11
+
12
+ # Install CPU-only torch first (saves ~1.5GB vs CUDA build)
13
+ RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
14
+
15
+ # Copy and install dependencies
16
+ COPY --chown=user requirements.txt .
17
+ RUN pip install --no-cache-dir -r requirements.txt
18
+
19
+ # Copy source code
20
+ COPY --chown=user fraud_model.py .
21
+ COPY --chown=user main.py .
22
+
23
+ # Pre-download model at build time for fast startup
24
+ # HF_HOME points to a writable location for the non-root user
25
+ ENV HF_HOME=/home/user/.cache/huggingface
26
+ RUN python -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; \
27
+ AutoTokenizer.from_pretrained('austinb/fraud_text_detection'); \
28
+ AutoModelForSequenceClassification.from_pretrained('austinb/fraud_text_detection')"
29
+
30
+ # HuggingFace Spaces exposes port 7860
31
+ EXPOSE 7860
32
+
33
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,40 @@
1
  ---
2
- title: Fraud API
3
- emoji: 🏃
4
- colorFrom: pink
5
- colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
- license: mit
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Bank Fraud Detection API
3
+ emoji: 🏦
4
+ colorFrom: red
5
+ colorTo: yellow
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
+ # 🏦 Bank Fraud Detection API
11
+
12
+ A FastAPI-based fraud detection API powered by `austinb/fraud_text_detection` (DistilBERT).
13
+
14
+ ## Endpoints
15
+
16
+ | Method | Endpoint | Description |
17
+ |--------|----------|-------------|
18
+ | GET | `/health` | Health check |
19
+ | POST | `/predict` | Single fraud risk score + risk level |
20
+ | POST | `/predict/batch` | Batch predictions |
21
+ | POST | `/analyze` | Full analysis — score + risk + binary detection |
22
+
23
+ ## Example Usage
24
+
25
+ ```bash
26
+ curl -X POST https://YOUR-SPACE-URL/analyze \
27
+ -H "Content-Type: application/json" \
28
+ -d '{"text": "User transferred 50000 to an unknown account at midnight"}'
29
+ ```
30
+
31
+ ## Response
32
+ ```json
33
+ {
34
+ "text": "User transferred 50000 to an unknown account at midnight",
35
+ "fraud_score": 0.923,
36
+ "risk_level": "High Risk 🚨",
37
+ "is_fraud": true,
38
+ "detection": "Fraud Detected 🚨"
39
+ }
40
+ ```
fraud_model.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import logging
4
+ from dotenv import load_dotenv
5
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
+
7
# Root logging setup: timestamped records at INFO level and above.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Load settings from a .env file (python-dotenv) before any os.getenv lookups below.
load_dotenv()
12
+
13
class FraudDetector:
    """Text fraud classifier built on a Hugging Face sequence-classification model.

    Configuration comes from constructor arguments first and environment
    variables second:

    * ``MODEL_NAME`` -- model id (default ``austinb/fraud_text_detection``)
    * ``HUGGINGFACEHUB_API_TOKEN`` -- optional access token
    * ``LOW_THRESHOLD`` / ``HIGH_THRESHOLD`` -- risk band boundaries (0.3 / 0.7)
    * ``MAX_LENGTH`` -- tokenizer truncation length (512)
    * ``FRAUD_LABEL_INDEX`` -- optional override of the detected fraud class index
    """

    # Words that, directly before "fraud" in a label, mark the label as the
    # negative class (e.g. "not_fraud", "non-fraud", "No Fraud").
    _NEGATIONS = ("not", "non", "no")

    def __init__(self, model_name=None, hf_token=None):
        """Read configuration and load the tokenizer and model.

        Args:
            model_name: Hugging Face model id. Falls back to ``MODEL_NAME`` and
                then to ``austinb/fraud_text_detection``.
            hf_token: Access token passed to ``from_pretrained``. Falls back to
                ``HUGGINGFACEHUB_API_TOKEN``.

        Raises:
            ValueError: If the resolved model name is empty, or if
                ``FRAUD_LABEL_INDEX`` is set to a non-integer or out-of-range value.
        """
        self.model_name = model_name or os.getenv("MODEL_NAME", "austinb/fraud_text_detection")
        self.hf_token = hf_token or os.getenv("HUGGINGFACEHUB_API_TOKEN")
        self.low_threshold = float(os.getenv("LOW_THRESHOLD", 0.3))
        self.high_threshold = float(os.getenv("HIGH_THRESHOLD", 0.7))
        self.max_length = int(os.getenv("MAX_LENGTH", 512))

        self.tokenizer = None
        self.model = None
        self.fraud_index = None

        # An explicitly empty MODEL_NAME in the environment ends up here.
        if not self.model_name:
            raise ValueError("MODEL_NAME not provided and not found in environment variables")

        self._load_model()

    @classmethod
    def _is_fraud_label(cls, label):
        """Return True when *label* names the fraud class rather than its negation."""
        before, found, _ = str(label).lower().partition("fraud")
        if not found:
            return False
        # Look at the word immediately preceding "fraud", ignoring separators.
        words = before.replace("-", " ").replace("_", " ").split()
        return not (words and words[-1] in cls._NEGATIONS)

    @classmethod
    def _detect_fraud_index(cls, id2label):
        """Return the first index whose label denotes fraud (or is ``LABEL_1``), else None."""
        for idx, label in id2label.items():
            if label == "LABEL_1" or cls._is_fraud_label(label):
                return idx
        return None

    @staticmethod
    def _parse_label_index(raw, num_labels):
        """Convert the ``FRAUD_LABEL_INDEX`` string to a valid class index.

        Negative values are accepted because they index from the end of the
        probability vector, exactly as ordinary sequence indexing does.

        Raises:
            ValueError: If *raw* is not an integer or lies outside the label range.
        """
        try:
            index = int(raw)
        except ValueError as exc:
            raise ValueError(f"FRAUD_LABEL_INDEX must be an integer, got {raw!r}") from exc
        if not -num_labels <= index < num_labels:
            raise ValueError(
                f"FRAUD_LABEL_INDEX {index} is out of range for a model with {num_labels} labels"
            )
        return index

    @staticmethod
    def _preview(text):
        """Return at most the first 50 characters of *text* for log lines."""
        return text[:50] + ("..." if len(text) > 50 else "")

    def _load_model(self):
        """Load tokenizer and model, then resolve which output index means fraud.

        Any failure is logged and re-raised unchanged.
        """
        try:
            logger.info("Loading model: %s", self.model_name)
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                token=self.hf_token,
            )
            self.model = AutoModelForSequenceClassification.from_pretrained(
                self.model_name,
                token=self.hf_token,
            )
            self.model.eval()

            # Detect fraud label index from model config
            id2label = self.model.config.id2label
            logger.info("Model labels: %s", id2label)

            self.fraud_index = self._detect_fraud_index(id2label)

            # Fallback: assume index 1 is fraud for binary classifiers
            if self.fraud_index is None:
                self.fraud_index = 1
                logger.warning(
                    f"Could not detect fraud label from {list(id2label.values())}. "
                    f"Defaulting to index 1. Set FRAUD_LABEL_INDEX in .env to override."
                )

            # Allow manual override via env; validated here so a bad value fails
            # at startup instead of on every request.
            env_override = os.getenv("FRAUD_LABEL_INDEX")
            if env_override is not None:
                self.fraud_index = self._parse_label_index(env_override, len(id2label))
                logger.info("Fraud label index overridden by env: %s", self.fraud_index)

            logger.info(
                f"Model loaded. Fraud index: {self.fraud_index} "
                f"(label: {id2label.get(self.fraud_index, 'unknown')})"
            )

        except Exception as e:
            logger.error("Failed to load model: %s", e)
            raise

    def _tokenize(self, texts):
        """Shared tokenizer call with consistent settings."""
        return self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=self.max_length,
        )

    def get_fraud_score(self, text: str) -> float:
        """Return the softmax probability of the fraud class for one text."""
        inputs = self._tokenize(text)
        with torch.no_grad():
            outputs = self.model(**inputs)
        probs = torch.softmax(outputs.logits, dim=1)
        return probs[0][self.fraud_index].item()

    def get_fraud_scores(self, texts: list) -> list:
        """Return the fraud-class probability for each text, in input order.

        An empty *texts* yields an empty list without invoking the model.
        """
        if not texts:
            return []
        inputs = self._tokenize(texts)
        with torch.no_grad():
            outputs = self.model(**inputs)
        probs = torch.softmax(outputs.logits, dim=1)
        return probs[:, self.fraud_index].tolist()

    def risk_label(self, score: float) -> str:
        """Map a score to a risk band using the low/high thresholds.

        Scores below ``low_threshold`` are low risk, scores below
        ``high_threshold`` are medium risk, everything else is high risk.
        """
        if score < self.low_threshold:
            return "Low Risk"
        if score < self.high_threshold:
            return "Medium Risk"
        return "High Risk 🚨"

    def predict(self, text: str) -> dict:
        """Return ``text``, rounded ``fraud_score`` and ``risk_level`` for one text."""
        score = self.get_fraud_score(text)
        result = {
            "text": text,
            "fraud_score": round(score, 4),
            "risk_level": self.risk_label(score),
        }
        logger.info(
            "Prediction for '%s': %s (%s)",
            self._preview(text), result["risk_level"], result["fraud_score"],
        )
        return result

    def analyze(self, text: str) -> dict:
        """Returns fraud score + risk level + binary detection in one call.

        ``is_fraud`` is True when the score reaches ``high_threshold``.
        """
        score = self.get_fraud_score(text)
        is_fraud = score >= self.high_threshold
        result = {
            "text": text,
            "fraud_score": round(score, 4),
            "risk_level": self.risk_label(score),
            "is_fraud": is_fraud,
            "detection": "Fraud Detected 🚨" if is_fraud else "No Fraud Detected ✅",
        }
        logger.info(
            "Analyze for '%s': %s | %s (%s)",
            self._preview(text), result["detection"], result["risk_level"], result["fraud_score"],
        )
        return result

    def predict_batch(self, texts: list) -> list:
        """Batch predict with consistent logging.

        Returns one dict per input text with the same keys as ``predict``.
        """
        scores = self.get_fraud_scores(texts)
        results = []
        for text, score in zip(texts, scores):
            risk = self.risk_label(score)
            rounded = round(score, 4)
            logger.info("Batch prediction for '%s': %s (%s)", self._preview(text), risk, rounded)
            results.append({
                "text": text,
                "fraud_score": rounded,
                "risk_level": risk,
            })
        return results
147
+
148
+
149
# Example usage: score one sample sentence when the module is run as a script.
if __name__ == "__main__":
    try:
        demo_text = "User transferred ₹50,000 to an unknown account at midnight"
        outcome = FraudDetector().predict(demo_text)
        print("\nPrediction Result:")
        print(outcome)
    except Exception as e:
        # Any failure (model load, inference, printing) is reported, not raised.
        print(f"Error: {e}")
main.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from contextlib import asynccontextmanager
2
+ from fastapi import FastAPI, HTTPException
3
+ from pydantic import BaseModel, field_validator
4
+ from typing import List
5
+ from fraud_model import FraudDetector
6
+ import uvicorn
7
+ import logging
8
+
9
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global detector instance. It is None until the lifespan handler has loaded
# the model and again after shutdown, so the annotation is optional. It is
# written as a string so no additional typing import is required.
detector: "FraudDetector | None" = None
15
+
16
+
17
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the fraud model before the app serves requests and release it afterwards.

    Raises:
        RuntimeError: If ``FraudDetector()`` fails; the original exception is
            chained as the cause so its traceback is preserved.
    """
    global detector
    try:
        logger.info("Loading FraudDetector model...")
        detector = FraudDetector()
        logger.info("FraudDetector loaded successfully.")
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"FATAL: Failed to initialize FraudDetector: {e}")
        raise RuntimeError(f"Model failed to load: {e}") from e
    try:
        yield
    finally:
        # Runs even if the generator is closed or an error is thrown into it.
        detector = None
        logger.info("FraudDetector shut down.")
30
+
31
+
32
# ASGI application object; startup and shutdown work is delegated to `lifespan`.
app = FastAPI(
    lifespan=lifespan,
    title="Bank Fraud Detection API",
    version="1.0.0",
    description="API for detecting fraudulent bank transactions using AI.",
)
38
+
39
+
40
+ # --- Request / Response Models ---
41
+
42
class PredictionRequest(BaseModel):
    # Text to score; stored with surrounding whitespace removed by the validator.
    text: str

    @field_validator("text")
    @classmethod
    def text_must_not_be_empty(cls, v):
        # Reject empty or whitespace-only input; otherwise return the trimmed text.
        trimmed = v.strip() if v else ""
        if trimmed == "":
            raise ValueError("text must not be empty")
        return trimmed
51
+
52
+
53
class BatchPredictionRequest(BaseModel):
    # Texts to score; blank entries are dropped and the rest are trimmed.
    texts: List[str]

    @field_validator("texts")
    @classmethod
    def texts_must_not_be_empty(cls, v):
        # An empty list is rejected outright.
        if not v:
            raise ValueError("texts list must not be empty")
        # Trim every non-empty entry, then discard those that were only whitespace.
        trimmed = (entry.strip() for entry in v if entry)
        cleaned = [entry for entry in trimmed if entry]
        if len(cleaned) == 0:
            raise ValueError("texts list contains only empty strings")
        return cleaned
65
+
66
+
67
# Response body for /predict and for each item of /predict/batch.
class PredictionResponse(BaseModel):
    text: str  # the text that was scored
    fraud_score: float  # fraud-class probability as produced by the detector
    risk_level: str  # risk band label derived from the score
71
+
72
+
73
# Response body for /analyze: the prediction fields plus a binary verdict.
class AnalyzeResponse(BaseModel):
    text: str  # the text that was scored
    fraud_score: float  # fraud-class probability as produced by the detector
    risk_level: str  # risk band label derived from the score
    is_fraud: bool  # binary fraud decision
    detection: str  # human-readable form of the decision
79
+
80
+
81
+ # --- Routes ---
82
+
83
@app.get("/health")
def health_check():
    """Report whether the model is loaded.

    Returns HTTP 200 with the model name when the detector is available.
    When it is not, the same JSON body as before is returned but with HTTP 503,
    so status-code-based health probes do not treat the service as ready.
    """
    if detector is not None:
        return {"status": "healthy", "model": detector.model_name}

    # Imported locally because JSONResponse is not imported at module level.
    from fastapi.responses import JSONResponse

    return JSONResponse(
        status_code=503,
        content={"status": "unhealthy", "error": "Model not loaded"},
    )
88
+
89
+
90
@app.post("/predict", response_model=PredictionResponse)
def predict_single(request: PredictionRequest):
    """Score a single text and return its fraud score and risk level.

    Raises:
        HTTPException: 503 when the model is not loaded; 500 when prediction
            fails (the underlying exception is chained and logged with traceback).
    """
    if detector is None:
        raise HTTPException(status_code=503, detail="Model service unavailable")
    try:
        return detector.predict(request.text)
    except Exception as e:
        # logger.exception keeps the stack trace that logger.error dropped.
        logger.exception(f"Prediction error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
100
+
101
+
102
@app.post("/predict/batch", response_model=List[PredictionResponse])
def predict_batch(request: BatchPredictionRequest):
    """Score every text in the request and return one result per text.

    Raises:
        HTTPException: 503 when the model is not loaded; 500 when prediction
            fails (the underlying exception is chained and logged with traceback).
    """
    if detector is None:
        raise HTTPException(status_code=503, detail="Model service unavailable")
    try:
        return detector.predict_batch(request.texts)
    except Exception as e:
        # logger.exception keeps the stack trace that logger.error dropped.
        logger.exception(f"Batch prediction error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
112
+
113
+
114
@app.post("/analyze", response_model=AnalyzeResponse)
def analyze(request: PredictionRequest):
    """Score a single text and return score, risk level and binary detection.

    Raises:
        HTTPException: 503 when the model is not loaded; 500 when analysis
            fails (the underlying exception is chained and logged with traceback).
    """
    if detector is None:
        raise HTTPException(status_code=503, detail="Model service unavailable")
    try:
        return detector.analyze(request.text)
    except Exception as e:
        # logger.exception keeps the stack trace that logger.error dropped.
        logger.exception(f"Analyze error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
124
+
125
+
126
# Direct execution: serve on all interfaces at port 7860, as the Dockerfile CMD does.
if __name__ == "__main__":
    uvicorn.run(app, port=7860, host="0.0.0.0")
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core ML - the CPU-only wheel is selected by the PyTorch CPU index URL in the Dockerfile
2
+ torch>=2.0.0
3
+ transformers>=4.36.0
4
+
5
+ # API
6
+ fastapi>=0.110.0
7
+ uvicorn>=0.29.0
8
+ pydantic>=2.0.0
9
+
10
+ # Env / config
11
+ python-dotenv>=1.0.0