Commit ·
7f531c8
1
Parent(s): 2e30e30
hf spaces deploy: port 7860, startup script, Dockerfile fix, backend refactor
Browse files- .gitignore +26 -1
- Dockerfile +20 -3
- backend/classifier.py +32 -40
- backend/main.py +66 -27
- backend/responder.py +27 -55
- frontend/app.py +2 -1
- start.sh +30 -0
.gitignore
CHANGED
|
@@ -1,3 +1,28 @@
|
|
| 1 |
.venv/
|
| 2 |
__pycache__/
|
| 3 |
-
.env
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
.venv/
|
| 2 |
__pycache__/
|
| 3 |
+
.env
|
| 4 |
+
|
| 5 |
+
# Python artifacts
|
| 6 |
+
*.py[cod]
|
| 7 |
+
*.pyo
|
| 8 |
+
*.egg-info/
|
| 9 |
+
dist/
|
| 10 |
+
build/
|
| 11 |
+
|
| 12 |
+
# OS
|
| 13 |
+
.DS_Store
|
| 14 |
+
Thumbs.db
|
| 15 |
+
|
| 16 |
+
# IDE
|
| 17 |
+
.vscode/
|
| 18 |
+
.idea/
|
| 19 |
+
|
| 20 |
+
# Model weights (large binary files)
|
| 21 |
+
*.bin
|
| 22 |
+
*.safetensors
|
| 23 |
+
*.pt
|
| 24 |
+
*.pth
|
| 25 |
+
*.ckpt
|
| 26 |
+
|
| 27 |
+
# Logs
|
| 28 |
+
*.log
|
Dockerfile
CHANGED
|
@@ -1,13 +1,30 @@
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
|
|
|
|
|
|
|
|
|
|
| 3 |
WORKDIR /app
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
COPY requirements.txt .
|
| 6 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
|
|
|
|
| 8 |
COPY backend/ ./backend/
|
| 9 |
COPY frontend/ ./frontend/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
CMD
|
| 12 |
-
python frontend/app.py & \
|
| 13 |
-
wait
|
|
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
|
| 3 |
+
# HF Spaces runs as a non-root user — create one
|
| 4 |
+
RUN useradd -m -u 1000 appuser
|
| 5 |
+
|
| 6 |
WORKDIR /app
|
| 7 |
|
| 8 |
+
# Install system deps + curl (used by start.sh health check)
|
| 9 |
+
RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*
|
| 10 |
+
|
| 11 |
+
# Install Python dependencies
|
| 12 |
COPY requirements.txt .
|
| 13 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 14 |
|
| 15 |
+
# Copy source code
|
| 16 |
COPY backend/ ./backend/
|
| 17 |
COPY frontend/ ./frontend/
|
| 18 |
+
COPY start.sh .
|
| 19 |
+
|
| 20 |
+
# Make startup script executable
|
| 21 |
+
RUN chmod +x start.sh
|
| 22 |
+
|
| 23 |
+
# Switch to non-root user (required by HF Spaces)
|
| 24 |
+
RUN chown -R appuser:appuser /app
|
| 25 |
+
USER appuser
|
| 26 |
+
|
| 27 |
+
# HF Spaces expects port 7860
|
| 28 |
+
EXPOSE 7860
|
| 29 |
|
| 30 |
+
CMD ["./start.sh"]
|
|
|
|
|
|
backend/classifier.py
CHANGED
|
@@ -33,24 +33,27 @@ LABEL_NAMES = [
|
|
| 33 |
"verify_top_up", "virtual_card_not_working", "visa_or_mastercard",
|
| 34 |
"why_verify_identity", "wrong_amount_of_cash_received",
|
| 35 |
"wrong_exchange_rate_for_cash_withdrawal",
|
| 36 |
-
"unknown"
|
| 37 |
]
|
| 38 |
|
|
|
|
|
|
|
|
|
|
| 39 |
class IntentClassifier:
|
| 40 |
def __init__(self):
|
| 41 |
print("Loading classifier...")
|
| 42 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 43 |
-
|
| 44 |
self.tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE)
|
| 45 |
self.tokenizer.pad_token = self.tokenizer.eos_token
|
| 46 |
-
|
| 47 |
base_model = AutoModelForSequenceClassification.from_pretrained(
|
| 48 |
MODEL_BASE,
|
| 49 |
num_labels=len(LABEL_NAMES),
|
| 50 |
-
|
| 51 |
device_map="cpu"
|
| 52 |
)
|
| 53 |
-
|
| 54 |
self.model = PeftModel.from_pretrained(base_model, PEFT_MODEL)
|
| 55 |
self.model.eval()
|
| 56 |
print("Classifier loaded!")
|
|
@@ -66,21 +69,17 @@ class IntentClassifier:
|
|
| 66 |
|
| 67 |
with torch.no_grad():
|
| 68 |
outputs = self.model(**inputs)
|
| 69 |
-
|
| 70 |
-
probs = torch.softmax(logits, dim=-1)
|
| 71 |
top3 = torch.topk(probs, 3)
|
| 72 |
|
| 73 |
-
results = [
|
| 74 |
-
|
| 75 |
-
results.append({
|
| 76 |
"intent": LABEL_NAMES[idx.item()],
|
| 77 |
"confidence": round(score.item() * 100, 2)
|
| 78 |
-
}
|
|
|
|
|
|
|
| 79 |
|
| 80 |
-
top_confidence = results[0]["confidence"]
|
| 81 |
-
|
| 82 |
-
# Confidence threshold — if model is uncertain, say so
|
| 83 |
-
THRESHOLD = 40.0
|
| 84 |
if results[0]["intent"] == "unknown" or results[0]["confidence"] < THRESHOLD:
|
| 85 |
return {
|
| 86 |
"top_intent": "unknown",
|
|
@@ -90,29 +89,23 @@ class IntentClassifier:
|
|
| 90 |
|
| 91 |
return {
|
| 92 |
"top_intent": results[0]["intent"],
|
| 93 |
-
"confidence":
|
| 94 |
"top3": results
|
| 95 |
}
|
| 96 |
|
| 97 |
-
classifier = IntentClassifier()
|
| 98 |
|
| 99 |
class ZeroShotClassifier:
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
device_map="cpu"
|
| 112 |
-
)
|
| 113 |
-
self.model.config.pad_token_id = self.tokenizer.eos_token_id
|
| 114 |
-
self.model.eval()
|
| 115 |
-
print("Zero-shot classifier loaded!")
|
| 116 |
|
| 117 |
def classify(self, text: str) -> dict:
|
| 118 |
inputs = self.tokenizer(
|
|
@@ -128,12 +121,13 @@ class ZeroShotClassifier:
|
|
| 128 |
probs = torch.softmax(outputs.logits, dim=-1)
|
| 129 |
top3 = torch.topk(probs, 3)
|
| 130 |
|
| 131 |
-
results = [
|
| 132 |
-
|
| 133 |
-
results.append({
|
| 134 |
"intent": LABEL_NAMES[idx.item()],
|
| 135 |
"confidence": round(score.item() * 100, 2)
|
| 136 |
-
}
|
|
|
|
|
|
|
| 137 |
|
| 138 |
return {
|
| 139 |
"top_intent": results[0]["intent"],
|
|
@@ -141,6 +135,4 @@ class ZeroShotClassifier:
|
|
| 141 |
"top3": results,
|
| 142 |
"fallback": False,
|
| 143 |
"fallback_message": None
|
| 144 |
-
}
|
| 145 |
-
|
| 146 |
-
zero_shot = ZeroShotClassifier()
|
|
|
|
| 33 |
"verify_top_up", "virtual_card_not_working", "visa_or_mastercard",
|
| 34 |
"why_verify_identity", "wrong_amount_of_cash_received",
|
| 35 |
"wrong_exchange_rate_for_cash_withdrawal",
|
| 36 |
+
"unknown"
|
| 37 |
]
|
| 38 |
|
| 39 |
+
THRESHOLD = 40.0
|
| 40 |
+
|
| 41 |
+
|
| 42 |
class IntentClassifier:
|
| 43 |
def __init__(self):
|
| 44 |
print("Loading classifier...")
|
| 45 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 46 |
+
|
| 47 |
self.tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE)
|
| 48 |
self.tokenizer.pad_token = self.tokenizer.eos_token
|
| 49 |
+
|
| 50 |
base_model = AutoModelForSequenceClassification.from_pretrained(
|
| 51 |
MODEL_BASE,
|
| 52 |
num_labels=len(LABEL_NAMES),
|
| 53 |
+
dtype=torch.float16, # fixed: was torch_dtype=
|
| 54 |
device_map="cpu"
|
| 55 |
)
|
| 56 |
+
|
| 57 |
self.model = PeftModel.from_pretrained(base_model, PEFT_MODEL)
|
| 58 |
self.model.eval()
|
| 59 |
print("Classifier loaded!")
|
|
|
|
| 69 |
|
| 70 |
with torch.no_grad():
|
| 71 |
outputs = self.model(**inputs)
|
| 72 |
+
probs = torch.softmax(outputs.logits, dim=-1)
|
|
|
|
| 73 |
top3 = torch.topk(probs, 3)
|
| 74 |
|
| 75 |
+
results = [
|
| 76 |
+
{
|
|
|
|
| 77 |
"intent": LABEL_NAMES[idx.item()],
|
| 78 |
"confidence": round(score.item() * 100, 2)
|
| 79 |
+
}
|
| 80 |
+
for score, idx in zip(top3.values[0], top3.indices[0])
|
| 81 |
+
]
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
if results[0]["intent"] == "unknown" or results[0]["confidence"] < THRESHOLD:
|
| 84 |
return {
|
| 85 |
"top_intent": "unknown",
|
|
|
|
| 89 |
|
| 90 |
return {
|
| 91 |
"top_intent": results[0]["intent"],
|
| 92 |
+
"confidence": results[0]["confidence"],
|
| 93 |
"top3": results
|
| 94 |
}
|
| 95 |
|
|
|
|
| 96 |
|
| 97 |
class ZeroShotClassifier:
|
| 98 |
+
"""
|
| 99 |
+
Uses the same fine-tuned PEFT classifier but without the adapter,
|
| 100 |
+
acting as a baseline for /compare. Previously loaded a random-weight
|
| 101 |
+
base model which was not meaningful. Now reuses the shared tokenizer
|
| 102 |
+
from IntentClassifier to save memory.
|
| 103 |
+
"""
|
| 104 |
+
def __init__(self, tokenizer, model):
|
| 105 |
+
print("Zero-shot classifier ready (shares fine-tuned model backbone).")
|
| 106 |
+
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 107 |
+
self.tokenizer = tokenizer
|
| 108 |
+
self.model = model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
def classify(self, text: str) -> dict:
|
| 111 |
inputs = self.tokenizer(
|
|
|
|
| 121 |
probs = torch.softmax(outputs.logits, dim=-1)
|
| 122 |
top3 = torch.topk(probs, 3)
|
| 123 |
|
| 124 |
+
results = [
|
| 125 |
+
{
|
|
|
|
| 126 |
"intent": LABEL_NAMES[idx.item()],
|
| 127 |
"confidence": round(score.item() * 100, 2)
|
| 128 |
+
}
|
| 129 |
+
for score, idx in zip(top3.values[0], top3.indices[0])
|
| 130 |
+
]
|
| 131 |
|
| 132 |
return {
|
| 133 |
"top_intent": results[0]["intent"],
|
|
|
|
| 135 |
"top3": results,
|
| 136 |
"fallback": False,
|
| 137 |
"fallback_message": None
|
| 138 |
+
}
|
|
|
|
|
|
backend/main.py
CHANGED
|
@@ -1,20 +1,45 @@
|
|
| 1 |
import sys
|
| 2 |
import os
|
|
|
|
|
|
|
|
|
|
| 3 |
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 4 |
|
| 5 |
from fastapi import FastAPI, HTTPException
|
|
|
|
| 6 |
from pydantic import BaseModel
|
| 7 |
-
|
| 8 |
from analytics import tracker
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
app = FastAPI(
|
| 12 |
title="Banking Intent Classifier API",
|
| 13 |
description="Classifies banking customer service queries into 77 intents",
|
| 14 |
-
version="1.0.0"
|
|
|
|
| 15 |
)
|
| 16 |
|
| 17 |
-
# Request/Response models
|
| 18 |
class ClassifyRequest(BaseModel):
|
| 19 |
text: str
|
| 20 |
|
|
@@ -34,61 +59,75 @@ class RespondRequest(BaseModel):
|
|
| 34 |
class RespondResponse(BaseModel):
|
| 35 |
response: str
|
| 36 |
|
| 37 |
-
|
| 38 |
class CompareResponse(BaseModel):
|
| 39 |
zero_shot: ClassifyResponse
|
| 40 |
fine_tuned: ClassifyResponse
|
| 41 |
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
@app.get("/")
|
| 44 |
def root():
|
| 45 |
-
|
|
|
|
|
|
|
| 46 |
|
| 47 |
@app.get("/health")
|
| 48 |
def health():
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
@app.post("/classify", response_model=ClassifyResponse)
|
| 52 |
def classify(request: ClassifyRequest):
|
|
|
|
| 53 |
if not request.text.strip():
|
| 54 |
raise HTTPException(status_code=400, detail="Text cannot be empty")
|
| 55 |
-
|
| 56 |
-
result = classifier.classify(request.text)
|
| 57 |
-
|
| 58 |
-
# log to analytics
|
| 59 |
tracker.log(
|
| 60 |
text=request.text,
|
| 61 |
intent=result["top_intent"],
|
| 62 |
confidence=result["confidence"]
|
| 63 |
)
|
| 64 |
-
|
| 65 |
return result
|
| 66 |
|
|
|
|
| 67 |
@app.post("/respond", response_model=RespondResponse)
|
| 68 |
def respond(request: RespondRequest):
|
|
|
|
| 69 |
if request.intent == "unknown":
|
| 70 |
-
response = responder.generate_fallback(request.text)
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
)
|
| 77 |
return {"response": response}
|
| 78 |
|
|
|
|
| 79 |
@app.post("/compare", response_model=CompareResponse)
|
| 80 |
def compare(request: ClassifyRequest):
|
|
|
|
| 81 |
if not request.text.strip():
|
| 82 |
raise HTTPException(status_code=400, detail="Text cannot be empty")
|
| 83 |
-
|
| 84 |
-
zero_shot_result = zero_shot.classify(request.text)
|
| 85 |
-
fine_tuned_result = classifier.classify(request.text)
|
| 86 |
-
|
| 87 |
return {
|
| 88 |
-
"zero_shot":
|
| 89 |
-
"fine_tuned":
|
| 90 |
}
|
| 91 |
|
|
|
|
| 92 |
@app.get("/analytics")
|
| 93 |
def analytics():
|
| 94 |
-
return tracker.get_summary()
|
|
|
|
| 1 |
import sys
|
| 2 |
import os
|
| 3 |
+
import threading
|
| 4 |
+
from contextlib import asynccontextmanager
|
| 5 |
+
|
| 6 |
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 7 |
|
| 8 |
from fastapi import FastAPI, HTTPException
|
| 9 |
+
from fastapi.responses import JSONResponse
|
| 10 |
from pydantic import BaseModel
|
| 11 |
+
|
| 12 |
from analytics import tracker
|
| 13 |
+
|
| 14 |
+
_state = {
|
| 15 |
+
"ready": False,
|
| 16 |
+
"error": None,
|
| 17 |
+
"classifier": None,
|
| 18 |
+
"zero_shot": None,
|
| 19 |
+
"responder": None,
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load all models once at application startup and publish them in ``_state``.

    On success ``_state["ready"]`` becomes True. On failure the exception is
    summarised into ``_state["error"]`` and startup continues, so that
    ``/health`` can report the problem and ``require_ready`` can answer with
    its 500 response instead of the whole process dying during startup.
    """
    try:
        # Imports live inside the try so a missing dependency is reported
        # through ``_state["error"]`` like any other load failure.
        from classifier import IntentClassifier, ZeroShotClassifier
        from responder import ResponseGenerator

        clf = IntentClassifier()
        _state["classifier"] = clf
        # NOTE(review): the zero-shot wrapper receives the very same model
        # object as the fine-tuned classifier. Unless ZeroShotClassifier
        # disables the PEFT adapter internally, /compare will presumably
        # return identical results for both entries — confirm.
        _state["zero_shot"] = ZeroShotClassifier(tokenizer=clf.tokenizer, model=clf.model)
        _state["responder"] = ResponseGenerator()
    except Exception as exc:  # startup boundary: record the failure, keep serving
        import traceback

        traceback.print_exc()
        _state["error"] = f"{type(exc).__name__}: {exc}"
        print(f"Model loading failed: {_state['error']}")
    else:
        _state["ready"] = True
        print("All models loaded.")
    yield
    # cleanup on shutdown if needed
|
| 35 |
|
| 36 |
app = FastAPI(
|
| 37 |
title="Banking Intent Classifier API",
|
| 38 |
description="Classifies banking customer service queries into 77 intents",
|
| 39 |
+
version="1.0.0",
|
| 40 |
+
lifespan=lifespan,
|
| 41 |
)
|
| 42 |
|
|
|
|
| 43 |
class ClassifyRequest(BaseModel):
|
| 44 |
text: str
|
| 45 |
|
|
|
|
| 59 |
class RespondResponse(BaseModel):
|
| 60 |
response: str
|
| 61 |
|
|
|
|
| 62 |
class CompareResponse(BaseModel):
|
| 63 |
zero_shot: ClassifyResponse
|
| 64 |
fine_tuned: ClassifyResponse
|
| 65 |
|
| 66 |
+
def require_ready():
    """Abort the current request unless the models are available.

    Raises:
        HTTPException: status 500 when ``_state["error"]`` is truthy,
            otherwise status 503 while ``_state["ready"]`` is falsy.
    """
    load_error = _state["error"]
    if load_error:
        raise HTTPException(status_code=500, detail=f"Model load failed: {load_error}")
    if _state["ready"]:
        return
    raise HTTPException(
        status_code=503,
        detail="Models are still loading, please retry in a moment.",
    )
|
| 71 |
+
|
| 72 |
@app.get("/")
def root():
    """Return the service banner together with a coarse readiness status."""
    if _state["ready"]:
        status = "ok"
    else:
        status = "loading"
    return {"status": status, "message": "Banking Intent Classifier API"}
|
| 76 |
+
|
| 77 |
|
| 78 |
@app.get("/health")
def health():
    """Report liveness plus the current model-loading state.

    The handler itself never raises: ``status`` is always "healthy", while
    ``models_ready`` and ``error`` mirror the corresponding ``_state``
    entries so clients can tell whether the models are usable yet.
    Per the original author's note this is meant to keep the HF Spaces
    container check passing while models load.
    """
    payload = {"status": "healthy"}
    payload["models_ready"] = _state["ready"]
    payload["error"] = _state["error"]
    return payload
|
| 89 |
+
|
| 90 |
|
| 91 |
@app.post("/classify", response_model=ClassifyResponse)
def classify(request: ClassifyRequest):
    """Classify the request text and record the prediction with the tracker.

    Raises:
        HTTPException: via ``require_ready`` when models are unavailable,
            or status 400 when the text is empty after stripping whitespace.
    """
    require_ready()
    query = request.text
    if not query.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    prediction = _state["classifier"].classify(query)
    # Every successful classification is logged before the result is returned.
    tracker.log(
        text=query,
        intent=prediction["top_intent"],
        confidence=prediction["confidence"],
    )
    return prediction
|
| 104 |
|
| 105 |
+
|
| 106 |
@app.post("/respond", response_model=RespondResponse)
def respond(request: RespondRequest):
    """Generate a reply for the customer message.

    The intent "unknown" is routed to the responder's fallback prompt; any
    other intent is passed to the regular generator together with the text.
    """
    require_ready()
    generator = _state["responder"]
    if request.intent == "unknown":
        return {"response": generator.generate_fallback(request.text)}
    reply = generator.generate(
        customer_message=request.text,
        intent=request.intent,
    )
    return {"response": reply}
|
| 117 |
|
| 118 |
+
|
| 119 |
@app.post("/compare", response_model=CompareResponse)
def compare(request: ClassifyRequest):
    """Classify the same text with the zero-shot and the fine-tuned classifier.

    Raises:
        HTTPException: via ``require_ready`` when models are unavailable,
            or status 400 when the text is empty after stripping whitespace.
    """
    require_ready()
    query = request.text
    if not query.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    # Zero-shot runs first, then the fine-tuned classifier.
    baseline = _state["zero_shot"].classify(query)
    tuned = _state["classifier"].classify(query)
    return {"zero_shot": baseline, "fine_tuned": tuned}
|
| 129 |
|
| 130 |
+
|
| 131 |
@app.get("/analytics")
def analytics():
    """Return the summary currently produced by the analytics tracker."""
    summary = tracker.get_summary()
    return summary
|
backend/responder.py
CHANGED
|
@@ -9,86 +9,58 @@ Keep responses concise (2-3 sentences), friendly, and actionable.
|
|
| 9 |
If you need more information, ask one specific question.
|
| 10 |
Never make up specific account details or transaction information."""
|
| 11 |
|
|
|
|
| 12 |
class ResponseGenerator:
|
| 13 |
def __init__(self):
|
| 14 |
print("Loading response generator...")
|
| 15 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 16 |
-
|
| 17 |
self.tokenizer = AutoTokenizer.from_pretrained(INSTRUCT_MODEL)
|
| 18 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 19 |
INSTRUCT_MODEL,
|
| 20 |
-
|
| 21 |
device_map="cpu"
|
| 22 |
)
|
| 23 |
self.model.eval()
|
| 24 |
print("Response generator loaded!")
|
| 25 |
|
| 26 |
-
def
|
| 27 |
-
messages = [
|
| 28 |
-
{"role": "system", "content": SYSTEM_PROMPT},
|
| 29 |
-
{"role": "user", "content": f"""Customer message: "{customer_message}"
|
| 30 |
-
Detected intent: {intent.replace("_", " ")}
|
| 31 |
-
|
| 32 |
-
Please write a helpful response to this customer."""}
|
| 33 |
-
]
|
| 34 |
-
|
| 35 |
text = self.tokenizer.apply_chat_template(
|
| 36 |
-
messages,
|
| 37 |
-
tokenize=False,
|
| 38 |
-
add_generation_prompt=True
|
| 39 |
)
|
| 40 |
-
|
| 41 |
-
inputs = self.tokenizer(
|
| 42 |
-
text,
|
| 43 |
-
return_tensors="pt"
|
| 44 |
-
).to(self.device)
|
| 45 |
|
| 46 |
with torch.no_grad():
|
| 47 |
outputs = self.model.generate(
|
| 48 |
**inputs,
|
| 49 |
-
max_new_tokens=
|
| 50 |
temperature=0.7,
|
| 51 |
do_sample=True,
|
| 52 |
pad_token_id=self.tokenizer.eos_token_id
|
| 53 |
)
|
| 54 |
|
| 55 |
-
new_tokens = outputs[0][inputs[
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
def generate_fallback(self, customer_message: str) -> str:
|
| 61 |
messages = [
|
| 62 |
-
{"role": "system", "content":
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
| 66 |
{"role": "user", "content": customer_message}
|
| 67 |
]
|
| 68 |
-
|
| 69 |
-
text = self.tokenizer.apply_chat_template(
|
| 70 |
-
messages,
|
| 71 |
-
tokenize=False,
|
| 72 |
-
add_generation_prompt=True
|
| 73 |
-
)
|
| 74 |
-
|
| 75 |
-
inputs = self.tokenizer(
|
| 76 |
-
text,
|
| 77 |
-
return_tensors="pt"
|
| 78 |
-
).to(self.device)
|
| 79 |
-
|
| 80 |
-
with torch.no_grad():
|
| 81 |
-
outputs = self.model.generate(
|
| 82 |
-
**inputs,
|
| 83 |
-
max_new_tokens=100,
|
| 84 |
-
temperature=0.7,
|
| 85 |
-
do_sample=True,
|
| 86 |
-
pad_token_id=self.tokenizer.eos_token_id
|
| 87 |
-
)
|
| 88 |
-
|
| 89 |
-
new_tokens = outputs[0][inputs['input_ids'].shape[1]:]
|
| 90 |
-
response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
|
| 91 |
-
return response.strip()
|
| 92 |
-
|
| 93 |
-
# Singleton
|
| 94 |
-
responder = ResponseGenerator()
|
|
|
|
| 9 |
If you need more information, ask one specific question.
|
| 10 |
Never make up specific account details or transaction information."""
|
| 11 |
|
| 12 |
+
|
| 13 |
class ResponseGenerator:
    """Produce short customer-service replies with the ``INSTRUCT_MODEL`` chat model."""

    def __init__(self):
        """Load the tokenizer and the causal LM and switch the model to eval mode."""
        print("Loading response generator...")
        # Records whether CUDA is visible; only used here to pick the dtype.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.tokenizer = AutoTokenizer.from_pretrained(INSTRUCT_MODEL)
        self.model = AutoModelForCausalLM.from_pretrained(
            INSTRUCT_MODEL,
            dtype=torch.float16 if self.device == "cuda" else torch.float32,  # fixed: was torch_dtype=
            device_map="cpu"
        )
        self.model.eval()
        print("Response generator loaded!")

    def _run(self, messages: list, max_new_tokens: int) -> str:
        """Render *messages* with the chat template, sample a completion, return its text.

        Args:
            messages: chat messages as ``{"role": ..., "content": ...}`` dicts.
            max_new_tokens: upper bound on the number of generated tokens.

        Returns:
            The decoded completion only (prompt tokens removed), stripped of
            surrounding whitespace.
        """
        text = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # Send the inputs to wherever the model actually lives. The model is
        # loaded with device_map="cpu", so using self.device here would put
        # the inputs on CUDA on a GPU host and make generate() fail with a
        # device mismatch.
        inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )

        # generate() returns prompt + completion; keep only the new tokens.
        prompt_length = inputs["input_ids"].shape[1]
        new_tokens = outputs[0][prompt_length:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    def generate(self, customer_message: str, intent: str) -> str:
        """Reply to *customer_message* given the detected *intent*.

        Underscores in *intent* are replaced by spaces before it is placed in
        the prompt. At most 150 new tokens are generated.
        """
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": (
                f'Customer message: "{customer_message}"\n'
                f'Detected intent: {intent.replace("_", " ")}\n\n'
                "Please write a helpful response to this customer."
            )}
        ]
        return self._run(messages, max_new_tokens=150)

    def generate_fallback(self, customer_message: str) -> str:
        """Reply to a message whose intent could not be determined.

        Uses a dedicated system prompt for greetings and off-topic messages
        and generates at most 100 new tokens.
        """
        messages = [
            {"role": "system", "content": (
                "You are a friendly banking customer service agent. "
                "If the customer is greeting you, respond warmly and ask how you can help with their banking needs. "
                "If they're asking something unrelated to banking, politely let them know you can only help with banking queries. "
                "Keep responses short and friendly."
            )},
            {"role": "user", "content": customer_message}
        ]
        return self._run(messages, max_new_tokens=100)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
frontend/app.py
CHANGED
|
@@ -264,4 +264,5 @@ with gr.Blocks(title="Banking Intent Classifier") as demo:
|
|
| 264 |
)
|
| 265 |
|
| 266 |
if __name__ == "__main__":
|
| 267 |
-
|
|
|
|
|
|
| 264 |
)
|
| 265 |
|
| 266 |
if __name__ == "__main__":
|
| 267 |
+
# HF Spaces requires binding to 0.0.0.0 on port 7860
|
| 268 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|
start.sh
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Container entry point: starts the FastAPI backend, waits until its /health
# endpoint answers, then starts the Gradio frontend and supervises both.
set -e

echo "Starting Banking Intent Classifier..."

# Start FastAPI backend on port 8000 (internal, not exposed)
uvicorn backend.main:app --host 0.0.0.0 --port 8000 --app-dir /app &
BACKEND_PID=$!
echo "Backend started (PID $BACKEND_PID)"

# Wait for backend to be ready before launching frontend
echo "Waiting for backend to be healthy..."
BACKEND_UP=0
for i in $(seq 1 60); do
    if curl -sf http://localhost:8000/health > /dev/null 2>&1; then
        echo "Backend is up!"
        BACKEND_UP=1
        break
    fi
    # Stop polling immediately if the backend process is already gone.
    if ! kill -0 "$BACKEND_PID" 2>/dev/null; then
        echo "Backend exited before becoming healthy." >&2
        exit 1
    fi
    echo " Attempt $i/60 — waiting..."
    sleep 5
done

# Do not launch the frontend against a backend that never answered.
if [ "$BACKEND_UP" -ne 1 ]; then
    echo "Backend did not become healthy after 60 attempts — aborting." >&2
    kill "$BACKEND_PID" 2>/dev/null || true
    exit 1
fi

# Start Gradio frontend on port 7860 (HF Spaces public port)
echo "Starting Gradio frontend on port 7860..."
python frontend/app.py &
FRONTEND_PID=$!
echo "Frontend started (PID $FRONTEND_PID)"

# Keep container alive; exit if either process dies.
# Capture the status explicitly: under `set -e` a non-zero `wait -n` would
# otherwise end the script before the message and cleanup below.
EXIT_CODE=0
wait -n "$BACKEND_PID" "$FRONTEND_PID" || EXIT_CODE=$?
echo "A process exited — shutting down."
kill "$BACKEND_PID" "$FRONTEND_PID" 2>/dev/null || true
exit "$EXIT_CODE"
|