Spaces:
Running
Running
Upload 4 files
Browse files- Dockerfile +12 -0
- backend/classifier.py +32 -0
- backend/main.py +44 -0
- requirements.txt +87 -0
Dockerfile
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.12-slim

WORKDIR /app

# Install dependencies before copying the application code so this layer is
# reused when only the backend sources change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY backend/ ./backend/

EXPOSE 7860

# backend/main.py imports its sibling with a top-level `from classifier import ...`,
# which only resolves when backend/ itself is on the import path. `--app-dir backend`
# puts it there, so the app is addressed as `main:app` instead of `backend.main:app`.
CMD ["uvicorn", "main:app", "--app-dir", "backend", "--host", "0.0.0.0", "--port", "7860"]
|
backend/classifier.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import pipeline
|
| 2 |
+
|
| 3 |
+
# Build the prompt-injection classifier once at import time so every request
# reuses the same loaded model.
classifier = pipeline(
    "text-classification",
    model="protectai/deberta-v3-base-prompt-injection-v2",
    device=-1,  # -1 keeps inference on the CPU
    # Bound the work done per request: without truncation an arbitrarily long
    # message is tokenized and fed to the model in full. 512 tokens follows the
    # model card's own usage example.
    truncation=True,
    max_length=512,
)
|
| 8 |
+
|
| 9 |
+
# Phrases that flag a message as malicious regardless of the model verdict.
_SUSPICIOUS_KEYWORDS = (
    "ignore previous instructions",
    "hack the system",
    "disregard your system prompt",
    "you are now",
    "forget everything",
    "act as",
    "jailbreak",
)

# Minimum model score for an "INJECTION" label to count as malicious.
_INJECTION_THRESHOLD = 0.85


def _matches_suspicious_phrase(text: str) -> bool:
    """Return True when *text* contains a suspicious phrase starting at a word boundary."""
    import re  # local import keeps this block self-contained

    # Words of a phrase may be separated by any run of whitespace, so extra
    # spaces or line breaks do not slip past the rule.
    alternatives = "|".join(
        r"\s+".join(re.escape(word) for word in phrase.split())
        for phrase in _SUSPICIOUS_KEYWORDS
    )
    # The leading \b stops mid-word hits: a plain substring test made "act as"
    # fire on "contact as soon as possible". No trailing boundary is required,
    # so inflected forms such as "jailbreaking" still match.
    return re.search(rf"\b(?:{alternatives})", text.lower()) is not None


def detect_injection(user_input: str) -> dict:
    """Classify *user_input* as a prompt-injection attempt or not.

    Combines the top prediction of the module-level ``classifier`` with a
    keyword rule. The input is malicious when the model labels it
    ``"INJECTION"`` with a score above 0.85, or when the keyword rule fires.

    Args:
        user_input: Raw text supplied by the user.

    Returns:
        A dict with the keys ``input`` (the text as given), ``label`` and
        ``confidence`` (the model's top label and its score rounded to three
        decimals), ``rule_triggered`` (whether a suspicious phrase was found)
        and ``is_malicious`` (the combined verdict).
    """
    result = classifier(user_input)[0]
    label = result["label"]
    score = result["score"]

    rule_triggered = _matches_suspicious_phrase(user_input)
    model_flagged = label == "INJECTION" and score > _INJECTION_THRESHOLD
    is_malicious = model_flagged or rule_triggered

    return {
        "input": user_input,
        "label": label,
        "confidence": round(score, 3),
        "rule_triggered": rule_triggered,
        "is_malicious": is_malicious,
    }
|
backend/main.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
from classifier import detect_injection
|
| 5 |
+
from groq import Groq
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
# Locate .env one directory above this file. Anchoring to the file's location
# (rather than the process working directory, as a bare "../.env" does) makes
# the lookup independent of where the server is launched from.
_ENV_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), os.pardir, ".env"
)
load_dotenv(dotenv_path=_ENV_PATH)

app = FastAPI()

# NOTE(review): CORS is fully open (any origin, method and header). Confirm
# this is intended before exposing the API beyond a demo.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Groq client shared by all requests; the key comes from the environment
# (populated by the .env file above when one is present).
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
| 18 |
+
|
| 19 |
+
# Request body for POST /chat. Documented with comments rather than a class
# docstring so the generated schema stays exactly as before.
class ChatRequest(BaseModel):
    # Free-text message from the user; it is screened by detect_injection
    # before being forwarded to the language model.
    message: str
|
| 21 |
+
|
| 22 |
+
# POST /chat: screen the message with detect_injection, then either refuse it
# or forward it to the Groq chat model and relay the answer. Both outcomes
# return the keys "response", "flagged" and "detection". Comments are used
# instead of a docstring so the published OpenAPI description is unchanged.
@app.post("/chat")
def chat(req: ChatRequest):
    verdict = detect_injection(req.message)

    # Guard clause: flagged messages never reach the language model.
    if verdict["is_malicious"]:
        refusal = "Prompt injection detected. Request blocked."
        return {"response": refusal, "flagged": True, "detection": verdict}

    # The prompt text is reproduced exactly, including the missing space
    # after "user account data.".
    system_prompt = (
        "You are a helpful customer support assistant. "
        "You help users with order issues, and general inquiries. "
        "You do not have access to internal systems or user account data."
        "If you cannot help, politely direct the user to contact support@company.com. "
        "Your return policy is 30 days. "
        "The order is dispatched and should arrive in 2-5 days"
    )
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": req.message},
    ]
    llm_reply = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=conversation,
    )
    answer = llm_reply.choices[0].message.content
    return {"response": answer, "flagged": False, "detection": verdict}
|
requirements.txt
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiohappyeyeballs==2.6.1
|
| 2 |
+
aiohttp==3.13.3
|
| 3 |
+
aiosignal==1.4.0
|
| 4 |
+
altair==6.0.0
|
| 5 |
+
annotated-doc==0.0.4
|
| 6 |
+
annotated-types==0.7.0
|
| 7 |
+
anyio==4.12.1
|
| 8 |
+
attrs==25.4.0
|
| 9 |
+
blinker==1.9.0
|
| 10 |
+
cachetools==6.2.6
|
| 11 |
+
certifi==2026.2.25
|
| 12 |
+
charset-normalizer==3.4.4
|
| 13 |
+
click==8.3.1
|
| 14 |
+
datasets==4.6.1
|
| 15 |
+
dill==0.4.0
|
| 16 |
+
distro==1.9.0
|
| 17 |
+
fastapi==0.134.0
|
| 18 |
+
filelock==3.24.3
|
| 19 |
+
frozenlist==1.8.0
|
| 20 |
+
fsspec==2026.2.0
|
| 21 |
+
gitdb==4.0.12
|
| 22 |
+
GitPython==3.1.46
|
| 23 |
+
groq==1.0.0
|
| 24 |
+
h11==0.16.0
|
| 25 |
+
hf-xet==1.3.2
|
| 26 |
+
httpcore==1.0.9
|
| 27 |
+
httpx==0.28.1
|
| 28 |
+
huggingface_hub==1.5.0
|
| 29 |
+
idna==3.11
|
| 30 |
+
Jinja2==3.1.6
|
| 31 |
+
jiter==0.13.0
|
| 32 |
+
jsonschema==4.26.0
|
| 33 |
+
jsonschema-specifications==2025.9.1
|
| 34 |
+
markdown-it-py==4.0.0
|
| 35 |
+
MarkupSafe==3.0.3
|
| 36 |
+
mdurl==0.1.2
|
| 37 |
+
mpmath==1.3.0
|
| 38 |
+
multidict==6.7.1
|
| 39 |
+
multiprocess==0.70.18
|
| 40 |
+
narwhals==2.17.0
|
| 41 |
+
networkx==3.6.1
|
| 42 |
+
numpy==2.4.2
|
| 43 |
+
openai==2.24.0
|
| 44 |
+
packaging==26.0
|
| 45 |
+
pandas==2.3.3
|
| 46 |
+
pillow==12.1.1
|
| 47 |
+
propcache==0.4.1
|
| 48 |
+
protobuf==6.33.5
|
| 49 |
+
pyarrow==23.0.1
|
| 50 |
+
pydantic==2.12.5
|
| 51 |
+
pydantic_core==2.41.5
|
| 52 |
+
pydeck==0.9.1
|
| 53 |
+
Pygments==2.19.2
|
| 54 |
+
python-dateutil==2.9.0.post0
|
| 55 |
+
python-dotenv==1.2.1
|
| 56 |
+
pytz==2025.2
|
| 57 |
+
PyYAML==6.0.3
|
| 58 |
+
referencing==0.37.0
|
| 59 |
+
regex==2026.2.28
|
| 60 |
+
requests==2.32.5
|
| 61 |
+
rich==14.3.3
|
| 62 |
+
rpds-py==0.30.0
|
| 63 |
+
safetensors==0.7.0
|
| 64 |
+
setuptools==82.0.0
|
| 65 |
+
shellingham==1.5.4
|
| 66 |
+
six==1.17.0
|
| 67 |
+
smmap==5.0.2
|
| 68 |
+
sniffio==1.3.1
|
| 69 |
+
starlette==0.52.1
|
| 70 |
+
streamlit==1.54.0
|
| 71 |
+
sympy==1.14.0
|
| 72 |
+
tenacity==9.1.4
|
| 73 |
+
tokenizers==0.22.2
|
| 74 |
+
toml==0.10.2
|
| 75 |
+
torch==2.10.0
|
| 76 |
+
tornado==6.5.4
|
| 77 |
+
tqdm==4.67.3
|
| 78 |
+
transformers==5.2.0
|
| 79 |
+
typer==0.24.1
|
| 80 |
+
typer-slim==0.24.0
|
| 81 |
+
typing-inspection==0.4.2
|
| 82 |
+
typing_extensions==4.15.0
|
| 83 |
+
tzdata==2025.3
|
| 84 |
+
urllib3==2.6.3
|
| 85 |
+
uvicorn==0.41.0
|
| 86 |
+
xxhash==3.6.0
|
| 87 |
+
yarl==1.22.0
|