Spaces:
Running
Running
Delete backend
Browse files- backend/classifier.py +0 -32
- backend/main.py +0 -44
backend/classifier.py
DELETED
|
@@ -1,32 +0,0 @@
|
|
| 1 |
-
from transformers import pipeline
|
| 2 |
-
|
| 3 |
-
# Prompt-injection detector: ProtectAI's DeBERTa-v3 classifier loaded via the
# Hugging Face text-classification pipeline. Created once at import time and
# shared by every call to detect_injection().
classifier = pipeline(
    "text-classification",
    model="protectai/deberta-v3-base-prompt-injection-v2",
    device=-1  # -1 = run on CPU (no GPU required)
)
|
| 8 |
-
|
| 9 |
-
def detect_injection(user_input: str, threshold: float = 0.85) -> dict:
    """Classify *user_input* as a potential prompt-injection attempt.

    Combines two independent signals:
      1. The module-level ``classifier`` pipeline; the model only counts
         when it emits the "INJECTION" label with confidence > ``threshold``.
      2. A case-insensitive substring blocklist of known attack phrases.

    Args:
        user_input: Raw text supplied by the end user.
        threshold: Minimum model confidence for the "INJECTION" label to
            flag the input on its own. Defaults to 0.85 (the original
            hard-coded value), so existing callers are unaffected.

    Returns:
        dict with keys "input", "label", "confidence" (rounded to 3
        decimals), "rule_triggered", and "is_malicious".
    """
    result = classifier(user_input)[0]
    label = result["label"]
    score = result["score"]

    # NOTE(review): plain substring matching — broad phrases like "act as"
    # can false-positive on benign text (e.g. "act as soon as possible").
    # Tuple instead of a list: immutable, not rebuilt meaningfully per call.
    suspicious_keywords = (
        "ignore previous instructions",
        "hack the system",
        "disregard your system prompt",
        "you are now",
        "forget everything",
        "act as",
        "jailbreak",
    )
    # Lowercase once, not once per keyword.
    lowered = user_input.lower()
    rule_triggered = any(kw in lowered for kw in suspicious_keywords)

    # Malicious when either the model is confident or any rule matched.
    is_malicious = (label == "INJECTION" and score > threshold) or rule_triggered

    return {
        "input": user_input,
        "label": label,
        "confidence": round(score, 3),
        "rule_triggered": rule_triggered,
        "is_malicious": is_malicious,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/main.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
| 1 |
-
from fastapi import FastAPI
|
| 2 |
-
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
-
from pydantic import BaseModel
|
| 4 |
-
from backend.classifier import detect_injection
|
| 5 |
-
from groq import Groq
|
| 6 |
-
from dotenv import load_dotenv
|
| 7 |
-
import os
|
| 8 |
-
|
| 9 |
-
# Load GROQ_API_KEY (and any other settings) from the repository-root .env.
load_dotenv(dotenv_path="../.env")
app = FastAPI()
# NOTE(review): wildcard CORS (origins/methods/headers all "*") is convenient
# for local development but should be narrowed to the real frontend origin
# before any public deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Single Groq client shared by all requests; key comes from the environment
# populated by load_dotenv() above (None if the variable is missing).
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
| 18 |
-
|
| 19 |
-
class ChatRequest(BaseModel):
    """Request body for POST /chat."""

    # Raw user message; screened by detect_injection() before reaching the LLM.
    message: str
|
| 21 |
-
|
| 22 |
-
@app.post("/chat")
def chat(req: ChatRequest):
    """Handle one chat turn: screen for prompt injection, then answer.

    Returns a dict with:
        response: the blocked-request notice, or the LLM completion text.
        flagged: True when the request was blocked as malicious.
        detection: the full result dict from detect_injection().
    """
    detection = detect_injection(req.message)

    # Guard clause: refuse flagged input before it ever reaches the LLM.
    if detection["is_malicious"]:
        return {
            "response": "Prompt injection detected. Request blocked.",
            "flagged": True,
            "detection": detection,
        }

    # Fixed: the original prompt read "...account data.If you cannot help..."
    # (missing space after the period).
    system_prompt = (
        "You are a helpful customer support assistant. You help users with "
        "order issues, and general inquiries. You do not have access to "
        "internal systems or user account data. If you cannot help, politely "
        "direct the user to contact support@company.com. Your return policy "
        "is 30 days. The order is dispatched and should arrive in 2-5 days"
    )
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": req.message},
        ],
    )
    return {
        "response": completion.choices[0].message.content,
        "flagged": False,
        "detection": detection,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|