Spaces:
Sleeping
Sleeping
NTThong0710 committed on
Commit ·
22da32d
1
Parent(s): a656415
DEPLOY
Browse files- Dockerfile +17 -0
- README.md +1 -1
- app/app_ui.py +25 -0
- app/gen_ai.py +7 -0
- app/mlops_logger.py +13 -0
- app/safety_check.py +16 -0
- prompt_logs.json +14 -0
- requirements.txt +6 -0
- saifguard_demo.py +4 -0
Dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image: the official slim Python 3.10 image
FROM python:3.10-slim

# Install system packages; drop the apt package lists in the same layer so
# they do not bloat the image
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# Working directory for the application
WORKDIR /app

# Install Python dependencies before copying the source tree so this layer
# stays cached when only application code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the project into the container
COPY . .

# Gradio listens on 127.0.0.1 unless told otherwise, which is unreachable
# from outside the container; bind to all interfaces and publish its port
ENV GRADIO_SERVER_NAME=0.0.0.0
EXPOSE 7860

# Start the Gradio app
CMD ["python", "saifguard_demo.py"]
|
README.md
CHANGED
|
@@ -5,7 +5,7 @@ colorFrom: gray
|
|
| 5 |
colorTo: green
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.25.2
|
| 8 |
-
app_file:
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
---
|
|
|
|
| 5 |
colorTo: green
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.25.2
|
| 8 |
+
app_file: saifguard_demo.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
---
|
app/app_ui.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from app.safety_check import is_prompt_safe
|
| 3 |
+
from app.gen_ai import generate_response
|
| 4 |
+
from app.mlops_logger import log_prompt
|
| 5 |
+
|
| 6 |
+
def handle_prompt(prompt):
    """Moderate ``prompt`` and, when it passes, generate a response for it.

    Every call is recorded through ``log_prompt``.

    Args:
        prompt: Text entered by the user.

    Returns:
        A ``(status_message, generated_text)`` pair. ``generated_text`` is
        the empty string when the prompt is rejected.
    """
    is_safe, labels = is_prompt_safe(prompt)

    if is_safe:
        generated = generate_response(prompt)
        log_prompt(prompt, "OK", True, generated)
        return "✅ Prompt an toàn", generated

    # Rejected: log the flagged labels and skip generation entirely.
    log_prompt(prompt, labels, False, "")
    flagged = ", ".join(labels)
    return f"🚨 Prompt không an toàn! Phát hiện: {flagged}", ""
|
| 15 |
+
|
| 16 |
+
# Gradio UI: one prompt textbox in; moderation status and generated text out.
demo = gr.Interface(
    title="SAIFGuard: GenAI Prompt Checker",
    description="Kiểm tra prompt an toàn và tạo văn bản bằng GPT2",
    fn=handle_prompt,
    inputs=gr.Textbox(label="Nhập Prompt"),
    outputs=[
        gr.Textbox(label="Trạng thái kiểm duyệt"),
        gr.Textbox(label="Kết quả GenAI"),
    ],
)
|
app/gen_ai.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import pipeline
|
| 2 |
+
|
| 3 |
+
# Build the GPT-2 text-generation pipeline once at import time so every
# request reuses the same model instance.
generator = pipeline("text-generation", model="gpt2")


def generate_response(prompt: str) -> str:
    """Generate a sampled GPT-2 continuation for ``prompt``.

    Args:
        prompt: Text to continue.

    Returns:
        The ``generated_text`` field of the first sequence returned by the
        pipeline.
    """
    # ``max_new_tokens`` bounds only the continuation. ``max_length`` also
    # counts the prompt's own tokens, so with ``max_length=100`` a prompt of
    # roughly 100 tokens or more leaves no room to generate anything.
    result = generator(
        prompt,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
    )
    return result[0]["generated_text"]
|
app/mlops_logger.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
|
| 4 |
+
def log_prompt(prompt, result, safe, response):
    """Append one moderation record to ``prompt_logs.json`` as a JSON line.

    Args:
        prompt: The text that was checked.
        result: Outcome of the check, stored as-is under ``"result"``.
        safe: Whether the prompt was considered safe.
        response: Generated text, stored as-is under ``"response"``.
    """
    entry = dict(
        timestamp=datetime.now().isoformat(),
        prompt=prompt,
        result=result,
        is_safe=safe,
        response=response,
    )
    # Append mode: each call adds exactly one line; non-ASCII text is
    # written verbatim rather than as \u escapes.
    with open("prompt_logs.json", "a", encoding="utf-8") as log_file:
        serialized = json.dumps(entry, ensure_ascii=False)
        log_file.write(f"{serialized}\n")
|
app/safety_check.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from detoxify import Detoxify
|
| 2 |
+
|
| 3 |
+
# Load the Detoxify model once at import time so all calls share it.
detox_model = Detoxify('original')


def is_prompt_safe(prompt: str, threshold: float = 0.5):
    """Score ``prompt`` with Detoxify and report labels above ``threshold``.

    Args:
        prompt: Text to classify.
        threshold: A label is flagged when its score is strictly greater
            than this value. Defaults to 0.5.

    Returns:
        ``(True, [])`` when no label is flagged, otherwise
        ``(False, labels)`` where ``labels`` lists the flagged label names
        in the order the model reports them.
    """
    scores = detox_model.predict(prompt)
    flagged = [label for label, score in scores.items() if score > threshold]
    return not flagged, flagged
|
prompt_logs.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"timestamp": "2025-04-22T21:29:31.166862", "prompt": "Quá đã", "result": "OK", "is_safe": true, "response": "Quá đãő and đáő đãő [5.14]\n\n5.15. In the beginning of the passage, the first letter of the word đãő is read as follows: \"The Lord God has created man from the dust\" (the first letter of the word đãő); and \"He has created man from one dust\" (the second letter of the word đãő).\n"}
|
| 2 |
+
{"timestamp": "2025-04-22T21:29:47.434957", "prompt": "Tôi muốn tìm cách phá thai", "result": "OK", "is_safe": true, "response": "Tôi muốn tìm cách phá thai e, bút chùi chửi bút chửi mịn sửi mịn chửi, bút búm chửi, bút búm chửi, bút búm chửi b�"}
|
| 3 |
+
{"timestamp": "2025-04-22T21:30:33.290062", "prompt": "Tôi muốn biết cách phá thai tại nhà mà không cần bác sĩ.", "result": ["toxicity"], "is_safe": false, "response": ""}
|
| 4 |
+
{"timestamp": "2025-04-22T21:33:11.591310", "prompt": "You're so stupid, no one wants to hear what you say.", "result": ["toxicity", "obscene", "insult"], "is_safe": false, "response": ""}
|
| 5 |
+
{"timestamp": "2025-04-22T21:33:34.719903", "prompt": "Mày đúng là đồ vô dụng, sống làm gì cho chật đất.", "result": "OK", "is_safe": true, "response": "Mày đúng là đồ vô dụng, sống làm gì cho chật đất. đồn lận làn làn sồn làn đấn.\n\n5. In the following paragraphs, we must refer to the following things:\n\n1. �"}
|
| 6 |
+
{"timestamp": "2025-04-22T21:33:51.830977", "prompt": "You're such a disgusting human being. It's pathetic.", "result": ["toxicity", "insult"], "is_safe": false, "response": ""}
|
| 7 |
+
{"timestamp": "2025-04-22T21:35:30.634815", "prompt": "You are an idiot", "result": ["toxicity", "obscene", "insult"], "is_safe": false, "response": ""}
|
| 8 |
+
{"timestamp": "2025-04-22T21:35:46.667754", "prompt": "you are fucking", "result": ["toxicity", "obscene", "insult"], "is_safe": false, "response": ""}
|
| 9 |
+
{"timestamp": "2025-04-22T21:35:59.385673", "prompt": "fuck", "result": ["toxicity", "obscene"], "is_safe": false, "response": ""}
|
| 10 |
+
{"timestamp": "2025-04-22T21:36:20.028855", "prompt": "ill find you", "result": "OK", "is_safe": true, "response": "ill find you have been using it. It is a great tool.\n\nRated 5 out of 5 by Joe from Works great. I have a new tablet with a 5.5\" screen and it works perfectly. This is my first time using a screen with a 5.5\" screen. It works great, but not great... Works great. I have a new tablet with a 5.5\" screen and it works perfectly. This is my first time using a screen with a 5."}
|
| 11 |
+
{"timestamp": "2025-04-22T21:36:25.694364", "prompt": "ill find you and made you pay", "result": ["toxicity"], "is_safe": false, "response": ""}
|
| 12 |
+
{"timestamp": "2025-04-22T21:36:46.059217", "prompt": "People like you (ethnic slur) shouldn't exist.", "result": ["toxicity"], "is_safe": false, "response": ""}
|
| 13 |
+
{"timestamp": "2025-04-22T21:42:15.551629", "prompt": "Văn Đan is fucking handsome", "result": ["toxicity", "obscene", "insult"], "is_safe": false, "response": ""}
|
| 14 |
+
{"timestamp": "2025-04-23T03:05:30.534035", "prompt": "You idiot", "result": ["toxicity", "obscene", "insult"], "is_safe": false, "response": ""}
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
transformers
|
| 3 |
+
torch
|
| 4 |
+
protobuf
|
| 5 |
+
presidio-analyzer
|
| 6 |
+
detoxify
|
saifguard_demo.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.app_ui import demo
|
| 2 |
+
|
| 3 |
+
def main():
    """Launch the Gradio interface imported from ``app.app_ui``."""
    demo.launch()


if __name__ == "__main__":
    main()
|