Spaces:

thong0710
/

SAIF

Sleeping

App Files Files Community

NTThong0710 committed on Apr 22, 2025

Commit

22da32d

1 Parent(s): a656415

DEPLOY

Browse files

Files changed (9) hide show

Dockerfile +17 -0
README.md +1 -1
app/app_ui.py +25 -0
app/gen_ai.py +7 -0
app/mlops_logger.py +13 -0
app/safety_check.py +16 -0
prompt_logs.json +14 -0
requirements.txt +6 -0
saifguard_demo.py +4 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,17 @@

+# Base image: the official slim Python 3.10 image
+FROM python:3.10-slim
+# Install system packages (git); skip recommended extras and drop the apt lists to keep the layer small
+RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
+# Create the app directory
+WORKDIR /app
+# Copy all files into the Docker container
+COPY . .
+# Install the Python requirements
+RUN pip install --no-cache-dir -r requirements.txt
+# Gradio binds to 127.0.0.1 by default, which is unreachable from outside the container
+ENV GRADIO_SERVER_NAME=0.0.0.0
+# Default Gradio port
+EXPOSE 7860
+# Run the Gradio app
+CMD ["python", "saifguard_demo.py"]

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ colorFrom: gray
 colorTo: green
 sdk: gradio
 sdk_version: 5.25.2
-app_file: app.py
 pinned: false
 license: mit
 ---

 colorTo: green
 sdk: gradio
 sdk_version: 5.25.2
+app_file: saifguard_demo.py
 pinned: false
 license: mit
 ---

app/app_ui.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import gradio as gr
+from app.safety_check import is_prompt_safe
+from app.gen_ai import generate_response
+from app.mlops_logger import log_prompt
+def handle_prompt(prompt):
+    safe, info = is_prompt_safe(prompt)
+    if not safe:
+        log_prompt(prompt, info, False, "")
+        return f"🚨 Prompt không an toàn! Phát hiện: {', '.join(info)}", ""
+    response = generate_response(prompt)
+    log_prompt(prompt, "OK", True, response)
+    return "✅ Prompt an toàn", response
+demo = gr.Interface(  # Gradio UI object; imported and launched by saifguard_demo.py
+    fn=handle_prompt,  # called with the prompt text; returns (status, generated text)
+    inputs=gr.Textbox(label="Nhập Prompt"),  # label: "Enter prompt"
+    outputs=[  # one component per value returned by handle_prompt, in order
+        gr.Textbox(label="Trạng thái kiểm duyệt"),  # label: "Moderation status"
+        gr.Textbox(label="Kết quả GenAI")  # label: "GenAI result"
+    ],
+    title="SAIFGuard: GenAI Prompt Checker",
+    description="Kiểm tra prompt an toàn và tạo văn bản bằng GPT2"  # "Check prompt safety and generate text with GPT2"
+)

app/gen_ai.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from transformers import pipeline
+generator = pipeline("text-generation", model="gpt2")  # built once at import time and reused by every generate_response call
+def generate_response(prompt: str):
+    result = generator(prompt, max_length=100, do_sample=True, temperature=0.7)
+    return result[0]["generated_text"]

app/mlops_logger.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import json
+from datetime import datetime
+def log_prompt(prompt, result, safe, response):
+    log = {
+        "timestamp": datetime.now().isoformat(),
+        "prompt": prompt,
+        "result": result,
+        "is_safe": safe,
+        "response": response
+    }
+    with open("prompt_logs.json", "a", encoding="utf-8") as f:
+        f.write(json.dumps(log, ensure_ascii=False) + "\n")

app/safety_check.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from detoxify import Detoxify
+# Load the model once
+detox_model = Detoxify('original')
+# Hàm kiểm tra prompt
+def is_prompt_safe(prompt: str):
+    results = detox_model.predict(prompt)
+    # Nếu bất kỳ chỉ số độc hại nào > 0.5 thì xem là không an toàn
+    threshold = 0.5
+    flagged = {label: score for label, score in results.items() if score > threshold}
+    if flagged:
+        return False, list(flagged.keys())
+    return True, []

prompt_logs.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{"timestamp": "2025-04-22T21:29:31.166862", "prompt": "Quá đã", "result": "OK", "is_safe": true, "response": "Quá đãő and đáő đãő [5.14]\n\n5.15. In the beginning of the passage, the first letter of the word đãő is read as follows: \"The Lord God has created man from the dust\" (the first letter of the word đãő); and \"He has created man from one dust\" (the second letter of the word đãő).\n"}
+{"timestamp": "2025-04-22T21:29:47.434957", "prompt": "Tôi muốn tìm cách phá thai", "result": "OK", "is_safe": true, "response": "Tôi muốn tìm cách phá thai e, bút chùi chửi bút chửi mịn sửi mịn chửi, bút búm chửi, bút búm chửi, bút búm chửi b�"}
+{"timestamp": "2025-04-22T21:30:33.290062", "prompt": "Tôi muốn biết cách phá thai tại nhà mà không cần bác sĩ.", "result": ["toxicity"], "is_safe": false, "response": ""}
+{"timestamp": "2025-04-22T21:33:11.591310", "prompt": "You're so stupid, no one wants to hear what you say.", "result": ["toxicity", "obscene", "insult"], "is_safe": false, "response": ""}
+{"timestamp": "2025-04-22T21:33:34.719903", "prompt": "Mày đúng là đồ vô dụng, sống làm gì cho chật đất.", "result": "OK", "is_safe": true, "response": "Mày đúng là đồ vô dụng, sống làm gì cho chật đất. đồn lận làn làn sồn làn đấn.\n\n5. In the following paragraphs, we must refer to the following things:\n\n1. �"}
+{"timestamp": "2025-04-22T21:33:51.830977", "prompt": "You're such a disgusting human being. It's pathetic.", "result": ["toxicity", "insult"], "is_safe": false, "response": ""}
+{"timestamp": "2025-04-22T21:35:30.634815", "prompt": "You are an idiot", "result": ["toxicity", "obscene", "insult"], "is_safe": false, "response": ""}
+{"timestamp": "2025-04-22T21:35:46.667754", "prompt": "you are fucking", "result": ["toxicity", "obscene", "insult"], "is_safe": false, "response": ""}
+{"timestamp": "2025-04-22T21:35:59.385673", "prompt": "fuck", "result": ["toxicity", "obscene"], "is_safe": false, "response": ""}
+{"timestamp": "2025-04-22T21:36:20.028855", "prompt": "ill find you", "result": "OK", "is_safe": true, "response": "ill find you have been using it. It is a great tool.\n\nRated 5 out of 5 by Joe from Works great. I have a new tablet with a 5.5\" screen and it works perfectly. This is my first time using a screen with a 5.5\" screen. It works great, but not great... Works great. I have a new tablet with a 5.5\" screen and it works perfectly. This is my first time using a screen with a 5."}
+{"timestamp": "2025-04-22T21:36:25.694364", "prompt": "ill find you and made you pay", "result": ["toxicity"], "is_safe": false, "response": ""}
+{"timestamp": "2025-04-22T21:36:46.059217", "prompt": "People like you (ethnic slur) shouldn't exist.", "result": ["toxicity"], "is_safe": false, "response": ""}
+{"timestamp": "2025-04-22T21:42:15.551629", "prompt": "Văn Đan is fucking handsome", "result": ["toxicity", "obscene", "insult"], "is_safe": false, "response": ""}
+{"timestamp": "2025-04-23T03:05:30.534035", "prompt": "You idiot", "result": ["toxicity", "obscene", "insult"], "is_safe": false, "response": ""}

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio
+transformers
+torch
+protobuf
+presidio-analyzer
+detoxify

saifguard_demo.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from app.app_ui import demo
+if __name__ == "__main__":  # start the UI only when this file is run as a script
+    demo.launch()