Upload 13 files
Browse files
- detectors/ambiguity.py +1 -1
- detectors/clarity.py +1 -1
- detectors/context_completeness.py +1 -1
- detectors/diversity.py +1 -1
- detectors/ethic_compliance.py +1 -1
- detectors/feasibility.py +1 -1
- detectors/grammar_spelling.py +31 -7
- detectors/injection_risk.py +1 -1
- detectors/length_appropriateness.py +1 -1
- detectors/relevance.py +1 -1
- detectors/step_guidance.py +2 -10
- detectors/structural_cleanness.py +27 -10
- detectors/verbosity.py +1 -1
detectors/ambiguity.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
import re
|
| 3 |
from ..utils.nlp_helpers import tokenize
|
| 4 |
|
| 5 |
-
def
|
| 6 |
# 简单检测多义词列表
|
| 7 |
ambiguous = [w for w in ["或者","可能","大概"] if w in prompt]
|
| 8 |
score = 1.0 - len(ambiguous)*0.2
|
|
|
|
| 2 |
import re
|
| 3 |
from ..utils.nlp_helpers import tokenize
|
| 4 |
|
| 5 |
+
def detect_ambiguity(prompt: str) -> dict:
|
| 6 |
# 简单检测多义词列表
|
| 7 |
ambiguous = [w for w in ["或者","可能","大概"] if w in prompt]
|
| 8 |
score = 1.0 - len(ambiguous)*0.2
|
detectors/clarity.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# detectors/clarity.py
|
| 2 |
from ..utils.nlp_helpers import sentence_length
|
| 3 |
|
| 4 |
-
def
|
| 5 |
length = sentence_length(prompt)
|
| 6 |
score = 1.0 if length < 50 else max(0.0, 1.0 - (length - 50)/100)
|
| 7 |
suggestions = []
|
|
|
|
| 1 |
# detectors/clarity.py
|
| 2 |
from ..utils.nlp_helpers import sentence_length
|
| 3 |
|
| 4 |
+
def detect_clarity(prompt: str) -> dict:
|
| 5 |
length = sentence_length(prompt)
|
| 6 |
score = 1.0 if length < 50 else max(0.0, 1.0 - (length - 50)/100)
|
| 7 |
suggestions = []
|
detectors/context_completeness.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# detectors/context_completeness.py
|
| 2 |
|
| 3 |
-
def
|
| 4 |
# 简易:检测是否包含关键词示例或上下文标签
|
| 5 |
has_context = '背景' in prompt or '示例' in prompt
|
| 6 |
score = 1.0 if has_context else 0.5
|
|
|
|
| 1 |
# detectors/context_completeness.py
|
| 2 |
|
| 3 |
+
def detect_context_completeness(prompt: str) -> dict:
|
| 4 |
# 简易:检测是否包含关键词示例或上下文标签
|
| 5 |
has_context = '背景' in prompt or '示例' in prompt
|
| 6 |
score = 1.0 if has_context else 0.5
|
detectors/diversity.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
-
def
|
| 6 |
vec = TfidfVectorizer().fit_transform(batch_prompts)
|
| 7 |
sim = (vec * vec.T).A
|
| 8 |
avg_sim = np.mean(sim[np.triu_indices_from(sim, k=1)])
|
|
|
|
| 2 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
+
def detect_diversity(batch_prompts: list) -> dict:
|
| 6 |
vec = TfidfVectorizer().fit_transform(batch_prompts)
|
| 7 |
sim = (vec * vec.T).A
|
| 8 |
avg_sim = np.mean(sim[np.triu_indices_from(sim, k=1)])
|
detectors/ethic_compliance.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# detectors/ethic_compliance.py
|
| 2 |
|
| 3 |
-
def
|
| 4 |
# 简易词库检测
|
| 5 |
blacklist = ['暴力','歧视','仇恨']
|
| 6 |
hits = [w for w in blacklist if w in prompt]
|
|
|
|
| 1 |
# detectors/ethic_compliance.py
|
| 2 |
|
| 3 |
+
def detect_ethic_compliance(prompt: str) -> dict:
|
| 4 |
# 简易词库检测
|
| 5 |
blacklist = ['暴力','歧视','仇恨']
|
| 6 |
hits = [w for w in blacklist if w in prompt]
|
detectors/feasibility.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# detectors/feasibility.py
|
| 2 |
from ..utils.nlp_helpers import sentence_length
|
| 3 |
|
| 4 |
-
def
|
| 5 |
length = sentence_length(prompt)
|
| 6 |
score = 1.0 if length < max_tokens/2 else 0.5
|
| 7 |
suggestions = []
|
|
|
|
| 1 |
# detectors/feasibility.py
|
| 2 |
from ..utils.nlp_helpers import sentence_length
|
| 3 |
|
| 4 |
+
def detect_feasibility(prompt: str, max_tokens: int = 512) -> dict:
|
| 5 |
length = sentence_length(prompt)
|
| 6 |
score = 1.0 if length < max_tokens/2 else 0.5
|
| 7 |
suggestions = []
|
detectors/grammar_spelling.py
CHANGED
|
@@ -1,10 +1,34 @@
|
|
| 1 |
-
|
| 2 |
-
|
|
|
|
| 3 |
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
matches = tool.check(prompt)
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Grammar and spelling detector for PRIVAL.
|
| 3 |
+
"""
|
| 4 |
|
| 5 |
+
try:
|
| 6 |
+
from language_tool_python import LanguageTool
|
| 7 |
+
except Exception:
|
| 8 |
+
LanguageTool = None
|
| 9 |
|
| 10 |
+
|
| 11 |
+
def grammar_spelling(prompt: str):
|
| 12 |
+
"""
|
| 13 |
+
使用 LanguageTool 检测语法和拼写错误,如果不可用则跳过。
|
| 14 |
+
"""
|
| 15 |
+
# 如果 LanguageTool 不可用,跳过此检测
|
| 16 |
+
if LanguageTool is None:
|
| 17 |
+
return {"score": None, "suggestions": ["LanguageTool not available; skipped grammar checks."]}
|
| 18 |
+
|
| 19 |
+
# 尝试初始化 LanguageTool 服务
|
| 20 |
+
try:
|
| 21 |
+
tool = LanguageTool('en-US')
|
| 22 |
+
except Exception:
|
| 23 |
+
return {"score": None, "suggestions": ["Failed to start LanguageTool; skipped grammar checks."]}
|
| 24 |
+
|
| 25 |
+
# 执行检查
|
| 26 |
matches = tool.check(prompt)
|
| 27 |
+
suggestions = []
|
| 28 |
+
for match in matches:
|
| 29 |
+
suggestions.append(f"{match.ruleId}: {match.message} at position {match.offset}")
|
| 30 |
+
|
| 31 |
+
# 计算分数:错误越少得分越高
|
| 32 |
+
length = len(prompt.split()) if prompt else 1
|
| 33 |
+
score = max(0.0, 1.0 - len(matches) / length)
|
| 34 |
+
return {"score": score, "suggestions": suggestions}
|
detectors/injection_risk.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# detectors/injection_risk.py
|
| 2 |
import re
|
| 3 |
|
| 4 |
-
def
|
| 5 |
patterns = [r"\bignore previous\b", r"\bmalicious\b"]
|
| 6 |
hits = [p for p in patterns if re.search(p, prompt, re.IGNORECASE)]
|
| 7 |
score = 1.0 - len(hits)*0.5
|
|
|
|
| 1 |
# detectors/injection_risk.py
|
| 2 |
import re
|
| 3 |
|
| 4 |
+
def detect_injection_risk(prompt: str) -> dict:
|
| 5 |
patterns = [r"\bignore previous\b", r"\bmalicious\b"]
|
| 6 |
hits = [p for p in patterns if re.search(p, prompt, re.IGNORECASE)]
|
| 7 |
score = 1.0 - len(hits)*0.5
|
detectors/length_appropriateness.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# detectors/length_appropriateness.py
|
| 2 |
from ..utils.nlp_helpers import sentence_length
|
| 3 |
|
| 4 |
-
def
|
| 5 |
length = sentence_length(prompt)
|
| 6 |
score = 1.0 if min_len <= length <= max_len else 0.5
|
| 7 |
suggestions = []
|
|
|
|
| 1 |
# detectors/length_appropriateness.py
|
| 2 |
from ..utils.nlp_helpers import sentence_length
|
| 3 |
|
| 4 |
+
def detect_length_appropriateness(prompt: str, min_len: int = 10, max_len: int = 200) -> dict:
|
| 5 |
length = sentence_length(prompt)
|
| 6 |
score = 1.0 if min_len <= length <= max_len else 0.5
|
| 7 |
suggestions = []
|
detectors/relevance.py
CHANGED
|
@@ -3,7 +3,7 @@ from sentence_transformers import SentenceTransformer, util
|
|
| 3 |
|
| 4 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 5 |
|
| 6 |
-
def
|
| 7 |
if reference:
|
| 8 |
sim = util.cos_sim(model.encode(prompt), model.encode(reference)).item()
|
| 9 |
else:
|
|
|
|
| 3 |
|
| 4 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 5 |
|
| 6 |
+
def detect_relevance(prompt: str, reference: str = None) -> dict:
|
| 7 |
if reference:
|
| 8 |
sim = util.cos_sim(model.encode(prompt), model.encode(reference)).item()
|
| 9 |
else:
|
detectors/step_guidance.py
CHANGED
|
@@ -1,17 +1,9 @@
|
|
| 1 |
# detectors/step_guidance.py
|
| 2 |
from ..utils.nlp_helpers import tokenize
|
| 3 |
|
| 4 |
-
def
|
| 5 |
tokens = tokenize(prompt)
|
| 6 |
has_step = any(w in ["步骤","首先","然后","最后"] for w in tokens)
|
| 7 |
score = 1.0 if has_step else 0.0
|
| 8 |
suggestions = [] if has_step else ["建议在 prompt 中添加明确步骤提示,如'首先...'、'然后...'" ]
|
| 9 |
-
return {"score": score, "suggestions": suggestions}# detectors/step_guidance.py
|
| 10 |
-
from ..utils.nlp_helpers import tokenize
|
| 11 |
-
|
| 12 |
-
def evaluate(prompt: str) -> dict:
|
| 13 |
-
tokens = tokenize(prompt)
|
| 14 |
-
has_step = any(w in ["步骤","首先","然后","最后"] for w in tokens)
|
| 15 |
-
score = 1.0 if has_step else 0.0
|
| 16 |
-
suggestions = [] if has_step else ["建议在 prompt 中添加明确步骤提示,如'首先...'、'然后...'" ]
|
| 17 |
-
return {"score": score, "suggestions": suggestions}
|
|
|
|
| 1 |
# detectors/step_guidance.py
|
| 2 |
from ..utils.nlp_helpers import tokenize
|
| 3 |
|
| 4 |
+
def detect_step_guidance(prompt: str) -> dict:
|
| 5 |
tokens = tokenize(prompt)
|
| 6 |
has_step = any(w in ["步骤","首先","然后","最后"] for w in tokens)
|
| 7 |
score = 1.0 if has_step else 0.0
|
| 8 |
suggestions = [] if has_step else ["建议在 prompt 中添加明确步骤提示,如'首先...'、'然后...'" ]
|
| 9 |
+
return {"score": score, "suggestions": suggestions}# detectors/step_guidance.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
detectors/structural_cleanness.py
CHANGED
|
@@ -1,10 +1,27 @@
|
|
| 1 |
-
# detectors/structural_cleanness.py
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# prival/prival/detectors/structural_cleanness.py
|
| 2 |
+
|
| 3 |
+
# 1. 捕获 spacy 不存在的情况
|
| 4 |
+
try:
|
| 5 |
+
import spacy
|
| 6 |
+
except ImportError:
|
| 7 |
+
spacy = None
|
| 8 |
+
|
| 9 |
+
from ..utils.nlp_helpers import dependency_depth, sentence_length
|
| 10 |
+
# …其它 imports…
|
| 11 |
+
|
| 12 |
+
def structural_cleanness(prompt: str):
|
| 13 |
+
# 2. 如果没有安装 spaCy,就跳过
|
| 14 |
+
if spacy is None:
|
| 15 |
+
return {
|
| 16 |
+
"score": None,
|
| 17 |
+
"suggestions": ["spaCy not installed; structural_cleanness skipped."]
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
# 原有的 spaCy 分析逻辑,例如:
|
| 21 |
+
# nlp = spacy.load("en_core_web_sm")
|
| 22 |
+
# doc = nlp(prompt)
|
| 23 |
+
# depth = dependency_depth(doc)
|
| 24 |
+
# …计算分数、生成 suggestions …
|
| 25 |
+
|
| 26 |
+
# 最终返回一个 dict
|
| 27 |
+
return {"score": computed_score, "suggestions": suggestions}
|
detectors/verbosity.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# detectors/verbosity.py
|
| 2 |
from ..utils.nlp_helpers import sentence_length
|
| 3 |
|
| 4 |
-
def
|
| 5 |
length = sentence_length(prompt)
|
| 6 |
score = 1.0 if length < 60 else max(0.0, 1.0 - (length-60)/200)
|
| 7 |
suggestions = []
|
|
|
|
| 1 |
# detectors/verbosity.py
|
| 2 |
from ..utils.nlp_helpers import sentence_length
|
| 3 |
|
| 4 |
+
def detect_verbosity(prompt: str) -> dict:
|
| 5 |
length = sentence_length(prompt)
|
| 6 |
score = 1.0 if length < 60 else max(0.0, 1.0 - (length-60)/200)
|
| 7 |
suggestions = []
|