Upload 6 files

Browse files

Files changed (6) hide show

__init__.py +4 -0
Prival Module Package +369 -0
config.yaml +29 -0
core.py +21 -0
report.py +49 -0
scoring.py +24 -0

__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+# __init__.py
+from .core import evaluate_prompt
+__all__ = ["evaluate_prompt"]

Prival Module Package ADDED Viewed

	@@ -0,0 +1,369 @@

+```yaml
+# config.yaml
+enabled_dimensions:
+  clarity:     # 表示启用清晰度检测
+    weight: 0.15
+  ambiguity:
+    weight: 0.10
+  step_guidance:
+    weight: 0.10
+  verbosity:
+    weight: 0.10
+  injection_risk:
+    weight: 0.15
+  context_completeness:
+    weight: 0.10
+  ethic_compliance:
+    weight: 0.10
+  structural_cleanness:
+    weight: 0.05
+  relevance:
+    weight: 0.05
+  feasibility:
+    weight: 0.05
+  grammar_spelling:
+    weight: 0.05
+  length_appropriateness:
+    weight: 0.05
+  diversity:
+    weight: 0.05
+# politeness 未启用
+```
+```python
+# __init__.py
+from .core import evaluate_prompt
+__all__ = ["evaluate_prompt"]
+```
+---
+```python
+# utils/nlp_helpers.py
+import spacy
+from typing import List
+# 加载小型中文模型或英文模型
+try:
+    nlp = spacy.load("zh_core_web_sm")
+except:
+    nlp = spacy.load("en_core_web_sm")
+def tokenize(text: str) -> List[str]:
+    return [token.text for token in nlp(text)]
+def sentence_length(text: str) -> int:
+    return len(tokenize(text))
+def dependency_depth(text: str) -> int:
+    doc = nlp(text)
+    return max([len([t for t in token.ancestors]) for token in doc])
+```
+---
+```python
+# detectors/clarity.py
+from ..utils.nlp_helpers import sentence_length
+def evaluate(prompt: str) -> dict:
+    length = sentence_length(prompt)
+    score = 1.0 if length < 50 else max(0.0, 1.0 - (length - 50)/100)
+    suggestions = []
+    if length > 100:
+        suggestions.append("Prompt 太长，建议拆分或简化。")
+    return {"score": round(score, 2), "suggestions": suggestions}
+```
+```python
+# detectors/ambiguity.py
+import re
+from ..utils.nlp_helpers import tokenize
+def evaluate(prompt: str) -> dict:
+    # 简单检测多义词列表
+    ambiguous = [w for w in ["或者","可能","大概"] if w in prompt]
+    score = 1.0 - len(ambiguous)*0.2
+    suggestions = [f"检测到歧义词：{w}" for w in ambiguous]
+    return {"score": max(score, 0.0), "suggestions": suggestions}
+```
+```python
+# detectors/step_guidance.py
+from ..utils.nlp_helpers import tokenize
+def evaluate(prompt: str) -> dict:
+    tokens = tokenize(prompt)
+    has_step = any(w in ["步骤","首先","然后","最后"] for w in tokens)
+    score = 1.0 if has_step else 0.0
+    suggestions = [] if has_step else ["建议在 prompt 中添加明确步骤提示，如'首先...'、'然后...'" ]
+    return {"score": score, "suggestions": suggestions}
+```
+```python
+# detectors/verbosity.py
+from ..utils.nlp_helpers import sentence_length
+def evaluate(prompt: str) -> dict:
+    length = sentence_length(prompt)
+    score = 1.0 if length < 60 else max(0.0, 1.0 - (length-60)/200)
+    suggestions = []
+    if length > 80:
+        suggestions.append("Prompt 内容冗长，考虑精简无关信息。")
+    return {"score": round(score,2), "suggestions": suggestions}
+```
+```python
+# detectors/injection_risk.py
+import re
+def evaluate(prompt: str) -> dict:
+    patterns = [r"\bignore previous\b", r"\bmalicious\b"]
+    hits = [p for p in patterns if re.search(p, prompt, re.IGNORECASE)]
+    score = 1.0 - len(hits)*0.5
+    suggestions = ["检测到潜在注入风险模式：%s" % h for h in hits]
+    return {"score": max(score,0.0), "suggestions": suggestions}
+```
+```python
+# detectors/context_completeness.py
+def evaluate(prompt: str) -> dict:
+    # 简易：检测是否包含关键词示例或上下文标签
+    has_context = '背景' in prompt or '示例' in prompt
+    score = 1.0 if has_context else 0.5
+    suggestions = [] if has_context else ["提示：如有必要，可添加背景或示例以提升上下文完整性。"]
+    return {"score": score, "suggestions": suggestions}
+```
+```python
+# detectors/ethic_compliance.py
+def evaluate(prompt: str) -> dict:
+    # 简易词库检测
+    blacklist = ['暴力','歧视','仇恨']
+    hits = [w for w in blacklist if w in prompt]
+    score = 1.0 if not hits else 0.0
+    suggestions = [] if not hits else ["检测到不当词汇：%s" % w for w in hits]
+    return {"score": score, "suggestions": suggestions}
+```
+```python
+# detectors/structural_cleanness.py
+from ..utils.nlp_helpers import dependency_depth
+def evaluate(prompt: str) -> dict:
+    depth = dependency_depth(prompt)
+    score = 1.0 if depth < 3 else max(0.0, 1.0 - (depth-3)*0.2)
+    suggestions = []
+    if depth > 5:
+        suggestions.append("句子结构过于复杂，建议拆分或简化嵌套。")
+    return {"score": round(score,2), "suggestions": suggestions}
+```
+```python
+# detectors/relevance.py
+from sentence_transformers import SentenceTransformer, util
+model = SentenceTransformer('all-MiniLM-L6-v2')
+def evaluate(prompt: str, reference: str = None) -> dict:
+    if reference:
+        sim = util.cos_sim(model.encode(prompt), model.encode(reference)).item()
+    else:
+        sim = 0.5
+    score = sim
+    suggestions = []
+    return {"score": round(score,2), "suggestions": suggestions}
+```
+```python
+# detectors/feasibility.py
+from ..utils.nlp_helpers import sentence_length
+def evaluate(prompt: str, max_tokens: int = 512) -> dict:
+    length = sentence_length(prompt)
+    score = 1.0 if length < max_tokens/2 else 0.5
+    suggestions = []
+    if length > max_tokens:
+        suggestions.append("Prompt 太长，可能超出模型最大长度限制。")
+    return {"score": score, "suggestions": suggestions}
+```
+```python
+# detectors/grammar_spelling.py
+from language_tool_python import LanguageTool
+tool = LanguageTool('en-US')
+def evaluate(prompt: str) -> dict:
+    matches = tool.check(prompt)
+    score = 1.0 if not matches else max(0.0, 1.0 - len(matches)*0.1)
+    suggestions = [m.message for m in matches]
+    return {"score": round(score,2), "suggestions": suggestions}
+```
+```python
+# detectors/length_appropriateness.py
+from ..utils.nlp_helpers import sentence_length
+def evaluate(prompt: str, min_len: int = 10, max_len: int = 200) -> dict:
+    length = sentence_length(prompt)
+    score = 1.0 if min_len <= length <= max_len else 0.5
+    suggestions = []
+    if length < min_len:
+        suggestions.append(f"Prompt 太短（{length}），建议至少 {min_len} 个词。")
+    if length > max_len:
+        suggestions.append(f"Prompt 太长（{length}），建议不超过 {max_len} 个词。")
+    return {"score": score, "suggestions": suggestions}
+```
+```python
+# detectors/diversity.py
+from sklearn.feature_extraction.text import TfidfVectorizer
+import numpy as np
+def evaluate(batch_prompts: list) -> dict:
+    vec = TfidfVectorizer().fit_transform(batch_prompts)
+    sim = (vec * vec.T).A
+    avg_sim = np.mean(sim[np.triu_indices_from(sim, k=1)])
+    score = 1 - avg_sim
+    suggestions = []
+    if avg_sim > 0.8:
+        suggestions.append("批量 prompt 相似度过高，建议增加多样性。")
+    return {"score": round(score,2), "suggestions": suggestions}
+```
+```python
+# core.py
+import yaml
+import concurrent.futures
+from .detectors import clarity, ambiguity, step_guidance, verbosity, injection_risk, context_completeness, ethic_compliance, structural_cleanness, relevance, feasibility, grammar_spelling, length_appropriateness, diversity
+# 映射名称到模块
+DETECTORS = {
+    'clarity': clarity,
+    'ambiguity': ambiguity,
+    'step_guidance': step_guidance,
+    'verbosity': verbosity,
+    'injection_risk': injection_risk,
+    'context_completeness': context_completeness,
+    'ethic_compliance': ethic_compliance,
+    'structural_cleanness': structural_cleanness,
+    'relevance': relevance,
+    'feasibility': feasibility,
+    'grammar_spelling': grammar_spelling,
+    'length_appropriateness': length_appropriateness,
+    'diversity': diversity
+}
+```
+# scoring.py
+"""
+汇总各维度打分，按权重计算总分，输出标准结果格式。
+"""
+def compute_overall_score(scores: dict, weights: dict) -> float:
+    """按 weights 对 scores 中每个维度加权平均，返回总分（0.0–1.0）。"""
+    total_weight = sum(weights.values())
+    if total_weight == 0:
+        return 0.0
+    weighted_sum = sum(scores[dim] * weights.get(dim, 0) for dim in scores)
+    return round(weighted_sum / total_weight, 4)
+def format_scores(scores: dict, suggestions: dict, overall: float) -> dict:
+    """
+    将各维度分数、建议和总分整理成字典，方便序列化输出。
+    返回格式：{"scores": {...}, "suggestions": {...}, "overall": float}
+    """
+    return {
+        "scores": scores,
+        "suggestions": suggestions,
+        "overall": overall
+    }
+# report.py
+"""
+生成 HTML 与 Markdown 格式的报告，包含各维度得分和建议。
+"""
+from jinja2 import Template
+# Jinja2 source for the HTML report. It reads three variables: `overall`, a
+# `scores` mapping (one table row per item) and a `suggestions` mapping that is
+# indexed by the same dimension keys and joined with '; '.
+HTML_TEMPLATE = """
+<html>
+<head><title>PRIVAL Prompt 验证报告</title></head>
+<body>
+<h2>PRIVAL 验证报告</h2>
+<p>Overall Score: {{ overall }}</p>
+<table border=1 cellpadding=5>
+  <tr><th>维度</th><th>分数</th><th>建议</th></tr>
+  {% for dim, score in scores.items() %}
+  <tr>
+    <td>{{ dim }}</td>
+    <td>{{ score }}</td>
+    <td>{{ suggestions[dim] | join('; ') }}</td>
+  </tr>
+  {% endfor %}
+</table>
+</body>
+</html>
+"""
+# Jinja2 source for the Markdown report. It reads `overall`, a `scores` mapping
+# (one table row per item) and a `suggestions` mapping indexed by the same
+# dimension keys and joined with '; '.
+MD_TEMPLATE = """
+# PRIVAL Prompt 验证报告
+**Overall Score:** {{ overall }}
+| 维度 | 分数 | 建议 |
+|-----|-----|------|
+{% for dim, score in scores.items() %}
+| {{ dim }} | {{ score }} | {{ suggestions[dim] | join('; ') }} |
+{% endfor %}
+"""
+def generate_html_report(data: dict) -> str:
+    """返回 HTML 格式报告字符串。"""
+    tmpl = Template(HTML_TEMPLATE)
+    return tmpl.render(scores=data['scores'], suggestions=data['suggestions'], overall=data['overall'])
+def generate_md_report(data: dict) -> str:
+    """返回 Markdown 格式报告字符串。"""
+    tmpl = Template(MD_TEMPLATE)
+    return tmpl.render(scores=data['scores'], suggestions=data['suggestions'], overall=data['overall'])
+# tests/ 目录结构与示例测试
+mkdir -p tests
+# tests/test_scoring.py
+import pytest
+from prival.scoring import compute_overall_score
+def test_compute_overall_score_empty():
+    assert compute_overall_score({}, {}) == 0.0
+def test_compute_overall_score_simple():
+    scores = {'a': 1.0, 'b': 0.5}
+    weights = {'a': 0.5, 'b': 0.5}
+    assert compute_overall_score(scores, weights) == 0.75
+# tests/test_report.py
+import pytest
+from prival.report import generate_md_report, generate_html_report
+def test_generate_reports():
+    data = {
+        'scores': {'clarity': 0.8},
+        'suggestions': {'clarity': ['Be more specific']},
+        'overall': 0.8
+    }
+    md = generate_md_report(data)
+    assert 'clarity' in md and 'Be more specific' in md
+    html = generate_html_report(data)
+    assert '<td>clarity</td>' in html

config.yaml ADDED Viewed

	@@ -0,0 +1,29 @@

+# config.yaml
+enabled_dimensions:
+  clarity:     # 表示启用清晰度检测
+    weight: 0.15
+  ambiguity:
+    weight: 0.10
+  step_guidance:
+    weight: 0.10
+  verbosity:
+    weight: 0.10
+  injection_risk:
+    weight: 0.15
+  context_completeness:
+    weight: 0.10
+  ethic_compliance:
+    weight: 0.10
+  structural_cleanness:
+    weight: 0.05
+  relevance:
+    weight: 0.05
+  feasibility:
+    weight: 0.05
+  grammar_spelling:
+    weight: 0.05
+  length_appropriateness:
+    weight: 0.05
+  diversity:
+    weight: 0.05
+# politeness 未启用

core.py ADDED Viewed

	@@ -0,0 +1,21 @@

+# core.py
+import yaml
+import concurrent.futures
+from .detectors import clarity, ambiguity, step_guidance, verbosity, injection_risk, context_completeness, ethic_compliance, structural_cleanness, relevance, feasibility, grammar_spelling, length_appropriateness, diversity
+# 映射名称到模块
+DETECTORS = {
+    'clarity': clarity,
+    'ambiguity': ambiguity,
+    'step_guidance': step_guidance,
+    'verbosity': verbosity,
+    'injection_risk': injection_risk,
+    'context_completeness': context_completeness,
+    'ethic_compliance': ethic_compliance,
+    'structural_cleanness': structural_cleanness,
+    'relevance': relevance,
+    'feasibility': feasibility,
+    'grammar_spelling': grammar_spelling,
+    'length_appropriateness': length_appropriateness,
+    'diversity': diversity
+}

report.py ADDED Viewed

	@@ -0,0 +1,49 @@

+# report.py
+"""
+生成 HTML 与 Markdown 格式的报告，包含各维度得分和建议。
+"""
+from jinja2 import Template
+# Jinja2 source for the HTML report. It reads three variables: `overall`, a
+# `scores` mapping (one table row per item) and a `suggestions` mapping that is
+# indexed by the same dimension keys and joined with '; '.
+HTML_TEMPLATE = """
+<html>
+<head><title>PRIVAL Prompt 验证报告</title></head>
+<body>
+<h2>PRIVAL 验证报告</h2>
+<p>Overall Score: {{ overall }}</p>
+<table border=1 cellpadding=5>
+  <tr><th>维度</th><th>分数</th><th>建议</th></tr>
+  {% for dim, score in scores.items() %}
+  <tr>
+    <td>{{ dim }}</td>
+    <td>{{ score }}</td>
+    <td>{{ suggestions[dim] | join('; ') }}</td>
+  </tr>
+  {% endfor %}
+</table>
+</body>
+</html>
+"""
+# Jinja2 source for the Markdown report. It reads `overall`, a `scores` mapping
+# (one table row per item) and a `suggestions` mapping indexed by the same
+# dimension keys and joined with '; '.
+MD_TEMPLATE = """
+# PRIVAL Prompt 验证报告
+**Overall Score:** {{ overall }}
+| 维度 | 分数 | 建议 |
+|-----|-----|------|
+{% for dim, score in scores.items() %}
+| {{ dim }} | {{ score }} | {{ suggestions[dim] | join('; ') }} |
+{% endfor %}
+"""
+def generate_html_report(data: dict) -> str:
+    """返回 HTML 格式报告字符串。"""
+    tmpl = Template(HTML_TEMPLATE)
+    return tmpl.render(scores=data['scores'], suggestions=data['suggestions'], overall=data['overall'])
+def generate_md_report(data: dict) -> str:
+    """返回 Markdown 格式报告字符串。"""
+    tmpl = Template(MD_TEMPLATE)
+    return tmpl.render(scores=data['scores'], suggestions=data['suggestions'], overall=data['overall'])

scoring.py ADDED Viewed

	@@ -0,0 +1,24 @@

+# scoring.py
+"""
+汇总各维度打分，按权重计算总分，输出标准结果格式。
+"""
+def compute_overall_score(scores: dict, weights: dict) -> float:
+    """按 weights 对 scores 中每个维度加权平均，返回总分（0.0–1.0）。"""
+    total_weight = sum(weights.values())
+    if total_weight == 0:
+        return 0.0
+    weighted_sum = sum(scores[dim] * weights.get(dim, 0) for dim in scores)
+    return round(weighted_sum / total_weight, 4)
+def format_scores(scores: dict, suggestions: dict, overall: float) -> dict:
+    """
+    将各维度分数、建议和总分整理成字典，方便序列化输出。
+    返回格式：{"scores": {...}, "suggestions": {...}, "overall": float}
+    """
+    return {
+        "scores": scores,
+        "suggestions": suggestions,
+        "overall": overall
+    }