EugeneXiang committed on
Commit
55f57c9
·
verified ·
1 Parent(s): b69bdfc

Upload 6 files

Browse files
Files changed (6) hide show
  1. __init__.py +4 -0
  2. Prival Module Package +369 -0
  3. config.yaml +29 -0
  4. core.py +21 -0
  5. report.py +49 -0
  6. scoring.py +24 -0
__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # __init__.py
2
+ from .core import evaluate_prompt
3
+
4
+ __all__ = ["evaluate_prompt"]
Prival Module Package ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```yaml
2
+ # config.yaml
3
+ enabled_dimensions:
4
+ clarity: # 表示启用清晰度检测
5
+ weight: 0.15
6
+ ambiguity:
7
+ weight: 0.10
8
+ step_guidance:
9
+ weight: 0.10
10
+ verbosity:
11
+ weight: 0.10
12
+ injection_risk:
13
+ weight: 0.15
14
+ context_completeness:
15
+ weight: 0.10
16
+ ethic_compliance:
17
+ weight: 0.10
18
+ structural_cleanness:
19
+ weight: 0.05
20
+ relevance:
21
+ weight: 0.05
22
+ feasibility:
23
+ weight: 0.05
24
+ grammar_spelling:
25
+ weight: 0.05
26
+ length_appropriateness:
27
+ weight: 0.05
28
+ diversity:
29
+ weight: 0.05
30
+ # politeness 未启用
31
+
32
+ ```
33
+
34
+ ```python
35
+ # __init__.py
36
+ from .core import evaluate_prompt
37
+
38
+ __all__ = ["evaluate_prompt"]
39
+ ```
40
+
41
+ ---
42
+
43
+ ```python
44
+ # utils/nlp_helpers.py
45
+ import spacy
46
+ from typing import List
47
+
48
+ # 加载小型中文模型或英文模型
49
# Prefer the small Chinese pipeline and fall back to the small English one
# when it is not installed.  spaCy reports a missing model with OSError, so
# only that is caught: a bare ``except`` would also swallow
# KeyboardInterrupt / SystemExit and hide unrelated failures.
try:
    nlp = spacy.load("zh_core_web_sm")
except OSError:
    nlp = spacy.load("en_core_web_sm")
53
+
54
+
55
def tokenize(text: str) -> List[str]:
    """Return the text of every token the module-level pipeline finds in *text*."""
    pieces = []
    for tok in nlp(text):
        pieces.append(tok.text)
    return pieces
57
+
58
+
59
def sentence_length(text: str) -> int:
    """Return how many tokens ``tokenize`` produces for *text*."""
    tokens = tokenize(text)
    return len(tokens)
61
+
62
+
63
def dependency_depth(text: str) -> int:
    """Return the largest ancestor count of any token in *text*.

    The text is parsed with the module-level pipeline, the ``ancestors`` of
    every token are counted, and the maximum count is returned.

    Text that yields no tokens (for example an empty string) returns 0
    instead of raising ``ValueError`` from ``max()`` on an empty sequence.
    """
    doc = nlp(text)
    # Count lazily instead of building a throwaway list per token.
    return max((sum(1 for _ in token.ancestors) for token in doc), default=0)
66
+ ```
67
+
68
+ ---
69
+
70
+ ```python
71
+ # detectors/clarity.py
72
+ from ..utils.nlp_helpers import sentence_length
73
+
74
def evaluate(prompt: str) -> dict:
    """Score prompt clarity from its token count.

    Prompts shorter than 50 tokens score 1.0; from 50 tokens on, the score
    drops by 0.01 per token beyond 50 and is floored at 0.0.  One suggestion
    is attached when the prompt exceeds 100 tokens.

    Returns a dict with the rounded ``score`` and the ``suggestions`` list.
    """
    n_tokens = sentence_length(prompt)
    if n_tokens < 50:
        raw = 1.0
    else:
        raw = max(0.0, 1.0 - (n_tokens - 50) / 100)
    hints = ["Prompt 太长,建议拆分或简化。"] if n_tokens > 100 else []
    return {"score": round(raw, 2), "suggestions": hints}
81
+ ```
82
+
83
+ ```python
84
+ # detectors/ambiguity.py
85
+ import re
86
+ from ..utils.nlp_helpers import tokenize
87
+
88
def evaluate(prompt: str) -> dict:
    """Score ambiguity by looking for hedging words in *prompt*.

    Each listed word that occurs as a substring costs 0.2 (floored at 0.0)
    and adds one suggestion naming that word.

    The score is rounded to two decimals, matching the other detectors; this
    also removes binary-float artefacts such as 0.3999999999999999.

    Returns a dict with ``score`` and ``suggestions``.
    """
    # Small fixed list of ambiguity markers ("or", "maybe", "roughly").
    ambiguous = [w for w in ("或者", "可能", "大概") if w in prompt]
    score = max(0.0, 1.0 - len(ambiguous) * 0.2)
    suggestions = [f"检测到歧义词:{w}" for w in ambiguous]
    return {"score": round(score, 2), "suggestions": suggestions}
94
+ ```
95
+
96
+ ```python
97
+ # detectors/step_guidance.py
98
+ from ..utils.nlp_helpers import tokenize
99
+
100
def evaluate(prompt: str) -> dict:
    """Check whether *prompt* contains an explicit step cue.

    Scores 1.0 when any cue word occurs in the prompt, otherwise 0.0 with a
    single suggestion.

    Cues are matched as substrings of the raw prompt, the same way the other
    keyword-based detectors in this package work.  Matching against tokenizer
    output made the result depend on how the loaded spaCy model segments the
    text, which is unreliable for Chinese cues when the English model is the
    one that got loaded.

    Returns a dict with ``score`` and ``suggestions``.
    """
    step_markers = ("步骤", "首先", "然后", "最后")
    has_step = any(marker in prompt for marker in step_markers)
    score = 1.0 if has_step else 0.0
    suggestions = [] if has_step else ["建议在 prompt 中添加明确步骤提示,如'首先...'、'然后...'"]
    return {"score": score, "suggestions": suggestions}
106
+ ```
107
+
108
+ ```python
109
+ # detectors/verbosity.py
110
+ from ..utils.nlp_helpers import sentence_length
111
+
112
def evaluate(prompt: str) -> dict:
    """Score verbosity from the prompt's token count.

    Below 60 tokens the score is 1.0; from 60 on it falls by 1/200 per token
    beyond 60, floored at 0.0.  A suggestion is attached above 80 tokens.

    Returns a dict with the rounded ``score`` and the ``suggestions`` list.
    """
    n_tokens = sentence_length(prompt)
    if n_tokens < 60:
        raw = 1.0
    else:
        raw = max(0.0, 1.0 - (n_tokens - 60) / 200)
    hints = ["Prompt 内容冗长,考虑精简无关信息。"] if n_tokens > 80 else []
    return {"score": round(raw, 2), "suggestions": hints}
119
+ ```
120
+
121
+ ```python
122
+ # detectors/injection_risk.py
123
+ import re
124
+
125
def evaluate(prompt: str) -> dict:
    """Score injection risk by searching *prompt* for known risky phrases.

    Every pattern that matches (case-insensitively) costs 0.5, floored at
    0.0, and adds one suggestion that quotes the matching pattern.

    Returns a dict with ``score`` and ``suggestions``.
    """
    matched = []
    for pattern in (r"\bignore previous\b", r"\bmalicious\b"):
        if re.search(pattern, prompt, re.IGNORECASE):
            matched.append(pattern)
    remaining = 1.0 - len(matched) * 0.5
    return {
        "score": max(remaining, 0.0),
        "suggestions": ["检测到潜在注入风险模式:%s" % pat for pat in matched],
    }
131
+ ```
132
+
133
+ ```python
134
+ # detectors/context_completeness.py
135
+
136
def evaluate(prompt: str) -> dict:
    """Score context completeness with a keyword check.

    Scores 1.0 when *prompt* contains the "background" or "example" keyword,
    otherwise 0.5 together with one suggestion.

    Returns a dict with ``score`` and ``suggestions``.
    """
    # Crude heuristic: look for a background / example marker in the text.
    if any(keyword in prompt for keyword in ("背景", "示例")):
        return {"score": 1.0, "suggestions": []}
    return {
        "score": 0.5,
        "suggestions": ["提示:如有必要,可添加背景或示例以提升上下文完整性。"],
    }
142
+ ```
143
+
144
+ ```python
145
+ # detectors/ethic_compliance.py
146
+
147
def evaluate(prompt: str) -> dict:
    """Score ethical compliance against a small word blacklist.

    Scores 0.0 when any blacklisted word occurs in *prompt*, with one
    suggestion per word found; otherwise scores 1.0 with no suggestions.

    Returns a dict with ``score`` and ``suggestions``.
    """
    # Simple word-list check ("violence", "discrimination", "hatred").
    flagged = list(filter(lambda word: word in prompt, ("暴力", "歧视", "仇恨")))
    if flagged:
        return {
            "score": 0.0,
            "suggestions": ["检测到不当词汇:%s" % word for word in flagged],
        }
    return {"score": 1.0, "suggestions": []}
154
+ ```
155
+
156
+ ```python
157
+ # detectors/structural_cleanness.py
158
+ from ..utils.nlp_helpers import dependency_depth
159
+
160
def evaluate(prompt: str) -> dict:
    """Score structural cleanness from the value of ``dependency_depth``.

    A depth below 3 scores 1.0; from 3 on, each extra level costs 0.2,
    floored at 0.0.  A suggestion is attached when the depth exceeds 5.

    Returns a dict with the rounded ``score`` and the ``suggestions`` list.
    """
    depth = dependency_depth(prompt)
    if depth < 3:
        raw = 1.0
    else:
        raw = max(0.0, 1.0 - (depth - 3) * 0.2)
    hints = ["句子结构过于复杂,建议拆分或简化嵌套。"] if depth > 5 else []
    return {"score": round(raw, 2), "suggestions": hints}
167
+ ```
168
+
169
+ ```python
170
+ # detectors/relevance.py
171
+ from sentence_transformers import SentenceTransformer, util
172
+
173
+ model = SentenceTransformer('all-MiniLM-L6-v2')
174
+
175
def evaluate(prompt: str, reference: str = None) -> dict:
    """Score how close *prompt* is to an optional *reference* text.

    With a non-empty *reference*, both texts are encoded with the
    module-level sentence-transformer model and their cosine similarity is
    used.  Without one, a neutral 0.5 is returned.

    Cosine similarity can be negative (and can overshoot 1.0 slightly through
    floating-point error), so the value is clamped to [0.0, 1.0] to keep this
    dimension on the same scale as the other scores.

    Returns a dict with the rounded ``score`` and an empty ``suggestions``
    list.
    """
    if reference:
        sim = util.cos_sim(model.encode(prompt), model.encode(reference)).item()
    else:
        # Nothing to compare against: fall back to a neutral score.
        sim = 0.5
    score = min(1.0, max(0.0, sim))
    return {"score": round(score, 2), "suggestions": []}
183
+ ```
184
+
185
+ ```python
186
+ # detectors/feasibility.py
187
+ from ..utils.nlp_helpers import sentence_length
188
+
189
def evaluate(prompt: str, max_tokens: int = 512) -> dict:
    """Score feasibility by comparing the token count with *max_tokens*.

    Scores 1.0 when the prompt is shorter than half of *max_tokens*, else
    0.5.  A suggestion is attached when the prompt exceeds *max_tokens*.

    Returns a dict with ``score`` and ``suggestions``.
    """
    n_tokens = sentence_length(prompt)
    outcome = {"score": 0.5, "suggestions": []}
    if n_tokens < max_tokens / 2:
        outcome["score"] = 1.0
    if n_tokens > max_tokens:
        outcome["suggestions"].append("Prompt 太长,可能超出模型最大长度限制。")
    return outcome
196
+ ```
197
+
198
+ ```python
199
+ # detectors/grammar_spelling.py
200
+ from language_tool_python import LanguageTool
201
+
202
+ tool = LanguageTool('en-US')
203
+
204
def evaluate(prompt: str) -> dict:
    """Score grammar and spelling using the module-level LanguageTool checker.

    Every reported match costs 0.1, floored at 0.0; the message of each match
    becomes one suggestion.

    Returns a dict with the rounded ``score`` and the ``suggestions`` list.
    """
    issues = tool.check(prompt)
    if issues:
        raw = max(0.0, 1.0 - len(issues) * 0.1)
    else:
        raw = 1.0
    messages = [issue.message for issue in issues]
    return {"score": round(raw, 2), "suggestions": messages}
209
+ ```
210
+
211
+ ```python
212
+ # detectors/length_appropriateness.py
213
+ from ..utils.nlp_helpers import sentence_length
214
+
215
def evaluate(prompt: str, min_len: int = 10, max_len: int = 200) -> dict:
    """Score whether the prompt's token count lies within [min_len, max_len].

    Scores 1.0 inside the range and 0.5 outside it.  A suggestion is added
    for a prompt that is too short and another for one that is too long.

    Returns a dict with ``score`` and ``suggestions``.
    """
    length = sentence_length(prompt)
    too_short = length < min_len
    too_long = length > max_len
    suggestions = []
    if too_short:
        suggestions.append(f"Prompt 太短({length}),建议至少 {min_len} 个词。")
    if too_long:
        suggestions.append(f"Prompt 太长({length}),建议不超过 {max_len} 个词。")
    score = 0.5 if (too_short or too_long) else 1.0
    return {"score": score, "suggestions": suggestions}
224
+ ```
225
+
226
+ ```python
227
+ # detectors/diversity.py
228
+ from sklearn.feature_extraction.text import TfidfVectorizer
229
+ import numpy as np
230
+
231
def evaluate(batch_prompts: list) -> dict:
    """Score the diversity of a batch of prompts from pairwise TF-IDF similarity.

    The prompts are vectorised with ``TfidfVectorizer``, all pairwise
    similarities are computed as the product of the matrix with its
    transpose, and the mean over the strict upper triangle (each unordered
    pair once, no self-similarity) is taken.  The score is ``1 - mean``.
    A suggestion is attached when the mean similarity exceeds 0.8.

    A batch with fewer than two prompts has no pairs to compare; it returns
    a score of 1.0 with no suggestions instead of NaN or a vectoriser error.

    Returns a dict with the rounded ``score`` and the ``suggestions`` list.
    """
    if len(batch_prompts) < 2:
        return {"score": 1.0, "suggestions": []}
    vec = TfidfVectorizer().fit_transform(batch_prompts)
    # ``.toarray()`` replaces the ``.A`` shorthand, which newer SciPy
    # releases no longer provide; ``@`` is an explicit matrix product.
    sim = (vec @ vec.T).toarray()
    avg_sim = float(np.mean(sim[np.triu_indices_from(sim, k=1)]))
    score = 1 - avg_sim
    suggestions = []
    if avg_sim > 0.8:
        suggestions.append("批量 prompt 相似度过高,建议增加多样性。")
    return {"score": round(score, 2), "suggestions": suggestions}
240
+ ```
241
+
242
+ ```python
243
+ # core.py
244
+ import yaml
245
+ import concurrent.futures
246
+ from .detectors import clarity, ambiguity, step_guidance, verbosity, injection_risk, context_completeness, ethic_compliance, structural_cleanness, relevance, feasibility, grammar_spelling, length_appropriateness, diversity
247
+
248
+ # Map each dimension name (the keys used under enabled_dimensions in
+ # config.yaml) to the detector module imported above.
+ # NOTE(review): __init__.py imports evaluate_prompt from this module, but no
+ # such definition is visible in this listing -- confirm it exists elsewhere.
249
+ DETECTORS = {
250
+ 'clarity': clarity,
251
+ 'ambiguity': ambiguity,
252
+ 'step_guidance': step_guidance,
253
+ 'verbosity': verbosity,
254
+ 'injection_risk': injection_risk,
255
+ 'context_completeness': context_completeness,
256
+ 'ethic_compliance': ethic_compliance,
257
+ 'structural_cleanness': structural_cleanness,
258
+ 'relevance': relevance,
259
+ 'feasibility': feasibility,
260
+ 'grammar_spelling': grammar_spelling,
261
+ 'length_appropriateness': length_appropriateness,
262
+ 'diversity': diversity
263
+ }
264
+
265
+ ```
266
+ # scoring.py
267
+ """
268
+ 汇总各维度打分,按权重计算总分,输出标准结果格式。
269
+ """
270
+
271
def compute_overall_score(scores: dict, weights: dict) -> float:
    """Return the weighted average of *scores*, rounded to 4 decimals.

    Only dimensions present in *scores* take part.  Each score is multiplied
    by its weight (0 when the dimension has no entry in *weights*) and the
    sum is divided by the total weight of those same dimensions.  Dividing
    by the scored dimensions' weights, rather than by every entry in
    *weights*, keeps the result a true weighted average when some configured
    dimensions were not evaluated.

    Returns 0.0 when there is nothing to average: no scores, or the scored
    dimensions' weights sum to zero.
    """
    applied = {dim: weights.get(dim, 0) for dim in scores}
    total_weight = sum(applied.values())
    if total_weight == 0:
        return 0.0
    weighted_sum = sum(scores[dim] * weight for dim, weight in applied.items())
    return round(weighted_sum / total_weight, 4)
278
+
279
+
280
def format_scores(scores: dict, suggestions: dict, overall: float) -> dict:
    """Bundle per-dimension scores, suggestions and the overall score.

    The arguments are stored as given (not copied) under the keys
    ``"scores"``, ``"suggestions"`` and ``"overall"``, giving a single dict
    that is convenient to serialise.
    """
    return dict(scores=scores, suggestions=suggestions, overall=overall)
290
+
291
+ # report.py
292
+ """
293
+ 生成 HTML 与 Markdown 格式的报告,包含各维度得分和建议。
294
+ """
295
+
296
+ from jinja2 import Template
297
+
298
+ HTML_TEMPLATE = """
299
+ <html>
300
+ <head><title>PRIVAL Prompt 验证报告</title></head>
301
+ <body>
302
+ <h2>PRIVAL 验证报告</h2>
303
+ <p>Overall Score: {{ overall }}</p>
304
+ <table border=1 cellpadding=5>
305
+ <tr><th>维度</th><th>分数</th><th>建议</th></tr>
306
+ {% for dim, score in scores.items() %}
307
+ <tr>
308
+ <td>{{ dim }}</td>
309
+ <td>{{ score }}</td>
310
+ <td>{{ suggestions[dim] | join('; ') }}</td>
311
+ </tr>
312
+ {% endfor %}
313
+ </table>
314
+ </body>
315
+ </html>
316
+ """
317
+
318
+ MD_TEMPLATE = """
319
+ # PRIVAL Prompt 验证报告
320
+
321
+ **Overall Score:** {{ overall }}
322
+
323
+ | 维度 | 分数 | 建议 |
324
+ |-----|-----|------|
325
+ {% for dim, score in scores.items() %}
326
+ | {{ dim }} | {{ score }} | {{ suggestions[dim] | join('; ') }} |
327
+ {% endfor %}
328
+ """
329
+
330
def generate_html_report(data: dict) -> str:
    """Render *data* as an HTML report string.

    *data* must provide the keys ``scores``, ``suggestions`` and ``overall``;
    a missing key raises ``KeyError``.

    Autoescaping is enabled so that dimension names and suggestion text are
    HTML-escaped instead of being interpolated into the page as raw markup.
    """
    tmpl = Template(HTML_TEMPLATE, autoescape=True)
    return tmpl.render(
        scores=data['scores'],
        suggestions=data['suggestions'],
        overall=data['overall'],
    )
334
+
335
+
336
def generate_md_report(data: dict) -> str:
    """Render *data* as a Markdown report string.

    *data* must provide the keys ``scores``, ``suggestions`` and ``overall``;
    a missing key raises ``KeyError``.

    ``trim_blocks`` / ``lstrip_blocks`` drop the newline and indentation
    around the ``{% for %}`` / ``{% endfor %}`` tag lines.  Without them every
    table row is surrounded by blank lines, which ends the Markdown table
    right after its header.
    """
    tmpl = Template(MD_TEMPLATE, trim_blocks=True, lstrip_blocks=True)
    return tmpl.render(
        scores=data['scores'],
        suggestions=data['suggestions'],
        overall=data['overall'],
    )
340
+
341
+ # tests/ 目录结构与示例测试
342
+ mkdir -p tests
343
+
344
+ # tests/test_scoring.py
345
+ import pytest
346
+ from prival.scoring import compute_overall_score
347
+
348
def test_compute_overall_score_empty():
    """With no scores and no weights the overall score is 0.0."""
    result = compute_overall_score({}, {})
    assert result == 0.0
350
+
351
def test_compute_overall_score_simple():
    """Two equally weighted dimensions average to the midpoint of their scores."""
    overall = compute_overall_score(
        dict(a=1.0, b=0.5),
        dict(a=0.5, b=0.5),
    )
    assert overall == 0.75
355
+
356
+ # tests/test_report.py
357
+ import pytest
358
+ from prival.report import generate_md_report, generate_html_report
359
+
360
def test_generate_reports():
    """Both report formats mention the dimension; Markdown also carries the suggestion."""
    payload = dict(
        scores={'clarity': 0.8},
        suggestions={'clarity': ['Be more specific']},
        overall=0.8,
    )
    markdown = generate_md_report(payload)
    assert 'clarity' in markdown
    assert 'Be more specific' in markdown
    page = generate_html_report(payload)
    assert '<td>clarity</td>' in page
config.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # config.yaml
2
+ enabled_dimensions:
3
+ clarity: # 表示启用清晰度检测
4
+ weight: 0.15
5
+ ambiguity:
6
+ weight: 0.10
7
+ step_guidance:
8
+ weight: 0.10
9
+ verbosity:
10
+ weight: 0.10
11
+ injection_risk:
12
+ weight: 0.15
13
+ context_completeness:
14
+ weight: 0.10
15
+ ethic_compliance:
16
+ weight: 0.10
17
+ structural_cleanness:
18
+ weight: 0.05
19
+ relevance:
20
+ weight: 0.05
21
+ feasibility:
22
+ weight: 0.05
23
+ grammar_spelling:
24
+ weight: 0.05
25
+ length_appropriateness:
26
+ weight: 0.05
27
+ diversity:
28
+ weight: 0.05
29
+ # politeness 未启用
core.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # core.py
2
+ import yaml
3
+ import concurrent.futures
4
+ from .detectors import clarity, ambiguity, step_guidance, verbosity, injection_risk, context_completeness, ethic_compliance, structural_cleanness, relevance, feasibility, grammar_spelling, length_appropriateness, diversity
5
+
6
+ # Map each dimension name (the keys used under enabled_dimensions in
+ # config.yaml) to the detector module imported above.
+ # NOTE(review): __init__.py imports evaluate_prompt from this module, but no
+ # such definition is visible in this file -- confirm it exists elsewhere.
7
+ DETECTORS = {
8
+ 'clarity': clarity,
9
+ 'ambiguity': ambiguity,
10
+ 'step_guidance': step_guidance,
11
+ 'verbosity': verbosity,
12
+ 'injection_risk': injection_risk,
13
+ 'context_completeness': context_completeness,
14
+ 'ethic_compliance': ethic_compliance,
15
+ 'structural_cleanness': structural_cleanness,
16
+ 'relevance': relevance,
17
+ 'feasibility': feasibility,
18
+ 'grammar_spelling': grammar_spelling,
19
+ 'length_appropriateness': length_appropriateness,
20
+ 'diversity': diversity
21
+ }
report.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # report.py
2
+ """
3
+ 生成 HTML 与 Markdown 格式的报告,包含各维度得分和建议。
4
+ """
5
+
6
+ from jinja2 import Template
7
+
8
+ HTML_TEMPLATE = """
9
+ <html>
10
+ <head><title>PRIVAL Prompt 验证报告</title></head>
11
+ <body>
12
+ <h2>PRIVAL 验证报告</h2>
13
+ <p>Overall Score: {{ overall }}</p>
14
+ <table border=1 cellpadding=5>
15
+ <tr><th>维度</th><th>分数</th><th>建议</th></tr>
16
+ {% for dim, score in scores.items() %}
17
+ <tr>
18
+ <td>{{ dim }}</td>
19
+ <td>{{ score }}</td>
20
+ <td>{{ suggestions[dim] | join('; ') }}</td>
21
+ </tr>
22
+ {% endfor %}
23
+ </table>
24
+ </body>
25
+ </html>
26
+ """
27
+
28
+ MD_TEMPLATE = """
29
+ # PRIVAL Prompt 验证报告
30
+
31
+ **Overall Score:** {{ overall }}
32
+
33
+ | 维度 | 分数 | 建议 |
34
+ |-----|-----|------|
35
+ {% for dim, score in scores.items() %}
36
+ | {{ dim }} | {{ score }} | {{ suggestions[dim] | join('; ') }} |
37
+ {% endfor %}
38
+ """
39
+
40
def generate_html_report(data: dict) -> str:
    """Render *data* as an HTML report string.

    *data* must provide the keys ``scores``, ``suggestions`` and ``overall``;
    a missing key raises ``KeyError``.

    Autoescaping is enabled so that dimension names and suggestion text are
    HTML-escaped instead of being interpolated into the page as raw markup.
    """
    tmpl = Template(HTML_TEMPLATE, autoescape=True)
    return tmpl.render(
        scores=data['scores'],
        suggestions=data['suggestions'],
        overall=data['overall'],
    )
44
+
45
+
46
def generate_md_report(data: dict) -> str:
    """Render *data* as a Markdown report string.

    *data* must provide the keys ``scores``, ``suggestions`` and ``overall``;
    a missing key raises ``KeyError``.

    ``trim_blocks`` / ``lstrip_blocks`` drop the newline and indentation
    around the ``{% for %}`` / ``{% endfor %}`` tag lines.  Without them every
    table row is surrounded by blank lines, which ends the Markdown table
    right after its header.
    """
    tmpl = Template(MD_TEMPLATE, trim_blocks=True, lstrip_blocks=True)
    return tmpl.render(
        scores=data['scores'],
        suggestions=data['suggestions'],
        overall=data['overall'],
    )
scoring.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # scoring.py
2
+ """
3
+ 汇总各维度打分,按权重计算总分,输出标准结果格式。
4
+ """
5
+
6
def compute_overall_score(scores: dict, weights: dict) -> float:
    """Return the weighted average of *scores*, rounded to 4 decimals.

    Only dimensions present in *scores* take part.  Each score is multiplied
    by its weight (0 when the dimension has no entry in *weights*) and the
    sum is divided by the total weight of those same dimensions.  Dividing
    by the scored dimensions' weights, rather than by every entry in
    *weights*, keeps the result a true weighted average when some configured
    dimensions were not evaluated.

    Returns 0.0 when there is nothing to average: no scores, or the scored
    dimensions' weights sum to zero.
    """
    applied = {dim: weights.get(dim, 0) for dim in scores}
    total_weight = sum(applied.values())
    if total_weight == 0:
        return 0.0
    weighted_sum = sum(scores[dim] * weight for dim, weight in applied.items())
    return round(weighted_sum / total_weight, 4)
13
+
14
+
15
def format_scores(scores: dict, suggestions: dict, overall: float) -> dict:
    """Bundle per-dimension scores, suggestions and the overall score.

    The arguments are stored as given (not copied) under the keys
    ``"scores"``, ``"suggestions"`` and ``"overall"``, giving a single dict
    that is convenient to serialise.
    """
    return dict(scores=scores, suggestions=suggestions, overall=overall)
24
+ }