EugeneXiang committed on
Commit
dcf611d
·
verified ·
1 Parent(s): 9b414c1

Upload 5 files

Browse files
Files changed (5) hide show
  1. config.py +33 -0
  2. export_utils.py +22 -0
  3. radar_chart.py +25 -0
  4. scoring_utils.py +21 -0
  5. slang_parser.py +7 -0
config.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Names of the evaluation systems, in display order.
SYSTEMS = ["Human", "OVAL", "DeepEval"]

# Dimension order, shared by scoring, charts and display.
DIMENSIONS = [
    "Compliance",
    "Ethical",
    "Naturalness",
    "Structure",
    "Rationality",
    "Logic",
    "Non-hallucination",
    "Accuracy",
    "Coherence",
]

# Chinese label for each dimension (usable as table headers).
DIMENSION_ZH = {
    "Compliance": "合规性",
    "Ethical": "伦理性",
    "Naturalness": "自然度",
    "Structure": "结构性",
    "Rationality": "合理性",
    "Logic": "逻辑性",
    "Non-hallucination": "非幻觉性",
    "Accuracy": "准确性",
    "Coherence": "连贯性",
}

# Default scoring range as (minimum, maximum).
SCORE_RANGE = (0.0, 10.0)

# Colour per system (usable for charts).
SYSTEM_COLORS = dict(
    Human="#1f77b4",
    OVAL="#2ca02c",
    DeepEval="#ff7f0e",
)
export_utils.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import csv
3
+ import tempfile
4
+
5
def _score_cell(scores_data, system, idx):
    """Return the CSV cell for one score; missing or None becomes ""."""
    try:
        score = scores_data[system][idx]
    except (KeyError, IndexError):
        # A system without scores, or a score list shorter than the
        # dimension list, is exported as a blank cell instead of crashing.
        return ""
    return "" if score is None else score


def generate_csv_download(dimensions, systems, scores_data, notes, filename="evaluation_scores.csv"):
    """Write the score table and the slang notes to a temporary CSV file.

    Layout: a header row ("维度" followed by the system names), one row per
    dimension with that dimension's score for every system, an empty row,
    a "网络用语备注" heading row, and finally a row holding ``notes``.

    Args:
        dimensions: Dimension names, one CSV row each.
        systems: System names, one CSV column each.
        scores_data: Mapping of system name to a sequence of scores indexed
            like ``dimensions``. ``None``, missing systems and missing
            positions are written as empty cells.
        notes: Free-text notes written in the last row.
        filename: Currently unused; kept so existing callers keep working.

    Returns:
        Path of the created file. It is created with ``delete=False``, so
        the caller is responsible for removing it.
    """
    # The context manager guarantees the handle is closed even when a row
    # fails to serialize; the file itself survives because delete=False.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".csv", mode="w", newline="", encoding="utf-8"
    ) as output:
        writer = csv.writer(output)

        writer.writerow(["维度", *systems])
        for idx, dim in enumerate(dimensions):
            writer.writerow(
                [dim, *(_score_cell(scores_data, system, idx) for system in systems)]
            )

        writer.writerow([])
        writer.writerow(["网络用语备注"])
        writer.writerow([notes])

    return output.name
radar_chart.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import plotly.graph_objects as go
3
+
4
def make_radar_chart(dimensions, scores_dict):
    """Build a radar (polar) chart with one filled trace per system.

    Args:
        dimensions: Axis labels, used as ``theta`` for every trace.
        scores_dict: Mapping of system name to its scores; ``None`` scores
            are plotted as 0.

    Returns:
        The plotly figure, with the radial axis fixed to the 0-10 range.
    """
    chart = go.Figure()

    for name, values in scores_dict.items():
        # Unscored dimensions collapse to the centre of the chart.
        radii = [0 if value is None else value for value in values]
        trace = go.Scatterpolar(
            r=radii,
            theta=dimensions,
            fill='toself',
            name=name,
        )
        chart.add_trace(trace)

    radial_axis = dict(visible=True, range=[0, 10])
    chart.update_layout(
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
        autosize=True,
        margin=dict(l=0, r=0, t=20, b=0),
        height=450,
    )

    return chart
scoring_utils.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import hashlib
3
+
4
def stable_score(system, text, dimension):
    """Return a deterministic pseudo-score derived from the three inputs.

    The MD5 digest of ``"{text}_{system}_{dimension}"`` is reduced modulo 41
    and mapped onto 1.0-5.0 in steps of 0.1, so identical inputs always
    produce the same score.
    """
    key = f"{text}_{system}_{dimension}"
    digest = hashlib.md5(key.encode()).digest()
    # Big-endian bytes -> int gives the same number as parsing the hex digest.
    bucket = int.from_bytes(digest, "big") % 41
    return round(bucket / 10 + 1, 1)
8
+
9
def make_explanation(system, dimension, score):
    """Return a one-sentence explanation for a system's score on a dimension.

    Known dimensions get a canned remark appended after the score; any other
    dimension falls back to the bare "<system> scored <dimension> at <score>."
    sentence.
    """
    remarks = {
        "Structure": "The text structure may be unclear; consider adding headings or breaking into paragraphs.",
        "Rationality": "Argument support is weak; consider adding logical reasoning or evidence.",
        "Logic": "Flow seems disjointed; check for consistency and coherence between sentences.",
        "Non-hallucination": "There may be inaccurate or made‑up information; please fact‑check.",
        "Accuracy": "Minor factual inaccuracies may exist; verify claims against reliable sources.",
        "Coherence": "Sentences may lack a smooth narrative flow; consider rephrasing.",
        "Compliance": "Potential issues in legal or content safety compliance detected.",
        "Ethical": "Some content may raise ethical or cultural concerns.",
        "Naturalness": "The tone might sound artificial or robotic; consider improving fluency.",
    }
    headline = f"{system} scored {dimension} at {score}"
    if dimension not in remarks:
        return f"{headline}."
    return f"{headline}: {remarks[dimension]}"
slang_parser.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+
2
def extract_slang_notes(user_notes):
    """Return the user's internet-slang notes with surrounding whitespace removed.

    Placeholder: the notes are currently returned as entered (only stripped).
    It could be extended to detect sensitive words, non-standard words or
    risky semantics.

    Args:
        user_notes: Free-text notes typed by the user, or ``None`` when the
            field was left empty.

    Returns:
        The stripped notes, or an empty string when ``user_notes`` is ``None``.
    """
    if user_notes is None:
        # An empty input field may arrive as None; treat it as "no notes"
        # instead of failing on None.strip().
        return ""
    return user_notes.strip()