Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files
- config.py +33 -0
- export_utils.py +22 -0
- radar_chart.py +25 -0
- scoring_utils.py +21 -0
- slang_parser.py +7 -0
config.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Names of the evaluated systems, in display order.
SYSTEMS = [
    "Human",
    "OVAL",
    "DeepEval",
]

# Dimension order (shared by scoring, charts and display).
DIMENSIONS = [
    "Compliance",
    "Ethical",
    "Naturalness",
    "Structure",
    "Rationality",
    "Logic",
    "Non-hallucination",
    "Accuracy",
    "Coherence",
]

# Chinese label for each dimension (usable as table headers).
DIMENSION_ZH = {
    "Compliance": "合规性",
    "Ethical": "伦理性",
    "Naturalness": "自然度",
    "Structure": "结构性",
    "Rationality": "合理性",
    "Logic": "逻辑性",
    "Non-hallucination": "非幻觉性",
    "Accuracy": "准确性",
    "Coherence": "连贯性",
}

# Default scoring range as (minimum, maximum).
SCORE_RANGE = (0.0, 10.0)

# Colour assigned to each system (usable in charts).
SYSTEM_COLORS = {
    "Human": "#1f77b4",
    "OVAL": "#2ca02c",
    "DeepEval": "#ff7f0e",
}
|
export_utils.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import csv
|
| 3 |
+
import tempfile
|
| 4 |
+
|
| 5 |
+
def generate_csv_download(dimensions, systems, scores_data, notes, filename="evaluation_scores.csv"):
    """Write the score table and the notes to a temporary CSV file.

    The file has a header row (a dimension column followed by one column per
    system), one row per dimension, a blank row, a notes heading and finally
    the notes text in a single cell.

    Args:
        dimensions: Dimension names; each becomes the first cell of a row.
        systems: System names; used as column headers and as keys into
            ``scores_data``.
        scores_data: Mapping of system name to a sequence of scores indexed
            in the same order as ``dimensions``. A ``None`` score is written
            as an empty cell.
        notes: Free-form notes written as a single cell.
        filename: Accepted for interface compatibility but not used here;
            the file name is generated by ``tempfile``.
            NOTE(review): presumably intended as the download name - confirm
            with the caller.

    Returns:
        Path of the CSV file that was written. The file is not deleted
        automatically; the caller owns it.

    Raises:
        KeyError: If a system in ``systems`` is missing from ``scores_data``.
        IndexError: If a system has fewer scores than there are dimensions.
    """
    # delete=False keeps the file on disk after it is closed so the returned
    # path stays usable; the ``with`` block guarantees the handle is closed
    # even when writing a row raises.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".csv", mode="w", newline="", encoding="utf-8"
    ) as output:
        writer = csv.writer(output)

        writer.writerow(["维度", *systems])
        for idx, dim in enumerate(dimensions):
            row = [dim]
            for system in systems:
                score = scores_data[system][idx]
                row.append("" if score is None else score)
            writer.writerow(row)

        writer.writerow([])
        writer.writerow(["网络用语备注"])
        writer.writerow([notes])

    return output.name
|
radar_chart.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import plotly.graph_objects as go
|
| 3 |
+
|
| 4 |
+
def make_radar_chart(dimensions, scores_dict):
    """Build a Plotly radar chart with one filled trace per system.

    Args:
        dimensions: Axis labels, passed as ``theta`` of every trace.
        scores_dict: Mapping of system name to its sequence of scores; a
            ``None`` score is plotted as 0.

    Returns:
        The figure, with the radial axis fixed to the range [0, 10].
    """
    chart = go.Figure()

    for label, values in scores_dict.items():
        # Missing scores are drawn at the centre of the chart.
        radii = [0 if value is None else value for value in values]
        trace = go.Scatterpolar(
            r=radii,
            theta=dimensions,
            fill='toself',
            name=label,
        )
        chart.add_trace(trace)

    radial_axis = {"visible": True, "range": [0, 10]}
    chart.update_layout(
        polar={"radialaxis": radial_axis},
        showlegend=True,
        autosize=True,
        margin={"l": 0, "r": 0, "t": 20, "b": 0},
        height=450,
    )

    return chart
|
scoring_utils.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import hashlib
|
| 3 |
+
|
| 4 |
+
def stable_score(system, text, dimension):
    """Return a deterministic pseudo-score derived from the inputs.

    The MD5 digest of ``"{text}_{system}_{dimension}"`` is reduced modulo 41
    and mapped onto one-decimal steps, so the result lies between 1.0 and
    5.0 inclusive and is identical for identical arguments.
    """
    key = f"{text}_{system}_{dimension}"
    digest = hashlib.md5(key.encode()).digest()
    # Same integer as parsing the hex digest in base 16.
    bucket = int.from_bytes(digest, "big") % 41
    return round(1 + bucket / 10, 1)
|
| 8 |
+
|
| 9 |
+
def make_explanation(system, dimension, score):
    """Return a one-sentence explanation for a score on a dimension.

    Known dimensions get ``"<system> scored <dimension> at <score>: <remark>"``;
    any other dimension falls back to the same sentence without a remark.
    """
    remarks = {
        "Structure": "The text structure may be unclear; consider adding headings or breaking into paragraphs.",
        "Rationality": "Argument support is weak; consider adding logical reasoning or evidence.",
        "Logic": "Flow seems disjointed; check for consistency and coherence between sentences.",
        "Non-hallucination": "There may be inaccurate or made‑up information; please fact‑check.",
        "Accuracy": "Minor factual inaccuracies may exist; verify claims against reliable sources.",
        "Coherence": "Sentences may lack a smooth narrative flow; consider rephrasing.",
        "Compliance": "Potential issues in legal or content safety compliance detected.",
        "Ethical": "Some content may raise ethical or cultural concerns.",
        "Naturalness": "The tone might sound artificial or robotic; consider improving fluency.",
    }

    remark = remarks.get(dimension)
    if remark is None:
        return f"{system} scored {dimension} at {score}."
    return f"{system} scored {dimension} at {score}: {remark}"
|
slang_parser.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
def extract_slang_notes(user_notes):
    """Normalise the user's free-form notes about internet slang.

    Placeholder: the current version only trims surrounding whitespace. It
    could be extended to detect sensitive words, non-standard words or risky
    semantics.

    Args:
        user_notes: The raw notes text, or ``None`` when nothing was entered.

    Returns:
        The notes with leading and trailing whitespace removed; an empty
        string when ``user_notes`` is ``None``.
    """
    # Treat a missing value like empty notes rather than raising AttributeError.
    if user_notes is None:
        return ""
    return user_notes.strip()
|