# PowerDistance / app.py
# WENior's picture
# Update app.py
# 66d8d7a verified
import re
import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
from transformers import pipeline
# --------------------
# Politeness-enhancing rewrite model (T5)
# --------------------
_POLITE_REWRITE_MODEL = "prithivida/parrot_paraphraser_on_T5"

# Text-to-text generation pipeline used to produce a more polite English variant.
polite_rewrite = pipeline(task="text2text-generation", model=_POLITE_REWRITE_MODEL)
# ---------- 1. Load the Hugging Face models ----------
_TEXT_CLASSIFICATION = "text-classification"

# Chinese -> English translation
translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-zh-en")

# English politeness (4 classes: polite / somewhat polite / neutral / impolite)
politeness_cls = pipeline(task=_TEXT_CLASSIFICATION, model="Intel/polite-guard")

# English formality (3 classes: formal / neutral / informal)
formality_cls = pipeline(
    task=_TEXT_CLASSIFICATION,
    model="LenDigLearn/formality-classifier-mdeberta-v3-base",
)

# English hedge / uncertainty (indirect or vague wording).
# NOTE(review): the model id below names a sentiment classifier rather than a
# hedge detector -- confirm that its labels are the ones map_hedge_to_score
# expects; labels without "hedge" in them all map to the neutral 0.5.
_HEDGE_PIPELINE_OPTIONS = dict(
    device="cpu",
    truncation=True,
    max_length=256,
    padding="max_length",
)
hedge_cls = pipeline(
    task=_TEXT_CLASSIFICATION,
    model="siebert/sentiment-roberta-large-english",
    **_HEDGE_PIPELINE_OPTIONS,
)
# ---------- 2. Simple rule-based scoring helpers for Chinese & English ----------
# Marker word lists (defined once; the section used to be duplicated verbatim).
POLITE_WORDS_ZH = ["请", "麻烦您", "劳烦", "敬请", "拜托", "打扰了", "烦请"]
HEDGE_WORDS_ZH = ["是否", "可能", "大概", "也许", "好像", "觉得", "有点"]
IMPERATIVE_WORDS_ZH = ["必须", "务必", "不得", "不准", "立即", "马上", "必须要"]


def _count_marker_hits(text, words):
    """Count non-overlapping occurrences of any marker word in ``text``.

    Longer markers are tried first, so a phrase such as "必须要" is counted
    once instead of once for itself and once more for its substring "必须".
    """
    if not words:
        return 0
    longest_first = sorted(words, key=len, reverse=True)
    pattern = "|".join(re.escape(word) for word in longest_first)
    return len(re.findall(pattern, text))


def score_chinese_features(text: str):
    """Score the tone of a Chinese text with simple keyword rules.

    Args:
        text: The Chinese source text. ``None``, empty and whitespace-only
            input are treated as "no text".

    Returns:
        A ``(politeness, hedging, imperative)`` tuple of floats in [0, 1].
        Politeness and hedging saturate at three marker hits, imperative at
        two. For empty input the neutral defaults ``(0.5, 0.5, 0.0)`` are
        returned.
    """
    if not text or not text.strip():
        return 0.5, 0.5, 0.0  # neutral defaults when there is nothing to score

    polite_hits = _count_marker_hits(text, POLITE_WORDS_ZH)
    hedge_hits = _count_marker_hits(text, HEDGE_WORDS_ZH)
    imp_hits = _count_marker_hits(text, IMPERATIVE_WORDS_ZH)

    # More hits -> higher score, clipped to the [0, 1] range.
    polite_score = np.clip(polite_hits / 3.0, 0, 1)
    hedge_score = np.clip(hedge_hits / 3.0, 0, 1)
    imp_score = np.clip(imp_hits / 2.0, 0, 1)
    return float(polite_score), float(hedge_score), float(imp_score)
def map_polite_guard_to_score(label: str):
    """Map a politeness class label to a politeness score in [0, 1].

    An exact "polite" label scores 1.0. Otherwise the first matching fragment
    among "somewhat polite" (0.75), "neutral" (0.5) and "impolite" (0.0)
    decides the score. Unrecognised labels fall back to 0.5. Matching is
    case-insensitive.
    """
    normalized = label.lower()
    if normalized == "polite":
        return 1.0

    # Checked in order; the first fragment contained in the label wins.
    fragment_scores = (
        ("somewhat polite", 0.75),
        ("neutral", 0.5),
        ("impolite", 0.0),
    )
    for fragment, score in fragment_scores:
        if fragment in normalized:
            return score
    return 0.5
def map_formality_to_score(label: str):
    """Map a formality class label (formal / neutral / informal) to [0, 1].

    Args:
        label: Class label returned by the formality classifier. Matching is
            case-insensitive and substring based.

    Returns:
        1.0 for formal, 0.5 for neutral, 0.0 for informal, and 0.5 for any
        label that matches none of them.
    """
    label = label.lower()
    # "informal" contains "formal", so it has to be tested first; otherwise
    # every informal label would be scored as fully formal.
    if "informal" in label:
        return 0.0
    if "formal" in label:
        return 1.0
    if "neutral" in label:
        return 0.5
    return 0.5
def map_hedge_to_score(label: str):
    """Map a hedge-classifier label to a hedging score in [0, 1].

    This is only a sketch for labels shaped like "Hedge" / "No_Hedge".

    Args:
        label: Class label returned by the classifier (case-insensitive).

    Returns:
        0.0 when the label negates "hedge" (e.g. "no_hedge", "no-hedge",
        "non_hedge", "not hedge"), 1.0 for any other label that mentions
        "hedge", and 0.5 for labels that do not mention it at all (multi-class
        or unrelated label sets could be mapped more finely later).
    """
    normalized = label.lower()
    if "hedge" not in normalized:
        # Unknown label scheme: stay neutral.
        return 0.5
    # Look for a negation word directly in front of "hedge", separated only by
    # punctuation or whitespace. A bare substring test for "no" would also fire
    # on unrelated letters (e.g. inside "unknown") and would miss every
    # spelling other than the exact "no_hedge".
    if re.search(r"(?:^|[^a-z])(?:no|non|not)[^a-z]*hedge", normalized):
        return 0.0
    return 1.0
# Regex triggers for a commanding tone; applied to stripped, lower-cased text.
IMPERATIVE_TRIGGER_EN = [
    r"^please\b",
    r"^kindly\b",
    r"^do\b",
    r"^make\b",
    r"^send\b",
    r"^provide\b",
    r"\byou must\b",
    r"\byou have to\b",
    r"\byou are required to\b",
]


def score_imperative_en(text: str):
    """Estimate how commanding an English text sounds with simple regex rules.

    Each pattern in IMPERATIVE_TRIGGER_EN that matches counts once; two or
    more matching patterns saturate the score at 1.0. Blank input scores 0.0.
    """
    lowered = text.strip().lower()
    if lowered == "":
        return 0.0

    matched = [pat for pat in IMPERATIVE_TRIGGER_EN if re.search(pat, lowered)]
    # Several triggers at once mean a stronger command; clip into [0, 1].
    return float(np.clip(len(matched) / 2.0, 0, 1))
# ---------- 3. Core: analysis function ----------
def _empty_analysis():
    """Return placeholder values, one per output of ``analyze_letter``."""
    return (
        "",     # English translation
        "",     # more polite English version
        {},     # Chinese-side metrics
        {},     # English-side metrics
        "N/A",  # power-distance level
        0.0,    # power-distance score
        None,   # bar figure
        None,   # radar figure
    )


def _power_distance_level(score):
    """Bucket a power-distance score into "Low" / "Medium" / "High"."""
    if score < 0.33:
        return "Low"
    if score < 0.66:
        return "Medium"
    return "High"


def _new_figure():
    """Create a Matplotlib figure that is not registered with pyplot."""
    # Figures created through pyplot stay in its global registry until they
    # are explicitly closed, so a long-running app would accumulate two
    # figures per request. A bare Figure is garbage-collected normally.
    from matplotlib.figure import Figure

    return Figure()


def _build_bar_chart(features, zh_vals, en_vals):
    """Draw grouped bars comparing Chinese and English scores per feature."""
    x = np.arange(len(features))
    width = 0.35
    fig = _new_figure()
    ax = fig.subplots()
    ax.bar(x - width / 2, zh_vals, width, label="Chinese (source)")
    ax.bar(x + width / 2, en_vals, width, label="English (translation)")
    ax.set_ylim(0, 1)
    ax.set_xticks(x)
    ax.set_xticklabels(features)
    ax.set_ylabel("Score (0–1)")
    ax.set_title("Chinese vs English stylistic features")
    ax.legend()
    fig.tight_layout()
    return fig


def _build_radar_chart(features, zh_vals, en_vals):
    """Draw a polar (radar) chart of both stylistic profiles."""
    fig = _new_figure()
    ax = fig.add_subplot(111, polar=True)
    angles = np.linspace(0, 2 * np.pi, len(features), endpoint=False)
    # Repeat the first point at the end so each outline closes on itself.
    angles_closed = list(angles) + [angles[0]]
    zh_closed = zh_vals + [zh_vals[0]]
    en_closed = en_vals + [en_vals[0]]
    ax.plot(angles_closed, zh_closed, marker="o", label="Chinese")
    ax.fill(angles_closed, zh_closed, alpha=0.1)
    ax.plot(angles_closed, en_closed, marker="o", linestyle="--", label="English")
    ax.fill(angles_closed, en_closed, alpha=0.1)
    ax.set_xticks(angles)
    ax.set_xticklabels(features)
    ax.set_yticklabels([])
    ax.set_title("Stylistic profile (radar)")
    ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1))
    fig.tight_layout()
    return fig


def analyze_letter(chinese_text: str):
    """Translate a Chinese letter and estimate the power distance of the result.

    Args:
        chinese_text: The Chinese source text. ``None``, empty and
            whitespace-only input short-circuit to placeholder values.

    Returns:
        An 8-tuple, in this order:
        ``(translation, polite_version, zh_stats, en_stats, level, score,
        bar_figure, radar_figure)``. The empty-input path returns the same
        number of values as the normal path.
    """
    if not chinese_text or not chinese_text.strip():
        return _empty_analysis()

    # 1) Chinese tone analysis (rule based)
    polite_zh, hedge_zh, imp_zh = score_chinese_features(chinese_text)
    zh_stats = {
        "politeness": polite_zh,
        "hedging": hedge_zh,
        "imperative": imp_zh,
    }

    # 2) Chinese -> English translation
    translated = translator(chinese_text, max_length=512)[0]["translation_text"]

    # 2.1) More polite English rewrite
    polite_prompt = f"Rewrite the following sentence in polite and respectful English: {translated}"
    polite_version = polite_rewrite(polite_prompt)[0]["generated_text"]

    # 3) English politeness
    pol_out = politeness_cls(translated)[0]
    polite_en = map_polite_guard_to_score(pol_out["label"])

    # 4) English formality
    form_out = formality_cls(translated)[0]
    formality_en = map_formality_to_score(form_out["label"])

    # 5) English hedging
    hedge_out = hedge_cls(translated)[0]
    hedge_en = map_hedge_to_score(hedge_out["label"])

    # 6) English imperative strength
    imp_en = score_imperative_en(translated)

    en_stats = {
        "politeness": polite_en,
        "formality": formality_en,
        "hedging": hedge_en,
        "imperative": imp_en,
    }

    # 7) English-side power-distance score (0-1): less politeness, more
    #    formality, less hedging and more imperative wording all raise it.
    power_distance_score = (
        0.35 * (1 - polite_en)
        + 0.25 * formality_en
        + 0.25 * (1 - hedge_en)
        + 0.15 * imp_en
    )
    level = _power_distance_level(power_distance_score)

    # ---------- 4./5. Charts: Chinese vs English ----------
    features = ["politeness", "formality", "hedging", "imperative"]
    # zh_stats has no "formality" entry, so the charts show the 0.5
    # placeholder for it on the Chinese side.
    zh_vals = [zh_stats.get(k, 0.5 if k != "imperative" else 0.0) for k in features]
    en_vals = [en_stats.get(k, 0.0) for k in features]
    fig_bar = _build_bar_chart(features, zh_vals, en_vals)
    fig_radar = _build_radar_chart(features, zh_vals, en_vals)

    return (
        translated,            # 1
        polite_version,        # 2
        zh_stats,              # 3
        en_stats,              # 4
        level,                 # 5
        power_distance_score,  # 6 <- must be this variable, not `score`
        fig_bar,               # 7
        fig_radar,             # 8
    )
# ---------- 6. Gradio interface ----------
# Introductory Markdown shown at the top of the page.
_INTRO_MARKDOWN = """
# 📨 中译英权力距离检测(Power Distance)
输入一段 **中文信件**,系统会:
1. 自动翻译为英文
2. 分析中英文两侧的礼貌度、正式度、委婉程度、命令语气
3. 给出英文译文的 **权力距离等级:Low / Medium / High**
4. 用柱状图 + 雷达图展示风格变化
"""

with gr.Blocks(title="Power Distance Checker") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Input area
    with gr.Row():
        input_box = gr.Textbox(
            label="输入中文信件",
            lines=6,
            placeholder="例如:您好,我想向您反馈近期的项目进度,如有不妥之处,还请您多多指正。",
        )
    run_btn = gr.Button("分析语气与权力距离")

    # Plain English translation
    with gr.Row():
        output_en = gr.Textbox(label="英文翻译", lines=6)

    # More polite (enhanced) English version, declared in its own row
    with gr.Row():
        polite_output = gr.Textbox(label="更礼貌的(增强版)英文", lines=6)

    # Per-language tone metrics
    with gr.Row():
        zh_json = gr.JSON(label="中文侧语气指标(0–1)")
        en_json = gr.JSON(label="英文侧语气指标(0–1)")

    # Power-distance verdict
    with gr.Row():
        pd_label = gr.Label(label="Power Distance Level (English translation)")
        pd_score = gr.Number(label="Power Distance Score (0–1)", precision=3)

    # Charts
    with gr.Row():
        bar_plot = gr.Plot(label="Bar Chart:Chinese vs English")
        radar_plot = gr.Plot(label="Radar Chart:Stylistic Profile")

    # Output components, in the same order as the values analyze_letter returns.
    _result_components = [
        output_en,      # 1 plain translation
        polite_output,  # 2 more polite translation
        zh_json,        # 3 Chinese tone metrics
        en_json,        # 4 English tone metrics
        pd_label,       # 5 power-distance level
        pd_score,       # 6 power-distance score
        bar_plot,       # 7 bar chart
        radar_plot,     # 8 radar chart
    ]
    run_btn.click(
        fn=analyze_letter,
        inputs=[input_box],
        outputs=_result_components,
    )

if __name__ == "__main__":
    demo.launch()