import re

import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
from transformers import pipeline

# --------------------
# Politeness-enhancing rewrite model (T5)
# --------------------
polite_rewrite = pipeline(
    "text2text-generation",
    model="prithivida/parrot_paraphraser_on_T5",
)

# ---------- 1. Load the Hugging Face models ----------
# Chinese -> English translation
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")

# English politeness (4 classes: polite / somewhat polite / neutral / impolite)
politeness_cls = pipeline("text-classification", model="Intel/polite-guard")

# English formality (3 classes: formal / neutral / informal)
formality_cls = pipeline(
    "text-classification",
    model="LenDigLearn/formality-classifier-mdeberta-v3-base",
)

# English hedge / uncertainty (indirect or vague phrasing)
# NOTE(review): the checkpoint name below says "sentiment", not "hedge" —
# confirm that its label set is what the hedge scoring expects.
hedge_cls = pipeline(
    "text-classification",
    model="siebert/sentiment-roberta-large-english",
    device="cpu",
    truncation=True,
    max_length=256,
    padding="max_length",
)

# ---------- 2. Simple rule-based scoring helpers (Chinese & English) ----------
# Keyword lists used to score the Chinese source text.
POLITE_WORDS_ZH = ["请", "麻烦您", "劳烦", "敬请", "拜托", "打扰了", "烦请"]
HEDGE_WORDS_ZH = ["是否", "可能", "大概", "也许", "好像", "觉得", "有点"]
IMPERATIVE_WORDS_ZH = ["必须", "务必", "不得", "不准", "立即", "马上", "必须要"]
# ---------- 2. Simple rule-based scoring helpers (Chinese & English) ----------
# Keyword lists used to score the Chinese source text.
POLITE_WORDS_ZH = ["请", "麻烦您", "劳烦", "敬请", "拜托", "打扰了", "烦请"]
HEDGE_WORDS_ZH = ["是否", "可能", "大概", "也许", "好像", "觉得", "有点"]
IMPERATIVE_WORDS_ZH = ["必须", "务必", "不得", "不准", "立即", "马上", "必须要"]


def _keyword_score(text: str, keywords, saturation: float) -> float:
    """Return the total keyword occurrences in *text*, scaled by *saturation* and clipped to [0, 1]."""
    hits = sum(text.count(word) for word in keywords)
    return float(np.clip(hits / saturation, 0, 1))


def score_chinese_features(text: str) -> tuple:
    """Score the tone of a Chinese text with simple keyword counts.

    Args:
        text: The Chinese source text.

    Returns:
        A ``(politeness, hedging, imperative)`` tuple of floats in [0, 1].
        Blank input yields the defaults ``(0.5, 0.5, 0.0)``.
    """
    if not text.strip():
        return 0.5, 0.5, 0.0  # neutral defaults for blank input

    # More occurrences give a higher score: politeness and hedging saturate
    # at 3 hits, imperative at 2.
    # Overlapping keywords are counted once per list entry, e.g. "敬请" also
    # matches "请" and "必须要" also matches "必须".
    return (
        _keyword_score(text, POLITE_WORDS_ZH, 3.0),
        _keyword_score(text, HEDGE_WORDS_ZH, 3.0),
        _keyword_score(text, IMPERATIVE_WORDS_ZH, 2.0),
    )


def map_polite_guard_to_score(label: str) -> float:
    """Map an Intel/polite-guard label (4 classes) to a politeness score in [0, 1].

    Unrecognised labels fall back to the neutral score 0.5.
    """
    label = label.lower()
    # "polite" must match exactly: it is a substring of both
    # "somewhat polite" and "impolite".
    if label == "polite":
        return 1.0
    if "somewhat polite" in label:
        return 0.75
    if "neutral" in label:
        return 0.5
    if "impolite" in label:
        return 0.0
    return 0.5


def map_formality_to_score(label: str) -> float:
    """Map a formal / neutral / informal label to a formality score in [0, 1].

    Unrecognised labels fall back to the neutral score 0.5.
    """
    label = label.lower()
    # "informal" contains "formal", so it has to be tested first; otherwise
    # informal text would be scored as fully formal.
    if "informal" in label:
        return 0.0
    if "formal" in label:
        return 1.0
    if "neutral" in label:
        return 0.5
    return 0.5


def map_hedge_to_score(label: str) -> float:
    """Map a hedge-classifier label to a hedging score in [0, 1].

    Labels are expected to look like "Hedge" / "No_Hedge". Any label that
    contains "hedge" but not "no" counts as fully hedged, "no_hedge" counts
    as not hedged, and everything else (e.g. multi-class labels) gets the
    middle score 0.5.
    """
    label = label.lower()
    if "hedge" in label and "no" not in label:
        return 1.0
    if "no_hedge" in label:
        return 0.0
    # Other label schemes could be mapped more finely; use the middle for now.
    return 0.5


# Regexes whose presence suggests a directive tone in English text.
IMPERATIVE_TRIGGER_EN = [
    r"^please\b",
    r"^kindly\b",
    r"^do\b",
    r"^make\b",
    r"^send\b",
    r"^provide\b",
    r"\byou must\b",
    r"\byou have to\b",
    r"\byou are required to\b",
]


def score_imperative_en(text: str) -> float:
    """Estimate how imperative an English text sounds, using simple rules.

    Each pattern in ``IMPERATIVE_TRIGGER_EN`` that matches the stripped,
    lower-cased text counts as one hit; the score saturates at two hits.

    Returns:
        A float in [0, 1]; 0.0 for blank input.
    """
    normalized = text.strip().lower()
    if not normalized:
        return 0.0
    hits = sum(1 for pattern in IMPERATIVE_TRIGGER_EN if re.search(pattern, normalized))
    # Several triggers at once raise the score.
    return float(np.clip(hits / 2.0, 0, 1))
# ---------- 3. Core: analysis function ----------
def _power_distance_level(score: float) -> str:
    """Bucket a power-distance score into "Low" / "Medium" / "High"."""
    if score < 0.33:
        return "Low"
    if score < 0.66:
        return "Medium"
    return "High"


def _make_bar_chart(features, zh_vals, en_vals):
    """Build the grouped bar chart comparing Chinese and English feature scores."""
    # Function-scope import; Figure is created directly rather than through
    # pyplot, which keeps a reference to every figure it creates until the
    # figure is closed.
    from matplotlib.figure import Figure

    x = np.arange(len(features))
    width = 0.35

    fig_bar = Figure()
    ax_bar = fig_bar.subplots()
    ax_bar.bar(x - width / 2, zh_vals, width, label="Chinese (source)")
    ax_bar.bar(x + width / 2, en_vals, width, label="English (translation)")
    ax_bar.set_ylim(0, 1)
    ax_bar.set_xticks(x)
    ax_bar.set_xticklabels(features)
    ax_bar.set_ylabel("Score (0–1)")
    ax_bar.set_title("Chinese vs English stylistic features")
    ax_bar.legend()
    fig_bar.tight_layout()
    return fig_bar


def _make_radar_chart(features, zh_vals, en_vals):
    """Build the radar (polar) chart of the Chinese and English stylistic profiles."""
    from matplotlib.figure import Figure  # see _make_bar_chart

    fig_radar = Figure()
    ax_radar = fig_radar.add_subplot(111, polar=True)

    angles = np.linspace(0, 2 * np.pi, len(features), endpoint=False)
    # Repeat the first point at the end so each outline is a closed polygon.
    zh_vals_closed = zh_vals + [zh_vals[0]]
    en_vals_closed = en_vals + [en_vals[0]]
    angles_closed = list(angles) + [angles[0]]

    ax_radar.plot(angles_closed, zh_vals_closed, marker="o", label="Chinese")
    ax_radar.fill(angles_closed, zh_vals_closed, alpha=0.1)
    ax_radar.plot(
        angles_closed, en_vals_closed, marker="o", linestyle="--", label="English"
    )
    ax_radar.fill(angles_closed, en_vals_closed, alpha=0.1)

    ax_radar.set_xticks(angles)
    ax_radar.set_xticklabels(features)
    ax_radar.set_yticklabels([])
    ax_radar.set_title("Stylistic profile (radar)")
    ax_radar.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1))
    fig_radar.tight_layout()
    return fig_radar


def analyze_letter(chinese_text: str):
    """Translate a Chinese letter and score the tone of source and translation.

    Args:
        chinese_text: The Chinese letter entered by the user.

    Returns:
        An 8-tuple in the order of the outputs bound to the button below:
        ``(translation, polite_rewrite, zh_stats, en_stats, level, score,
        bar_figure, radar_figure)``. For blank input the texts are empty,
        the stats are empty dicts, the level is "N/A", the score is 0.0 and
        both figures are ``None``.
    """
    if not chinese_text or not chinese_text.strip():
        # Must yield exactly as many values as there are bound outputs (8).
        return (
            "",     # English translation
            "",     # polite rewrite
            {},     # Chinese metrics
            {},     # English metrics
            "N/A",  # power-distance level
            0.0,    # power-distance score
            None,   # bar figure
            None,   # radar figure
        )

    # 1) Rule-based tone analysis of the Chinese source
    polite_zh, hedge_zh, imp_zh = score_chinese_features(chinese_text)
    zh_stats = {
        "politeness": polite_zh,
        "hedging": hedge_zh,
        "imperative": imp_zh,
    }

    # 2) Chinese -> English translation
    translated = translator(chinese_text, max_length=512)[0]["translation_text"]

    # 2.1) More polite English rewrite
    # NOTE(review): the rewrite model is loaded from a paraphraser checkpoint;
    # it may not follow this prompt as an instruction — verify the output.
    polite_prompt = f"Rewrite the following sentence in polite and respectful English: {translated}"
    polite_version = polite_rewrite(polite_prompt)[0]["generated_text"]

    # 3) English politeness
    pol_out = politeness_cls(translated)[0]
    polite_en = map_polite_guard_to_score(pol_out["label"])

    # 4) English formality
    form_out = formality_cls(translated)[0]
    formality_en = map_formality_to_score(form_out["label"])

    # 5) English hedging
    hedge_out = hedge_cls(translated)[0]
    hedge_en = map_hedge_to_score(hedge_out["label"])

    # 6) English imperative strength
    imp_en = score_imperative_en(translated)

    en_stats = {
        "politeness": polite_en,
        "formality": formality_en,
        "hedging": hedge_en,
        "imperative": imp_en,
    }

    # 7) Power-distance score of the English side (0-1): lower politeness,
    #    higher formality, less hedging and more imperatives all raise it.
    power_distance_score = (
        0.35 * (1 - polite_en)
        + 0.25 * formality_en
        + 0.25 * (1 - hedge_en)
        + 0.15 * imp_en
    )
    level = _power_distance_level(power_distance_score)

    # ---------- 4. Bar chart: Chinese vs English ----------
    features = ["politeness", "formality", "hedging", "imperative"]
    # The Chinese side has no formality metric, so it falls back to 0.5 there.
    zh_vals = [zh_stats.get(k, 0.5 if k != "imperative" else 0.0) for k in features]
    en_vals = [en_stats.get(k, 0.0) for k in features]
    fig_bar = _make_bar_chart(features, zh_vals, en_vals)

    # ---------- 5. Radar chart ----------
    fig_radar = _make_radar_chart(features, zh_vals, en_vals)

    return (
        translated,            # 1
        polite_version,        # 2
        zh_stats,              # 3
        en_stats,              # 4
        level,                 # 5
        power_distance_score,  # 6 (the numeric score, not the level)
        fig_bar,               # 7
        fig_radar,             # 8
    )


# ---------- 6. Gradio interface ----------
with gr.Blocks(title="Power Distance Checker") as demo:
    gr.Markdown(
        """
        # 📨 中译英权力距离检测(Power Distance)

        输入一段 **中文信件**,系统会:
        1. 自动翻译为英文
        2. 分析中英文两侧的礼貌度、正式度、委婉程度、命令语气
        3. 给出英文译文的 **权力距离等级:Low / Medium / High**
        4. 用柱状图 + 雷达图展示风格变化
        """
    )

    with gr.Row():
        input_box = gr.Textbox(
            label="输入中文信件",
            lines=6,
            placeholder="例如:您好,我想向您反馈近期的项目进度,如有不妥之处,还请您多多指正。"
        )

    run_btn = gr.Button("分析语气与权力距离")

    # Raw English translation
    with gr.Row():
        output_en = gr.Textbox(label="英文翻译", lines=6)

    # More polite English version (declared in its own row)
    with gr.Row():
        polite_output = gr.Textbox(label="更礼貌的(增强版)英文", lines=6)

    with gr.Row():
        zh_json = gr.JSON(label="中文侧语气指标(0–1)")
        en_json = gr.JSON(label="英文侧语气指标(0–1)")

    with gr.Row():
        pd_label = gr.Label(label="Power Distance Level (English translation)")
        pd_score = gr.Number(label="Power Distance Score (0–1)", precision=3)

    with gr.Row():
        bar_plot = gr.Plot(label="Bar Chart:Chinese vs English")
        radar_plot = gr.Plot(label="Radar Chart:Stylistic Profile")

    # Button binding: the outputs list must line up, in order and in count,
    # with the tuple returned by analyze_letter.
    run_btn.click(
        fn=analyze_letter,
        inputs=[input_box],
        outputs=[
            output_en,      # 1 raw translation
            polite_output,  # 2 polite rewrite
            zh_json,        # 3 Chinese tone metrics
            en_json,        # 4 English tone metrics
            pd_label,       # 5 power-distance level
            pd_score,       # 6 power-distance score
            bar_plot,       # 7 bar chart
            radar_plot,     # 8 radar chart
        ],
    )

if __name__ == "__main__":
    demo.launch()