# Spaces: Sleeping  (status banner captured from the hosting page; not part of the program)
import re

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
from transformers import pipeline
# ---------- 1. Load the Hugging Face models ----------
# Every pipeline below is constructed by a module-level statement, i.e. once
# when this module is imported, not once per request.

# Rewriting model (T5) used to produce the "more polite" English variant.
polite_rewrite = pipeline(
    "text2text-generation",
    model="prithivida/parrot_paraphraser_on_T5",
)

# Chinese -> English translation.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")

# English politeness (4 classes: polite / somewhat polite / neutral / impolite).
politeness_cls = pipeline("text-classification", model="Intel/polite-guard")

# English formality (3 classes: formal / neutral / informal).
formality_cls = pipeline(
    "text-classification",
    model="LenDigLearn/formality-classifier-mdeberta-v3-base",
)

# English hedging / uncertainty (softened or vague wording).
# NOTE(review): the checkpoint name says "sentiment", not "hedge". If its
# labels are sentiment classes, map_hedge_to_score() will not recognise them
# and the hedging score will always be the 0.5 fallback -- confirm the model
# choice before trusting this feature.
hedge_cls = pipeline(
    "text-classification",
    model="siebert/sentiment-roberta-large-english",
    device="cpu",
    truncation=True,
    max_length=256,
    padding="max_length",
)
# ---------- 2. Simple rule-based scoring helpers (Chinese & English) ----------
POLITE_WORDS_ZH = ["请", "麻烦您", "劳烦", "敬请", "拜托", "打扰了", "烦请"]
HEDGE_WORDS_ZH = ["是否", "可能", "大概", "也许", "好像", "觉得", "有点"]
IMPERATIVE_WORDS_ZH = ["必须", "务必", "不得", "不准", "立即", "马上", "必须要"]


def _count_phrase_hits(text, phrases):
    """Count non-overlapping occurrences in *text* of any phrase in *phrases*.

    Longer phrases are tried first, so a phrase that contains a shorter one
    (e.g. "必须要" vs. "必须", "烦请" vs. "请") is counted once, not twice.
    """
    alternation = "|".join(
        re.escape(phrase) for phrase in sorted(phrases, key=len, reverse=True)
    )
    if not alternation:
        # An empty pattern would match at every position of the text.
        return 0
    return len(re.findall(alternation, text))


def score_chinese_features(text: str):
    """Score the tone of a Chinese text with simple keyword rules.

    Args:
        text: The Chinese source text.

    Returns:
        A ``(politeness, hedging, imperative)`` tuple of floats in [0, 1].
        Politeness and hedging saturate at 3 keyword hits, imperative at 2.
        Blank input yields the defaults ``(0.5, 0.5, 0.0)``.
    """
    if not text.strip():
        return 0.5, 0.5, 0.0  # neutral defaults for blank input

    polite_hits = _count_phrase_hits(text, POLITE_WORDS_ZH)
    hedge_hits = _count_phrase_hits(text, HEDGE_WORDS_ZH)
    imp_hits = _count_phrase_hits(text, IMPERATIVE_WORDS_ZH)

    # Hit counts are never negative, so only the upper bound needs clamping.
    polite_score = min(polite_hits / 3.0, 1.0)
    hedge_score = min(hedge_hits / 3.0, 1.0)
    imp_score = min(imp_hits / 2.0, 1.0)
    return polite_score, hedge_score, imp_score
def map_polite_guard_to_score(label: str):
    """Map a 4-class politeness label to a politeness score in [0, 1].

    The label is normalised first (lower-cased, ``_``/``-`` treated as spaces,
    surrounding and repeated whitespace collapsed) so that variants such as
    ``"Somewhat_Polite"`` or ``" polite "`` are recognised.

    Args:
        label: The classifier's label string.

    Returns:
        1.0 for "polite", 0.75 for "somewhat polite", 0.5 for "neutral",
        0.0 for "impolite", and 0.5 for anything unrecognised.
    """
    normalized = " ".join(
        label.lower().replace("_", " ").replace("-", " ").split()
    )
    # "polite" must be an exact match: it is a substring of both
    # "somewhat polite" and "impolite".
    if normalized == "polite":
        return 1.0
    if "somewhat polite" in normalized:
        return 0.75
    if "neutral" in normalized:
        return 0.5
    if "impolite" in normalized:
        return 0.0
    return 0.5
def map_formality_to_score(label: str):
    """Map a formal / neutral / informal label to a formality score in [0, 1].

    Args:
        label: The classifier's label string (case-insensitive).

    Returns:
        1.0 for a formal label, 0.5 for neutral, 0.0 for informal, and 0.5
        for anything unrecognised.
    """
    normalized = label.strip().lower()
    # "informal" must be tested before "formal": the former contains the
    # latter, so testing "formal" first would score informal text as 1.0.
    if "informal" in normalized:
        return 0.0
    if "formal" in normalized:
        return 1.0
    if "neutral" in normalized:
        return 0.5
    return 0.5
def map_hedge_to_score(label: str):
    """Map a hedge-classifier label to a hedging score in [0, 1].

    The label is lower-cased and its whitespace / ``_`` / ``-`` separators are
    unified before matching. A negated hedge label ("no_hedge", "No Hedge",
    "non-hedge", "not_hedged", "unhedged", ...) maps to 0.0; any other label
    containing "hedg" ("hedge", "hedged", "hedging") maps to 1.0.

    Args:
        label: The classifier's label string.

    Returns:
        1.0 for a hedge label, 0.0 for a negated hedge label, and 0.5 for any
        label that does not mention hedging at all (e.g. finer-grained or
        unrelated classes).
    """
    normalized = re.sub(r"[\s_\-]+", "_", label.strip().lower())
    # Look for an explicit negation prefix attached to the hedge word rather
    # than the bare letters "no" anywhere in the label, which would also
    # reject unrelated labels that merely contain "no".
    if re.search(r"(?:^|_)(?:no|non|not|un)_?hedg", normalized):
        return 0.0
    if "hedg" in normalized:
        return 1.0
    # Labels that say nothing about hedging get the neutral midpoint.
    return 0.5
IMPERATIVE_TRIGGER_EN = [
    r"^please\b",
    r"^kindly\b",
    r"^do\b",
    r"^make\b",
    r"^send\b",
    r"^provide\b",
    r"\byou must\b",
    r"\byou have to\b",
    r"\byou are required to\b",
]

# Compiled once at import time instead of on every call.
_IMPERATIVE_PATTERNS_EN = [re.compile(pattern) for pattern in IMPERATIVE_TRIGGER_EN]
# A sentence ends at . ! ? ; followed by whitespace, or at a line break.
_SENTENCE_BOUNDARY_EN = re.compile(r"(?<=[.!?;])\s+|\n+")


def score_imperative_en(text: str):
    """Estimate how imperative an English text sounds, using simple rules.

    The text is lower-cased and split into sentences; each trigger pattern
    counts once if it matches in any sentence. The ``^``-anchored triggers are
    therefore tested at the start of every sentence, not only at the first
    word of the whole text.

    Args:
        text: The English text to score.

    Returns:
        A float in [0, 1]: 0.0 for blank text or no trigger, 0.5 for one
        distinct trigger, 1.0 for two or more.
    """
    lowered = text.strip().lower()
    if not lowered:
        return 0.0

    sentences = [
        piece.strip()
        for piece in _SENTENCE_BOUNDARY_EN.split(lowered)
        if piece.strip()
    ]
    hits = sum(
        1
        for pattern in _IMPERATIVE_PATTERNS_EN
        if any(pattern.search(sentence) for sentence in sentences)
    )
    # Several distinct triggers raise the score; it saturates at two.
    return min(hits / 2.0, 1.0)
# ---------- 3. Core: analysis function ----------
_STYLE_FEATURES = ["politeness", "formality", "hedging", "imperative"]


def _power_distance_level(score):
    """Bucket a 0-1 power-distance score into "Low" / "Medium" / "High"."""
    if score < 0.33:
        return "Low"
    if score < 0.66:
        return "Medium"
    return "High"


def _new_figure():
    """Create a Matplotlib figure that is not registered with pyplot.

    pyplot keeps a reference to every figure it creates until it is closed;
    building one per request that way accumulates figures for the life of the
    process. A bare ``Figure`` is released as soon as nothing refers to it.
    """
    from matplotlib.figure import Figure

    return Figure()


def _make_bar_figure(features, zh_vals, en_vals):
    """Draw the grouped bar chart comparing Chinese and English feature scores."""
    positions = np.arange(len(features))
    width = 0.35

    fig = _new_figure()
    ax = fig.subplots()
    ax.bar(positions - width / 2, zh_vals, width, label="Chinese (source)")
    ax.bar(positions + width / 2, en_vals, width, label="English (translation)")
    ax.set_ylim(0, 1)
    ax.set_xticks(positions)
    ax.set_xticklabels(features)
    ax.set_ylabel("Score (0–1)")
    ax.set_title("Chinese vs English stylistic features")
    ax.legend()
    fig.tight_layout()
    return fig


def _make_radar_figure(features, zh_vals, en_vals):
    """Draw the radar (polar) chart of Chinese and English feature scores."""
    fig = _new_figure()
    ax = fig.add_subplot(111, polar=True)

    angles = np.linspace(0, 2 * np.pi, len(features), endpoint=False)
    # Repeat the first point at the end so each outline is a closed polygon.
    angles_closed = list(angles) + [angles[0]]
    zh_closed = zh_vals + [zh_vals[0]]
    en_closed = en_vals + [en_vals[0]]

    ax.plot(angles_closed, zh_closed, marker="o", label="Chinese")
    ax.fill(angles_closed, zh_closed, alpha=0.1)
    ax.plot(angles_closed, en_closed, marker="o", linestyle="--", label="English")
    ax.fill(angles_closed, en_closed, alpha=0.1)
    ax.set_xticks(angles)
    ax.set_xticklabels(features)
    ax.set_yticklabels([])
    ax.set_title("Stylistic profile (radar)")
    ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1))
    fig.tight_layout()
    return fig


def analyze_letter(chinese_text: str):
    """Translate a Chinese letter and compare its tone with the English result.

    Args:
        chinese_text: The Chinese source letter.

    Returns:
        An 8-tuple, in this order:
        ``(translation, polite_rewrite, zh_stats, en_stats, level, score,
        bar_figure, radar_figure)``.

        * ``translation`` / ``polite_rewrite``: English strings.
        * ``zh_stats`` / ``en_stats``: dicts of 0-1 feature scores.
        * ``level``: "Low", "Medium" or "High".
        * ``score``: the power-distance score as a float.
        * ``bar_figure`` / ``radar_figure``: Matplotlib figures.

        For missing or blank input the same 8 positions are returned with
        placeholder values (empty strings, empty dicts, "N/A", 0.0, None).
    """
    if not chinese_text or not chinese_text.strip():
        # Must have the same arity as the normal return below.
        return (
            "",     # English translation
            "",     # more polite English version
            {},     # Chinese-side scores
            {},     # English-side scores
            "N/A",  # power-distance level
            0.0,    # power-distance score
            None,   # bar chart
            None,   # radar chart
        )

    # 1) Chinese tone analysis (rule based).
    polite_zh, hedge_zh, imp_zh = score_chinese_features(chinese_text)
    zh_stats = {
        "politeness": polite_zh,
        "hedging": hedge_zh,
        "imperative": imp_zh,
    }

    # 2) Chinese -> English translation.
    translated = translator(chinese_text, max_length=512)[0]["translation_text"]

    # 2.1) More polite rewrite of the English translation.
    # NOTE(review): the rewriting model is loaded as a paraphraser; whether it
    # follows a free-form instruction like this prompt is unverified -- confirm.
    polite_prompt = f"Rewrite the following sentence in polite and respectful English: {translated}"
    polite_version = polite_rewrite(polite_prompt)[0]["generated_text"]

    # 3) English politeness. truncation=True keeps a long translation within
    #    the classifier's maximum input length instead of raising.
    pol_out = politeness_cls(translated, truncation=True)[0]
    polite_en = map_polite_guard_to_score(pol_out["label"])

    # 4) English formality (truncated for the same reason).
    form_out = formality_cls(translated, truncation=True)[0]
    formality_en = map_formality_to_score(form_out["label"])

    # 5) English hedging. hedge_cls is already built with truncation enabled.
    hedge_out = hedge_cls(translated)[0]
    hedge_en = map_hedge_to_score(hedge_out["label"])

    # 6) English imperative strength (rule based).
    imp_en = score_imperative_en(translated)

    en_stats = {
        "politeness": polite_en,
        "formality": formality_en,
        "hedging": hedge_en,
        "imperative": imp_en,
    }

    # 7) English-side power-distance score in [0, 1]: a weighted sum in which
    #    low politeness, high formality, little hedging and a commanding tone
    #    all push the score up. The weights sum to 1.
    power_distance_score = float(
        0.35 * (1 - polite_en)
        + 0.25 * formality_en
        + 0.25 * (1 - hedge_en)
        + 0.15 * imp_en
    )
    level = _power_distance_level(power_distance_score)

    # 8) Charts. The Chinese side has no formality score, so that bar/axis is
    #    drawn at the neutral 0.5 placeholder.
    features = list(_STYLE_FEATURES)
    zh_vals = [zh_stats.get(k, 0.5 if k != "imperative" else 0.0) for k in features]
    en_vals = [en_stats.get(k, 0.0) for k in features]
    fig_bar = _make_bar_figure(features, zh_vals, en_vals)
    fig_radar = _make_radar_figure(features, zh_vals, en_vals)

    return (
        translated,            # 1
        polite_version,        # 2
        zh_stats,              # 3
        en_stats,              # 4
        level,                 # 5
        power_distance_score,  # 6
        fig_bar,               # 7
        fig_radar,             # 8
    )
# ---------- 6. Gradio interface ----------
# NOTE(review): the source's indentation was lost; the nesting below (one
# component group per Row, the button directly under Blocks) is reconstructed
# from the statement order -- confirm against the intended layout.
with gr.Blocks(title="Power Distance Checker") as demo:
    # The Markdown text is kept flush-left so its heading and list syntax do
    # not depend on how leading whitespace is handled.
    gr.Markdown(
        """
# 📨 中译英权力距离检测(Power Distance)
输入一段 **中文信件**,系统会:
1. 自动翻译为英文
2. 分析中英文两侧的礼貌度、正式度、委婉程度、命令语气
3. 给出英文译文的 **权力距离等级:Low / Medium / High**
4. 用柱状图 + 雷达图展示风格变化
"""
    )

    with gr.Row():
        input_box = gr.Textbox(
            label="输入中文信件",
            lines=6,
            placeholder="例如:您好,我想向您反馈近期的项目进度,如有不妥之处,还请您多多指正。",
        )
    run_btn = gr.Button("分析语气与权力距离")

    # Raw English translation.
    with gr.Row():
        output_en = gr.Textbox(label="英文翻译", lines=6)

    # More polite English version, declared in a row of its own.
    with gr.Row():
        polite_output = gr.Textbox(label="更礼貌的(增强版)英文", lines=6)

    with gr.Row():
        zh_json = gr.JSON(label="中文侧语气指标(0–1)")
        en_json = gr.JSON(label="英文侧语气指标(0–1)")

    with gr.Row():
        pd_label = gr.Label(label="Power Distance Level (English translation)")
        pd_score = gr.Number(label="Power Distance Score (0–1)", precision=3)

    with gr.Row():
        bar_plot = gr.Plot(label="Bar Chart:Chinese vs English")
        radar_plot = gr.Plot(label="Radar Chart:Stylistic Profile")

    # Button wiring: `outputs` lists the component objects themselves, one per
    # value returned by analyze_letter and in the same order.
    run_btn.click(
        fn=analyze_letter,
        inputs=[input_box],
        outputs=[
            output_en,      # 1 raw English translation
            polite_output,  # 2 more polite English version
            zh_json,        # 3 Chinese-side scores
            en_json,        # 4 English-side scores
            pd_label,       # 5 power-distance level
            pd_score,       # 6 power-distance score
            bar_plot,       # 7 bar chart
            radar_plot,     # 8 radar chart
        ],
    )

if __name__ == "__main__":
    demo.launch()