File size: 8,341 Bytes
02acc58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
import re
import numpy as np
import matplotlib.pyplot as plt

import gradio as gr
from transformers import pipeline


# ---------- 1. Load the Hugging Face models ----------
# NOTE: all four pipelines are constructed at module import time, so importing
# this file instantiates every model before the UI is built.

# Chinese -> English translation
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")

# English politeness (4 classes: polite / somewhat polite / neutral / impolite)
politeness_cls = pipeline("text-classification", model="Intel/polite-guard")

# English formality (3 classes: formal / neutral / informal)
formality_cls = pipeline("text-classification", model="LenDigLearn/formality-classifier-mdeberta-v3-base")

# English hedging / uncertainty (softened or vague wording)
hedge_cls = pipeline("text-classification", model="ChrisLiewJY/BERTweet-Hedge")


# ---------- 2. Simple rule-based scoring helpers for Chinese & English text ----------

# Keyword lists used by the rule-based Chinese tone scorer.
# Some entries are substrings of others (e.g. "请" inside "敬请" / "烦请",
# "必须" inside "必须要"); the scorer matches longest-first so that one
# expression in the text is counted exactly once.
POLITE_WORDS_ZH = ["请", "麻烦您", "劳烦", "敬请", "拜托", "打扰了", "烦请"]
HEDGE_WORDS_ZH = ["是否", "可能", "大概", "也许", "好像", "觉得", "有点"]
IMPERATIVE_WORDS_ZH = ["必须", "务必", "不得", "不准", "立即", "马上", "必须要"]


def _count_keyword_hits(text, keywords):
    """Count non-overlapping occurrences of any keyword in *text*.

    Longer keywords are tried first, so a phrase such as "必须要" yields a
    single hit instead of one hit for "必须" plus another for "必须要".
    """
    ordered = sorted({w for w in keywords if w}, key=len, reverse=True)
    if not ordered:
        return 0
    pattern = "|".join(re.escape(word) for word in ordered)
    return len(re.findall(pattern, text))


def score_chinese_features(text: str):
    """Very simple rule-based tone scoring for Chinese text.

    Args:
        text: The Chinese source text.

    Returns:
        A tuple ``(politeness, hedging, imperative)`` of floats in [0, 1].
        Politeness and hedging saturate at 3 keyword hits, imperative at 2.
        Blank input yields the defaults ``(0.5, 0.5, 0.0)``.
    """
    if not text.strip():
        return 0.5, 0.5, 0.0  # neutral defaults for blank input

    polite_hits = _count_keyword_hits(text, POLITE_WORDS_ZH)
    hedge_hits = _count_keyword_hits(text, HEDGE_WORDS_ZH)
    imp_hits = _count_keyword_hits(text, IMPERATIVE_WORDS_ZH)

    # More hits -> higher score, capped at 1.0 (hit counts are never negative).
    polite_score = min(polite_hits / 3.0, 1.0)
    hedge_score = min(hedge_hits / 3.0, 1.0)
    imp_score = min(imp_hits / 2.0, 1.0)

    return float(polite_score), float(hedge_score), float(imp_score)


def map_polite_guard_to_score(label: str):
    """Map one of the 4 Intel/polite-guard labels to a politeness score in [0, 1].

    An exact "polite" label scores 1.0; otherwise the first matching fragment
    among "somewhat polite" (0.75), "neutral" (0.5) and "impolite" (0.0)
    decides. Unrecognised labels fall back to 0.5.
    """
    normalized = label.lower()

    # Exact comparison: a substring test would also catch "impolite".
    if normalized == "polite":
        return 1.0

    fragment_scores = (
        ("somewhat polite", 0.75),
        ("neutral", 0.5),
        ("impolite", 0.0),
    )
    for fragment, score in fragment_scores:
        if fragment in normalized:
            return score

    return 0.5


def map_formality_to_score(label: str):
    """Map a formality label (formal / neutral / informal) to a score in [0, 1].

    Args:
        label: Classifier label; matching is case-insensitive and by substring.

    Returns:
        1.0 for formal, 0.5 for neutral, 0.0 for informal, and 0.5 for any
        label that matches none of these.
    """
    label = label.lower()
    # "informal" contains "formal" as a substring, so it must be tested first;
    # otherwise informal text would be scored as fully formal.
    if "informal" in label:
        return 0.0
    if "formal" in label:
        return 1.0
    if "neutral" in label:
        return 0.5
    return 0.5


def map_hedge_to_score(label: str):
    """Map a BERTweet-Hedge label to a hedging score in [0, 1].

    The label set may look like "Hedge" / "No_Hedge" or be multi-class; this
    mapping is only illustrative. A label mentioning "hedge" without any "no"
    scores 1.0, a label containing "no_hedge" scores 0.0, and everything else
    (e.g. finer-grained multi-class labels) gets the middle value 0.5.
    """
    lowered = label.lower()
    mentions_hedge = "hedge" in lowered
    has_no = "no" in lowered

    if mentions_hedge and not has_no:
        return 1.0
    # Multi-class labels could be split further; use the midpoint for now.
    return 0.0 if "no_hedge" in lowered else 0.5


# Regex triggers for command-like English. Patterns starting with "^" are
# meant to match the opening word of a sentence; the rest match anywhere.
IMPERATIVE_TRIGGER_EN = [
    r"^please\b",
    r"^kindly\b",
    r"^do\b",
    r"^make\b",
    r"^send\b",
    r"^provide\b",
    r"\byou must\b",
    r"\byou have to\b",
    r"\byou are required to\b",
]


def score_imperative_en(text: str):
    """Estimate how imperative an English text sounds using simple rules.

    The text is lower-cased and split into sentences on ``. ! ? ;`` and
    newlines. Each trigger in ``IMPERATIVE_TRIGGER_EN`` counts once if it
    matches any sentence, so the "^"-anchored triggers also fire on sentences
    that follow a greeting instead of only on the very first word of the text.

    Args:
        text: English text (e.g. the machine translation of a letter).

    Returns:
        A float in [0, 1]: number of distinct triggers matched divided by 2,
        capped at 1.0. Blank input returns 0.0.
    """
    t = text.strip().lower()
    if not t:
        return 0.0

    sentences = [s.strip() for s in re.split(r"[.!?;\n]+", t)]
    sentences = [s for s in sentences if s]

    hits = sum(
        1
        for pat in IMPERATIVE_TRIGGER_EN
        if any(re.search(pat, sentence) for sentence in sentences)
    )
    # Several distinct triggers raise the score; two or more saturate it.
    return float(min(hits / 2.0, 1.0))


# ---------- 3. Core: analysis function ----------

def _power_distance_level(score):
    """Bucket a 0-1 power-distance score into "Low" / "Medium" / "High"."""
    if score < 0.33:
        return "Low"
    if score < 0.66:
        return "Medium"
    return "High"


def _make_bar_figure(features, zh_vals, en_vals):
    """Draw the grouped bar chart comparing Chinese and English scores."""
    x = np.arange(len(features))
    width = 0.35

    fig_bar, ax_bar = plt.subplots()
    ax_bar.bar(x - width/2, zh_vals, width, label="Chinese (source)")
    ax_bar.bar(x + width/2, en_vals, width, label="English (translation)")
    ax_bar.set_ylim(0, 1)
    ax_bar.set_xticks(x)
    ax_bar.set_xticklabels(features)
    ax_bar.set_ylabel("Score (0–1)")
    ax_bar.set_title("Chinese vs English stylistic features")
    ax_bar.legend()
    fig_bar.tight_layout()
    # Drop the figure from pyplot's global registry so repeated requests do
    # not accumulate open figures; the Figure object itself stays usable.
    plt.close(fig_bar)
    return fig_bar


def _make_radar_figure(features, zh_vals, en_vals):
    """Draw the radar (polar) chart of the Chinese and English profiles."""
    fig_radar = plt.figure()
    ax_radar = fig_radar.add_subplot(111, polar=True)

    angles = np.linspace(0, 2 * np.pi, len(features), endpoint=False)
    # Repeat the first point at the end so each polygon is closed.
    zh_vals_closed = zh_vals + [zh_vals[0]]
    en_vals_closed = en_vals + [en_vals[0]]
    angles_closed = list(angles) + [angles[0]]

    ax_radar.plot(angles_closed, zh_vals_closed, marker="o", label="Chinese")
    ax_radar.fill(angles_closed, zh_vals_closed, alpha=0.1)

    ax_radar.plot(angles_closed, en_vals_closed, marker="o", linestyle="--", label="English")
    ax_radar.fill(angles_closed, en_vals_closed, alpha=0.1)

    ax_radar.set_xticks(angles)
    ax_radar.set_xticklabels(features)
    ax_radar.set_yticklabels([])
    ax_radar.set_title("Stylistic profile (radar)")
    ax_radar.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1))
    fig_radar.tight_layout()
    # Same reason as for the bar chart: avoid leaking figures in pyplot.
    plt.close(fig_radar)
    return fig_radar


def analyze_letter(chinese_text: str):
    """Translate a Chinese letter and compare its tone with the English output.

    Args:
        chinese_text: The Chinese source text entered by the user.

    Returns:
        A 7-tuple, in the order expected by the UI outputs:
        ``(translation, zh_stats, en_stats, level, score, bar_fig, radar_fig)``.

        * ``translation``: English translation of the input.
        * ``zh_stats``: rule-based scores for the Chinese text
          (``politeness``, ``hedging``, ``imperative``).
        * ``en_stats``: scores for the translation
          (``politeness``, ``formality``, ``hedging``, ``imperative``).
        * ``level``: "Low" / "Medium" / "High" power-distance bucket.
        * ``score``: weighted power-distance score rounded to 3 decimals.
        * ``bar_fig`` / ``radar_fig``: matplotlib figures.

        For ``None`` or blank input nothing is computed and the placeholder
        tuple ``("", {}, {}, "N/A", 0.0, None, None)`` is returned.
    """
    # Treat None like blank input instead of failing on .strip().
    if not chinese_text or not chinese_text.strip():
        return (
            "",  # English translation
            {},  # Chinese-side metrics
            {},  # English-side metrics
            "N/A",  # power-distance level
            0.0,   # power-distance score
            None,  # bar figure
            None,  # radar figure
        )

    # 1) Tone analysis of the Chinese source (rule-based)
    polite_zh, hedge_zh, imp_zh = score_chinese_features(chinese_text)

    zh_stats = {
        "politeness": polite_zh,
        "hedging": hedge_zh,
        "imperative": imp_zh,
    }

    # 2) Chinese -> English translation
    translated = translator(chinese_text, max_length=512)[0]["translation_text"]

    # 3)-5) English politeness, formality and hedging. truncation=True keeps a
    # long translation within each classifier's maximum input length instead
    # of failing inside the model.
    pol_out = politeness_cls(translated, truncation=True)[0]
    polite_en = map_polite_guard_to_score(pol_out["label"])

    form_out = formality_cls(translated, truncation=True)[0]
    formality_en = map_formality_to_score(form_out["label"])

    hedge_out = hedge_cls(translated, truncation=True)[0]
    hedge_en = map_hedge_to_score(hedge_out["label"])

    # 6) English imperative strength (rule-based)
    imp_en = score_imperative_en(translated)

    en_stats = {
        "politeness": polite_en,
        "formality": formality_en,
        "hedging": hedge_en,
        "imperative": imp_en,
    }

    # 7) Power-distance score of the English side (0-1): less politeness,
    # more formality, less hedging and more imperatives all raise it.
    power_distance_score = (
        0.35 * (1 - polite_en)
        + 0.25 * formality_en
        + 0.25 * (1 - hedge_en)
        + 0.15 * imp_en
    )
    level = _power_distance_level(power_distance_score)

    # Chart data. The Chinese side has no formality metric, so a neutral 0.5
    # placeholder is plotted for it.
    features = ["politeness", "formality", "hedging", "imperative"]
    zh_vals = [zh_stats.get(k, 0.5 if k != "imperative" else 0.0) for k in features]
    en_vals = [en_stats.get(k, 0.0) for k in features]

    fig_bar = _make_bar_figure(features, zh_vals, en_vals)
    fig_radar = _make_radar_figure(features, zh_vals, en_vals)

    return translated, zh_stats, en_stats, level, round(power_distance_score, 3), fig_bar, fig_radar


# ---------- 6. Gradio interface ----------

# Components are laid out in the order they are created inside the Blocks context.
with gr.Blocks(title="Power Distance Checker") as demo:
    # Intro text shown at the top of the page.
    gr.Markdown(
        """
        # 📨 中译英权力距离检测(Power Distance)
        输入一段 **中文信件**,系统会:
        1. 自动翻译为英文  
        2. 分析中英文两侧的礼貌度、正式度、委婉程度、命令语气  
        3. 给出英文译文的 **权力距离等级:Low / Medium / High**  
        4. 用柱状图 + 雷达图展示风格变化
        """
    )

    # Input: the Chinese letter to analyse.
    with gr.Row():
        input_box = gr.Textbox(label="输入中文信件", lines=6, placeholder="例如:您好,我想向您反馈近期的项目进度,如有不妥之处,还请您多多指正。")

    run_btn = gr.Button("分析语气与权力距离")

    # Output: English translation.
    with gr.Row():
        output_en = gr.Textbox(label="英文翻译", lines=6)

    # Output: per-language metric dictionaries, side by side.
    with gr.Row():
        zh_json = gr.JSON(label="中文侧语气指标(0–1)")
        en_json = gr.JSON(label="英文侧语气指标(0–1)")

    # Output: power-distance bucket and numeric score.
    with gr.Row():
        pd_label = gr.Label(label="Power Distance Level (English translation)")
        pd_score = gr.Number(label="Power Distance Score (0–1)", precision=3)

    # Output: the two matplotlib figures.
    with gr.Row():
        bar_plot = gr.Plot(label="Bar Chart:Chinese vs English")
        radar_plot = gr.Plot(label="Radar Chart:Stylistic Profile")

    # The outputs list follows the order of the 7-tuple returned by analyze_letter.
    run_btn.click(
        fn=analyze_letter,
        inputs=[input_box],
        outputs=[output_en, zh_json, en_json, pd_label, pd_score, bar_plot, radar_plot],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()