"""Power Distance Checker: a Gradio demo for Chinese-to-English letters.

Translates a Chinese letter to English, scores politeness, formality,
hedging, and imperative tone, and reports a Low / Medium / High
power-distance level together with a bar chart and a radar chart.
"""

import re
import numpy as np
import matplotlib.pyplot as plt

import gradio as gr
from transformers import pipeline

# --------------------
# Politeness-enhancing rewrite model (T5)
# --------------------
# Loaded at import time; analyze_letter calls it to produce the "more polite"
# English variant of the translation.
polite_rewrite = pipeline(
    "text2text-generation",
    model="prithivida/parrot_paraphraser_on_T5"
)



# ---------- 1. Load Hugging Face models ----------
# Chinese -> English translation
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")

# English politeness (4 classes: polite / somewhat polite / neutral / impolite)
politeness_cls = pipeline("text-classification", model="Intel/polite-guard")

# English formality (3 classes: formal / neutral / informal)
formality_cls = pipeline("text-classification", model="LenDigLearn/formality-classifier-mdeberta-v3-base")

# English hedge / uncertainty (indirect or vague phrasing)
# NOTE(review): the checkpoint name below is a sentiment model, not a hedge
# detector; presumably its labels never contain "hedge", in which case
# map_hedge_to_score always falls through to 0.5 — confirm and swap the model.
hedge_cls = pipeline(
    "text-classification",
    model="siebert/sentiment-roberta-large-english",
    device="cpu",
    truncation=True,
    max_length=256,
    padding="max_length"
)


# ---------- 2. Simple rule-based tone scoring for Chinese & English ----------

# Cue words counted by score_chinese_features. Some entries contain others
# (e.g. "敬请" and "烦请" contain "请"; "必须要" contains "必须"), so matching
# is done longest-first and non-overlapping to avoid counting one phrase twice.
POLITE_WORDS_ZH = ["请", "麻烦您", "劳烦", "敬请", "拜托", "打扰了", "烦请"]
HEDGE_WORDS_ZH = ["是否", "可能", "大概", "也许", "好像", "觉得", "有点"]
IMPERATIVE_WORDS_ZH = ["必须", "务必", "不得", "不准", "立即", "马上", "必须要"]


def _count_cue_hits(text: str, words) -> int:
    """Count non-overlapping occurrences of any cue word, preferring longer words."""
    if not words:
        return 0
    # Longest-first alternation makes "必须要" win over its prefix "必须".
    ordered = sorted(words, key=len, reverse=True)
    alternation = "|".join(re.escape(word) for word in ordered)
    return len(re.findall(alternation, text))


def score_chinese_features(text: str):
    """Score the tone of a Chinese text with simple keyword rules.

    Args:
        text: The Chinese source text.

    Returns:
        A ``(politeness, hedging, imperative)`` tuple of floats in [0, 1].
        Politeness and hedging saturate at 3 cue hits, imperative at 2.
        Blank input yields the neutral default ``(0.5, 0.5, 0.0)``.
    """
    if not text.strip():
        return 0.5, 0.5, 0.0  # neutral default for blank input

    polite_hits = _count_cue_hits(text, POLITE_WORDS_ZH)
    hedge_hits = _count_cue_hits(text, HEDGE_WORDS_ZH)
    imp_hits = _count_cue_hits(text, IMPERATIVE_WORDS_ZH)

    # More hits -> higher score, clipped to [0, 1].
    polite_score = np.clip(polite_hits / 3.0, 0, 1)
    hedge_score = np.clip(hedge_hits / 3.0, 0, 1)
    imp_score = np.clip(imp_hits / 2.0, 0, 1)

    return float(polite_score), float(hedge_score), float(imp_score)


def map_polite_guard_to_score(label: str):
    """Map an Intel/polite-guard class label to a politeness score in [0, 1].

    Args:
        label: Classifier label; compared case-insensitively.

    Returns:
        1.0 for exactly "polite", 0.75 / 0.5 / 0.0 when the label contains
        "somewhat polite" / "neutral" / "impolite" respectively, and 0.5 for
        anything unrecognized.
    """
    normalized = label.lower()

    # Exact match only: "polite" is also a substring of the other classes.
    if normalized == "polite":
        return 1.0

    substring_scores = (
        ("somewhat polite", 0.75),
        ("neutral", 0.5),
        ("impolite", 0.0),
    )
    for needle, score in substring_scores:
        if needle in normalized:
            return score
    return 0.5


def map_formality_to_score(label: str):
    """Map a formality label (formal / neutral / informal) to a score in [0, 1].

    Args:
        label: Classifier label; compared case-insensitively.

    Returns:
        1.0 for formal, 0.5 for neutral, 0.0 for informal, and 0.5 for any
        unrecognized label.
    """
    normalized = label.lower()
    # "informal" must be tested first: "formal" is a substring of "informal",
    # so checking "formal" first would score informal text as fully formal.
    if "informal" in normalized:
        return 0.0
    if "formal" in normalized:
        return 1.0
    if "neutral" in normalized:
        return 0.5
    return 0.5


def map_hedge_to_score(label: str):
    """Map a hedge-classifier label to a hedging score in [0, 1].

    Written for labels shaped like "Hedge" / "No_Hedge"; this is only an
    illustrative mapping.

    Args:
        label: Classifier label; compared case-insensitively.

    Returns:
        1.0 when the label contains "hedge" but not "no", 0.0 when it
        contains "no_hedge", and 0.5 for every other label (e.g. the classes
        of a multi-class model).
    """
    lowered = label.lower()

    # Without "hedge" neither specific rule can apply.
    if "hedge" not in lowered:
        return 0.5
    if "no" not in lowered:
        return 1.0
    # Contains both "hedge" and "no": only the explicit negative form scores 0.
    return 0.0 if "no_hedge" in lowered else 0.5


# Regex triggers for command-like English: sentence-initial verbs/markers and
# explicit obligation phrases. Matched against lowercased, stripped text.
IMPERATIVE_TRIGGER_EN = [
    r"^please\b",
    r"^kindly\b",
    r"^do\b",
    r"^make\b",
    r"^send\b",
    r"^provide\b",
    r"\byou must\b",
    r"\byou have to\b",
    r"\byou are required to\b",
]


def score_imperative_en(text: str):
    """Estimate how command-like an English text is using simple regex rules.

    Args:
        text: English text to inspect.

    Returns:
        A float in [0, 1]: the number of distinct trigger patterns found,
        divided by 2 and clipped, so two or more triggers give 1.0. Blank
        input gives 0.0.
    """
    normalized = text.strip().lower()
    if normalized == "":
        return 0.0

    # Each pattern contributes at most one hit, however often it occurs.
    matched = sum(
        1 for pattern in IMPERATIVE_TRIGGER_EN if re.search(pattern, normalized)
    )
    return float(np.clip(matched / 2.0, 0, 1))


# ---------- 3. 核心:分析函数 ----------

def _power_distance_level(score: float) -> str:
    """Bucket a 0-1 power-distance score into "Low" / "Medium" / "High"."""
    if score < 0.33:
        return "Low"
    if score < 0.66:
        return "Medium"
    return "High"


def _plot_feature_bars(features, zh_vals, en_vals):
    """Draw the grouped bar chart comparing Chinese and English feature scores."""
    x = np.arange(len(features))
    width = 0.35

    fig_bar, ax_bar = plt.subplots()
    ax_bar.bar(x - width / 2, zh_vals, width, label="Chinese (source)")
    ax_bar.bar(x + width / 2, en_vals, width, label="English (translation)")
    ax_bar.set_ylim(0, 1)
    ax_bar.set_xticks(x)
    ax_bar.set_xticklabels(features)
    ax_bar.set_ylabel("Score (0–1)")
    ax_bar.set_title("Chinese vs English stylistic features")
    ax_bar.legend()
    fig_bar.tight_layout()
    # Unregister from pyplot so figures do not accumulate across requests;
    # the Figure object itself stays usable by the caller.
    plt.close(fig_bar)
    return fig_bar


def _plot_feature_radar(features, zh_vals, en_vals):
    """Draw the radar chart of the Chinese and English stylistic profiles."""
    fig_radar = plt.figure()
    ax_radar = fig_radar.add_subplot(111, polar=True)

    angles = np.linspace(0, 2 * np.pi, len(features), endpoint=False)
    # Repeat the first point so each polygon is closed.
    zh_vals_closed = zh_vals + [zh_vals[0]]
    en_vals_closed = en_vals + [en_vals[0]]
    angles_closed = list(angles) + [angles[0]]

    ax_radar.plot(angles_closed, zh_vals_closed, marker="o", label="Chinese")
    ax_radar.fill(angles_closed, zh_vals_closed, alpha=0.1)

    ax_radar.plot(angles_closed, en_vals_closed, marker="o", linestyle="--", label="English")
    ax_radar.fill(angles_closed, en_vals_closed, alpha=0.1)

    ax_radar.set_xticks(angles)
    ax_radar.set_xticklabels(features)
    ax_radar.set_yticklabels([])
    ax_radar.set_title("Stylistic profile (radar)")
    ax_radar.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1))
    fig_radar.tight_layout()
    # Same reason as the bar chart: avoid leaking pyplot-managed figures.
    plt.close(fig_radar)
    return fig_radar


def analyze_letter(chinese_text: str):
    """Translate a Chinese letter and score the power distance of the English.

    Args:
        chinese_text: The Chinese source letter.

    Returns:
        An 8-tuple in the order expected by the UI outputs:
        ``(translation, polite_rewrite, zh_stats, en_stats, level, score,
        bar_figure, radar_figure)``. For blank (or ``None``) input the texts
        are empty, the stats are empty dicts, the level is ``"N/A"``, the
        score is ``0.0`` and both figures are ``None``.
    """
    if not chinese_text or not chinese_text.strip():
        # Must have the same arity (8) as the normal return below, otherwise
        # the UI callback receives too few output values.
        return (
            "",     # English translation
            "",     # more polite English rewrite
            {},     # Chinese-side stats
            {},     # English-side stats
            "N/A",  # power-distance level
            0.0,    # power-distance score
            None,   # bar figure
            None,   # radar figure
        )

    # 1) Chinese tone analysis (rule-based)
    polite_zh, hedge_zh, imp_zh = score_chinese_features(chinese_text)
    zh_stats = {
        "politeness": polite_zh,
        "hedging": hedge_zh,
        "imperative": imp_zh,
    }

    # 2) Chinese -> English translation
    translated = translator(chinese_text, max_length=512)[0]["translation_text"]

    # 2.1) More polite English rewrite
    polite_prompt = f"Rewrite the following sentence in polite and respectful English: {translated}"
    polite_version = polite_rewrite(polite_prompt)[0]["generated_text"]

    # 3) English politeness
    pol_out = politeness_cls(translated)[0]
    polite_en = map_polite_guard_to_score(pol_out["label"])

    # 4) English formality
    form_out = formality_cls(translated)[0]
    formality_en = map_formality_to_score(form_out["label"])

    # 5) English hedging
    hedge_out = hedge_cls(translated)[0]
    hedge_en = map_hedge_to_score(hedge_out["label"])

    # 6) English imperative strength
    imp_en = score_imperative_en(translated)

    en_stats = {
        "politeness": polite_en,
        "formality": formality_en,
        "hedging": hedge_en,
        "imperative": imp_en,
    }

    # 7) English-side power-distance score (0-1): less polite, more formal,
    #    less hedged and more imperative text scores higher.
    power_distance_score = (
        0.35 * (1 - polite_en)
        + 0.25 * formality_en
        + 0.25 * (1 - hedge_en)
        + 0.15 * imp_en
    )
    level = _power_distance_level(power_distance_score)

    # Chart inputs. The Chinese side has no formality metric, so it falls
    # back to the neutral 0.5 default (0.0 for imperative).
    features = ["politeness", "formality", "hedging", "imperative"]
    zh_vals = [zh_stats.get(k, 0.5 if k != "imperative" else 0.0) for k in features]
    en_vals = [en_stats.get(k, 0.0) for k in features]

    fig_bar = _plot_feature_bars(features, zh_vals, en_vals)
    fig_radar = _plot_feature_radar(features, zh_vals, en_vals)

    return (
        translated,             # 1
        polite_version,         # 2
        zh_stats,               # 3
        en_stats,               # 4
        level,                  # 5
        power_distance_score,   # 6  (the numeric score, not the level)
        fig_bar,                # 7
        fig_radar,              # 8
    )



# ---------- 6. Gradio UI ----------
# Single-page layout: input box, run button, then one row per output group.
# Rows are declared in the order they appear on the page.

with gr.Blocks(title="Power Distance Checker") as demo:
    gr.Markdown(
        """
        # 📨 中译英权力距离检测(Power Distance)
        输入一段 **中文信件**,系统会:
        1. 自动翻译为英文  
        2. 分析中英文两侧的礼貌度、正式度、委婉程度、命令语气  
        3. 给出英文译文的 **权力距离等级:Low / Medium / High**  
        4. 用柱状图 + 雷达图展示风格变化
        """
    )

    with gr.Row():
        input_box = gr.Textbox(
            label="输入中文信件",
            lines=6,
            placeholder="例如:您好,我想向您反馈近期的项目进度,如有不妥之处,还请您多多指正。"
        )

    run_btn = gr.Button("分析语气与权力距离")

    # Raw English translation
    with gr.Row():
        output_en = gr.Textbox(label="英文翻译", lines=6)

    # Added: more polite English version (component declared in its own row)
    with gr.Row():
        polite_output = gr.Textbox(label="更礼貌的(增强版)英文", lines=6)

    with gr.Row():
        zh_json = gr.JSON(label="中文侧语气指标(0–1)")
        en_json = gr.JSON(label="英文侧语气指标(0–1)")

    with gr.Row():
        pd_label = gr.Label(label="Power Distance Level (English translation)")
        pd_score = gr.Number(label="Power Distance Score (0–1)", precision=3)

    with gr.Row():
        bar_plot = gr.Plot(label="Bar Chart:Chinese vs English")
        radar_plot = gr.Plot(label="Radar Chart:Stylistic Profile")

    # Button wiring: outputs lists component variables only (no "=" assignments).
    # The order below must match the tuple returned by analyze_letter.
    run_btn.click(
        fn=analyze_letter,
        inputs=[input_box],
        outputs=[
            output_en,      # 1 raw English translation
            polite_output,  # 2 more polite English translation
            zh_json,        # 3 Chinese tone stats
            en_json,        # 4 English tone stats
            pd_label,       # 5 power-distance level
            pd_score,       # 6 power-distance score
            bar_plot,       # 7 bar chart
            radar_plot      # 8 radar chart
        ],
    )

# Start the Gradio server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()