File size: 15,633 Bytes
f90826c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
"""
对远端 HF Spaces 上部署的 NER API 做端到端测试,覆盖所有路由分支与边界情况。
为每个用例记录:HTTP 状态、识别到的实体、调用耗时、自动检测的语言(如有)。
最终输出 Markdown 报告:reports/remote_api_test_report.md
"""
import io
import json
import time
import urllib.request
import urllib.error
from dataclasses import dataclass, field
from pathlib import Path

# Root URL of the deployed NER service (an hf.space host).
BASE_URL = "https://robinwu-nerserver.hf.space"
# Entity-extraction endpoint that post_extract() POSTs JSON payloads to.
EXTRACT  = f"{BASE_URL}/api/v1/extract"
# Health endpoint queried by check_health() before any case is run.
HEALTH   = f"{BASE_URL}/api/v1/health"
# Destination of the generated Markdown report (relative to the working directory).
REPORT   = Path("reports/remote_api_test_report.md")


# ── Test-case definitions ─────────────────────────────────────────────────────
#
# Fields of each case:
#   id               short identifier
#   group            group name (used to organise the report)
#   description      human-readable description (written in Chinese)
#   payload          JSON body sent to /api/v1/extract
#   expected         entity texts expected to be found (used for the recall
#                    figure; an empty set means "do not check")
#   must_not_contain optional; entity texts that must NOT be returned (the
#                    report flags any that appear as a boundary error)

CASES: list[dict] = [
    # ── EN route ──
    {
        "id": "EN-01", "group": "EN — GLiNER 主路径",
        "description": "英文短句,显式 language=en,自定义标签",
        "payload": {
            "text": "Elon Musk founded SpaceX in Hawthorne, California in 2002.",
            "labels": ["full name of a person", "company or organization name",
                       "geographical location", "date or year"],
            "language": "en",
        },
        "expected": {"Elon Musk", "SpaceX", "Hawthorne", "California", "2002"},
    },
    {
        "id": "EN-02", "group": "EN — GLiNER 主路径",
        "description": "英文长段,labels 留空触发默认双语标签集",
        "payload": {
            "text": ("President Biden signed the Inflation Reduction Act in "
                     "Washington D.C. on August 16, 2022. The legislation was "
                     "championed by Senator Chuck Schumer and was seen as a major "
                     "win for the Democratic Party."),
            "language": "en",
        },
        "expected": {"Biden", "Chuck Schumer", "Washington D.C.", "Democratic Party"},
    },
    # ── ZH route ──
    {
        "id": "ZH-01", "group": "ZH — BERT 主路径",
        "description": "中文现代商业文本,显式 language=zh",
        "payload": {
            "text": "阿里巴巴集团创始人马云于2019年卸任董事局主席,由张勇接任。"
                    "总部位于杭州的阿里巴巴旗下拥有淘宝、天猫、支付宝等业务板块。",
            "language": "zh",
        },
        "expected": {"马云", "张勇", "阿里巴巴", "杭州"},
    },
    {
        "id": "ZH-02", "group": "ZH — BERT 主路径",
        "description": "中文医疗场景,自定义双语标签",
        "payload": {
            "text": "北京协和医院心内科主任王建国教授团队,于2023年成功完成首例"
                    "机器人辅助冠状动脉搭桥手术,患者来自山东省济南市。",
            "labels": ["人名或姓名", "医院或医疗机构名称", "地名或城市", "日期或年份"],
            "language": "zh",
        },
        "expected": {"王建国", "北京协和医院", "济南"},
    },
    {
        "id": "ZH-03", "group": "ZH — BERT 边界识别",
        "description": "古典文学边界测试 — 「尤氏来请」应只取「尤氏」",
        "payload": {
            "text": "尤氏来请,王熙凤笑道:你来了。贾母命人摆酒,宝玉和黛玉在大观园散步。",
            "language": "zh",
        },
        "expected": {"尤氏", "王熙凤", "贾母", "宝玉", "黛玉", "大观园"},
        "must_not_contain": {"尤氏来请", "王熙凤笑道"},
    },
    # ── AR route ──
    {
        "id": "AR-01", "group": "AR — GLiNER 主路径",
        "description": "阿拉伯语新闻",
        "payload": {
            "text": ("أعلن الرئيس محمد بن سلمان عن إطلاق مشروع نيوم في المملكة "
                     "العربية السعودية عام 2017، وتبلغ تكلفته 500 مليار دولار."),
            "labels": ["full name of a person", "geographical location",
                       "project or initiative name", "date or year"],
            "language": "ar",
        },
        "expected": {"محمد بن سلمان", "المملكة العربية السعودية"},
    },
    # ── Mixed route (both models run, results merged) ──
    {
        "id": "MIX-01", "group": "Mixed — 双模型合并",
        "description": "中英混合 · 职场场景,language=mixed 强制双跑",
        "payload": {
            "text": "张伟加入了 Google 北京研发中心,负责 Android 系统优化。"
                    "他的同事 Sarah Chen 来自 Meta,两人共同参与了 2024 年的 AI Summit。",
            "language": "mixed",
        },
        "expected": {"张伟", "Google", "Sarah Chen", "Meta", "Android", "北京", "2024"},
    },
    {
        "id": "MIX-02", "group": "Mixed — 双模型合并",
        "description": "学术场景,labels 留空",
        "payload": {
            "text": "清华大学计算机系教授李明在 NeurIPS 2023 发表了关于 "
                    "Transformer 架构的论文,合作者来自 MIT 和 Stanford University。",
            "language": "mixed",
        },
        "expected": {"李明", "清华大学", "MIT", "Stanford University", "Transformer"},
    },
    # ── auto: automatic language detection (no "language" key in the payload) ──
    {
        "id": "AUTO-01", "group": "auto — 自动语言检测",
        "description": "纯中文文本,应被检测为 zh",
        "payload": {
            "text": "马云创立了阿里巴巴,总部在杭州。",
        },
        "expected": {"马云", "阿里巴巴", "杭州"},
    },
    {
        "id": "AUTO-02", "group": "auto — 自动语言检测",
        "description": "纯英文文本,应被检测为 en",
        "payload": {
            "text": "Tim Cook is the CEO of Apple in Cupertino.",
        },
        "expected": {"Tim Cook", "Apple", "Cupertino"},
    },
    {
        "id": "AUTO-03", "group": "auto — 自动语言检测",
        "description": "中英混合,应被检测为 mixed 并双跑合并",
        "payload": {
            "text": "李华在 Microsoft 担任工程师,常驻 Seattle 办公室。",
        },
        "expected": {"李华", "Microsoft", "Seattle"},
    },
    # ── min_entities override ──
    {
        "id": "MIN-01", "group": "min_entities 覆盖启发式",
        "description": "min_entities=10 强制兜底(短文本启发式只期望 1 个)",
        "payload": {
            "text": "马云",
            "language": "zh",
            "min_entities": 10,
        },
        "expected": {"马云"},
    },
    {
        "id": "MIN-02", "group": "min_entities 覆盖启发式",
        "description": "min_entities=0 关闭兜底",
        "payload": {
            "text": "马云",
            "language": "zh",
            "min_entities": 0,
        },
        "expected": {"马云"},
    },
    # ── Threshold variation ──
    {
        "id": "THR-01", "group": "Threshold 变化",
        "description": "高阈值 0.8 - 期望返回更少但更高置信度的实体",
        "payload": {
            "text": "Tesla and SpaceX are companies founded by Elon Musk.",
            "language": "en",
            "threshold": 0.8,
        },
        "expected": {"Tesla", "SpaceX", "Elon Musk"},
    },
    # ── Edge-case requests ──
    {
        "id": "EDGE-01", "group": "Edge cases",
        "description": "空文本",
        "payload": {"text": ""},
        "expected": set(),
    },
]


# ── HTTP call + timing ────────────────────────────────────────────────────────

@dataclass
class CallResult:
    """Outcome of one request to the extract endpoint.

    ``post_extract`` builds instances with an empty ``case_id``; ``main``
    assigns the real id afterwards.
    """
    case_id: str  # test-case id, filled in by the caller after the request
    status: int  # HTTP status code; 0 when a non-HTTPError exception was caught
    elapsed_ms: float  # measured latency in milliseconds
    entities: list[dict] = field(default_factory=list)  # "entities" value of the response JSON
    labels_used: list[str] = field(default_factory=list)  # "labels_used" value of the response JSON
    error: str | None = None  # HTTP error body or exception text; None on success


def post_extract(payload: dict, timeout: int = 60) -> CallResult:
    """POST *payload* as JSON to the extract endpoint and time the call.

    Args:
        payload: JSON-serialisable request body.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        A ``CallResult`` with an empty ``case_id``. On success it carries the
        response status plus the ``entities`` / ``labels_used`` values of the
        JSON body. On ``HTTPError`` it carries the error code and the decoded
        error body. On any other exception ``status`` is 0 and ``error`` is
        the exception text.
    """
    request = urllib.request.Request(
        EXTRACT,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    started = time.perf_counter()

    def elapsed_ms() -> float:
        # Milliseconds since the request was issued.
        return (time.perf_counter() - started) * 1000

    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            # Latency is sampled as soon as urlopen returns, before the body
            # is read and parsed.
            took = elapsed_ms()
            parsed = json.loads(response.read().decode())
            return CallResult(
                case_id="",
                status=response.status,
                elapsed_ms=took,
                entities=parsed.get("entities", []),
                labels_used=parsed.get("labels_used", []),
            )
    except urllib.error.HTTPError as http_err:
        took = elapsed_ms()
        detail = http_err.read().decode("utf-8", errors="replace")
        return CallResult(case_id="", status=http_err.code,
                          elapsed_ms=took, error=detail)
    except Exception as exc:
        # Anything else (network failure, timeout, unparsable body, ...) is
        # reported with the sentinel status 0.
        return CallResult(case_id="", status=0,
                          elapsed_ms=elapsed_ms(), error=str(exc))


# ── Health check ──────────────────────────────────────────────────────────────

def check_health() -> tuple[bool, float, str]:
    """Issue one GET to the health endpoint.

    Returns:
        ``(ok, elapsed_ms, detail)``. ``ok`` is True only for an HTTP 200
        response, in which case ``detail`` is the decoded response body.
        If any exception is raised, ``ok`` is False and ``detail`` is the
        exception text.
    """
    started = time.perf_counter()
    try:
        with urllib.request.urlopen(HEALTH, timeout=30) as response:
            # Latency is sampled before the body is read.
            latency_ms = (time.perf_counter() - started) * 1000
            is_ok = response.status == 200
            body_text = response.read().decode()
            return is_ok, latency_ms, body_text
    except Exception as exc:
        latency_ms = (time.perf_counter() - started) * 1000
        return False, latency_ms, str(exc)


# ── Report generation ─────────────────────────────────────────────────────────

def write_report(results: list[tuple[dict, CallResult]], health: tuple[bool, float, str]):
    """Render the Markdown report and write it to ``REPORT``.

    The report has a header (target URL, timestamp, health check, case
    count), a summary table, per-group details for every case, and a
    per-group latency table covering the HTTP 200 responses.

    Args:
        results: ``(case, call_result)`` pairs in execution order. May be
            empty, in which case the average latency is reported as 0.
        health: ``(ok, elapsed_ms, detail)`` as returned by ``check_health``.

    Side effects:
        Creates ``REPORT``'s parent directory if needed, overwrites
        ``REPORT`` (UTF-8) and prints its resolved path.
    """
    def _code_list(texts) -> str:
        # Sorted so the report is identical across runs; raw set iteration
        # order varies with string-hash randomisation.
        return ", ".join(f"`{t}`" for t in sorted(texts))

    buf = io.StringIO()
    w = buf.write

    w("# 远端 API 测试报告\n\n")
    w(f"- 服务地址:`{BASE_URL}`\n")
    w(f"- 测试时间:{time.strftime('%Y-%m-%d %H:%M:%S')}\n")
    ok, hms, hbody = health
    w(f"- 健康检查:{'✓ OK' if ok else '✗ FAIL'} ({hms:.0f}ms) — {hbody}\n")
    w(f"- 用例总数:{len(results)}\n\n")

    # ── Summary table ─────────────────────────────────────────────────────────
    w("## 一、汇总\n\n")
    w("| 用例 | 描述 | HTTP | 实体数 | 召回 | 耗时 |\n")
    w("|---|---|---|---|---|---|\n")
    total_ms = 0.0
    pass_n = 0
    for case, res in results:
        expected = case.get("expected", set())
        found = {e["text"] for e in res.entities}
        hit = len(expected & found)
        # An empty expectation set means the case is not recall-checked.
        recall = f"{hit}/{len(expected)}" if expected else "—"
        ok_mark = "✓" if res.status == 200 else "✗"
        w(f"| **{case['id']}** | {case['description']} | {ok_mark} {res.status} | "
          f"{len(res.entities)} | {recall} | {res.elapsed_ms:.0f}ms |\n")
        # A case "passes" purely on HTTP 200; recall is informational.
        if res.status == 200:
            pass_n += 1
        total_ms += res.elapsed_ms
    # Guard the average so an empty result list does not divide by zero.
    avg_ms = total_ms / len(results) if results else 0.0
    w(f"\n- 通过率:**{pass_n}/{len(results)}**\n")
    w(f"- 累计耗时:**{total_ms:.0f}ms**(平均 {avg_ms:.0f}ms/请求)\n\n")

    # ── Per-group details ─────────────────────────────────────────────────────
    groups: dict[str, list] = {}
    for case, res in results:
        groups.setdefault(case["group"], []).append((case, res))

    w("## 二、分组详细结果\n\n")
    for group_name, items in groups.items():
        w(f"### {group_name}\n\n")
        for case, res in items:
            w(f"#### {case['id']} · {case['description']}\n\n")
            w("**请求**\n```json\n")
            w(json.dumps(case["payload"], ensure_ascii=False, indent=2))
            w("\n```\n\n")

            w(f"**响应**:HTTP {res.status} · {res.elapsed_ms:.0f}ms · "
              f"{len(res.entities)} 个实体\n\n")

            if res.error:
                # Failed calls have no entities to tabulate or check.
                w(f"```\nERROR: {res.error}\n```\n\n")
                continue

            if res.entities:
                w("| 文本 | 标签 | 置信度 | 起止 |\n|---|---|---|---|\n")
                for e in res.entities:
                    # Start and end offsets are separated by "-"; without a
                    # separator 0 and 9 would be printed as "09".
                    w(f"| `{e['text']}` | {e['label']} | {e['score']:.2f} | "
                      f"{e['start']}-{e['end']} |\n")
            else:
                w("_未识别到实体_\n")

            found = {e["text"] for e in res.entities}

            expected = case.get("expected", set())
            if expected:
                hits = expected & found
                misses = expected - found
                w(f"\n**期望命中** {len(hits)}/{len(expected)}:")
                w(_code_list(expected) + "  \n")
                if misses:
                    w(f"**未命中**:{_code_list(misses)}  \n")

            mnc = case.get("must_not_contain", set())
            if mnc:
                # Entities that should have been split at a tighter boundary.
                bad = found & mnc
                if bad:
                    w(f"\n> ⚠️ **边界错误**:{_code_list(bad)}\n")
                else:
                    w(f"\n> ✓ 边界正确(未出现 {_code_list(mnc)})\n")
            w("\n")

    # ── Latency aggregated per group (HTTP 200 responses only) ────────────────
    w("## 三、按路由分组性能\n\n")
    by_group: dict[str, list[float]] = {}
    for case, res in results:
        if res.status == 200:
            by_group.setdefault(case["group"], []).append(res.elapsed_ms)
    w("| 分组 | 用例数 | 最快 | 最慢 | 平均 |\n|---|---|---|---|---|\n")
    for g, times in by_group.items():
        w(f"| {g} | {len(times)} | {min(times):.0f}ms | "
          f"{max(times):.0f}ms | {sum(times)/len(times):.0f}ms |\n")

    REPORT.parent.mkdir(parents=True, exist_ok=True)
    REPORT.write_text(buf.getvalue(), encoding="utf-8")
    print(f"\nReport: {REPORT.resolve()}")


# ── Main program ──────────────────────────────────────────────────────────────

def main():
    """Run the health check, then every case in ``CASES``, then write the report.

    Progress is printed to stdout, one line per case. If the health check
    fails, its detail is printed and the function returns without running
    any case or writing a report.
    """
    print(f"Target: {BASE_URL}")
    health = check_health()
    healthy, health_ms, health_detail = health
    print(f"Health: {'OK' if healthy else 'FAIL'} ({health_ms:.0f}ms)")
    if not healthy:
        print(f"  -> {health_detail}")
        return

    outcomes: list[tuple[dict, CallResult]] = []
    for case in CASES:
        case_id = case["id"]
        # Print the id first so a slow request shows which case is running.
        print(f"  {case_id:8s}  ", end="", flush=True)
        outcome = post_extract(case["payload"])
        outcome.case_id = case_id
        outcomes.append((case, outcome))
        if outcome.status == 200:
            verdict = "OK"
        else:
            verdict = f"FAIL({outcome.status})"
        print(f"{verdict:8s}  {outcome.elapsed_ms:6.0f}ms  "
              f"{len(outcome.entities)} entities")

    write_report(outcomes, health)


# Run the suite only when executed as a script, not when imported.
if __name__ == "__main__":
    main()