Robin Claude Sonnet 4.6 committed on
Commit ·
f90826c
1
Parent(s): 2288fd7
fix(zh): slice entity text from original input to avoid BERT tokenizer spaces
Browse files
The transformers token-classification pipeline with aggregation_strategy="simple"
emits Chinese entity text with whitespace between characters (e.g. "马 云"
instead of "马云"). This breaks downstream string matching and recall metrics.
Fix: use the start/end character offsets returned by the pipeline to slice
the entity text directly from the original input string. The offsets are
correct; only the joined token text is wrong.
Also adds scripts/test_remote_api.py — end-to-end API test harness that hits
the deployed HF Space, exercises every routing branch (EN/ZH/AR/Mixed/auto/
min_entities/threshold/edge), and writes a Markdown report with timings
and per-case recall.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- app/ner.py +4 -1
- reports/remote_api_test_report.md +401 -0
- scripts/test_remote_api.py +360 -0
app/ner.py
CHANGED
|
@@ -216,8 +216,11 @@ class ChineseBERTBackend(_Backend):
|
|
| 216 |
|
| 217 |
std_label = BERT_TYPE_TO_LABEL.get(bert_type, bert_type)
|
| 218 |
labels_seen.add(std_label)
|
|
|
|
|
|
|
|
|
|
| 219 |
entities.append(Entity(
|
| 220 |
-
text=
|
| 221 |
label=std_label,
|
| 222 |
score=round(score, 4),
|
| 223 |
start=r["start"],
|
|
|
|
| 216 |
|
| 217 |
std_label = BERT_TYPE_TO_LABEL.get(bert_type, bert_type)
|
| 218 |
labels_seen.add(std_label)
|
| 219 |
+
# Chinese BERT tokenizer 会在字词间插入空格（"马 云"），
|
| 220 |
+
# 直接用 start/end 从原文切片，避免空格污染
|
| 221 |
+
entity_text = text[r["start"]:r["end"]]
|
| 222 |
entities.append(Entity(
|
| 223 |
+
text=entity_text,
|
| 224 |
label=std_label,
|
| 225 |
score=round(score, 4),
|
| 226 |
start=r["start"],
|
reports/remote_api_test_report.md
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 远端 API 测试报告
|
| 2 |
+
|
| 3 |
+
- ๆๅกๅฐๅ๏ผ`https://robinwu-nerserver.hf.space`
|
| 4 |
+
- ๆต่ฏๆถ้ด๏ผ2026-04-30 09:43:24
|
| 5 |
+
- ๅฅๅบทๆฃๆฅ๏ผโ OK (1259ms) โ {"status":"ok"}
|
| 6 |
+
- ็จไพๆปๆฐ๏ผ15
|
| 7 |
+
|
| 8 |
+
## ไธใๆฑๆป
|
| 9 |
+
|
| 10 |
+
| ็จไพ | ๆ่ฟฐ | HTTP | ๅฎไฝๆฐ | ๅฌๅ | ่ๆถ |
|
| 11 |
+
|---|---|---|---|---|---|
|
| 12 |
+
| **EN-01** | ่ฑๆ็ญๅฅ๏ผๆพๅผ language=en๏ผ่ชๅฎไนๆ ็ญพ | โ 200 | 5 | 5/5 | 1632ms |
|
| 13 |
+
| **EN-02** | ่ฑๆ้ฟๆฎต๏ผlabels ็็ฉบ่งฆๅ้ป่ฎคๅ่ฏญๆ ็ญพ้ | โ 200 | 5 | 1/4 | 1542ms |
|
| 14 |
+
| **ZH-01** | ไธญๆ็ฐไปฃๅไธๆๆฌ๏ผๆพๅผ language=zh | โ 200 | 6 | 0/4 | 1305ms |
|
| 15 |
+
| **ZH-02** | ไธญๆๅป็ๅบๆฏ๏ผ่ชๅฎไนๅ่ฏญๆ ็ญพ | โ 200 | 5 | 0/3 | 1282ms |
|
| 16 |
+
| **ZH-03** | ๅคๅ
ธๆๅญฆ่พน็ๆต่ฏ โ ใๅฐคๆฐๆฅ่ฏทใๅบๅชๅใๅฐคๆฐใ | โ 200 | 6 | 0/6 | 1330ms |
|
| 17 |
+
| **AR-01** | ้ฟๆไผฏ่ฏญๆฐ้ป | โ 200 | 4 | 2/2 | 1307ms |
|
| 18 |
+
| **MIX-01** | ไธญ่ฑๆททๅ ยท ่ๅบๅบๆฏ๏ผlanguage=mixed ๅผบๅถๅ่ท | โ 200 | 7 | 3/7 | 1652ms |
|
| 19 |
+
| **MIX-02** | ๅญฆๆฏๅบๆฏ๏ผlabels ็็ฉบ | โ 200 | 4 | 2/5 | 1635ms |
|
| 20 |
+
| **AUTO-01** | ็บฏไธญๆๆๆฌ๏ผๅบ่ขซๆฃๆตไธบ zh | โ 200 | 3 | 0/3 | 1267ms |
|
| 21 |
+
| **AUTO-02** | ็บฏ่ฑๆๆๆฌ๏ผๅบ่ขซๆฃๆตไธบ en | โ 200 | 4 | 3/3 | 1940ms |
|
| 22 |
+
| **AUTO-03** | ไธญ่ฑๆททๅ๏ผๅบ่ขซๆฃๆตไธบ mixed ๅนถๅ่ทๅๅนถ | โ 200 | 4 | 2/3 | 1428ms |
|
| 23 |
+
| **MIN-01** | min_entities=10 ๅผบๅถๅ
ๅบ๏ผ็ญๆๆฌๅฏๅๅผๅชๆๆ 1 ไธช๏ผ | โ 200 | 1 | 0/1 | 1501ms |
|
| 24 |
+
| **MIN-02** | min_entities=0 ๅ
ณ้ญๅ
ๅบ | โ 200 | 1 | 0/1 | 1183ms |
|
| 25 |
+
| **THR-01** | ้ซ้ๅผ 0.8 - ๆๆ่ฟๅๆดๅฐไฝๆด้ซ็ฝฎไฟกๅบฆ็ๅฎไฝ | โ 200 | 1 | 1/3 | 1525ms |
|
| 26 |
+
| **EDGE-01** | ็ฉบๆๆฌ | โ 200 | 0 | โ | 1160ms |
|
| 27 |
+
|
| 28 |
+
- ้่ฟ็๏ผ**15/15**
|
| 29 |
+
- ็ดฏ่ฎก่ๆถ๏ผ**21688ms**๏ผๅนณๅ 1446ms/่ฏทๆฑ๏ผ
|
| 30 |
+
|
| 31 |
+
## ไบใๅ็ป่ฏฆ็ป็ปๆ
|
| 32 |
+
|
| 33 |
+
### EN โ GLiNER ไธป่ทฏๅพ
|
| 34 |
+
|
| 35 |
+
#### EN-01 ยท ่ฑๆ็ญๅฅ๏ผๆพๅผ language=en๏ผ่ชๅฎไนๆ ็ญพ
|
| 36 |
+
|
| 37 |
+
**่ฏทๆฑ**
|
| 38 |
+
```json
|
| 39 |
+
{
|
| 40 |
+
"text": "Elon Musk founded SpaceX in Hawthorne, California in 2002.",
|
| 41 |
+
"labels": [
|
| 42 |
+
"full name of a person",
|
| 43 |
+
"company or organization name",
|
| 44 |
+
"geographical location",
|
| 45 |
+
"date or year"
|
| 46 |
+
],
|
| 47 |
+
"language": "en"
|
| 48 |
+
}
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1632ms ยท 5 ไธชๅฎไฝ
|
| 52 |
+
|
| 53 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 54 |
+
|---|---|---|---|
|
| 55 |
+
| `Elon Musk` | ไบบๅๆๅงๅ | 0.85 | 0โ9 |
|
| 56 |
+
| `SpaceX` | company or organization name | 0.85 | 18โ24 |
|
| 57 |
+
| `Hawthorne` | ๅฐๅๆๅๅธ | 0.70 | 28โ37 |
|
| 58 |
+
| `California` | geographical location | 0.57 | 39โ49 |
|
| 59 |
+
| `2002` | date or year | 0.89 | 53โ57 |
|
| 60 |
+
|
| 61 |
+
**ๆๆๅฝไธญ** 5/5๏ผ`Elon Musk`, `2002`, `SpaceX`, `Hawthorne`, `California`
|
| 62 |
+
|
| 63 |
+
#### EN-02 ยท ่ฑๆ้ฟๆฎต๏ผlabels ็็ฉบ่งฆๅ้ป่ฎคๅ่ฏญๆ ็ญพ้
|
| 64 |
+
|
| 65 |
+
**่ฏทๆฑ**
|
| 66 |
+
```json
|
| 67 |
+
{
|
| 68 |
+
"text": "President Biden signed the Inflation Reduction Act in Washington D.C. on August 16, 2022. The legislation was championed by Senator Chuck Schumer and was seen as a major win for the Democratic Party.",
|
| 69 |
+
"language": "en"
|
| 70 |
+
}
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1542ms ยท 5 ไธชๅฎไฝ
|
| 74 |
+
|
| 75 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 76 |
+
|---|---|---|---|
|
| 77 |
+
| `President Biden` | ไบบๅๆๅงๅ | 0.66 | 0โ15 |
|
| 78 |
+
| `Inflation Reduction Act` | legislation or policy name | 0.78 | 27โ50 |
|
| 79 |
+
| `Washington D.C.` | geographical location | 0.64 | 54โ69 |
|
| 80 |
+
| `August 16, 2022` | date or year | 0.92 | 73โ88 |
|
| 81 |
+
| `Senator Chuck Schumer` | ไบบๅๆๅงๅ | 0.66 | 124โ145 |
|
| 82 |
+
|
| 83 |
+
**ๆๆๅฝไธญ** 1/4๏ผ`Chuck Schumer`, `Democratic Party`, `Biden`, `Washington D.C.`
|
| 84 |
+
**ๆชๅฝไธญ**๏ผ`Chuck Schumer`, `Democratic Party`, `Biden`
|
| 85 |
+
|
| 86 |
+
### ZH โ BERT ไธป่ทฏๅพ
|
| 87 |
+
|
| 88 |
+
#### ZH-01 ยท ไธญๆ็ฐไปฃๅไธๆๆฌ๏ผๆพๅผ language=zh
|
| 89 |
+
|
| 90 |
+
**่ฏทๆฑ**
|
| 91 |
+
```json
|
| 92 |
+
{
|
| 93 |
+
"text": "้ฟ้ๅทดๅทด้ๅขๅๅงไบบ้ฉฌไบไบ2019ๅนดๅธไปป่ฃไบๅฑไธปๅธญ๏ผ็ฑๅผ ๅๆฅไปปใๆป้จไฝไบๆญๅท็้ฟ้ๅทดๅทดๆไธๆฅๆๆทๅฎใๅคฉ็ซใๆฏไปๅฎ็ญไธๅกๆฟๅใ",
|
| 94 |
+
"language": "zh"
|
| 95 |
+
}
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1305ms ยท 6 ไธชๅฎไฝ
|
| 99 |
+
|
| 100 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 101 |
+
|---|---|---|---|
|
| 102 |
+
| `้ฟ ้ ๅทด ๅทด ้ ๅข` | ๅ
ฌๅธๆ็ป็ปๆบๆๅ็งฐ | 1.00 | 0โ6 |
|
| 103 |
+
| `้ฉฌ ไบ` | ไบบๅๆๅงๅ | 1.00 | 9โ11 |
|
| 104 |
+
| `2019 ๅนด` | ๆฅๆๆๅนดไปฝ | 1.00 | 12โ17 |
|
| 105 |
+
| `ๅผ ๅ` | ไบบๅๆๅงๅ | 1.00 | 26โ28 |
|
| 106 |
+
| `ๆญ ๅท` | ๅฐๅๆๅๅธ | 1.00 | 35โ37 |
|
| 107 |
+
| `้ฟ ้ ๅทด ๅทด` | ๅ
ฌๅธๆ็ป็ปๆบๆๅ็งฐ | 0.99 | 38โ42 |
|
| 108 |
+
|
| 109 |
+
**ๆๆๅฝไธญ** 0/4๏ผ`ๅผ ๅ`, `ๆญๅท`, `้ฟ้ๅทดๅทด`, `้ฉฌไบ`
|
| 110 |
+
**ๆชๅฝไธญ**๏ผ`ๆญๅท`, `้ฟ้ๅทดๅทด`, `ๅผ ๅ`, `้ฉฌไบ`
|
| 111 |
+
|
| 112 |
+
#### ZH-02 ยท ไธญๆๅป็ๅบๆฏ๏ผ่ชๅฎไนๅ่ฏญๆ ็ญพ
|
| 113 |
+
|
| 114 |
+
**่ฏทๆฑ**
|
| 115 |
+
```json
|
| 116 |
+
{
|
| 117 |
+
"text": "ๅไบฌๅๅๅป้ขๅฟๅ
็งไธปไปป็ๅปบๅฝๆๆๅข้๏ผไบ2023ๅนดๆๅๅฎๆ้ฆไพๆบๅจไบบ่พ
ๅฉๅ ็ถๅจ่ๆญๆกฅๆๆฏ๏ผๆฃ่
ๆฅ่ชๅฑฑไธ็ๆตๅๅธใ",
|
| 118 |
+
"labels": [
|
| 119 |
+
"ไบบๅๆๅงๅ",
|
| 120 |
+
"ๅป้ขๆๅป็ๆบๆๅ็งฐ",
|
| 121 |
+
"ๅฐๅๆๅๅธ",
|
| 122 |
+
"ๆฅๆๆๅนดไปฝ"
|
| 123 |
+
],
|
| 124 |
+
"language": "zh"
|
| 125 |
+
}
|
| 126 |
+
```
|
| 127 |
+
|
| 128 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1282ms ยท 5 ไธชๅฎไฝ
|
| 129 |
+
|
| 130 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 131 |
+
|---|---|---|---|
|
| 132 |
+
| `ๅ ไบฌ ๅ ๅ ๅป ้ข ๅฟ ๅ
็ง` | ๅ
ฌๅธๆ็ป็ปๆบๆๅ็งฐ | 0.98 | 0โ9 |
|
| 133 |
+
| `๏ฟฝ๏ฟฝ๏ฟฝ ๅปบ ๅฝ` | ไบบๅๆๅงๅ | 1.00 | 11โ14 |
|
| 134 |
+
| `2023 ๅนด` | ๆฅๆๆๅนดไปฝ | 0.96 | 20โ25 |
|
| 135 |
+
| `ๅฑฑ ไธ ็` | ๅฐๅๆๅๅธ | 1.00 | 49โ52 |
|
| 136 |
+
| `ๆต ๅ ๅธ` | ๅฐๅๆๅๅธ | 1.00 | 52โ55 |
|
| 137 |
+
|
| 138 |
+
**ๆๆๅฝไธญ** 0/3๏ผ`ๆตๅ`, `็ๅปบๅฝ`, `ๅไบฌๅๅๅป้ข`
|
| 139 |
+
**ๆชๅฝไธญ**๏ผ`ๆตๅ`, `็ๅปบๅฝ`, `ๅไบฌๅๅๅป้ข`
|
| 140 |
+
|
| 141 |
+
### ZH โ BERT ่พน็่ฏๅซ
|
| 142 |
+
|
| 143 |
+
#### ZH-03 ยท ๅคๅ
ธๆๅญฆ่พน็ๆต่ฏ โ ใๅฐคๆฐๆฅ่ฏทใๅบๅชๅใๅฐคๆฐใ
|
| 144 |
+
|
| 145 |
+
**่ฏทๆฑ**
|
| 146 |
+
```json
|
| 147 |
+
{
|
| 148 |
+
"text": "ๅฐคๆฐๆฅ่ฏท๏ผ็็ๅค็ฌ้๏ผไฝ ๆฅไบใ่ดพๆฏๅฝไบบๆ้
๏ผๅฎ็ๅ้ป็ๅจๅคง่งๅญๆฃๆญฅใ",
|
| 149 |
+
"language": "zh"
|
| 150 |
+
}
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1330ms ยท 6 ไธชๅฎไฝ
|
| 154 |
+
|
| 155 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 156 |
+
|---|---|---|---|
|
| 157 |
+
| `ๅฐค ๆฐ` | ไบบๅๆๅงๅ | 1.00 | 0โ2 |
|
| 158 |
+
| `็ ็ ๅค` | ไบบๅๆๅงๅ | 1.00 | 5โ8 |
|
| 159 |
+
| `่ดพ ๆฏ` | ไบบๅๆๅงๅ | 1.00 | 15โ17 |
|
| 160 |
+
| `ๅฎ ็` | ไบบๅๆๅงๅ | 1.00 | 22โ24 |
|
| 161 |
+
| `้ป ็` | ไบบๅๆๅงๅ | 1.00 | 25โ27 |
|
| 162 |
+
| `ๅคง ่ง ๅญ` | ๅฐๅๆๅๅธ | 0.93 | 28โ31 |
|
| 163 |
+
|
| 164 |
+
**ๆๆๅฝไธญ** 0/6๏ผ`่ดพๆฏ`, `ๅคง่งๅญ`, `ๅฎ็`, `ๅฐคๆฐ`, `้ป็`, `็็ๅค`
|
| 165 |
+
**ๆชๅฝไธญ**๏ผ`ๅฐคๆฐ`, `้ป็`, `็็ๅค`, `่ดพๆฏ`, `ๅคง่งๅญ`, `ๅฎ็`
|
| 166 |
+
|
| 167 |
+
> โ ่พน็ๆญฃ็กฎ๏ผๆชๅบ็ฐ {'็็ๅค็ฌ้', 'ๅฐคๆฐๆฅ่ฏท'}๏ผ
|
| 168 |
+
|
| 169 |
+
### AR โ GLiNER ไธป่ทฏๅพ
|
| 170 |
+
|
| 171 |
+
#### AR-01 ยท ้ฟๆไผฏ่ฏญๆฐ้ป
|
| 172 |
+
|
| 173 |
+
**่ฏทๆฑ**
|
| 174 |
+
```json
|
| 175 |
+
{
|
| 176 |
+
"text": "ุฃุนูู ุงูุฑุฆูุณ ู
ุญู
ุฏ ุจู ุณูู
ุงู ุนู ุฅุทูุงู ู
ุดุฑูุน ูููู
ูู ุงูู
ู
ููุฉ ุงูุนุฑุจูุฉ ุงูุณุนูุฏูุฉ ุนุงู
2017ุ ูุชุจูุบ ุชูููุชู 500 ู
ููุงุฑ ุฏููุงุฑ.",
|
| 177 |
+
"labels": [
|
| 178 |
+
"full name of a person",
|
| 179 |
+
"geographical location",
|
| 180 |
+
"project or initiative name",
|
| 181 |
+
"date or year"
|
| 182 |
+
],
|
| 183 |
+
"language": "ar"
|
| 184 |
+
}
|
| 185 |
+
```
|
| 186 |
+
|
| 187 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1307ms ยท 4 ไธชๅฎไฝ
|
| 188 |
+
|
| 189 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 190 |
+
|---|---|---|---|
|
| 191 |
+
| `ู
ุญู
ุฏ ุจู ุณูู
ุงู` | ไบบๅๆๅงๅ | 0.71 | 12โ25 |
|
| 192 |
+
| `ูููู
` | project or initiative name | 0.55 | 41โ45 |
|
| 193 |
+
| `ุงูู
ู
ููุฉ ุงูุนุฑุจูุฉ ุงูุณุนูุฏูุฉ` | ๅฐๅๆๅๅธ | 0.65 | 49โ73 |
|
| 194 |
+
| `2017` | date or year | 0.82 | 78โ82 |
|
| 195 |
+
|
| 196 |
+
**ๆๆๅฝไธญ** 2/2๏ผ`ู
ุญู
ุฏ ุจู ุณูู
ุงู`, `ุงูู
ู
ููุฉ ุงูุนุฑุจูุฉ ุงูุณุนูุฏูุฉ`
|
| 197 |
+
|
| 198 |
+
### Mixed โ ๅๆจกๅๅๅนถ
|
| 199 |
+
|
| 200 |
+
#### MIX-01 ยท ไธญ่ฑๆททๅ ยท ่ๅบๅบๆฏ๏ผlanguage=mixed ๅผบๅถๅ่ท
|
| 201 |
+
|
| 202 |
+
**่ฏทๆฑ**
|
| 203 |
+
```json
|
| 204 |
+
{
|
| 205 |
+
"text": "ๅผ ไผๅ ๅ
ฅไบ Google ๅไบฌ็ ๅไธญๅฟ๏ผ่ด่ดฃ Android ็ณป็ปไผๅใไป็ๅไบ Sarah Chen ๆฅ่ช Meta๏ผไธคไบบๅ
ฑๅๅไธไบ 2024 ๅนด็ AI Summitใ",
|
| 206 |
+
"language": "mixed"
|
| 207 |
+
}
|
| 208 |
+
```
|
| 209 |
+
|
| 210 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1652ms ยท 7 ไธชๅฎไฝ
|
| 211 |
+
|
| 212 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 213 |
+
|---|---|---|---|
|
| 214 |
+
| `ๅผ ไผ` | ไบบๅๆๅงๅ | 1.00 | 0โ2 |
|
| 215 |
+
| `Google` | ๅ
ฌๅธๆ็ป็ปๆบๆๅ็งฐ | 0.82 | 6โ12 |
|
| 216 |
+
| `ๅ ไบฌ ็ ๅ ไธญ ๅฟ` | ๅ
ฌๅธๆ็ป็ปๆบๆๅ็งฐ | 0.86 | 13โ19 |
|
| 217 |
+
| `Sarah Chen` | ไบบๅๆๅงๅ | 0.89 | 41โ51 |
|
| 218 |
+
| `Meta` | company or organization name | 0.85 | 55โ59 |
|
| 219 |
+
| `2024 ๅนด` | ๆฅๆๆๅนดไปฝ | 0.66 | 68โ74 |
|
| 220 |
+
| `AI Summit` | event name | 0.49 | 76โ85 |
|
| 221 |
+
|
| 222 |
+
**ๆๆๅฝไธญ** 3/7๏ผ`2024`, `Meta`, `Google`, `Android`, `ๅไบฌ`, `Sarah Chen`, `ๅผ ไผ`
|
| 223 |
+
**ๆชๅฝไธญ**๏ผ`2024`, `Android`, `ๅไบฌ`, `ๅผ ไผ`
|
| 224 |
+
|
| 225 |
+
#### MIX-02 ยท ๅญฆๆฏๅบๆฏ๏ผlabels ็็ฉบ
|
| 226 |
+
|
| 227 |
+
**่ฏทๆฑ**
|
| 228 |
+
```json
|
| 229 |
+
{
|
| 230 |
+
"text": "ๆธ
ๅๅคงๅญฆ่ฎก็ฎๆบ็ณปๆๆๆๆๅจ NeurIPS 2023 ๅ่กจไบๅ
ณไบ Transformer ๆถๆ็่ฎบๆ๏ผๅไฝ่
ๆฅ่ช MIT ๅ Stanford Universityใ",
|
| 231 |
+
"language": "mixed"
|
| 232 |
+
}
|
| 233 |
+
```
|
| 234 |
+
|
| 235 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1635ms ยท 4 ไธชๅฎไฝ
|
| 236 |
+
|
| 237 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 238 |
+
|---|---|---|---|
|
| 239 |
+
| `ๆธ
ๅ ๅคง ๅญฆ ่ฎก ็ฎ ๆบ ็ณป` | ๅ
ฌๅธๆ็ป็ปๆบๆๅ็งฐ | 1.00 | 0โ8 |
|
| 240 |
+
| `ๆ ๆ` | ไบบๅๆๅงๅ | 1.00 | 10โ12 |
|
| 241 |
+
| `MIT` | university or research institution | 0.76 | 57โ60 |
|
| 242 |
+
| `Stanford University` | university or research institution | 0.85 | 63โ82 |
|
| 243 |
+
|
| 244 |
+
**ๆๆๅฝไธญ** 2/5๏ผ`Stanford University`, `ๆๆ`, `ๆธ
ๅๅคงๅญฆ`, `Transformer`, `MIT`
|
| 245 |
+
**ๆชๅฝไธญ**๏ผ`ๆๆ`, `ๆธ
ๅๅคงๅญฆ`, `Transformer`
|
| 246 |
+
|
| 247 |
+
### auto โ ่ชๅจ่ฏญ่จๆฃๆต
|
| 248 |
+
|
| 249 |
+
#### AUTO-01 ยท ็บฏไธญๆๆๆฌ๏ผๅบ่ขซๆฃๆตไธบ zh
|
| 250 |
+
|
| 251 |
+
**่ฏทๆฑ**
|
| 252 |
+
```json
|
| 253 |
+
{
|
| 254 |
+
"text": "้ฉฌไบๅ็ซไบ้ฟ้ๅทดๅทด๏ผๆป้จๅจๆญๅทใ"
|
| 255 |
+
}
|
| 256 |
+
```
|
| 257 |
+
|
| 258 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1267ms ยท 3 ไธชๅฎไฝ
|
| 259 |
+
|
| 260 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 261 |
+
|---|---|---|---|
|
| 262 |
+
| `้ฉฌ ไบ` | ไบบๅๆๅงๅ | 1.00 | 0โ2 |
|
| 263 |
+
| `้ฟ ้ ๅทด ๅทด` | ๅ
ฌๅธๆ็ป็ปๆบๆๅ็งฐ | 1.00 | 5โ9 |
|
| 264 |
+
| `ๆญ ๅท` | ๅฐๅๆๅๅธ | 1.00 | 13โ15 |
|
| 265 |
+
|
| 266 |
+
**ๆๆๅฝไธญ** 0/3๏ผ`ๆญๅท`, `้ฟ้ๅทดๅทด`, `้ฉฌไบ`
|
| 267 |
+
**ๆชๅฝไธญ**๏ผ`ๆญๅท`, `้ฟ้ๅทดๅทด`, `้ฉฌไบ`
|
| 268 |
+
|
| 269 |
+
#### AUTO-02 ยท ็บฏ่ฑๆๆๆฌ๏ผๅบ่ขซๆฃๆตไธบ en
|
| 270 |
+
|
| 271 |
+
**่ฏทๆฑ**
|
| 272 |
+
```json
|
| 273 |
+
{
|
| 274 |
+
"text": "Tim Cook is the CEO of Apple in Cupertino."
|
| 275 |
+
}
|
| 276 |
+
```
|
| 277 |
+
|
| 278 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1940ms ยท 4 ไธชๅฎไฝ
|
| 279 |
+
|
| 280 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 281 |
+
|---|---|---|---|
|
| 282 |
+
| `Tim Cook` | ไบบๅๆๅงๅ | 0.86 | 0โ8 |
|
| 283 |
+
| `CEO` | ไบบๅๆๅงๅ | 0.61 | 16โ19 |
|
| 284 |
+
| `Apple` | ๅ
ฌๅธๆ็ป็ปๆบๆๅ็งฐ | 0.90 | 23โ28 |
|
| 285 |
+
| `Cupertino` | ๅฐๅๆๅๅธ | 0.88 | 32โ41 |
|
| 286 |
+
|
| 287 |
+
**ๆๆๅฝไธญ** 3/3๏ผ`Cupertino`, `Tim Cook`, `Apple`
|
| 288 |
+
|
| 289 |
+
#### AUTO-03 ยท ไธญ่ฑๆททๅ๏ผๅบ่ขซๆฃๆตไธบ mixed ๅนถๅ่ทๅๅนถ
|
| 290 |
+
|
| 291 |
+
**่ฏทๆฑ**
|
| 292 |
+
```json
|
| 293 |
+
{
|
| 294 |
+
"text": "ๆๅๅจ Microsoft ๆ
ไปปๅทฅ็จๅธ๏ผๅธธ้ฉป Seattle ๅๅ
ฌๅฎคใ"
|
| 295 |
+
}
|
| 296 |
+
```
|
| 297 |
+
|
| 298 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1428ms ยท 4 ไธชๅฎไฝ
|
| 299 |
+
|
| 300 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 301 |
+
|---|---|---|---|
|
| 302 |
+
| `ๆๅๅจ` | ไบบๅๆๅงๅ | 0.41 | 0โ3 |
|
| 303 |
+
| `ๆ ๅ` | ไบบๅๆๅงๅ | 1.00 | 0โ2 |
|
| 304 |
+
| `Microsoft` | ๅ
ฌๅธๆ็ป็ปๆบๆๅ็งฐ | 0.75 | 4โ13 |
|
| 305 |
+
| `Seattle` | ๅฐๅๆๅๅธ | 0.79 | 23โ30 |
|
| 306 |
+
|
| 307 |
+
**ๆๆๅฝไธญ** 2/3๏ผ`ๆๅ`, `Microsoft`, `Seattle`
|
| 308 |
+
**ๆชๅฝไธญ**๏ผ`ๆๅ`
|
| 309 |
+
|
| 310 |
+
### min_entities ่ฆ็ๅฏๅๅผ
|
| 311 |
+
|
| 312 |
+
#### MIN-01 ยท min_entities=10 ๅผบๅถๅ
ๅบ๏ผ็ญๆๆฌๅฏๅๅผๅชๆๆ 1 ไธช๏ผ
|
| 313 |
+
|
| 314 |
+
**่ฏทๆฑ**
|
| 315 |
+
```json
|
| 316 |
+
{
|
| 317 |
+
"text": "้ฉฌไบ",
|
| 318 |
+
"language": "zh",
|
| 319 |
+
"min_entities": 10
|
| 320 |
+
}
|
| 321 |
+
```
|
| 322 |
+
|
| 323 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1501ms ยท 1 ไธชๅฎไฝ
|
| 324 |
+
|
| 325 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 326 |
+
|---|---|---|---|
|
| 327 |
+
| `้ฉฌ ไบ` | ไบบๅๆๅงๅ | 1.00 | 0โ2 |
|
| 328 |
+
|
| 329 |
+
**ๆๆๅฝไธญ** 0/1๏ผ`้ฉฌไบ`
|
| 330 |
+
**ๆชๅฝไธญ**๏ผ`้ฉฌไบ`
|
| 331 |
+
|
| 332 |
+
#### MIN-02 ยท min_entities=0 ๅ
ณ้ญๅ
ๅบ
|
| 333 |
+
|
| 334 |
+
**่ฏทๆฑ**
|
| 335 |
+
```json
|
| 336 |
+
{
|
| 337 |
+
"text": "้ฉฌไบ",
|
| 338 |
+
"language": "zh",
|
| 339 |
+
"min_entities": 0
|
| 340 |
+
}
|
| 341 |
+
```
|
| 342 |
+
|
| 343 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1183ms ยท 1 ไธชๅฎไฝ
|
| 344 |
+
|
| 345 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 346 |
+
|---|---|---|---|
|
| 347 |
+
| `้ฉฌ ไบ` | ไบบๅๆๅงๅ | 1.00 | 0โ2 |
|
| 348 |
+
|
| 349 |
+
**ๆๆๅฝไธญ** 0/1๏ผ`้ฉฌไบ`
|
| 350 |
+
**ๆชๅฝไธญ**๏ผ`้ฉฌไบ`
|
| 351 |
+
|
| 352 |
+
### Threshold ๅๅ
|
| 353 |
+
|
| 354 |
+
#### THR-01 ยท ้ซ้ๅผ 0.8 - ๆๆ่ฟๅๆดๅฐไฝๆด้ซ็ฝฎไฟกๅบฆ็ๅฎไฝ
|
| 355 |
+
|
| 356 |
+
**่ฏทๆฑ**
|
| 357 |
+
```json
|
| 358 |
+
{
|
| 359 |
+
"text": "Tesla and SpaceX are companies founded by Elon Musk.",
|
| 360 |
+
"language": "en",
|
| 361 |
+
"threshold": 0.8
|
| 362 |
+
}
|
| 363 |
+
```
|
| 364 |
+
|
| 365 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1525ms ยท 1 ไธชๅฎไฝ
|
| 366 |
+
|
| 367 |
+
| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |
|
| 368 |
+
|---|---|---|---|
|
| 369 |
+
| `Elon Musk` | ไบบๅๆๅงๅ | 0.86 | 42โ51 |
|
| 370 |
+
|
| 371 |
+
**ๆๆๅฝไธญ** 1/3๏ผ`Tesla`, `SpaceX`, `Elon Musk`
|
| 372 |
+
**ๆชๅฝไธญ**๏ผ`Tesla`, `SpaceX`
|
| 373 |
+
|
| 374 |
+
### Edge cases
|
| 375 |
+
|
| 376 |
+
#### EDGE-01 ยท ็ฉบๆๆฌ
|
| 377 |
+
|
| 378 |
+
**่ฏทๆฑ**
|
| 379 |
+
```json
|
| 380 |
+
{
|
| 381 |
+
"text": ""
|
| 382 |
+
}
|
| 383 |
+
```
|
| 384 |
+
|
| 385 |
+
**ๅๅบ**๏ผHTTP 200 ยท 1160ms ยท 0 ไธชๅฎไฝ
|
| 386 |
+
|
| 387 |
+
_ๆช่ฏๅซๅฐๅฎไฝ_
|
| 388 |
+
|
| 389 |
+
## ไธใๆ่ทฏ็ฑๅ็ปๆง่ฝ
|
| 390 |
+
|
| 391 |
+
| ๅ็ป | ็จไพๆฐ | ๆๅฟซ | ๆๆ
ข | ๅนณๅ |
|
| 392 |
+
|---|---|---|---|---|
|
| 393 |
+
| EN โ GLiNER ไธป่ทฏๅพ | 2 | 1542ms | 1632ms | 1587ms |
|
| 394 |
+
| ZH โ BERT ไธป่ทฏๅพ | 2 | 1282ms | 1305ms | 1294ms |
|
| 395 |
+
| ZH โ BERT ่พน็่ฏๅซ | 1 | 1330ms | 1330ms | 1330ms |
|
| 396 |
+
| AR โ GLiNER ไธป่ทฏๅพ | 1 | 1307ms | 1307ms | 1307ms |
|
| 397 |
+
| Mixed โ ๅๆจกๅๅๅนถ | 2 | 1635ms | 1652ms | 1643ms |
|
| 398 |
+
| auto โ ่ชๅจ่ฏญ่จๆฃๆต | 3 | 1267ms | 1940ms | 1545ms |
|
| 399 |
+
| min_entities ่ฆ็ๅฏๅๅผ | 2 | 1183ms | 1501ms | 1342ms |
|
| 400 |
+
| Threshold ๅๅ | 1 | 1525ms | 1525ms | 1525ms |
|
| 401 |
+
| Edge cases | 1 | 1160ms | 1160ms | 1160ms |
|
scripts/test_remote_api.py
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ๅฏน่ฟ็ซฏ HF Spaces ไธ้จ็ฝฒ็ NER API ๅ็ซฏๅฐ็ซฏๆต่ฏ๏ผ่ฆ็ๆๆ่ทฏ็ฑๅๆฏไธ่พน็ๆ
ๅตใ
|
| 3 |
+
ไธบๆฏไธช็จไพ่ฎฐๅฝ๏ผHTTP ็ถๆใ่ฏๅซๅฐ็ๅฎไฝใ่ฐ็จ่ๆถใ่ชๅจๆฃๆต็่ฏญ่จ๏ผๅฆๆ๏ผใ
|
| 4 |
+
ๆ็ป่พๅบ Markdown ๆฅๅ๏ผreports/remote_api_test_report.md
|
| 5 |
+
"""
|
| 6 |
+
import io
|
| 7 |
+
import json
|
| 8 |
+
import time
|
| 9 |
+
import urllib.request
|
| 10 |
+
import urllib.error
|
| 11 |
+
from dataclasses import dataclass, field
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
# Root URL of the remote service under test (a Hugging Face Space host).
BASE_URL = "https://robinwu-nerserver.hf.space"
# Endpoint that post_extract() sends its JSON payloads to.
EXTRACT = f"{BASE_URL}/api/v1/extract"
# Endpoint that check_health() probes with a plain GET.
HEALTH = f"{BASE_URL}/api/v1/health"
# Relative path of the Markdown report; presumably the file write_report()
# writes to (the write itself is not visible here; confirm).
REPORT = Path("reports/remote_api_test_report.md")
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# โโ ็จไพๅฎไน โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 21 |
+
#
|
| 22 |
+
# ๆฏไธช็จไพๅญๆฎต๏ผ
|
| 23 |
+
# id ็ญ็ผๅท
|
| 24 |
+
# group ๅ็ป๏ผ็จไบๆฅๅๅ็ฑป๏ผ
|
| 25 |
+
# description ไธญๆๆ่ฟฐ
|
| 26 |
+
# payload ไผ ็ป /api/v1/extract ็ JSON
|
| 27 |
+
# expected ๆๆๅฝไธญ็ๅฎไฝๆๆฌ๏ผ็จไบๅฌๅ็็ป่ฎก๏ผๅฏไธบ็ฉบ้ๅ่กจ็คบไธๆ ก้ช๏ผ
|
| 28 |
+
|
| 29 |
+
CASES: list[dict] = [
|
| 30 |
+
# โโ EN ่ทฏ็ฑ โโ
|
| 31 |
+
{
|
| 32 |
+
"id": "EN-01", "group": "EN โ GLiNER ไธป่ทฏๅพ",
|
| 33 |
+
"description": "่ฑๆ็ญๅฅ๏ผๆพๅผ language=en๏ผ่ชๅฎไนๆ ็ญพ",
|
| 34 |
+
"payload": {
|
| 35 |
+
"text": "Elon Musk founded SpaceX in Hawthorne, California in 2002.",
|
| 36 |
+
"labels": ["full name of a person", "company or organization name",
|
| 37 |
+
"geographical location", "date or year"],
|
| 38 |
+
"language": "en",
|
| 39 |
+
},
|
| 40 |
+
"expected": {"Elon Musk", "SpaceX", "Hawthorne", "California", "2002"},
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"id": "EN-02", "group": "EN โ GLiNER ไธป่ทฏๅพ",
|
| 44 |
+
"description": "่ฑๆ้ฟๆฎต๏ผlabels ็็ฉบ่งฆๅ้ป่ฎคๅ่ฏญๆ ็ญพ้",
|
| 45 |
+
"payload": {
|
| 46 |
+
"text": ("President Biden signed the Inflation Reduction Act in "
|
| 47 |
+
"Washington D.C. on August 16, 2022. The legislation was "
|
| 48 |
+
"championed by Senator Chuck Schumer and was seen as a major "
|
| 49 |
+
"win for the Democratic Party."),
|
| 50 |
+
"language": "en",
|
| 51 |
+
},
|
| 52 |
+
"expected": {"Biden", "Chuck Schumer", "Washington D.C.", "Democratic Party"},
|
| 53 |
+
},
|
| 54 |
+
# โโ ZH ่ทฏ็ฑ โโ
|
| 55 |
+
{
|
| 56 |
+
"id": "ZH-01", "group": "ZH โ BERT ไธป่ทฏๅพ",
|
| 57 |
+
"description": "ไธญๆ็ฐไปฃๅไธๆๆฌ๏ผๆพๅผ language=zh",
|
| 58 |
+
"payload": {
|
| 59 |
+
"text": "้ฟ้ๅทดๅทด้ๅขๅๅงไบบ้ฉฌไบไบ2019ๅนดๅธไปป่ฃไบๅฑไธปๅธญ๏ผ็ฑๅผ ๅๆฅไปปใ"
|
| 60 |
+
"ๆป้จไฝไบๆญๅท็้ฟ้ๅทดๅทดๆไธๆฅๆๆทๅฎใๅคฉ็ซใๆฏไปๅฎ็ญไธๅกๆฟๅใ",
|
| 61 |
+
"language": "zh",
|
| 62 |
+
},
|
| 63 |
+
"expected": {"้ฉฌไบ", "ๅผ ๅ", "้ฟ้ๅทดๅทด", "ๆญๅท"},
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"id": "ZH-02", "group": "ZH โ BERT ไธป่ทฏๅพ",
|
| 67 |
+
"description": "ไธญๆๅป็ๅบๆฏ๏ผ่ชๅฎไนๅ่ฏญๆ ็ญพ",
|
| 68 |
+
"payload": {
|
| 69 |
+
"text": "ๅไบฌๅๅๅป้ขๅฟๅ
็งไธปไปป็ๅปบๅฝๆๆๅข้๏ผไบ2023ๅนดๆๅๅฎๆ้ฆไพ"
|
| 70 |
+
"ๆบๅจไบบ่พ
ๅฉๅ ็ถๅจ่ๆญๆกฅๆๆฏ๏ผๆฃ่
ๆฅ่ชๅฑฑไธ็ๆตๅๅธใ",
|
| 71 |
+
"labels": ["ไบบๅๆๅงๅ", "ๅป้ขๆๅป็ๆบๆๅ็งฐ", "ๅฐๅๆๅๅธ", "ๆฅๆๆๅนดไปฝ"],
|
| 72 |
+
"language": "zh",
|
| 73 |
+
},
|
| 74 |
+
"expected": {"็ๅปบๅฝ", "ๅไบฌๅๅๅป้ข", "ๆตๅ"},
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"id": "ZH-03", "group": "ZH โ BERT ่พน็่ฏๅซ",
|
| 78 |
+
"description": "ๅคๅ
ธๆๅญฆ่พน็ๆต่ฏ โ ใๅฐคๆฐๆฅ่ฏทใๅบๅชๅใๅฐคๆฐใ",
|
| 79 |
+
"payload": {
|
| 80 |
+
"text": "ๅฐคๆฐๆฅ่ฏท๏ผ็็ๅค็ฌ้๏ผไฝ ๆฅไบใ่ดพๆฏๅฝไบบๆ้
๏ผๅฎ็ๅ้ป็ๅจๅคง่งๅญๆฃๆญฅใ",
|
| 81 |
+
"language": "zh",
|
| 82 |
+
},
|
| 83 |
+
"expected": {"ๅฐคๆฐ", "็็ๅค", "่ดพๆฏ", "ๅฎ็", "้ป็", "ๅคง่งๅญ"},
|
| 84 |
+
"must_not_contain": {"ๅฐคๆฐๆฅ่ฏท", "็็ๅค็ฌ้"},
|
| 85 |
+
},
|
| 86 |
+
# โโ AR ่ทฏ็ฑ โโ
|
| 87 |
+
{
|
| 88 |
+
"id": "AR-01", "group": "AR โ GLiNER ไธป่ทฏๅพ",
|
| 89 |
+
"description": "้ฟๆไผฏ่ฏญๆฐ้ป",
|
| 90 |
+
"payload": {
|
| 91 |
+
"text": ("ุฃุนูู ุงูุฑุฆูุณ ู
ุญู
ุฏ ุจู ุณูู
ุงู ุนู ุฅุทูุงู ู
ุดุฑูุน ูููู
ูู ุงูู
ู
ููุฉ "
|
| 92 |
+
"ุงูุนุฑุจูุฉ ุงูุณุนูุฏูุฉ ุนุงู
2017ุ ูุชุจูุบ ุชูููุชู 500 ู
ููุงุฑ ุฏููุงุฑ."),
|
| 93 |
+
"labels": ["full name of a person", "geographical location",
|
| 94 |
+
"project or initiative name", "date or year"],
|
| 95 |
+
"language": "ar",
|
| 96 |
+
},
|
| 97 |
+
"expected": {"ู
ุญู
ุฏ ุจู ุณูู
ุงู", "ุงูู
ู
ููุฉ ุงูุนุฑุจูุฉ ุงูุณุนูุฏูุฉ"},
|
| 98 |
+
},
|
| 99 |
+
# โโ Mixed ่ทฏ็ฑ๏ผๅ่ทๅๅนถ๏ผ โโ
|
| 100 |
+
{
|
| 101 |
+
"id": "MIX-01", "group": "Mixed โ ๅๆจกๅๅๅนถ",
|
| 102 |
+
"description": "ไธญ่ฑๆททๅ ยท ่ๅบๅบๆฏ๏ผlanguage=mixed ๅผบๅถๅ่ท",
|
| 103 |
+
"payload": {
|
| 104 |
+
"text": "ๅผ ไผๅ ๅ
ฅไบ Google ๅไบฌ็ ๅไธญๅฟ๏ผ่ด่ดฃ Android ็ณป็ปไผๅใ"
|
| 105 |
+
"ไป็ๅไบ Sarah Chen ๆฅ่ช Meta๏ผไธคไบบ๏ฟฝ๏ฟฝๅๅไธไบ 2024 ๅนด็ AI Summitใ",
|
| 106 |
+
"language": "mixed",
|
| 107 |
+
},
|
| 108 |
+
"expected": {"ๅผ ไผ", "Google", "Sarah Chen", "Meta", "Android", "ๅไบฌ", "2024"},
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"id": "MIX-02", "group": "Mixed โ ๅๆจกๅๅๅนถ",
|
| 112 |
+
"description": "ๅญฆๆฏๅบๆฏ๏ผlabels ็็ฉบ",
|
| 113 |
+
"payload": {
|
| 114 |
+
"text": "ๆธ
ๅๅคงๅญฆ่ฎก็ฎๆบ็ณปๆๆๆๆๅจ NeurIPS 2023 ๅ่กจไบๅ
ณไบ "
|
| 115 |
+
"Transformer ๆถๆ็่ฎบๆ๏ผๅไฝ่
ๆฅ่ช MIT ๅ Stanford Universityใ",
|
| 116 |
+
"language": "mixed",
|
| 117 |
+
},
|
| 118 |
+
"expected": {"ๆๆ", "ๆธ
ๅๅคงๅญฆ", "MIT", "Stanford University", "Transformer"},
|
| 119 |
+
},
|
| 120 |
+
# โโ auto ่ชๅจๆฃๆต โโ
|
| 121 |
+
{
|
| 122 |
+
"id": "AUTO-01", "group": "auto โ ่ชๅจ่ฏญ่จๆฃๆต",
|
| 123 |
+
"description": "็บฏไธญๆๆๆฌ๏ผๅบ่ขซๆฃๆตไธบ zh",
|
| 124 |
+
"payload": {
|
| 125 |
+
"text": "้ฉฌไบๅ็ซไบ้ฟ้ๅทดๅทด๏ผๆป้จๅจๆญๅทใ",
|
| 126 |
+
},
|
| 127 |
+
"expected": {"้ฉฌไบ", "้ฟ้ๅทดๅทด", "ๆญๅท"},
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"id": "AUTO-02", "group": "auto โ ่ชๅจ่ฏญ่จๆฃๆต",
|
| 131 |
+
"description": "็บฏ่ฑๆๆๆฌ๏ผๅบ่ขซๆฃๆตไธบ en",
|
| 132 |
+
"payload": {
|
| 133 |
+
"text": "Tim Cook is the CEO of Apple in Cupertino.",
|
| 134 |
+
},
|
| 135 |
+
"expected": {"Tim Cook", "Apple", "Cupertino"},
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"id": "AUTO-03", "group": "auto โ ่ชๅจ่ฏญ่จๆฃๆต",
|
| 139 |
+
"description": "ไธญ่ฑๆททๅ๏ผๅบ่ขซๆฃๆตไธบ mixed ๅนถๅ่ทๅๅนถ",
|
| 140 |
+
"payload": {
|
| 141 |
+
"text": "ๆๅๅจ Microsoft ๆ
ไปปๅทฅ็จๅธ๏ผๅธธ้ฉป Seattle ๅๅ
ฌๅฎคใ",
|
| 142 |
+
},
|
| 143 |
+
"expected": {"ๆๅ", "Microsoft", "Seattle"},
|
| 144 |
+
},
|
| 145 |
+
# โโ min_entities ่ฆ็ โโ
|
| 146 |
+
{
|
| 147 |
+
"id": "MIN-01", "group": "min_entities ่ฆ็ๅฏๅๅผ",
|
| 148 |
+
"description": "min_entities=10 ๅผบๅถๅ
ๅบ๏ผ็ญๆๆฌๅฏๅๅผๅชๆๆ 1 ไธช๏ผ",
|
| 149 |
+
"payload": {
|
| 150 |
+
"text": "้ฉฌไบ",
|
| 151 |
+
"language": "zh",
|
| 152 |
+
"min_entities": 10,
|
| 153 |
+
},
|
| 154 |
+
"expected": {"้ฉฌไบ"},
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"id": "MIN-02", "group": "min_entities ่ฆ็ๅฏๅๅผ",
|
| 158 |
+
"description": "min_entities=0 ๅ
ณ้ญๅ
ๅบ",
|
| 159 |
+
"payload": {
|
| 160 |
+
"text": "้ฉฌไบ",
|
| 161 |
+
"language": "zh",
|
| 162 |
+
"min_entities": 0,
|
| 163 |
+
},
|
| 164 |
+
"expected": {"้ฉฌไบ"},
|
| 165 |
+
},
|
| 166 |
+
# โโ ้ๅผๅๅ โโ
|
| 167 |
+
{
|
| 168 |
+
"id": "THR-01", "group": "Threshold ๅๅ",
|
| 169 |
+
"description": "้ซ้ๅผ 0.8 - ๆๆ่ฟๅๆดๅฐไฝๆด้ซ็ฝฎไฟกๅบฆ็ๅฎไฝ",
|
| 170 |
+
"payload": {
|
| 171 |
+
"text": "Tesla and SpaceX are companies founded by Elon Musk.",
|
| 172 |
+
"language": "en",
|
| 173 |
+
"threshold": 0.8,
|
| 174 |
+
},
|
| 175 |
+
"expected": {"Tesla", "SpaceX", "Elon Musk"},
|
| 176 |
+
},
|
| 177 |
+
# โโ ่พน็่ฏทๆฑ โโ
|
| 178 |
+
{
|
| 179 |
+
"id": "EDGE-01", "group": "Edge cases",
|
| 180 |
+
"description": "็ฉบๆๆฌ",
|
| 181 |
+
"payload": {"text": ""},
|
| 182 |
+
"expected": set(),
|
| 183 |
+
},
|
| 184 |
+
]
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
# โโ HTTP ่ฐ็จ + ่ฎกๆถ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 188 |
+
|
| 189 |
+
@dataclass
|
| 190 |
+
class CallResult:
|
| 191 |
+
case_id: str
|
| 192 |
+
status: int
|
| 193 |
+
elapsed_ms: float
|
| 194 |
+
entities: list[dict] = field(default_factory=list)
|
| 195 |
+
labels_used: list[str] = field(default_factory=list)
|
| 196 |
+
error: str | None = None
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def post_extract(payload: dict, timeout: int = 60) -> CallResult:
    """POST ``payload`` as JSON to the extract endpoint and time the call.

    Args:
        payload: JSON-serialisable request body.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        A ``CallResult`` with ``case_id`` left as ``""``.
        On success it carries the HTTP status plus the ``entities`` and
        ``labels_used`` values of the JSON response (empty lists if absent).
        On an HTTP error status it carries that status code and the decoded
        error body in ``error``.
        On any other failure (network error, timeout, undecodable or
        non-JSON body) ``status`` is 0 and ``error`` is the exception text.

    The function never raises for request failures; every outcome is
    reported through the returned ``CallResult``.
    """
    body = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(
        EXTRACT,
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    t0 = time.perf_counter()
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            raw = resp.read()
            # urlopen() returns once the headers have arrived; stop the clock
            # only after the body is downloaded so the latency is end-to-end.
            elapsed = (time.perf_counter() - t0) * 1000
            status = resp.status
        data = json.loads(raw.decode("utf-8"))
        return CallResult(
            case_id="",
            status=status,
            elapsed_ms=elapsed,
            entities=data.get("entities", []),
            labels_used=data.get("labels_used", []),
        )
    except urllib.error.HTTPError as e:
        # HTTPError doubles as the response object: read the error body and
        # close it so the underlying connection is released.
        with e:
            detail = e.read().decode("utf-8", errors="replace")
        elapsed = (time.perf_counter() - t0) * 1000
        return CallResult(case_id="", status=e.code, elapsed_ms=elapsed,
                          error=detail)
    except Exception as e:
        # Deliberately broad: the harness records every failure in the report
        # instead of aborting the whole run.
        elapsed = (time.perf_counter() - t0) * 1000
        return CallResult(case_id="", status=0, elapsed_ms=elapsed, error=str(e))
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
# โโ ๅฅๅบทๆฃๆฅ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 229 |
+
|
| 230 |
+
def check_health() -> tuple[bool, float, str]:
    """Probe the health endpoint once.

    Returns:
        A ``(ok, elapsed_ms, detail)`` tuple. ``ok`` is True only when the
        response status is 200, and ``detail`` is the decoded response body.
        If the request raises for any reason, ``ok`` is False and ``detail``
        is the exception text.
    """
    started = time.perf_counter()
    try:
        with urllib.request.urlopen(HEALTH, timeout=30) as response:
            # Latency is taken as soon as the response object is available,
            # before the body is read.
            latency_ms = (time.perf_counter() - started) * 1000
            healthy = response.status == 200
            body_text = response.read().decode()
    except Exception as exc:
        failed_after_ms = (time.perf_counter() - started) * 1000
        return False, failed_after_ms, str(exc)
    return healthy, latency_ms, body_text
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
# โโ ๆฅๅ็ๆ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 241 |
+
|
| 242 |
+
def write_report(results: list[tuple[dict, CallResult]], health: tuple[bool, float, str]):
    """Render the Markdown test report and write it to ``REPORT``.

    The report consists of a header (target URL, timestamp, health-probe
    outcome, case count), a summary table with one row per case, per-group
    detail sections, and a per-group timing table covering only the calls
    that returned HTTP 200. The final path is printed to stdout.

    Args:
        results: ``(case, result)`` pairs. Each case dict is read for
            ``id``, ``description``, ``group`` and ``payload``, plus the
            optional set-valued keys ``expected`` and ``must_not_contain``.
        health: ``(ok, elapsed_ms, detail)`` from the health probe.

    An empty ``results`` list produces a report with a 0ms average instead
    of raising ``ZeroDivisionError``.
    """
    # NOTE(review): the non-ASCII literals in this function look mis-decoded
    # in this copy of the file. They are runtime output, so they are
    # reproduced as found rather than guessed at - confirm against the
    # original script.
    buf = io.StringIO()
    w = buf.write

    # -- Header ---------------------------------------------------------------
    ok, hms, hbody = health
    w("# ่ฟ็ซฏ API ๆต่ฏๆฅๅ\n\n")
    w(f"- ๆๅกๅฐๅ๏ผ`{BASE_URL}`\n")
    w(f"- ๆต่ฏๆถ้ด๏ผ{time.strftime('%Y-%m-%d %H:%M:%S')}\n")
    w(f"- ๅฅๅบทๆฃๆฅ๏ผ{'โ OK' if ok else 'โ FAIL'} ({hms:.0f}ms) โ {hbody}\n")
    w(f"- ็จไพๆปๆฐ๏ผ{len(results)}\n\n")

    # -- Summary table --------------------------------------------------------
    w("## ไธใๆฑๆป\n\n")
    w("| ็จไพ | ๆ่ฟฐ | HTTP | ๅฎไฝๆฐ | ๅฌๅ | ่ๆถ |\n")
    w("|---|---|---|---|---|---|\n")
    total_ms = 0.0
    pass_n = 0
    for case, res in results:
        expected = case.get("expected", set())
        found = {e["text"] for e in res.entities}
        hit = len(expected & found)
        # Recall is only meaningful when the case lists expected entities.
        recall = f"{hit}/{len(expected)}" if expected else "โ"
        ok_mark = "โ" if res.status == 200 else "โ"
        w(f"| **{case['id']}** | {case['description']} | {ok_mark} {res.status} | "
          f"{len(res.entities)} | {recall} | {res.elapsed_ms:.0f}ms |\n")
        if res.status == 200:
            pass_n += 1
        total_ms += res.elapsed_ms
    # Guard the average so an empty run does not divide by zero.
    avg_ms = total_ms / len(results) if results else 0.0
    w(f"\n- ้่ฟ็๏ผ**{pass_n}/{len(results)}**\n")
    w(f"- ็ดฏ่ฎก่ๆถ๏ผ**{total_ms:.0f}ms**๏ผๅนณๅ {avg_ms:.0f}ms/่ฏทๆฑ๏ผ\n\n")

    # -- Per-group details ----------------------------------------------------
    groups: dict[str, list] = {}
    for case, res in results:
        groups.setdefault(case["group"], []).append((case, res))

    w("## ไบใๅ็ป่ฏฆ็ป็ปๆ\n\n")
    for group_name, items in groups.items():
        w(f"### {group_name}\n\n")
        for case, res in items:
            w(f"#### {case['id']} ยท {case['description']}\n\n")
            w("**่ฏทๆฑ**\n```json\n")
            w(json.dumps(case["payload"], ensure_ascii=False, indent=2))
            w("\n```\n\n")

            w(f"**ๅๅบ**๏ผHTTP {res.status} ยท {res.elapsed_ms:.0f}ms ยท "
              f"{len(res.entities)} ไธชๅฎไฝ\n\n")

            # A failed call shows only its error text; the entity table and
            # the expectation checks are skipped.
            if res.error:
                w(f"```\nERROR: {res.error}\n```\n\n")
                continue

            if res.entities:
                w("| ๆๆฌ | ๆ ็ญพ | ็ฝฎไฟกๅบฆ | ่ตทๆญข |\n|---|---|---|---|\n")
                for e in res.entities:
                    w(f"| `{e['text']}` | {e['label']} | {e['score']:.2f} | "
                      f"{e['start']}โ{e['end']} |\n")
            else:
                w("_ๆช่ฏๅซๅฐๅฎไฝ_\n")

            found = {e["text"] for e in res.entities}

            expected = case.get("expected", set())
            if expected:
                hits = expected & found
                misses = expected - found
                w(f"\n**ๆๆๅฝไธญ** {len(hits)}/{len(expected)}๏ผ")
                w(", ".join(f"`{x}`" for x in expected) + " \n")
                if misses:
                    missed = ", ".join(f"`{x}`" for x in misses)
                    w(f"**ๆชๅฝไธญ**๏ผ{missed} \n")

            # Entity texts that must NOT be returned for this case.
            mnc = case.get("must_not_contain", set())
            if mnc:
                bad = found & mnc
                if bad:
                    w(f"\n> โ ๏ธ **่พน็้่ฏฏ**๏ผ{bad}\n")
                else:
                    w(f"\n> โ ่พน็ๆญฃ็กฎ๏ผๆชๅบ็ฐ {mnc}๏ผ\n")
            w("\n")

    # -- Timing per group (HTTP 200 calls only) ---------------------------------
    w("## ไธใๆ่ทฏ็ฑๅ็ปๆง่ฝ\n\n")
    by_group: dict[str, list[float]] = {}
    for case, res in results:
        if res.status == 200:
            by_group.setdefault(case["group"], []).append(res.elapsed_ms)
    # NOTE(review): this header literal was split across two lines in the
    # source (likely an encoding artifact); it is rejoined here - confirm the
    # intended text.
    w("| ๅ็ป | ็จไพๆฐ | ๆๅฟซ | ๆๆข | ๅนณๅ |\n|---|---|---|---|---|\n")
    for g, times in by_group.items():
        w(f"| {g} | {len(times)} | {min(times):.0f}ms | "
          f"{max(times):.0f}ms | {sum(times)/len(times):.0f}ms |\n")

    REPORT.parent.mkdir(parents=True, exist_ok=True)
    REPORT.write_text(buf.getvalue(), encoding="utf-8")
    print(f"\nReport: {REPORT.resolve()}")
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
# โโ ไธป็จๅบ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 338 |
+
|
| 339 |
+
def main():
    """Probe the service, run every case in ``CASES``, then write the report.

    When the health probe fails, the failure detail is printed and the
    function returns immediately: no cases are run and no report is written.
    Progress for each case is printed on one line as it completes.
    """
    print(f"Target: {BASE_URL}")
    health = check_health()
    healthy, health_ms, health_detail = health
    print(f"Health: {'OK' if healthy else 'FAIL'} ({health_ms:.0f}ms)")
    if not healthy:
        print(f" -> {health_detail}")
        return

    results: list[tuple[dict, CallResult]] = []
    for case in CASES:
        case_id = case["id"]
        # Print the id first (no newline) so a slow request is visible.
        print(f" {case_id:8s} ", end="", flush=True)
        outcome = post_extract(case["payload"])
        outcome.case_id = case_id
        results.append((case, outcome))
        if outcome.status == 200:
            verdict = "OK"
        else:
            verdict = f"FAIL({outcome.status})"
        print(f"{verdict:8s} {outcome.elapsed_ms:6.0f}ms {len(outcome.entities)} entities")

    write_report(results, health)


if __name__ == "__main__":
    main()
|