# nerserver/scripts/compare_models.py
# Robin
# feat: optional labels, bilingual auto-expansion, language hint (v2.0)
# 372fe0c
"""
Compare the gliner_multi-v2.1 and gliner-multitask-large-v0.5 models on
NER quality for Chinese, English, Arabic and mixed Chinese-English text.

Improvements:
- Test cases containing Chinese use bilingual labels (Chinese and English
  side by side) to raise the Chinese recognition rate
- Results are written to a UTF-8 Markdown report, which avoids garbled output
  on Windows GBK consoles
- Added an Arabic test case
- Added span de-duplication: bilingual labels can produce duplicate spans, so
  only the highest-scoring one is kept

Usage:
    python scripts/compare_models.py
Report:
    reports/comparison_report.md
"""
import io
import os
import sys
import time
from dataclasses import dataclass, field
from pathlib import Path
# These must be set before huggingface_hub / gliner are imported below.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # Work around the OpenMP conflict on Windows
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1" # Silence the symlink warning on Windows
from huggingface_hub import snapshot_download
from gliner import GLiNER
# ── Test cases (Chinese and mixed-language cases use bilingual labels) ─────────
# Benchmark inputs. Each case is a dict with:
#   name            - display name used in the console summary and the report
#   lang            - language tag ("en", "zh", "ar" or "mixed")
#   text            - the passage handed to the model
#   labels          - entity label prompts handed to the model
#   expected        - entity surface strings that count as recall hits
#   boundary_check  - optional; "must_not_contain" lists over-long spans that
#                     are reported as boundary errors when extracted verbatim
# All non-ASCII strings were restored from mis-decoded UTF-8 (mojibake).
CASES = [
    # ── English ──────────────────────────────────────────────────────────────
    {
        "name": "EN-01 英文 · 科技人物",
        "lang": "en",
        "text": (
            "Elon Musk, CEO of Tesla and founder of SpaceX, announced a new "
            "Starship launch from Boca Chica, Texas. NASA has partnered with "
            "SpaceX for the Artemis lunar lander mission planned for 2026."
        ),
        "labels": [
            "full name of a person",
            "company or organization name",
            "geographical location",
            "product or technology name",
            "date or year",
        ],
        "expected": ["Elon Musk", "Tesla", "SpaceX", "NASA", "Boca Chica", "Texas", "2026"],
    },
    {
        "name": "EN-02 英文 · 政治新闻",
        "lang": "en",
        "text": (
            "President Biden signed the Inflation Reduction Act in Washington D.C. "
            "on August 16, 2022. The legislation was championed by Senator Chuck Schumer "
            "and was seen as a major win for the Democratic Party."
        ),
        "labels": [
            "full name of a person",
            "company or organization name",
            "geographical location",
            "legislation or policy name",
            "date or year",
            "political party",
        ],
        "expected": ["Biden", "Chuck Schumer", "Washington D.C.", "August 16, 2022", "Democratic Party"],
    },
    # ── Chinese ──────────────────────────────────────────────────────────────
    {
        "name": "ZH-01 中文 · 现代商业（双语标签）",
        "lang": "zh",
        "text": (
            "阿里巴巴集团创始人马云于2019年卸任董事局主席，由张勇接任。"
            "总部位于杭州的阿里巴巴旗下拥有淘宝、天猫、支付宝等业务板块。"
        ),
        "labels": [
            "人名或姓名", "full name of a person",
            "公司或组织机构名称", "company or organization name",
            "地名或城市", "geographical location",
            "产品或品牌名称", "product or brand name",
            "日期或年份", "date or year",
        ],
        "expected": ["马云", "张勇", "阿里巴巴", "杭州", "淘宝", "天猫", "支付宝", "2019"],
    },
    {
        "name": "ZH-02 中文 · 古典文学（边界测试）",
        "lang": "zh",
        "text": (
            "尤氏来请，王熙凤笑道：'你来了。'贾母命人摆酒，"
            "宝玉和黛玉在大观园散步，薛宝钗独坐梨香院。"
        ),
        "labels": [
            "人名或姓名", "full name of a person",
            "地名或场所", "place or location name",
        ],
        "expected": ["尤氏", "王熙凤", "贾母", "宝玉", "黛玉", "薛宝钗", "大观园", "梨香院"],
        "boundary_check": {
            "must_not_contain": ["尤氏来请", "王熙凤笑道"],
        },
    },
    {
        "name": "ZH-03 中文 · 医疗场景（双语标签）",
        "lang": "zh",
        "text": (
            "北京协和医院心内科主任王建国教授团队，于2023年成功完成首例"
            "机器人辅助冠状动脉搭桥手术，患者来自山东省济南市。"
        ),
        "labels": [
            "人名或姓名", "full name of a person",
            "医院或机构名称", "hospital or institution name",
            "地名或城市", "geographical location",
            "医疗技术或手术名称", "medical procedure or technology",
            "日期或年份", "date or year",
        ],
        "expected": ["王建国", "北京协和医院", "济南", "山东", "2023"],
    },
    # ── Arabic ───────────────────────────────────────────────────────────────
    {
        "name": "AR-01 阿拉伯语 · 新闻",
        "lang": "ar",
        "text": (
            "أعلن الرئيس محمد بن سلمان عن إطلاق مشروع نيوم في المملكة العربية السعودية "
            "عام 2017، وتبلغ تكلفته 500 مليار دولار."
        ),
        "labels": [
            "full name of a person",
            "company or organization name",
            "geographical location",
            "project or initiative name",
            "date or year",
            "monetary amount",
        ],
        "expected": ["محمد بن سلمان", "نيوم", "المملكة العربية السعودية", "2017"],
    },
    # ── Mixed Chinese / English ──────────────────────────────────────────────
    {
        "name": "MIX-01 中英混合 · 职场场景（双语标签）",
        "lang": "mixed",
        "text": (
            "张伟加入了 Google 北京研发中心，负责 Android 系统优化。"
            "他的同事 Sarah Chen 来自 Meta，两人共同参与了 2024 年的 AI Summit。"
        ),
        "labels": [
            "人名或姓名", "full name of a person",
            "公司或组织机构名称", "company or organization name",
            "地名或城市", "geographical location",
            "产品或技术名称", "product or technology name",
            "日期或年份", "date or year",
        ],
        "expected": ["张伟", "Google", "Sarah Chen", "Meta", "Android", "北京", "2024"],
    },
    {
        "name": "MIX-02 中英混合 · 学术场景（双语标签）",
        "lang": "mixed",
        "text": (
            "清华大学计算机系教授李明在 NeurIPS 2023 发表了关于 Transformer 架构的论文，"
            "合作者来自 MIT 和 Stanford University。"
        ),
        "labels": [
            "人名或姓名", "full name of a person",
            "大学或研究机构", "university or research institution",
            "会议或期刊名称", "conference or journal name",
            "技术或模型名称", "technology or model name",
            "日期或年份", "date or year",
        ],
        "expected": ["李明", "清华大学", "NeurIPS 2023", "Transformer", "MIT", "Stanford University"],
    },
]
# Confidence threshold handed to the model's predict_entities() call.
THRESHOLD = 0.4

# Root directory for the per-model local copies created by ensure_local().
CACHE_DIR = "./model_cache"

# Directory the Markdown report is written into.
REPORT_DIR = Path("reports")

# (short display name, Hugging Face repo id) for every model under comparison.
MODELS = [
    (
        "gliner_multi-v2.1",
        "urchade/gliner_multi-v2.1",
    ),
    (
        "gliner-multitask-large-v0.5",
        "knowledgator/gliner-multitask-large-v0.5",
    ),
]
# ── Span de-duplication ─────────────────────────────────────────────────────────
def deduplicate(entities: list[dict]) -> list[dict]:
    """Collapse entities that cover the same span, keeping the best-scoring one.

    Bilingual labels can make the model report the same ``(start, end)`` span
    more than once. For each distinct span the entry with the strictly highest
    ``score`` wins; on a tie the earliest entry is kept.

    Args:
        entities: Entity dicts carrying at least ``start``, ``end`` and ``score``.

    Returns:
        The surviving entity dicts ordered by ascending ``start``.
    """
    winners: dict[tuple, dict] = {}
    for candidate in entities:
        span = (candidate["start"], candidate["end"])
        incumbent = winners.get(span)
        # Strict comparison: an equal score never displaces the first entry seen.
        if incumbent is None or candidate["score"] > incumbent["score"]:
            winners[span] = candidate
    ordered = list(winners.values())
    ordered.sort(key=lambda item: item["start"])
    return ordered
# ── Model download (plain file copy, no symlinks, Windows compatible) ──────────
def ensure_local(model_name: str) -> str:
    """Return the local directory for ``model_name``, downloading it if needed.

    The directory lives under ``CACHE_DIR`` and is named after the repo id with
    ``/`` replaced by ``__``. A directory that exists and holds at least one
    entry is treated as already downloaded; otherwise ``snapshot_download`` is
    asked to place the repo there.

    Args:
        model_name: Hugging Face repo id, e.g. ``"org/model"``.

    Returns:
        The local directory path as a string.
    """
    target = Path(CACHE_DIR) / model_name.replace("/", "__")

    already_cached = target.exists() and any(target.iterdir())
    if already_cached:
        print(f" [cached] {target}")
        return str(target)

    print(f" [download] {model_name} -> {target}")
    snapshot_download(repo_id=model_name, local_dir=str(target))
    print(" [done]")
    return str(target)
# ── Data structures ─────────────────────────────────────────────────────────────
@dataclass
class CaseResult:
    """Outcome of running one model on one test case."""

    # Display name of the test case.
    case_name: str
    # Language tag of the test case.
    lang: str
    # Input passage that was analysed.
    text: str
    # Entity surface strings that count as hits.
    expected: list[str]
    # De-duplicated entity dicts returned for this case (each has a "text" key).
    entities: list[dict]
    # Wall-clock inference time in milliseconds.
    elapsed_ms: float
    # Extracted texts that matched a forbidden over-long span.
    boundary_violations: list[str] = field(default_factory=list)

    @property
    def found_texts(self) -> set[str]:
        """Set of distinct surface strings among the extracted entities."""
        return {e["text"] for e in self.entities}

    @property
    def hit_count(self) -> int:
        """Number of expected strings that were extracted verbatim."""
        # Evaluate the property once; it rebuilds the set on every access.
        found = self.found_texts
        return sum(1 for exp in self.expected if exp in found)

    @property
    def recall(self) -> float:
        """Fraction of expected strings found; 1.0 when nothing is expected."""
        if not self.expected:
            return 1.0
        return self.hit_count / len(self.expected)
@dataclass
class ModelResult:
    """Aggregated results of one model across all test cases."""

    # Short display name of the model.
    model_name: str
    # Time measured around obtaining and loading the model, in milliseconds.
    load_ms: float
    # One CaseResult per executed test case, in execution order.
    cases: list[CaseResult] = field(default_factory=list)

    @property
    def avg_recall(self) -> float:
        """Unweighted mean of the per-case recall; 0.0 when there are no cases."""
        return sum(c.recall for c in self.cases) / len(self.cases) if self.cases else 0.0

    @property
    def avg_infer_ms(self) -> float:
        """Mean per-case inference time in milliseconds; 0.0 when there are no cases."""
        return sum(c.elapsed_ms for c in self.cases) / len(self.cases) if self.cases else 0.0
# ── Running a model ─────────────────────────────────────────────────────────────
def run_model(short_name: str, model_name: str) -> ModelResult:
    """Load one model and run it over every entry in ``CASES``.

    Args:
        short_name: Display name stored on the returned ``ModelResult``.
        model_name: Hugging Face repo id passed to ``ensure_local``.

    Returns:
        A ``ModelResult`` holding the load time and one ``CaseResult`` per case.
    """
    print(f"\n{'─'*60}")
    print(f"Loading model: {model_name}")

    # NOTE: the timer starts before ensure_local(), so on a first run load_ms
    # also includes the time spent downloading the model.
    t0 = time.perf_counter()
    local_path = ensure_local(model_name)
    model = GLiNER.from_pretrained(local_path, local_files_only=True)
    load_ms = (time.perf_counter() - t0) * 1000
    print(f"[loaded] {load_ms:.0f}ms")

    result = ModelResult(model_name=short_name, load_ms=load_ms)
    for case in CASES:
        t1 = time.perf_counter()
        raw = model.predict_entities(case["text"], case["labels"], threshold=THRESHOLD)
        elapsed_ms = (time.perf_counter() - t1) * 1000

        entities = deduplicate(raw)

        # Look the forbidden spans up once per case instead of once per entity.
        forbidden = case.get("boundary_check", {}).get("must_not_contain", [])
        # An entity is a boundary violation when its text equals a forbidden span.
        violations = [e["text"] for e in entities if e["text"] in forbidden]

        result.cases.append(CaseResult(
            case_name=case["name"],
            lang=case["lang"],
            text=case["text"],
            expected=case.get("expected", []),
            entities=entities,
            elapsed_ms=elapsed_ms,
            boundary_violations=violations,
        ))

        status = "OK" if not violations else f"BOUNDARY ERR: {violations}"
        print(f" {case['name'][:30]:30s} {len(entities):2d} entities {elapsed_ms:.0f}ms {status}")

    return result
# ── Markdown report generation ──────────────────────────────────────────────────
def write_report(all_results: list[ModelResult], out_path: Path):
    """Render the comparison results as a UTF-8 Markdown report.

    The report has four sections: a per-case summary table, model load times,
    per-case entity details, and conclusions with fixed tuning advice.

    Args:
        all_results: One ``ModelResult`` per model; ``cases`` of each result is
            read by index in the same order as ``CASES``.
        out_path: Destination file. Missing parent directories are created.
    """
    buf = io.StringIO()
    w = buf.write

    w("# NER 模型对比测试报告\n\n")
    # Two trailing spaces form a Markdown hard line break, so the timestamp and
    # the threshold render on separate lines.
    w(f"生成时间：{time.strftime('%Y-%m-%d %H:%M:%S')}  \n")
    w(f"阈值（threshold）：`{THRESHOLD}` \n\n")

    # Section 1: summary table (recall and latency per case and model).
    w("## 一、汇总对比\n\n")
    header = "| 测试用例 | 语言 |"
    sep = "|---|---|"
    for r in all_results:
        header += f" {r.model_name} 召回 | {r.model_name} 耗时 |"
        sep += "---|---|"
    w(header + "\n")
    w(sep + "\n")
    for i, case in enumerate(CASES):
        row = f"| {case['name']} | `{case['lang']}` |"
        for r in all_results:
            cr = r.cases[i]
            pct = f"{cr.recall*100:.0f}%"
            row += f" {cr.hit_count}/{len(cr.expected)} ({pct}) | {cr.elapsed_ms:.0f}ms |"
        w(row + "\n")
    # Average row.
    avg_row = "| **平均** | — |"
    for r in all_results:
        avg_row += f" **{r.avg_recall*100:.0f}%** | **{r.avg_infer_ms:.0f}ms** |"
    w(avg_row + "\n\n")

    # Section 2: model load times.
    w("## 二、模型加载时间\n\n")
    w("| 模型 | 加载耗时 |\n|---|---|\n")
    for r in all_results:
        w(f"| {r.model_name} | {r.load_ms/1000:.1f}s |\n")
    w("\n")

    # Section 3: per-case details.
    w("## 三、逐用例详细结果\n\n")
    for i, case in enumerate(CASES):
        expected_md = ", ".join(f"`{e}`" for e in case.get("expected", []))
        w(f"### {case['name']}\n\n")
        w(f"**文本**\n```\n{case['text']}\n```\n\n")
        w(f"**期望实体**：{expected_md}\n\n")
        for r in all_results:
            cr = r.cases[i]
            # Build both lookup sets once per result rather than per entity.
            found = cr.found_texts
            expected_set = set(cr.expected)
            misses = [e for e in cr.expected if e not in found]
            w(f"#### {r.model_name} （{cr.elapsed_ms:.0f}ms，{len(cr.entities)} 个实体，召回 {cr.recall*100:.0f}%）\n\n")
            if cr.entities:
                w("| 文本 | 标签 | 置信度 | 命中期望 |\n|---|---|---|---|\n")
                for e in cr.entities:
                    hit_mark = "✓" if e["text"] in expected_set else ""
                    w(f"| `{e['text']}` | {e['label']} | {e['score']:.2f} | {hit_mark} |\n")
            else:
                w("_未识别到实体_\n")
            if misses:
                misses_md = ", ".join(f"`{m}`" for m in misses)
                w(f"\n**未命中**：{misses_md}\n")
            if cr.boundary_violations:
                w(f"\n> ⚠️ **边界错误**：{cr.boundary_violations}\n")
            w("\n")

    # Section 4: conclusions and advice.
    w("## 四、结论与建议\n\n")
    # max()/min() raise ValueError on an empty sequence, so skip the ranking
    # lines when no model results were supplied.
    if all_results:
        best = max(all_results, key=lambda r: r.avg_recall)
        fast = min(all_results, key=lambda r: r.avg_infer_ms)
        w(f"- **综合召回最高**：`{best.model_name}`（平均召回 {best.avg_recall*100:.0f}%）\n")
        w(f"- **推理最快**：`{fast.model_name}`（平均 {fast.avg_infer_ms:.0f}ms/次）\n\n")
    w("### 优化建议\n\n")
    w("1. **双语标签策略**：对中文或混合文本，同时提供中英文标签描述（如 `\"人名或姓名\"` + `\"full name of a person\"`），可显著提升中文实体召回率。GLiNER 是零样本模型，标签描述越具体、越接近训练语料的表达方式，识别效果越好。\n")
    w("2. **Span 去重**：使用双语标签时同一文本跨度可能被打上两个标签，建议在服务层按 `(start, end)` 去重，保留得分最高的结果（已在 `app/ner.py` 实现）。\n")
    w("3. **阈值调优**：英文建议 `threshold=0.5`，中文建议 `threshold=0.35~0.4`（模型对中文置信度普遍偏低）。\n")
    w("4. **古典/文言文**：两个模型对文言文支持均弱，建议结合规则或专用模型（如 `BERT-CRF` 在古汉语语料上微调）处理此类文本。\n")
    w("5. **阿拉伯语**：`gliner-multitask-large-v0.5` 在多语言上训练，对阿拉伯语有基础支持；`gliner_multi-v2.1` 阿拉伯语效果有限。\n")

    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(buf.getvalue(), encoding="utf-8")
    print(f"\n[report] {out_path.resolve()}")
# ── Entry point ─────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Run every configured model over all test cases.
    all_results: list[ModelResult] = []
    for short_name, model_name in MODELS:
        all_results.append(run_model(short_name, model_name))

    # Brief console summary: one row per case, one column group per model.
    print(f"\n{'='*70}")
    print(f"{'Case':<42} " + " ".join(f"{r.model_name[:20]:<20}" for r in all_results))
    print(f"{'─'*70}")
    for i, case in enumerate(CASES):
        row = f"{case['name'][:40]:<42}"
        for r in all_results:
            cr = r.cases[i]
            row += f" {cr.hit_count}/{len(cr.expected)} {cr.recall*100:3.0f}% {cr.elapsed_ms:5.0f}ms "
        print(row)
    print(f"{'─'*70}")
    avg_row = f"{'Average':<42}"
    for r in all_results:
        avg_row += f" avg {r.avg_recall*100:.0f}% / {r.avg_infer_ms:.0f}ms "
    print(avg_row)

    # The full details go to the Markdown report.
    report_path = REPORT_DIR / "comparison_report.md"
    write_report(all_results, report_path)