Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from typing import List, Dict | |
| import os | |
| import json | |
| from .config import settings | |
| SYNONYM_MAP = { | |
| # Built-in synonym table: variant term -> canonical term. This group is presumably "symptom synonyms" (comment recovered from mis-encoded text -- TODO confirm). NOTE(review): the string literals in this table look mis-encoded (mojibake); restore them from the original UTF-8 source rather than editing them here. | |
| "่่่": "่ๆปก", | |
| "่่": "่ๆปก", | |
| "่้ท": "่ๆปก", | |
| "ๅฟ็ช่": "่ๆปก", | |
| "ๅณ่ ": "ๅณๆฐ", | |
| "็งๅฟ": "ๅ้ ธ", | |
| # Presumably "tongue coating" terms (comment recovered from mis-encoded text -- TODO confirm) | |
| "่่ๅ่ ป": "่่็ฝ่ ป", | |
| "่็ฝ่ ป": "่่็ฝ่ ป", | |
| # Presumably "syndrome type / pathogenesis keywords" (comment recovered from mis-encoded text -- TODO confirm) | |
| "่้": "่ๆฐ้็ป", | |
| "ๆฐๆบไธ็ ": "ๆฐๆป", | |
| "้ฃๆป": "้ฅฎ้ฃ็งฏๆป", | |
| "็ฐๆนฟ": "็ฐๆนฟไธญ้ป", | |
| "่พ่": "่พ่่ๅผฑ", | |
| "้ณ่": "่พ่่ๅฏ", | |
| } | |
# Process-wide cache of the external synonym mapping; None means "not loaded yet".
_external_map: Dict[str, str] | None = None


def _parse_simple_mapping(text: str) -> Dict[str, str]:
    """Parse flat ``key: value`` lines, skipping blank lines and ``#`` comments."""
    mapping: Dict[str, str] = {}
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        key, sep, value = line.partition(":")
        if not sep:
            # Lines without a colon carry no mapping and are ignored.
            continue
        key = key.strip().strip('"\'')
        value = value.strip().strip('"\'')
        if key and value:
            mapping[key] = value
    return mapping


def _load_external_synonyms() -> Dict[str, str]:
    """Load the optional user-supplied synonym mapping, caching the result.

    The file named by ``settings.synonyms_path`` is read once.  It is treated
    as JSON when the path ends with ``.json`` or the content starts with
    ``{``; otherwise it is parsed as flat ``key: value`` lines (a minimal
    YAML subset, so no YAML library is required).

    Loading is best-effort: a missing or unset path, an unreadable file,
    invalid UTF-8 or malformed JSON all yield an empty mapping.  A JSON
    payload that is not an object is ignored, and only string values are
    kept so that callers always receive a ``str -> str`` mapping.

    Returns:
        The cached mapping from variant term to canonical term (possibly
        empty).  The same dict object is returned on every call.
    """
    global _external_map
    if _external_map is not None:
        return _external_map

    mapping: Dict[str, str] = {}
    path = settings.synonyms_path
    if path and os.path.isfile(path):
        try:
            # Context manager guarantees the handle is closed even on error.
            with open(path, "r", encoding="utf-8") as handle:
                text = handle.read()
            # fsdecode accepts str, bytes and os.PathLike, so a pathlib.Path
            # setting no longer fails on ``.endswith``.
            looks_like_json = (
                os.fsdecode(path).endswith(".json")
                or text.lstrip().startswith("{")
            )
            if looks_like_json:
                loaded = json.loads(text)
                if isinstance(loaded, dict):
                    mapping = {
                        key: value
                        for key, value in loaded.items()
                        if isinstance(value, str)
                    }
            else:
                mapping = _parse_simple_mapping(text)
        except (OSError, ValueError):
            # OSError: I/O failure.  ValueError covers both
            # json.JSONDecodeError and UnicodeDecodeError.
            mapping = {}

    _external_map = mapping
    return mapping
def normalize_terms(terms: List[str]) -> List[str]:
    """Map each term to its canonical form and drop duplicates.

    Each term is stripped of surrounding whitespace and then looked up first
    in the external synonym mapping and, failing that, in the built-in
    ``SYNONYM_MAP``; a term found in neither is kept as-is.

    Args:
        terms: Raw terms.  Falsy entries (``None``, ``""``) and entries that
            are empty after stripping are skipped.

    Returns:
        Canonical terms in first-seen order, without duplicates.
    """
    external = _load_external_synonyms()
    seen = set()
    normalized: List[str] = []
    for term in terms:
        if not term:
            continue
        cleaned = term.strip()
        if not cleaned:
            # Whitespace-only input must not yield an empty-string term.
            continue
        canonical = external.get(cleaned, SYNONYM_MAP.get(cleaned, cleaned))
        # The set gives O(1) duplicate checks; the list preserves order.
        if canonical not in seen:
            seen.add(canonical)
            normalized.append(canonical)
    return normalized