|
|
""" |
|
|
Functions for converting a model score into a priority class.

Usage:
    from postprocess.to_priority import to_priority

    # Basic usage (no post-processing rules)
    priority = to_priority(score=0.82, text="로그인 에러")

    # With post-processing rules
    priority = to_priority(score=0.82, text="로그인 에러", use_rules=True)
|
|
""" |
|
|
|
|
|
import json |
|
|
import os |
|
|
from typing import Optional |
|
|
import yaml |
|
|
|
|
|
|
|
|
def _load_thresholds(thresholds_path: str) -> dict:
    """Read the JSON thresholds file; raise FileNotFoundError if it is absent."""
    try:
        with open(thresholds_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError as err:
        # The message is Korean for "Threshold file not found: <path>".
        raise FileNotFoundError(
            f"Threshold 파일을 찾을 수 없습니다: {thresholds_path}"
        ) from err


def _load_rules(rules_path: str) -> Optional[dict]:
    """Load the YAML rules file; return None when the file does not exist."""
    try:
        with open(rules_path, "r", encoding="utf-8") as f:
            # safe_load yields None for an empty document; treat that as
            # "no rules configured" instead of crashing on ``None.get``.
            return yaml.safe_load(f) or {}
    except FileNotFoundError:
        return None


def _has_keyword(text_lower: str, rules: dict, key: str) -> bool:
    """Tell whether any keyword listed under *key* occurs in *text_lower*."""
    # ``or []`` covers a key that is present but null in the YAML file.
    # Keywords are lower-cased because the text they are matched against is.
    # None/empty entries are skipped: "" is a substring of every text.
    keywords = [
        str(kw).lower()
        for kw in (rules.get(key) or [])
        if kw is not None and str(kw) != ""
    ]
    return any(kw in text_lower for kw in keywords)


def to_priority(
    score: float,
    text: str = "",
    thresholds_path: str = "score_thresholds.json",
    rules_path: Optional[str] = None,
    use_rules: bool = False
) -> str:
    """Convert a score into a HIGH/MED/LOW priority label.

    The score is compared against the ``q_low`` and ``q_high`` values read
    from the thresholds JSON file (each defaults to 0.0 when the key is
    missing). When ``use_rules`` is true and the rules file exists, keyword
    rules are applied to the text first, in this order:

    1. ``low_forced_keywords``  -> return "LOW"
    2. ``high_boost_keywords``  -> return "HIGH"
    3. ``min_med_keywords``     -> return "MED" if ``score <= q_low``

    If no rule decides the outcome, the plain threshold comparison is used.
    Keyword matching is a case-insensitive substring test.

    Args:
        score: Score predicted by the model (original scale).
        text: Issue/commit text; only consulted when rules are applied.
            ``None`` is treated as an empty string.
        thresholds_path: Path to the ``score_thresholds.json`` file.
        rules_path: Path to the ``priority_rules.yaml`` file. When ``None``,
            the file is looked up in the directory containing this module.
        use_rules: Whether to apply the keyword-based post-processing rules.

    Returns:
        "HIGH", "MED", or "LOW".

    Raises:
        FileNotFoundError: If the thresholds file does not exist.
    """
    thresholds = _load_thresholds(thresholds_path)
    q_low = thresholds.get("q_low", 0.0)
    q_high = thresholds.get("q_high", 0.0)

    if use_rules:
        if rules_path is None:
            # Default location: next to this module.
            rules_path = os.path.join(os.path.dirname(__file__), "priority_rules.yaml")

        rules = _load_rules(rules_path)
        # A missing rules file is not an error: fall back to thresholds only.
        if rules is not None:
            text_lower = (text or "").lower()

            if _has_keyword(text_lower, rules, "low_forced_keywords"):
                return "LOW"

            if _has_keyword(text_lower, rules, "high_boost_keywords"):
                return "HIGH"

            # Guarantee at least MED for texts that would otherwise be LOW.
            if _has_keyword(text_lower, rules, "min_med_keywords") and score <= q_low:
                return "MED"

    if score >= q_high:
        return "HIGH"
    if score <= q_low:
        return "LOW"
    return "MED"
|
|
|
|
|
|
|
|
def to_priority_batch(
    scores: list,
    texts: Optional[list] = None,
    thresholds_path: str = "score_thresholds.json",
    rules_path: Optional[str] = None,
    use_rules: bool = False
) -> list:
    """Convert a batch of scores into priority labels.

    Each (score, text) pair is passed to ``to_priority`` with the same
    ``thresholds_path``, ``rules_path`` and ``use_rules`` arguments.

    Args:
        scores: Scores to convert.
        texts: Texts aligned with ``scores``; only consulted when rules are
            applied. When ``None``, an empty string is used for every score.
        thresholds_path: Path to the ``score_thresholds.json`` file.
        rules_path: Path to the ``priority_rules.yaml`` file.
        use_rules: Whether to apply the keyword-based post-processing rules.

    Returns:
        A list of priority labels, one per score, e.g.
        ``["HIGH", "MED", "LOW", ...]``.

    Raises:
        ValueError: If ``texts`` is given and its length differs from the
            length of ``scores``.
    """
    scores = list(scores)
    texts = [""] * len(scores) if texts is None else list(texts)

    # zip() would silently drop the unmatched tail and return fewer labels
    # than scores, so reject misaligned inputs up front.
    if len(texts) != len(scores):
        raise ValueError(
            f"scores and texts must have the same length "
            f"(got {len(scores)} and {len(texts)})"
        )

    return [
        to_priority(
            score=score,
            text=text,
            thresholds_path=thresholds_path,
            rules_path=rules_path,
            use_rules=use_rules,
        )
        for score, text in zip(scores, texts)
    ]
|
|
|