# spinxxxx's picture
# feat: add issue priority prediction model (score-based)
# 902efd1
"""
Convert a model score into a priority class (HIGH / MED / LOW).

Usage:
    from postprocess.to_priority import to_priority

    # Basic usage (no post-processing rules)
    priority = to_priority(score=0.82, text="로그인 에러")

    # With post-processing rules
    priority = to_priority(score=0.82, text="로그인 에러", use_rules=True)
"""
import json
import os
from typing import Optional
import yaml
def _load_thresholds(thresholds_path: str) -> dict:
    """Parse the threshold JSON file; raise FileNotFoundError if it is absent."""
    # Open directly instead of checking os.path.exists first, so there is no
    # window in which the file can disappear between the check and the open.
    try:
        handle = open(thresholds_path, "r", encoding="utf-8")
    except FileNotFoundError as err:
        # NOTE(review): the message text below looks mis-encoded in this copy
        # of the file; it is intentionally left byte-for-byte unchanged.
        raise FileNotFoundError(
            f"Threshold ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {thresholds_path}"
        ) from err
    with handle:
        return json.load(handle)


def _load_rules(rules_path) -> dict:
    """Parse the keyword rules file; return {} when it is missing or empty."""
    try:
        handle = open(rules_path, "r", encoding="utf-8")
    except FileNotFoundError:
        # A missing rules file is not an error: rules are simply not applied.
        return {}
    with handle:
        extension = os.path.splitext(os.fspath(rules_path))[1].lower()
        if extension == ".json":
            parsed = json.load(handle)
        else:
            parsed = yaml.safe_load(handle)
    # An empty YAML document parses to None; treat it as "no rules".
    return parsed or {}


def _contains_keyword(text_lower: str, keywords) -> bool:
    """Return True if any usable keyword occurs in the lower-cased text."""
    if not keywords:
        # Key absent, explicitly null, or an empty list.
        return False
    if isinstance(keywords, str):
        # A scalar was given instead of a list; treat it as one keyword
        # rather than iterating over its characters.
        keywords = [keywords]
    for keyword in keywords:
        if keyword is None:
            continue
        needle = str(keyword).lower()
        # Skip empty keywords: "" is a substring of every text and would
        # otherwise force the rule to fire for all inputs.
        if needle and needle in text_lower:
            return True
    return False


def to_priority(
    score: float,
    text: str = "",
    thresholds_path: str = "score_thresholds.json",
    rules_path: Optional[str] = None,
    use_rules: bool = False
) -> str:
    """
    Convert a score into a HIGH/MED/LOW priority.

    Without rules, the score is compared against the ``q_low`` and ``q_high``
    values read from the threshold file (each defaults to 0.0 when the key is
    missing): ``score >= q_high`` gives "HIGH", otherwise ``score <= q_low``
    gives "LOW", otherwise "MED".

    With ``use_rules=True`` and a rules file present, keyword rules are
    checked first, matching case-insensitively as substrings of ``text``:

    1. any ``low_forced_keywords`` match  -> "LOW" (takes precedence)
    2. any ``high_boost_keywords`` match  -> "HIGH"
    3. any ``min_med_keywords`` match and ``score <= q_low`` -> "MED"

    If no rule decides the result, the threshold comparison is used.

    Args:
        score: Score predicted by the model (original scale).
        text: Issue/commit text; only used when rules are applied.
            ``None`` is treated as an empty string.
        thresholds_path: Path to the threshold JSON file.
        rules_path: Path to the rules file. When None, "priority_rules.yaml"
            next to this module is used. A path ending in ".json" is parsed
            as JSON; anything else is parsed with ``yaml.safe_load``.
        use_rules: Whether to apply the keyword-based rules.

    Returns:
        "HIGH", "MED", or "LOW".

    Raises:
        FileNotFoundError: If the threshold file does not exist.
    """
    thresholds = _load_thresholds(thresholds_path)
    q_low = thresholds.get("q_low", 0.0)
    q_high = thresholds.get("q_high", 0.0)

    if use_rules:
        if rules_path is None:
            # Default location: priority_rules.yaml beside this module.
            rules_path = os.path.join(
                os.path.dirname(__file__), "priority_rules.yaml"
            )
        rules = _load_rules(rules_path)
        text_lower = text.lower() if text else ""

        # 1. Forced-LOW keywords win over everything else.
        if _contains_keyword(text_lower, rules.get("low_forced_keywords")):
            return "LOW"
        # 2. HIGH boost keywords.
        if _contains_keyword(text_lower, rules.get("high_boost_keywords")):
            return "HIGH"
        # 3. Guarantee at least MED for low scores with a min-MED keyword.
        if score <= q_low and _contains_keyword(
            text_lower, rules.get("min_med_keywords")
        ):
            return "MED"

    # Threshold-based conversion.
    if score >= q_high:
        return "HIGH"
    if score <= q_low:
        return "LOW"
    return "MED"
def to_priority_batch(
    scores: list,
    texts: Optional[list] = None,
    thresholds_path: str = "score_thresholds.json",
    rules_path: Optional[str] = None,
    use_rules: bool = False
) -> list:
    """
    Convert a batch of scores into priorities.

    Each (score, text) pair is passed to ``to_priority`` with the same
    ``thresholds_path``, ``rules_path`` and ``use_rules`` values.

    Args:
        scores: Scores to convert.
        texts: Texts aligned with ``scores`` (needed when rules are used).
            When None, an empty string is used for every score.
        thresholds_path: Path to the threshold JSON file.
        rules_path: Path to the rules file.
        use_rules: Whether to apply the keyword-based rules.

    Returns:
        A list of priorities, one per score, e.g. ["HIGH", "MED", "LOW", ...].

    Raises:
        ValueError: If ``texts`` is given and its length differs from
            ``scores``.
    """
    # Materialize so that any iterable is accepted and lengths can be compared.
    scores = list(scores)
    if texts is None:
        texts = [""] * len(scores)
    else:
        texts = list(texts)
        # zip() would silently drop the unmatched tail and return fewer
        # priorities than scores, so reject misaligned inputs explicitly.
        if len(texts) != len(scores):
            raise ValueError(
                f"scores and texts must have the same length "
                f"(got {len(scores)} and {len(texts)})"
            )

    return [
        to_priority(
            score=score,
            text=text,
            thresholds_path=thresholds_path,
            rules_path=rules_path,
            use_rules=use_rules,
        )
        for score, text in zip(scores, texts)
    ]