Spaces:

nhs0327
/

disaster-classifier

Running

App Files Files Community

nhs0327 committed 6 days ago

Commit

d46ae28

verified ·

1 Parent(s): 5f4c36c

v9n 모델로 업데이트 (KoELECTRA → koelectra-disaster-v9n)

Browse files

Files changed (1) hide show

app.py +32 -34

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-재난문자 분류 API — Hugging Face Spaces 배포용
 HUB_MODEL_ID를 push_to_hub.py 실행 후 업로드한 모델 ID로 변경하세요.
 """
@@ -12,28 +12,26 @@ from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 # ── 수정 필요 ──────────────────────────────────
-HUB_MODEL_ID = "nhs0327/koelectra-disaster-v3"
 # ──────────────────────────────────────────────
 app = FastAPI(title="재난문자 분류 API")
-MAX_LENGTH  = 96
-LABEL_NAMES = ['긴급', '주의', '일반']
-UNCERTAIN_THRESH = {'긴급': 0.60, '주의': 0.70, '일반': 0.70}
-EMERG_THRESH     = 0.10
-_ORG_PATTERN  = re.compile(r'\[[^\]]{1,20}\]')
-_CERT_EMERG   = [
-    '즉시 대피', '대피명령', '대피 명령', '긴급대피', '긴급 대피', '신속히 대피',
-    '지진 발생', '쓰나미', '민방공 경보', '민방공경보', '테러 발생',
-]
-_CERT_CAUTION = [
-    '호우경보', '호우주의보', '태풍경보', '태풍주의보',
-    '한파경보', '한파주의보', '폭염경보', '폭염주의보',
-    '대설경보', '대설주의보', '강풍경보', '강풍주의보',
-    '풍랑경보', '풍랑주의보',
-]
-_CERT_GENERAL = ['찾습니다', '실종된']
 device    = torch.device("cpu")
 tokenizer = AutoTokenizer.from_pretrained(HUB_MODEL_ID)
@@ -49,31 +47,31 @@ class ClassifyRequest(BaseModel):
 async def classify(req: ClassifyRequest):
     text = _ORG_PATTERN.sub('[기관]', req.message)
-    has_emerg   = any(kw in text for kw in _CERT_EMERG)
-    has_caution = any(kw in text for kw in _CERT_CAUTION)
-    has_general = any(kw in text for kw in _CERT_GENERAL) and 'cm' in text
-    if has_emerg and not has_caution:
-        return {"label": "긴급", "confidence": 1.0, "stage": "rule", "uncertain": False}
-    if has_caution and not has_emerg:
-        return {"label": "주의", "confidence": 1.0, "stage": "rule", "uncertain": False}
-    if has_general and not has_emerg and not has_caution:
-        return {"label": "일반", "confidence": 1.0, "stage": "rule", "uncertain": False}
     inputs = tokenizer(text, truncation=True, padding='max_length',
                        max_length=MAX_LENGTH, return_tensors='pt')
     with torch.no_grad():
         probs = F.softmax(model(**inputs).logits, dim=-1)[0]
-    pred_idx   = 0 if probs[0].item() >= EMERG_THRESH else probs.argmax().item()
     label      = LABEL_NAMES[pred_idx]
-    confidence = probs[pred_idx].item()
     return {
         "label":      label,
         "confidence": round(confidence, 4),
         "stage":      "model",
-        "uncertain":  confidence < UNCERTAIN_THRESH[label],
-        "probs":      {LABEL_NAMES[i]: round(probs[i].item(), 4) for i in range(3)},
     }

 """
+재난문자 분류 API — Hugging Face Spaces 배포용 (KLUE-BERT 5-class)
 HUB_MODEL_ID를 push_to_hub.py 실행 후 업로드한 모델 ID로 변경하세요.
 """
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 # ── 수정 필요 ──────────────────────────────────
+HUB_MODEL_ID = "nhs0327/koelectra-disaster-v9n"
 # ──────────────────────────────────────────────
 app = FastAPI(title="재난문자 분류 API")
+# Length passed to the tokenizer as max_length; inputs are truncated/padded to it.
+MAX_LENGTH  = 128
+# Class names, looked up by the predicted class index.
+LABEL_NAMES = ['긴급 아님', '낮은 긴급성', '중간 긴급성', '높은 긴급성', '매우 높은 긴급성']
+# A prediction whose confidence is below this value is reported as uncertain.
+UNCERTAIN_THRESH = 0.70
+# Class index 3 is only predicted when its probability reaches this value.
+L3_THRESHOLD = 0.69
+# Matches a bracketed tag of 1-20 characters; classify() replaces each match
+# with the placeholder '[기관]' before tokenizing.
+_ORG_PATTERN = re.compile(r'\[[^\]]{1,20}\]')
+def label_to_priority(idx: int) -> str:
+    """Map a class index to one of three priority strings.
+
+    Index 4 maps to '긴급', indices 2 and 3 map to '주의', and every other
+    value (including 0 and 1) maps to '일반'.
+    """
+    if idx == 4:
+        return '긴급'
+    if idx in (2, 3):
+        return '주의'
+    return '일반'
 device    = torch.device("cpu")
 tokenizer = AutoTokenizer.from_pretrained(HUB_MODEL_ID)
 async def classify(req: ClassifyRequest):
     text = _ORG_PATTERN.sub('[기관]', req.message)
     inputs = tokenizer(text, truncation=True, padding='max_length',
                        max_length=MAX_LENGTH, return_tensors='pt')
     with torch.no_grad():
         probs = F.softmax(model(**inputs).logits, dim=-1)[0]
+    import numpy as np
+    probs_np = probs.cpu().numpy()
+    if probs_np[3] >= L3_THRESHOLD:
+        pred_idx = 3
+    else:
+        probs_mod = probs_np.copy()
+        probs_mod[3] = -1.0
+        pred_idx = int(probs_mod.argmax())
     label      = LABEL_NAMES[pred_idx]
+    confidence = float(probs_np[pred_idx])
+    priority   = label_to_priority(pred_idx)
     return {
         "label":      label,
+        "priority":   priority,
         "confidence": round(confidence, 4),
         "stage":      "model",
+        "uncertain":  confidence < UNCERTAIN_THRESH,
+        "probs":      {LABEL_NAMES[i]: round(probs[i].item(), 4) for i in range(5)},
     }