Spaces:

gkdud00
/

project-tdm

Runtime error

App Files Community

hy committed on Dec 16, 2025

Commit

1225cdd

1 Parent(s): 8fd2b87

mismatch

Browse files

Files changed (1) hide show

mismatch_model.py +27 -25

mismatch_model.py CHANGED Viewed

@@ -47,41 +47,43 @@ def _split_sentences_ko(text: str):
     return [p.strip() for p in parts if p.strip()]
 def summarize_kobart_strict(text):
-    """
-    [수정 버전]
-    - 정규식(Regex) 검사 로직을 모두 제거했습니다.
-    - KoBART가 생성한 요약문을 조건 없이 그대로 반환합니다.
-    """
     text = _clean_text(text)
     sents = _split_sentences_ko(text)
-    # 1. 입력이 너무 짧으면 모델 안 거치고 앞문장 반환 (속도 최적화)
-    # (기준을 200자로 완화함)
-    if len(text) < 200 or len(sents) <= 3:
-        return _clean_text(" ".join(sents[:3])) if sents else text
     try:
-        # 2. KoBART 요약 수행
         result = kobart_summarizer(
             text,
-            min_length=30,
-            max_length=90,
-            num_beams=4,
-            no_repeat_ngram_size=3,
-            early_stopping=True
         )[0]["summary_text"]
         out = _clean_text(result)
-        # 4. 요약문이 너무 짧게(10글자 미만) 나온 경우만 예외 처리
         if len(out) < 10:
-             return _clean_text(" ".join(sents[:3]))
         return out
     except Exception as e:
-        print(f"🚨 [Error] 요약 모델 에러: {e}")
-        return _clean_text(" ".join(sents[:3])) if sents else text[:200]
 def get_cosine_similarity(title, summary):
     """(유지) SBERT 코사인 유사도"""
@@ -163,8 +165,8 @@ def get_mismatch_score(summary, title):
 def calculate_mismatch_score(article_title, article_body):
     """
-    - w1 (SBERT 거리): 0.8
-    - w2 (NLI 불일치): 0.2
     - Threshold: 0.45 이상이면 '위험'
     """
     # 1) 본문 요약
@@ -178,7 +180,7 @@ def calculate_mismatch_score(article_title, article_body):
     nli_mismatch, entail, neutral, contra = get_mismatch_score(summary, article_title)
     # 4) 최종 점수(예전과 동일 구조)
-    w1, w2 = 0.8, 0.2
     final_score = (w1 * semantic_distance) + (w2 * nli_mismatch)
     reason = (
@@ -200,4 +202,4 @@ def calculate_mismatch_score(article_title, article_body):
         "score": round(final_score, 4),
         "reason": reason,
         "recommendation": recommendation
-    }

     return [p.strip() for p in parts if p.strip()]
 def summarize_kobart_strict(text):
     text = _clean_text(text)
     sents = _split_sentences_ko(text)
+    print("[DEBUG] len(text) =", len(text), "len(sents) =", len(sents))
+    print("[DEBUG] first3 =", " | ".join(sents[:3]))
+    # ✅ 오직 문장 수 기준만 사용
+    if len(sents) <= 3:
+        print("[DEBUG] <=3 sentences -> return as-is")
+        return _clean_text(" ".join(sents)) if sents else text
     try:
         result = kobart_summarizer(
             text,
+            min_length=30,
+            max_length=90,
+            num_beams=4,
+            no_repeat_ngram_size=3,
+            early_stopping=True,
+            truncation=True,   # 길이 초과 방지
         )[0]["summary_text"]
         out = _clean_text(result)
+        print("[DEBUG] kobart_out =", out)
+        # 요약이 말도 안 되게 짧을 때만 fallback
         if len(out) < 10:
+            print("[DEBUG] too short -> fallback to first 3 sentences")
+            return _clean_text(" ".join(sents[:3]))
         return out
     except Exception as e:
+        print("🚨 [Error] 요약 모델 에러:", repr(e))
+        return _clean_text(" ".join(sents[:3])) if sents else text
 def get_cosine_similarity(title, summary):
     """(유지) SBERT 코사인 유사도"""
 def calculate_mismatch_score(article_title, article_body):
     """
+    - w1 (SBERT 거리): 0.6
+    - w2 (NLI 불일치): 0.4
     - Threshold: 0.45 이상이면 '위험'
     """
     # 1) 본문 요약
     nli_mismatch, entail, neutral, contra = get_mismatch_score(summary, article_title)
     # 4) 최종 점수(예전과 동일 구조)
+    w1, w2 = 0.6, 0.4
     final_score = (w1 * semantic_distance) + (w2 * nli_mismatch)
     reason = (
         "score": round(final_score, 4),
         "reason": reason,
         "recommendation": recommendation
+    }