Spaces:

VIDraft
/

TeXray-backup

Sleeping

App Files Files Community

seawolf2357 committed on Mar 1

Commit

040bfa1

verified ·

1 Parent(s): 5df96e9

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -5

app.py CHANGED Viewed

@@ -502,17 +502,108 @@ def analyze_quality(text, sentences, words, morphemes):
 # ═══════════════════════════════════════════════
 # Judge models as (model id passed to call_groq, display label used as the key in the result detail).
 LLM_JUDGES = [("openai/gpt-oss-120b","GPT-OSS 120B"),("qwen/qwen3-32b","Qwen3 32B"),("moonshotai/kimi-k2-instruct-0905","Kimi-K2")]
 def llm_cross_check(text):
     # Old (pre-commit) side of this diff view: lines carrying a "-" gutter are the
     # ones this commit removed; the remaining lines are unchanged context.
     # Sends the same prompt to every model in LLM_JUDGES and returns
     # {"score": int-truncated mean of the parsed percentages, or -1, "detail": per-model result}.
     # Without GROQ_KEY no model is called and the detail dict is empty.
     if not GROQ_KEY: return {"score":-1,"detail":{}}
     # Korean prompt asking for a final line of the form "AI확률: XX%"; only the first 2000 characters of text are sent.
-    prompt = f"AI 텍스트 탐지 전문가로서 분석. 1) AI vs 사람+근거3 2) 마지막줄: \"AI확률: XX%\"\n\n[텍스트]\n{text[:2000]}"
     votes=[]; rpt={}
     for mid,mn in LLM_JUDGES:
         # call_groq is unpacked as a (response, error) pair; a falsy response is reported as an error.
         resp,err = call_groq(mid,prompt)
         if resp:
             # Only the literal "AI확률" label followed by digits is recognised; the first match in the reply wins.
-            pm = re.search(r'AI\s*확률[:\s]*(\d+)',resp)
-            if pm: p=int(pm.group(1)); votes.append(p); rpt[mn]=f"{p}%"
-            else: rpt[mn]="파싱실패"
             # The error value itself is not included in the report, only the marker "ERR".
-        else: rpt[mn]=f"ERR"
     # int() truncates the mean toward zero rather than rounding.
     if votes: return {"score":int(sum(votes)/len(votes)),"detail":rpt}
     return {"score":-1,"detail":rpt}

 # ═══════════════════════════════════════════════
 # Judge models as (model id passed to call_groq, display label used as the key in the result detail).
 LLM_JUDGES = [("openai/gpt-oss-120b","GPT-OSS 120B"),("qwen/qwen3-32b","Qwen3 32B"),("moonshotai/kimi-k2-instruct-0905","Kimi-K2")]
+def _parse_ai_probability(raw_resp):
+    """LLM 응답에서 AI 확률(0~100)을 추출. 한국어/영어 다양한 형식 대응."""
+    if not raw_resp: return -1
+    # 1. <think> 태그 분리
+    think_content = ''
+    think_m = re.search(r'<think>(.*?)</think>', raw_resp, flags=re.S)
+    if think_m: think_content = think_m.group(1)
+    resp = re.sub(r'<think>.*?</think>', '', raw_resp, flags=re.S).strip()
+    if not resp or len(resp) < 5:
+        resp = raw_resp  # think만 있으면 원본 포함
+    # 2. 특정 키워드 패턴 (높은 우선순위 — 첫 매칭)
+    specific_patterns = [
+        r'AI\s*확률\s*[:：]\s*(?:약\s*)?(\d+)\s*%?',
+        r'AI\s*[Pp]robability\s*[:：]\s*(?:about|approximately?\s*)?(\d+)\s*%?',
+        r'[Pp]robability\s*(?:of\s*)?(?:being\s*)?AI\s*[:：\-]\s*(?:about|approximately?\s*)?(\d+)\s*%?',
+        r'AI\s*(?:생성|작성|판정)?\s*확률\s*[:：]?\s*(?:약\s*)?(\d+)',
+        r'(?:Score|Rating|Confidence)\s*[:：]\s*(\d+)',
+        r'(\d+)\s*%\s*(?:의\s*)?(?:확률|가능성|probability|likely|chance|likelihood)',
+        r'(?:신뢰도|확신도)\s*[:：]?\s*(?:약\s*)?(\d+)\s*(?:%|퍼센트)',
+        r'(?:약\s*)?(\d+)\s*(?:%|퍼센트)\s*(?:정도|수준)',
+    ]
+    for pat in specific_patterns:
+        m = re.search(pat, resp, re.I)
+        if m:
+            v = int(m.group(1))
+            if 0 <= v <= 100: return v
+    # 3. 범용 패턴 — 마지막 5줄에서만 검색 (통계 수치 오탐 방지)
+    lines = [l.strip() for l in resp.strip().split('\n') if l.strip()]
+    for line in reversed(lines[-5:]):
+        # 라인에 AI/확률/probability 키워드가 있으면 우선
+        if re.search(r'AI|확률|[Pp]robab|신뢰|판[정단]', line):
+            nums = re.findall(r'(\d+)\s*%', line)
+            if nums:
+                v = int(nums[-1])
+                if 0 <= v <= 100: return v
+            nums = re.findall(r'(\d+)\s*퍼센트', line)
+            if nums:
+                v = int(nums[-1])
+                if 0 <= v <= 100: return v
+    # 4. 전체 텍스트에서 마지막 XX% (단, AI/확률 근처만)
+    all_pcts = list(re.finditer(r'(\d+)\s*(?:%|퍼센트|percent)', resp, re.I))
+    for m in reversed(all_pcts):
+        v = int(m.group(1))
+        # 주변 50자 내에 AI/확률 키워드 있는지
+        ctx_start = max(0, m.start()-50)
+        ctx = resp[ctx_start:m.end()+20]
+        if re.search(r'AI|확률|[Pp]robab|신뢰|판[정단]|가능성|likelihood', ctx, re.I):
+            if 0 <= v <= 100: return v
+    # 5. 최후 수단: 전체에서 마지막 XX%
+    if all_pcts:
+        v = int(all_pcts[-1].group(1))
+        if 5 <= v <= 99: return v  # 100% 제외 (통계 수치 오탐 방지)
+    # 6. think 내부 폴백 (본문 파싱 실패 시)
+    if think_content:
+        for pat in specific_patterns:
+            m = re.search(pat, think_content, re.I)
+            if m:
+                v = int(m.group(1))
+                if 0 <= v <= 100: return v
+        # think 내부 마지막 XX%
+        think_pcts = re.findall(r'(\d+)\s*%', think_content)
+        if think_pcts:
+            v = int(think_pcts[-1])
+            if 5 <= v <= 99: return v
+    return -1
 def llm_cross_check(text):
     """Ask every model in LLM_JUDGES how likely `text` is AI-written and average the answers.

     Args:
         text: Text to analyse; only the first 2000 characters are sent.

     Returns:
         A dict with two keys:
         "score": int-truncated mean of the percentages that could be parsed,
             or -1 when GROQ_KEY is unset or no reply could be parsed.
         "detail": maps each judge's display label to "NN%",
             "파싱실패(<end of reply>)" or "ERR:<error>"; empty when
             GROQ_KEY is unset.
     """
     if not GROQ_KEY:
         return {"score": -1, "detail": {}}

     # English instructions with a Korean answer label. The prompt is joined
     # from explicit pieces so its text does not depend on source indentation.
     prompt = (
         "Analyze whether this text was written by AI.\n"
         "[Instructions]\n"
         "1. Determine AI vs Human with 3 brief reasons\n"
         "2. IMPORTANT - Your LAST line MUST be exactly this format:\n"
         "AI확률: XX%\n"
         "(Replace XX with your estimated probability 0-100)\n"
         "Example of correct last line:\n"
         "AI확률: 75%\n"
         "[Text to analyze]\n"
         f"{text[:2000]}"
     )

     votes = []
     rpt = {}
     for mid, mn in LLM_JUDGES:
         # call_groq is unpacked as a (response, error) pair.
         resp, err = call_groq(mid, prompt)
         if not resp:
             # str() keeps this safe if the error is not already a string.
             rpt[mn] = f"ERR:{str(err)[:30] if err else '?'}"
             continue
         p = _parse_ai_probability(resp)
         if p >= 0:
             votes.append(p)
             rpt[mn] = f"{p}%"
         else:
             # Debug aid: show the real end of the reply (think block removed),
             # which is where the requested "AI확률" line should have been.
             cleaned = re.sub(r'<think>.*?</think>', '', resp, flags=re.S).strip()
             tail = cleaned[-40:].replace('\n', ' ')
             rpt[mn] = f"파싱실패({tail})"

     if votes:
         # int() truncates the mean toward zero.
         return {"score": int(sum(votes) / len(votes)), "detail": rpt}
     return {"score": -1, "detail": rpt}