Update app.py

app.py
CHANGED
@@ -6,27 +6,6 @@ from pathlib import Path
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from xml.etree import ElementTree as ET
 from kiwipiepy import Kiwi
-
-# β text_utilsμμ νμν ν¨μλ€ import
-from text_utils import (
-    extract_text_from_pdf,
-    extract_text_from_docx,
-    extract_text_from_txt,
-    extract_text_from_hwpx,
-    extract_text_from_hwp,
-    extract_file_text_api,
-    split_sentences,
-    split_words,
-    http_get,
-    brave_search,
-    search_kci,
-    search_riss,
-    search_arxiv,
-    duckduckgo_search,
-    self_crawl_search,
-    parallel_brave_search,
-)
-
 KIWI = Kiwi()
 try:
     import httpx; HAS_HTTPX = True
@@ -36,6 +15,618 @@ try:
     from google import genai
     from google.genai import types as gtypes
     HAS_GENAI = True
… (the 612 lines added in this hunk appear in the new-file listing further below)
     passive = len(HUMANIZER_PASSIVE.findall(text))
     if oversubst >= 3:
         signals.append(("μ μ¬λμ¬λ무", 12, f"νμ©/μ΄μ©/μ¬μ©/μ μ© λ± {oversubst}κ°"))
@@ -358,72 +949,34 @@ def search_arxiv(query):
         pass
     return results[:3]
 def gemini_plagiarism_check(text_chunk):
-    """
     if not HAS_GENAI or not GEMINI_KEY: return None
     try:
         client = genai.Client(api_key=GEMINI_KEY)
         tool = gtypes.Tool(google_search=gtypes.GoogleSearch())
-        … (old lines 366-370 removed; content not captured in this view)
-κ²μ¬ λ°©λ²:
-1. ν΅μ¬ λ¬Έμ₯λ€μ Google Searchλ‘ κ²μ
-2. λ°κ²¬λ μΆμ²μ μ μ¬λ(%) μ 리
-3. νκ΅μ΄/μμ΄ λͺ¨λ κ²μ
-
-[κ²μ¬ λμ νμ€νΈ]
-{text_chunk}
-
-μλ΅ νμ:
-λ°κ²¬λ νμ :
-- μΆμ² 1: [μ λͺ©] (μ μ¬λ: XX%)
-- μΆμ² 2: [μ λͺ©] (μ μ¬λ: XX%)
-...
-
-νκ°:
-μ 체 μ μ¬λ: XX%
-(κ°μ₯ λμ μ μ¬λ κΈ°μ€)
-
-μ°Έκ³ : λΈλ‘κ·Έ, λ΄μ€, νμ μ§, SNS λ± λͺ¨λ μΆμ² κ²μ"""
         resp = client.models.generate_content(
             model="gemini-2.0-flash-lite",
             contents=prompt,
-            config=gtypes.GenerateContentConfig(tools=[tool], temperature=0.1, max_output_tokens=…
         )
         text_resp = resp.text if resp.text else ""
         sources = []
-
-        # Grounding λ©νλ°μ΄ν°μμ μΆμ² μΆμΆ
         if hasattr(resp, 'candidates') and resp.candidates:
             gc = resp.candidates[0].grounding_metadata
             if gc and hasattr(gc, 'grounding_chunks'):
                 for chunk in gc.grounding_chunks:
                     if hasattr(chunk, 'web') and chunk.web:
-                        sources.append({
-                            …
-                            "url": chunk.web.uri or "",
-                            "source": "Google"
-                        })
-
-        # μλ΅μμ μ μ¬λ μΆμΆ
-        pm = re.search(r'(?:μ 체\s)?μ μ¬λ[:\s]*(\d+)', text_resp)
         pct = int(pm.group(1)) if pm else 0
-
-        return {
-            "pct": pct,
-            "response": text_resp,
-            "sources": sources,
-            "full_analysis": True  # μ 체 νμ€νΈ κ²μ¬ μλ£ νμ
-        }
     except Exception as e:
-        return {
-            "pct": 0,
-            "response": str(e)[:100],
-            "sources": [],
-            "full_analysis": False
-        }
 def parallel_brave_search(queries, max_workers=10):
     """Brave Search λ³λ ¬ μ€ν (μ΅λ 20κ°)"""
     all_results = {}
@@ -467,142 +1020,103 @@ def self_crawl_search(query, max_results=3):
     all_results.extend(duckduckgo_search(f"{query} λΌλ¬Έ νμ ", 2))
     return all_results
 def run_plagiarism(text, progress=gr.Progress()):
-    """β Gemini Google Search 90% (λ©μΈ) + Brave/arXiv 10% (보쑰)"""
-
     if not text or len(text.strip())<50:
         return "<div style='padding:20px;text-align:center;color:#888;'>β οΈ μ΅μ 50μ μ΄μ</div>", ""
-
     text = text.strip()
     sents = split_sentences(text)
     now = datetime.now().strftime("%Y-%m-%d %H:%M")
-
     progress(0.05, "λ¬Έμ₯ λΆλ¦¬...")
-
     all_sources = []
     log_lines = []
-    … (old lines 483-546 removed; their content is not captured in this view)
-                url = m.group(0)
-                if len(url) > 10:
-                    gemini_sources.append({
-                        "title": url.split('/')[2] if '/' in url else url,
-                        "url": url,
-                        "source": "Google",
-                    })
-                    all_sources.append({
-                        "title": url.split('/')[2] if '/' in url else url,
-                        "url": url,
-                        "source": "Google",
-                    })
-
-        log_lines.append(f"β Gemini: {gemini_pct}% β {len(gemini_sources)}κ° μΆμ²")
-
-    except Exception as e:
-        log_lines.append(f"β οΈ Gemini: {str(e)[:80]}")
-
-    # ============================================
-    # 보쑰 (10%): Brave + arXiv
-    # ============================================
-    brave_pct = 0
-    arxiv_pct = 0
-
-    progress(0.60, "보쑰 κ²μ¬ (Brave + arXiv)...")
-
-    try:
-        if BRAVE_KEY:
-            words = split_words(text)
-            key_query = ' '.join(words[:5])
-            brave_results = brave_search(key_query, 3)
-            if brave_results:
-                brave_pct = 30
-                all_sources.extend(brave_results)
-                log_lines.append(f"Brave: {len(brave_results)}건")
-
-        words = split_words(text)
-        wf = Counter(words)
-        keywords = [w for w, c in wf.most_common(10) if len(w) >= 3][:3]
-        arxiv_results = search_arxiv(' '.join(keywords))
-        if arxiv_results:
-            arxiv_pct = 20
-            all_sources.extend(arxiv_results)
-            log_lines.append(f"arXiv: {len(arxiv_results)}건")
-    except:
-        pass
-
-    # ============================================
-    # μ΅μ’: Gemini 90% + 보쑰 10%
-    # ============================================
     progress(0.80, "λ³΄κ³ μ μμ±...")
 
-
-
-
-    # ============================================
-    # HTML λ³΄κ³ μ
-    # ============================================
 
     seen_urls = set()
     unique_sources = []
     for s in all_sources:
@@ -610,79 +1124,254 @@ def run_plagiarism(text, progress=gr.Progress()):
         if url and url not in seen_urls:
             seen_urls.add(url)
             unique_sources.append(s)
-
-
-    elif plag_pct >= …
-    elif plag_pct >= …
-
-
-
     word_count = len(split_words(text))
     char_count = len(text)
     doc_id = hashlib.md5(text[:100].encode()).hexdigest()[:8].upper()
-
     src_rows = ""
-    for i, …
-
         src_rows += f"""<tr>
-        <td style="padding:6px 8px;font-size:11px;text-align:center;border:1px solid #D5D5D5;">{i+1}</td>
-        <td style="padding:6px 8px;font-size:11px;text-align:center;border:1px solid #D5D5D5;">{ico}</td>
-        <td style="padding:6px 8px;border:1px solid #D5D5D5;font-size:11px;color:#1A3C6E;"><…
-        <td style="padding:6px 8px;border:1px solid #D5D5D5;font-…
         </tr>"""
-
-
-
     TV = 'padding:7px 10px;font-size:12px;color:#333;border:1px solid #D5D5D5;'
-
-    html = f"""<div style="font-family:'Noto Sans KR',sans-serif;max-width:780px;margin:0 auto;background:#fff;border:2px solid …
-    <…
-
-    <…
 </div>
 <div style="padding:18px 24px 0;">
 <table style="width:100%;border-collapse:collapse;">
 <tr>
-    <…
-
-    <div style="…
 </td>
-    <…
-
-    <…
-
-    <div…
-
 </div>
 </td>
 </tr>
 </table>
 </div>
-    <…
-
-    <…
-    … (old lines 664-674 removed; content not captured in this view)
 </table>
 </div>
-    <…
-
-    <…
 </div>
 </div>"""
-
-    log = "\n".join(log_lines)
-    progress(0.95, "μλ£...")
-
     return html, log
 def run_detection(text, progress=gr.Progress()):
     if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>β οΈ μ΅μ 50μ</div>",""
|
|
|
|
| 6 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 7 |
from xml.etree import ElementTree as ET
|
| 8 |
from kiwipiepy import Kiwi
|
|
|
|
| 9 |
KIWI = Kiwi()
|
| 10 |
try:
|
| 11 |
import httpx; HAS_HTTPX = True
|
|
|
|
| 15 |
from google import genai
|
| 16 |
from google.genai import types as gtypes
|
| 17 |
HAS_GENAI = True
|
| 18 |
+
except ImportError:
|
| 19 |
+
HAS_GENAI = False
|
| 20 |
+
try:
|
| 21 |
+
import olefile; HAS_OLEFILE = True
|
| 22 |
+
except ImportError:
|
| 23 |
+
HAS_OLEFILE = False
|
| 24 |
+
try:
|
| 25 |
+
import pdfplumber; HAS_PDFPLUMBER = True
|
| 26 |
+
except ImportError:
|
| 27 |
+
HAS_PDFPLUMBER = False
|
| 28 |
+
try:
|
| 29 |
+
import PyPDF2; HAS_PYPDF2 = True
|
| 30 |
+
except ImportError:
|
| 31 |
+
HAS_PYPDF2 = False
|
| 32 |
+
try:
|
| 33 |
+
from docx import Document as DocxDocument; HAS_DOCX = True
|
| 34 |
+
except ImportError:
|
| 35 |
+
HAS_DOCX = False
|
| 36 |
+
GROQ_KEY = os.getenv("GROQ_API_KEY", "")
|
| 37 |
+
GEMINI_KEY = os.getenv("GEMINI_API_KEY", "")
|
| 38 |
+
BRAVE_KEY = os.getenv("BRAVE_API_KEY", "")
|
| 39 |
+
def extract_text_from_pdf(file_path):
|
| 40 |
+
"""PDF β ν
μ€νΈ (νμ΄μ§λ³ λΆλ¦¬)"""
|
| 41 |
+
pages = []
|
| 42 |
+
if HAS_PDFPLUMBER:
|
| 43 |
+
try:
|
| 44 |
+
with pdfplumber.open(file_path) as pdf:
|
| 45 |
+
for p in pdf.pages:
|
| 46 |
+
t = p.extract_text()
|
| 47 |
+
if t: pages.append(t)
|
| 48 |
+
if pages: return pages, None
|
| 49 |
+
except Exception as e:
|
| 50 |
+
print(f"pdfplumber: {e}")
|
| 51 |
+
if HAS_PYPDF2:
|
| 52 |
+
try:
|
| 53 |
+
with open(file_path, 'rb') as f:
|
| 54 |
+
reader = PyPDF2.PdfReader(f)
|
| 55 |
+
for p in reader.pages:
|
| 56 |
+
t = p.extract_text()
|
| 57 |
+
if t: pages.append(t)
|
| 58 |
+
if pages: return pages, None
|
| 59 |
+
except Exception as e:
|
| 60 |
+
print(f"PyPDF2: {e}")
|
| 61 |
+
return None, "PDF μΆμΆ μ€ν¨ (pdfplumber, PyPDF2 μμ)"
|
| 62 |
+
def extract_text_from_docx(file_path):
|
| 63 |
+
"""DOCX β ν
μ€νΈ (λ¬Έλ¨λ³ λΆλ¦¬)"""
|
| 64 |
+
if not HAS_DOCX: return None, "python-docx μμ"
|
| 65 |
+
try:
|
| 66 |
+
doc = DocxDocument(file_path)
|
| 67 |
+
sections = []
|
| 68 |
+
current = []
|
| 69 |
+
for para in doc.paragraphs:
|
| 70 |
+
txt = para.text.strip()
|
| 71 |
+
if not txt:
|
| 72 |
+
if current:
|
| 73 |
+
sections.append('\n'.join(current))
|
| 74 |
+
current = []
|
| 75 |
+
else:
|
| 76 |
+
current.append(txt)
|
| 77 |
+
if current: sections.append('\n'.join(current))
|
| 78 |
+
if sections: return sections, None
|
| 79 |
+
return None, "DOCX ν
μ€νΈ μμ"
|
| 80 |
+
except Exception as e:
|
| 81 |
+
return None, f"DOCX μ€λ₯: {e}"
|
| 82 |
+
def extract_text_from_txt(file_path):
|
| 83 |
+
"""TXT/MD/CSV λ± β ν
μ€νΈ"""
|
| 84 |
+
for enc in ['utf-8', 'euc-kr', 'cp949', 'utf-16', 'latin-1']:
|
| 85 |
+
try:
|
| 86 |
+
with open(file_path, 'r', encoding=enc) as f:
|
| 87 |
+
text = f.read()
|
| 88 |
+
if text.strip():
|
| 89 |
+
sections = [s.strip() for s in re.split(r'\n{2,}', text) if s.strip()]
|
| 90 |
+
return sections if sections else [text], None
|
| 91 |
+
except: continue
|
| 92 |
+
return None, "ν
μ€νΈ μΈμ½λ© κ°μ§ μ€ν¨"
|
| 93 |
+
def extract_text_from_hwpx(file_path):
|
| 94 |
+
"""HWPX (ZIP κΈ°λ°) β ν
μ€νΈ"""
|
| 95 |
+
try:
|
| 96 |
+
text_parts = []
|
| 97 |
+
with zipfile.ZipFile(file_path, 'r') as zf:
|
| 98 |
+
file_list = zf.namelist()
|
| 99 |
+
section_files = sorted([f for f in file_list if f.startswith('Contents/section') and f.endswith('.xml')])
|
| 100 |
+
if not section_files:
|
| 101 |
+
section_files = sorted([f for f in file_list if 'section' in f.lower() and f.endswith('.xml')])
|
| 102 |
+
for sf_name in section_files:
|
| 103 |
+
try:
|
| 104 |
+
with zf.open(sf_name) as sf:
|
| 105 |
+
content = sf.read().decode('utf-8', errors='ignore')
|
| 106 |
+
content = re.sub(r'\sxmlns[^"]*"[^"]*"', '', content)
|
| 107 |
+
content = re.sub(r'<[a-zA-Z]+:', '<', content)
|
| 108 |
+
content = re.sub(r'</[a-zA-Z]+:', '</', content)
|
| 109 |
+
try:
|
| 110 |
+
root = ET.fromstring(content)
|
| 111 |
+
texts = []
|
| 112 |
+
for elem in root.iter():
|
| 113 |
+
if elem.tag.endswith('t') or elem.tag == 't':
|
| 114 |
+
if elem.text: texts.append(elem.text)
|
| 115 |
+
elif elem.text and elem.text.strip():
|
| 116 |
+
if any(x in elem.tag.lower() for x in ['text', 'run', 'para', 'char']):
|
| 117 |
+
texts.append(elem.text.strip())
|
| 118 |
+
if texts: text_parts.append(' '.join(texts))
|
| 119 |
+
except ET.ParseError:
|
| 120 |
+
matches = re.findall(r'>([^<]+)<', content)
|
| 121 |
+
clean = [t.strip() for t in matches if t.strip() and len(t.strip()) > 1]
|
| 122 |
+
if clean: text_parts.append(' '.join(clean))
|
| 123 |
+
except: continue
|
| 124 |
+
if text_parts:
|
| 125 |
+
return text_parts, None
|
| 126 |
+
return None, "HWPX ν
μ€νΈ μμ"
|
| 127 |
+
except zipfile.BadZipFile:
|
| 128 |
+
return None, "μ ν¨νμ§ μμ HWPX"
|
| 129 |
+
except Exception as e:
|
| 130 |
+
return None, f"HWPX μ€λ₯: {e}"
|
| 131 |
+
def _decode_hwp_para(data):
|
| 132 |
+
"""HWP λ°μ΄λ리 β λ¬Έλ¨ ν
μ€νΈ"""
|
| 133 |
+
result = []
|
| 134 |
+
i = 0
|
| 135 |
+
while i < len(data) - 1:
|
| 136 |
+
code = int.from_bytes(data[i:i+2], 'little')
|
| 137 |
+
if code in (1,2,3): i += 14
|
| 138 |
+
elif code == 9: result.append('\t')
|
| 139 |
+
elif code in (10,13): result.append('\n')
|
| 140 |
+
elif code == 24: result.append('-')
|
| 141 |
+
elif code in (30,31): result.append(' ')
|
| 142 |
+
elif code >= 32:
|
| 143 |
+
try:
|
| 144 |
+
ch = chr(code)
|
| 145 |
+
if ch.isprintable() or ch in '\n\t ': result.append(ch)
|
| 146 |
+
except: pass
|
| 147 |
+
i += 2
|
| 148 |
+
text = ''.join(result).strip()
|
| 149 |
+
text = re.sub(r'[ \t]+', ' ', text)
|
| 150 |
+
text = re.sub(r'\n{3,}', '\n\n', text)
|
| 151 |
+
return text if len(text) > 2 else None
|
| 152 |
+
def _extract_hwp_section(data):
|
| 153 |
+
"""HWP μΉμ
λ°μ΄λ리 β ν
μ€νΈ"""
|
| 154 |
+
texts = []
|
| 155 |
+
pos = 0
|
| 156 |
+
while pos < len(data) - 4:
|
| 157 |
+
try:
|
| 158 |
+
header = int.from_bytes(data[pos:pos+4], 'little')
|
| 159 |
+
tag_id = header & 0x3FF
|
| 160 |
+
size = (header >> 20) & 0xFFF
|
| 161 |
+
pos += 4
|
| 162 |
+
if size == 0xFFF:
|
| 163 |
+
if pos + 4 > len(data): break
|
| 164 |
+
size = int.from_bytes(data[pos:pos+4], 'little')
|
| 165 |
+
pos += 4
|
| 166 |
+
if pos + size > len(data): break
|
| 167 |
+
record_data = data[pos:pos+size]
|
| 168 |
+
pos += size
|
| 169 |
+
if tag_id == 67 and size > 0:
|
| 170 |
+
t = _decode_hwp_para(record_data)
|
| 171 |
+
if t: texts.append(t)
|
| 172 |
+
except:
|
| 173 |
+
pos += 1
|
| 174 |
+
return '\n'.join(texts) if texts else None
|
| 175 |
+
def extract_text_from_hwp(file_path):
|
| 176 |
+
"""HWP (OLE κΈ°λ°) β ν
μ€νΈ"""
|
| 177 |
+
if not HAS_OLEFILE: return None, "olefile μμ"
|
| 178 |
+
try:
|
| 179 |
+
ole = olefile.OleFileIO(file_path)
|
| 180 |
+
if not ole.exists('FileHeader'):
|
| 181 |
+
ole.close(); return None, "HWP ν€λ μμ"
|
| 182 |
+
header_data = ole.openstream('FileHeader').read()
|
| 183 |
+
is_compressed = (header_data[36] & 1) == 1 if len(header_data) > 36 else True
|
| 184 |
+
all_texts = []
|
| 185 |
+
for entry in ole.listdir():
|
| 186 |
+
entry_path = '/'.join(entry)
|
| 187 |
+
if entry_path.startswith('BodyText/Section'):
|
| 188 |
+
try:
|
| 189 |
+
stream = ole.openstream(entry).read()
|
| 190 |
+
if is_compressed:
|
| 191 |
+
try: stream = zlib.decompress(stream, -15)
|
| 192 |
+
except:
|
| 193 |
+
try: stream = zlib.decompress(stream)
|
| 194 |
+
except: pass
|
| 195 |
+
section_text = _extract_hwp_section(stream)
|
| 196 |
+
if section_text: all_texts.append(section_text)
|
| 197 |
+
except: continue
|
| 198 |
+
ole.close()
|
| 199 |
+
if all_texts: return all_texts, None
|
| 200 |
+
return None, "HWP ν
μ€νΈ μμ"
|
| 201 |
+
except Exception as e:
|
| 202 |
+
return None, f"HWP μ€λ₯: {e}"
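A small worked example of the record-header bit layout assumed by _extract_hwp_section above; the helper name and the sample values are illustrative only, not part of this app.

import struct

def unpack_record_header(raw: bytes):
    # One 4-byte little-endian word packs tag_id (low 10 bits), level (middle 10 bits),
    # and size (high 12 bits); size == 0xFFF signals that the real size follows in the next 4 bytes.
    (header,) = struct.unpack('<I', raw[:4])
    tag_id = header & 0x3FF
    level = (header >> 10) & 0x3FF
    size = (header >> 20) & 0xFFF
    return tag_id, level, size

# Packing tag_id=67 (the paragraph-text records collected above), level=0, size=32:
word = (32 << 20) | (0 << 10) | 67
print(unpack_record_header(struct.pack('<I', word)))   # -> (67, 0, 32)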
|
| 203 |
+
def extract_text_from_file(file_path):
|
| 204 |
+
"""
|
| 205 |
+
λ§λ₯ λ¬Έμ μΆμΆ: PDF/DOCX/HWP/HWPX/TXT β (sections_list, full_text, error)
|
| 206 |
+
sections_list: νμ΄μ§/μΉμ
λ³ ν
μ€νΈ 리μ€νΈ
|
| 207 |
+
full_text: μ 체 ν©μΉ ν
μ€νΈ
|
| 208 |
+
"""
|
| 209 |
+
if not file_path or not os.path.exists(file_path):
|
| 210 |
+
return None, None, "νμΌ μμ"
|
| 211 |
+
ext = Path(file_path).suffix.lower()
|
| 212 |
+
sections, error = None, None
|
| 213 |
+
if ext == '.pdf':
|
| 214 |
+
sections, error = extract_text_from_pdf(file_path)
|
| 215 |
+
elif ext == '.docx':
|
| 216 |
+
sections, error = extract_text_from_docx(file_path)
|
| 217 |
+
elif ext == '.hwpx':
|
| 218 |
+
sections, error = extract_text_from_hwpx(file_path)
|
| 219 |
+
elif ext == '.hwp':
|
| 220 |
+
sections, error = extract_text_from_hwp(file_path)
|
| 221 |
+
elif ext in ('.txt', '.md', '.csv', '.json', '.xml', '.html'):
|
| 222 |
+
sections, error = extract_text_from_txt(file_path)
|
| 223 |
+
else:
|
| 224 |
+
return None, None, f"μ§μνμ§ μλ νμ: {ext}"
|
| 225 |
+
if sections:
|
| 226 |
+
full = '\n\n'.join(sections)
|
| 227 |
+
return sections, full, None
|
| 228 |
+
return None, None, error or "ν
μ€νΈ μΆμΆ μ€ν¨"
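A minimal usage sketch for the extract_text_from_file dispatcher above; the file name is a placeholder.

sections, full_text, error = extract_text_from_file("sample_paper.hwpx")
if error:
    print(f"extraction failed: {error}")
else:
    print(f"{len(sections)} sections, {len(full_text)} characters extracted")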
|
| 229 |
+
def split_sentences(text):
|
| 230 |
+
try:
|
| 231 |
+
s = [x.text.strip() for x in KIWI.split_into_sents(text) if x.text.strip()]
|
| 232 |
+
if s: return s
|
| 233 |
+
except: pass
|
| 234 |
+
return [x.strip() for x in re.split(r'(?<=[.!?γ])\s+', text) if x.strip()]
|
| 235 |
+
def split_words(text):
|
| 236 |
+
return [w for w in re.findall(r'[κ°-ν£a-zA-Z0-9]+', text) if w]
|
| 237 |
+
def get_morphemes(text):
|
| 238 |
+
try:
|
| 239 |
+
r = KIWI.analyze(text)
|
| 240 |
+
if r and r[0]: return [(m.form, m.tag) for m in r[0][0]]
|
| 241 |
+
except: pass
|
| 242 |
+
return []
|
| 243 |
+
def http_get(url, headers=None, timeout=15):
|
| 244 |
+
try:
|
| 245 |
+
if HAS_HTTPX:
|
| 246 |
+
r = httpx.get(url, headers=headers or {}, timeout=timeout, follow_redirects=True)
|
| 247 |
+
return r.text if r.status_code == 200 else None
|
| 248 |
+
else:
|
| 249 |
+
import urllib.request
|
| 250 |
+
req = urllib.request.Request(url, headers=headers or {})
|
| 251 |
+
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
| 252 |
+
return resp.read().decode('utf-8', errors='replace')
|
| 253 |
+
except: return None
|
| 254 |
+
def http_post_json(url, body, headers=None, timeout=30):
|
| 255 |
+
try:
|
| 256 |
+
h = headers or {}
|
| 257 |
+
h["Content-Type"] = "application/json"
|
| 258 |
+
if HAS_HTTPX:
|
| 259 |
+
r = httpx.post(url, json=body, headers=h, timeout=timeout)
|
| 260 |
+
if r.status_code == 200: return r.json()
|
| 261 |
+
return None
|
| 262 |
+
else:
|
| 263 |
+
import urllib.request, ssl
|
| 264 |
+
req = urllib.request.Request(url, json.dumps(body).encode(), h)
|
| 265 |
+
with urllib.request.urlopen(req, timeout=timeout, context=ssl.create_default_context()) as resp:
|
| 266 |
+
return json.loads(resp.read())
|
| 267 |
+
except: return None
|
| 268 |
+
def call_groq(model, prompt, max_tokens=800, temperature=0.1):
|
| 269 |
+
if not GROQ_KEY: return None, "NO_KEY"
|
| 270 |
+
url = "https://api.groq.com/openai/v1/chat/completions"
|
| 271 |
+
h = {"Authorization": f"Bearer {GROQ_KEY}", "Content-Type": "application/json"}
|
| 272 |
+
b = {"model": model, "messages": [{"role":"user","content":prompt}], "max_tokens": max_tokens, "temperature": temperature}
|
| 273 |
+
try:
|
| 274 |
+
if HAS_HTTPX:
|
| 275 |
+
r = httpx.post(url, json=b, headers=h, timeout=45)
|
| 276 |
+
if r.status_code == 200: return r.json()["choices"][0]["message"]["content"], None
|
| 277 |
+
return None, f"HTTP {r.status_code}"
|
| 278 |
+
else:
|
| 279 |
+
import urllib.request, ssl
|
| 280 |
+
req = urllib.request.Request(url, json.dumps(b).encode(), h)
|
| 281 |
+
with urllib.request.urlopen(req, timeout=45, context=ssl.create_default_context()) as resp:
|
| 282 |
+
return json.loads(resp.read())["choices"][0]["message"]["content"], None
|
| 283 |
+
except Exception as e: return None, str(e)[:150]
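A minimal usage sketch for call_groq above; the model id shown is an assumption, not something this diff pins down.

reply, err = call_groq("llama-3.1-8b-instant", "Summarize the following paragraph: ...", max_tokens=200)
if err:
    print(f"Groq call failed: {err}")
else:
    print(reply)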
|
| 284 |
+
AI_ENDINGS = ['ν©λλ€','μ
λλ€','λ©λλ€','μ΅λλ€','μμ΅λλ€','νμ΅λλ€','κ² μ΅λλ€']
|
| 285 |
+
AI_CASUAL_ENDINGS = ['λΌκ³ ν μ μλ€','λΌκ³ λ³Ό μ μλ€','λ€κ³ μκ°νλ€','λ€κ³ νλ¨λλ€',
|
| 286 |
+
'μΈ μ
μ΄λ€','μΈ κ²μ΄λ€','λ κ²μ΄λ€','λ μ
μ΄λ€','γΉ κ²μ΄λ€','μ κ²μ΄λ€',
|
| 287 |
+
'λΌ ν μ μλ€','λ‘ λ³΄μΈλ€','λ‘ νλ¨λλ€','κ³ μλ€','λ μΆμΈλ€','λ μν©μ΄λ€',
|
| 288 |
+
'μ§ μμ μ μλ€','λΌ νκ² λ€','μμ μ μ μλ€','ν νμκ° μλ€']
|
| 289 |
+
AI_CONNS = ['λν','λ°λΌμ','κ·Έλ¬λ―λ‘','μ΄μ λ°λΌ','ννΈ','λλΆμ΄','μμΈλ¬','λΏλ§ μλλΌ',
|
| 290 |
+
'μ΄λ₯Ό ν΅ν΄','μ΄μ','κ²°κ³Όμ μΌλ‘','κΆκ·Ήμ μΌλ‘','νΉν','λμκ°','μ΄λ¬ν']
|
| 291 |
+
AI_SOFT_CONNS = ['λ¬Όλ‘ ','κ·Έλ¬λ','νμ§λ§','μ΄μ²λΌ','μ΄μ κ°μ΄','μ΄λ₯Ό λ°νμΌλ‘']
|
| 292 |
+
AI_FILLER = ['κ²μΌλ‘ 보','κ²μΌλ‘ λν','κ²μΌλ‘ μμ','ν μ μ','λ³Ό μ μ','μ£Όλͺ©ν λ§',
|
| 293 |
+
'μ€μν μν ','μ€μν μλ―Έ','κΈμ μ μΈ μν₯','λΆμ μ μΈ μν₯','νμν©λλ€','νμνλ€',
|
| 294 |
+
'μ€μν©λλ€','μ€μνλ€','μν μ ν','μν₯μ λ―Έ','κΈ°λλλ€','μμλ©λλ€','λΆκ°λκ³ ',
|
| 295 |
+
'λλλκ³ ','λ€μν λΆμΌ','λ€μν μ°μ
','λλΆμ μ±κ³Ό','νκΈ°μ μΈ λ³ν','νμ μ μΈ',
|
| 296 |
+
'μ μμ','μΈ‘λ©΄μμ','κ΄μ μμ']
|
| 297 |
+
AI_CASUAL_FILLER = ['무κΆλ¬΄μ§νλ€','무κΆλ¬΄μ§ν','κ³ΌμΈμ΄ μλ','λνκ΅¬κ° λ ','μ νμ μ΄ λ ',
|
| 298 |
+
'κΈ°λ°μΌλ‘','λ°νμΌλ‘','μλλ ₯μ΄','μ΄μμ΄ λ ','κ°μνλ','κΈλΆμ','ν¨λ¬λ€μ',
|
| 299 |
+
'μ§νμ μ΄','μλ‘μ΄ μ₯μ','λμ ν','본격νλ','κ³ λν','μ΄μ ν']
|
| 300 |
+
AI_CONCESSION = re.compile(r'λ¬Όλ‘ .{2,20}(νμ§λ§|κ·Έλ¬λ|κ·Έλ μ§λ§|λ€λ§)|.{2,15}(μ΄κΈ΄ νμ§λ§|κΈ°λ νμ§λ§|μ μμ§λ§|μλ μμ§λ§)')
|
| 301 |
+
EN_AI_MARKERS = ['furthermore','additionally','moreover','it is worth noting','in conclusion',
|
| 302 |
+
'it is important to','plays a crucial role','significant impact','various aspects',
|
| 303 |
+
'in this regard','consequently','nevertheless','integral part of','led to remarkable',
|
| 304 |
+
'fundamentally transformed','has become increasingly','it should be noted',
|
| 305 |
+
'in the context of','paradigm shift','landscape of','methodologies',
|
| 306 |
+
'transformative impact','unprecedented','in various domains']
|
| 307 |
+
HUMAN_MARKERS = {
|
| 308 |
+
'γ
γ
γ
': re.compile(r'([γ
γ
γ
γ
γ·γ±])\1{1,}'),
|
| 309 |
+
'μ΄λͺ¨ν°μ½': re.compile(r'[;:]-?[)(DPp]|\^[_\-]?\^|γ
‘γ
‘|;;'),
|
| 310 |
+
'μ€μ': re.compile(r'γΉγ
|γ
γ
|γ΄γ΄|γ
γ
|γ·γ·|γ
γ
'),
|
| 311 |
+
'λλν': re.compile(r'[!?]{2,}'),
|
| 312 |
+
'λΉκ²©μμ’
κ²°': re.compile(r'(κ±°λ |μμ|μΈλ°|μΈκ±Έ|κ°μ|λλ|μλ|λλ°|λ―Έμ³€|ν|γ
$|γ
$|μ$|μ$|λ―$)'),
|
| 313 |
+
'ꡬμ΄μΆμ½': re.compile(r'(κ±|μ’|λ§|μμ |μ§μ§|λ μ|μ‘΄λ|κ°|μ‘ΈλΌ|μ‘΄λ§|κ²λ)'),
|
| 314 |
+
'λ§μΆ€λ²μ€λ₯': re.compile(r'λ¬|λͺμΌ|κΈμ|ν μμ|κ²κ°[μλ€]|λκ°|λμ|μλ|νλ€'),
|
| 315 |
+
'λ§μ€μν': re.compile(r'\.{3,}|β¦'),
|
| 316 |
+
}
|
| 317 |
+
FP = {
|
| 318 |
+
"GPT": {"m":['λ¬Όλ‘ μ΄μ£ ','λμμ΄ λμ
¨κΈ°λ₯Ό','μ€λͺ
ν΄ λλ¦¬κ² μ΅λλ€','μΆκ° μ§λ¬Έ','λμμ΄ νμνμλ©΄',
|
| 319 |
+
'μμ½νμλ©΄','κ°λ΅ν μ 리νλ©΄','ν΅μ¬μ'],"e":['μ΅λλ€','λλ¦¬κ² μ΅λλ€'],"lp":re.compile(r'^\d+\.\s|^[-β’]\s',re.M)},
|
| 320 |
+
"Claude": {"m":['λ§μνμ ','μ΄ν΄λ³΄κ² μ΅λλ€','κ· ν μ‘ν','λ§₯λ½μμ','ν κ°μ§ μ£Όμν ','λμμ€',
|
| 321 |
+
'ν₯λ―Έλ‘μ΄ μ§λ¬Έ','볡μ‘ν μ£Όμ '],"e":['λ€μ','κ±°μμ'],"lp":re.compile(r'^\*\*.*\*\*|^#+\s',re.M)},
|
| 322 |
+
"Gemini": {"m":['λ€μκ³Ό κ°μ΅λλ€','μ λ¦¬ν΄ λλ¦¬κ² μ΅λλ€','ν΅μ¬ λ΄μ©μ','λ μκ³ μΆμΌμλ©΄',
|
| 323 |
+
'μμλ³΄κ² μ΅λλ€'],"e":['κ² μ΅λλ€','보μΈμ'],"lp":re.compile(r'^\*\s|^-\s\*\*',re.M)},
|
| 324 |
+
"Perplexity": {"m":['κ²μ κ²°κ³Όμ λ°λ₯΄λ©΄','보λμ λ°λ₯΄λ©΄','μ°κ΅¬μ λ°λ₯΄λ©΄','λ°νλ€','μ νλ€',
|
| 325 |
+
'κ²μΌλ‘ λνλ¬λ€','κ²μΌλ‘ μ‘°μ¬λλ€','κ²μΌλ‘ μ§κ³λλ€','λ°ννλ€'],"e":['λ°νλ€','λνλ¬λ€','μ νλ€'],"lp":re.compile(r'\[\d+\]',re.M)},
|
| 326 |
+
}
|
| 327 |
+
def score_sentence(sent):
|
| 328 |
+
"""λ¨μΌ λ¬Έμ₯ AI μ μ (0~100). ν1Β·ν2 곡μ . v5.0 λν κ°ν."""
|
| 329 |
+
sc = 0; reasons = []
|
| 330 |
+
sl = sent.lower().strip()
|
| 331 |
+
sr = sent.rstrip('.!?γ')
|
| 332 |
+
for e in AI_ENDINGS:
|
| 333 |
+
if sr.endswith(e): sc += 22; reasons.append(f"격μμ΄λ―Έ(-{e})"); break
|
| 334 |
+
if sc == 0: # 격μμ΄ μλ κ²½μ°λ§
|
| 335 |
+
for e in AI_CASUAL_ENDINGS:
|
| 336 |
+
if sr.endswith(e): sc += 15; reasons.append(f"λΉκ²©μAI(-{e})"); break
|
| 337 |
+
stripped = sent.strip()
|
| 338 |
+
for c in AI_CONNS:
|
| 339 |
+
if stripped.startswith(c):
|
| 340 |
+
sc += 18; reasons.append(f"AIμ μμ¬({c})"); break
|
| 341 |
+
else:
|
| 342 |
+
for c in AI_SOFT_CONNS:
|
| 343 |
+
if stripped.startswith(c): sc += 8; reasons.append(f"μ½νμ μμ¬({c})"); break
|
| 344 |
+
filler_found = sum(1 for f in AI_FILLER if f in sent)
|
| 345 |
+
casual_filler = sum(1 for f in AI_CASUAL_FILLER if f in sent)
|
| 346 |
+
total_filler = filler_found + casual_filler
|
| 347 |
+
if total_filler >= 3: sc += 25; reasons.append(f"μν¬ννΓ{total_filler}")
|
| 348 |
+
elif total_filler == 2: sc += 18; reasons.append(f"μν¬ννΓ2")
|
| 349 |
+
elif total_filler == 1: sc += 10; reasons.append(f"μν¬ννΓ1")
|
| 350 |
+
if AI_CONCESSION.search(sent): sc += 10; reasons.append("μ보ν¨ν΄")
|
| 351 |
+
for mn, fp in FP.items():
|
| 352 |
+
for m in fp["m"]:
|
| 353 |
+
if m in sent: sc += 8; reasons.append(f"{mn}μ§λ¬Έ"); break
|
| 354 |
+
en_count = sum(1 for em in EN_AI_MARKERS if em in sl)
|
| 355 |
+
if en_count >= 3: sc += 25; reasons.append(f"μμ΄AIΓ{en_count}")
|
| 356 |
+
elif en_count >= 2: sc += 18; reasons.append(f"μμ΄AIΓ{en_count}")
|
| 357 |
+
elif en_count >= 1: sc += 12; reasons.append(f"μμ΄AIΓ1")
|
| 358 |
+
has_formal = any(sr.endswith(e) for e in AI_ENDINGS)
|
| 359 |
+
has_conn = any(stripped.startswith(c) for c in AI_CONNS)
|
| 360 |
+
if has_formal and total_filler >= 1 and has_conn: sc += 8; reasons.append("볡ν©AI")
|
| 361 |
+
elif has_formal and total_filler >= 2: sc += 5; reasons.append("격μ+μν¬")
|
| 362 |
+
for n, p in HUMAN_MARKERS.items():
|
| 363 |
+
matches = p.findall(sent)
|
| 364 |
+
if matches:
|
| 365 |
+
if n in ('γ
γ
γ
','μ΄λͺ¨ν°μ½','μ€μ'): sc -= 25; reasons.append(f"μΈκ°({n})")
|
| 366 |
+
elif n in ('λΉκ²©μμ’
κ²°','ꡬμ΄μΆμ½'): sc -= 18; reasons.append(f"ꡬμ΄μ²΄({n})")
|
| 367 |
+
elif n == 'λ§μΆ€λ²μ€λ₯': sc -= 12; reasons.append("λ§μΆ€λ²μ€λ₯")
|
| 368 |
+
elif n in ('λλν','λ§μ€μν'): sc -= 10; reasons.append(f"μΈκ°({n})")
|
| 369 |
+
return max(0, min(100, sc)), reasons
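A minimal sketch of how score_sentence above combines with split_sentences; the 50-point threshold and the full_text variable are assumptions for illustration.

flagged = []
for sent in split_sentences(full_text):            # full_text from an earlier extraction step
    score, reasons = score_sentence(sent)
    if score >= 50:                                 # assumed cut-off for "AI-like" sentences
        flagged.append((score, reasons, sent))
flagged.sort(key=lambda x: x[0], reverse=True)      # most AI-like sentences first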
|
| 370 |
+
def analyze_statistics(text, sentences, words):
|
| 371 |
+
sl = [len(s) for s in sentences]
|
| 372 |
+
if len(sl) < 2: return {"score":50}
|
| 373 |
+
avg = sum(sl)/len(sl); std = math.sqrt(sum((l-avg)**2 for l in sl)/len(sl))
|
| 374 |
+
cv = std/avg if avg > 0 else 0
|
| 375 |
+
cv_score = 85 if cv<0.20 else 70 if cv<0.30 else 50 if cv<0.45 else 30 if cv<0.60 else 15
|
| 376 |
+
diffs = [abs(sl[i]-sl[i-1]) for i in range(1,len(sl))]
|
| 377 |
+
burst_score = 50
|
| 378 |
+
if diffs:
|
| 379 |
+
avg_d = sum(diffs)/len(diffs)
|
| 380 |
+
max_d = max(diffs)
|
| 381 |
+
burst_ratio = max_d / (avg_d + 1)
|
| 382 |
+
burst_score = 85 if burst_ratio < 1.8 else 65 if burst_ratio < 2.5 else 40 if burst_ratio < 3.5 else 20
|
| 383 |
+
standard_ratio = sum(1 for l in sl if 20 <= l <= 60) / len(sl)
|
| 384 |
+
std_score = 80 if standard_ratio > 0.8 else 60 if standard_ratio > 0.6 else 40 if standard_ratio > 0.4 else 20
|
| 385 |
+
extreme = sum(1 for l in sl if l < 10 or l > 80)
|
| 386 |
+
if extreme >= 2: std_score = max(10, std_score - 20)
|
| 387 |
+
elif extreme >= 1: std_score = max(15, std_score - 10)
|
| 388 |
+
wf = Counter(words); t = len(words)
|
| 389 |
+
ttr = len(wf)/t if t>0 else 0
|
| 390 |
+
vocab_score = 70 if ttr<0.45 else 55 if ttr<0.55 else 35 if ttr<0.65 else 20
|
| 391 |
+
wpc = [len(split_words(s)) for s in sentences]
|
| 392 |
+
complex_score = 50
|
| 393 |
+
if len(wpc) >= 3:
|
| 394 |
+
wpc_avg = sum(wpc)/len(wpc)
|
| 395 |
+
wpc_std = math.sqrt(sum((w-wpc_avg)**2 for w in wpc)/len(wpc))
|
| 396 |
+
wpc_cv = wpc_std/wpc_avg if wpc_avg > 0 else 0
|
| 397 |
+
complex_score = 80 if wpc_cv < 0.20 else 60 if wpc_cv < 0.35 else 35 if wpc_cv < 0.50 else 15
|
| 398 |
+
final = int(cv_score*0.20 + burst_score*0.20 + std_score*0.25 + vocab_score*0.15 + complex_score*0.20)
|
| 399 |
+
return {"score":final,"cv":round(cv,3),"ttr":round(ttr,3)}
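A worked example of the coefficient-of-variation step used above: near-uniform sentence lengths give a small CV, which analyze_statistics reads as an AI-like signal. The numbers are illustrative.

import math

lengths = [42, 44, 41, 43, 42]                      # characters per sentence
avg = sum(lengths) / len(lengths)                   # 42.4
std = math.sqrt(sum((l - avg) ** 2 for l in lengths) / len(lengths))
cv = std / avg                                      # ~0.024, i.e. cv < 0.20 -> cv_score = 85 above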
|
| 400 |
+
def analyze_korean_style(text, sentences, morphemes):
|
| 401 |
+
if not sentences: return {"score":50}
|
| 402 |
+
formal_cnt = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in AI_ENDINGS))
|
| 403 |
+
casual_ai = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in AI_CASUAL_ENDINGS))
|
| 404 |
+
fr = formal_cnt/len(sentences)
|
| 405 |
+
car = casual_ai/len(sentences)
|
| 406 |
+
ending_score = 85 if fr>0.7 else 65 if fr>0.5 else 45 if fr>0.3 else 25 if fr>0.1 else 10
|
| 407 |
+
ending_score = min(90, ending_score + int(car * 25)) # λΉκ²©μAI 보λμ€
|
| 408 |
+
conn_positions = []
|
| 409 |
+
for i, s in enumerate(sentences):
|
| 410 |
+
for c in AI_CONNS:
|
| 411 |
+
if s.strip().startswith(c): conn_positions.append(i); break
|
| 412 |
+
conn_density = len(conn_positions)/len(sentences) if sentences else 0
|
| 413 |
+
conn_score = 85 if conn_density>0.4 else 65 if conn_density>0.25 else 40 if conn_density>0.1 else 15
|
| 414 |
+
if len(conn_positions) >= 2:
|
| 415 |
+
gaps = [conn_positions[i]-conn_positions[i-1] for i in range(1,len(conn_positions))]
|
| 416 |
+
gap_cv = (math.sqrt(sum((g-sum(gaps)/len(gaps))**2 for g in gaps)/len(gaps))/(sum(gaps)/len(gaps)+0.01))
|
| 417 |
+
if gap_cv < 0.5: conn_score = min(90, conn_score + 10) # λ§€μ° κ·μΉμ β AI 보λμ€
|
| 418 |
+
filler_cnt = sum(1 for f in AI_FILLER if f in text) + sum(1 for f in AI_CASUAL_FILLER if f in text)
|
| 419 |
+
filler_score = 90 if filler_cnt>=6 else 75 if filler_cnt>=4 else 55 if filler_cnt>=2 else 30 if filler_cnt>=1 else 10
|
| 420 |
+
concession_cnt = len(AI_CONCESSION.findall(text))
|
| 421 |
+
conc_score = 80 if concession_cnt >= 2 else 55 if concession_cnt >= 1 else 20
|
| 422 |
+
human_count = sum(len(p.findall(text)) for p in HUMAN_MARKERS.values())
|
| 423 |
+
human_penalty = min(35, human_count * 8)
|
| 424 |
+
pos_score = 45
|
| 425 |
+
if morphemes:
|
| 426 |
+
pc = Counter(t for _,t in morphemes); tm = sum(pc.values())
|
| 427 |
+
noun_r = sum(pc.get(t,0) for t in ['NNG','NNP','NNB','NR'])/tm if tm else 0
|
| 428 |
+
pos_score = 70 if noun_r>0.42 else 55 if noun_r>0.38 else 35 if noun_r>0.32 else 20
|
| 429 |
+
final = max(5, int(ending_score*0.25 + conn_score*0.20 + filler_score*0.20 +
|
| 430 |
+
conc_score*0.10 + pos_score*0.15 + 10*0.10) - human_penalty)
|
| 431 |
+
return {"score":final,"formal":f"{fr:.0%}","conn":f"{conn_density:.2f}","filler":filler_cnt,"human":human_count}
|
| 432 |
+
def analyze_repetition(text, sentences, words):
|
| 433 |
+
if not sentences or len(sentences) < 2: return {"score":35}
|
| 434 |
+
tr = 0
|
| 435 |
+
if len(words)>=5:
|
| 436 |
+
tg = Counter(tuple(words[i:i+3]) for i in range(len(words)-2))
|
| 437 |
+
tr = sum(1 for c in tg.values() if c>1)/len(tg) if tg else 0
|
| 438 |
+
ngram_score = 80 if tr>0.15 else 60 if tr>0.08 else 35 if tr>0.03 else 15
|
| 439 |
+
openers_2 = []
|
| 440 |
+
openers_3 = []
|
| 441 |
+
for s in sentences:
|
| 442 |
+
ws = split_words(s)
|
| 443 |
+
if len(ws) >= 2: openers_2.append(tuple(ws[:2]))
|
| 444 |
+
if len(ws) >= 3: openers_3.append(tuple(ws[:3]))
|
| 445 |
+
opener2_score = 50
|
| 446 |
+
if openers_2:
|
| 447 |
+
unique2 = len(set(openers_2))/len(openers_2)
|
| 448 |
+
opener2_score = 80 if unique2 < 0.5 else 60 if unique2 < 0.7 else 35 if unique2 < 0.85 else 15
|
| 449 |
+
ai_only_conns = ['λν','λ°λΌμ','κ·Έλ¬λ―λ‘','μ΄μ λ°λΌ','λλΆμ΄','μμΈλ¬','λΏλ§ μλλΌ',
|
| 450 |
+
'μ΄λ₯Ό ν΅ν΄','μ΄μ','κ²°κ³Όμ μΌλ‘','κΆκ·Ήμ μΌλ‘','λμκ°','μ΄λ¬ν']
|
| 451 |
+
cr = sum(1 for s in sentences if any(s.strip().startswith(c) for c in ai_only_conns))
|
| 452 |
+
crr = cr/len(sentences) if sentences else 0
|
| 453 |
+
ai_conn_score = 85 if crr>0.35 else 65 if crr>0.2 else 40 if crr>0.08 else 15
|
| 454 |
+
templates = []
|
| 455 |
+
for s in sentences:
|
| 456 |
+
ws = split_words(s)
|
| 457 |
+
if len(ws) >= 4:
|
| 458 |
+
templates.append((ws[0], ws[-1]))
|
| 459 |
+
template_rep = 0
|
| 460 |
+
if templates:
|
| 461 |
+
tc = Counter(templates)
|
| 462 |
+
template_rep = sum(1 for c in tc.values() if c > 1) / len(tc) if tc else 0
|
| 463 |
+
template_score = 80 if template_rep > 0.3 else 55 if template_rep > 0.1 else 25
|
| 464 |
+
endings = []
|
| 465 |
+
for s in sentences:
|
| 466 |
+
sr = s.rstrip('.!?γ')
|
| 467 |
+
for e in AI_ENDINGS + ['μλ€','νλ€','λλ€','νλ€','μ΄λ€','λλ€']:
|
| 468 |
+
if sr.endswith(e): endings.append(e); break
|
| 469 |
+
ending_div = 50
|
| 470 |
+
if endings:
|
| 471 |
+
unique_e = len(set(endings))/len(endings)
|
| 472 |
+
ending_div = 80 if unique_e < 0.3 else 60 if unique_e < 0.5 else 35 if unique_e < 0.7 else 15
|
| 473 |
+
final = int(ngram_score*0.15 + opener2_score*0.20 + ai_conn_score*0.25 +
|
| 474 |
+
template_score*0.15 + ending_div*0.25)
|
| 475 |
+
return {"score":final}
|
| 476 |
+
AI_VAGUE = re.compile(r'λ€μν|μ€μν|κΈμ μ μΈ|λΆμ μ μΈ|νκΈ°μ μΈ|νμ μ μΈ|ν¨μ¨μ μΈ|체κ³μ μΈ|μ’
ν©μ μΈ|μ λ°μ μΈ|μ§μμ μΈ|μ κ·Ήμ μΈ|μλΉν|μ£Όμν')
|
| 477 |
+
CONCRETE_PROPER = re.compile(r'μΌμ±|LG|νλ|SK|μΉ΄μΉ΄μ€|λ€μ΄λ²|λ·νλ¦μ€|ꡬκΈ|μ ν|ν
μ¬λΌ|μλ§μ‘΄|λ§μ΄ν¬λ‘μννΈ|[κ°-ν£]{2,}λν|[κ°-ν£]{2,}λ³μ|[κ°-ν£]{1,3}μ[λμμ΄κ°]|[κ°-ν£]{1,3}ꡬ[λμμ΄κ°]|[κ°-ν£]{2,}λ[μμ]')
|
| 478 |
+
CONCRETE_NUMBER = re.compile(r'\d{2,}[λ§μ΅μ‘°μλ¬λ¬%κ°λ
μμΌμλ±νΈ]|\d+\.\d+%|\d{4}λ
|\d{1,2}μ')
|
| 479 |
+
CONCRETE_QUOTE = re.compile(r'μ λ°λ₯΄λ©΄|λ°νν|λ°νλ€|보λν|μ νλ€|λΌκ³ λ§|μΈν°λ·°|μ€λ¬Έ|μ‘°μ¬|ν΅κ³μ²|λ³΄κ³ μ')
|
| 480 |
+
def analyze_structure(text, sentences):
|
| 481 |
+
if not sentences: return {"score":35}
|
| 482 |
+
lt = (len(re.findall(r'^\d+[.)]\s',text,re.M)) + len(re.findall(r'^[-β’*]\s',text,re.M)) +
|
| 483 |
+
len(re.findall(r'^#+\s',text,re.M)) + len(re.findall(r'\*\*[^*]+\*\*',text)))
|
| 484 |
+
list_score = 90 if lt>=5 else 70 if lt>=3 else 45 if lt>=1 else 10
|
| 485 |
+
vague_cnt = len(AI_VAGUE.findall(text))
|
| 486 |
+
proper_cnt = len(CONCRETE_PROPER.findall(text))
|
| 487 |
+
number_cnt = len(CONCRETE_NUMBER.findall(text))
|
| 488 |
+
quote_cnt = len(CONCRETE_QUOTE.findall(text))
|
| 489 |
+
concrete_total = proper_cnt + number_cnt + quote_cnt
|
| 490 |
+
if vague_cnt >= 3 and concrete_total == 0: abstract_score = 90
|
| 491 |
+
elif vague_cnt >= 2 and concrete_total <= 1: abstract_score = 70
|
| 492 |
+
elif vague_cnt >= 1 and concrete_total == 0: abstract_score = 55
|
| 493 |
+
elif concrete_total >= 3: abstract_score = 10
|
| 494 |
+
elif concrete_total >= 2: abstract_score = 20
|
| 495 |
+
elif concrete_total >= 1: abstract_score = 30
|
| 496 |
+
else: abstract_score = 45
|
| 497 |
+
has_question = any(s.strip().endswith('?') for s in sentences)
|
| 498 |
+
has_exclaim = any(s.strip().endswith('!') for s in sentences)
|
| 499 |
+
has_ellipsis = any('...' in s or 'β¦' in s for s in sentences)
|
| 500 |
+
variety = sum([has_question, has_exclaim, has_ellipsis])
|
| 501 |
+
type_score = 15 if variety >= 2 else 40 if variety >= 1 else 65
|
| 502 |
+
puncts = re.findall(r'[!?,;:β¦β\-~]', text)
|
| 503 |
+
unique_punct = len(set(puncts))
|
| 504 |
+
punct_score = 65 if unique_punct <= 1 else 45 if unique_punct <= 3 else 20
|
| 505 |
+
paras = [p.strip() for p in text.split('\n\n') if p.strip()]
|
| 506 |
+
para_score = 35
|
| 507 |
+
if len(paras) >= 2:
|
| 508 |
+
pl = [len(split_sentences(p)) for p in paras]
|
| 509 |
+
avg_p = sum(pl)/len(pl)
|
| 510 |
+
if avg_p > 0:
|
| 511 |
+
pcv = math.sqrt(sum((l-avg_p)**2 for l in pl)/len(pl))/avg_p
|
| 512 |
+
para_score = 75 if pcv < 0.2 else 55 if pcv < 0.35 else 30
|
| 513 |
+
if len(paras) >= 3 and pl[0] < avg_p and pl[-1] < avg_p:
|
| 514 |
+
para_score = min(85, para_score + 10)
|
| 515 |
+
final = int(list_score*0.10 + abstract_score*0.40 + type_score*0.20 + punct_score*0.10 + para_score*0.20)
|
| 516 |
+
return {"score":final}
|
| 517 |
+
def analyze_model_fingerprint(text, sentences):
|
| 518 |
+
ms = {}
|
| 519 |
+
sl = text.lower()
|
| 520 |
+
for mn, fp in FP.items():
|
| 521 |
+
sc = sum(min(15,text.count(m)*5) for m in fp["m"] if text.count(m)>0)
|
| 522 |
+
lm = fp["lp"].findall(text)
|
| 523 |
+
if lm: sc += min(20,len(lm)*3)
|
| 524 |
+
em = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in fp.get("e",[])))
|
| 525 |
+
if sentences: sc += int((em/len(sentences))*20)
|
| 526 |
+
ms[mn] = min(100,sc)
|
| 527 |
+
general_ai = 0
|
| 528 |
+
general_ai += sum(5 for f in AI_CASUAL_FILLER if f in text)
|
| 529 |
+
casual_end_cnt = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in AI_CASUAL_ENDINGS))
|
| 530 |
+
general_ai += casual_end_cnt * 5
|
| 531 |
+
general_ai += len(AI_CONCESSION.findall(text)) * 8
|
| 532 |
+
ms["λΉκ²©μAI"] = min(100, general_ai)
|
| 533 |
+
en_score = sum(5 for em in EN_AI_MARKERS if em in sl)
|
| 534 |
+
ms["μμ΄AI"] = min(100, en_score)
|
| 535 |
+
mx = max(ms.values()) if ms else 0
|
| 536 |
+
multi = sum(1 for v in ms.values() if v >= 10)
|
| 537 |
+
multi_bonus = 10 if multi >= 3 else 5 if multi >= 2 else 0
|
| 538 |
+
base = 85 if mx>=50 else 65 if mx>=35 else 45 if mx>=20 else 25 if mx>=10 else 10
|
| 539 |
+
return {"score":min(95, base + multi_bonus),"model_scores":{k:v for k,v in ms.items() if k not in ("λΉκ²©μAI","μμ΄AI") or v > 0}}
|
| 540 |
+
def analyze_perplexity(text, sentences, morphemes):
|
| 541 |
+
"""νκ΅μ΄ νΉν Perplexity + Burstiness β λ¬Έμ μνΈλ‘νΌ λ³΄μ """
|
| 542 |
+
if len(sentences) < 2: return {"score": 40, "entropy": 0, "variance": 0, "order": 0, "zipf": 0}
|
| 543 |
+
chars = [c for c in text if c.strip()]
|
| 544 |
+
char_score = 45
|
| 545 |
+
if len(chars) >= 30:
|
| 546 |
+
cbigrams = [(chars[i], chars[i+1]) for i in range(len(chars)-1)]
|
| 547 |
+
cb_freq = Counter(cbigrams)
|
| 548 |
+
total_cb = len(cbigrams)
|
| 549 |
+
char_entropy = -sum((cnt/total_cb)*math.log2(cnt/total_cb) for cnt in cb_freq.values())
|
| 550 |
+
if char_entropy < 7.5: char_score = 78
|
| 551 |
+
elif char_entropy < 8.5: char_score = 62
|
| 552 |
+
elif char_entropy < 9.5: char_score = 42
|
| 553 |
+
elif char_entropy < 10.5: char_score = 25
|
| 554 |
+
else: char_score = 12
|
| 555 |
+
sl = [len(s) for s in sentences]
|
| 556 |
+
burst_score = 45
|
| 557 |
+
if len(sl) >= 3:
|
| 558 |
+
avg = sum(sl)/len(sl)
|
| 559 |
+
std = math.sqrt(sum((l-avg)**2 for l in sl)/len(sl))
|
| 560 |
+
cv = std/(avg+1e-10)
|
| 561 |
+
if cv < 0.15: burst_score = 82
|
| 562 |
+
elif cv < 0.25: burst_score = 62
|
| 563 |
+
elif cv < 0.40: burst_score = 38
|
| 564 |
+
elif cv < 0.60: burst_score = 20
|
| 565 |
+
else: burst_score = 8
|
| 566 |
+
sent_ttr = []
|
| 567 |
+
for s in sentences:
|
| 568 |
+
sw = split_words(s)
|
| 569 |
+
if len(sw) >= 3:
|
| 570 |
+
sent_ttr.append(len(set(sw))/len(sw))
|
| 571 |
+
ttr_score = 42
|
| 572 |
+
if len(sent_ttr) >= 3:
|
| 573 |
+
avg_ttr = sum(sent_ttr)/len(sent_ttr)
|
| 574 |
+
std_ttr = math.sqrt(sum((t-avg_ttr)**2 for t in sent_ttr)/len(sent_ttr))
|
| 575 |
+
if std_ttr < 0.04: ttr_score = 75
|
| 576 |
+
elif std_ttr < 0.08: ttr_score = 55
|
| 577 |
+
elif std_ttr < 0.15: ttr_score = 35
|
| 578 |
+
else: ttr_score = 15
|
| 579 |
+
endings = [s.rstrip('.!?\u2026')[-3:] for s in sentences if len(s) >= 5]
|
| 580 |
+
end_score = 40
|
| 581 |
+
if len(endings) >= 3:
|
| 582 |
+
ef = Counter(endings)
|
| 583 |
+
end_ent = -sum((c/len(endings))*math.log2(c/len(endings)) for c in ef.values())
|
| 584 |
+
max_ent = math.log2(len(ef)) if len(ef) > 1 else 1
|
| 585 |
+
norm_ent = end_ent / (max_ent + 1e-10)
|
| 586 |
+
if norm_ent < 0.5: end_score = 72
|
| 587 |
+
elif norm_ent < 0.7: end_score = 50
|
| 588 |
+
elif norm_ent < 0.85: end_score = 32
|
| 589 |
+
else: end_score = 15
|
| 590 |
+
final = int(char_score * 0.30 + burst_score * 0.30 + ttr_score * 0.20 + end_score * 0.20)
|
| 591 |
+
return {"score": final, "entropy": char_score, "variance": burst_score, "order": ttr_score, "zipf": end_score}
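A worked example of the character-bigram entropy used above; the sample string is illustrative and not from the app.

import math
from collections import Counter

sample = "the quick brown fox jumps over the lazy dog"
chars = [c for c in sample if c.strip()]
bigrams = [(chars[i], chars[i + 1]) for i in range(len(chars) - 1)]
freq = Counter(bigrams)
total = len(bigrams)
entropy = -sum((n / total) * math.log2(n / total) for n in freq.values())
# Repetitive, low-variety text yields a lower entropy, which analyze_perplexity maps to a higher AI score.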
|
| 592 |
+
HUMANIZER_OVERSUBST = re.compile(r'νμ©νλ€|μ΄μ©νλ€|μ¬μ©νλ€|μ μ©νλ€|λμ
νλ€|μ±ννλ€|μννλ€|μ§ννλ€|μ€μνλ€|μ€ννλ€')
|
| 593 |
+
HUMANIZER_AWKWARD = re.compile(r'κ·Έκ²μ|μ΄κ²μ|μ κ²μ|ν΄λΉ μ¬ν|μμ μΈκΈν|μ μ ν|μκΈ°ν|κΈ°μ λ')
|
| 594 |
+
HUMANIZER_PASSIVE = re.compile(r'λμ΄μ§[κ³ λλ©°]|νκ² λμ[λ€μ΅]|μνλ[μμ΄]|μ§νλ[μμ΄]|μ€μλ[μμ΄]|νμ©λ[μμ΄]')
|
| 595 |
+
def analyze_humanizer(text, sentences, words, morphemes):
|
| 596 |
+
"""Humanizer/Bypasser νμ§ β AI μλ¬Έ ν¨λ¬νλ μ΄μ¦ νμ λΆμ"""
|
| 597 |
+
if len(sentences) < 2: return {"score": 20, "signals": []}
|
| 598 |
+
signals = []
|
| 599 |
+
content_words = [f for f, t in morphemes if t in ('NNG', 'NNP', 'VV', 'VA')]
|
| 600 |
+
if len(content_words) >= 10:
|
| 601 |
+
cw_freq = Counter(content_words)
|
| 602 |
+
hapax = sum(1 for c in cw_freq.values() if c == 1)
|
| 603 |
+
hapax_ratio = hapax / len(cw_freq) if cw_freq else 0
|
| 604 |
+
if hapax_ratio > 0.95 and len(content_words) >= 30:
|
| 605 |
+
signals.append(("λμμ΄κ³Όλ€μΉν", 20, "ν΅μ¬ μ΄νκ° κ³Όλνκ² λΆμ°"))
|
| 606 |
+
elif hapax_ratio > 0.90 and len(content_words) >= 25:
|
| 607 |
+
signals.append(("λμμ΄μΉνμμ¬", 12, "μ΄ν λ°λ³΅ ννΌ ν¨ν΄"))
|
| 608 |
+
sl = [len(s) for s in sentences]
|
| 609 |
+
if len(sl) >= 4:
|
| 610 |
+
avg = sum(sl) / len(sl)
|
| 611 |
+
cv = math.sqrt(sum((l - avg)**2 for l in sl) / len(sl)) / (avg + 1e-10)
|
| 612 |
+
unique_ratio = len(set(words)) / len(words) if words else 0
|
| 613 |
+
if cv < 0.20 and unique_ratio > 0.80 and len(sentences) >= 5:
|
| 614 |
+
signals.append(("ꡬ쑰보쑴μ΄νλ³κ²½", 18, "λ¬Έμ₯ ꡬ쑰 κ· μΌ + λΉμ μμ μ΄ν λ€μμ±"))
|
| 615 |
+
residual = 0
|
| 616 |
+
conn_positions = []
|
| 617 |
+
for i, s in enumerate(sentences):
|
| 618 |
+
stripped = s.strip()
|
| 619 |
+
for c in ['λν','νΉν','ννΈ','λλΆμ΄','μμΈλ¬','λμκ°','μ΄μ','κ²λ€κ°','λ°λ©΄','κ²°κ΅']:
|
| 620 |
+
if stripped.startswith(c):
|
| 621 |
+
conn_positions.append(i)
|
| 622 |
+
break
|
| 623 |
+
if len(conn_positions) >= 2:
|
| 624 |
+
gaps = [conn_positions[i] - conn_positions[i-1] for i in range(1, len(conn_positions))]
|
| 625 |
+
if gaps and max(gaps) - min(gaps) <= 1: # κ±°μ λ±κ°κ²©
|
| 626 |
+
signals.append(("μ μμ¬λ±κ°κ²©μμ‘΄", 15, "μ μμ¬ λ°°μΉκ° κ·μΉμ (AI μλ¬Έ ꡬ쑰 μμ‘΄)"))
|
| 627 |
+
residual += 15
|
| 628 |
+
oversubst = len(HUMANIZER_OVERSUBST.findall(text))
|
| 629 |
+
awkward = len(HUMANIZER_AWKWARD.findall(text))
|
| 630 |
passive = len(HUMANIZER_PASSIVE.findall(text))
|
| 631 |
if oversubst >= 3:
|
| 632 |
signals.append(("μ μ¬λμ¬λ무", 12, f"νμ©/μ΄μ©/μ¬μ©/μ μ© λ± {oversubst}κ°"))
|
|
|
|
| 949 |
pass
|
| 950 |
return results[:3]
|
| 951 |
def gemini_plagiarism_check(text_chunk):
|
| 952 |
+
"""Gemini + Google Search GroundingμΌλ‘ νμ κ²μ¬"""
|
| 953 |
if not HAS_GENAI or not GEMINI_KEY: return None
|
| 954 |
try:
|
| 955 |
client = genai.Client(api_key=GEMINI_KEY)
|
| 956 |
tool = gtypes.Tool(google_search=gtypes.GoogleSearch())
|
| 957 |
+
prompt = f"""λ€μ ν
μ€νΈκ° μΈν°λ·μ μ‘΄μ¬νλμ§ Google Searchλ‘ νμΈνμΈμ.
|
| 958 |
+
μ μ¬ν λ¬Έμ₯μ΄ λ°κ²¬λλ©΄ μΆμ² URLκ³Ό μ μ¬λ(%)λ₯Ό λ³΄κ³ νμΈμ.
|
| 959 |
+
λ§μ§λ§ μ€μ "μ μ¬λ: XX%" νμμΌλ‘ μμ±.
|
| 960 |
+
[ν
μ€νΈ]
|
| 961 |
+
{text_chunk[:1000]}"""
|
|
|
|
| 962 |
resp = client.models.generate_content(
|
| 963 |
model="gemini-2.0-flash-lite",
|
| 964 |
contents=prompt,
|
| 965 |
+
config=gtypes.GenerateContentConfig(tools=[tool], temperature=0.1, max_output_tokens=600)
|
| 966 |
)
|
| 967 |
text_resp = resp.text if resp.text else ""
|
| 968 |
sources = []
|
|
|
|
|
|
|
| 969 |
if hasattr(resp, 'candidates') and resp.candidates:
|
| 970 |
gc = resp.candidates[0].grounding_metadata
|
| 971 |
if gc and hasattr(gc, 'grounding_chunks'):
|
| 972 |
for chunk in gc.grounding_chunks:
|
| 973 |
if hasattr(chunk, 'web') and chunk.web:
|
| 974 |
+
sources.append({"title": chunk.web.title or "", "url": chunk.web.uri or "", "source": "Google"})
|
| 975 |
+
pm = re.search(r'μ μ¬λ[:\s]*(\d+)', text_resp)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 976 |
pct = int(pm.group(1)) if pm else 0
|
| 977 |
+
return {"pct": pct, "response": text_resp, "sources": sources}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 978 |
except Exception as e:
|
| 979 |
+
return {"pct": 0, "response": str(e)[:100], "sources": []}
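A minimal usage sketch for the rewritten gemini_plagiarism_check above; the input block is a placeholder and the call only does anything when GEMINI_API_KEY is configured.

result = gemini_plagiarism_check("A few sentences of the document to verify against the web.")
if result:
    print(f"similarity: {result['pct']}%")
    for src in result["sources"]:
        print(src["title"], src["url"])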
|
|
|
|
| 980 |
def parallel_brave_search(queries, max_workers=10):
|
| 981 |
"""Brave Search λ³λ ¬ μ€ν (μ΅λ 20κ°)"""
|
| 982 |
all_results = {}
|
|
|
|
| 1020 |
all_results.extend(duckduckgo_search(f"{query} λ
Όλ¬Έ νμ ", 2))
|
| 1021 |
return all_results
|
| 1022 |
def run_plagiarism(text, progress=gr.Progress()):
|
|
|
|
|
|
|
| 1023 |
if not text or len(text.strip())<50:
|
| 1024 |
return "<div style='padding:20px;text-align:center;color:#888;'>β οΈ μ΅μ 50μ μ΄μ</div>", ""
|
|
|
|
| 1025 |
text = text.strip()
|
| 1026 |
sents = split_sentences(text)
|
| 1027 |
now = datetime.now().strftime("%Y-%m-%d %H:%M")
|
| 1028 |
+
has_brave = bool(BRAVE_KEY)
|
| 1029 |
+
has_gemini = bool(HAS_GENAI and GEMINI_KEY)
|
| 1030 |
progress(0.05, "λ¬Έμ₯ λΆλ¦¬...")
|
| 1031 |
+
blocks = []
|
| 1032 |
+
for i in range(0, len(sents), 4):
|
| 1033 |
+
block = ' '.join(sents[i:i+4])
|
| 1034 |
+
if len(block) > 20:
|
| 1035 |
+
blocks.append({"text": block, "sent_indices": list(range(i, min(i+4, len(sents))))})
|
| 1036 |
all_sources = []
|
| 1037 |
+
sent_matches = {i: [] for i in range(len(sents))} # λ¬Έμ₯λ³ λ§€μΉ μ 보
|
| 1038 |
+
block_results = []
|
| 1039 |
log_lines = []
|
| 1040 |
+
if has_brave:
|
| 1041 |
+
progress(0.15, f"Brave Search λ³λ ¬ κ²μ ({len(blocks)}λΈλ‘)...")
|
| 1042 |
+
queries = []
|
| 1043 |
+
for b in blocks:
|
| 1044 |
+
key_phrase = b["text"][:60].strip()
|
| 1045 |
+
queries.append(f'"{key_phrase}"')
|
| 1046 |
+
brave_results = parallel_brave_search(queries[:20])
|
| 1047 |
+
for q, results in brave_results.items():
|
| 1048 |
+
for r in results:
|
| 1049 |
+
all_sources.append(r)
|
| 1050 |
+
for b in blocks:
|
| 1051 |
+
if q.strip('"') in b["text"][:60]:
|
| 1052 |
+
for si in b["sent_indices"]:
|
| 1053 |
+
sent_matches[si].append({"source": r["title"], "url": r["url"], "type": "Brave"})
|
| 1054 |
+
log_lines.append(f"Brave Search: {len(queries)}쿼리 β {sum(len(v) for v in brave_results.values())}건")
|
| 1055 |
+
else:
|
| 1056 |
+
progress(0.15, f"μ체 μΉ κ²μ ({len(blocks)}λΈλ‘)...")
|
| 1057 |
+
crawl_queries = []
|
| 1058 |
+
for b in blocks[:10]: # μ΅λ 10λΈλ‘
|
| 1059 |
+
key_phrase = b["text"][:50].strip()
|
| 1060 |
+
crawl_queries.append((key_phrase, b))
|
| 1061 |
+
with ThreadPoolExecutor(max_workers=5) as executor:
|
| 1062 |
+
futures = {executor.submit(self_crawl_search, q, 3): (q, b) for q, b in crawl_queries}
|
| 1063 |
+
for future in as_completed(futures):
|
| 1064 |
+
q, b = futures[future]
|
| 1065 |
+
try:
|
| 1066 |
+
results = future.result()
|
| 1067 |
+
for r in results:
|
| 1068 |
+
all_sources.append(r)
|
| 1069 |
+
for si in b["sent_indices"]:
|
| 1070 |
+
sent_matches[si].append({"source": r["title"], "url": r["url"], "type": r.get("source","Web")})
|
| 1071 |
+
except: pass
|
| 1072 |
+
log_lines.append(f"μ체 μΉκ²μ: {len(crawl_queries)}쿼리 (DuckDuckGo)")
|
| 1073 |
+
progress(0.40, "νμ DB κ²μ (KCI/RISS/arXiv)...")
|
| 1074 |
+
words = split_words(text)
|
| 1075 |
+
wf = Counter(words)
|
| 1076 |
+
keywords = [w for w, c in wf.most_common(20) if len(w) >= 2 and c >= 2][:5]
|
| 1077 |
+
kw_query = ' '.join(keywords[:3])
|
| 1078 |
+
academic_results = []
|
| 1079 |
+
with ThreadPoolExecutor(max_workers=3) as executor:
|
| 1080 |
+
futures = [
|
| 1081 |
+
executor.submit(search_kci, kw_query),
|
| 1082 |
+
executor.submit(search_riss, kw_query),
|
| 1083 |
+
executor.submit(search_arxiv, kw_query),
|
| 1084 |
+
]
|
| 1085 |
+
for future in as_completed(futures):
|
| 1086 |
+
try:
|
| 1087 |
+
results = future.result()
|
| 1088 |
+
academic_results.extend(results)
|
| 1089 |
+
all_sources.extend(results)
|
| 1090 |
+
except: pass
|
| 1091 |
+
log_lines.append(f"νμ DB: KCI/RISS/arXiv β {len(academic_results)}건")
|
| 1092 |
+
gemini_results = []
|
| 1093 |
+
if has_gemini:
|
| 1094 |
+
progress(0.60, "Gemini + Google Search...")
|
| 1095 |
+
for i, b in enumerate(blocks[:5]): # μ΅λ 5λΈλ‘
|
| 1096 |
+
gr_result = gemini_plagiarism_check(b["text"])
|
| 1097 |
+
if gr_result:
|
| 1098 |
+
gemini_results.append(gr_result)
|
| 1099 |
+
for src in gr_result.get("sources", []):
|
| 1100 |
+
all_sources.append(src)
|
| 1101 |
+
for si in b["sent_indices"]:
|
| 1102 |
+
sent_matches[si].append({"source": src.get("title",""), "url": src.get("url",""), "type": "Google"})
|
| 1103 |
+
log_lines.append(f"Gemini: {len(blocks[:5])}λΈλ‘ β {sum(len(r.get('sources',[])) for r in gemini_results)}μΆμ²")
|
|
|
|
| 1104 |
progress(0.80, "λ³΄κ³ μ μμ±...")
|
| 1105 |
+
matched_sents = sum(1 for si, matches in sent_matches.items() if matches)
|
| 1106 |
+
total_sents = len(sents)
|
| 1107 |
|
| 1108 |
+
# β
Gemini 90% (λ©μΈ) + Brave/νμ DB 10% (보쑰)
|
| 1109 |
+
brave_pct = int(matched_sents / total_sents * 100) if total_sents > 0 else 0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1110 |
|
| 1111 |
+
if gemini_results:
|
| 1112 |
+
gemini_pcts = [r["pct"] for r in gemini_results if r["pct"] > 0]
|
| 1113 |
+
if gemini_pcts:
|
| 1114 |
+
gemini_avg = sum(gemini_pcts) / len(gemini_pcts)
|
| 1115 |
+
plag_pct = int(gemini_avg * 0.9 + brave_pct * 0.1)
|
| 1116 |
+
else:
|
| 1117 |
+
plag_pct = brave_pct
|
| 1118 |
+
else:
|
| 1119 |
+
plag_pct = brave_pct
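# Worked example of the 90/10 blend above (illustrative numbers, not real results):
#   gemini_pcts = [40, 60]  ->  gemini_avg = 50
#   brave_pct   = 20        ->  plag_pct = int(50 * 0.9 + 20 * 0.1) = 47
# When no Gemini block returns a usable percentage, the keyword/web match rate stands alone.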
|
| 1120 |
seen_urls = set()
|
| 1121 |
unique_sources = []
|
| 1122 |
for s in all_sources:
|
|
|
|
| 1124 |
if url and url not in seen_urls:
|
| 1125 |
seen_urls.add(url)
|
| 1126 |
unique_sources.append(s)
|
| 1127 |
+
if plag_pct >= 50: grade, grade_color, grade_bg = "νμ μμ¬", "#FF4444", "#FFE0E0"
|
| 1128 |
+
elif plag_pct >= 30: grade, grade_color, grade_bg = "μ£Όμ νμ", "#FF8800", "#FFF0DD"
|
| 1129 |
+
elif plag_pct >= 15: grade, grade_color, grade_bg = "μ μ¬ νν μΌλΆ", "#DDAA00", "#FFFBE0"
|
| 1130 |
+
elif plag_pct >= 5: grade, grade_color, grade_bg = "μνΈ", "#4ECDC4", "#E0FFF8"
|
| 1131 |
+
else: grade, grade_color, grade_bg = "μ°μ (μλ³Έμ± λμ)", "#22AA44", "#E0FFE8"
|
| 1132 |
+
    sent_analysis = []
    for i, s in enumerate(sents):
        matches = sent_matches.get(i, [])
        if matches:
            best = matches[0]
            sent_analysis.append({"idx": i, "text": s, "matched": True, "source": best.get("source", "")[:40], "url": best.get("url", ""), "type": best.get("type", "")})
        else:
            sent_analysis.append({"idx": i, "text": s, "matched": False})
    sim_sents = [s for s in sent_analysis if s["matched"]]
    src_groups = {}
    for src in unique_sources:
        key = src.get("url", "")[:80]
        if key not in src_groups:
            src_groups[key] = {"title": src.get("title", ""), "url": src.get("url", ""), "source": src.get("source", ""), "count": 0}
        src_groups[key]["count"] += 1
    src_list = sorted(src_groups.values(), key=lambda x: -x["count"])
    methods_used = []
    if has_brave: methods_used.append("Brave Search (parallel)")
    elif all_sources: methods_used.append("DuckDuckGo (self-crawl)")
    methods_used.append("KCI · RISS · arXiv")
    if has_gemini: methods_used.append("Gemini + Google Search")
    method_str = " + ".join(methods_used)
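    # Display metadata: word/character counts and a short document ID. The ID is the
    # first 8 hex digits of the MD5 of the first 100 characters, uppercased; it is a
    # human-readable label for the report, not a hash of the full document.
    # e.g. hashlib.md5("abc".encode()).hexdigest()[:8].upper() == "90015098"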
    gc = grade_color
    word_count = len(split_words(text))
    char_count = len(text)
    doc_id = hashlib.md5(text[:100].encode()).hexdigest()[:8].upper()
    similarity_pct = plag_pct
    citation_pct = 0
    cat_suspect = len(sim_sents)                         # suspected sentences
    cat_cited = 0                                        # quoted/cited sentences (citation detection)
    cat_normal = total_sents - cat_suspect - cat_cited   # ordinary sentences
    cat_suspect_pct = int(cat_suspect / max(1, total_sents) * 100)
    cat_normal_pct = 100 - cat_suspect_pct

    def src_icon(s):
        src = s.get("source", "").lower()
        if "kci" in src: return "📄", "KCI"
        if "riss" in src: return "📄", "RISS"
        if "arxiv" in src: return "📄", "arXiv"
        if "google" in src: return "🌐", "Google"
        if "brave" in src: return "🌐", "Brave"
        return "🌐", "Web"
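    # Per-source "similarity" column: a display heuristic rather than a true overlap
    # measure. It scales the number of grouped hits for a URL against the sentence
    # count (count / total_sents * 100 * 3, capped at 100); e.g. one hit against a
    # four-sentence document yields int(1/4 * 100 * 3) = 75.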
    src_rows = ""
    for i, sg in enumerate(src_list[:15]):
        pct = min(100, int(sg["count"] / max(1, total_sents) * 100 * 3))
        ico, stype = src_icon(sg)
        title_short = sg["title"][:50] or "(untitled)"
        url_short = sg["url"][:60]
        src_rows += f"""<tr>
          <td style="padding:6px 8px;font-size:11px;text-align:center;border:1px solid #D5D5D5;color:#555;">{i+1}</td>
          <td style="padding:6px 8px;font-size:11px;text-align:center;border:1px solid #D5D5D5;"><span style="font-size:14px;">{ico}</span><br><span style="font-size:9px;color:#666;">{stype}</span></td>
          <td style="padding:6px 8px;border:1px solid #D5D5D5;"><div style="font-size:11px;font-weight:600;color:#1A3C6E;margin-bottom:2px;">{title_short}</div><div style="font-size:9px;color:#888;word-break:break-all;">{url_short}</div></td>
          <td style="padding:6px 8px;font-size:12px;text-align:center;border:1px solid #D5D5D5;font-weight:800;color:#D63031;">{pct}%</td>
          <td style="padding:6px 10px;border:1px solid #D5D5D5;"><div style="background:#EDEDED;height:14px;border-radius:2px;overflow:hidden;"><div style="background:linear-gradient(90deg,#D63031,#FF7675);height:100%;width:{max(3,pct)}%;border-radius:2px;"></div></div></td>
        </tr>"""
    suspect_rows = ""
    for i, sa in enumerate(sim_sents[:15]):
        suspect_rows += f"""<tr>
          <td style="padding:8px;font-size:11px;text-align:center;color:#888;border:1px solid #D5D5D5;vertical-align:top;">{i+1}</td>
          <td style="padding:8px;font-size:11px;line-height:1.7;border:1px solid #D5D5D5;vertical-align:top;"><span style="background:#FFF3CD;border-bottom:2px solid #FFD43B;padding:1px 3px;">{sa["text"][:90]}</span></td>
          <td style="padding:8px;font-size:10px;line-height:1.6;border:1px solid #D5D5D5;vertical-align:top;color:#555;"><span style="background:#FFE0E0;border-bottom:2px solid #E74C3C;padding:1px 3px;">{sa["text"][:70]}...</span></td>
          <td style="padding:8px;font-size:10px;border:1px solid #D5D5D5;vertical-align:top;"><a href="{sa.get('url','#')}" target="_blank" style="color:#2E86C1;text-decoration:none;font-weight:600;">{sa["source"][:28]}</a><br><span style="font-size:8px;color:#AAA;">{sa.get('type','')}</span></td>
        </tr>"""
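    # Highlighted full text: every sentence is re-emitted in order, and matched
    # sentences get a red highlight plus a title-attribute tooltip naming the
    # suspected source, so hovering in the rendered report shows the match origin.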
    full_hl = ""
    for sa in sent_analysis:
        sidx = sa["idx"] + 1
        if sa["matched"]:
            full_hl += f'<span style="background:#FFD6D6;border-bottom:2px solid #E74C3C;padding:1px 2px;cursor:pointer;" title="[Suspect #{sidx}] Source: {sa.get("source","")}">{sa["text"]}</span> '
        else:
            full_hl += f'<span style="color:#333;">{sa["text"]}</span> '
    bar_suspect_w = max(2, cat_suspect_pct) if cat_suspect > 0 else 0
    bar_normal_w = 100 - bar_suspect_w
    HDR_BG = '#3B7DD8'
    HDR_BG2 = '#4A8DE0'
    TH = 'padding:8px 10px;font-size:10px;font-weight:700;color:#fff;background:{};text-align:center;border:1px solid {};'.format(HDR_BG, HDR_BG)
    TL = 'padding:7px 10px;font-size:11px;color:#444;font-weight:600;background:#EDF2FA;border:1px solid #D5D5D5;'
    TV = 'padding:7px 10px;font-size:12px;color:#333;border:1px solid #D5D5D5;'
    SEC = 'font-size:13px;font-weight:800;color:#1A3C6E;margin:0 0 10px 0;padding:8px 12px;background:#EDF2FA;border-left:4px solid {};border-bottom:1px solid #D5D5D5;'.format(HDR_BG)
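    # Layout note for the template below: the donut chart is a plain SVG circle with
    # r = 15.9, so its circumference is about 2 * pi * 15.9 ≈ 99.9 ≈ 100 units and the
    # stroke-dasharray "{plag_pct} {100-plag_pct}" maps percentages directly onto arc
    # length (the braces here refer to the f-string placeholders used in the template).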
    html = f"""<div style="font-family:'Noto Sans KR','Malgun Gothic','Apple SD Gothic Neo',sans-serif;max-width:780px;margin:0 auto;background:#fff;border:2px solid #3B7DD8;box-shadow:0 2px 12px rgba(0,0,0,0.08);">
    <!-- Header (CopyKiller style) -->
    <div style="background:linear-gradient(135deg,{HDR_BG},{HDR_BG2});padding:18px 24px;color:#fff;">
      <table style="width:100%;"><tr>
        <td>
          <div style="font-size:10px;opacity:0.8;letter-spacing:1px;margin-bottom:4px;">AI TEXT DETECTOR · PLAGIARISM REPORT</div>
          <div style="font-size:22px;font-weight:900;letter-spacing:-0.5px;">Plagiarism Check Report</div>
        </td>
        <td style="text-align:right;vertical-align:bottom;">
          <div style="font-size:10px;opacity:0.7;">Document No. {doc_id}</div>
          <div style="font-size:10px;opacity:0.7;">{now}</div>
        </td>
      </tr></table>
    </div>
    <!-- Submission info table -->
    <div style="padding:16px 24px 0;">
      <div style="{SEC}">Check information</div>
      <table style="width:100%;border-collapse:collapse;">
        <tr>
          <td style="{TL}width:90px;">Checked at</td>
          <td style="{TV}">{now}</td>
          <td style="{TL}width:90px;">Document No.</td>
          <td style="{TV}">{doc_id}</td>
        </tr>
        <tr>
          <td style="{TL}">Method</td>
          <td style="{TV}font-size:10px;" colspan="3">{method_str}</td>
        </tr>
        <tr>
          <td style="{TL}">Length</td>
          <td style="{TV}" colspan="3">Characters <b>{char_count:,}</b> · Words <b>{word_count:,}</b> · Sentences <b>{total_sents}</b></td>
        </tr>
        <tr>
          <td style="{TL}">Search scope</td>
          <td style="{TV}" colspan="3">Web pages, Korean academic papers (KCI·RISS), international papers (arXiv), Google Scholar</td>
        </tr>
      </table>
    </div>
    <!-- Check result (donut + bars) -->
    <div style="padding:18px 24px 0;">
      <div style="{SEC}">Check result</div>
      <table style="width:100%;border-collapse:collapse;">
        <tr>
          <!-- Donut chart -->
          <td style="width:200px;padding:20px;text-align:center;border:1px solid #D5D5D5;vertical-align:middle;background:#FAFBFE;">
            <div style="position:relative;width:140px;height:140px;margin:0 auto;">
              <svg viewBox="0 0 36 36" style="width:140px;height:140px;transform:rotate(-90deg);">
                <circle cx="18" cy="18" r="15.9" fill="none" stroke="#E8ECF0" stroke-width="2.8"/>
                <circle cx="18" cy="18" r="15.9" fill="none" stroke="{gc}" stroke-width="2.8" stroke-dasharray="{plag_pct} {100-plag_pct}" stroke-linecap="round"/>
              </svg>
              <div style="position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);text-align:center;">
                <div style="font-size:9px;color:#888;margin-bottom:2px;">Plagiarism rate</div>
                <div style="font-size:32px;font-weight:900;color:{gc};line-height:1;">{plag_pct}<span style="font-size:14px;font-weight:700;">%</span></div>
              </div>
            </div>
            <div style="margin-top:8px;padding:4px 12px;background:{grade_bg};border:1px solid {gc};border-radius:20px;display:inline-block;">
              <span style="font-size:11px;font-weight:800;color:{gc};">{grade}</span>
            </div>
          </td>
          <!-- Detailed results -->
          <td style="padding:0;border:1px solid #D5D5D5;vertical-align:top;">
            <!-- Category bar (CopyKiller-style core view) -->
            <div style="padding:14px 18px;border-bottom:1px solid #E8E8E8;">
              <div style="display:flex;height:28px;border-radius:4px;overflow:hidden;border:1px solid #D0D0D0;margin-bottom:8px;">
                <div style="background:#E74C3C;width:{bar_suspect_w}%;"></div>
                <div style="background:#E8ECF0;width:{bar_normal_w}%;"></div>
              </div>
              <div style="display:flex;gap:14px;flex-wrap:wrap;font-size:9px;">
                <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#E74C3C;border-radius:2px;"></span> Suspected <b>{cat_suspect}</b></span>
                <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#F39C12;border-radius:2px;"></span> Source cited <b>0</b></span>
                <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#3498DB;border-radius:2px;"></span> Quoted <b>{cat_cited}</b></span>
                <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#E8ECF0;border:1px solid #CCC;border-radius:2px;"></span> Normal <b>{cat_normal}</b></span>
              </div>
            </div>
            <!-- Three ratio bars -->
            <div style="padding:12px 18px;">
              <div style="margin-bottom:10px;">
                <div style="display:flex;justify-content:space-between;margin-bottom:4px;"><span style="font-size:11px;font-weight:700;color:#E74C3C;">■ Plagiarism rate</span><span style="font-size:18px;font-weight:900;color:#E74C3C;">{plag_pct}%</span></div>
                <div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#E74C3C,#FF7675);height:100%;width:{max(1,plag_pct)}%;transition:width 0.5s;"></div></div>
              </div>
              <div style="margin-bottom:10px;">
                <div style="display:flex;justify-content:space-between;margin-bottom:4px;"><span style="font-size:11px;font-weight:700;color:#F39C12;">■ Similarity</span><span style="font-size:18px;font-weight:900;color:#F39C12;">{similarity_pct}%</span></div>
                <div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#F39C12,#FFEAA7);height:100%;width:{max(1,similarity_pct)}%;transition:width 0.5s;"></div></div>
              </div>
              <div>
                <div style="display:flex;justify-content:space-between;margin-bottom:4px;"><span style="font-size:11px;font-weight:700;color:#3498DB;">■ Citation rate</span><span style="font-size:18px;font-weight:900;color:#3498DB;">{citation_pct}%</span></div>
                <div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#3498DB,#85C1E9);height:100%;width:{max(1,citation_pct)}%;transition:width 0.5s;"></div></div>
              </div>
            </div>
            <!-- Summary figures -->
            <div style="padding:8px 18px;background:#F8F9FB;border-top:1px solid #E8E8E8;">
              <table style="width:100%;border-collapse:collapse;">
                <tr>
                  <td style="padding:4px;font-size:10px;color:#888;">Suspect sentences</td>
                  <td style="padding:4px;font-size:12px;font-weight:800;color:#E74C3C;">{cat_suspect}</td>
                  <td style="padding:4px;font-size:10px;color:#888;">Normal sentences</td>
                  <td style="padding:4px;font-size:12px;font-weight:800;color:#27AE60;">{cat_normal}</td>
                  <td style="padding:4px;font-size:10px;color:#888;">Total</td>
                  <td style="padding:4px;font-size:12px;font-weight:800;color:#333;">{total_sents}</td>
                </tr>
              </table>
            </div>
          </td>
        </tr>
      </table>
    </div>
    <!-- Full-text analysis -->
    <div style="padding:18px 24px 0;">
      <div style="{SEC}">Full-text analysis</div>
      <div style="display:flex;gap:16px;margin-bottom:8px;font-size:9px;padding:6px 10px;background:#F8F9FB;border:1px solid #E8E8E8;border-radius:4px;">
        <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#E74C3C;border-radius:2px;"></span> Suspected plagiarism</span>
        <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#F39C12;border-radius:2px;"></span> Source cited</span>
        <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#3498DB;border-radius:2px;"></span> Quoted</span>
        <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#27AE60;border-radius:2px;"></span> Self-plagiarism</span>
        <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#fff;border:1px solid #CCC;border-radius:2px;"></span> Normal</span>
      </div>
      <div style="padding:14px;background:#FAFBFC;border:1px solid #D5D5D5;line-height:2.1;font-size:13px;max-height:300px;overflow-y:auto;">{full_hl}</div>
    </div>
    <!-- Suspected sources -->
    <div style="padding:18px 24px 0;">
      <div style="{SEC}">Suspected sources ({len(src_list)})</div>
      <table style="width:100%;border-collapse:collapse;">
        <tr>
          <th style="{TH}width:32px;">No</th>
          <th style="{TH}width:48px;">Type</th>
          <th style="{TH}">Source / URL</th>
          <th style="{TH}width:55px;">Similarity</th>
          <th style="{TH}width:100px;">Distribution</th>
        </tr>
        {src_rows if src_rows else '<tr><td colspan="5" style="padding:18px;text-align:center;color:#999;font-size:11px;border:1px solid #D5D5D5;background:#FAFBFC;">No similar sources were found.</td></tr>'}
      </table>
    </div>
    <!-- Suspect sentence comparison -->
    <div style="padding:18px 24px 0;">
      <div style="{SEC}">Suspect sentence comparison ({len(sim_sents)})</div>
      <table style="width:100%;border-collapse:collapse;">
        <tr>
          <th style="{TH}width:32px;">No</th>
          <th style="{TH}width:38%;">Checked sentence (submitted text)</th>
          <th style="{TH}width:34%;">Compared sentence (source)</th>
          <th style="{TH}">Source</th>
        </tr>
        {suspect_rows if suspect_rows else '<tr><td colspan="4" style="padding:18px;text-align:center;color:#999;font-size:11px;border:1px solid #D5D5D5;background:#FAFBFC;">No suspect sentences were found.</td></tr>'}
      </table>
    </div>
    <!-- Check notes -->
    <div style="margin:18px 24px;padding:12px 14px;background:#F5F8FC;border:1px solid #D0DAEA;border-radius:4px;font-size:9px;color:#555;line-height:1.8;">
      <b style="color:#333;">Check notes</b><br>
      · This report is an automated plagiarism check based on <b>{method_str}</b>.<br>
      · Search scope: web pages, Korean academic papers (KCI, RISS), international papers (arXiv)<br>
      · Similarity is based on sentence-level matching; a final decision requires review by an instructor or reviewer.<br>
      · Sentences with citation markers (quotation marks, footnotes, etc.) may be classified as citations.
    </div>
    <!-- Footer (CopyKiller style) -->
    <div style="padding:10px 24px;background:#F0F3F8;border-top:2px solid {HDR_BG};display:flex;justify-content:space-between;align-items:center;">
      <div>
        <span style="font-size:13px;font-weight:900;color:{HDR_BG};">AI Detector</span>
        <span style="font-size:9px;color:#999;margin-left:6px;">Plagiarism Checker v3.5</span>
      </div>
      <div style="text-align:right;">
        <div style="font-size:9px;color:#AAA;">Powered by Brave · KCI · RISS · arXiv · Gemini</div>
        <div style="font-size:8px;color:#CCC;">{now} · ID: {doc_id} · All Rights Reserved.</div>
      </div>
    </div>
    </div>"""
    log = '\n'.join(log_lines) + f"\n\nOverall: {plag_pct}% {grade} | sources {len(unique_sources)} | similar sentences {matched_sents}/{total_sents}"
    return html, log
def run_detection(text, progress=gr.Progress()):
    if not text or len(text.strip()) < 50:
        return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ Please enter at least 50 characters.</div>", ""