openfree committed on
Commit
0f3de24
·
verified ·
1 Parent(s): 2acc9c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -476
app.py CHANGED
@@ -1,11 +1,3 @@
1
- """
2
- AI 글 판별기 v5.1 — 5축+Perplexity+Humanizer+모델추정 + 품질 + LLM교차검증 + 표절
3
- ═══════════════════════════════════════════════════════════════════════════
4
- ★ v5.1 3대 킬러: Perplexity 확률분석 · Humanizer/Bypasser 탐지 · AI 모델 추정
5
- ★ 5축 AI 탐지 | 6항목 품질 | LLM 교차검증 (GPT-OSS-120B · Qwen3-32B · Kimi-K2)
6
- ★ 표절: Brave Search 병렬(최대20) + KCI/RISS/ARXIV + Gemini + CopyKiller 보고서
7
- ★ 문서: PDF·DOCX·HWP·HWPX·TXT 업로드 → 섹션별 히트맵 + PDF 보고서
8
- """
9
  import gradio as gr
10
  import math, re, os, json, random, time, hashlib, zlib, zipfile, tempfile
11
  from collections import Counter
@@ -14,7 +6,6 @@ from pathlib import Path
14
  from concurrent.futures import ThreadPoolExecutor, as_completed
15
  from xml.etree import ElementTree as ET
16
  from kiwipiepy import Kiwi
17
-
18
  KIWI = Kiwi()
19
  try:
20
  import httpx; HAS_HTTPX = True
@@ -26,8 +17,6 @@ try:
26
  HAS_GENAI = True
27
  except ImportError:
28
  HAS_GENAI = False
29
-
30
- # ── 문서 추출 라이브러리 ──
31
  try:
32
  import olefile; HAS_OLEFILE = True
33
  except ImportError:
@@ -44,15 +33,9 @@ try:
44
  from docx import Document as DocxDocument; HAS_DOCX = True
45
  except ImportError:
46
  HAS_DOCX = False
47
-
48
  GROQ_KEY = os.getenv("GROQ_API_KEY", "")
49
  GEMINI_KEY = os.getenv("GEMINI_API_KEY", "")
50
  BRAVE_KEY = os.getenv("BRAVE_API_KEY", "")
51
-
52
- # ═══════════════════════════════════════════════
53
- # 문서 텍스트 추출 엔진
54
- # ═══════════════════════════════════════════════
55
-
56
  def extract_text_from_pdf(file_path):
57
  """PDF → 텍스트 (페이지별 분리)"""
58
  pages = []
@@ -76,7 +59,6 @@ def extract_text_from_pdf(file_path):
76
  except Exception as e:
77
  print(f"PyPDF2: {e}")
78
  return None, "PDF 추출 실패 (pdfplumber, PyPDF2 없음)"
79
-
80
  def extract_text_from_docx(file_path):
81
  """DOCX → 텍스트 (문단별 분리)"""
82
  if not HAS_DOCX: return None, "python-docx 없음"
@@ -97,7 +79,6 @@ def extract_text_from_docx(file_path):
97
  return None, "DOCX 텍스트 없음"
98
  except Exception as e:
99
  return None, f"DOCX 오류: {e}"
100
-
101
  def extract_text_from_txt(file_path):
102
  """TXT/MD/CSV 등 → 텍스트"""
103
  for enc in ['utf-8', 'euc-kr', 'cp949', 'utf-16', 'latin-1']:
@@ -105,12 +86,10 @@ def extract_text_from_txt(file_path):
105
  with open(file_path, 'r', encoding=enc) as f:
106
  text = f.read()
107
  if text.strip():
108
- # 빈 줄 기준으로 섹션 분리
109
  sections = [s.strip() for s in re.split(r'\n{2,}', text) if s.strip()]
110
  return sections if sections else [text], None
111
  except: continue
112
  return None, "텍스트 인코딩 감지 실패"
113
-
114
  def extract_text_from_hwpx(file_path):
115
  """HWPX (ZIP 기반) → 텍스트"""
116
  try:
@@ -149,7 +128,6 @@ def extract_text_from_hwpx(file_path):
149
  return None, "유효하지 않은 HWPX"
150
  except Exception as e:
151
  return None, f"HWPX 오류: {e}"
152
-
153
  def _decode_hwp_para(data):
154
  """HWP 바이너리 → 문단 텍스트"""
155
  result = []
@@ -171,7 +149,6 @@ def _decode_hwp_para(data):
171
  text = re.sub(r'[ \t]+', ' ', text)
172
  text = re.sub(r'\n{3,}', '\n\n', text)
173
  return text if len(text) > 2 else None
174
-
175
  def _extract_hwp_section(data):
176
  """HWP 섹션 바이너리 → 텍스트"""
177
  texts = []
@@ -195,7 +172,6 @@ def _extract_hwp_section(data):
195
  except:
196
  pos += 1
197
  return '\n'.join(texts) if texts else None
198
-
199
  def extract_text_from_hwp(file_path):
200
  """HWP (OLE 기반) → 텍스트"""
201
  if not HAS_OLEFILE: return None, "olefile 없음"
@@ -224,7 +200,6 @@ def extract_text_from_hwp(file_path):
224
  return None, "HWP 텍스트 없음"
225
  except Exception as e:
226
  return None, f"HWP 오류: {e}"
227
-
228
  def extract_text_from_file(file_path):
229
  """
230
  만능 문서 추출: PDF/DOCX/HWP/HWPX/TXT → (sections_list, full_text, error)
@@ -235,7 +210,6 @@ def extract_text_from_file(file_path):
235
  return None, None, "파일 없음"
236
  ext = Path(file_path).suffix.lower()
237
  sections, error = None, None
238
-
239
  if ext == '.pdf':
240
  sections, error = extract_text_from_pdf(file_path)
241
  elif ext == '.docx':
@@ -248,32 +222,24 @@ def extract_text_from_file(file_path):
248
  sections, error = extract_text_from_txt(file_path)
249
  else:
250
  return None, None, f"지원하지 않는 형식: {ext}"
251
-
252
  if sections:
253
  full = '\n\n'.join(sections)
254
  return sections, full, None
255
  return None, None, error or "텍스트 추출 실패"
256
-
257
- # ═══════════════════════════════════════════════
258
- # 유틸리티
259
- # ═══════════════════════════════════════════════
260
def split_sentences(text):
    """Split *text* into sentences.

    Tries the Kiwi sentence splitter first (handles Korean well); if it is
    unavailable or fails, falls back to a simple regex split after
    sentence-final punctuation.

    Returns a list of non-empty, stripped sentence strings.
    """
    try:
        sents = [x.text.strip() for x in KIWI.split_into_sents(text) if x.text.strip()]
        if sents:
            return sents
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # propagate; any tokenizer failure falls through to the regex.
        pass
    # Fallback: split on whitespace following ., !, ? or 。.
    return [x.strip() for x in re.split(r'(?<=[.!?。])\s+', text) if x.strip()]
266
-
267
def split_words(text):
    """Tokenize *text* into word chunks.

    Returns every maximal run of Hangul syllables, Latin letters, or digits,
    in order of appearance.  The `+` quantifier guarantees non-empty matches,
    so the original's trailing `if w` filter was dead code and is dropped.
    """
    return re.findall(r'[가-힣a-zA-Z0-9]+', text)
269
-
270
def get_morphemes(text):
    """Run Kiwi morphological analysis on *text*.

    Returns a list of ``(surface_form, pos_tag)`` tuples from the
    top-ranked analysis, or an empty list when Kiwi is unavailable,
    analysis fails, or no analysis is produced.
    """
    try:
        result = KIWI.analyze(text)
        if result and result[0]:
            # result[0][0] is the token list of the best-scoring analysis.
            return [(m.form, m.tag) for m in result[0][0]]
    except Exception:
        # Narrowed from a bare `except:`; analysis errors degrade to [].
        pass
    return []
276
-
277
  def http_get(url, headers=None, timeout=15):
278
  try:
279
  if HAS_HTTPX:
@@ -285,7 +251,6 @@ def http_get(url, headers=None, timeout=15):
285
  with urllib.request.urlopen(req, timeout=timeout) as resp:
286
  return resp.read().decode('utf-8', errors='replace')
287
  except: return None
288
-
289
  def http_post_json(url, body, headers=None, timeout=30):
290
  try:
291
  h = headers or {}
@@ -300,7 +265,6 @@ def http_post_json(url, body, headers=None, timeout=30):
300
  with urllib.request.urlopen(req, timeout=timeout, context=ssl.create_default_context()) as resp:
301
  return json.loads(resp.read())
302
  except: return None
303
-
304
  def call_groq(model, prompt, max_tokens=800, temperature=0.1):
305
  if not GROQ_KEY: return None, "NO_KEY"
306
  url = "https://api.groq.com/openai/v1/chat/completions"
@@ -317,32 +281,23 @@ def call_groq(model, prompt, max_tokens=800, temperature=0.1):
317
  with urllib.request.urlopen(req, timeout=45, context=ssl.create_default_context()) as resp:
318
  return json.loads(resp.read())["choices"][0]["message"]["content"], None
319
  except Exception as e: return None, str(e)[:150]
320
-
321
- # ═══════════════════════════════════════════════
322
- # ★ 통합 문장 점수 (탭1 + 탭2 공유) — v5.0 대폭 강화
323
- # ═══════════════════════════════════════════════
324
  AI_ENDINGS = ['합니다','입니다','됩니다','습니다','있습니다','했습니다','겠습니다']
325
- # 비격식이지만 AI적인 종결 패턴
326
  AI_CASUAL_ENDINGS = ['라고 할 수 있다','라고 볼 수 있다','다고 생각한다','다고 판단된다',
327
  '인 셈이다','인 것이다','는 것이다','는 셈이다','ㄹ 것이다','을 것이다',
328
  '라 할 수 있다','로 보인다','로 판단된다','고 있다','는 추세다','는 상황이다',
329
  '지 않을 수 없다','라 하겠다','음을 알 수 있다','할 필요가 있다']
330
  AI_CONNS = ['또한','따라서','그러므로','이에 따라','한편','더불어','아울러','뿐만 아니라',
331
  '이를 통해','이에','결과적으로','궁극적으로','특히','나아가','이러한']
332
- # 자연어에서도 쓰이지만 AI가 과도하게 쓰는 접속사
333
  AI_SOFT_CONNS = ['물론','그러나','하지만','이처럼','이와 같이','이를 바탕으로']
334
  AI_FILLER = ['것으로 보','것으로 나타','것으로 예상','할 수 있','볼 수 있','주목할 만',
335
  '중요한 역할','중요한 의미','긍정적인 영향','부정적인 영향','필요합니다','필요하다',
336
  '중요합니다','중요하다','역할을 하','영향을 미','기대된다','예상됩니다','부각되고',
337
  '대두되고','다양한 분야','다양한 산업','눈부신 성과','획기적인 변화','혁신적인',
338
  '점에서','측면에서','관점에서']
339
- # 비격식 AI 상투표현 (비격식체로 써도 AI적)
340
  AI_CASUAL_FILLER = ['무궁무진하다','무궁무진한','과언이 아니','돌파구가 될','전환점이 될',
341
  '기반으로','발판으로','원동력이','초석이 될','가속화되','급부상','패러다임',
342
  '지평을 열','새로운 장을','대전환','본격화되','고도화','이정표']
343
- # 양보-주장 패턴 (AI 특유: "물론 X하지만, Y" 구조)
344
  AI_CONCESSION = re.compile(r'물론.{2,20}(하지만|그러나|그렇지만|다만)|.{2,15}(이긴 하지만|기는 하지만|수 있지만|수는 있지만)')
345
- # 영어 AI 패턴
346
  EN_AI_MARKERS = ['furthermore','additionally','moreover','it is worth noting','in conclusion',
347
  'it is important to','plays a crucial role','significant impact','various aspects',
348
  'in this regard','consequently','nevertheless','integral part of','led to remarkable',
@@ -369,23 +324,16 @@ FP = {
369
  "Perplexity": {"m":['검색 결과에 따르면','보도에 따르면','연구에 따르면','밝혔다','전했다',
370
  '것으로 나타났다','것으로 조사됐다','것으로 집계됐다','발표했다'],"e":['밝혔다','나타났다','전했다'],"lp":re.compile(r'\[\d+\]',re.M)},
371
  }
372
-
373
  def score_sentence(sent):
374
  """단일 문장 AI 점수 (0~100). 탭1·탭2 공유. v5.0 대폭 강화."""
375
  sc = 0; reasons = []
376
  sl = sent.lower().strip()
377
  sr = sent.rstrip('.!?。')
378
-
379
- # ── 격식 종결어미 ──
380
  for e in AI_ENDINGS:
381
  if sr.endswith(e): sc += 22; reasons.append(f"격식어미(-{e})"); break
382
-
383
- # ── 비격식 AI 종결 패턴 ──
384
  if sc == 0: # 격식이 아닌 경우만
385
  for e in AI_CASUAL_ENDINGS:
386
  if sr.endswith(e): sc += 15; reasons.append(f"비격식AI(-{e})"); break
387
-
388
- # ── 문두 접속사 ──
389
  stripped = sent.strip()
390
  for c in AI_CONNS:
391
  if stripped.startswith(c):
@@ -393,36 +341,24 @@ def score_sentence(sent):
393
  else:
394
  for c in AI_SOFT_CONNS:
395
  if stripped.startswith(c): sc += 8; reasons.append(f"약한접속사({c})"); break
396
-
397
- # ── 상투적 표현 (클래식 + 비격식) ──
398
  filler_found = sum(1 for f in AI_FILLER if f in sent)
399
  casual_filler = sum(1 for f in AI_CASUAL_FILLER if f in sent)
400
  total_filler = filler_found + casual_filler
401
  if total_filler >= 3: sc += 25; reasons.append(f"상투표현×{total_filler}")
402
  elif total_filler == 2: sc += 18; reasons.append(f"상투표현×2")
403
  elif total_filler == 1: sc += 10; reasons.append(f"상투표현×1")
404
-
405
- # ── 양보-주장 패턴 (Claude/GPT 특유) ──
406
  if AI_CONCESSION.search(sent): sc += 10; reasons.append("양보패턴")
407
-
408
- # ── 모델 지문 ──
409
  for mn, fp in FP.items():
410
  for m in fp["m"]:
411
  if m in sent: sc += 8; reasons.append(f"{mn}지문"); break
412
-
413
- # ── 영어 AI 패턴 (복수 누적) ──
414
  en_count = sum(1 for em in EN_AI_MARKERS if em in sl)
415
  if en_count >= 3: sc += 25; reasons.append(f"영어AI×{en_count}")
416
  elif en_count >= 2: sc += 18; reasons.append(f"영어AI×{en_count}")
417
  elif en_count >= 1: sc += 12; reasons.append(f"영어AI×1")
418
-
419
- # ── 복합 보너스: 격식+상투+접속 동시 → 거의 확실 AI ──
420
  has_formal = any(sr.endswith(e) for e in AI_ENDINGS)
421
  has_conn = any(stripped.startswith(c) for c in AI_CONNS)
422
  if has_formal and total_filler >= 1 and has_conn: sc += 8; reasons.append("복합AI")
423
  elif has_formal and total_filler >= 2: sc += 5; reasons.append("격식+상투")
424
-
425
- # ── 인간 마커 (감점) — 세분화 ──
426
  for n, p in HUMAN_MARKERS.items():
427
  matches = p.findall(sent)
428
  if matches:
@@ -430,22 +366,13 @@ def score_sentence(sent):
430
  elif n in ('비격식종결','구어축약'): sc -= 18; reasons.append(f"구어체({n})")
431
  elif n == '맞춤법오류': sc -= 12; reasons.append("맞춤법오류")
432
  elif n in ('느낌표','말줄임표'): sc -= 10; reasons.append(f"인간({n})")
433
-
434
  return max(0, min(100, sc)), reasons
435
-
436
- # ═══════════════════════════════════════════════
437
- # 축① 통계 — v5.0: Burstiness + 조건부 엔트로피 추가
438
- # ═══════════════════════════════════════════════
439
  def analyze_statistics(text, sentences, words):
440
  sl = [len(s) for s in sentences]
441
  if len(sl) < 2: return {"score":50}
442
-
443
- # 1. CV (문장 길이 변동 계수)
444
  avg = sum(sl)/len(sl); std = math.sqrt(sum((l-avg)**2 for l in sl)/len(sl))
445
  cv = std/avg if avg > 0 else 0
446
  cv_score = 85 if cv<0.20 else 70 if cv<0.30 else 50 if cv<0.45 else 30 if cv<0.60 else 15
447
-
448
- # 2. Burstiness — 연속 문장 길이차 변동
449
  diffs = [abs(sl[i]-sl[i-1]) for i in range(1,len(sl))]
450
  burst_score = 50
451
  if diffs:
@@ -453,21 +380,14 @@ def analyze_statistics(text, sentences, words):
453
  max_d = max(diffs)
454
  burst_ratio = max_d / (avg_d + 1)
455
  burst_score = 85 if burst_ratio < 1.8 else 65 if burst_ratio < 2.5 else 40 if burst_ratio < 3.5 else 20
456
-
457
- # 3. ★ 표준 길이 비율 — AI는 대부분 25~60자, 인간은 극단 outlier 있음
458
  standard_ratio = sum(1 for l in sl if 20 <= l <= 60) / len(sl)
459
  std_score = 80 if standard_ratio > 0.8 else 60 if standard_ratio > 0.6 else 40 if standard_ratio > 0.4 else 20
460
- # 극단 문장(10자 미만 or 80자 초과) 있으면 인간적
461
  extreme = sum(1 for l in sl if l < 10 or l > 80)
462
  if extreme >= 2: std_score = max(10, std_score - 20)
463
  elif extreme >= 1: std_score = max(15, std_score - 10)
464
-
465
- # 4. 어휘 다양성
466
  wf = Counter(words); t = len(words)
467
  ttr = len(wf)/t if t>0 else 0
468
  vocab_score = 70 if ttr<0.45 else 55 if ttr<0.55 else 35 if ttr<0.65 else 20
469
-
470
- # 5. 문장 복잡도 균일성
471
  wpc = [len(split_words(s)) for s in sentences]
472
  complex_score = 50
473
  if len(wpc) >= 3:
@@ -475,108 +395,72 @@ def analyze_statistics(text, sentences, words):
475
  wpc_std = math.sqrt(sum((w-wpc_avg)**2 for w in wpc)/len(wpc))
476
  wpc_cv = wpc_std/wpc_avg if wpc_avg > 0 else 0
477
  complex_score = 80 if wpc_cv < 0.20 else 60 if wpc_cv < 0.35 else 35 if wpc_cv < 0.50 else 15
478
-
479
  final = int(cv_score*0.20 + burst_score*0.20 + std_score*0.25 + vocab_score*0.15 + complex_score*0.20)
480
  return {"score":final,"cv":round(cv,3),"ttr":round(ttr,3)}
481
-
482
- # ═══════════════════════════════════════════════
483
- # 축② 문체 — v5.0: 양보패턴 + 비격식AI + 접속사 위치패턴
484
- # ═══════════════════════════════════════════════
485
def analyze_korean_style(text, sentences, morphemes):
    """Axis ② — score how AI-like the Korean writing style is.

    Combines sentence-ending ratios, connective density and spacing
    regularity, stock-phrase counts, concession patterns, POS balance,
    and a human-marker penalty into one score.
    """
    if not sentences:
        return {"score": 50}

    n = len(sentences)

    # 1) Sentence endings: formal (-습니다 family) and casual-but-AI endings.
    tails = [s.rstrip('.!?') for s in sentences]
    formal_cnt = sum(any(t.endswith(e) for e in AI_ENDINGS) for t in tails)
    casual_ai = sum(any(t.endswith(e) for e in AI_CASUAL_ENDINGS) for t in tails)
    fr = formal_cnt / n
    car = casual_ai / n
    if fr > 0.7:
        ending_score = 85
    elif fr > 0.5:
        ending_score = 65
    elif fr > 0.3:
        ending_score = 45
    elif fr > 0.1:
        ending_score = 25
    else:
        ending_score = 10
    ending_score = min(90, ending_score + int(car * 25))  # casual-AI bonus

    # 2) Connective density plus positional regularity.
    conn_positions = [
        idx for idx, s in enumerate(sentences)
        if any(s.strip().startswith(c) for c in AI_CONNS)
    ]
    conn_density = len(conn_positions) / n
    if conn_density > 0.4:
        conn_score = 85
    elif conn_density > 0.25:
        conn_score = 65
    elif conn_density > 0.1:
        conn_score = 40
    else:
        conn_score = 15
    # AI tends to place connectives at near-constant intervals (every 2-3 sentences).
    if len(conn_positions) >= 2:
        gaps = [b - a for a, b in zip(conn_positions, conn_positions[1:])]
        mean_gap = sum(gaps) / len(gaps)
        gap_std = math.sqrt(sum((g - mean_gap) ** 2 for g in gaps) / len(gaps))
        if gap_std / (mean_gap + 0.01) < 0.5:  # very regular spacing → AI bonus
            conn_score = min(90, conn_score + 10)

    # 3) Stock filler phrases (classic + casual lists).
    filler_cnt = sum(1 for f in AI_FILLER if f in text) + sum(1 for f in AI_CASUAL_FILLER if f in text)
    if filler_cnt >= 6:
        filler_score = 90
    elif filler_cnt >= 4:
        filler_score = 75
    elif filler_cnt >= 2:
        filler_score = 55
    elif filler_cnt >= 1:
        filler_score = 30
    else:
        filler_score = 10

    # 4) Concession-then-claim constructions ("물론 ~ 하지만 ~").
    concession_cnt = len(AI_CONCESSION.findall(text))
    conc_score = 80 if concession_cnt >= 2 else (55 if concession_cnt >= 1 else 20)

    # 5) Human-marker penalty, capped at 35.
    human_count = sum(len(p.findall(text)) for p in HUMAN_MARKERS.values())
    human_penalty = min(35, human_count * 8)

    # 6) POS balance: noun-heavy text reads as AI-like.
    pos_score = 45
    if morphemes:
        tag_freq = Counter(tag for _, tag in morphemes)
        total_tags = sum(tag_freq.values())
        noun_r = sum(tag_freq.get(t, 0) for t in ['NNG', 'NNP', 'NNB', 'NR']) / total_tags if total_tags else 0
        if noun_r > 0.42:
            pos_score = 70
        elif noun_r > 0.38:
            pos_score = 55
        elif noun_r > 0.32:
            pos_score = 35
        else:
            pos_score = 20

    final = max(5, int(ending_score * 0.25 + conn_score * 0.20 + filler_score * 0.20 +
                       conc_score * 0.10 + pos_score * 0.15 + 10 * 0.10) - human_penalty)
    return {"score": final, "formal": f"{fr:.0%}", "conn": f"{conn_density:.2f}",
            "filler": filler_cnt, "human": human_count}
532
-
533
- # ═══════════════════════════════════════════════
534
- # 축③ 반복 — v5.0: 문두 N-어절 + 구문 템플릿 + 의미 반복
535
- # ═══════════════════════════════════════════════
536
  def analyze_repetition(text, sentences, words):
537
  if not sentences or len(sentences) < 2: return {"score":35}
538
-
539
- # 1. N-gram 반복 (3-gram)
540
  tr = 0
541
  if len(words)>=5:
542
  tg = Counter(tuple(words[i:i+3]) for i in range(len(words)-2))
543
  tr = sum(1 for c in tg.values() if c>1)/len(tg) if tg else 0
544
  ngram_score = 80 if tr>0.15 else 60 if tr>0.08 else 35 if tr>0.03 else 15
545
-
546
- # 2. ★ 문두 2-3어절 다양성 (첫 단어만이 아닌 첫 2-3어절)
547
  openers_2 = []
548
  openers_3 = []
549
  for s in sentences:
550
  ws = split_words(s)
551
  if len(ws) >= 2: openers_2.append(tuple(ws[:2]))
552
  if len(ws) >= 3: openers_3.append(tuple(ws[:3]))
553
-
554
  opener2_score = 50
555
  if openers_2:
556
  unique2 = len(set(openers_2))/len(openers_2)
557
  opener2_score = 80 if unique2 < 0.5 else 60 if unique2 < 0.7 else 35 if unique2 < 0.85 else 15
558
-
559
- # 3. AI 접속사 문두 반복
560
  ai_only_conns = ['또한','따라서','그러므로','이에 따라','더불어','아울러','뿐만 아니라',
561
  '이를 통해','이에','결과적으로','궁극적으로','나아가','이러한']
562
  cr = sum(1 for s in sentences if any(s.strip().startswith(c) for c in ai_only_conns))
563
  crr = cr/len(sentences) if sentences else 0
564
  ai_conn_score = 85 if crr>0.35 else 65 if crr>0.2 else 40 if crr>0.08 else 15
565
-
566
- # 4. ★ 구문 템플릿 반복 (주어+조사+...+종결 패턴)
567
  templates = []
568
  for s in sentences:
569
  ws = split_words(s)
570
  if len(ws) >= 4:
571
- # 첫 어절 + 마지막 어절 패턴
572
  templates.append((ws[0], ws[-1]))
573
  template_rep = 0
574
  if templates:
575
  tc = Counter(templates)
576
  template_rep = sum(1 for c in tc.values() if c > 1) / len(tc) if tc else 0
577
  template_score = 80 if template_rep > 0.3 else 55 if template_rep > 0.1 else 25
578
-
579
- # 5. ★ 종결어미 다양성 (AI는 같은 종결어미 반복)
580
  endings = []
581
  for s in sentences:
582
  sr = s.rstrip('.!?。')
@@ -586,36 +470,23 @@ def analyze_repetition(text, sentences, words):
586
  if endings:
587
  unique_e = len(set(endings))/len(endings)
588
  ending_div = 80 if unique_e < 0.3 else 60 if unique_e < 0.5 else 35 if unique_e < 0.7 else 15
589
-
590
  final = int(ngram_score*0.15 + opener2_score*0.20 + ai_conn_score*0.25 +
591
  template_score*0.15 + ending_div*0.25)
592
  return {"score":final}
593
-
594
- # ═══════════════════════════════════════════════
595
- # 축④ 구조 — v5.0: 추상성/구체성 + 문장다양성 + 구두점
596
- # ═══════════════════════════════════════════════
597
- # AI 추상 수식어 (구체 명사 없이 쓰이는 AI적 형용사)
598
  AI_VAGUE = re.compile(r'다양한|중요한|긍정적인|부정적인|획기적인|혁신적인|효율적인|체계적인|종합적인|전반적인|지속적인|적극적인|상당한|주요한')
599
- # 구체성 지표 (인간적 — 고유명사, 숫자+단위, 인용/출처)
600
  CONCRETE_PROPER = re.compile(r'삼성|LG|현대|SK|카카오|네이버|넷플릭스|구글|애플|테슬라|아마존|마이크로소프트|[가-힣]{2,}대학|[가-힣]{2,}병원|[가-힣]{1,3}시[는을이가]|[가-힣]{1,3}구[는을이가]|[가-힣]{2,}동[에서]')
601
  CONCRETE_NUMBER = re.compile(r'\d{2,}[만억조원달러%개년월일위등호]|\d+\.\d+%|\d{4}년|\d{1,2}월')
602
  CONCRETE_QUOTE = re.compile(r'에 따르면|발표했|밝혔다|보도했|전했다|라고 말|인터뷰|설문|조사|통계청|보고서')
603
-
604
  def analyze_structure(text, sentences):
605
  if not sentences: return {"score":35}
606
-
607
- # 1. 마크다운/리스트
608
  lt = (len(re.findall(r'^\d+[.)]\s',text,re.M)) + len(re.findall(r'^[-•*]\s',text,re.M)) +
609
  len(re.findall(r'^#+\s',text,re.M)) + len(re.findall(r'\*\*[^*]+\*\*',text)))
610
  list_score = 90 if lt>=5 else 70 if lt>=3 else 45 if lt>=1 else 10
611
-
612
- # 2. ★ 추상성 vs 구체성 (핵심 판별 — 40% 가중치)
613
  vague_cnt = len(AI_VAGUE.findall(text))
614
  proper_cnt = len(CONCRETE_PROPER.findall(text))
615
  number_cnt = len(CONCRETE_NUMBER.findall(text))
616
  quote_cnt = len(CONCRETE_QUOTE.findall(text))
617
  concrete_total = proper_cnt + number_cnt + quote_cnt
618
-
619
  if vague_cnt >= 3 and concrete_total == 0: abstract_score = 90
620
  elif vague_cnt >= 2 and concrete_total <= 1: abstract_score = 70
621
  elif vague_cnt >= 1 and concrete_total == 0: abstract_score = 55
@@ -623,20 +494,14 @@ def analyze_structure(text, sentences):
623
  elif concrete_total >= 2: abstract_score = 20
624
  elif concrete_total >= 1: abstract_score = 30
625
  else: abstract_score = 45
626
-
627
- # 3. 문장 유형 다양성 (AI=서술문만, 인간=의문/감탄 혼용)
628
  has_question = any(s.strip().endswith('?') for s in sentences)
629
  has_exclaim = any(s.strip().endswith('!') for s in sentences)
630
  has_ellipsis = any('...' in s or '…' in s for s in sentences)
631
  variety = sum([has_question, has_exclaim, has_ellipsis])
632
  type_score = 15 if variety >= 2 else 40 if variety >= 1 else 65
633
-
634
- # 4. 구두점 단조로움
635
  puncts = re.findall(r'[!?,;:…—\-~]', text)
636
  unique_punct = len(set(puncts))
637
  punct_score = 65 if unique_punct <= 1 else 45 if unique_punct <= 3 else 20
638
-
639
- # 5. 문단 구조 (다문단인 경우)
640
  paras = [p.strip() for p in text.split('\n\n') if p.strip()]
641
  para_score = 35
642
  if len(paras) >= 2:
@@ -647,13 +512,8 @@ def analyze_structure(text, sentences):
647
  para_score = 75 if pcv < 0.2 else 55 if pcv < 0.35 else 30
648
  if len(paras) >= 3 and pl[0] < avg_p and pl[-1] < avg_p:
649
  para_score = min(85, para_score + 10)
650
-
651
  final = int(list_score*0.10 + abstract_score*0.40 + type_score*0.20 + punct_score*0.10 + para_score*0.20)
652
  return {"score":final}
653
-
654
- # ═══════════════════════════════════════════════
655
- # 축⑤ 지문 — v5.0: Perplexity + 비격식AI + 영어 패턴
656
- # ═══════════════════════════════════════════════
657
  def analyze_model_fingerprint(text, sentences):
658
  ms = {}
659
  sl = text.lower()
@@ -664,39 +524,22 @@ def analyze_model_fingerprint(text, sentences):
664
  em = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in fp.get("e",[])))
665
  if sentences: sc += int((em/len(sentences))*20)
666
  ms[mn] = min(100,sc)
667
-
668
- # ★ 비격식 AI 일반 지문 (특정 모델 불문)
669
  general_ai = 0
670
- # 비격식 AI 상투
671
  general_ai += sum(5 for f in AI_CASUAL_FILLER if f in text)
672
- # 비격식 AI 종결
673
  casual_end_cnt = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in AI_CASUAL_ENDINGS))
674
  general_ai += casual_end_cnt * 5
675
- # 양보 패턴
676
  general_ai += len(AI_CONCESSION.findall(text)) * 8
677
  ms["비격식AI"] = min(100, general_ai)
678
-
679
- # ★ 영어 AI 지문
680
  en_score = sum(5 for em in EN_AI_MARKERS if em in sl)
681
  ms["영어AI"] = min(100, en_score)
682
-
683
  mx = max(ms.values()) if ms else 0
684
- # 복수 모델에서 점수가 나오면 더 AI적
685
  multi = sum(1 for v in ms.values() if v >= 10)
686
  multi_bonus = 10 if multi >= 3 else 5 if multi >= 2 else 0
687
-
688
  base = 85 if mx>=50 else 65 if mx>=35 else 45 if mx>=20 else 25 if mx>=10 else 10
689
  return {"score":min(95, base + multi_bonus),"model_scores":{k:v for k,v in ms.items() if k not in ("비격식AI","영어AI") or v > 0}}
690
-
691
- # ═══════════════════════════════════════════════
692
- # ★★★ 킬러 기능 ① — Perplexity 기반 AI 확률 (v5.1)
693
- # ═══════════════════════════════════════════════
694
- # AI 텍스트는 예측 가능도가 높음 (낮은 Perplexity) → 문자/형태소 n-gram 기반
695
  def analyze_perplexity(text, sentences, morphemes):
696
  """한국어 특화 Perplexity + Burstiness — 문자 엔트로피 보정"""
697
  if len(sentences) < 2: return {"score": 40, "entropy": 0, "variance": 0, "order": 0, "zipf": 0}
698
-
699
- # === 1. 문자 바이그램 엔트로피 (형태소보다 안정적) ===
700
  chars = [c for c in text if c.strip()]
701
  char_score = 45
702
  if len(chars) >= 30:
@@ -704,14 +547,11 @@ def analyze_perplexity(text, sentences, morphemes):
704
  cb_freq = Counter(cbigrams)
705
  total_cb = len(cbigrams)
706
  char_entropy = -sum((cnt/total_cb)*math.log2(cnt/total_cb) for cnt in cb_freq.values())
707
- # AI 한국어: ~7~9비트, 인간: ~9~12비트
708
  if char_entropy < 7.5: char_score = 78
709
  elif char_entropy < 8.5: char_score = 62
710
  elif char_entropy < 9.5: char_score = 42
711
  elif char_entropy < 10.5: char_score = 25
712
  else: char_score = 12
713
-
714
- # === 2. 문장 길이 Burstiness (CV) ===
715
  sl = [len(s) for s in sentences]
716
  burst_score = 45
717
  if len(sl) >= 3:
@@ -723,8 +563,6 @@ def analyze_perplexity(text, sentences, morphemes):
723
  elif cv < 0.40: burst_score = 38
724
  elif cv < 0.60: burst_score = 20
725
  else: burst_score = 8
726
-
727
- # === 3. 문장간 어휘밀도(TTR) 편차 ===
728
  sent_ttr = []
729
  for s in sentences:
730
  sw = split_words(s)
@@ -738,8 +576,6 @@ def analyze_perplexity(text, sentences, morphemes):
738
  elif std_ttr < 0.08: ttr_score = 55
739
  elif std_ttr < 0.15: ttr_score = 35
740
  else: ttr_score = 15
741
-
742
- # === 4. 종결어미 엔트로피 ===
743
  endings = [s.rstrip('.!?\u2026')[-3:] for s in sentences if len(s) >= 5]
744
  end_score = 40
745
  if len(endings) >= 3:
@@ -751,55 +587,32 @@ def analyze_perplexity(text, sentences, morphemes):
751
  elif norm_ent < 0.7: end_score = 50
752
  elif norm_ent < 0.85: end_score = 32
753
  else: end_score = 15
754
-
755
  final = int(char_score * 0.30 + burst_score * 0.30 + ttr_score * 0.20 + end_score * 0.20)
756
  return {"score": final, "entropy": char_score, "variance": burst_score, "order": ttr_score, "zipf": end_score}
757
-
758
-
759
- # ═══════════════════════════════════════════════
760
- # ★★★ 킬러 기능 ② — Humanizer/Bypasser 탐지 (v5.1)
761
- # ═══════════════════════════════════════════════
762
- # 패러프레이즈 도구(QuillBot 등)로 수정된 AI 글의 잔존 흔적 탐지
763
-
764
- # Humanizer 특유 패턴
765
  HUMANIZER_OVERSUBST = re.compile(r'활용하다|이용하다|사용하다|적용하다|도입하다|채택하다|수행하다|진행하다|실시하다|실행하다')
766
  HUMANIZER_AWKWARD = re.compile(r'그것은|이것은|저것은|해당 사항|앞서 언급한|전술한|상기한|기술된')
767
  HUMANIZER_PASSIVE = re.compile(r'되어지[고는며]|하게 되었[다습]|수행되[었어]|진행되[었어]|실시되[었어]|활용되[었어]')
768
-
769
  def analyze_humanizer(text, sentences, words, morphemes):
770
  """Humanizer/Bypasser 탐지 — AI 원문 패러프레이즈 흔적 분석"""
771
  if len(sentences) < 2: return {"score": 20, "signals": []}
772
  signals = []
773
-
774
- # === 1. 동의어 과다 치환 패턴 ===
775
- # Humanizer는 같은 의미를 다양한 동의어로 바꿈 → 비자연적 어휘 분산
776
  content_words = [f for f, t in morphemes if t in ('NNG', 'NNP', 'VV', 'VA')]
777
  if len(content_words) >= 10:
778
  cw_freq = Counter(content_words)
779
- # Hapax ratio 극단적으로 높으면 동의어 치환 의심
780
  hapax = sum(1 for c in cw_freq.values() if c == 1)
781
  hapax_ratio = hapax / len(cw_freq) if cw_freq else 0
782
- # 자연어: 0.4~0.7, Humanizer: 0.8+ (모든 단어를 다 바꿔서)
783
  if hapax_ratio > 0.95 and len(content_words) >= 30:
784
  signals.append(("동의어과다치환", 20, "핵심 어휘가 과도하게 분산"))
785
  elif hapax_ratio > 0.90 and len(content_words) >= 25:
786
  signals.append(("동의어치환의심", 12, "어휘 반복 회피 패턴"))
787
-
788
- # === 2. 구조 보존 + 어휘만 변경 패턴 ===
789
- # 원문 AI의 문장 구조(길이, 어순)는 유지되면서 단어만 바뀜
790
  sl = [len(s) for s in sentences]
791
  if len(sl) >= 4:
792
  avg = sum(sl) / len(sl)
793
  cv = math.sqrt(sum((l - avg)**2 for l in sl) / len(sl)) / (avg + 1e-10)
794
- # 문장 길이 균일 + 어휘 다양 = Humanizer 패턴
795
  unique_ratio = len(set(words)) / len(words) if words else 0
796
  if cv < 0.20 and unique_ratio > 0.80 and len(sentences) >= 5:
797
  signals.append(("구조보존어휘변경", 18, "문장 구조 균일 + 비정상적 어휘 다양성"))
798
-
799
- # === 3. 잔존 AI 패턴 ===
800
- # Humanizer가 놓치기 쉬운 AI 흔적
801
  residual = 0
802
- # 접속사 위치 규칙성 (Humanizer는 접속사를 잘 안 바꿈)
803
  conn_positions = []
804
  for i, s in enumerate(sentences):
805
  stripped = s.strip()
@@ -808,13 +621,10 @@ def analyze_humanizer(text, sentences, words, morphemes):
808
  conn_positions.append(i)
809
  break
810
  if len(conn_positions) >= 2:
811
- # 등간격 접속사 = AI 원문 구조 잔존
812
  gaps = [conn_positions[i] - conn_positions[i-1] for i in range(1, len(conn_positions))]
813
  if gaps and max(gaps) - min(gaps) <= 1: # 거의 등간격
814
  signals.append(("접속사등간격잔존", 15, "접속사 배치가 규칙적 (AI 원문 구조 잔존)"))
815
  residual += 15
816
-
817
- # === 4. 부자연스러운 대체 표현 ===
818
  oversubst = len(HUMANIZER_OVERSUBST.findall(text))
819
  awkward = len(HUMANIZER_AWKWARD.findall(text))
820
  passive = len(HUMANIZER_PASSIVE.findall(text))
@@ -824,11 +634,6 @@ def analyze_humanizer(text, sentences, words, morphemes):
824
  signals.append(("어색한지시어", 10, f"해당/전술/상기 등 {awkward}개"))
825
  if passive >= 3:
826
  signals.append(("이중피동과다", 15, f"되어지/수행되 등 {passive}개"))
827
-
828
- # === 5. 문장 유형 단조 + 어미 다양 = Humanizer 시그니처 ===
829
- # AI 원문: 문장유형 단조 + 어미 단조
830
- # 인간: 문장유형 다양 + 어미 다양
831
- # Humanizer: 문장유형 단조(바꿀 수 없음) + 어미 다양(바꿈) → 부조화
832
  endings = [s.rstrip('.!?')[-2:] for s in sentences if len(s) >= 4]
833
  end_types = len(set(endings)) / len(endings) if endings else 0
834
  has_question = any(s.strip().endswith('?') for s in sentences)
@@ -836,29 +641,17 @@ def analyze_humanizer(text, sentences, words, morphemes):
836
  sent_type_variety = sum([has_question, has_exclaim])
837
  if sent_type_variety == 0 and end_types > 0.85 and len(sentences) >= 6:
838
  signals.append(("유형단조어미다양", 12, "서술문만 + 종결어미 과다 다양 = Humanizer 패턴"))
839
-
840
- # === 6. 문장 시작 패턴 불일치 ===
841
- # Humanizer는 문두를 다양하게 바꾸려 하나, 한국어에서는 부자연스러움 유발
842
  starters = [s.strip()[:3] for s in sentences if len(s) >= 6]
843
  starter_unique = len(set(starters)) / len(starters) if starters else 0
844
  if starter_unique >= 0.98 and len(sentences) >= 7:
845
  signals.append(("문두과다다양", 8, "모든 문장 시작이 다름 (자연스럽지 않은 다양성)"))
846
-
847
  total = sum(s[1] for s in signals)
848
- # 점수화
849
  if total >= 45: score = 85
850
  elif total >= 30: score = 68
851
  elif total >= 20: score = 52
852
  elif total >= 10: score = 35
853
  else: score = 15
854
-
855
  return {"score": score, "signals": signals, "total_evidence": total}
856
-
857
- # ═══════════════════════════════════════════════
858
- # ★★★ 킬러 기능 ③ — AI 모델 추정 (v5.1)
859
- # ═══════════════════════════════════════════════
860
- # 모델별 고유 특성으로 작성 모델 추정
861
-
862
  MODEL_PROFILES = {
863
  "GPT": {
864
  "style": ["격식체 ~습니다", "또한/특히 접속사", "~에 대해", "~것으로 예상됩니다"],
@@ -889,27 +682,19 @@ MODEL_PROFILES = {
889
  "connectors": ["한편", "또한", "이에"],
890
  },
891
  }
892
-
893
  def estimate_model(text, sentences, morphemes, model_scores):
894
  """AI 모델 추정 — 복합 증거 기반"""
895
  evidence = {m: {"score": 0, "reasons": []} for m in MODEL_PROFILES}
896
-
897
  sl = text.lower()
898
-
899
  for model, profile in MODEL_PROFILES.items():
900
- # 1. FP 점수 반영 (기존 지문 분석)
901
  fp_score = model_scores.get(model, 0)
902
  evidence[model]["score"] += fp_score * 0.4
903
  if fp_score >= 20:
904
  evidence[model]["reasons"].append(f"지문 매칭 {fp_score}점")
905
-
906
- # 2. 마커 매칭
907
  marker_cnt = sum(1 for m in profile["markers"] if m in text)
908
  if marker_cnt >= 2:
909
  evidence[model]["score"] += marker_cnt * 8
910
  evidence[model]["reasons"].append(f"특유 표현 {marker_cnt}개")
911
-
912
- # 3. 종결어미 패턴
913
  end_match = 0
914
  for s in sentences:
915
  for e in profile["endings"]:
@@ -920,34 +705,23 @@ def estimate_model(text, sentences, morphemes, model_scores):
920
  if end_ratio > 0.7:
921
  evidence[model]["score"] += 12
922
  evidence[model]["reasons"].append(f"종결어미 {end_ratio:.0%} 일치")
923
-
924
- # 4. 접속사 패턴
925
  conn_match = sum(1 for s in sentences if any(s.strip().startswith(c) for c in profile["connectors"]))
926
  if conn_match >= 2:
927
  evidence[model]["score"] += conn_match * 4
928
  evidence[model]["reasons"].append(f"접속사 패턴 {conn_match}회")
929
-
930
- # Perplexity 특화: 수치 + 출처 인용
931
  number_citations = len(re.findall(r'\d+[%만억조]|에 따르면|것으로 나타났|발표했', text))
932
  if number_citations >= 3:
933
  evidence["Perplexity"]["score"] += number_citations * 5
934
  evidence["Perplexity"]["reasons"].append(f"수치/인용 {number_citations}회")
935
-
936
- # Claude 특화: 양보-주장 구문
937
  concession_cnt = len(AI_CONCESSION.findall(text))
938
  if concession_cnt >= 1:
939
  evidence["Claude"]["score"] += concession_cnt * 10
940
  evidence["Claude"]["reasons"].append(f"양보-주장 구문 {concession_cnt}회")
941
-
942
- # 정렬 및 판정
943
  ranked = sorted(evidence.items(), key=lambda x: x[1]["score"], reverse=True)
944
  top = ranked[0]
945
  second = ranked[1] if len(ranked) > 1 else None
946
-
947
  if top[1]["score"] < 10:
948
  return {"model": "특정 불가", "confidence": "낮음", "detail": evidence, "ranked": ranked}
949
-
950
- # 신뢰도 계산
951
  gap = top[1]["score"] - (second[1]["score"] if second else 0)
952
  if gap >= 20 and top[1]["score"] >= 30:
953
  conf = "높음"
@@ -955,7 +729,6 @@ def estimate_model(text, sentences, morphemes, model_scores):
955
  conf = "중간"
956
  else:
957
  conf = "낮음"
958
-
959
  return {
960
  "model": top[0],
961
  "confidence": conf,
@@ -964,10 +737,6 @@ def estimate_model(text, sentences, morphemes, model_scores):
964
  "detail": evidence,
965
  "ranked": ranked
966
  }
967
-
968
- # ═══════════════════════════════════════════════
969
- # 품질
970
- # ═══════════════════════════════════════════════
971
  def analyze_quality(text, sentences, words, morphemes):
972
  qs = {}; sl = [len(s) for s in sentences]; tw = len(words)
973
  ideal = sum(1 for l in sl if 15<=l<=70)/len(sentences) if sentences else 0
@@ -998,24 +767,16 @@ def analyze_quality(text, sentences, words, morphemes):
998
  total = int(sum(qs[k]*wq[k] for k in wq))
999
  grade = "S" if total>=85 else "A" if total>=72 else "B" if total>=58 else "C" if total>=42 else "D" if total>=28 else "F"
1000
  return {"score":total,"grade":grade,"sub_scores":qs}
1001
-
1002
- # ═══════════════════════════════════════════════
1003
- # LLM 교차검증
1004
- # ═══════════════════════════════════════════════
1005
  LLM_JUDGES = [("openai/gpt-oss-120b","GPT-OSS 120B"),("qwen/qwen3-32b","Qwen3 32B"),("moonshotai/kimi-k2-instruct-0905","Kimi-K2")]
1006
-
1007
  def _parse_ai_probability(raw_resp):
1008
  """LLM 응답에서 AI 확률(0~100)을 추출. 한국어/영어 다양한 형식 대응."""
1009
  if not raw_resp: return -1
1010
- # 1. <think> 태그 분리
1011
  think_content = ''
1012
  think_m = re.search(r'<think>(.*?)</think>', raw_resp, flags=re.S)
1013
  if think_m: think_content = think_m.group(1)
1014
  resp = re.sub(r'<think>.*?</think>', '', raw_resp, flags=re.S).strip()
1015
  if not resp or len(resp) < 5:
1016
  resp = raw_resp # think만 있으면 원본 포함
1017
-
1018
- # 2. 특정 키워드 패턴 (높은 우선순위 — 첫 매칭)
1019
  specific_patterns = [
1020
  r'AI\s*확률\s*[::]\s*(?:약\s*)?(\d+)\s*%?',
1021
  r'AI\s*[Pp]robability\s*[::]\s*(?:about|approximately?\s*)?(\d+)\s*%?',
@@ -1031,11 +792,8 @@ def _parse_ai_probability(raw_resp):
1031
  if m:
1032
  v = int(m.group(1))
1033
  if 0 <= v <= 100: return v
1034
-
1035
- # 3. 범용 패턴 — 마지막 5줄에서만 검색 (통계 수치 오탐 방지)
1036
  lines = [l.strip() for l in resp.strip().split('\n') if l.strip()]
1037
  for line in reversed(lines[-5:]):
1038
- # 라인에 AI/확률/probability 키워드가 있으면 우선
1039
  if re.search(r'AI|확률|[Pp]robab|신뢰|판[정단]', line):
1040
  nums = re.findall(r'(\d+)\s*%', line)
1041
  if nums:
@@ -1045,54 +803,39 @@ def _parse_ai_probability(raw_resp):
1045
  if nums:
1046
  v = int(nums[-1])
1047
  if 0 <= v <= 100: return v
1048
-
1049
- # 4. 전체 텍스트에서 마지막 XX% (단, AI/확률 근처만)
1050
  all_pcts = list(re.finditer(r'(\d+)\s*(?:%|퍼센트|percent)', resp, re.I))
1051
  for m in reversed(all_pcts):
1052
  v = int(m.group(1))
1053
- # 주변 50자 내에 AI/확률 키워드 있는지
1054
  ctx_start = max(0, m.start()-50)
1055
  ctx = resp[ctx_start:m.end()+20]
1056
  if re.search(r'AI|확률|[Pp]robab|신뢰|판[정단]|가능성|likelihood', ctx, re.I):
1057
  if 0 <= v <= 100: return v
1058
-
1059
- # 5. 최후 수단: 전체에서 마지막 XX%
1060
  if all_pcts:
1061
  v = int(all_pcts[-1].group(1))
1062
  if 5 <= v <= 99: return v # 100% 제외 (통계 수치 오탐 방지)
1063
-
1064
- # 6. think 내부 폴백 (본문 파싱 실패 시)
1065
  if think_content:
1066
  for pat in specific_patterns:
1067
  m = re.search(pat, think_content, re.I)
1068
  if m:
1069
  v = int(m.group(1))
1070
  if 0 <= v <= 100: return v
1071
- # think 내부 마지막 XX%
1072
  think_pcts = re.findall(r'(\d+)\s*%', think_content)
1073
  if think_pcts:
1074
  v = int(think_pcts[-1])
1075
  if 5 <= v <= 99: return v
1076
-
1077
  return -1
1078
-
1079
  def llm_cross_check(text):
1080
  if not GROQ_KEY: return {"score":-1,"detail":{}}
1081
- # 한국어+영어 병행 프롬프트 (GPT-OSS는 영어 모델이므로)
1082
  prompt = f"""Analyze whether this text was written by AI.
1083
-
1084
  [Instructions]
1085
  1. Determine AI vs Human with 3 brief reasons
1086
  2. IMPORTANT - Your LAST line MUST be exactly this format:
1087
  AI확률: XX%
1088
  (Replace XX with your estimated probability 0-100)
1089
-
1090
  Example of correct last line:
1091
  AI확률: 75%
1092
-
1093
  [Text to analyze]
1094
  {text[:2000]}"""
1095
-
1096
  votes=[]; rpt={}
1097
  for mid,mn in LLM_JUDGES:
1098
  resp,err = call_groq(mid,prompt)
@@ -1101,52 +844,34 @@ AI확률: 75%
1101
  if p >= 0:
1102
  votes.append(p); rpt[mn]=f"{p}%"
1103
  else:
1104
- # 디버그: think 제거 후 응답 끝부분
1105
  cleaned = re.sub(r'<think>.*?</think>', '', resp, flags=re.S).strip()
1106
  tail = cleaned[-60:].replace('\n',' ') if len(cleaned) > 60 else cleaned.replace('\n',' ')
1107
  rpt[mn]=f"파싱실패({tail[:40]})"
1108
  else: rpt[mn]=f"ERR:{err[:30] if err else '?'}"
1109
  if votes: return {"score":int(sum(votes)/len(votes)),"detail":rpt}
1110
  return {"score":-1,"detail":rpt}
1111
-
1112
- # ═══════════════════════════════════════════════
1113
- # 종합 판정 (일관된 기준)
1114
- # ═══════════════════════════════════════════════
1115
  def compute_verdict(scores, llm_score=-1, sent_avg=-1, ppx_score=-1, hum_score=-1):
1116
  w={"통계":.06,"문체":.25,"반복성":.10,"구조":.12,"지문":.30}
1117
  ws=sum(scores[k]*w[k] for k in w)
1118
-
1119
- # ★ Perplexity 축 통합 (17%)
1120
  if ppx_score >= 0: ws += ppx_score * 0.17
1121
-
1122
- # ★ 교차 신호 부스트
1123
  style=scores["문체"]; fp=scores["지문"]; rep=scores["반복성"]; struct=scores["구조"]
1124
  if style>=35 and fp>=35: ws+=8
1125
  elif style>=30 and fp>=25: ws+=4
1126
  if style>=30 and rep>=25 and fp>=20: ws+=4
1127
  if fp>=45: ws+=3
1128
  if struct>=50 and style>=30: ws+=3
1129
- # Perplexity + 지문 동시 부스트
1130
  if ppx_score>=55 and fp>=35: ws+=5
1131
  if ppx_score>=65 and style>=35: ws+=3
1132
-
1133
- # ★ Humanizer 탐지 시 특별 부스트
1134
  if hum_score>=50:
1135
  ws=max(ws, 45) # Humanizer 확인 → 최소 AI 의심 중간
1136
  ws += (hum_score-50)*0.15
1137
-
1138
- # ★ 문장 수준 부스트
1139
  if sent_avg>=0 and sent_avg>ws: ws=ws*0.80+sent_avg*0.20
1140
-
1141
  hi=sum(1 for v in scores.values() if v>=50)
1142
  if hi>=4: ws+=8
1143
  elif hi>=3: ws+=5
1144
  elif hi>=2: ws+=2
1145
-
1146
- # ★ 인간 격식문 할인
1147
  if style<40 and fp<=20 and rep<22 and struct<35 and (ppx_score<0 or ppx_score<40):
1148
  ws-=5
1149
-
1150
  lo=sum(1 for v in scores.values() if v<20)
1151
  if lo>=3: ws-=8
1152
  elif lo>=2: ws-=3
@@ -1157,7 +882,6 @@ def compute_verdict(scores, llm_score=-1, sent_avg=-1, ppx_score=-1, hum_score=-
1157
  if fs>=45: return fs,"AI 의심 중간","ai_low"
1158
  if fs>=30: return fs,"판단 유보","uncertain"
1159
  return fs,"인간 작성 추정","human"
1160
-
1161
  def quick_score(text):
1162
  sents=split_sentences(text); words=split_words(text); morphs=get_morphemes(text)
1163
  sc={"통계":analyze_statistics(text,sents,words)["score"],"문체":analyze_korean_style(text,sents,morphs)["score"],
@@ -1169,10 +893,6 @@ def quick_score(text):
1169
  hum=analyze_humanizer(text,sents,words,morphs)
1170
  fs,v,lv=compute_verdict(sc, sent_avg=sent_avg, ppx_score=ppx["score"], hum_score=hum["score"])
1171
  return fs,v,lv,sc,ppx,hum
1172
-
1173
- # ═══════════════════════════════════════════════
1174
- # ═══════════════════════════════════════════════
1175
- # ★ 표절 검사 (Brave Search 병렬 + KCI/RISS/ARXIV + Gemini)
1176
  def brave_search(query, count=5):
1177
  """Brave Search API — 단일 쿼리"""
1178
  if not BRAVE_KEY: return []
@@ -1188,7 +908,6 @@ def brave_search(query, count=5):
1188
  return results
1189
  except: pass
1190
  return []
1191
-
1192
  def search_kci(query):
1193
  """KCI(한국학술지인용색인) 검색"""
1194
  try:
@@ -1201,7 +920,6 @@ def search_kci(query):
1201
  return results[:3]
1202
  except: pass
1203
  return []
1204
-
1205
  def search_riss(query):
1206
  """RISS(학술연구정보서비스) — 간접 검색"""
1207
  results = []
@@ -1215,7 +933,6 @@ def search_riss(query):
1215
  results.append({"title": title, "url": "https://www.riss.kr" + m.group(1), "snippet": "", "source": "RISS"})
1216
  except: pass
1217
  return results[:3]
1218
-
1219
  def search_arxiv(query):
1220
  """arXiv API 검색"""
1221
  results = []
@@ -1231,7 +948,6 @@ def search_arxiv(query):
1231
  except Exception as e:
1232
  pass
1233
  return results[:3]
1234
-
1235
  def gemini_plagiarism_check(text_chunk):
1236
  """Gemini + Google Search Grounding으로 표절 검사"""
1237
  if not HAS_GENAI or not GEMINI_KEY: return None
@@ -1241,7 +957,6 @@ def gemini_plagiarism_check(text_chunk):
1241
  prompt = f"""다음 텍스트가 인터넷에 존재하는지 Google Search로 확인하세요.
1242
  유사한 문장이 발견되면 출처 URL과 유사도(%)를 보고하세요.
1243
  마지막 줄에 "유사도: XX%" 형식으로 작성.
1244
-
1245
  [텍스트]
1246
  {text_chunk[:1000]}"""
1247
  resp = client.models.generate_content(
@@ -1262,7 +977,6 @@ def gemini_plagiarism_check(text_chunk):
1262
  return {"pct": pct, "response": text_resp, "sources": sources}
1263
  except Exception as e:
1264
  return {"pct": 0, "response": str(e)[:100], "sources": []}
1265
-
1266
  def parallel_brave_search(queries, max_workers=10):
1267
  """Brave Search 병렬 실행 (최대 20개)"""
1268
  all_results = {}
@@ -1275,7 +989,6 @@ def parallel_brave_search(queries, max_workers=10):
1275
  all_results[q] = results
1276
  except: all_results[q] = []
1277
  return all_results
1278
-
1279
  def duckduckgo_search(query, max_results=5):
1280
  """DuckDuckGo HTML 스크래핑 — API 키 불필요 폴백"""
1281
  results = []
@@ -1290,7 +1003,6 @@ def duckduckgo_search(query, max_results=5):
1290
  href = m.group(1)
1291
  title = re.sub(r'<[^>]+>', '', m.group(2)).strip()
1292
  snippet = re.sub(r'<[^>]+>', '', m.group(3)).strip()
1293
- # DuckDuckGo redirect URL 파싱
1294
  real_url = href
1295
  if 'uddg=' in href:
1296
  um = re.search(r'uddg=([^&]+)', href)
@@ -1300,41 +1012,31 @@ def duckduckgo_search(query, max_results=5):
1300
  if len(results) >= max_results: break
1301
  except: pass
1302
  return results
1303
-
1304
def self_crawl_search(query, max_results=3):
    """Key-free web-search fallback built on httpx scraping.

    Runs a DuckDuckGo query, and when the query does not already look
    academic, fires one extra academic-slanted follow-up query.
    Returns a combined list of result dicts (may contain duplicates;
    de-duplication happens downstream).
    """
    hits = list(duckduckgo_search(query, max_results))
    # Skip the follow-up when the query already mentions papers.
    already_academic = ('논문' in query) or ('paper' in query.lower())
    if not already_academic:
        hits.extend(duckduckgo_search(f"{query} 논문 학술", 2))
    return hits
1313
-
1314
  def run_plagiarism(text, progress=gr.Progress()):
1315
  if not text or len(text.strip())<50:
1316
  return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ 최소 50자 이상</div>", ""
1317
  text = text.strip()
1318
  sents = split_sentences(text)
1319
  now = datetime.now().strftime("%Y-%m-%d %H:%M")
1320
-
1321
  has_brave = bool(BRAVE_KEY)
1322
  has_gemini = bool(HAS_GENAI and GEMINI_KEY)
1323
-
1324
  progress(0.05, "문장 분리...")
1325
- # 문장을 3~5문장 단위로 블록화
1326
  blocks = []
1327
  for i in range(0, len(sents), 4):
1328
  block = ' '.join(sents[i:i+4])
1329
  if len(block) > 20:
1330
  blocks.append({"text": block, "sent_indices": list(range(i, min(i+4, len(sents))))})
1331
-
1332
  all_sources = []
1333
  sent_matches = {i: [] for i in range(len(sents))} # 문장별 매칭 정보
1334
  block_results = []
1335
  log_lines = []
1336
-
1337
- # Phase 1: 웹 검색 (Brave Search 병렬 or 자체 크롤링)
1338
  if has_brave:
1339
  progress(0.15, f"Brave Search 병렬 검색 ({len(blocks)}블록)...")
1340
  queries = []
@@ -1351,7 +1053,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1351
  sent_matches[si].append({"source": r["title"], "url": r["url"], "type": "Brave"})
1352
  log_lines.append(f"Brave Search: {len(queries)}쿼리 → {sum(len(v) for v in brave_results.values())}건")
1353
  else:
1354
- # 자체 크롤링 폴백 (DuckDuckGo + 병렬)
1355
  progress(0.15, f"자체 웹 검색 ({len(blocks)}블록)...")
1356
  crawl_queries = []
1357
  for b in blocks[:10]: # 최대 10블록
@@ -1369,15 +1070,11 @@ def run_plagiarism(text, progress=gr.Progress()):
1369
  sent_matches[si].append({"source": r["title"], "url": r["url"], "type": r.get("source","Web")})
1370
  except: pass
1371
  log_lines.append(f"자체 웹검색: {len(crawl_queries)}쿼리 (DuckDuckGo)")
1372
-
1373
- # Phase 2: 학술 DB (KCI, RISS, arXiv) — 키워드 추출 후 검색
1374
  progress(0.40, "학술 DB 검색 (KCI/RISS/arXiv)...")
1375
- # 핵심 키워드 추출
1376
  words = split_words(text)
1377
  wf = Counter(words)
1378
  keywords = [w for w, c in wf.most_common(20) if len(w) >= 2 and c >= 2][:5]
1379
  kw_query = ' '.join(keywords[:3])
1380
-
1381
  academic_results = []
1382
  with ThreadPoolExecutor(max_workers=3) as executor:
1383
  futures = [
@@ -1392,8 +1089,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1392
  all_sources.extend(results)
1393
  except: pass
1394
  log_lines.append(f"학술DB: KCI/RISS/arXiv → {len(academic_results)}건")
1395
-
1396
- # Phase 3: Gemini Google Search Grounding
1397
  gemini_results = []
1398
  if has_gemini:
1399
  progress(0.60, "Gemini + Google Search...")
@@ -1406,22 +1101,15 @@ def run_plagiarism(text, progress=gr.Progress()):
1406
  for si in b["sent_indices"]:
1407
  sent_matches[si].append({"source": src.get("title",""), "url": src.get("url",""), "type": "Google"})
1408
  log_lines.append(f"Gemini: {len(blocks[:5])}블록 → {sum(len(r.get('sources',[])) for r in gemini_results)}출처")
1409
-
1410
  progress(0.80, "보고서 생성...")
1411
-
1412
- # 유사도 계산
1413
  matched_sents = sum(1 for si, matches in sent_matches.items() if matches)
1414
  total_sents = len(sents)
1415
  plag_pct = int(matched_sents / total_sents * 100) if total_sents > 0 else 0
1416
-
1417
- # Gemini 유사도도 반영
1418
  if gemini_results:
1419
  gemini_pcts = [r["pct"] for r in gemini_results if r["pct"] > 0]
1420
  if gemini_pcts:
1421
  gemini_avg = sum(gemini_pcts) / len(gemini_pcts)
1422
  plag_pct = int(plag_pct * 0.5 + gemini_avg * 0.5)
1423
-
1424
- # 출처 중복 제거
1425
  seen_urls = set()
1426
  unique_sources = []
1427
  for s in all_sources:
@@ -1429,16 +1117,11 @@ def run_plagiarism(text, progress=gr.Progress()):
1429
  if url and url not in seen_urls:
1430
  seen_urls.add(url)
1431
  unique_sources.append(s)
1432
-
1433
- # 등급
1434
  if plag_pct >= 50: grade, grade_color, grade_bg = "표절 의심", "#FF4444", "#FFE0E0"
1435
  elif plag_pct >= 30: grade, grade_color, grade_bg = "주의 필요", "#FF8800", "#FFF0DD"
1436
  elif plag_pct >= 15: grade, grade_color, grade_bg = "유사 표현 일부", "#DDAA00", "#FFFBE0"
1437
  elif plag_pct >= 5: grade, grade_color, grade_bg = "양호", "#4ECDC4", "#E0FFF8"
1438
  else: grade, grade_color, grade_bg = "우수 (원본성 높음)", "#22AA44", "#E0FFE8"
1439
-
1440
-
1441
- # ═══ CopyKiller 정밀 재현 보고서 HTML ═══
1442
  sent_analysis = []
1443
  for i, s in enumerate(sents):
1444
  matches = sent_matches.get(i, [])
@@ -1448,8 +1131,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1448
  else:
1449
  sent_analysis.append({"idx":i, "text":s, "matched":False})
1450
  sim_sents = [s for s in sent_analysis if s["matched"]]
1451
-
1452
- # 출처 그룹핑
1453
  src_groups = {}
1454
  for src in unique_sources:
1455
  key = src.get("url","")[:80]
@@ -1457,29 +1138,23 @@ def run_plagiarism(text, progress=gr.Progress()):
1457
  src_groups[key] = {"title":src.get("title",""), "url":src.get("url",""), "source":src.get("source",""), "count":0}
1458
  src_groups[key]["count"] += 1
1459
  src_list = sorted(src_groups.values(), key=lambda x: -x["count"])
1460
-
1461
  methods_used = []
1462
  if has_brave: methods_used.append("Brave Search(병렬)")
1463
  elif all_sources: methods_used.append("DuckDuckGo(자체크롤링)")
1464
  methods_used.append("KCI · RISS · arXiv")
1465
  if has_gemini: methods_used.append("Gemini+Google Search")
1466
  method_str = " + ".join(methods_used)
1467
-
1468
  gc = grade_color
1469
  word_count = len(split_words(text))
1470
  char_count = len(text)
1471
  doc_id = hashlib.md5(text[:100].encode()).hexdigest()[:8].upper()
1472
  similarity_pct = plag_pct
1473
  citation_pct = 0
1474
-
1475
- # 문장 카테고리 분류 (CopyKiller 스타일)
1476
  cat_suspect = len(sim_sents) # 의심
1477
  cat_cited = 0 # 인용 (형식적 인용 감지)
1478
  cat_normal = total_sents - cat_suspect - cat_cited # 일반
1479
  cat_suspect_pct = int(cat_suspect / max(1, total_sents) * 100)
1480
  cat_normal_pct = 100 - cat_suspect_pct
1481
-
1482
- # 출처 유형 아이콘
1483
  def src_icon(s):
1484
  src = s.get("source","").lower()
1485
  if "kci" in src: return "📚", "KCI"
@@ -1488,8 +1163,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1488
  if "google" in src: return "🔍", "Google"
1489
  if "brave" in src: return "🌐", "Brave"
1490
  return "🌐", "Web"
1491
-
1492
- # 출처 테이블 행
1493
  src_rows = ""
1494
  for i, sg in enumerate(src_list[:15]):
1495
  pct = min(100, int(sg["count"] / max(1, total_sents) * 100 * 3))
@@ -1503,8 +1176,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1503
  <td style="padding:6px 8px;font-size:12px;text-align:center;border:1px solid #D5D5D5;font-weight:800;color:#D63031;">{pct}%</td>
1504
  <td style="padding:6px 10px;border:1px solid #D5D5D5;"><div style="background:#EDEDED;height:14px;border-radius:2px;overflow:hidden;"><div style="background:linear-gradient(90deg,#D63031,#FF7675);height:100%;width:{max(3,pct)}%;border-radius:2px;"></div></div></td>
1505
  </tr>"""
1506
-
1507
- # 의심 문장 대비 행
1508
  suspect_rows = ""
1509
  for i, sa in enumerate(sim_sents[:15]):
1510
  suspect_rows += f"""<tr>
@@ -1513,8 +1184,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1513
  <td style="padding:8px;font-size:10px;line-height:1.6;border:1px solid #D5D5D5;vertical-align:top;color:#555;"><span style="background:#FFE0E0;border-bottom:2px solid #E74C3C;padding:1px 3px;">{sa["text"][:70]}...</span></td>
1514
  <td style="padding:8px;font-size:10px;border:1px solid #D5D5D5;vertical-align:top;"><a href="{sa.get('url','#')}" target="_blank" style="color:#2E86C1;text-decoration:none;font-weight:600;">{sa["source"][:28]}</a><br><span style="font-size:8px;color:#AAA;">{sa.get('type','')}</span></td>
1515
  </tr>"""
1516
-
1517
- # 전체 텍스트 하이라이트 (CopyKiller 스타일 - 문장번호 + 색상)
1518
  full_hl = ""
1519
  for sa in sent_analysis:
1520
  sidx = sa["idx"] + 1
@@ -1522,21 +1191,15 @@ def run_plagiarism(text, progress=gr.Progress()):
1522
  full_hl += f'<span style="background:#FFD6D6;border-bottom:2px solid #E74C3C;padding:1px 2px;cursor:pointer;" title="[의심 #{sidx}] 출처: {sa.get("source","")}">{sa["text"]}</span> '
1523
  else:
1524
  full_hl += f'<span style="color:#333;">{sa["text"]}</span> '
1525
-
1526
- # 카테고리 바 너비
1527
  bar_suspect_w = max(2, cat_suspect_pct) if cat_suspect > 0 else 0
1528
  bar_normal_w = 100 - bar_suspect_w
1529
-
1530
- # CSS 상수
1531
  HDR_BG = '#3B7DD8'
1532
  HDR_BG2 = '#4A8DE0'
1533
  TH = 'padding:8px 10px;font-size:10px;font-weight:700;color:#fff;background:{};text-align:center;border:1px solid {};'.format(HDR_BG, HDR_BG)
1534
  TL = 'padding:7px 10px;font-size:11px;color:#444;font-weight:600;background:#EDF2FA;border:1px solid #D5D5D5;'
1535
  TV = 'padding:7px 10px;font-size:12px;color:#333;border:1px solid #D5D5D5;'
1536
  SEC = 'font-size:13px;font-weight:800;color:#1A3C6E;margin:0 0 10px 0;padding:8px 12px;background:#EDF2FA;border-left:4px solid {};border-bottom:1px solid #D5D5D5;'.format(HDR_BG)
1537
-
1538
  html = f"""<div style="font-family:'Noto Sans KR','Malgun Gothic','Apple SD Gothic Neo',sans-serif;max-width:780px;margin:0 auto;background:#fff;border:2px solid #3B7DD8;box-shadow:0 2px 12px rgba(0,0,0,0.08);">
1539
-
1540
  <!-- ═══════ 헤더 (CopyKiller 스타일) ═══════ -->
1541
  <div style="background:linear-gradient(135deg,{HDR_BG},{HDR_BG2});padding:18px 24px;color:#fff;">
1542
  <table style="width:100%;"><tr>
@@ -1550,7 +1213,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1550
  </td>
1551
  </tr></table>
1552
  </div>
1553
-
1554
  <!-- ═══════ 제출 정보 테이블 ═══════ -->
1555
  <div style="padding:16px 24px 0;">
1556
  <div style="{SEC}">📋 검사 정보</div>
@@ -1575,7 +1237,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1575
  </tr>
1576
  </table>
1577
  </div>
1578
-
1579
  <!-- ═══════ 검사 결과 (도넛 + 바) ═══════ -->
1580
  <div style="padding:18px 24px 0;">
1581
  <div style="{SEC}">📊 검사 결과</div>
@@ -1597,7 +1258,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1597
  <span style="font-size:11px;font-weight:800;color:{gc};">{grade}</span>
1598
  </div>
1599
  </td>
1600
-
1601
  <!-- 상세 결과 -->
1602
  <td style="padding:0;border:1px solid #D5D5D5;vertical-align:top;">
1603
  <!-- 카테고리 바 (CopyKiller 핵심) -->
@@ -1613,7 +1273,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1613
  <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#E8ECF0;border:1px solid #CCC;border-radius:2px;"></span> 일반 <b>{cat_normal}</b>건</span>
1614
  </div>
1615
  </div>
1616
-
1617
  <!-- 비율 바 3줄 -->
1618
  <div style="padding:12px 18px;">
1619
  <div style="margin-bottom:10px;">
@@ -1629,7 +1288,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1629
  <div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#3498DB,#85C1E9);height:100%;width:{max(1,citation_pct)}%;transition:width 0.5s;"></div></div>
1630
  </div>
1631
  </div>
1632
-
1633
  <!-- 요약 수치 -->
1634
  <div style="padding:8px 18px;background:#F8F9FB;border-top:1px solid #E8E8E8;">
1635
  <table style="width:100%;border-collapse:collapse;">
@@ -1647,7 +1305,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1647
  </tr>
1648
  </table>
1649
  </div>
1650
-
1651
  <!-- ═══════ 전체 텍스트 분석 ═══════ -->
1652
  <div style="padding:18px 24px 0;">
1653
  <div style="{SEC}">📝 전체 텍스트 분석</div>
@@ -1660,7 +1317,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1660
  </div>
1661
  <div style="padding:14px;background:#FAFBFC;border:1px solid #D5D5D5;line-height:2.1;font-size:13px;max-height:300px;overflow-y:auto;">{full_hl}</div>
1662
  </div>
1663
-
1664
  <!-- ═══════ 표절 의심 출처 ═══════ -->
1665
  <div style="padding:18px 24px 0;">
1666
  <div style="{SEC}">🔗 표절 의심 출처 ({len(src_list)}건)</div>
@@ -1675,7 +1331,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1675
  {src_rows if src_rows else '<tr><td colspan="5" style="padding:18px;text-align:center;color:#999;font-size:11px;border:1px solid #D5D5D5;background:#FAFBFC;">발견된 유사 출처가 없습니다.</td></tr>'}
1676
  </table>
1677
  </div>
1678
-
1679
  <!-- ═══════ 의심 문장 비교 ═══════ -->
1680
  <div style="padding:18px 24px 0;">
1681
  <div style="{SEC}">⚠️ 의심 문장 비교 ({len(sim_sents)}건)</div>
@@ -1689,7 +1344,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1689
  {suspect_rows if suspect_rows else '<tr><td colspan="4" style="padding:18px;text-align:center;color:#999;font-size:11px;border:1px solid #D5D5D5;background:#FAFBFC;">유사 의심 문장이 발견되지 않았습니다.</td></tr>'}
1690
  </table>
1691
  </div>
1692
-
1693
  <!-- ═══════ 검사 안내 ═══════ -->
1694
  <div style="margin:18px 24px;padding:12px 14px;background:#F5F8FC;border:1px solid #D0DAEA;border-radius:4px;font-size:9px;color:#555;line-height:1.8;">
1695
  <b style="color:#333;">📌 검사 안내</b><br>
@@ -1698,7 +1352,6 @@ def run_plagiarism(text, progress=gr.Progress()):
1698
  · 유사도는 문장 단위 매칭 기반이며, 최종 판정은 교수자/검토자의 확인이 필요합니다.<br>
1699
  · 인용 표기(따옴표, 각주 등)가 포함된 문장은 인용으로 분류될 수 있습니다.
1700
  </div>
1701
-
1702
  <!-- ═══════ 푸터 (CopyKiller 스타일) ═══════ -->
1703
  <div style="padding:10px 24px;background:#F0F3F8;border-top:2px solid {HDR_BG};display:flex;justify-content:space-between;align-items:center;">
1704
  <div>
@@ -1711,13 +1364,8 @@ def run_plagiarism(text, progress=gr.Progress()):
1711
  </div>
1712
  </div>
1713
  </div>"""
1714
-
1715
  log = '\n'.join(log_lines) + f"\n\n종합: {plag_pct}% {grade} | 출처 {len(unique_sources)}건 | 유사문장 {matched_sents}/{total_sents}"
1716
  return html, log
1717
-
1718
- # ═══════════════════════════════════════════════
1719
- # 탭1: 분석 (명확한 출력)
1720
- # ═══════════════════════════════════════════════
1721
  def run_detection(text, progress=gr.Progress()):
1722
  if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ 최소 50자</div>",""
1723
  text=text.strip()
@@ -1734,17 +1382,12 @@ def run_detection(text, progress=gr.Progress()):
1734
  sc={"통계":s1["score"],"문체":s2["score"],"반복성":s3["score"],"구조":s4["score"],"지문":s5["score"]}
1735
  sent_scores=[score_sentence(s)[0] for s in sents]
1736
  sent_avg=sum(sent_scores)/len(sent_scores) if sent_scores else -1
1737
-
1738
- # ★ 모델 추정
1739
  ms_raw=s5.get("model_scores",{})
1740
  model_est=estimate_model(text,sents,morphs,ms_raw)
1741
-
1742
  fs,verdict,level=compute_verdict(sc,lr["score"],sent_avg=sent_avg,ppx_score=ppx["score"],hum_score=hum["score"])
1743
  progress(0.95)
1744
  cm={"ai_high":("#FF4444","#FFE0E0","높음"),"ai_medium":("#FF8800","#FFF0DD","중간~높음"),"ai_low":("#DDAA00","#FFFBE0","중간"),"uncertain":("#888","#F0F0F0","낮음"),"human":("#22AA44","#E0FFE8","매우 낮음")}
1745
  fg,bg,conf=cm.get(level,("#888","#F0F0F0","?"))
1746
-
1747
- # 모델 추정 표시
1748
  est_model=model_est.get("model","특정 불가")
1749
  est_conf=model_est.get("confidence","낮음")
1750
  est_reasons=model_est.get("reasons",[])
@@ -1754,41 +1397,30 @@ def run_detection(text, progress=gr.Progress()):
1754
  mt=f"{est_model} (참고)"
1755
  else:
1756
  mt="특정 불가"
1757
-
1758
  ai_sents=sum(1 for s in sent_scores if s>=40)
1759
  human_sents=sum(1 for s in sent_scores if s<20)
1760
-
1761
def gb(l, s, w="", desc=""):
    """Render one labeled AI-axis score bar as an HTML snippet.

    Color is keyed to severity: red >=70, orange >=50, yellow >=35,
    green otherwise. `w` (weight tag) and `desc` are optional add-ons.
    """
    if s >= 70:
        c = "#FF4444"
    elif s >= 50:
        c = "#FF8800"
    elif s >= 35:
        c = "#DDAA00"
    else:
        c = "#22AA44"
    wt = f"<span style='color:#999;font-size:9px;'> ×{w}</span>" if w else ""
    dt = f"<div style='font-size:9px;color:#888;margin-top:1px;'>{desc}</div>" if desc else ""
    return f"<div style='margin:4px 0;'><div style='display:flex;justify-content:space-between;'><span style='font-size:11px;font-weight:600;'>{l}{wt}</span><span style='font-size:11px;font-weight:700;color:{c};'>{s}</span></div><div style='background:#E8E8E8;border-radius:4px;height:7px;'><div style='background:{c};height:100%;width:{s}%;border-radius:4px;'></div></div>{dt}</div>"
1766
-
1767
- # 모델 지문 바
1768
  mb=""
1769
  for mn in ["GPT","Claude","Gemini","Perplexity"]:
1770
  s=ms_raw.get(mn,0); mc="#FF4444" if s>=40 else "#FF8800" if s>=20 else "#CCC"
1771
- # 추정 모델 표시
1772
  tag=""
1773
  if mn==est_model and est_conf!="낮음":
1774
  tag=f" <span style='background:#FF4444;color:white;font-size:7px;padding:0 3px;border-radius:3px;'>추정</span>"
1775
  mb+=f"<div style='display:flex;align-items:center;gap:4px;margin:2px 0;'><span style='width:66px;font-size:10px;font-weight:600;'>{mn}{tag}</span><div style='flex:1;background:#E8E8E8;border-radius:3px;height:5px;'><div style='background:{mc};height:100%;width:{s}%;'></div></div><span style='font-size:9px;width:18px;text-align:right;color:{mc};'>{s}</span></div>"
1776
-
1777
- # LLM 섹션
1778
  ls=""
1779
  if lr["score"]>=0:
1780
  lsc=lr["score"]
1781
  lr_rows="".join(f"<div style='font-size:9px;color:#555;'>{mn}: {lr['detail'].get(mn,'—')}</div>" for _,mn in LLM_JUDGES)
1782
  ls=f"<div style='margin-top:8px;padding:8px;background:#F8F8FF;border-radius:6px;border:1px solid #E0E0FF;'><div style='font-size:10px;font-weight:700;margin-bottom:3px;'>🤖 LLM 교차검증 (평균 {lsc}%)</div>{lr_rows}</div>"
1783
  else: ls="<div style='margin-top:6px;padding:4px 8px;background:#F5F5F5;border-radius:4px;color:#999;font-size:9px;'>🤖 GROQ_API_KEY 미설정</div>"
1784
-
1785
- # 품질
1786
  qs=qr["sub_scores"]; gc={"S":"#FF6B6B","A":"#4ECDC4","B":"#45B7D1","C":"#DDAA00","D":"#FF8800","F":"#FF4444"}.get(qr["grade"],"#888")
1787
def qgb(l, s):
    """Render one compact quality-sub-score bar as an HTML snippet.

    Color runs green → teal → yellow → orange as the score drops.
    """
    # First threshold met wins; fall through to orange for low scores.
    palette = ((70, "#22AA44"), (55, "#4ECDC4"), (40, "#DDAA00"))
    c = "#FF8800"
    for cutoff, color in palette:
        if s >= cutoff:
            c = color
            break
    return f"<div style='margin:2px 0;display:flex;align-items:center;gap:4px;'><span style='width:50px;font-size:10px;'>{l}</span><div style='flex:1;background:#E8E8E8;border-radius:3px;height:5px;'><div style='background:{c};height:100%;width:{s}%;'></div></div><span style='font-size:9px;color:{c};width:18px;text-align:right;'>{s}</span></div>"
1790
-
1791
- # ★ 판정 이유 (3대 킬러 통합)
1792
  reasons=[]
1793
  if sc["문체"]>=70: reasons.append("격식체 종결어미가 대부분, AI형 접속사·상투표현 다수 감지")
1794
  elif sc["문체"]>=50: reasons.append("격식체와 AI형 표현이 혼재")
@@ -1805,8 +1437,6 @@ def run_detection(text, progress=gr.Progress()):
1805
  reasons.append(f"🔍 추정 모델: <b>{est_model}</b> ({est_why})")
1806
  if not reasons: reasons.append("인간적 표현이 우세하며 AI 패턴이 약함")
1807
  reason_html='<br>'.join(f"• {r}" for r in reasons)
1808
-
1809
- # ★ Perplexity 카드
1810
  ppx_c="#FF4444" if ppx["score"]>=65 else "#FF8800" if ppx["score"]>=50 else "#DDAA00" if ppx["score"]>=35 else "#22AA44"
1811
  ppx_html=f"""<div style='margin-top:8px;padding:8px;background:linear-gradient(135deg,#FFF8F0,#FFF0FF);border-radius:6px;border:1px solid #E8D0FF;'>
1812
  <div style='font-size:10px;font-weight:700;margin-bottom:4px;'>🧠 Perplexity 분석 <span style='color:{ppx_c};font-size:12px;font-weight:900;'>{ppx["score"]}점</span></div>
@@ -1817,8 +1447,6 @@ def run_detection(text, progress=gr.Progress()):
1817
  <span style='font-size:9px;color:#777;'>Zipf적합: {ppx.get("zipf",0)}</span>
1818
  </div>
1819
  </div>"""
1820
-
1821
- # ★ Humanizer 탐지 카드
1822
  hum_html=""
1823
  if hum["score"]>=30:
1824
  hc="#FF4444" if hum["score"]>=65 else "#FF8800" if hum["score"]>=50 else "#DDAA00"
@@ -1827,8 +1455,6 @@ def run_detection(text, progress=gr.Progress()):
1827
  <div style='font-size:10px;font-weight:700;margin-bottom:3px;'>🛡️ Humanizer 탐지 <span style='color:{hc};font-size:12px;font-weight:900;'>{hum["score"]}점</span></div>
1828
  {sig_rows}
1829
  </div>"""
1830
-
1831
- # ★ 모델 추정 카드
1832
  est_html=""
1833
  if est_model!="특정 불가":
1834
  ec="#FF4444" if est_conf=="높음" else "#FF8800" if est_conf=="중간" else "#DDAA00"
@@ -1842,7 +1468,6 @@ def run_detection(text, progress=gr.Progress()):
1842
  {ranked_html}
1843
  <div style='font-size:8px;color:#999;margin-top:2px;'>근거: {", ".join(est_reasons[:3]) if est_reasons else "복합 지표"}</div>
1844
  </div>"""
1845
-
1846
  html=f"""<div style="font-family:'Pretendard','Noto Sans KR',sans-serif;max-width:720px;margin:0 auto;">
1847
  <div style="background:{bg};border:2px solid {fg};border-radius:14px;padding:20px;margin-bottom:12px;">
1848
  <div style="display:flex;align-items:center;gap:16px;">
@@ -1864,7 +1489,6 @@ def run_detection(text, progress=gr.Progress()):
1864
  <div style="font-size:11px;color:#555;line-height:1.7;">{reason_html}</div>
1865
  </div>
1866
  </div>
1867
-
1868
  <div style="display:grid;grid-template-columns:1fr 1fr;gap:8px;">
1869
  <div style="background:#FAFAFA;border-radius:8px;padding:10px;">
1870
  <div style="font-size:10px;font-weight:700;margin-bottom:4px;">📊 AI 탐지 5축 + Perplexity</div>
@@ -1891,23 +1515,17 @@ def run_detection(text, progress=gr.Progress()):
1891
  </div>"""
1892
  log=f"AI:{fs}점 [{verdict}] 신뢰:{conf} | 모델:{mt} | PPX:{ppx['score']} HUM:{hum['score']} | 품질:{qr['grade']}({qr['score']})\n축: 통계{sc['통계']} 문체{sc['문체']} 반복{sc['반복성']} 구조{sc['구조']} 지문{sc['지문']} PPX{ppx['score']} HUM{hum['score']}"
1893
  return html, log
1894
-
1895
- # ═══════════════════════════════════════════════
1896
- # 탭2: 하이라이트 (탭1과 동일 기준)
1897
- # ═══════════════════════════════════════════════
1898
  def run_highlight(text):
1899
  if not text or len(text.strip())<30: return "<div style='color:#888;'>텍스트 필요</div>"
1900
  sents=split_sentences(text)
1901
  hl=[]
1902
  for s in sents:
1903
  sc, reasons = score_sentence(s)
1904
- # 5단계 색상
1905
  if sc >= 60: bg="rgba(220,38,38,0.35)"; level="AI확신"
1906
  elif sc >= 40: bg="rgba(249,115,22,0.30)"; level="AI의심"
1907
  elif sc >= 25: bg="rgba(234,179,8,0.25)"; level="주의"
1908
  elif sc >= 10: bg="rgba(132,204,22,0.15)"; level="인간추정"
1909
  else: bg="rgba(34,197,94,0.20)"; level="인간"
1910
- # 근거 상세
1911
  detail_parts = []
1912
  for r in reasons:
1913
  if '격식' in r or '비격식AI' in r: detail_parts.append(f"🔤 {r}")
@@ -1918,13 +1536,11 @@ def run_highlight(text):
1918
  else: detail_parts.append(r)
1919
  tt = ' | '.join(detail_parts) if detail_parts else '특이 패턴 없음'
1920
  hl.append(f'<span style="background:{bg};padding:2px 4px;border-radius:4px;display:inline;line-height:2.2;border-bottom:2px solid {"#DC2626" if sc>=60 else "#F97316" if sc>=40 else "#EAB308" if sc>=25 else "#84CC16" if sc>=10 else "#22C55E"};" title="[{level}] {tt} ({sc}점)">{s}</span>')
1921
-
1922
  total_scores = [score_sentence(s)[0] for s in sents]
1923
  avg_sc = sum(total_scores)/len(total_scores) if total_scores else 0
1924
  ai_high = sum(1 for s in total_scores if s >= 60)
1925
  ai_mid = sum(1 for s in total_scores if 40 <= s < 60)
1926
  human_cnt = sum(1 for s in total_scores if s < 25)
1927
-
1928
  return f"""<div style='font-family:Pretendard,sans-serif;'>
1929
  <div style='margin-bottom:10px;padding:10px;background:#F8F8FF;border-radius:8px;'>
1930
  <div style='display:flex;gap:8px;align-items:center;font-size:11px;margin-bottom:6px;flex-wrap:wrap;'>
@@ -1938,30 +1554,16 @@ def run_highlight(text):
1938
  </div>
1939
  <div style='line-height:2.4;font-size:14px;'>{' '.join(hl)}</div>
1940
  </div>"""
1941
-
1942
- # ═══════════════════════════════════════════════
1943
- # GRADIO UI
1944
- # ═══════════════════════════════════════════════
1945
  SAMPLE_AI = """인공지능 기술은 현대 사회에서 매우 중요한 역할을 하고 있습니다. 특히 자연어 처리 분야에서의 발전은 눈부신 성과를 거두고 있습니다. 이러한 기술의 발전은 다양한 산업 분야에 긍정적인 영향을 미치고 있으며, 향후 더욱 발전할 것으로 예상됩니다.
1946
-
1947
  또한 생성형 AI의 등장으로 콘텐츠 제작 방식이 크게 변화하고 있습니다. 이를 통해 기업들은 효율적인 콘텐츠 생산이 가능해졌으며, 개인 사용자들도 다양한 창작 활동에 AI를 활용할 수 있게 되었습니다. 따라서 AI 리터러시의 중요성이 더욱 부각되고 있습니다.
1948
-
1949
  나아가 AI 윤리와 규제에 대한 논의도 활발히 진행되고 있습니다. 특히 AI가 생성한 콘텐츠의 저작권 문제는 중요한 의미를 가지며, 이에 대한 법적 프레임워크 구축이 필요합니다. 결과적으로 기술 발전과 함께 사회적 합의를 이루는 것이 중요합니다."""
1950
-
1951
  SAMPLE_HUMAN = """아 진짜 요즘 AI 때문에 머리 아프다ㅋㅋㅋ 어제 chatgpt한테 레포트 써달라고 했는데 완전 교과서 같은 글만 써줘서 그냥 내가 다시 썼음;;
1952
-
1953
  근데 생각해보면 AI가 쓴 글이랑 사람이 쓴 글이 확실히 다르긴 해. 뭔가... 너무 깔끔하달까? 사람은 이렇게 횡설수설도 하고 맞춤법도 틀리고 그러잖아.
1954
-
1955
  교수님이 AI 탐지기 돌린다고 해서 좀 무서운데 ㅠㅠ 나는 진짜 직접 쓴 건데 혹시 오탐 나면 어쩌지... 걱정된다 진심으로."""
1956
-
1957
- # ═══════════════════════════════════════════════
1958
- # 탭5: 문서 업로드 → 섹션별 히트맵 분석 + PDF 보고서
1959
- # ═══════════════════════════════════════════════
1960
  def run_document_analysis(file, progress=gr.Progress()):
1961
  """문서 파일 업로드 → 섹션별 AI 탐지 히트맵 + PDF 보고서 생성"""
1962
  if file is None:
1963
  return "<div style='padding:20px;text-align:center;color:#888;'>📄 파일을 업로드하세요 (PDF, DOCX, HWP, HWPX, TXT)</div>", "", None
1964
-
1965
  if hasattr(file, 'path'):
1966
  file_path = file.path
1967
  elif hasattr(file, 'name'):
@@ -1970,30 +1572,23 @@ def run_document_analysis(file, progress=gr.Progress()):
1970
  file_path = str(file)
1971
  fname = os.path.basename(file_path)
1972
  progress(0.05, f"📄 {fname} 읽는 중...")
1973
-
1974
  sections, full_text, error = extract_text_from_file(file_path)
1975
  if error:
1976
  return f"<div style='padding:20px;text-align:center;color:#E44;'>⚠️ {error}</div>", "", None
1977
  if not sections or not full_text or len(full_text.strip()) < 50:
1978
  return "<div style='padding:20px;text-align:center;color:#E44;'>⚠️ 텍스트가 충분하지 않습니다 (50자 미만)</div>", "", None
1979
-
1980
  progress(0.15, "전체 텍스트 분석...")
1981
- # 전체 분석
1982
  sents_all = split_sentences(full_text)
1983
  words_all = split_words(full_text)
1984
  morphs_all = get_morphemes(full_text)
1985
  total_score, total_verdict, total_level, total_axes, total_ppx, total_hum = quick_score(full_text)
1986
  quality = analyze_quality(full_text, sents_all, words_all, morphs_all)
1987
-
1988
- # LLM 교차검증 (전체)
1989
  progress(0.30, "LLM 교차검증...")
1990
  llm_result = llm_cross_check(full_text[:3000])
1991
  if llm_result["score"] >= 0:
1992
  _sent_scores = [score_sentence(s)[0] for s in sents_all]
1993
  _sent_avg = sum(_sent_scores)/len(_sent_scores) if _sent_scores else -1
1994
  total_score, total_verdict, total_level = compute_verdict(total_axes, llm_result["score"], sent_avg=_sent_avg, ppx_score=total_ppx["score"], hum_score=total_hum["score"])
1995
-
1996
- # 섹션별 분석
1997
  progress(0.45, f"{len(sections)}개 섹션 분석...")
1998
  section_results = []
1999
  for i, sec in enumerate(sections):
@@ -2001,7 +1596,6 @@ def run_document_analysis(file, progress=gr.Progress()):
2001
  section_results.append({"idx": i+1, "text": sec, "score": -1, "verdict": "너무 짧음", "skipped": True})
2002
  continue
2003
  s_score, s_verdict, s_level, s_axes, _, _ = quick_score(sec)
2004
- # 문장별 하이라이트
2005
  sec_sents = split_sentences(sec)
2006
  sent_scores = []
2007
  for sent in sec_sents:
@@ -2014,15 +1608,11 @@ def run_document_analysis(file, progress=gr.Progress()):
2014
  })
2015
  pct = 0.45 + (i / max(len(sections), 1)) * 0.30
2016
  progress(pct, f"섹션 {i+1}/{len(sections)}")
2017
-
2018
- # ═══ HTML 히트맵 보고서 ═══
2019
  now = datetime.now().strftime("%Y-%m-%d %H:%M")
2020
  ext = Path(file_path).suffix.upper()
2021
  cm_map = {"ai_high": ("#FF4444", "#FFE0E0"), "ai_medium": ("#FF8800", "#FFF0DD"),
2022
  "ai_low": ("#DDAA00", "#FFFBE0"), "uncertain": ("#888", "#F5F5F5"), "human": ("#22AA44", "#E0FFE8")}
2023
  tc, tbg = cm_map.get(total_level, ("#888", "#F5F5F5"))
2024
-
2025
- # 섹션별 히트맵 바 HTML
2026
  heatmap_cells = []
2027
  for sr in section_results:
2028
  sidx = sr["idx"]
@@ -2033,13 +1623,10 @@ def run_document_analysis(file, progress=gr.Progress()):
2033
  ssc = sr["score"]; svd = sr["verdict"]
2034
  heatmap_cells.append(f"<div style='flex:1;min-width:18px;height:28px;background:{sc};border-radius:3px;opacity:0.8;cursor:pointer;' title='섹션{sidx}: AI {ssc}점 ({svd})'></div>")
2035
  heatmap_bar = f"<div style='display:flex;gap:3px;margin:12px 0;'>" + ''.join(heatmap_cells) + "</div>"
2036
-
2037
- # 섹션 상세 카드
2038
  section_cards = []
2039
  for sr in section_results:
2040
  if sr["skipped"]: continue
2041
  sc, sbg = cm_map.get(sr.get("level", "uncertain"), ("#888", "#F5F5F5"))
2042
- # 문장 하이라이트 (score_sentence 기반)
2043
  sent_html = ""
2044
  for ss in sr.get("sent_scores", []):
2045
  s = ss["score"]
@@ -2048,14 +1635,12 @@ def run_document_analysis(file, progress=gr.Progress()):
2048
  elif s >= 25: sclr = "background:rgba(221,170,0,0.08);border-bottom:1px solid #DDAA00;"
2049
  else: sclr = ""
2050
  sent_html += f"<span style='{sclr}padding:1px 2px;border-radius:2px;' title='AI {s}점'>{ss['text']}</span> "
2051
-
2052
  axes_html = ""
2053
  if "axes" in sr:
2054
  ax = sr["axes"]
2055
  for k, v in ax.items():
2056
  axc = "#FF4444" if v >= 50 else "#FF8800" if v >= 30 else "#22AA44"
2057
  axes_html += f"<span style='display:inline-block;margin:1px 3px;padding:2px 8px;background:{axc}22;border:1px solid {axc}44;border-radius:10px;font-size:9px;color:{axc};'>{k} {v}</span>"
2058
-
2059
  section_cards.append(f"""
2060
  <div style="border:1px solid #E0E0E0;border-left:4px solid {sc};border-radius:8px;padding:14px;margin-bottom:10px;background:white;">
2061
  <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:8px;">
@@ -2065,19 +1650,14 @@ def run_document_analysis(file, progress=gr.Progress()):
2065
  <div style="margin-bottom:6px;">{axes_html}</div>
2066
  <div style="font-size:12px;line-height:1.8;color:#333;">{sent_html}</div>
2067
  </div>""")
2068
-
2069
- # AI 비율 분포
2070
  ai_high = sum(1 for s in section_results if not s["skipped"] and s["score"] >= 60)
2071
  ai_med = sum(1 for s in section_results if not s["skipped"] and 35 <= s["score"] < 60)
2072
  ai_low = sum(1 for s in section_results if not s["skipped"] and s["score"] < 35)
2073
  valid_sections = [s for s in section_results if not s["skipped"]]
2074
-
2075
- # LLM 교차검증 정보
2076
  llm_info = ""
2077
  if llm_result["score"] >= 0:
2078
  llm_rows = ''.join(f"<span style='margin-right:8px;font-size:10px;color:#555;'>{mn}: {llm_result['detail'].get(mn,'—')}</span>" for _, mn in LLM_JUDGES)
2079
  llm_info = f"<div style='margin-top:8px;padding:8px 12px;background:#F8F8FF;border-radius:6px;border:1px solid #E0E0FF;font-size:10px;'><b>🤖 LLM 교차검증:</b> 평균 {llm_result['score']}% | {llm_rows}</div>"
2080
-
2081
  html = f"""<div style="font-family:'Pretendard','Noto Sans KR',sans-serif;max-width:800px;margin:0 auto;background:#FAFBFC;border-radius:12px;border:1px solid #E0E0E0;overflow:hidden;">
2082
  <!-- 헤더 -->
2083
  <div style="background:linear-gradient(135deg,#1A1F36,#2D3561);padding:20px 24px;color:white;">
@@ -2092,7 +1672,6 @@ def run_document_analysis(file, progress=gr.Progress()):
2092
  </div>
2093
  </div>
2094
  </div>
2095
-
2096
  <!-- 요약 -->
2097
  <div style="padding:16px 24px;">
2098
  <div style="display:grid;grid-template-columns:repeat(4,1fr);gap:10px;margin-bottom:12px;">
@@ -2113,7 +1692,6 @@ def run_document_analysis(file, progress=gr.Progress()):
2113
  <div style="font-size:10px;color:#888;">품질 등급</div>
2114
  </div>
2115
  </div>
2116
-
2117
  <!-- 히트맵 바 -->
2118
  <div style="background:white;border:1px solid #E0E0E0;border-radius:8px;padding:12px;">
2119
  <div style="font-size:11px;font-weight:700;margin-bottom:6px;">🗺️ 섹션별 AI 히트맵 (빨강=AI의심, 초록=인간)</div>
@@ -2127,25 +1705,20 @@ def run_document_analysis(file, progress=gr.Progress()):
2127
  </div>
2128
  {llm_info}
2129
  </div>
2130
-
2131
  <!-- 섹션 상세 -->
2132
  <div style="padding:0 24px 20px;">
2133
  <div style="font-size:13px;font-weight:700;margin-bottom:10px;">📊 섹션별 상세 분석 ({len(valid_sections)}개)</div>
2134
  {''.join(section_cards)}
2135
  </div>
2136
-
2137
  <!-- 푸터 -->
2138
  <div style="padding:10px 24px;background:#F0F3F8;border-top:1px solid #E0E0E0;display:flex;justify-content:space-between;">
2139
  <span style="font-size:11px;font-weight:800;color:#1A1F36;">AI Detector v4.0</span>
2140
  <span style="font-size:9px;color:#AAA;">{now} · 5축 앙상블 + LLM 교차검증</span>
2141
  </div>
2142
  </div>"""
2143
-
2144
- # ═══ PDF 보고서 생성 ═══
2145
  progress(0.90, "PDF 보고서 생성...")
2146
  pdf_path = _generate_pdf_report(fname, total_score, total_verdict, total_level,
2147
  total_axes, quality, section_results, llm_result, now)
2148
-
2149
  log = f"파일: {fname} ({ext})\n"
2150
  log += f"섹션: {len(sections)}개 | 전체: {len(full_text)}자\n"
2151
  log += f"총점: {total_score} ({total_verdict})\n"
@@ -2154,23 +1727,17 @@ def run_document_analysis(file, progress=gr.Progress()):
2154
  if llm_result["score"] >= 0:
2155
  log += f"LLM 교차검증: {llm_result['score']}%\n"
2156
  return html, log, pdf_path
2157
-
2158
-
2159
  def _generate_pdf_report(fname, score, verdict, level, axes, quality, sections, llm_result, now):
2160
  """HTML → PDF 변환으로 보고서 생성"""
2161
  try:
2162
  cm = {"ai_high":"#FF4444","ai_medium":"#FF8800","ai_low":"#DDAA00","uncertain":"#888","human":"#22AA44"}
2163
  tc = cm.get(level, "#888")
2164
-
2165
- # 섹션 테이블 행
2166
  sec_rows = ""
2167
  for sr in sections:
2168
  if sr["skipped"]: continue
2169
  sc = cm.get(sr.get("level","uncertain"),"#888")
2170
  sec_rows += f"<tr><td style='padding:6px 10px;border:1px solid #DDD;text-align:center;'>{sr['idx']}</td><td style='padding:6px 10px;border:1px solid #DDD;'>{sr['text'][:80]}...</td><td style='padding:6px 10px;border:1px solid #DDD;text-align:center;color:{sc};font-weight:700;'>{sr['score']}</td><td style='padding:6px 10px;border:1px solid #DDD;text-align:center;'>{sr['verdict']}</td></tr>"
2171
-
2172
  ax_rows = ''.join(f"<tr><td style='padding:4px 10px;border:1px solid #DDD;'>{k}</td><td style='padding:4px 10px;border:1px solid #DDD;text-align:center;font-weight:700;'>{v}/100</td></tr>" for k, v in axes.items())
2173
-
2174
  html_content = f"""<!DOCTYPE html><html><head><meta charset="UTF-8">
2175
  <style>
2176
  body{{font-family:sans-serif;margin:30px;font-size:12px;color:#333;}}
@@ -2183,28 +1750,22 @@ def _generate_pdf_report(fname, score, verdict, level, axes, quality, sections,
2183
  </style></head><body>
2184
  <h1>📄 AI 글 판별 보고서</h1>
2185
  <p><b>파일:</b> {fname} | <b>생성:</b> {now} | <b>엔진:</b> AI Detector v4.0</p>
2186
-
2187
  <h2>종합 결과</h2>
2188
  <div class="score-box">{score}점</div>
2189
  <span style="margin-left:12px;font-size:16px;font-weight:700;">{verdict}</span>
2190
  <p><b>품질:</b> {quality['grade']} ({quality['score']}점)</p>
2191
-
2192
  <h2>5축 분석</h2>
2193
  <table><tr><th>축</th><th>점수</th></tr>{ax_rows}</table>
2194
-
2195
  <h2>섹션별 분석 ({len([s for s in sections if not s['skipped']])}개)</h2>
2196
  <table>
2197
  <tr><th>No</th><th>내용 (발췌)</th><th>AI 점수</th><th>판정</th></tr>
2198
  {sec_rows}
2199
  </table>
2200
-
2201
  <div class="footer">
2202
  AI Detector v4.0 — 5축 앙상블 + LLM 교차검증 | {now}<br>
2203
  본 보고서는 AI 판별 참고 자료이며, 최종 판단은 검토자의 확인이 필요합니다.
2204
  </div>
2205
  </body></html>"""
2206
-
2207
- # HTML 파일 저장 → 다운로드용
2208
  report_dir = tempfile.mkdtemp()
2209
  html_path = os.path.join(report_dir, f"AI_Report_{fname}.html")
2210
  with open(html_path, 'w', encoding='utf-8') as f:
@@ -2213,13 +1774,10 @@ def _generate_pdf_report(fname, score, verdict, level, axes, quality, sections,
2213
  except Exception as e:
2214
  print(f"PDF 보고서 생성 오류: {e}")
2215
  return None
2216
-
2217
-
2218
  def extract_file_text_api(file):
2219
  """파일 업로드 → 텍스트 추출 API (탭1~4용)"""
2220
  if file is None:
2221
  return "⚠️ 파일이 없습니다."
2222
- # Gradio 6.x: FileData 객체 / NamedString / str 모두 처리
2223
  if hasattr(file, 'path'):
2224
  file_path = file.path
2225
  elif hasattr(file, 'name'):
@@ -2235,8 +1793,6 @@ def extract_file_text_api(file):
2235
  if not full_text or len(full_text.strip()) < 10:
2236
  return f"⚠️ {fname}: 텍스트 추출 결과 없음"
2237
  return full_text.strip()
2238
-
2239
-
2240
  with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2241
  gr.Markdown("# 🔎 AI 글 판별기 v5.1\n**5축+Perplexity+Humanizer탐지+모델추정 · 품질 측정 · LLM 교차검증 · 표절 검사**")
2242
  with gr.Tab("🏆 경쟁력 비교"):
@@ -2252,9 +1808,7 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2252
  @keyframes glow { 0%,100% { box-shadow:0 0 8px rgba(99,102,241,0.3); } 50% { box-shadow:0 0 20px rgba(99,102,241,0.6); } }
2253
  @keyframes borderPulse { 0%,100% { border-color:rgba(99,102,241,0.3); } 50% { border-color:rgba(99,102,241,0.8); } }
2254
  @keyframes checkPop { from { transform:scale(0) rotate(-180deg); opacity:0; } to { transform:scale(1) rotate(0deg); opacity:1; } }
2255
-
2256
  .comp-wrap { font-family:'Pretendard','Noto Sans KR',system-ui,sans-serif; max-width:860px; margin:0 auto; padding:8px; }
2257
-
2258
  /* 히어로 배너 */
2259
  .hero-banner {
2260
  background: linear-gradient(135deg, #1e1b4b 0%, #312e81 30%, #4338ca 60%, #6366f1 100%);
@@ -2273,7 +1827,6 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2273
  font-size:11px; font-weight:800; padding:4px 12px; border-radius:20px; margin-top:10px;
2274
  animation: pulse 2s ease-in-out infinite; position:relative;
2275
  }
2276
-
2277
  /* 핵심 수치 */
2278
  .stats-row { display:grid; grid-template-columns:repeat(4,1fr); gap:10px; margin-bottom:18px; animation:fadeInUp 1s ease-out 0.2s both; }
2279
  .stat-card {
@@ -2283,7 +1836,6 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2283
  .stat-card:hover { transform:translateY(-4px); border-color:#6366f1; box-shadow:0 8px 25px rgba(99,102,241,0.15); }
2284
  .stat-num { font-size:32px; font-weight:900; background:linear-gradient(135deg,#6366f1,#8b5cf6); -webkit-background-clip:text; -webkit-text-fill-color:transparent; animation:countUp 0.6s ease-out; }
2285
  .stat-label { font-size:10px; color:#64748b; margin-top:2px; font-weight:600; }
2286
-
2287
  /* 비교 테이블 */
2288
  .comp-table-wrap { animation:fadeInUp 1.1s ease-out 0.4s both; margin-bottom:18px; }
2289
  .comp-table {
@@ -2304,13 +1856,11 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2304
  .comp-table tbody td:first-child { text-align:left; padding-left:14px; font-weight:600; color:#1e293b; }
2305
  .comp-table tbody td:nth-child(2) { background:rgba(239,68,68,0.04); font-weight:700; }
2306
  .comp-table tbody tr:last-child td { border-bottom:none; }
2307
-
2308
  /* 체크/엑스 아이콘 */
2309
  .ck { color:#22c55e; font-weight:800; font-size:15px; animation:checkPop 0.4s ease-out; }
2310
  .cx { color:#ef4444; font-weight:700; font-size:13px; }
2311
  .cp { color:#f59e0b; font-weight:700; font-size:12px; }
2312
  .cstar { color:#6366f1; font-weight:900; font-size:14px; }
2313
-
2314
  /* 차별화 카드 */
2315
  .diff-grid { display:grid; grid-template-columns:1fr 1fr; gap:10px; margin-bottom:18px; }
2316
  .diff-card {
@@ -2329,7 +1879,6 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2329
  .diff-title { font-size:13px; font-weight:800; color:#1e293b; margin-bottom:4px; }
2330
  .diff-desc { font-size:10px; color:#64748b; line-height:1.5; }
2331
  .diff-vs { display:inline-block; background:#fef2f2; color:#dc2626; font-size:9px; font-weight:700; padding:2px 6px; border-radius:4px; margin-top:4px; }
2332
-
2333
  /* 경쟁사 공략 바 */
2334
  .attack-section { animation:fadeInUp 1.3s ease-out 0.8s both; margin-bottom:14px; }
2335
  .attack-title { font-size:13px; font-weight:800; color:#1e293b; margin-bottom:10px; }
@@ -2339,7 +1888,6 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2339
  .attack-weak { font-size:9px; color:#ef4444; font-weight:600; }
2340
  .attack-bar { height:8px; background:#f1f5f9; border-radius:4px; overflow:hidden; }
2341
  .attack-fill { height:100%; border-radius:4px; animation:barGrow 1.5s ease-out; }
2342
-
2343
  /* 슬로건 */
2344
  .slogan-box {
2345
  background:linear-gradient(135deg,#faf5ff,#ede9fe,#e0e7ff); border-radius:16px;
@@ -2349,7 +1897,6 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2349
  .slogan-main { font-size:16px; font-weight:900; color:#3730a3; margin-bottom:6px; }
2350
  .slogan-sub { font-size:11px; color:#6366f1; line-height:1.7; }
2351
  </style>
2352
-
2353
  <div class="comp-wrap">
2354
  <!-- 히어로 배너 -->
2355
  <div class="hero-banner">
@@ -2360,7 +1907,6 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2360
  </div>
2361
  <div class="hero-badge">🔬 v5.1 — 3대 킬러 기능 탑재</div>
2362
  </div>
2363
-
2364
  <!-- 핵심 수치 -->
2365
  <div class="stats-row">
2366
  <div class="stat-card"><div class="stat-num">8축</div><div class="stat-label">탐지 차원<br>(업계 최다)</div></div>
@@ -2368,7 +1914,6 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2368
  <div class="stat-card"><div class="stat-num">무료</div><div class="stat-label">완전 무료<br>(횟수 무제한)</div></div>
2369
  <div class="stat-card"><div class="stat-num">4종</div><div class="stat-label">모델 추정<br>(GPT/Claude/Gemini/PPX)</div></div>
2370
  </div>
2371
-
2372
  <!-- 비교 테이블 -->
2373
  <div class="comp-table-wrap">
2374
  <table class="comp-table">
@@ -2513,7 +2058,6 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2513
  </tbody>
2514
  </table>
2515
  </div>
2516
-
2517
  <!-- 차별화 4대 카드 -->
2518
  <div class="diff-grid" style="animation:fadeInUp 1.2s ease-out 0.6s both;">
2519
  <div class="diff-card">
@@ -2541,7 +2085,6 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2541
  <div class="diff-vs">vs 전체 경쟁사: LLM 검증 미보유</div>
2542
  </div>
2543
  </div>
2544
-
2545
  <!-- 경쟁사 취약점 공략 -->
2546
  <div class="attack-section">
2547
  <div class="attack-title">⚔️ 경쟁사 취약점 vs 우리의 강점</div>
@@ -2566,7 +2109,6 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2566
  <div class="attack-bar"><div class="attack-fill" style="width:94%;background:linear-gradient(90deg,#a855f7,#ec4899);"></div></div>
2567
  </div>
2568
  </div>
2569
-
2570
  <!-- 슬로건 -->
2571
  <div class="slogan-box">
2572
  <div class="slogan-main">🇰🇷 한국어 AI 글을 가장 정확하게, 가장 투명하게, 완전 무료로 판별합니다</div>
@@ -2603,59 +2145,42 @@ with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
2603
  btn_ps.click(lambda:SAMPLE_AI,outputs=[inp_plag])
2604
  with gr.Tab("📖 설명"):
2605
  gr.Markdown("""
2606
- ### 아키텍처 v5.1 — 3대 킬러 기능 탑재
2607
  - **탐지 5축:** 통계(6%)·문체(25%)·반복(10%)·구조(12%)·지문(30%)
2608
  - **★ Perplexity 확률분석(17%):** 문자 엔트로피·Burstiness·TTR편차·종결엔트로피
2609
  - **★ Humanizer 탐지:** 동의어과다치환·구조보존·접속사잔존·이중피동 등 6시그널
2610
  - **★ 모델 추정:** GPT·Claude·Gemini·Perplexity 4모델 증거기반 추정
2611
  - **품질 6항목:** 가독성·어휘·논리·정확성·표현·정보밀도
2612
  - **LLM 교차검증:** GPT-OSS-120B·Qwen3-32B·Kimi-K2 (GROQ)
2613
-
2614
- ### 표절 검사
2615
  - **Brave Search**: 병렬 20개 동시 웹검색
2616
  - **학술 DB**: KCI(한국학술지인용색인), RISS(학술연구정보), arXiv
2617
  - **Gemini**: Google Search Grounding
2618
  - **보고서**: CopyKiller 스타일 — 유사도%, 출처표, 문장별 하이라이트
2619
-
2620
- ### 환경변수
2621
  - `GROQ_API_KEY` — LLM 교차검증
2622
  - `GEMINI_API_KEY` — 표절 검사 (Google Search Grounding)
2623
  - `BRAVE_API_KEY` — 표절 검사 (Brave Search 병렬)
2624
  """)
2625
- # ── 숨겨진 파일 텍스트 추출 API (탭1~4 파일 업로드용) ──
2626
  with gr.Row(visible=False):
2627
  _file_in = gr.File(label="hidden_file")
2628
  _text_out = gr.Textbox(label="hidden_text")
2629
  _file_in.change(extract_file_text_api, [_file_in], [_text_out], api_name="extract_file_text")
2630
-
2631
- # ═══ 정적 파일 준비 ═══
2632
  import shutil, pathlib
2633
  static_dir = pathlib.Path("static")
2634
  static_dir.mkdir(exist_ok=True)
2635
  if pathlib.Path("index.html").exists():
2636
  shutil.copy("index.html", static_dir / "index.html")
2637
-
2638
- # ═══ FastAPI — index.html을 루트(/)로 서빙 ═══
2639
  from fastapi import FastAPI
2640
  from fastapi.responses import HTMLResponse, FileResponse
2641
  from fastapi.staticfiles import StaticFiles
2642
-
2643
  server = FastAPI()
2644
-
2645
@server.get("/", response_class=HTMLResponse)
async def serve_root():
    """Serve the premium landing page (static/index.html) at the root URL.

    Falls back to redirecting to the mounted Gradio UI when no landing
    page file is present.
    """
    fp = pathlib.Path("static/index.html")
    if fp.exists():
        # Re-read on every request so edits to the page show up without a restart.
        return HTMLResponse(fp.read_text(encoding="utf-8"))
    # No index.html: send the visitor to the Gradio app mounted at /gradio.
    from fastapi.responses import RedirectResponse
    return RedirectResponse("/gradio/")
2654
-
2655
- # Gradio를 /gradio 경로에 마운트 — API는 /gradio/gradio_api/call/... 에서 작동
2656
  app = gr.mount_gradio_app(server, demo, path="/gradio", allowed_paths=["static"])
2657
-
2658
  if __name__ == "__main__":
2659
  import uvicorn
2660
- uvicorn.run(app, host="0.0.0.0", port=7860)
2661
-
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import math, re, os, json, random, time, hashlib, zlib, zipfile, tempfile
3
  from collections import Counter
 
6
  from concurrent.futures import ThreadPoolExecutor, as_completed
7
  from xml.etree import ElementTree as ET
8
  from kiwipiepy import Kiwi
 
9
  KIWI = Kiwi()
10
  try:
11
  import httpx; HAS_HTTPX = True
 
17
  HAS_GENAI = True
18
  except ImportError:
19
  HAS_GENAI = False
 
 
20
  try:
21
  import olefile; HAS_OLEFILE = True
22
  except ImportError:
 
33
  from docx import Document as DocxDocument; HAS_DOCX = True
34
  except ImportError:
35
  HAS_DOCX = False
 
36
  GROQ_KEY = os.getenv("GROQ_API_KEY", "")
37
  GEMINI_KEY = os.getenv("GEMINI_API_KEY", "")
38
  BRAVE_KEY = os.getenv("BRAVE_API_KEY", "")
 
 
 
 
 
39
  def extract_text_from_pdf(file_path):
40
  """PDF → 텍스트 (페이지별 분리)"""
41
  pages = []
 
59
  except Exception as e:
60
  print(f"PyPDF2: {e}")
61
  return None, "PDF 추출 실패 (pdfplumber, PyPDF2 없음)"
 
62
  def extract_text_from_docx(file_path):
63
  """DOCX → 텍스트 (문단별 분리)"""
64
  if not HAS_DOCX: return None, "python-docx 없음"
 
79
  return None, "DOCX 텍스트 없음"
80
  except Exception as e:
81
  return None, f"DOCX 오류: {e}"
 
82
  def extract_text_from_txt(file_path):
83
  """TXT/MD/CSV 등 → 텍스트"""
84
  for enc in ['utf-8', 'euc-kr', 'cp949', 'utf-16', 'latin-1']:
 
86
  with open(file_path, 'r', encoding=enc) as f:
87
  text = f.read()
88
  if text.strip():
 
89
  sections = [s.strip() for s in re.split(r'\n{2,}', text) if s.strip()]
90
  return sections if sections else [text], None
91
  except: continue
92
  return None, "텍스트 인코딩 감지 실패"
 
93
  def extract_text_from_hwpx(file_path):
94
  """HWPX (ZIP 기반) → 텍스트"""
95
  try:
 
128
  return None, "유효하지 않은 HWPX"
129
  except Exception as e:
130
  return None, f"HWPX 오류: {e}"
 
131
  def _decode_hwp_para(data):
132
  """HWP 바이너리 → 문단 텍스트"""
133
  result = []
 
149
  text = re.sub(r'[ \t]+', ' ', text)
150
  text = re.sub(r'\n{3,}', '\n\n', text)
151
  return text if len(text) > 2 else None
 
152
  def _extract_hwp_section(data):
153
  """HWP 섹션 바이너리 → 텍스트"""
154
  texts = []
 
172
  except:
173
  pos += 1
174
  return '\n'.join(texts) if texts else None
 
175
  def extract_text_from_hwp(file_path):
176
  """HWP (OLE 기반) → 텍스트"""
177
  if not HAS_OLEFILE: return None, "olefile 없음"
 
200
  return None, "HWP 텍스트 없음"
201
  except Exception as e:
202
  return None, f"HWP 오류: {e}"
 
203
  def extract_text_from_file(file_path):
204
  """
205
  만능 문서 추출: PDF/DOCX/HWP/HWPX/TXT → (sections_list, full_text, error)
 
210
  return None, None, "파일 없음"
211
  ext = Path(file_path).suffix.lower()
212
  sections, error = None, None
 
213
  if ext == '.pdf':
214
  sections, error = extract_text_from_pdf(file_path)
215
  elif ext == '.docx':
 
222
  sections, error = extract_text_from_txt(file_path)
223
  else:
224
  return None, None, f"지원하지 않는 형식: {ext}"
 
225
  if sections:
226
  full = '\n\n'.join(sections)
227
  return sections, full, None
228
  return None, None, error or "텍스트 추출 실패"
 
 
 
 
229
def split_sentences(text):
    """Split *text* into sentences.

    Tries the Kiwi analyzer first; if it is unavailable or fails, falls
    back to a regex split on sentence-final punctuation (including the
    CJK full stop).

    Returns a list of non-empty, stripped sentence strings.
    """
    try:
        sents = [s.text.strip() for s in KIWI.split_into_sents(text) if s.text.strip()]
        if sents:
            return sents
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are not silently swallowed; any analyzer failure still falls
        # through to the regex path.
        pass
    return [chunk.strip() for chunk in re.split(r'(?<=[.!?。])\s+', text) if chunk.strip()]
 
235
def split_words(text):
    """Return the alphanumeric/Hangul word tokens found in *text*."""
    tokens = re.findall(r'[가-힣a-zA-Z0-9]+', text)
    return [tok for tok in tokens if tok]
 
237
def get_morphemes(text):
    """Analyze *text* with Kiwi and return (surface form, POS tag) pairs.

    Returns an empty list when the analyzer is unavailable, fails, or
    produces no candidates.
    """
    try:
        result = KIWI.analyze(text)
        # analyze() returns ranked candidates; take the token list of the best one.
        if result and result[0]:
            return [(m.form, m.tag) for m in result[0][0]]
    except Exception:
        # Narrowed from a bare `except:` — keep the best-effort contract
        # without masking SystemExit/KeyboardInterrupt.
        pass
    return []
 
243
  def http_get(url, headers=None, timeout=15):
244
  try:
245
  if HAS_HTTPX:
 
251
  with urllib.request.urlopen(req, timeout=timeout) as resp:
252
  return resp.read().decode('utf-8', errors='replace')
253
  except: return None
 
254
  def http_post_json(url, body, headers=None, timeout=30):
255
  try:
256
  h = headers or {}
 
265
  with urllib.request.urlopen(req, timeout=timeout, context=ssl.create_default_context()) as resp:
266
  return json.loads(resp.read())
267
  except: return None
 
268
  def call_groq(model, prompt, max_tokens=800, temperature=0.1):
269
  if not GROQ_KEY: return None, "NO_KEY"
270
  url = "https://api.groq.com/openai/v1/chat/completions"
 
281
  with urllib.request.urlopen(req, timeout=45, context=ssl.create_default_context()) as resp:
282
  return json.loads(resp.read())["choices"][0]["message"]["content"], None
283
  except Exception as e: return None, str(e)[:150]
 
 
 
 
284
  AI_ENDINGS = ['합니다','입니다','됩니다','습니다','있습니다','했습니다','겠습니다']
 
285
  AI_CASUAL_ENDINGS = ['라고 할 수 있다','라고 볼 수 있다','다고 생각한다','다고 판단된다',
286
  '인 셈이다','인 것이다','는 것이다','는 셈이다','ㄹ 것이다','을 것이다',
287
  '라 할 수 있다','로 보인다','로 판단된다','고 있다','는 추세다','는 상황이다',
288
  '지 않을 수 없다','라 하겠다','음을 알 수 있다','할 필요가 있다']
289
  AI_CONNS = ['또한','따라서','그러므로','이에 따라','한편','더불어','아울러','뿐만 아니라',
290
  '이를 통해','이에','결과적으로','궁극적으로','특히','나아가','이러한']
 
291
  AI_SOFT_CONNS = ['물론','그러나','하지만','이처럼','이와 같이','이를 바탕으로']
292
  AI_FILLER = ['것으로 보','것으로 나타','것으로 예상','할 수 있','볼 수 있','주목할 만',
293
  '중요한 역할','중요한 의미','긍정적인 영향','부정적인 영향','필요합니다','필요하다',
294
  '중요합니다','중요하다','역할을 하','영향을 미','기대된다','예상됩니다','부각되고',
295
  '대두되고','다양한 분야','다양한 산업','눈부신 성과','획기적인 변화','혁신적인',
296
  '점에서','측면에서','관점에서']
 
297
  AI_CASUAL_FILLER = ['무궁무진하다','무궁무진한','과언이 아니','돌파구가 될','전환점이 될',
298
  '기반으로','발판으로','원동력이','초석이 될','가속화되','급부상','패러다임',
299
  '지평을 열','새로운 장을','대전환','본격화되','고도화','이정표']
 
300
  AI_CONCESSION = re.compile(r'물론.{2,20}(하지만|그러나|그렇지만|다만)|.{2,15}(이긴 하지만|기는 하지만|수 있지만|수는 있지만)')
 
301
  EN_AI_MARKERS = ['furthermore','additionally','moreover','it is worth noting','in conclusion',
302
  'it is important to','plays a crucial role','significant impact','various aspects',
303
  'in this regard','consequently','nevertheless','integral part of','led to remarkable',
 
324
  "Perplexity": {"m":['검색 결과에 따르면','보도에 따르면','연구에 따르면','밝혔다','전했다',
325
  '것으로 나타났다','것으로 조사됐다','것으로 집계됐다','발표했다'],"e":['밝혔다','나타났다','전했다'],"lp":re.compile(r'\[\d+\]',re.M)},
326
  }
 
327
  def score_sentence(sent):
328
  """단일 문장 AI 점수 (0~100). 탭1·탭2 공유. v5.0 대폭 강화."""
329
  sc = 0; reasons = []
330
  sl = sent.lower().strip()
331
  sr = sent.rstrip('.!?。')
 
 
332
  for e in AI_ENDINGS:
333
  if sr.endswith(e): sc += 22; reasons.append(f"격식어미(-{e})"); break
 
 
334
  if sc == 0: # 격식이 아닌 경우만
335
  for e in AI_CASUAL_ENDINGS:
336
  if sr.endswith(e): sc += 15; reasons.append(f"비격식AI(-{e})"); break
 
 
337
  stripped = sent.strip()
338
  for c in AI_CONNS:
339
  if stripped.startswith(c):
 
341
  else:
342
  for c in AI_SOFT_CONNS:
343
  if stripped.startswith(c): sc += 8; reasons.append(f"약한접속사({c})"); break
 
 
344
  filler_found = sum(1 for f in AI_FILLER if f in sent)
345
  casual_filler = sum(1 for f in AI_CASUAL_FILLER if f in sent)
346
  total_filler = filler_found + casual_filler
347
  if total_filler >= 3: sc += 25; reasons.append(f"상투표현×{total_filler}")
348
  elif total_filler == 2: sc += 18; reasons.append(f"상투표현×2")
349
  elif total_filler == 1: sc += 10; reasons.append(f"상투표현×1")
 
 
350
  if AI_CONCESSION.search(sent): sc += 10; reasons.append("양보패턴")
 
 
351
  for mn, fp in FP.items():
352
  for m in fp["m"]:
353
  if m in sent: sc += 8; reasons.append(f"{mn}지문"); break
 
 
354
  en_count = sum(1 for em in EN_AI_MARKERS if em in sl)
355
  if en_count >= 3: sc += 25; reasons.append(f"영어AI×{en_count}")
356
  elif en_count >= 2: sc += 18; reasons.append(f"영어AI×{en_count}")
357
  elif en_count >= 1: sc += 12; reasons.append(f"영어AI×1")
 
 
358
  has_formal = any(sr.endswith(e) for e in AI_ENDINGS)
359
  has_conn = any(stripped.startswith(c) for c in AI_CONNS)
360
  if has_formal and total_filler >= 1 and has_conn: sc += 8; reasons.append("복합AI")
361
  elif has_formal and total_filler >= 2: sc += 5; reasons.append("격식+상투")
 
 
362
  for n, p in HUMAN_MARKERS.items():
363
  matches = p.findall(sent)
364
  if matches:
 
366
  elif n in ('비격식종결','구어축약'): sc -= 18; reasons.append(f"구어체({n})")
367
  elif n == '맞춤법오류': sc -= 12; reasons.append("맞춤법오류")
368
  elif n in ('느낌표','말줄임표'): sc -= 10; reasons.append(f"인간({n})")
 
369
  return max(0, min(100, sc)), reasons
 
 
 
 
370
  def analyze_statistics(text, sentences, words):
371
  sl = [len(s) for s in sentences]
372
  if len(sl) < 2: return {"score":50}
 
 
373
  avg = sum(sl)/len(sl); std = math.sqrt(sum((l-avg)**2 for l in sl)/len(sl))
374
  cv = std/avg if avg > 0 else 0
375
  cv_score = 85 if cv<0.20 else 70 if cv<0.30 else 50 if cv<0.45 else 30 if cv<0.60 else 15
 
 
376
  diffs = [abs(sl[i]-sl[i-1]) for i in range(1,len(sl))]
377
  burst_score = 50
378
  if diffs:
 
380
  max_d = max(diffs)
381
  burst_ratio = max_d / (avg_d + 1)
382
  burst_score = 85 if burst_ratio < 1.8 else 65 if burst_ratio < 2.5 else 40 if burst_ratio < 3.5 else 20
 
 
383
  standard_ratio = sum(1 for l in sl if 20 <= l <= 60) / len(sl)
384
  std_score = 80 if standard_ratio > 0.8 else 60 if standard_ratio > 0.6 else 40 if standard_ratio > 0.4 else 20
 
385
  extreme = sum(1 for l in sl if l < 10 or l > 80)
386
  if extreme >= 2: std_score = max(10, std_score - 20)
387
  elif extreme >= 1: std_score = max(15, std_score - 10)
 
 
388
  wf = Counter(words); t = len(words)
389
  ttr = len(wf)/t if t>0 else 0
390
  vocab_score = 70 if ttr<0.45 else 55 if ttr<0.55 else 35 if ttr<0.65 else 20
 
 
391
  wpc = [len(split_words(s)) for s in sentences]
392
  complex_score = 50
393
  if len(wpc) >= 3:
 
395
  wpc_std = math.sqrt(sum((w-wpc_avg)**2 for w in wpc)/len(wpc))
396
  wpc_cv = wpc_std/wpc_avg if wpc_avg > 0 else 0
397
  complex_score = 80 if wpc_cv < 0.20 else 60 if wpc_cv < 0.35 else 35 if wpc_cv < 0.50 else 15
 
398
  final = int(cv_score*0.20 + burst_score*0.20 + std_score*0.25 + vocab_score*0.15 + complex_score*0.20)
399
  return {"score":final,"cv":round(cv,3),"ttr":round(ttr,3)}
 
 
 
 
400
def analyze_korean_style(text, sentences, morphemes):
    """Score how strongly the Korean writing style resembles AI output (0-100).

    Blends five weighted signals — formal/casual-AI sentence endings,
    AI-typical connective openers (and how regularly they are spaced),
    cliché filler phrases, concession patterns, and a noun-heavy POS
    distribution — then subtracts a penalty for human-style markers.
    """
    if not sentences:
        return {"score": 50}

    def _band(value, bands, fallback):
        # First band whose threshold is strictly exceeded wins.
        for threshold, score in bands:
            if value > threshold:
                return score
        return fallback

    n_sents = len(sentences)

    # 1) Sentence-ending style: formal endings band + capped casual-AI bonus.
    formal_hits = 0
    casual_hits = 0
    for sent in sentences:
        tail = sent.rstrip('.!?')
        if any(tail.endswith(e) for e in AI_ENDINGS):
            formal_hits += 1
        if any(tail.endswith(e) for e in AI_CASUAL_ENDINGS):
            casual_hits += 1
    formal_ratio = formal_hits / n_sents
    casual_ratio = casual_hits / n_sents
    ending_score = _band(formal_ratio, [(0.7, 85), (0.5, 65), (0.3, 45), (0.1, 25)], 10)
    ending_score = min(90, ending_score + int(casual_ratio * 25))

    # 2) AI-typical connective openers.
    opener_idx = [i for i, sent in enumerate(sentences)
                  if any(sent.strip().startswith(c) for c in AI_CONNS)]
    conn_density = len(opener_idx) / n_sents
    conn_score = _band(conn_density, [(0.4, 85), (0.25, 65), (0.1, 40)], 15)
    if len(opener_idx) >= 2:
        # Very evenly spaced connectives (low CV of the gaps) are an AI tell.
        gaps = [b - a for a, b in zip(opener_idx, opener_idx[1:])]
        mean_gap = sum(gaps) / len(gaps)
        spread = math.sqrt(sum((g - mean_gap) ** 2 for g in gaps) / len(gaps))
        if spread / (mean_gap + 0.01) < 0.5:
            conn_score = min(90, conn_score + 10)

    # 3) Cliché filler phrases (presence count, not occurrence count).
    filler_cnt = sum(f in text for f in AI_FILLER) + sum(f in text for f in AI_CASUAL_FILLER)
    filler_score = _band(filler_cnt, [(5, 90), (3, 75), (1, 55), (0, 30)], 10)

    # 4) Concession pattern hits.
    concession_hits = len(AI_CONCESSION.findall(text))
    if concession_hits >= 2:
        conc_score = 80
    elif concession_hits >= 1:
        conc_score = 55
    else:
        conc_score = 20

    # Human-style markers (slang, typos, emphasis) pull the score down.
    human_hits = sum(len(p.findall(text)) for p in HUMAN_MARKERS.values())
    human_penalty = min(35, human_hits * 8)

    # 5) POS distribution: AI-generated Korean tends to be noun-heavy.
    pos_score = 45
    if morphemes:
        tag_freq = Counter(tag for _, tag in morphemes)
        total_tags = sum(tag_freq.values())
        noun_ratio = sum(tag_freq.get(t, 0) for t in ('NNG', 'NNP', 'NNB', 'NR')) / total_tags if total_tags else 0
        pos_score = _band(noun_ratio, [(0.42, 70), (0.38, 55), (0.32, 35)], 20)

    # Weighted blend; term order kept identical to preserve float rounding.
    blended = int(ending_score * 0.25 + conn_score * 0.20 + filler_score * 0.20 +
                  conc_score * 0.10 + pos_score * 0.15 + 10 * 0.10)
    final = max(5, blended - human_penalty)
    return {"score": final, "formal": f"{formal_ratio:.0%}", "conn": f"{conn_density:.2f}",
            "filler": filler_cnt, "human": human_hits}
 
 
 
 
432
  def analyze_repetition(text, sentences, words):
433
  if not sentences or len(sentences) < 2: return {"score":35}
 
 
434
  tr = 0
435
  if len(words)>=5:
436
  tg = Counter(tuple(words[i:i+3]) for i in range(len(words)-2))
437
  tr = sum(1 for c in tg.values() if c>1)/len(tg) if tg else 0
438
  ngram_score = 80 if tr>0.15 else 60 if tr>0.08 else 35 if tr>0.03 else 15
 
 
439
  openers_2 = []
440
  openers_3 = []
441
  for s in sentences:
442
  ws = split_words(s)
443
  if len(ws) >= 2: openers_2.append(tuple(ws[:2]))
444
  if len(ws) >= 3: openers_3.append(tuple(ws[:3]))
 
445
  opener2_score = 50
446
  if openers_2:
447
  unique2 = len(set(openers_2))/len(openers_2)
448
  opener2_score = 80 if unique2 < 0.5 else 60 if unique2 < 0.7 else 35 if unique2 < 0.85 else 15
 
 
449
  ai_only_conns = ['또한','따라서','그러므로','이에 따라','더불어','아울러','뿐만 아니라',
450
  '이를 통해','이에','결과적으로','궁극적으로','나아가','이러한']
451
  cr = sum(1 for s in sentences if any(s.strip().startswith(c) for c in ai_only_conns))
452
  crr = cr/len(sentences) if sentences else 0
453
  ai_conn_score = 85 if crr>0.35 else 65 if crr>0.2 else 40 if crr>0.08 else 15
 
 
454
  templates = []
455
  for s in sentences:
456
  ws = split_words(s)
457
  if len(ws) >= 4:
 
458
  templates.append((ws[0], ws[-1]))
459
  template_rep = 0
460
  if templates:
461
  tc = Counter(templates)
462
  template_rep = sum(1 for c in tc.values() if c > 1) / len(tc) if tc else 0
463
  template_score = 80 if template_rep > 0.3 else 55 if template_rep > 0.1 else 25
 
 
464
  endings = []
465
  for s in sentences:
466
  sr = s.rstrip('.!?。')
 
470
  if endings:
471
  unique_e = len(set(endings))/len(endings)
472
  ending_div = 80 if unique_e < 0.3 else 60 if unique_e < 0.5 else 35 if unique_e < 0.7 else 15
 
473
  final = int(ngram_score*0.15 + opener2_score*0.20 + ai_conn_score*0.25 +
474
  template_score*0.15 + ending_div*0.25)
475
  return {"score":final}
 
 
 
 
 
476
# Lexical cue patterns for the structure analyzer: vague AI-favoured adjectives
# versus concrete evidence markers (proper nouns, figures, citations).
AI_VAGUE = re.compile(r'다양한|중요한|긍정적인|부정적인|획기적인|혁신적인|효율적인|체계적인|종합적인|전반적인|지속적인|적극적인|상당한|주요한')
# Well-known Korean company/institution names plus place-name suffix patterns.
CONCRETE_PROPER = re.compile(r'삼성|LG|현대|SK|카카오|네이버|넷플릭스|구글|애플|테슬라|아마존|마이크로소프트|[가-힣]{2,}대학|[가-힣]{2,}병원|[가-힣]{1,3}시[는을이가]|[가-힣]{1,3}구[는을이가]|[가-힣]{2,}동[에서]')
# Multi-digit figures with units, decimal percentages, and year/month expressions.
CONCRETE_NUMBER = re.compile(r'\d{2,}[만억조원달러%개년월일위등호]|\d+\.\d+%|\d{4}년|\d{1,2}월')
# Reporting/citation phrases ("according to", "announced", survey/statistics, report).
CONCRETE_QUOTE = re.compile(r'에 따르면|발표했|밝혔다|보도했|전했다|라고 말|인터뷰|설문|조사|통계청|보고서')
 
480
  def analyze_structure(text, sentences):
481
  if not sentences: return {"score":35}
 
 
482
  lt = (len(re.findall(r'^\d+[.)]\s',text,re.M)) + len(re.findall(r'^[-•*]\s',text,re.M)) +
483
  len(re.findall(r'^#+\s',text,re.M)) + len(re.findall(r'\*\*[^*]+\*\*',text)))
484
  list_score = 90 if lt>=5 else 70 if lt>=3 else 45 if lt>=1 else 10
 
 
485
  vague_cnt = len(AI_VAGUE.findall(text))
486
  proper_cnt = len(CONCRETE_PROPER.findall(text))
487
  number_cnt = len(CONCRETE_NUMBER.findall(text))
488
  quote_cnt = len(CONCRETE_QUOTE.findall(text))
489
  concrete_total = proper_cnt + number_cnt + quote_cnt
 
490
  if vague_cnt >= 3 and concrete_total == 0: abstract_score = 90
491
  elif vague_cnt >= 2 and concrete_total <= 1: abstract_score = 70
492
  elif vague_cnt >= 1 and concrete_total == 0: abstract_score = 55
 
494
  elif concrete_total >= 2: abstract_score = 20
495
  elif concrete_total >= 1: abstract_score = 30
496
  else: abstract_score = 45
 
 
497
  has_question = any(s.strip().endswith('?') for s in sentences)
498
  has_exclaim = any(s.strip().endswith('!') for s in sentences)
499
  has_ellipsis = any('...' in s or '…' in s for s in sentences)
500
  variety = sum([has_question, has_exclaim, has_ellipsis])
501
  type_score = 15 if variety >= 2 else 40 if variety >= 1 else 65
 
 
502
  puncts = re.findall(r'[!?,;:…—\-~]', text)
503
  unique_punct = len(set(puncts))
504
  punct_score = 65 if unique_punct <= 1 else 45 if unique_punct <= 3 else 20
 
 
505
  paras = [p.strip() for p in text.split('\n\n') if p.strip()]
506
  para_score = 35
507
  if len(paras) >= 2:
 
512
  para_score = 75 if pcv < 0.2 else 55 if pcv < 0.35 else 30
513
  if len(paras) >= 3 and pl[0] < avg_p and pl[-1] < avg_p:
514
  para_score = min(85, para_score + 10)
 
515
  final = int(list_score*0.10 + abstract_score*0.40 + type_score*0.20 + punct_score*0.10 + para_score*0.20)
516
  return {"score":final}
 
 
 
 
517
  def analyze_model_fingerprint(text, sentences):
518
  ms = {}
519
  sl = text.lower()
 
524
  em = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in fp.get("e",[])))
525
  if sentences: sc += int((em/len(sentences))*20)
526
  ms[mn] = min(100,sc)
 
 
527
  general_ai = 0
 
528
  general_ai += sum(5 for f in AI_CASUAL_FILLER if f in text)
 
529
  casual_end_cnt = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in AI_CASUAL_ENDINGS))
530
  general_ai += casual_end_cnt * 5
 
531
  general_ai += len(AI_CONCESSION.findall(text)) * 8
532
  ms["비격식AI"] = min(100, general_ai)
 
 
533
  en_score = sum(5 for em in EN_AI_MARKERS if em in sl)
534
  ms["영어AI"] = min(100, en_score)
 
535
  mx = max(ms.values()) if ms else 0
 
536
  multi = sum(1 for v in ms.values() if v >= 10)
537
  multi_bonus = 10 if multi >= 3 else 5 if multi >= 2 else 0
 
538
  base = 85 if mx>=50 else 65 if mx>=35 else 45 if mx>=20 else 25 if mx>=10 else 10
539
  return {"score":min(95, base + multi_bonus),"model_scores":{k:v for k,v in ms.items() if k not in ("비격식AI","영어AI") or v > 0}}
 
 
 
 
 
540
  def analyze_perplexity(text, sentences, morphemes):
541
  """한국어 특화 Perplexity + Burstiness — 문자 엔트로피 보정"""
542
  if len(sentences) < 2: return {"score": 40, "entropy": 0, "variance": 0, "order": 0, "zipf": 0}
 
 
543
  chars = [c for c in text if c.strip()]
544
  char_score = 45
545
  if len(chars) >= 30:
 
547
  cb_freq = Counter(cbigrams)
548
  total_cb = len(cbigrams)
549
  char_entropy = -sum((cnt/total_cb)*math.log2(cnt/total_cb) for cnt in cb_freq.values())
 
550
  if char_entropy < 7.5: char_score = 78
551
  elif char_entropy < 8.5: char_score = 62
552
  elif char_entropy < 9.5: char_score = 42
553
  elif char_entropy < 10.5: char_score = 25
554
  else: char_score = 12
 
 
555
  sl = [len(s) for s in sentences]
556
  burst_score = 45
557
  if len(sl) >= 3:
 
563
  elif cv < 0.40: burst_score = 38
564
  elif cv < 0.60: burst_score = 20
565
  else: burst_score = 8
 
 
566
  sent_ttr = []
567
  for s in sentences:
568
  sw = split_words(s)
 
576
  elif std_ttr < 0.08: ttr_score = 55
577
  elif std_ttr < 0.15: ttr_score = 35
578
  else: ttr_score = 15
 
 
579
  endings = [s.rstrip('.!?\u2026')[-3:] for s in sentences if len(s) >= 5]
580
  end_score = 40
581
  if len(endings) >= 3:
 
587
  elif norm_ent < 0.7: end_score = 50
588
  elif norm_ent < 0.85: end_score = 32
589
  else: end_score = 15
 
590
  final = int(char_score * 0.30 + burst_score * 0.30 + ttr_score * 0.20 + end_score * 0.20)
591
  return {"score": final, "entropy": char_score, "variance": burst_score, "order": ttr_score, "zipf": end_score}
 
 
 
 
 
 
 
 
592
# Paraphrase-tool ("humanizer") fingerprints used by analyze_humanizer:
# verbs that synonym-substitution tools rotate between (활용/이용/사용 ...).
HUMANIZER_OVERSUBST = re.compile(r'활용하다|이용하다|사용하다|적용하다|도입하다|채택하다|수행하다|진행하다|실시하다|실행하다')
# Stiff demonstratives / back-references typically left behind by rewriting.
HUMANIZER_AWKWARD = re.compile(r'그것은|이것은|저것은|해당 사항|앞서 언급한|전술한|상기한|기술된')
# Double-passive constructions (되어지-, 하게 되었-, 수행되- ...).
HUMANIZER_PASSIVE = re.compile(r'되어지[고는며]|하게 되었[다습]|수행되[었어]|진행되[었어]|실시되[었어]|활용되[었어]')
 
595
  def analyze_humanizer(text, sentences, words, morphemes):
596
  """Humanizer/Bypasser 탐지 — AI 원문 패러프레이즈 흔적 분석"""
597
  if len(sentences) < 2: return {"score": 20, "signals": []}
598
  signals = []
 
 
 
599
  content_words = [f for f, t in morphemes if t in ('NNG', 'NNP', 'VV', 'VA')]
600
  if len(content_words) >= 10:
601
  cw_freq = Counter(content_words)
 
602
  hapax = sum(1 for c in cw_freq.values() if c == 1)
603
  hapax_ratio = hapax / len(cw_freq) if cw_freq else 0
 
604
  if hapax_ratio > 0.95 and len(content_words) >= 30:
605
  signals.append(("동의어과다치환", 20, "핵심 어휘가 과도하게 분산"))
606
  elif hapax_ratio > 0.90 and len(content_words) >= 25:
607
  signals.append(("동의어치환의심", 12, "어휘 반복 회피 패턴"))
 
 
 
608
  sl = [len(s) for s in sentences]
609
  if len(sl) >= 4:
610
  avg = sum(sl) / len(sl)
611
  cv = math.sqrt(sum((l - avg)**2 for l in sl) / len(sl)) / (avg + 1e-10)
 
612
  unique_ratio = len(set(words)) / len(words) if words else 0
613
  if cv < 0.20 and unique_ratio > 0.80 and len(sentences) >= 5:
614
  signals.append(("구조보존어휘변경", 18, "문장 구조 균일 + 비정상적 어휘 다양성"))
 
 
 
615
  residual = 0
 
616
  conn_positions = []
617
  for i, s in enumerate(sentences):
618
  stripped = s.strip()
 
621
  conn_positions.append(i)
622
  break
623
  if len(conn_positions) >= 2:
 
624
  gaps = [conn_positions[i] - conn_positions[i-1] for i in range(1, len(conn_positions))]
625
  if gaps and max(gaps) - min(gaps) <= 1: # 거의 등간격
626
  signals.append(("접속사등간격잔존", 15, "접속사 배치가 규칙적 (AI 원문 구조 잔존)"))
627
  residual += 15
 
 
628
  oversubst = len(HUMANIZER_OVERSUBST.findall(text))
629
  awkward = len(HUMANIZER_AWKWARD.findall(text))
630
  passive = len(HUMANIZER_PASSIVE.findall(text))
 
634
  signals.append(("어색한지시어", 10, f"해당/전술/상기 등 {awkward}개"))
635
  if passive >= 3:
636
  signals.append(("이중피동과다", 15, f"되어지/수행되 등 {passive}개"))
 
 
 
 
 
637
  endings = [s.rstrip('.!?')[-2:] for s in sentences if len(s) >= 4]
638
  end_types = len(set(endings)) / len(endings) if endings else 0
639
  has_question = any(s.strip().endswith('?') for s in sentences)
 
641
  sent_type_variety = sum([has_question, has_exclaim])
642
  if sent_type_variety == 0 and end_types > 0.85 and len(sentences) >= 6:
643
  signals.append(("유형단조어미다양", 12, "서술문만 + 종결어미 과다 다양 = Humanizer 패턴"))
 
 
 
644
  starters = [s.strip()[:3] for s in sentences if len(s) >= 6]
645
  starter_unique = len(set(starters)) / len(starters) if starters else 0
646
  if starter_unique >= 0.98 and len(sentences) >= 7:
647
  signals.append(("문두과다다양", 8, "모든 문장 시작이 다름 (자연스럽지 않은 다양성)"))
 
648
  total = sum(s[1] for s in signals)
 
649
  if total >= 45: score = 85
650
  elif total >= 30: score = 68
651
  elif total >= 20: score = 52
652
  elif total >= 10: score = 35
653
  else: score = 15
 
654
  return {"score": score, "signals": signals, "total_evidence": total}
 
 
 
 
 
 
655
  MODEL_PROFILES = {
656
  "GPT": {
657
  "style": ["격식체 ~습니다", "또한/특히 접속사", "~에 대해", "~것으로 예상됩니다"],
 
682
  "connectors": ["한편", "또한", "이에"],
683
  },
684
  }
 
685
  def estimate_model(text, sentences, morphemes, model_scores):
686
  """AI 모델 추정 — 복합 증거 기반"""
687
  evidence = {m: {"score": 0, "reasons": []} for m in MODEL_PROFILES}
 
688
  sl = text.lower()
 
689
  for model, profile in MODEL_PROFILES.items():
 
690
  fp_score = model_scores.get(model, 0)
691
  evidence[model]["score"] += fp_score * 0.4
692
  if fp_score >= 20:
693
  evidence[model]["reasons"].append(f"지문 매칭 {fp_score}점")
 
 
694
  marker_cnt = sum(1 for m in profile["markers"] if m in text)
695
  if marker_cnt >= 2:
696
  evidence[model]["score"] += marker_cnt * 8
697
  evidence[model]["reasons"].append(f"특유 표현 {marker_cnt}개")
 
 
698
  end_match = 0
699
  for s in sentences:
700
  for e in profile["endings"]:
 
705
  if end_ratio > 0.7:
706
  evidence[model]["score"] += 12
707
  evidence[model]["reasons"].append(f"종결어미 {end_ratio:.0%} 일치")
 
 
708
  conn_match = sum(1 for s in sentences if any(s.strip().startswith(c) for c in profile["connectors"]))
709
  if conn_match >= 2:
710
  evidence[model]["score"] += conn_match * 4
711
  evidence[model]["reasons"].append(f"접속사 패턴 {conn_match}회")
 
 
712
  number_citations = len(re.findall(r'\d+[%만억조]|에 따르면|것으로 나타났|발표했', text))
713
  if number_citations >= 3:
714
  evidence["Perplexity"]["score"] += number_citations * 5
715
  evidence["Perplexity"]["reasons"].append(f"수치/인용 {number_citations}회")
 
 
716
  concession_cnt = len(AI_CONCESSION.findall(text))
717
  if concession_cnt >= 1:
718
  evidence["Claude"]["score"] += concession_cnt * 10
719
  evidence["Claude"]["reasons"].append(f"양보-주장 구문 {concession_cnt}회")
 
 
720
  ranked = sorted(evidence.items(), key=lambda x: x[1]["score"], reverse=True)
721
  top = ranked[0]
722
  second = ranked[1] if len(ranked) > 1 else None
 
723
  if top[1]["score"] < 10:
724
  return {"model": "특정 불가", "confidence": "낮음", "detail": evidence, "ranked": ranked}
 
 
725
  gap = top[1]["score"] - (second[1]["score"] if second else 0)
726
  if gap >= 20 and top[1]["score"] >= 30:
727
  conf = "높음"
 
729
  conf = "중간"
730
  else:
731
  conf = "낮음"
 
732
  return {
733
  "model": top[0],
734
  "confidence": conf,
 
737
  "detail": evidence,
738
  "ranked": ranked
739
  }
 
 
 
 
740
  def analyze_quality(text, sentences, words, morphemes):
741
  qs = {}; sl = [len(s) for s in sentences]; tw = len(words)
742
  ideal = sum(1 for l in sl if 15<=l<=70)/len(sentences) if sentences else 0
 
767
  total = int(sum(qs[k]*wq[k] for k in wq))
768
  grade = "S" if total>=85 else "A" if total>=72 else "B" if total>=58 else "C" if total>=42 else "D" if total>=28 else "F"
769
  return {"score":total,"grade":grade,"sub_scores":qs}
 
 
 
 
770
# (Groq model id, display name) pairs polled by llm_cross_check; each model's
# parsed AI-probability vote is averaged into the cross-check score.
LLM_JUDGES = [("openai/gpt-oss-120b","GPT-OSS 120B"),("qwen/qwen3-32b","Qwen3 32B"),("moonshotai/kimi-k2-instruct-0905","Kimi-K2")]
 
771
  def _parse_ai_probability(raw_resp):
772
  """LLM 응답에서 AI 확률(0~100)을 추출. 한국어/영어 다양한 형식 대응."""
773
  if not raw_resp: return -1
 
774
  think_content = ''
775
  think_m = re.search(r'<think>(.*?)</think>', raw_resp, flags=re.S)
776
  if think_m: think_content = think_m.group(1)
777
  resp = re.sub(r'<think>.*?</think>', '', raw_resp, flags=re.S).strip()
778
  if not resp or len(resp) < 5:
779
  resp = raw_resp # think만 있으면 원본 포함
 
 
780
  specific_patterns = [
781
  r'AI\s*확률\s*[::]\s*(?:약\s*)?(\d+)\s*%?',
782
  r'AI\s*[Pp]robability\s*[::]\s*(?:about|approximately?\s*)?(\d+)\s*%?',
 
792
  if m:
793
  v = int(m.group(1))
794
  if 0 <= v <= 100: return v
 
 
795
  lines = [l.strip() for l in resp.strip().split('\n') if l.strip()]
796
  for line in reversed(lines[-5:]):
 
797
  if re.search(r'AI|확률|[Pp]robab|신뢰|판[정단]', line):
798
  nums = re.findall(r'(\d+)\s*%', line)
799
  if nums:
 
803
  if nums:
804
  v = int(nums[-1])
805
  if 0 <= v <= 100: return v
 
 
806
  all_pcts = list(re.finditer(r'(\d+)\s*(?:%|퍼센트|percent)', resp, re.I))
807
  for m in reversed(all_pcts):
808
  v = int(m.group(1))
 
809
  ctx_start = max(0, m.start()-50)
810
  ctx = resp[ctx_start:m.end()+20]
811
  if re.search(r'AI|확률|[Pp]robab|신뢰|판[정단]|가능성|likelihood', ctx, re.I):
812
  if 0 <= v <= 100: return v
 
 
813
  if all_pcts:
814
  v = int(all_pcts[-1].group(1))
815
  if 5 <= v <= 99: return v # 100% 제외 (통계 수치 오탐 방지)
 
 
816
  if think_content:
817
  for pat in specific_patterns:
818
  m = re.search(pat, think_content, re.I)
819
  if m:
820
  v = int(m.group(1))
821
  if 0 <= v <= 100: return v
 
822
  think_pcts = re.findall(r'(\d+)\s*%', think_content)
823
  if think_pcts:
824
  v = int(think_pcts[-1])
825
  if 5 <= v <= 99: return v
 
826
  return -1
 
827
  def llm_cross_check(text):
828
  if not GROQ_KEY: return {"score":-1,"detail":{}}
 
829
  prompt = f"""Analyze whether this text was written by AI.
 
830
  [Instructions]
831
  1. Determine AI vs Human with 3 brief reasons
832
  2. IMPORTANT - Your LAST line MUST be exactly this format:
833
  AI확률: XX%
834
  (Replace XX with your estimated probability 0-100)
 
835
  Example of correct last line:
836
  AI확률: 75%
 
837
  [Text to analyze]
838
  {text[:2000]}"""
 
839
  votes=[]; rpt={}
840
  for mid,mn in LLM_JUDGES:
841
  resp,err = call_groq(mid,prompt)
 
844
  if p >= 0:
845
  votes.append(p); rpt[mn]=f"{p}%"
846
  else:
 
847
  cleaned = re.sub(r'<think>.*?</think>', '', resp, flags=re.S).strip()
848
  tail = cleaned[-60:].replace('\n',' ') if len(cleaned) > 60 else cleaned.replace('\n',' ')
849
  rpt[mn]=f"파싱실패({tail[:40]})"
850
  else: rpt[mn]=f"ERR:{err[:30] if err else '?'}"
851
  if votes: return {"score":int(sum(votes)/len(votes)),"detail":rpt}
852
  return {"score":-1,"detail":rpt}
 
 
 
 
853
  def compute_verdict(scores, llm_score=-1, sent_avg=-1, ppx_score=-1, hum_score=-1):
854
  w={"통계":.06,"문체":.25,"반복성":.10,"구조":.12,"지문":.30}
855
  ws=sum(scores[k]*w[k] for k in w)
 
 
856
  if ppx_score >= 0: ws += ppx_score * 0.17
 
 
857
  style=scores["문체"]; fp=scores["지문"]; rep=scores["반복성"]; struct=scores["구조"]
858
  if style>=35 and fp>=35: ws+=8
859
  elif style>=30 and fp>=25: ws+=4
860
  if style>=30 and rep>=25 and fp>=20: ws+=4
861
  if fp>=45: ws+=3
862
  if struct>=50 and style>=30: ws+=3
 
863
  if ppx_score>=55 and fp>=35: ws+=5
864
  if ppx_score>=65 and style>=35: ws+=3
 
 
865
  if hum_score>=50:
866
  ws=max(ws, 45) # Humanizer 확인 → 최소 AI 의심 중간
867
  ws += (hum_score-50)*0.15
 
 
868
  if sent_avg>=0 and sent_avg>ws: ws=ws*0.80+sent_avg*0.20
 
869
  hi=sum(1 for v in scores.values() if v>=50)
870
  if hi>=4: ws+=8
871
  elif hi>=3: ws+=5
872
  elif hi>=2: ws+=2
 
 
873
  if style<40 and fp<=20 and rep<22 and struct<35 and (ppx_score<0 or ppx_score<40):
874
  ws-=5
 
875
  lo=sum(1 for v in scores.values() if v<20)
876
  if lo>=3: ws-=8
877
  elif lo>=2: ws-=3
 
882
  if fs>=45: return fs,"AI 의심 중간","ai_low"
883
  if fs>=30: return fs,"판단 유보","uncertain"
884
  return fs,"인간 작성 추정","human"
 
885
  def quick_score(text):
886
  sents=split_sentences(text); words=split_words(text); morphs=get_morphemes(text)
887
  sc={"통계":analyze_statistics(text,sents,words)["score"],"문체":analyze_korean_style(text,sents,morphs)["score"],
 
893
  hum=analyze_humanizer(text,sents,words,morphs)
894
  fs,v,lv=compute_verdict(sc, sent_avg=sent_avg, ppx_score=ppx["score"], hum_score=hum["score"])
895
  return fs,v,lv,sc,ppx,hum
 
 
 
 
896
  def brave_search(query, count=5):
897
  """Brave Search API — 단일 쿼리"""
898
  if not BRAVE_KEY: return []
 
908
  return results
909
  except: pass
910
  return []
 
911
  def search_kci(query):
912
  """KCI(한국학술지인용색인) 검색"""
913
  try:
 
920
  return results[:3]
921
  except: pass
922
  return []
 
923
  def search_riss(query):
924
  """RISS(학술연구정보서비스) — 간접 검색"""
925
  results = []
 
933
  results.append({"title": title, "url": "https://www.riss.kr" + m.group(1), "snippet": "", "source": "RISS"})
934
  except: pass
935
  return results[:3]
 
936
  def search_arxiv(query):
937
  """arXiv API 검색"""
938
  results = []
 
948
  except Exception as e:
949
  pass
950
  return results[:3]
 
951
  def gemini_plagiarism_check(text_chunk):
952
  """Gemini + Google Search Grounding으로 표절 검사"""
953
  if not HAS_GENAI or not GEMINI_KEY: return None
 
957
  prompt = f"""다음 텍스트가 인터넷에 존재하는지 Google Search로 확인하세요.
958
  유사한 문장이 발견되면 출처 URL과 유사도(%)를 보고하세요.
959
  마지막 줄에 "유사도: XX%" 형식으로 작성.
 
960
  [텍스트]
961
  {text_chunk[:1000]}"""
962
  resp = client.models.generate_content(
 
977
  return {"pct": pct, "response": text_resp, "sources": sources}
978
  except Exception as e:
979
  return {"pct": 0, "response": str(e)[:100], "sources": []}
 
980
  def parallel_brave_search(queries, max_workers=10):
981
  """Brave Search 병렬 실행 (최대 20개)"""
982
  all_results = {}
 
989
  all_results[q] = results
990
  except: all_results[q] = []
991
  return all_results
 
992
  def duckduckgo_search(query, max_results=5):
993
  """DuckDuckGo HTML 스크래핑 — API 키 불필요 폴백"""
994
  results = []
 
1003
  href = m.group(1)
1004
  title = re.sub(r'<[^>]+>', '', m.group(2)).strip()
1005
  snippet = re.sub(r'<[^>]+>', '', m.group(3)).strip()
 
1006
  real_url = href
1007
  if 'uddg=' in href:
1008
  um = re.search(r'uddg=([^&]+)', href)
 
1012
  if len(results) >= max_results: break
1013
  except: pass
1014
  return results
 
1015
def self_crawl_search(query, max_results=3):
    """httpx-based self crawling (DuckDuckGo + academic sites).

    Runs a plain DuckDuckGo search for *query*; unless the query already
    targets papers (contains '논문' or 'paper'), appends a second,
    academic-flavoured search with up to 2 extra results.
    """
    hits = list(duckduckgo_search(query, max_results))
    looks_academic = ('논문' in query) or ('paper' in query.lower())
    if not looks_academic:
        hits.extend(duckduckgo_search(f"{query} 논문 학술", 2))
    return hits
 
1022
  def run_plagiarism(text, progress=gr.Progress()):
1023
  if not text or len(text.strip())<50:
1024
  return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ 최소 50자 이상</div>", ""
1025
  text = text.strip()
1026
  sents = split_sentences(text)
1027
  now = datetime.now().strftime("%Y-%m-%d %H:%M")
 
1028
  has_brave = bool(BRAVE_KEY)
1029
  has_gemini = bool(HAS_GENAI and GEMINI_KEY)
 
1030
  progress(0.05, "문장 분리...")
 
1031
  blocks = []
1032
  for i in range(0, len(sents), 4):
1033
  block = ' '.join(sents[i:i+4])
1034
  if len(block) > 20:
1035
  blocks.append({"text": block, "sent_indices": list(range(i, min(i+4, len(sents))))})
 
1036
  all_sources = []
1037
  sent_matches = {i: [] for i in range(len(sents))} # 문장별 매칭 정보
1038
  block_results = []
1039
  log_lines = []
 
 
1040
  if has_brave:
1041
  progress(0.15, f"Brave Search 병렬 검색 ({len(blocks)}블록)...")
1042
  queries = []
 
1053
  sent_matches[si].append({"source": r["title"], "url": r["url"], "type": "Brave"})
1054
  log_lines.append(f"Brave Search: {len(queries)}쿼리 → {sum(len(v) for v in brave_results.values())}건")
1055
  else:
 
1056
  progress(0.15, f"자체 웹 검색 ({len(blocks)}블록)...")
1057
  crawl_queries = []
1058
  for b in blocks[:10]: # 최대 10블록
 
1070
  sent_matches[si].append({"source": r["title"], "url": r["url"], "type": r.get("source","Web")})
1071
  except: pass
1072
  log_lines.append(f"자체 웹검색: {len(crawl_queries)}쿼리 (DuckDuckGo)")
 
 
1073
  progress(0.40, "학술 DB 검색 (KCI/RISS/arXiv)...")
 
1074
  words = split_words(text)
1075
  wf = Counter(words)
1076
  keywords = [w for w, c in wf.most_common(20) if len(w) >= 2 and c >= 2][:5]
1077
  kw_query = ' '.join(keywords[:3])
 
1078
  academic_results = []
1079
  with ThreadPoolExecutor(max_workers=3) as executor:
1080
  futures = [
 
1089
  all_sources.extend(results)
1090
  except: pass
1091
  log_lines.append(f"학술DB: KCI/RISS/arXiv → {len(academic_results)}건")
 
 
1092
  gemini_results = []
1093
  if has_gemini:
1094
  progress(0.60, "Gemini + Google Search...")
 
1101
  for si in b["sent_indices"]:
1102
  sent_matches[si].append({"source": src.get("title",""), "url": src.get("url",""), "type": "Google"})
1103
  log_lines.append(f"Gemini: {len(blocks[:5])}블록 → {sum(len(r.get('sources',[])) for r in gemini_results)}출처")
 
1104
  progress(0.80, "보고서 생성...")
 
 
1105
  matched_sents = sum(1 for si, matches in sent_matches.items() if matches)
1106
  total_sents = len(sents)
1107
  plag_pct = int(matched_sents / total_sents * 100) if total_sents > 0 else 0
 
 
1108
  if gemini_results:
1109
  gemini_pcts = [r["pct"] for r in gemini_results if r["pct"] > 0]
1110
  if gemini_pcts:
1111
  gemini_avg = sum(gemini_pcts) / len(gemini_pcts)
1112
  plag_pct = int(plag_pct * 0.5 + gemini_avg * 0.5)
 
 
1113
  seen_urls = set()
1114
  unique_sources = []
1115
  for s in all_sources:
 
1117
  if url and url not in seen_urls:
1118
  seen_urls.add(url)
1119
  unique_sources.append(s)
 
 
1120
  if plag_pct >= 50: grade, grade_color, grade_bg = "표절 의심", "#FF4444", "#FFE0E0"
1121
  elif plag_pct >= 30: grade, grade_color, grade_bg = "주의 필요", "#FF8800", "#FFF0DD"
1122
  elif plag_pct >= 15: grade, grade_color, grade_bg = "유사 표현 일부", "#DDAA00", "#FFFBE0"
1123
  elif plag_pct >= 5: grade, grade_color, grade_bg = "양호", "#4ECDC4", "#E0FFF8"
1124
  else: grade, grade_color, grade_bg = "우수 (원본성 높음)", "#22AA44", "#E0FFE8"
 
 
 
1125
  sent_analysis = []
1126
  for i, s in enumerate(sents):
1127
  matches = sent_matches.get(i, [])
 
1131
  else:
1132
  sent_analysis.append({"idx":i, "text":s, "matched":False})
1133
  sim_sents = [s for s in sent_analysis if s["matched"]]
 
 
1134
  src_groups = {}
1135
  for src in unique_sources:
1136
  key = src.get("url","")[:80]
 
1138
  src_groups[key] = {"title":src.get("title",""), "url":src.get("url",""), "source":src.get("source",""), "count":0}
1139
  src_groups[key]["count"] += 1
1140
  src_list = sorted(src_groups.values(), key=lambda x: -x["count"])
 
1141
  methods_used = []
1142
  if has_brave: methods_used.append("Brave Search(병렬)")
1143
  elif all_sources: methods_used.append("DuckDuckGo(자체크롤링)")
1144
  methods_used.append("KCI · RISS · arXiv")
1145
  if has_gemini: methods_used.append("Gemini+Google Search")
1146
  method_str = " + ".join(methods_used)
 
1147
  gc = grade_color
1148
  word_count = len(split_words(text))
1149
  char_count = len(text)
1150
  doc_id = hashlib.md5(text[:100].encode()).hexdigest()[:8].upper()
1151
  similarity_pct = plag_pct
1152
  citation_pct = 0
 
 
1153
  cat_suspect = len(sim_sents) # 의심
1154
  cat_cited = 0 # 인용 (형식적 인용 감지)
1155
  cat_normal = total_sents - cat_suspect - cat_cited # 일반
1156
  cat_suspect_pct = int(cat_suspect / max(1, total_sents) * 100)
1157
  cat_normal_pct = 100 - cat_suspect_pct
 
 
1158
  def src_icon(s):
1159
  src = s.get("source","").lower()
1160
  if "kci" in src: return "📚", "KCI"
 
1163
  if "google" in src: return "🔍", "Google"
1164
  if "brave" in src: return "🌐", "Brave"
1165
  return "🌐", "Web"
 
 
1166
  src_rows = ""
1167
  for i, sg in enumerate(src_list[:15]):
1168
  pct = min(100, int(sg["count"] / max(1, total_sents) * 100 * 3))
 
1176
  <td style="padding:6px 8px;font-size:12px;text-align:center;border:1px solid #D5D5D5;font-weight:800;color:#D63031;">{pct}%</td>
1177
  <td style="padding:6px 10px;border:1px solid #D5D5D5;"><div style="background:#EDEDED;height:14px;border-radius:2px;overflow:hidden;"><div style="background:linear-gradient(90deg,#D63031,#FF7675);height:100%;width:{max(3,pct)}%;border-radius:2px;"></div></div></td>
1178
  </tr>"""
 
 
1179
  suspect_rows = ""
1180
  for i, sa in enumerate(sim_sents[:15]):
1181
  suspect_rows += f"""<tr>
 
1184
  <td style="padding:8px;font-size:10px;line-height:1.6;border:1px solid #D5D5D5;vertical-align:top;color:#555;"><span style="background:#FFE0E0;border-bottom:2px solid #E74C3C;padding:1px 3px;">{sa["text"][:70]}...</span></td>
1185
  <td style="padding:8px;font-size:10px;border:1px solid #D5D5D5;vertical-align:top;"><a href="{sa.get('url','#')}" target="_blank" style="color:#2E86C1;text-decoration:none;font-weight:600;">{sa["source"][:28]}</a><br><span style="font-size:8px;color:#AAA;">{sa.get('type','')}</span></td>
1186
  </tr>"""
 
 
1187
  full_hl = ""
1188
  for sa in sent_analysis:
1189
  sidx = sa["idx"] + 1
 
1191
  full_hl += f'<span style="background:#FFD6D6;border-bottom:2px solid #E74C3C;padding:1px 2px;cursor:pointer;" title="[의심 #{sidx}] 출처: {sa.get("source","")}">{sa["text"]}</span> '
1192
  else:
1193
  full_hl += f'<span style="color:#333;">{sa["text"]}</span> '
 
 
1194
  bar_suspect_w = max(2, cat_suspect_pct) if cat_suspect > 0 else 0
1195
  bar_normal_w = 100 - bar_suspect_w
 
 
1196
  HDR_BG = '#3B7DD8'
1197
  HDR_BG2 = '#4A8DE0'
1198
  TH = 'padding:8px 10px;font-size:10px;font-weight:700;color:#fff;background:{};text-align:center;border:1px solid {};'.format(HDR_BG, HDR_BG)
1199
  TL = 'padding:7px 10px;font-size:11px;color:#444;font-weight:600;background:#EDF2FA;border:1px solid #D5D5D5;'
1200
  TV = 'padding:7px 10px;font-size:12px;color:#333;border:1px solid #D5D5D5;'
1201
  SEC = 'font-size:13px;font-weight:800;color:#1A3C6E;margin:0 0 10px 0;padding:8px 12px;background:#EDF2FA;border-left:4px solid {};border-bottom:1px solid #D5D5D5;'.format(HDR_BG)
 
1202
  html = f"""<div style="font-family:'Noto Sans KR','Malgun Gothic','Apple SD Gothic Neo',sans-serif;max-width:780px;margin:0 auto;background:#fff;border:2px solid #3B7DD8;box-shadow:0 2px 12px rgba(0,0,0,0.08);">
 
1203
  <!-- ═══════ 헤더 (CopyKiller 스타일) ═══════ -->
1204
  <div style="background:linear-gradient(135deg,{HDR_BG},{HDR_BG2});padding:18px 24px;color:#fff;">
1205
  <table style="width:100%;"><tr>
 
1213
  </td>
1214
  </tr></table>
1215
  </div>
 
1216
  <!-- ═══════ 제출 정보 테이블 ═══════ -->
1217
  <div style="padding:16px 24px 0;">
1218
  <div style="{SEC}">📋 검사 정보</div>
 
1237
  </tr>
1238
  </table>
1239
  </div>
 
1240
  <!-- ═══════ 검사 결과 (도넛 + 바) ═══════ -->
1241
  <div style="padding:18px 24px 0;">
1242
  <div style="{SEC}">📊 검사 결과</div>
 
1258
  <span style="font-size:11px;font-weight:800;color:{gc};">{grade}</span>
1259
  </div>
1260
  </td>
 
1261
  <!-- 상세 결과 -->
1262
  <td style="padding:0;border:1px solid #D5D5D5;vertical-align:top;">
1263
  <!-- 카테고리 바 (CopyKiller 핵심) -->
 
1273
  <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#E8ECF0;border:1px solid #CCC;border-radius:2px;"></span> 일반 <b>{cat_normal}</b>건</span>
1274
  </div>
1275
  </div>
 
1276
  <!-- 비율 바 3줄 -->
1277
  <div style="padding:12px 18px;">
1278
  <div style="margin-bottom:10px;">
 
1288
  <div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#3498DB,#85C1E9);height:100%;width:{max(1,citation_pct)}%;transition:width 0.5s;"></div></div>
1289
  </div>
1290
  </div>
 
1291
  <!-- 요약 수치 -->
1292
  <div style="padding:8px 18px;background:#F8F9FB;border-top:1px solid #E8E8E8;">
1293
  <table style="width:100%;border-collapse:collapse;">
 
1305
  </tr>
1306
  </table>
1307
  </div>
 
1308
  <!-- ═══════ 전체 텍스트 분석 ═══════ -->
1309
  <div style="padding:18px 24px 0;">
1310
  <div style="{SEC}">📝 전체 텍스트 분석</div>
 
1317
  </div>
1318
  <div style="padding:14px;background:#FAFBFC;border:1px solid #D5D5D5;line-height:2.1;font-size:13px;max-height:300px;overflow-y:auto;">{full_hl}</div>
1319
  </div>
 
1320
  <!-- ═══════ 표절 의심 출처 ═══════ -->
1321
  <div style="padding:18px 24px 0;">
1322
  <div style="{SEC}">🔗 표절 의심 출처 ({len(src_list)}건)</div>
 
1331
  {src_rows if src_rows else '<tr><td colspan="5" style="padding:18px;text-align:center;color:#999;font-size:11px;border:1px solid #D5D5D5;background:#FAFBFC;">발견된 유사 출처가 없습니다.</td></tr>'}
1332
  </table>
1333
  </div>
 
1334
  <!-- ═══════ 의심 문장 비교 ═══════ -->
1335
  <div style="padding:18px 24px 0;">
1336
  <div style="{SEC}">⚠️ 의심 문장 비교 ({len(sim_sents)}건)</div>
 
1344
  {suspect_rows if suspect_rows else '<tr><td colspan="4" style="padding:18px;text-align:center;color:#999;font-size:11px;border:1px solid #D5D5D5;background:#FAFBFC;">유사 의심 문장이 발견되지 않았습니다.</td></tr>'}
1345
  </table>
1346
  </div>
 
1347
  <!-- ═══════ 검사 안내 ═══════ -->
1348
  <div style="margin:18px 24px;padding:12px 14px;background:#F5F8FC;border:1px solid #D0DAEA;border-radius:4px;font-size:9px;color:#555;line-height:1.8;">
1349
  <b style="color:#333;">📌 검사 안내</b><br>
 
1352
  · 유사도는 문장 단위 매칭 기반이며, 최종 판정은 교수자/검토자의 확인이 필요합니다.<br>
1353
  · 인용 표기(따옴표, 각주 등)가 포함된 문장은 인용으로 분류될 수 있습니다.
1354
  </div>
 
1355
  <!-- ═══════ 푸터 (CopyKiller 스타일) ═══════ -->
1356
  <div style="padding:10px 24px;background:#F0F3F8;border-top:2px solid {HDR_BG};display:flex;justify-content:space-between;align-items:center;">
1357
  <div>
 
1364
  </div>
1365
  </div>
1366
  </div>"""
 
1367
  log = '\n'.join(log_lines) + f"\n\n종합: {plag_pct}% {grade} | 출처 {len(unique_sources)}건 | 유사문장 {matched_sents}/{total_sents}"
1368
  return html, log
 
 
 
 
1369
  def run_detection(text, progress=gr.Progress()):
1370
  if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ 최소 50자</div>",""
1371
  text=text.strip()
 
1382
  sc={"통계":s1["score"],"문체":s2["score"],"반복성":s3["score"],"구조":s4["score"],"지문":s5["score"]}
1383
  sent_scores=[score_sentence(s)[0] for s in sents]
1384
  sent_avg=sum(sent_scores)/len(sent_scores) if sent_scores else -1
 
 
1385
  ms_raw=s5.get("model_scores",{})
1386
  model_est=estimate_model(text,sents,morphs,ms_raw)
 
1387
  fs,verdict,level=compute_verdict(sc,lr["score"],sent_avg=sent_avg,ppx_score=ppx["score"],hum_score=hum["score"])
1388
  progress(0.95)
1389
  cm={"ai_high":("#FF4444","#FFE0E0","높음"),"ai_medium":("#FF8800","#FFF0DD","중간~높음"),"ai_low":("#DDAA00","#FFFBE0","중간"),"uncertain":("#888","#F0F0F0","낮음"),"human":("#22AA44","#E0FFE8","매우 낮음")}
1390
  fg,bg,conf=cm.get(level,("#888","#F0F0F0","?"))
 
 
1391
  est_model=model_est.get("model","특정 불가")
1392
  est_conf=model_est.get("confidence","낮음")
1393
  est_reasons=model_est.get("reasons",[])
 
1397
  mt=f"{est_model} (참고)"
1398
  else:
1399
  mt="특정 불가"
 
1400
  ai_sents=sum(1 for s in sent_scores if s>=40)
1401
  human_sents=sum(1 for s in sent_scores if s<20)
 
1402
  def gb(l,s,w="",desc=""):
1403
  c="#FF4444" if s>=70 else "#FF8800" if s>=50 else "#DDAA00" if s>=35 else "#22AA44"
1404
  wt=f"<span style='color:#999;font-size:9px;'> ×{w}</span>" if w else ""
1405
  dt=f"<div style='font-size:9px;color:#888;margin-top:1px;'>{desc}</div>" if desc else ""
1406
  return f"<div style='margin:4px 0;'><div style='display:flex;justify-content:space-between;'><span style='font-size:11px;font-weight:600;'>{l}{wt}</span><span style='font-size:11px;font-weight:700;color:{c};'>{s}</span></div><div style='background:#E8E8E8;border-radius:4px;height:7px;'><div style='background:{c};height:100%;width:{s}%;border-radius:4px;'></div></div>{dt}</div>"
 
 
1407
  mb=""
1408
  for mn in ["GPT","Claude","Gemini","Perplexity"]:
1409
  s=ms_raw.get(mn,0); mc="#FF4444" if s>=40 else "#FF8800" if s>=20 else "#CCC"
 
1410
  tag=""
1411
  if mn==est_model and est_conf!="낮음":
1412
  tag=f" <span style='background:#FF4444;color:white;font-size:7px;padding:0 3px;border-radius:3px;'>추정</span>"
1413
  mb+=f"<div style='display:flex;align-items:center;gap:4px;margin:2px 0;'><span style='width:66px;font-size:10px;font-weight:600;'>{mn}{tag}</span><div style='flex:1;background:#E8E8E8;border-radius:3px;height:5px;'><div style='background:{mc};height:100%;width:{s}%;'></div></div><span style='font-size:9px;width:18px;text-align:right;color:{mc};'>{s}</span></div>"
 
 
1414
  ls=""
1415
  if lr["score"]>=0:
1416
  lsc=lr["score"]
1417
  lr_rows="".join(f"<div style='font-size:9px;color:#555;'>{mn}: {lr['detail'].get(mn,'—')}</div>" for _,mn in LLM_JUDGES)
1418
  ls=f"<div style='margin-top:8px;padding:8px;background:#F8F8FF;border-radius:6px;border:1px solid #E0E0FF;'><div style='font-size:10px;font-weight:700;margin-bottom:3px;'>🤖 LLM 교차검증 (평균 {lsc}%)</div>{lr_rows}</div>"
1419
  else: ls="<div style='margin-top:6px;padding:4px 8px;background:#F5F5F5;border-radius:4px;color:#999;font-size:9px;'>🤖 GROQ_API_KEY 미설정</div>"
 
 
1420
  qs=qr["sub_scores"]; gc={"S":"#FF6B6B","A":"#4ECDC4","B":"#45B7D1","C":"#DDAA00","D":"#FF8800","F":"#FF4444"}.get(qr["grade"],"#888")
1421
  def qgb(l,s):
1422
  c="#22AA44" if s>=70 else "#4ECDC4" if s>=55 else "#DDAA00" if s>=40 else "#FF8800"
1423
  return f"<div style='margin:2px 0;display:flex;align-items:center;gap:4px;'><span style='width:50px;font-size:10px;'>{l}</span><div style='flex:1;background:#E8E8E8;border-radius:3px;height:5px;'><div style='background:{c};height:100%;width:{s}%;'></div></div><span style='font-size:9px;color:{c};width:18px;text-align:right;'>{s}</span></div>"
 
 
1424
  reasons=[]
1425
  if sc["문체"]>=70: reasons.append("격식체 종결어미가 대부분, AI형 접속사·상투표현 다수 감지")
1426
  elif sc["문체"]>=50: reasons.append("격식체와 AI형 표현이 혼재")
 
1437
  reasons.append(f"🔍 추정 모델: <b>{est_model}</b> ({est_why})")
1438
  if not reasons: reasons.append("인간적 표현이 우세하며 AI 패턴이 약함")
1439
  reason_html='<br>'.join(f"• {r}" for r in reasons)
 
 
1440
  ppx_c="#FF4444" if ppx["score"]>=65 else "#FF8800" if ppx["score"]>=50 else "#DDAA00" if ppx["score"]>=35 else "#22AA44"
1441
  ppx_html=f"""<div style='margin-top:8px;padding:8px;background:linear-gradient(135deg,#FFF8F0,#FFF0FF);border-radius:6px;border:1px solid #E8D0FF;'>
1442
  <div style='font-size:10px;font-weight:700;margin-bottom:4px;'>🧠 Perplexity 분석 <span style='color:{ppx_c};font-size:12px;font-weight:900;'>{ppx["score"]}점</span></div>
 
1447
  <span style='font-size:9px;color:#777;'>Zipf적합: {ppx.get("zipf",0)}</span>
1448
  </div>
1449
  </div>"""
 
 
1450
  hum_html=""
1451
  if hum["score"]>=30:
1452
  hc="#FF4444" if hum["score"]>=65 else "#FF8800" if hum["score"]>=50 else "#DDAA00"
 
1455
  <div style='font-size:10px;font-weight:700;margin-bottom:3px;'>🛡️ Humanizer 탐지 <span style='color:{hc};font-size:12px;font-weight:900;'>{hum["score"]}점</span></div>
1456
  {sig_rows}
1457
  </div>"""
 
 
1458
  est_html=""
1459
  if est_model!="특정 불가":
1460
  ec="#FF4444" if est_conf=="높음" else "#FF8800" if est_conf=="중간" else "#DDAA00"
 
1468
  {ranked_html}
1469
  <div style='font-size:8px;color:#999;margin-top:2px;'>근거: {", ".join(est_reasons[:3]) if est_reasons else "복합 지표"}</div>
1470
  </div>"""
 
1471
  html=f"""<div style="font-family:'Pretendard','Noto Sans KR',sans-serif;max-width:720px;margin:0 auto;">
1472
  <div style="background:{bg};border:2px solid {fg};border-radius:14px;padding:20px;margin-bottom:12px;">
1473
  <div style="display:flex;align-items:center;gap:16px;">
 
1489
  <div style="font-size:11px;color:#555;line-height:1.7;">{reason_html}</div>
1490
  </div>
1491
  </div>
 
1492
  <div style="display:grid;grid-template-columns:1fr 1fr;gap:8px;">
1493
  <div style="background:#FAFAFA;border-radius:8px;padding:10px;">
1494
  <div style="font-size:10px;font-weight:700;margin-bottom:4px;">📊 AI 탐지 5축 + Perplexity</div>
 
1515
  </div>"""
1516
  log=f"AI:{fs}점 [{verdict}] 신뢰:{conf} | 모델:{mt} | PPX:{ppx['score']} HUM:{hum['score']} | 품질:{qr['grade']}({qr['score']})\n축: 통계{sc['통계']} 문체{sc['문체']} 반복{sc['반복성']} 구조{sc['구조']} 지문{sc['지문']} PPX{ppx['score']} HUM{hum['score']}"
1517
  return html, log
 
 
 
 
1518
  def run_highlight(text):
1519
  if not text or len(text.strip())<30: return "<div style='color:#888;'>텍스트 필요</div>"
1520
  sents=split_sentences(text)
1521
  hl=[]
1522
  for s in sents:
1523
  sc, reasons = score_sentence(s)
 
1524
  if sc >= 60: bg="rgba(220,38,38,0.35)"; level="AI확신"
1525
  elif sc >= 40: bg="rgba(249,115,22,0.30)"; level="AI의심"
1526
  elif sc >= 25: bg="rgba(234,179,8,0.25)"; level="주의"
1527
  elif sc >= 10: bg="rgba(132,204,22,0.15)"; level="인간추정"
1528
  else: bg="rgba(34,197,94,0.20)"; level="인간"
 
1529
  detail_parts = []
1530
  for r in reasons:
1531
  if '격식' in r or '비격식AI' in r: detail_parts.append(f"🔤 {r}")
 
1536
  else: detail_parts.append(r)
1537
  tt = ' | '.join(detail_parts) if detail_parts else '특이 패턴 없음'
1538
  hl.append(f'<span style="background:{bg};padding:2px 4px;border-radius:4px;display:inline;line-height:2.2;border-bottom:2px solid {"#DC2626" if sc>=60 else "#F97316" if sc>=40 else "#EAB308" if sc>=25 else "#84CC16" if sc>=10 else "#22C55E"};" title="[{level}] {tt} ({sc}점)">{s}</span>')
 
1539
  total_scores = [score_sentence(s)[0] for s in sents]
1540
  avg_sc = sum(total_scores)/len(total_scores) if total_scores else 0
1541
  ai_high = sum(1 for s in total_scores if s >= 60)
1542
  ai_mid = sum(1 for s in total_scores if 40 <= s < 60)
1543
  human_cnt = sum(1 for s in total_scores if s < 25)
 
1544
  return f"""<div style='font-family:Pretendard,sans-serif;'>
1545
  <div style='margin-bottom:10px;padding:10px;background:#F8F8FF;border-radius:8px;'>
1546
  <div style='display:flex;gap:8px;align-items:center;font-size:11px;margin-bottom:6px;flex-wrap:wrap;'>
 
1554
  </div>
1555
  <div style='line-height:2.4;font-size:14px;'>{' '.join(hl)}</div>
1556
  </div>"""
 
 
 
 
# Demo inputs for the UI sample buttons (Korean).
# SAMPLE_AI: formal, AI-styled expository prose — should score high on AI detection.
# SAMPLE_HUMAN: casual, colloquial human writing — should score low.
SAMPLE_AI = """인공지능 기술은 현대 사회에서 매우 중요한 역할을 하고 있습니다. 특히 자연어 처리 분야에서의 발전은 눈부신 성과를 거두고 있습니다. 이러한 기술의 발전은 다양한 산업 분야에 긍정적인 영향을 미치고 있으며, 향후 더욱 발전할 것으로 예상됩니다.

또한 생성형 AI의 등장으로 콘텐츠 제작 방식이 크게 변화하고 있습니다. 이를 통해 기업들은 효율적인 콘텐츠 생산이 가능해졌으며, 개인 사용자들도 다양한 창작 활동에 AI를 활용할 수 있게 되었습니다. 따라서 AI 리터러시의 중요성이 더욱 부각되고 있습니다.

나아가 AI 윤리와 규제에 대한 논의도 활발히 진행되고 있습니다. 특히 AI가 생성한 콘텐츠의 저작권 문제는 중요한 의미를 가지며, 이에 대한 법적 프레임워크 구축이 필요합니다. 결과적으로 기술 발전과 함께 사회적 합의를 이루는 것이 중요합니다."""

SAMPLE_HUMAN = """아 진짜 요즘 AI 때문에 머리 아프다ㅋㅋㅋ 어제 chatgpt한테 레포트 써달라고 했는데 완전 교과서 같은 글만 써줘서 그냥 내가 다시 썼음;;

근데 생각해보면 AI가 쓴 글이랑 사람이 쓴 글이 확실히 다르긴 해. 뭔가... 너무 깔끔하달까? 사람은 이렇게 횡설수설도 하고 맞춤법도 틀리고 그러잖아.

교수님이 AI 탐지기 돌린다고 해서 좀 무서운데 ㅠㅠ 나는 진짜 직접 쓴 건데 혹시 오탐 나면 어쩌지... 걱정된다 진심으로."""
 
 
 
 
1563
  def run_document_analysis(file, progress=gr.Progress()):
1564
  """문서 파일 업로드 → 섹션별 AI 탐지 히트맵 + PDF 보고서 생성"""
1565
  if file is None:
1566
  return "<div style='padding:20px;text-align:center;color:#888;'>📄 파일을 업로드하세요 (PDF, DOCX, HWP, HWPX, TXT)</div>", "", None
 
1567
  if hasattr(file, 'path'):
1568
  file_path = file.path
1569
  elif hasattr(file, 'name'):
 
1572
  file_path = str(file)
1573
  fname = os.path.basename(file_path)
1574
  progress(0.05, f"📄 {fname} 읽는 중...")
 
1575
  sections, full_text, error = extract_text_from_file(file_path)
1576
  if error:
1577
  return f"<div style='padding:20px;text-align:center;color:#E44;'>⚠️ {error}</div>", "", None
1578
  if not sections or not full_text or len(full_text.strip()) < 50:
1579
  return "<div style='padding:20px;text-align:center;color:#E44;'>⚠️ 텍스트가 충분하지 않습니다 (50자 미만)</div>", "", None
 
1580
  progress(0.15, "전체 텍스트 분석...")
 
1581
  sents_all = split_sentences(full_text)
1582
  words_all = split_words(full_text)
1583
  morphs_all = get_morphemes(full_text)
1584
  total_score, total_verdict, total_level, total_axes, total_ppx, total_hum = quick_score(full_text)
1585
  quality = analyze_quality(full_text, sents_all, words_all, morphs_all)
 
 
1586
  progress(0.30, "LLM 교차검증...")
1587
  llm_result = llm_cross_check(full_text[:3000])
1588
  if llm_result["score"] >= 0:
1589
  _sent_scores = [score_sentence(s)[0] for s in sents_all]
1590
  _sent_avg = sum(_sent_scores)/len(_sent_scores) if _sent_scores else -1
1591
  total_score, total_verdict, total_level = compute_verdict(total_axes, llm_result["score"], sent_avg=_sent_avg, ppx_score=total_ppx["score"], hum_score=total_hum["score"])
 
 
1592
  progress(0.45, f"{len(sections)}개 섹션 분석...")
1593
  section_results = []
1594
  for i, sec in enumerate(sections):
 
1596
  section_results.append({"idx": i+1, "text": sec, "score": -1, "verdict": "너무 짧음", "skipped": True})
1597
  continue
1598
  s_score, s_verdict, s_level, s_axes, _, _ = quick_score(sec)
 
1599
  sec_sents = split_sentences(sec)
1600
  sent_scores = []
1601
  for sent in sec_sents:
 
1608
  })
1609
  pct = 0.45 + (i / max(len(sections), 1)) * 0.30
1610
  progress(pct, f"섹션 {i+1}/{len(sections)}")
 
 
1611
  now = datetime.now().strftime("%Y-%m-%d %H:%M")
1612
  ext = Path(file_path).suffix.upper()
1613
  cm_map = {"ai_high": ("#FF4444", "#FFE0E0"), "ai_medium": ("#FF8800", "#FFF0DD"),
1614
  "ai_low": ("#DDAA00", "#FFFBE0"), "uncertain": ("#888", "#F5F5F5"), "human": ("#22AA44", "#E0FFE8")}
1615
  tc, tbg = cm_map.get(total_level, ("#888", "#F5F5F5"))
 
 
1616
  heatmap_cells = []
1617
  for sr in section_results:
1618
  sidx = sr["idx"]
 
1623
  ssc = sr["score"]; svd = sr["verdict"]
1624
  heatmap_cells.append(f"<div style='flex:1;min-width:18px;height:28px;background:{sc};border-radius:3px;opacity:0.8;cursor:pointer;' title='섹션{sidx}: AI {ssc}점 ({svd})'></div>")
1625
  heatmap_bar = f"<div style='display:flex;gap:3px;margin:12px 0;'>" + ''.join(heatmap_cells) + "</div>"
 
 
1626
  section_cards = []
1627
  for sr in section_results:
1628
  if sr["skipped"]: continue
1629
  sc, sbg = cm_map.get(sr.get("level", "uncertain"), ("#888", "#F5F5F5"))
 
1630
  sent_html = ""
1631
  for ss in sr.get("sent_scores", []):
1632
  s = ss["score"]
 
1635
  elif s >= 25: sclr = "background:rgba(221,170,0,0.08);border-bottom:1px solid #DDAA00;"
1636
  else: sclr = ""
1637
  sent_html += f"<span style='{sclr}padding:1px 2px;border-radius:2px;' title='AI {s}점'>{ss['text']}</span> "
 
1638
  axes_html = ""
1639
  if "axes" in sr:
1640
  ax = sr["axes"]
1641
  for k, v in ax.items():
1642
  axc = "#FF4444" if v >= 50 else "#FF8800" if v >= 30 else "#22AA44"
1643
  axes_html += f"<span style='display:inline-block;margin:1px 3px;padding:2px 8px;background:{axc}22;border:1px solid {axc}44;border-radius:10px;font-size:9px;color:{axc};'>{k} {v}</span>"
 
1644
  section_cards.append(f"""
1645
  <div style="border:1px solid #E0E0E0;border-left:4px solid {sc};border-radius:8px;padding:14px;margin-bottom:10px;background:white;">
1646
  <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:8px;">
 
1650
  <div style="margin-bottom:6px;">{axes_html}</div>
1651
  <div style="font-size:12px;line-height:1.8;color:#333;">{sent_html}</div>
1652
  </div>""")
 
 
1653
  ai_high = sum(1 for s in section_results if not s["skipped"] and s["score"] >= 60)
1654
  ai_med = sum(1 for s in section_results if not s["skipped"] and 35 <= s["score"] < 60)
1655
  ai_low = sum(1 for s in section_results if not s["skipped"] and s["score"] < 35)
1656
  valid_sections = [s for s in section_results if not s["skipped"]]
 
 
1657
  llm_info = ""
1658
  if llm_result["score"] >= 0:
1659
  llm_rows = ''.join(f"<span style='margin-right:8px;font-size:10px;color:#555;'>{mn}: {llm_result['detail'].get(mn,'—')}</span>" for _, mn in LLM_JUDGES)
1660
  llm_info = f"<div style='margin-top:8px;padding:8px 12px;background:#F8F8FF;border-radius:6px;border:1px solid #E0E0FF;font-size:10px;'><b>🤖 LLM 교차검증:</b> 평균 {llm_result['score']}% | {llm_rows}</div>"
 
1661
  html = f"""<div style="font-family:'Pretendard','Noto Sans KR',sans-serif;max-width:800px;margin:0 auto;background:#FAFBFC;border-radius:12px;border:1px solid #E0E0E0;overflow:hidden;">
1662
  <!-- 헤더 -->
1663
  <div style="background:linear-gradient(135deg,#1A1F36,#2D3561);padding:20px 24px;color:white;">
 
1672
  </div>
1673
  </div>
1674
  </div>
 
1675
  <!-- 요약 -->
1676
  <div style="padding:16px 24px;">
1677
  <div style="display:grid;grid-template-columns:repeat(4,1fr);gap:10px;margin-bottom:12px;">
 
1692
  <div style="font-size:10px;color:#888;">품질 등급</div>
1693
  </div>
1694
  </div>
 
1695
  <!-- 히트맵 바 -->
1696
  <div style="background:white;border:1px solid #E0E0E0;border-radius:8px;padding:12px;">
1697
  <div style="font-size:11px;font-weight:700;margin-bottom:6px;">🗺️ 섹션별 AI 히트맵 (빨강=AI의심, 초록=인간)</div>
 
1705
  </div>
1706
  {llm_info}
1707
  </div>
 
1708
  <!-- 섹션 상세 -->
1709
  <div style="padding:0 24px 20px;">
1710
  <div style="font-size:13px;font-weight:700;margin-bottom:10px;">📊 섹션별 상세 분석 ({len(valid_sections)}개)</div>
1711
  {''.join(section_cards)}
1712
  </div>
 
1713
  <!-- 푸터 -->
1714
  <div style="padding:10px 24px;background:#F0F3F8;border-top:1px solid #E0E0E0;display:flex;justify-content:space-between;">
1715
  <span style="font-size:11px;font-weight:800;color:#1A1F36;">AI Detector v4.0</span>
1716
  <span style="font-size:9px;color:#AAA;">{now} · 5축 앙상블 + LLM 교차검증</span>
1717
  </div>
1718
  </div>"""
 
 
1719
  progress(0.90, "PDF 보고서 생성...")
1720
  pdf_path = _generate_pdf_report(fname, total_score, total_verdict, total_level,
1721
  total_axes, quality, section_results, llm_result, now)
 
1722
  log = f"파일: {fname} ({ext})\n"
1723
  log += f"섹션: {len(sections)}개 | 전체: {len(full_text)}자\n"
1724
  log += f"총점: {total_score} ({total_verdict})\n"
 
1727
  if llm_result["score"] >= 0:
1728
  log += f"LLM 교차검증: {llm_result['score']}%\n"
1729
  return html, log, pdf_path
 
 
1730
  def _generate_pdf_report(fname, score, verdict, level, axes, quality, sections, llm_result, now):
1731
  """HTML → PDF 변환으로 보고서 생성"""
1732
  try:
1733
  cm = {"ai_high":"#FF4444","ai_medium":"#FF8800","ai_low":"#DDAA00","uncertain":"#888","human":"#22AA44"}
1734
  tc = cm.get(level, "#888")
 
 
1735
  sec_rows = ""
1736
  for sr in sections:
1737
  if sr["skipped"]: continue
1738
  sc = cm.get(sr.get("level","uncertain"),"#888")
1739
  sec_rows += f"<tr><td style='padding:6px 10px;border:1px solid #DDD;text-align:center;'>{sr['idx']}</td><td style='padding:6px 10px;border:1px solid #DDD;'>{sr['text'][:80]}...</td><td style='padding:6px 10px;border:1px solid #DDD;text-align:center;color:{sc};font-weight:700;'>{sr['score']}</td><td style='padding:6px 10px;border:1px solid #DDD;text-align:center;'>{sr['verdict']}</td></tr>"
 
1740
  ax_rows = ''.join(f"<tr><td style='padding:4px 10px;border:1px solid #DDD;'>{k}</td><td style='padding:4px 10px;border:1px solid #DDD;text-align:center;font-weight:700;'>{v}/100</td></tr>" for k, v in axes.items())
 
1741
  html_content = f"""<!DOCTYPE html><html><head><meta charset="UTF-8">
1742
  <style>
1743
  body{{font-family:sans-serif;margin:30px;font-size:12px;color:#333;}}
 
1750
  </style></head><body>
1751
  <h1>📄 AI 글 판별 보고서</h1>
1752
  <p><b>파일:</b> {fname} | <b>생성:</b> {now} | <b>엔진:</b> AI Detector v4.0</p>
 
1753
  <h2>종합 결과</h2>
1754
  <div class="score-box">{score}점</div>
1755
  <span style="margin-left:12px;font-size:16px;font-weight:700;">{verdict}</span>
1756
  <p><b>품질:</b> {quality['grade']} ({quality['score']}점)</p>
 
1757
  <h2>5축 분석</h2>
1758
  <table><tr><th>축</th><th>점수</th></tr>{ax_rows}</table>
 
1759
  <h2>섹션별 분석 ({len([s for s in sections if not s['skipped']])}개)</h2>
1760
  <table>
1761
  <tr><th>No</th><th>내용 (발췌)</th><th>AI 점수</th><th>판정</th></tr>
1762
  {sec_rows}
1763
  </table>
 
1764
  <div class="footer">
1765
  AI Detector v4.0 — 5축 앙상블 + LLM 교차검증 | {now}<br>
1766
  본 보고서는 AI 판별 참고 자료이며, 최종 판단은 검토자의 확인이 필요합니다.
1767
  </div>
1768
  </body></html>"""
 
 
1769
  report_dir = tempfile.mkdtemp()
1770
  html_path = os.path.join(report_dir, f"AI_Report_{fname}.html")
1771
  with open(html_path, 'w', encoding='utf-8') as f:
 
1774
  except Exception as e:
1775
  print(f"PDF 보고서 생성 오류: {e}")
1776
  return None
 
 
1777
  def extract_file_text_api(file):
1778
  """파일 업로드 → 텍스트 추출 API (탭1~4용)"""
1779
  if file is None:
1780
  return "⚠️ 파일이 없습니다."
 
1781
  if hasattr(file, 'path'):
1782
  file_path = file.path
1783
  elif hasattr(file, 'name'):
 
1793
  if not full_text or len(full_text.strip()) < 10:
1794
  return f"⚠️ {fname}: 텍스트 추출 결과 없음"
1795
  return full_text.strip()
 
 
1796
  with gr.Blocks(title="AI 글 판별기 v5.1") as demo:
1797
  gr.Markdown("# 🔎 AI 글 판별기 v5.1\n**5축+Perplexity+Humanizer탐지+모델추정 · 품질 측정 · LLM 교차검증 · 표절 검사**")
1798
  with gr.Tab("🏆 경쟁력 비교"):
 
1808
  @keyframes glow { 0%,100% { box-shadow:0 0 8px rgba(99,102,241,0.3); } 50% { box-shadow:0 0 20px rgba(99,102,241,0.6); } }
1809
  @keyframes borderPulse { 0%,100% { border-color:rgba(99,102,241,0.3); } 50% { border-color:rgba(99,102,241,0.8); } }
1810
  @keyframes checkPop { from { transform:scale(0) rotate(-180deg); opacity:0; } to { transform:scale(1) rotate(0deg); opacity:1; } }
 
1811
  .comp-wrap { font-family:'Pretendard','Noto Sans KR',system-ui,sans-serif; max-width:860px; margin:0 auto; padding:8px; }
 
1812
  /* 히어로 배너 */
1813
  .hero-banner {
1814
  background: linear-gradient(135deg, #1e1b4b 0%, #312e81 30%, #4338ca 60%, #6366f1 100%);
 
1827
  font-size:11px; font-weight:800; padding:4px 12px; border-radius:20px; margin-top:10px;
1828
  animation: pulse 2s ease-in-out infinite; position:relative;
1829
  }
 
1830
  /* 핵심 수치 */
1831
  .stats-row { display:grid; grid-template-columns:repeat(4,1fr); gap:10px; margin-bottom:18px; animation:fadeInUp 1s ease-out 0.2s both; }
1832
  .stat-card {
 
1836
  .stat-card:hover { transform:translateY(-4px); border-color:#6366f1; box-shadow:0 8px 25px rgba(99,102,241,0.15); }
1837
  .stat-num { font-size:32px; font-weight:900; background:linear-gradient(135deg,#6366f1,#8b5cf6); -webkit-background-clip:text; -webkit-text-fill-color:transparent; animation:countUp 0.6s ease-out; }
1838
  .stat-label { font-size:10px; color:#64748b; margin-top:2px; font-weight:600; }
 
1839
  /* 비교 테이블 */
1840
  .comp-table-wrap { animation:fadeInUp 1.1s ease-out 0.4s both; margin-bottom:18px; }
1841
  .comp-table {
 
1856
  .comp-table tbody td:first-child { text-align:left; padding-left:14px; font-weight:600; color:#1e293b; }
1857
  .comp-table tbody td:nth-child(2) { background:rgba(239,68,68,0.04); font-weight:700; }
1858
  .comp-table tbody tr:last-child td { border-bottom:none; }
 
1859
  /* 체크/엑스 아이콘 */
1860
  .ck { color:#22c55e; font-weight:800; font-size:15px; animation:checkPop 0.4s ease-out; }
1861
  .cx { color:#ef4444; font-weight:700; font-size:13px; }
1862
  .cp { color:#f59e0b; font-weight:700; font-size:12px; }
1863
  .cstar { color:#6366f1; font-weight:900; font-size:14px; }
 
1864
  /* 차별화 카드 */
1865
  .diff-grid { display:grid; grid-template-columns:1fr 1fr; gap:10px; margin-bottom:18px; }
1866
  .diff-card {
 
1879
  .diff-title { font-size:13px; font-weight:800; color:#1e293b; margin-bottom:4px; }
1880
  .diff-desc { font-size:10px; color:#64748b; line-height:1.5; }
1881
  .diff-vs { display:inline-block; background:#fef2f2; color:#dc2626; font-size:9px; font-weight:700; padding:2px 6px; border-radius:4px; margin-top:4px; }
 
1882
  /* 경쟁사 공략 바 */
1883
  .attack-section { animation:fadeInUp 1.3s ease-out 0.8s both; margin-bottom:14px; }
1884
  .attack-title { font-size:13px; font-weight:800; color:#1e293b; margin-bottom:10px; }
 
1888
  .attack-weak { font-size:9px; color:#ef4444; font-weight:600; }
1889
  .attack-bar { height:8px; background:#f1f5f9; border-radius:4px; overflow:hidden; }
1890
  .attack-fill { height:100%; border-radius:4px; animation:barGrow 1.5s ease-out; }
 
1891
  /* 슬로건 */
1892
  .slogan-box {
1893
  background:linear-gradient(135deg,#faf5ff,#ede9fe,#e0e7ff); border-radius:16px;
 
1897
  .slogan-main { font-size:16px; font-weight:900; color:#3730a3; margin-bottom:6px; }
1898
  .slogan-sub { font-size:11px; color:#6366f1; line-height:1.7; }
1899
  </style>
 
1900
  <div class="comp-wrap">
1901
  <!-- 히어로 배너 -->
1902
  <div class="hero-banner">
 
1907
  </div>
1908
  <div class="hero-badge">🔬 v5.1 — 3대 킬러 기능 탑재</div>
1909
  </div>
 
1910
  <!-- 핵심 수치 -->
1911
  <div class="stats-row">
1912
  <div class="stat-card"><div class="stat-num">8축</div><div class="stat-label">탐지 차원<br>(업계 최다)</div></div>
 
1914
  <div class="stat-card"><div class="stat-num">무료</div><div class="stat-label">완전 무료<br>(횟수 무제한)</div></div>
1915
  <div class="stat-card"><div class="stat-num">4종</div><div class="stat-label">모델 추정<br>(GPT/Claude/Gemini/PPX)</div></div>
1916
  </div>
 
1917
  <!-- 비교 테이블 -->
1918
  <div class="comp-table-wrap">
1919
  <table class="comp-table">
 
2058
  </tbody>
2059
  </table>
2060
  </div>
 
2061
  <!-- 차별화 4대 카드 -->
2062
  <div class="diff-grid" style="animation:fadeInUp 1.2s ease-out 0.6s both;">
2063
  <div class="diff-card">
 
2085
  <div class="diff-vs">vs 전체 경쟁사: LLM 검증 미보유</div>
2086
  </div>
2087
  </div>
 
2088
  <!-- 경쟁사 취약점 공략 -->
2089
  <div class="attack-section">
2090
  <div class="attack-title">⚔️ 경쟁사 취약점 vs 우리의 강점</div>
 
2109
  <div class="attack-bar"><div class="attack-fill" style="width:94%;background:linear-gradient(90deg,#a855f7,#ec4899);"></div></div>
2110
  </div>
2111
  </div>
 
2112
  <!-- 슬로건 -->
2113
  <div class="slogan-box">
2114
  <div class="slogan-main">🇰🇷 한국어 AI 글을 가장 정확하게, 가장 투명하게, 완전 무료로 판별합니다</div>
 
2145
  btn_ps.click(lambda:SAMPLE_AI,outputs=[inp_plag])
2146
  with gr.Tab("📖 설명"):
2147
  gr.Markdown("""
 
2148
  - **탐지 5축:** 통계(6%)·문체(25%)·반복(10%)·구조(12%)·지문(30%)
2149
  - **★ Perplexity 확률분석(17%):** 문자 엔트로피·Burstiness·TTR편차·종결엔트로피
2150
  - **★ Humanizer 탐지:** 동의어과다치환·구조보존·접속사잔존·이중피동 등 6시그널
2151
  - **★ 모델 추정:** GPT·Claude·Gemini·Perplexity 4모델 증거기반 추정
2152
  - **품질 6항목:** 가독성·어휘·논리·정확성·표현·정보밀도
2153
  - **LLM 교차검증:** GPT-OSS-120B·Qwen3-32B·Kimi-K2 (GROQ)
 
 
2154
  - **Brave Search**: 병렬 20개 동시 웹검색
2155
  - **학술 DB**: KCI(한국학술지인용색인), RISS(학술연구정보), arXiv
2156
  - **Gemini**: Google Search Grounding
2157
  - **보고서**: CopyKiller 스타일 — 유사도%, 출처표, 문장별 하이라이트
 
 
2158
  - `GROQ_API_KEY` — LLM 교차검증
2159
  - `GEMINI_API_KEY` — 표절 검사 (Google Search Grounding)
2160
  - `BRAVE_API_KEY` — 표절 검사 (Brave Search 병렬)
2161
  """)
 
2162
  with gr.Row(visible=False):
2163
  _file_in = gr.File(label="hidden_file")
2164
  _text_out = gr.Textbox(label="hidden_text")
2165
  _file_in.change(extract_file_text_api, [_file_in], [_text_out], api_name="extract_file_text")
 
 
# Stage the premium landing page: ensure a ./static directory exists and,
# if a repo-level index.html is present, copy it there so FastAPI can serve it
# from the root route (see serve_root below).
import shutil, pathlib
static_dir = pathlib.Path("static")
static_dir.mkdir(exist_ok=True)  # idempotent: safe on restart
if pathlib.Path("index.html").exists():
    shutil.copy("index.html", static_dir / "index.html")
 
# FastAPI app that fronts the Gradio demo: the root URL serves the static
# landing page while the Gradio UI is mounted under /gradio.
# NOTE(review): FileResponse/StaticFiles are imported but not used in the
# visible code — kept in case other parts of the file rely on them.
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, FileResponse
from fastapi.staticfiles import StaticFiles

server = FastAPI()
 
@server.get("/", response_class=HTMLResponse)
async def serve_root():
    """Serve the premium index.html at the root URL.

    Falls back to redirecting to the mounted Gradio app at /gradio/ when
    no static/index.html has been staged.
    """
    fp = pathlib.Path("static/index.html")
    if fp.exists():
        return HTMLResponse(fp.read_text(encoding="utf-8"))
    # Local import: RedirectResponse is only needed on this fallback path.
    from fastapi.responses import RedirectResponse
    return RedirectResponse("/gradio/")
 
 
# Mount the Gradio Blocks UI under /gradio on the FastAPI server.
# allowed_paths lets Gradio serve files from ./static (e.g. generated reports).
app = gr.mount_gradio_app(server, demo, path="/gradio", allowed_paths=["static"])

if __name__ == "__main__":
    import uvicorn
    # 0.0.0.0:7860 is the standard host/port for Hugging Face Spaces.
    uvicorn.run(app, host="0.0.0.0", port=7860)