import gradio as gr import math, re, os, json, random, time, hashlib, zlib, zipfile, tempfile from collections import Counter from datetime import datetime from pathlib import Path from concurrent.futures import ThreadPoolExecutor, as_completed from xml.etree import ElementTree as ET from kiwipiepy import Kiwi KIWI = Kiwi() try: import httpx; HAS_HTTPX = True except ImportError: HAS_HTTPX = False try: from google import genai from google.genai import types as gtypes HAS_GENAI = True except ImportError: HAS_GENAI = False try: import olefile; HAS_OLEFILE = True except ImportError: HAS_OLEFILE = False try: import pdfplumber; HAS_PDFPLUMBER = True except ImportError: HAS_PDFPLUMBER = False try: import PyPDF2; HAS_PYPDF2 = True except ImportError: HAS_PYPDF2 = False try: from docx import Document as DocxDocument; HAS_DOCX = True except ImportError: HAS_DOCX = False GROQ_KEY = os.getenv("GROQ_API_KEY", "") GEMINI_KEY = os.getenv("GEMINI_API_KEY", "") BRAVE_KEY = os.getenv("BRAVE_API_KEY", "") def extract_text_from_pdf(file_path): """PDF → 텍스트 (페이지별 분리)""" pages = [] if HAS_PDFPLUMBER: try: with pdfplumber.open(file_path) as pdf: for p in pdf.pages: t = p.extract_text() if t: pages.append(t) if pages: return pages, None except Exception as e: print(f"pdfplumber: {e}") if HAS_PYPDF2: try: with open(file_path, 'rb') as f: reader = PyPDF2.PdfReader(f) for p in reader.pages: t = p.extract_text() if t: pages.append(t) if pages: return pages, None except Exception as e: print(f"PyPDF2: {e}") return None, "PDF 추출 실패 (pdfplumber, PyPDF2 없음)" def extract_text_from_docx(file_path): """DOCX → 텍스트 (문단별 분리)""" if not HAS_DOCX: return None, "python-docx 없음" try: doc = DocxDocument(file_path) sections = [] current = [] for para in doc.paragraphs: txt = para.text.strip() if not txt: if current: sections.append('\n'.join(current)) current = [] else: current.append(txt) if current: sections.append('\n'.join(current)) if sections: return sections, None return None, "DOCX 텍스트 없음" except Exception as e: return None, f"DOCX 오류: {e}" def extract_text_from_txt(file_path): """TXT/MD/CSV 등 → 텍스트""" for enc in ['utf-8', 'euc-kr', 'cp949', 'utf-16', 'latin-1']: try: with open(file_path, 'r', encoding=enc) as f: text = f.read() if text.strip(): sections = [s.strip() for s in re.split(r'\n{2,}', text) if s.strip()] return sections if sections else [text], None except: continue return None, "텍스트 인코딩 감지 실패" def extract_text_from_hwpx(file_path): """HWPX (ZIP 기반) → 텍스트""" try: text_parts = [] with zipfile.ZipFile(file_path, 'r') as zf: file_list = zf.namelist() section_files = sorted([f for f in file_list if f.startswith('Contents/section') and f.endswith('.xml')]) if not section_files: section_files = sorted([f for f in file_list if 'section' in f.lower() and f.endswith('.xml')]) for sf_name in section_files: try: with zf.open(sf_name) as sf: content = sf.read().decode('utf-8', errors='ignore') content = re.sub(r'\sxmlns[^"]*"[^"]*"', '', content) content = re.sub(r'<[a-zA-Z]+:', '<', content) content = re.sub(r'([^<]+)<', content) clean = [t.strip() for t in matches if t.strip() and len(t.strip()) > 1] if clean: text_parts.append(' '.join(clean)) except: continue if text_parts: return text_parts, None return None, "HWPX 텍스트 없음" except zipfile.BadZipFile: return None, "유효하지 않은 HWPX" except Exception as e: return None, f"HWPX 오류: {e}" def _decode_hwp_para(data): """HWP 바이너리 → 문단 텍스트""" result = [] i = 0 while i < len(data) - 1: code = int.from_bytes(data[i:i+2], 'little') if code in (1,2,3): i += 14 elif code == 9: result.append('\t') elif code in (10,13): result.append('\n') elif code == 24: result.append('-') elif code in (30,31): result.append(' ') elif code >= 32: try: ch = chr(code) if ch.isprintable() or ch in '\n\t ': result.append(ch) except: pass i += 2 text = ''.join(result).strip() text = re.sub(r'[ \t]+', ' ', text) text = re.sub(r'\n{3,}', '\n\n', text) return text if len(text) > 2 else None def _extract_hwp_section(data): """HWP 섹션 바이너리 → 텍스트""" texts = [] pos = 0 while pos < len(data) - 4: try: header = int.from_bytes(data[pos:pos+4], 'little') tag_id = header & 0x3FF size = (header >> 20) & 0xFFF pos += 4 if size == 0xFFF: if pos + 4 > len(data): break size = int.from_bytes(data[pos:pos+4], 'little') pos += 4 if pos + size > len(data): break record_data = data[pos:pos+size] pos += size if tag_id == 67 and size > 0: t = _decode_hwp_para(record_data) if t: texts.append(t) except: pos += 1 return '\n'.join(texts) if texts else None def extract_text_from_hwp(file_path): """HWP (OLE 기반) → 텍스트""" if not HAS_OLEFILE: return None, "olefile 없음" try: ole = olefile.OleFileIO(file_path) if not ole.exists('FileHeader'): ole.close(); return None, "HWP 헤더 없음" header_data = ole.openstream('FileHeader').read() is_compressed = (header_data[36] & 1) == 1 if len(header_data) > 36 else True all_texts = [] for entry in ole.listdir(): entry_path = '/'.join(entry) if entry_path.startswith('BodyText/Section'): try: stream = ole.openstream(entry).read() if is_compressed: try: stream = zlib.decompress(stream, -15) except: try: stream = zlib.decompress(stream) except: pass section_text = _extract_hwp_section(stream) if section_text: all_texts.append(section_text) except: continue ole.close() if all_texts: return all_texts, None return None, "HWP 텍스트 없음" except Exception as e: return None, f"HWP 오류: {e}" def extract_text_from_file(file_path): """ 만능 문서 추출: PDF/DOCX/HWP/HWPX/TXT → (sections_list, full_text, error) sections_list: 페이지/섹션별 텍스트 리스트 full_text: 전체 합친 텍스트 """ if not file_path or not os.path.exists(file_path): return None, None, "파일 없음" ext = Path(file_path).suffix.lower() sections, error = None, None if ext == '.pdf': sections, error = extract_text_from_pdf(file_path) elif ext == '.docx': sections, error = extract_text_from_docx(file_path) elif ext == '.hwpx': sections, error = extract_text_from_hwpx(file_path) elif ext == '.hwp': sections, error = extract_text_from_hwp(file_path) elif ext in ('.txt', '.md', '.csv', '.json', '.xml', '.html'): sections, error = extract_text_from_txt(file_path) else: return None, None, f"지원하지 않는 형식: {ext}" if sections: full = '\n\n'.join(sections) return sections, full, None return None, None, error or "텍스트 추출 실패" def split_sentences(text): try: s = [x.text.strip() for x in KIWI.split_into_sents(text) if x.text.strip()] if s: return s except: pass return [x.strip() for x in re.split(r'(?<=[.!?。])\s+', text) if x.strip()] def split_words(text): return [w for w in re.findall(r'[가-힣a-zA-Z0-9]+', text) if w] def get_morphemes(text): try: r = KIWI.analyze(text) if r and r[0]: return [(m.form, m.tag) for m in r[0][0]] except: pass return [] def http_get(url, headers=None, timeout=15): try: if HAS_HTTPX: r = httpx.get(url, headers=headers or {}, timeout=timeout, follow_redirects=True) return r.text if r.status_code == 200 else None else: import urllib.request req = urllib.request.Request(url, headers=headers or {}) with urllib.request.urlopen(req, timeout=timeout) as resp: return resp.read().decode('utf-8', errors='replace') except: return None def http_post_json(url, body, headers=None, timeout=30): try: h = headers or {} h["Content-Type"] = "application/json" if HAS_HTTPX: r = httpx.post(url, json=body, headers=h, timeout=timeout) if r.status_code == 200: return r.json() return None else: import urllib.request, ssl req = urllib.request.Request(url, json.dumps(body).encode(), h) with urllib.request.urlopen(req, timeout=timeout, context=ssl.create_default_context()) as resp: return json.loads(resp.read()) except: return None def call_groq(model, prompt, max_tokens=800, temperature=0.1): if not GROQ_KEY: return None, "NO_KEY" url = "https://api.groq.com/openai/v1/chat/completions" h = {"Authorization": f"Bearer {GROQ_KEY}", "Content-Type": "application/json"} b = {"model": model, "messages": [{"role":"user","content":prompt}], "max_tokens": max_tokens, "temperature": temperature} try: if HAS_HTTPX: r = httpx.post(url, json=b, headers=h, timeout=45) if r.status_code == 200: return r.json()["choices"][0]["message"]["content"], None return None, f"HTTP {r.status_code}" else: import urllib.request, ssl req = urllib.request.Request(url, json.dumps(b).encode(), h) with urllib.request.urlopen(req, timeout=45, context=ssl.create_default_context()) as resp: return json.loads(resp.read())["choices"][0]["message"]["content"], None except Exception as e: return None, str(e)[:150] AI_ENDINGS = ['합니다','입니다','됩니다','습니다','있습니다','했습니다','겠습니다'] AI_CASUAL_ENDINGS = ['라고 할 수 있다','라고 볼 수 있다','다고 생각한다','다고 판단된다', '인 셈이다','인 것이다','는 것이다','는 셈이다','ㄹ 것이다','을 것이다', '라 할 수 있다','로 보인다','로 판단된다','고 있다','는 추세다','는 상황이다', '지 않을 수 없다','라 하겠다','음을 알 수 있다','할 필요가 있다'] AI_CONNS = ['또한','따라서','그러므로','이에 따라','한편','더불어','아울러','뿐만 아니라', '이를 통해','이에','결과적으로','궁극적으로','특히','나아가','이러한'] AI_SOFT_CONNS = ['물론','그러나','하지만','이처럼','이와 같이','이를 바탕으로'] AI_FILLER = ['것으로 보','것으로 나타','것으로 예상','할 수 있','볼 수 있','주목할 만', '중요한 역할','중요한 의미','긍정적인 영향','부정적인 영향','필요합니다','필요하다', '중요합니다','중요하다','역할을 하','영향을 미','기대된다','예상됩니다','부각되고', '대두되고','다양한 분야','다양한 산업','눈부신 성과','획기적인 변화','혁신적인', '점에서','측면에서','관점에서'] AI_CASUAL_FILLER = ['무궁무진하다','무궁무진한','과언이 아니','돌파구가 될','전환점이 될', '기반으로','발판으로','원동력이','초석이 될','가속화되','급부상','패러다임', '지평을 열','새로운 장을','대전환','본격화되','고도화','이정표'] AI_CONCESSION = re.compile(r'물론.{2,20}(하지만|그러나|그렇지만|다만)|.{2,15}(이긴 하지만|기는 하지만|수 있지만|수는 있지만)') EN_AI_MARKERS = ['furthermore','additionally','moreover','it is worth noting','in conclusion', 'it is important to','plays a crucial role','significant impact','various aspects', 'in this regard','consequently','nevertheless','integral part of','led to remarkable', 'fundamentally transformed','has become increasingly','it should be noted', 'in the context of','paradigm shift','landscape of','methodologies', 'transformative impact','unprecedented','in various domains'] HUMAN_MARKERS = { 'ㅋㅎㅠ': re.compile(r'([ㅋㅎㅠㅜㄷㄱ])\1{1,}'), '이모티콘': re.compile(r'[;:]-?[)(DPp]|\^[_\-]?\^|ㅡㅡ|;;'), '줄임': re.compile(r'ㄹㅇ|ㅇㅇ|ㄴㄴ|ㅇㅋ|ㄷㄷ|ㅂㅂ'), '느낌표': re.compile(r'[!?]{2,}'), '비격식종결': re.compile(r'(거든|잖아|인데|인걸|같음|느낌|아님|대박|미쳤|헐|ㅋ$|ㅎ$|임$|음$|듯$)'), '구어축약': re.compile(r'(걍|좀|막|완전|진짜|레알|존나|개|졸라|존맛|겁나)'), '맞춤법오류': re.compile(r'됬|몇일|금새|할수있|것같[은다]|되가|되서|안됀|햇다'), '말줄임표': re.compile(r'\.{3,}|…'), } FP = { "GPT": {"m":['물론이죠','도움이 되셨기를','설명해 드리겠습니다','추가 질문','도움이 필요하시면', '요약하자면','간략히 정리하면','핵심은'],"e":['습니다','드리겠습니다'],"lp":re.compile(r'^\d+\.\s|^[-•]\s',re.M)}, "Claude": {"m":['말씀하신','살펴보겠습니다','균형 잡힌','맥락에서','한 가지 주의할','뉘앙스', '흥미로운 질문','복잡한 주제'],"e":['네요','거예요'],"lp":re.compile(r'^\*\*.*\*\*|^#+\s',re.M)}, "Gemini": {"m":['다음과 같습니다','정리해 드리겠습니다','핵심 내용을','더 알고 싶으시면', '알아보겠습니다'],"e":['겠습니다','보세요'],"lp":re.compile(r'^\*\s|^-\s\*\*',re.M)}, "Perplexity": {"m":['검색 결과에 따르면','보도에 따르면','연구에 따르면','밝혔다','전했다', '것으로 나타났다','것으로 조사됐다','것으로 집계됐다','발표했다'],"e":['밝혔다','나타났다','전했다'],"lp":re.compile(r'\[\d+\]',re.M)}, } def score_sentence(sent): """단일 문장 AI 점수 (0~100). 탭1·탭2 공유. v5.0 대폭 강화.""" sc = 0; reasons = [] sl = sent.lower().strip() sr = sent.rstrip('.!?。') for e in AI_ENDINGS: if sr.endswith(e): sc += 22; reasons.append(f"격식어미(-{e})"); break if sc == 0: # 격식이 아닌 경우만 for e in AI_CASUAL_ENDINGS: if sr.endswith(e): sc += 15; reasons.append(f"비격식AI(-{e})"); break stripped = sent.strip() for c in AI_CONNS: if stripped.startswith(c): sc += 18; reasons.append(f"AI접속사({c})"); break else: for c in AI_SOFT_CONNS: if stripped.startswith(c): sc += 8; reasons.append(f"약한접속사({c})"); break filler_found = sum(1 for f in AI_FILLER if f in sent) casual_filler = sum(1 for f in AI_CASUAL_FILLER if f in sent) total_filler = filler_found + casual_filler if total_filler >= 3: sc += 25; reasons.append(f"상투표현×{total_filler}") elif total_filler == 2: sc += 18; reasons.append(f"상투표현×2") elif total_filler == 1: sc += 10; reasons.append(f"상투표현×1") if AI_CONCESSION.search(sent): sc += 10; reasons.append("양보패턴") for mn, fp in FP.items(): for m in fp["m"]: if m in sent: sc += 8; reasons.append(f"{mn}지문"); break en_count = sum(1 for em in EN_AI_MARKERS if em in sl) if en_count >= 3: sc += 25; reasons.append(f"영어AI×{en_count}") elif en_count >= 2: sc += 18; reasons.append(f"영어AI×{en_count}") elif en_count >= 1: sc += 12; reasons.append(f"영어AI×1") has_formal = any(sr.endswith(e) for e in AI_ENDINGS) has_conn = any(stripped.startswith(c) for c in AI_CONNS) if has_formal and total_filler >= 1 and has_conn: sc += 8; reasons.append("복합AI") elif has_formal and total_filler >= 2: sc += 5; reasons.append("격식+상투") for n, p in HUMAN_MARKERS.items(): matches = p.findall(sent) if matches: if n in ('ㅋㅎㅠ','이모티콘','줄임'): sc -= 25; reasons.append(f"인간({n})") elif n in ('비격식종결','구어축약'): sc -= 18; reasons.append(f"구어체({n})") elif n == '맞춤법오류': sc -= 12; reasons.append("맞춤법오류") elif n in ('느낌표','말줄임표'): sc -= 10; reasons.append(f"인간({n})") return max(0, min(100, sc)), reasons def analyze_statistics(text, sentences, words): sl = [len(s) for s in sentences] if len(sl) < 2: return {"score":50} avg = sum(sl)/len(sl); std = math.sqrt(sum((l-avg)**2 for l in sl)/len(sl)) cv = std/avg if avg > 0 else 0 cv_score = 85 if cv<0.20 else 70 if cv<0.30 else 50 if cv<0.45 else 30 if cv<0.60 else 15 diffs = [abs(sl[i]-sl[i-1]) for i in range(1,len(sl))] burst_score = 50 if diffs: avg_d = sum(diffs)/len(diffs) max_d = max(diffs) burst_ratio = max_d / (avg_d + 1) burst_score = 85 if burst_ratio < 1.8 else 65 if burst_ratio < 2.5 else 40 if burst_ratio < 3.5 else 20 standard_ratio = sum(1 for l in sl if 20 <= l <= 60) / len(sl) std_score = 80 if standard_ratio > 0.8 else 60 if standard_ratio > 0.6 else 40 if standard_ratio > 0.4 else 20 extreme = sum(1 for l in sl if l < 10 or l > 80) if extreme >= 2: std_score = max(10, std_score - 20) elif extreme >= 1: std_score = max(15, std_score - 10) wf = Counter(words); t = len(words) ttr = len(wf)/t if t>0 else 0 vocab_score = 70 if ttr<0.45 else 55 if ttr<0.55 else 35 if ttr<0.65 else 20 wpc = [len(split_words(s)) for s in sentences] complex_score = 50 if len(wpc) >= 3: wpc_avg = sum(wpc)/len(wpc) wpc_std = math.sqrt(sum((w-wpc_avg)**2 for w in wpc)/len(wpc)) wpc_cv = wpc_std/wpc_avg if wpc_avg > 0 else 0 complex_score = 80 if wpc_cv < 0.20 else 60 if wpc_cv < 0.35 else 35 if wpc_cv < 0.50 else 15 final = int(cv_score*0.20 + burst_score*0.20 + std_score*0.25 + vocab_score*0.15 + complex_score*0.20) return {"score":final,"cv":round(cv,3),"ttr":round(ttr,3)} def analyze_korean_style(text, sentences, morphemes): if not sentences: return {"score":50} formal_cnt = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in AI_ENDINGS)) casual_ai = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in AI_CASUAL_ENDINGS)) fr = formal_cnt/len(sentences) car = casual_ai/len(sentences) ending_score = 85 if fr>0.7 else 65 if fr>0.5 else 45 if fr>0.3 else 25 if fr>0.1 else 10 ending_score = min(90, ending_score + int(car * 25)) # 비격식AI 보너스 conn_positions = [] for i, s in enumerate(sentences): for c in AI_CONNS: if s.strip().startswith(c): conn_positions.append(i); break conn_density = len(conn_positions)/len(sentences) if sentences else 0 conn_score = 85 if conn_density>0.4 else 65 if conn_density>0.25 else 40 if conn_density>0.1 else 15 if len(conn_positions) >= 2: gaps = [conn_positions[i]-conn_positions[i-1] for i in range(1,len(conn_positions))] gap_cv = (math.sqrt(sum((g-sum(gaps)/len(gaps))**2 for g in gaps)/len(gaps))/(sum(gaps)/len(gaps)+0.01)) if gap_cv < 0.5: conn_score = min(90, conn_score + 10) # 매우 규칙적 → AI 보너스 filler_cnt = sum(1 for f in AI_FILLER if f in text) + sum(1 for f in AI_CASUAL_FILLER if f in text) filler_score = 90 if filler_cnt>=6 else 75 if filler_cnt>=4 else 55 if filler_cnt>=2 else 30 if filler_cnt>=1 else 10 concession_cnt = len(AI_CONCESSION.findall(text)) conc_score = 80 if concession_cnt >= 2 else 55 if concession_cnt >= 1 else 20 human_count = sum(len(p.findall(text)) for p in HUMAN_MARKERS.values()) human_penalty = min(35, human_count * 8) pos_score = 45 if morphemes: pc = Counter(t for _,t in morphemes); tm = sum(pc.values()) noun_r = sum(pc.get(t,0) for t in ['NNG','NNP','NNB','NR'])/tm if tm else 0 pos_score = 70 if noun_r>0.42 else 55 if noun_r>0.38 else 35 if noun_r>0.32 else 20 final = max(5, int(ending_score*0.25 + conn_score*0.20 + filler_score*0.20 + conc_score*0.10 + pos_score*0.15 + 10*0.10) - human_penalty) return {"score":final,"formal":f"{fr:.0%}","conn":f"{conn_density:.2f}","filler":filler_cnt,"human":human_count} def analyze_repetition(text, sentences, words): if not sentences or len(sentences) < 2: return {"score":35} tr = 0 if len(words)>=5: tg = Counter(tuple(words[i:i+3]) for i in range(len(words)-2)) tr = sum(1 for c in tg.values() if c>1)/len(tg) if tg else 0 ngram_score = 80 if tr>0.15 else 60 if tr>0.08 else 35 if tr>0.03 else 15 openers_2 = [] openers_3 = [] for s in sentences: ws = split_words(s) if len(ws) >= 2: openers_2.append(tuple(ws[:2])) if len(ws) >= 3: openers_3.append(tuple(ws[:3])) opener2_score = 50 if openers_2: unique2 = len(set(openers_2))/len(openers_2) opener2_score = 80 if unique2 < 0.5 else 60 if unique2 < 0.7 else 35 if unique2 < 0.85 else 15 ai_only_conns = ['또한','따라서','그러므로','이에 따라','더불어','아울러','뿐만 아니라', '이를 통해','이에','결과적으로','궁극적으로','나아가','이러한'] cr = sum(1 for s in sentences if any(s.strip().startswith(c) for c in ai_only_conns)) crr = cr/len(sentences) if sentences else 0 ai_conn_score = 85 if crr>0.35 else 65 if crr>0.2 else 40 if crr>0.08 else 15 templates = [] for s in sentences: ws = split_words(s) if len(ws) >= 4: templates.append((ws[0], ws[-1])) template_rep = 0 if templates: tc = Counter(templates) template_rep = sum(1 for c in tc.values() if c > 1) / len(tc) if tc else 0 template_score = 80 if template_rep > 0.3 else 55 if template_rep > 0.1 else 25 endings = [] for s in sentences: sr = s.rstrip('.!?。') for e in AI_ENDINGS + ['있다','했다','된다','한다','이다','는다']: if sr.endswith(e): endings.append(e); break ending_div = 50 if endings: unique_e = len(set(endings))/len(endings) ending_div = 80 if unique_e < 0.3 else 60 if unique_e < 0.5 else 35 if unique_e < 0.7 else 15 final = int(ngram_score*0.15 + opener2_score*0.20 + ai_conn_score*0.25 + template_score*0.15 + ending_div*0.25) return {"score":final} AI_VAGUE = re.compile(r'다양한|중요한|긍정적인|부정적인|획기적인|혁신적인|효율적인|체계적인|종합적인|전반적인|지속적인|적극적인|상당한|주요한') CONCRETE_PROPER = re.compile(r'삼성|LG|현대|SK|카카오|네이버|넷플릭스|구글|애플|테슬라|아마존|마이크로소프트|[가-힣]{2,}대학|[가-힣]{2,}병원|[가-힣]{1,3}시[는을이가]|[가-힣]{1,3}구[는을이가]|[가-힣]{2,}동[에서]') CONCRETE_NUMBER = re.compile(r'\d{2,}[만억조원달러%개년월일위등호]|\d+\.\d+%|\d{4}년|\d{1,2}월') CONCRETE_QUOTE = re.compile(r'에 따르면|발표했|밝혔다|보도했|전했다|라고 말|인터뷰|설문|조사|통계청|보고서') def analyze_structure(text, sentences): if not sentences: return {"score":35} lt = (len(re.findall(r'^\d+[.)]\s',text,re.M)) + len(re.findall(r'^[-•*]\s',text,re.M)) + len(re.findall(r'^#+\s',text,re.M)) + len(re.findall(r'\*\*[^*]+\*\*',text))) list_score = 90 if lt>=5 else 70 if lt>=3 else 45 if lt>=1 else 10 vague_cnt = len(AI_VAGUE.findall(text)) proper_cnt = len(CONCRETE_PROPER.findall(text)) number_cnt = len(CONCRETE_NUMBER.findall(text)) quote_cnt = len(CONCRETE_QUOTE.findall(text)) concrete_total = proper_cnt + number_cnt + quote_cnt if vague_cnt >= 3 and concrete_total == 0: abstract_score = 90 elif vague_cnt >= 2 and concrete_total <= 1: abstract_score = 70 elif vague_cnt >= 1 and concrete_total == 0: abstract_score = 55 elif concrete_total >= 3: abstract_score = 10 elif concrete_total >= 2: abstract_score = 20 elif concrete_total >= 1: abstract_score = 30 else: abstract_score = 45 has_question = any(s.strip().endswith('?') for s in sentences) has_exclaim = any(s.strip().endswith('!') for s in sentences) has_ellipsis = any('...' in s or '…' in s for s in sentences) variety = sum([has_question, has_exclaim, has_ellipsis]) type_score = 15 if variety >= 2 else 40 if variety >= 1 else 65 puncts = re.findall(r'[!?,;:…—\-~]', text) unique_punct = len(set(puncts)) punct_score = 65 if unique_punct <= 1 else 45 if unique_punct <= 3 else 20 paras = [p.strip() for p in text.split('\n\n') if p.strip()] para_score = 35 if len(paras) >= 2: pl = [len(split_sentences(p)) for p in paras] avg_p = sum(pl)/len(pl) if avg_p > 0: pcv = math.sqrt(sum((l-avg_p)**2 for l in pl)/len(pl))/avg_p para_score = 75 if pcv < 0.2 else 55 if pcv < 0.35 else 30 if len(paras) >= 3 and pl[0] < avg_p and pl[-1] < avg_p: para_score = min(85, para_score + 10) final = int(list_score*0.10 + abstract_score*0.40 + type_score*0.20 + punct_score*0.10 + para_score*0.20) return {"score":final} def analyze_model_fingerprint(text, sentences): ms = {} sl = text.lower() for mn, fp in FP.items(): sc = sum(min(15,text.count(m)*5) for m in fp["m"] if text.count(m)>0) lm = fp["lp"].findall(text) if lm: sc += min(20,len(lm)*3) em = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in fp.get("e",[]))) if sentences: sc += int((em/len(sentences))*20) ms[mn] = min(100,sc) general_ai = 0 general_ai += sum(5 for f in AI_CASUAL_FILLER if f in text) casual_end_cnt = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in AI_CASUAL_ENDINGS)) general_ai += casual_end_cnt * 5 general_ai += len(AI_CONCESSION.findall(text)) * 8 ms["비격식AI"] = min(100, general_ai) en_score = sum(5 for em in EN_AI_MARKERS if em in sl) ms["영어AI"] = min(100, en_score) mx = max(ms.values()) if ms else 0 multi = sum(1 for v in ms.values() if v >= 10) multi_bonus = 10 if multi >= 3 else 5 if multi >= 2 else 0 base = 85 if mx>=50 else 65 if mx>=35 else 45 if mx>=20 else 25 if mx>=10 else 10 return {"score":min(95, base + multi_bonus),"model_scores":{k:v for k,v in ms.items() if k not in ("비격식AI","영어AI") or v > 0}} def analyze_perplexity(text, sentences, morphemes): """한국어 특화 Perplexity + Burstiness — 문자 엔트로피 보정""" if len(sentences) < 2: return {"score": 40, "entropy": 0, "variance": 0, "order": 0, "zipf": 0} chars = [c for c in text if c.strip()] char_score = 45 if len(chars) >= 30: cbigrams = [(chars[i], chars[i+1]) for i in range(len(chars)-1)] cb_freq = Counter(cbigrams) total_cb = len(cbigrams) char_entropy = -sum((cnt/total_cb)*math.log2(cnt/total_cb) for cnt in cb_freq.values()) if char_entropy < 7.5: char_score = 78 elif char_entropy < 8.5: char_score = 62 elif char_entropy < 9.5: char_score = 42 elif char_entropy < 10.5: char_score = 25 else: char_score = 12 sl = [len(s) for s in sentences] burst_score = 45 if len(sl) >= 3: avg = sum(sl)/len(sl) std = math.sqrt(sum((l-avg)**2 for l in sl)/len(sl)) cv = std/(avg+1e-10) if cv < 0.15: burst_score = 82 elif cv < 0.25: burst_score = 62 elif cv < 0.40: burst_score = 38 elif cv < 0.60: burst_score = 20 else: burst_score = 8 sent_ttr = [] for s in sentences: sw = split_words(s) if len(sw) >= 3: sent_ttr.append(len(set(sw))/len(sw)) ttr_score = 42 if len(sent_ttr) >= 3: avg_ttr = sum(sent_ttr)/len(sent_ttr) std_ttr = math.sqrt(sum((t-avg_ttr)**2 for t in sent_ttr)/len(sent_ttr)) if std_ttr < 0.04: ttr_score = 75 elif std_ttr < 0.08: ttr_score = 55 elif std_ttr < 0.15: ttr_score = 35 else: ttr_score = 15 endings = [s.rstrip('.!?\u2026')[-3:] for s in sentences if len(s) >= 5] end_score = 40 if len(endings) >= 3: ef = Counter(endings) end_ent = -sum((c/len(endings))*math.log2(c/len(endings)) for c in ef.values()) max_ent = math.log2(len(ef)) if len(ef) > 1 else 1 norm_ent = end_ent / (max_ent + 1e-10) if norm_ent < 0.5: end_score = 72 elif norm_ent < 0.7: end_score = 50 elif norm_ent < 0.85: end_score = 32 else: end_score = 15 final = int(char_score * 0.30 + burst_score * 0.30 + ttr_score * 0.20 + end_score * 0.20) return {"score": final, "entropy": char_score, "variance": burst_score, "order": ttr_score, "zipf": end_score} HUMANIZER_OVERSUBST = re.compile(r'활용하다|이용하다|사용하다|적용하다|도입하다|채택하다|수행하다|진행하다|실시하다|실행하다') HUMANIZER_AWKWARD = re.compile(r'그것은|이것은|저것은|해당 사항|앞서 언급한|전술한|상기한|기술된') HUMANIZER_PASSIVE = re.compile(r'되어지[고는며]|하게 되었[다습]|수행되[었어]|진행되[었어]|실시되[었어]|활용되[었어]') def analyze_humanizer(text, sentences, words, morphemes): """Humanizer/Bypasser 탐지 — AI 원문 패러프레이즈 흔적 분석""" if len(sentences) < 2: return {"score": 20, "signals": []} signals = [] content_words = [f for f, t in morphemes if t in ('NNG', 'NNP', 'VV', 'VA')] if len(content_words) >= 10: cw_freq = Counter(content_words) hapax = sum(1 for c in cw_freq.values() if c == 1) hapax_ratio = hapax / len(cw_freq) if cw_freq else 0 if hapax_ratio > 0.95 and len(content_words) >= 30: signals.append(("동의어과다치환", 20, "핵심 어휘가 과도하게 분산")) elif hapax_ratio > 0.90 and len(content_words) >= 25: signals.append(("동의어치환의심", 12, "어휘 반복 회피 패턴")) sl = [len(s) for s in sentences] if len(sl) >= 4: avg = sum(sl) / len(sl) cv = math.sqrt(sum((l - avg)**2 for l in sl) / len(sl)) / (avg + 1e-10) unique_ratio = len(set(words)) / len(words) if words else 0 if cv < 0.20 and unique_ratio > 0.80 and len(sentences) >= 5: signals.append(("구조보존어휘변경", 18, "문장 구조 균일 + 비정상적 어휘 다양성")) residual = 0 conn_positions = [] for i, s in enumerate(sentences): stripped = s.strip() for c in ['또한','특히','한편','더불어','아울러','나아가','이에','게다가','반면','결국']: if stripped.startswith(c): conn_positions.append(i) break if len(conn_positions) >= 2: gaps = [conn_positions[i] - conn_positions[i-1] for i in range(1, len(conn_positions))] if gaps and max(gaps) - min(gaps) <= 1: # 거의 등간격 signals.append(("접속사등간격잔존", 15, "접속사 배치가 규칙적 (AI 원문 구조 잔존)")) residual += 15 oversubst = len(HUMANIZER_OVERSUBST.findall(text)) awkward = len(HUMANIZER_AWKWARD.findall(text)) passive = len(HUMANIZER_PASSIVE.findall(text)) if oversubst >= 3: signals.append(("유사동사난무", 12, f"활용/이용/사용/적용 등 {oversubst}개")) if awkward >= 2: signals.append(("어색한지시어", 10, f"해당/전술/상기 등 {awkward}개")) if passive >= 3: signals.append(("이중피동과다", 15, f"되어지/수행되 등 {passive}개")) endings = [s.rstrip('.!?')[-2:] for s in sentences if len(s) >= 4] end_types = len(set(endings)) / len(endings) if endings else 0 has_question = any(s.strip().endswith('?') for s in sentences) has_exclaim = any(s.strip().endswith('!') for s in sentences) sent_type_variety = sum([has_question, has_exclaim]) if sent_type_variety == 0 and end_types > 0.85 and len(sentences) >= 6: signals.append(("유형단조어미다양", 12, "서술문만 + 종결어미 과다 다양 = Humanizer 패턴")) starters = [s.strip()[:3] for s in sentences if len(s) >= 6] starter_unique = len(set(starters)) / len(starters) if starters else 0 if starter_unique >= 0.98 and len(sentences) >= 7: signals.append(("문두과다다양", 8, "모든 문장 시작이 다름 (자연스럽지 않은 다양성)")) total = sum(s[1] for s in signals) if total >= 45: score = 85 elif total >= 30: score = 68 elif total >= 20: score = 52 elif total >= 10: score = 35 else: score = 15 return {"score": score, "signals": signals, "total_evidence": total} MODEL_PROFILES = { "GPT": { "style": ["격식체 ~습니다", "또한/특히 접속사", "~에 대해", "~것으로 예상됩니다"], "markers": ["다양한", "중요한 역할", "긍정적인", "눈부신", "주목할 만한", "살펴보겠습니다"], "structure": "균일한 문단, 서론-본론-결론 구조, 마크다운 선호", "endings": ["습니다", "있습니다", "됩니다", "입니다"], "connectors": ["또한", "특히", "한편", "이처럼"], }, "Claude": { "style": ["맥락 제시", "균형 잡힌", "사려 깊은 어조", "양보 후 주장"], "markers": ["흥미로운 질문", "복잡한 주제", "맥락에서", "균형 잡힌", "살펴보겠습니다", "한 가지 주의할"], "structure": "자연스러운 흐름, 양보-주장 구문 선호, 부드러운 전환", "endings": ["네요", "입니다", "있습니다", "습니다"], "connectors": ["한편", "물론", "다만", "이와 관련해"], }, "Gemini": { "style": ["정보 나열형", "~에 대해 알아보겠습니다", "검색 결과 기반"], "markers": ["알아보겠습니다", "도움이 되셨으면", "추가 질문", "참고로"], "structure": "리스트/번호 매기기 선호, 헤더 활용, 정보 밀도 높음", "endings": ["습니다", "있습니다", "세요"], "connectors": ["또한", "그리고", "참고로"], }, "Perplexity": { "style": ["출처 인용형", "~에 따르면", "수치 제시", "것으로 나타났다"], "markers": ["에 따르면", "것으로 나타났", "것으로 조사됐", "것으로 집계됐", "발표했", "보도에 따르면"], "structure": "팩트 중심, 수치 인용 다수, 출처 명시 스타일", "endings": ["습니다", "나타났다", "밝혔다", "전했다"], "connectors": ["한편", "또한", "이에"], }, } def estimate_model(text, sentences, morphemes, model_scores): """AI 모델 추정 — 복합 증거 기반""" evidence = {m: {"score": 0, "reasons": []} for m in MODEL_PROFILES} sl = text.lower() for model, profile in MODEL_PROFILES.items(): fp_score = model_scores.get(model, 0) evidence[model]["score"] += fp_score * 0.4 if fp_score >= 20: evidence[model]["reasons"].append(f"지문 매칭 {fp_score}점") marker_cnt = sum(1 for m in profile["markers"] if m in text) if marker_cnt >= 2: evidence[model]["score"] += marker_cnt * 8 evidence[model]["reasons"].append(f"특유 표현 {marker_cnt}개") end_match = 0 for s in sentences: for e in profile["endings"]: if s.rstrip('.!?').endswith(e): end_match += 1; break if sentences: end_ratio = end_match / len(sentences) if end_ratio > 0.7: evidence[model]["score"] += 12 evidence[model]["reasons"].append(f"종결어미 {end_ratio:.0%} 일치") conn_match = sum(1 for s in sentences if any(s.strip().startswith(c) for c in profile["connectors"])) if conn_match >= 2: evidence[model]["score"] += conn_match * 4 evidence[model]["reasons"].append(f"접속사 패턴 {conn_match}회") number_citations = len(re.findall(r'\d+[%만억조]|에 따르면|것으로 나타났|발표했', text)) if number_citations >= 3: evidence["Perplexity"]["score"] += number_citations * 5 evidence["Perplexity"]["reasons"].append(f"수치/인용 {number_citations}회") concession_cnt = len(AI_CONCESSION.findall(text)) if concession_cnt >= 1: evidence["Claude"]["score"] += concession_cnt * 10 evidence["Claude"]["reasons"].append(f"양보-주장 구문 {concession_cnt}회") ranked = sorted(evidence.items(), key=lambda x: x[1]["score"], reverse=True) top = ranked[0] second = ranked[1] if len(ranked) > 1 else None if top[1]["score"] < 10: return {"model": "특정 불가", "confidence": "낮음", "detail": evidence, "ranked": ranked} gap = top[1]["score"] - (second[1]["score"] if second else 0) if gap >= 20 and top[1]["score"] >= 30: conf = "높음" elif gap >= 10 and top[1]["score"] >= 20: conf = "중간" else: conf = "낮음" return { "model": top[0], "confidence": conf, "score": top[1]["score"], "reasons": top[1]["reasons"], "detail": evidence, "ranked": ranked } def analyze_quality(text, sentences, words, morphemes): qs = {}; sl = [len(s) for s in sentences]; tw = len(words) ideal = sum(1 for l in sl if 15<=l<=70)/len(sentences) if sentences else 0 qs["가독성"] = min(100,int(ideal*70+(1-sum(1 for l in sl if l>100)/max(1,len(sentences)))*30)) wf = Counter(words); uw = len(wf) mattr = (sum(len(set(words[i:i+50]))/50 for i in range(max(1,tw-50)))/max(1,tw-50)) if tw>=100 else (uw/tw if tw>0 else 0.5) hr = sum(1 for c in wf.values() if c==1)/tw if tw>0 else 0 qs["어휘풍부도"] = min(100,int(mattr*80+hr*40)) lc = {'순접':['그래서','따라서'],'역접':['그러나','하지만','다만'],'첨가':['또한','그리고','게다가'],'전환':['한편'],'예시':['예를 들어'],'요약':['결국','결론적으로']} ut = sum(1 for cw in lc.values() if any(w in text for w in cw)) qs["논리구조"] = min(100,int(ut/len(lc)*60+min(40,ut*10))) si = sum(1 for p in [re.compile(r'됬'),re.compile(r'몇일'),re.compile(r'금새')] if p.search(text)) spi = sum(1 for p in [re.compile(r'할수있'),re.compile(r'것같')] if p.search(text)) qs["정확성"] = max(0,100-(si+spi)*15) ar=0;vv=0 if morphemes: pc = Counter(t for _,t in morphemes); tm = sum(pc.values()) ar = sum(pc.get(t,0) for t in ['VA','MAG','MAJ'])/tm if tm else 0 vv = len(set(f for f,t in morphemes if t in ['VV','VA']))/max(1,sum(1 for _,t in morphemes if t in ['VV','VA'])) qs["표현풍부성"] = min(100,int(ar*200+vv*30)) cr = 0.5 if morphemes: ct={'NNG','NNP','VV','VA','MAG'}; ft={'JKS','JKC','JKG','JKO','JX','JC','EP','EF','EC','ETN','ETM'} cc=sum(1 for _,t in morphemes if t in ct); fc=sum(1 for _,t in morphemes if t in ft) cr = cc/(cc+fc) if (cc+fc)>0 else 0.5 qs["정보밀도"] = min(100,int(cr*80)) wq = {"가독성":.20,"어휘풍부도":.18,"논리구조":.18,"정확성":.18,"표현풍부성":.13,"정보밀도":.13} total = int(sum(qs[k]*wq[k] for k in wq)) grade = "S" if total>=85 else "A" if total>=72 else "B" if total>=58 else "C" if total>=42 else "D" if total>=28 else "F" return {"score":total,"grade":grade,"sub_scores":qs} LLM_JUDGES = [("openai/gpt-oss-120b","GPT-OSS 120B"),("qwen/qwen3-32b","Qwen3 32B"),("moonshotai/kimi-k2-instruct-0905","Kimi-K2")] def _parse_ai_probability(raw_resp): """LLM 응답에서 AI 확률(0~100)을 추출. 한국어/영어 다양한 형식 대응.""" if not raw_resp: return -1 think_content = '' think_m = re.search(r'(.*?)', raw_resp, flags=re.S) if think_m: think_content = think_m.group(1) resp = re.sub(r'.*?', '', raw_resp, flags=re.S).strip() if not resp or len(resp) < 5: resp = raw_resp # think만 있으면 원본 포함 specific_patterns = [ r'AI\s*확률\s*[::]\s*(?:약\s*)?(\d+)\s*%?', r'AI\s*[Pp]robability\s*[::]\s*(?:about|approximately?\s*)?(\d+)\s*%?', r'[Pp]robability\s*(?:of\s*)?(?:being\s*)?AI\s*[::\-]\s*(?:about|approximately?\s*)?(\d+)\s*%?', r'AI\s*(?:생성|작성|판정)?\s*확률\s*[::]?\s*(?:약\s*)?(\d+)', r'(?:Score|Rating|Confidence)\s*[::]\s*(\d+)', r'(\d+)\s*%\s*(?:의\s*)?(?:확률|가능성|probability|likely|chance|likelihood)', r'(?:신뢰도|확신도)\s*[::]?\s*(?:약\s*)?(\d+)\s*(?:%|퍼센트)', r'(?:약\s*)?(\d+)\s*(?:%|퍼센트)\s*(?:정도|수준)', ] for pat in specific_patterns: m = re.search(pat, resp, re.I) if m: v = int(m.group(1)) if 0 <= v <= 100: return v lines = [l.strip() for l in resp.strip().split('\n') if l.strip()] for line in reversed(lines[-5:]): if re.search(r'AI|확률|[Pp]robab|신뢰|판[정단]', line): nums = re.findall(r'(\d+)\s*%', line) if nums: v = int(nums[-1]) if 0 <= v <= 100: return v nums = re.findall(r'(\d+)\s*퍼센트', line) if nums: v = int(nums[-1]) if 0 <= v <= 100: return v all_pcts = list(re.finditer(r'(\d+)\s*(?:%|퍼센트|percent)', resp, re.I)) for m in reversed(all_pcts): v = int(m.group(1)) ctx_start = max(0, m.start()-50) ctx = resp[ctx_start:m.end()+20] if re.search(r'AI|확률|[Pp]robab|신뢰|판[정단]|가능성|likelihood', ctx, re.I): if 0 <= v <= 100: return v if all_pcts: v = int(all_pcts[-1].group(1)) if 5 <= v <= 99: return v # 100% 제외 (통계 수치 오탐 방지) if think_content: for pat in specific_patterns: m = re.search(pat, think_content, re.I) if m: v = int(m.group(1)) if 0 <= v <= 100: return v think_pcts = re.findall(r'(\d+)\s*%', think_content) if think_pcts: v = int(think_pcts[-1]) if 5 <= v <= 99: return v return -1 def llm_cross_check(text): if not GROQ_KEY: return {"score":-1,"detail":{}} prompt = f"""Analyze whether this text was written by AI. [Instructions] 1. Determine AI vs Human with 3 brief reasons 2. IMPORTANT - Your LAST line MUST be exactly this format: AI확률: XX% (Replace XX with your estimated probability 0-100) Example of correct last line: AI확률: 75% [Text to analyze] {text[:2000]}""" votes=[]; rpt={} for mid,mn in LLM_JUDGES: resp,err = call_groq(mid,prompt) if resp: p = _parse_ai_probability(resp) if p >= 0: votes.append(p); rpt[mn]=f"{p}%" else: cleaned = re.sub(r'.*?', '', resp, flags=re.S).strip() tail = cleaned[-60:].replace('\n',' ') if len(cleaned) > 60 else cleaned.replace('\n',' ') rpt[mn]=f"파싱실패({tail[:40]})" else: rpt[mn]=f"ERR:{err[:30] if err else '?'}" if votes: return {"score":int(sum(votes)/len(votes)),"detail":rpt} return {"score":-1,"detail":rpt} def compute_verdict(scores, llm_score=-1, sent_avg=-1, ppx_score=-1, hum_score=-1): w={"통계":.06,"문체":.25,"반복성":.10,"구조":.12,"지문":.30} ws=sum(scores[k]*w[k] for k in w) if ppx_score >= 0: ws += ppx_score * 0.17 style=scores["문체"]; fp=scores["지문"]; rep=scores["반복성"]; struct=scores["구조"] if style>=35 and fp>=35: ws+=8 elif style>=30 and fp>=25: ws+=4 if style>=30 and rep>=25 and fp>=20: ws+=4 if fp>=45: ws+=3 if struct>=50 and style>=30: ws+=3 if ppx_score>=55 and fp>=35: ws+=5 if ppx_score>=65 and style>=35: ws+=3 if hum_score>=50: ws=max(ws, 45) # Humanizer 확인 → 최소 AI 의심 중간 ws += (hum_score-50)*0.15 if sent_avg>=0 and sent_avg>ws: ws=ws*0.80+sent_avg*0.20 hi=sum(1 for v in scores.values() if v>=50) if hi>=4: ws+=8 elif hi>=3: ws+=5 elif hi>=2: ws+=2 if style<40 and fp<=20 and rep<22 and struct<35 and (ppx_score<0 or ppx_score<40): ws-=5 lo=sum(1 for v in scores.values() if v<20) if lo>=3: ws-=8 elif lo>=2: ws-=3 if llm_score>=0: ws=ws*0.70+llm_score*0.30 fs=max(0,min(100,int(ws))) if fs>=75: return fs,"AI 작성 확신","ai_high" if fs>=60: return fs,"AI 의심 높음","ai_medium" if fs>=45: return fs,"AI 의심 중간","ai_low" if fs>=30: return fs,"판단 유보","uncertain" return fs,"인간 작성 추정","human" def quick_score(text): sents=split_sentences(text); words=split_words(text); morphs=get_morphemes(text) sc={"통계":analyze_statistics(text,sents,words)["score"],"문체":analyze_korean_style(text,sents,morphs)["score"], "반복성":analyze_repetition(text,sents,words)["score"],"구조":analyze_structure(text,sents)["score"], "지문":analyze_model_fingerprint(text,sents)["score"]} sent_scores=[score_sentence(s)[0] for s in sents] sent_avg=sum(sent_scores)/len(sent_scores) if sent_scores else -1 ppx=analyze_perplexity(text,sents,morphs) hum=analyze_humanizer(text,sents,words,morphs) fs,v,lv=compute_verdict(sc, sent_avg=sent_avg, ppx_score=ppx["score"], hum_score=hum["score"]) return fs,v,lv,sc,ppx,hum from plagiarism_check import run_plagiarism def run_detection(text, progress=gr.Progress()): if not text or len(text.strip())<50: return "
⚠️ 최소 50자
","" text=text.strip() progress(0.05); sents=split_sentences(text); words=split_words(text); morphs=get_morphemes(text) progress(0.12); s1=analyze_statistics(text,sents,words) progress(0.22); s2=analyze_korean_style(text,sents,morphs) progress(0.30); s3=analyze_repetition(text,sents,words) progress(0.38); s4=analyze_structure(text,sents) progress(0.45); s5=analyze_model_fingerprint(text,sents) progress(0.52); ppx=analyze_perplexity(text,sents,morphs) progress(0.58); hum=analyze_humanizer(text,sents,words,morphs) progress(0.65); qr=analyze_quality(text,sents,words,morphs) progress(0.75); lr=llm_cross_check(text) sc={"통계":s1["score"],"문체":s2["score"],"반복성":s3["score"],"구조":s4["score"],"지문":s5["score"]} sent_scores=[score_sentence(s)[0] for s in sents] sent_avg=sum(sent_scores)/len(sent_scores) if sent_scores else -1 ms_raw=s5.get("model_scores",{}) model_est=estimate_model(text,sents,morphs,ms_raw) fs,verdict,level=compute_verdict(sc,lr["score"],sent_avg=sent_avg,ppx_score=ppx["score"],hum_score=hum["score"]) progress(0.95) cm={"ai_high":("#FF4444","#FFE0E0","높음"),"ai_medium":("#FF8800","#FFF0DD","중간~높음"),"ai_low":("#DDAA00","#FFFBE0","중간"),"uncertain":("#888","#F0F0F0","낮음"),"human":("#22AA44","#E0FFE8","매우 낮음")} fg,bg,conf=cm.get(level,("#888","#F0F0F0","?")) est_model=model_est.get("model","특정 불가") est_conf=model_est.get("confidence","낮음") est_reasons=model_est.get("reasons",[]) if est_model!="특정 불가" and est_conf!="낮음": mt=f"{est_model} (신뢰: {est_conf})" elif est_model!="특정 불가": mt=f"{est_model} (참고)" else: mt="특정 불가" ai_sents=sum(1 for s in sent_scores if s>=40) human_sents=sum(1 for s in sent_scores if s<20) def gb(l,s,w="",desc=""): c="#FF4444" if s>=70 else "#FF8800" if s>=50 else "#DDAA00" if s>=35 else "#22AA44" wt=f" ×{w}" if w else "" dt=f"
{desc}
" if desc else "" return f"
{l}{wt}{s}
{dt}
" mb="" for mn in ["GPT","Claude","Gemini","Perplexity"]: s=ms_raw.get(mn,0); mc="#FF4444" if s>=40 else "#FF8800" if s>=20 else "#CCC" tag="" if mn==est_model and est_conf!="낮음": tag=f" 추정" mb+=f"
{mn}{tag}
{s}
" ls="" if lr["score"]>=0: lsc=lr["score"] lr_rows="".join(f"
{mn}: {lr['detail'].get(mn,'—')}
" for _,mn in LLM_JUDGES) ls=f"
🤖 LLM 교차검증 (평균 {lsc}%)
{lr_rows}
" else: ls="
🤖 GROQ_API_KEY 미설정
" qs=qr["sub_scores"]; gc={"S":"#FF6B6B","A":"#4ECDC4","B":"#45B7D1","C":"#DDAA00","D":"#FF8800","F":"#FF4444"}.get(qr["grade"],"#888") def qgb(l,s): c="#22AA44" if s>=70 else "#4ECDC4" if s>=55 else "#DDAA00" if s>=40 else "#FF8800" return f"
{l}
{s}
" reasons=[] if sc["문체"]>=70: reasons.append("격식체 종결어미가 대부분, AI형 접속사·상투표현 다수 감지") elif sc["문체"]>=50: reasons.append("격식체와 AI형 표현이 혼재") if ppx["score"]>=65: reasons.append(f"텍스트 예측 가능도가 매우 높음 (Perplexity {ppx['score']}점)") elif ppx["score"]>=50: reasons.append(f"텍스트 예측 가능도가 높음 (Perplexity {ppx['score']}점)") if hum["score"]>=50: hum_sigs=", ".join(s[0] for s in hum["signals"][:3]) reasons.append(f"⚠️ Humanizer/패러프레이즈 흔적 감지 ({hum_sigs})") if sc["통계"]>=60: reasons.append("문장 길이가 매우 균일하여 기계적 패턴") if sc["반복성"]>=50: reasons.append("문두 접속사 반복, n-gram 패턴 감지") if sc["구조"]>=50: reasons.append("추상적 수식어 다수, 구체적 사실 부족") if est_model!="특정 불가" and est_conf!="낮음": est_why=", ".join(est_reasons[:2]) if est_reasons else "" reasons.append(f"🔍 추정 모델: {est_model} ({est_why})") if not reasons: reasons.append("인간적 표현이 우세하며 AI 패턴이 약함") reason_html='
'.join(f"• {r}" for r in reasons) ppx_c="#FF4444" if ppx["score"]>=65 else "#FF8800" if ppx["score"]>=50 else "#DDAA00" if ppx["score"]>=35 else "#22AA44" ppx_html=f"""
🧠 Perplexity 분석 {ppx["score"]}점
엔트로피: {ppx.get("entropy",0)} 분산균일: {ppx.get("variance",0)} 어순예측: {ppx.get("order",0)} Zipf적합: {ppx.get("zipf",0)}
""" hum_html="" if hum["score"]>=30: hc="#FF4444" if hum["score"]>=65 else "#FF8800" if hum["score"]>=50 else "#DDAA00" sig_rows="".join(f"
🔸 {s[0]}: {s[2]}
" for s in hum["signals"][:4]) hum_html=f"""
🛡️ Humanizer 탐지 {hum["score"]}점
{sig_rows}
""" est_html="" if est_model!="특정 불가": ec="#FF4444" if est_conf=="높음" else "#FF8800" if est_conf=="중간" else "#DDAA00" ranked_html="" for m, ev in model_est.get("ranked",[])[:4]: ms_c="#FF4444" if ev["score"]>=30 else "#FF8800" if ev["score"]>=15 else "#CCC" bar_w=min(100,int(ev["score"]*1.5)) ranked_html+=f"
{m}
{ev['score']:.0f}
" est_html=f"""
🎯 AI 모델 추정: {est_model} (신뢰: {est_conf})
{ranked_html}
근거: {", ".join(est_reasons[:3]) if est_reasons else "복합 지표"}
""" html=f"""
{fs}
/ 100점
{"🔴" if level=="ai_high" else "🟠" if level=="ai_medium" else "🟡" if level in ["ai_low","uncertain"] else "🟢"} {verdict}
AI 작성 가능성: {conf} | 추정 모델: {mt}
{len(sents)}문장 중 AI의심 {ai_sents}문장, 인간추정 {human_sents}문장
📋 판정 근거
{reason_html}
📊 AI 탐지 5축 + Perplexity
{gb('① 통계',sc['통계'],'.06','Burstiness·복잡도 균일성')} {gb('② 문체',sc['문체'],'.25','격식·접속사·양보구문')} {gb('③ 반복',sc['반복성'],'.10','n-gram·문두·종결다양성')} {gb('④ 구조',sc['구조'],'.12','추상성/구체성')} {gb('⑤ 지문',sc['지문'],'.30','GPT/Claude/Gemini/PPX')} {gb('⑥ PPX',ppx['score'],'.17','예측가능도·엔트로피')}
🔍 모델 지문
{mb}
📝 품질 {qr['grade']} {qr['score']}점
{qgb('가독성',qs['가독성'])}{qgb('어휘',qs['어휘풍부도'])}{qgb('논리',qs['논리구조'])}{qgb('정확',qs['정확성'])}
{ppx_html}{hum_html}{est_html}{ls}
""" log=f"AI:{fs}점 [{verdict}] 신뢰:{conf} | 모델:{mt} | PPX:{ppx['score']} HUM:{hum['score']} | 품질:{qr['grade']}({qr['score']})\n축: 통계{sc['통계']} 문체{sc['문체']} 반복{sc['반복성']} 구조{sc['구조']} 지문{sc['지문']} PPX{ppx['score']} HUM{hum['score']}" return html, log def run_highlight(text): if not text or len(text.strip())<30: return "
텍스트 필요
" sents=split_sentences(text) hl=[] for s in sents: sc, reasons = score_sentence(s) if sc >= 60: bg="rgba(220,38,38,0.35)"; level="AI확신" elif sc >= 40: bg="rgba(249,115,22,0.30)"; level="AI의심" elif sc >= 25: bg="rgba(234,179,8,0.25)"; level="주의" elif sc >= 10: bg="rgba(132,204,22,0.15)"; level="인간추정" else: bg="rgba(34,197,94,0.20)"; level="인간" detail_parts = [] for r in reasons: if '격식' in r or '비격식AI' in r: detail_parts.append(f"🔤 {r}") elif '접속사' in r: detail_parts.append(f"🔗 {r}") elif '상투' in r: detail_parts.append(f"📋 {r}") elif '지문' in r: detail_parts.append(f"🔍 {r}") elif '인간' in r or '구어' in r or '맞춤법' in r: detail_parts.append(f"✅ {r}") else: detail_parts.append(r) tt = ' | '.join(detail_parts) if detail_parts else '특이 패턴 없음' hl.append(f'=60 else "#F97316" if sc>=40 else "#EAB308" if sc>=25 else "#84CC16" if sc>=10 else "#22C55E"};" title="[{level}] {tt} ({sc}점)">{s}') total_scores = [score_sentence(s)[0] for s in sents] avg_sc = sum(total_scores)/len(total_scores) if total_scores else 0 ai_high = sum(1 for s in total_scores if s >= 60) ai_mid = sum(1 for s in total_scores if 40 <= s < 60) human_cnt = sum(1 for s in total_scores if s < 25) return f"""
🔴 AI확신 {ai_high} 🟠 AI의심 {ai_mid} 🟡 주의 🟢 인간 {human_cnt} 평균 {avg_sc:.0f}점 | 마우스 오버→상세 근거
💡 격식어미(22) + AI접속사(18) + 상투표현(10~25) + 양보패턴(10) + 모델지문(8) + 복합보너스(8) − 인간마커(~25)
{' '.join(hl)}
""" SAMPLE_AI = """인공지능 기술은 현대 사회에서 매우 중요한 역할을 하고 있습니다. 특히 자연어 처리 분야에서의 발전은 눈부신 성과를 거두고 있습니다. 이러한 기술의 발전은 다양한 산업 분야에 긍정적인 영향을 미치고 있으며, 향후 더욱 발전할 것으로 예상됩니다. 또한 생성형 AI의 등장으로 콘텐츠 제작 방식이 크게 변화하고 있습니다. 이를 통해 기업들은 효율적인 콘텐츠 생산이 가능해졌으며, 개인 사용자들도 다양한 창작 활동에 AI를 활용할 수 있게 되었습니다. 따라서 AI 리터러시의 중요성이 더욱 부각되고 있습니다. 나아가 AI 윤리와 규제에 대한 논의도 활발히 진행되고 있습니다. 특히 AI가 생성한 콘텐츠의 저작권 문제는 중요한 의미를 가지며, 이에 대한 법적 프레임워크 구축이 필요합니다. 결과적으로 기술 발전과 함께 사회적 합의를 이루는 것이 중요합니다.""" SAMPLE_HUMAN = """아 진짜 요즘 AI 때문에 머리 아프다ㅋㅋㅋ 어제 chatgpt한테 레포트 써달라고 했는데 완전 교과서 같은 글만 써줘서 그냥 내가 다시 썼음;; 근데 생각해보면 AI가 쓴 글이랑 사람이 쓴 글이 확실히 다르긴 해. 뭔가... 너무 깔끔하달까? 사람은 이렇게 횡설수설도 하고 맞춤법도 틀리고 그러잖아. 교수님이 AI 탐지기 돌린다고 해서 좀 무서운데 ㅠㅠ 나는 진짜 직접 쓴 건데 혹시 오탐 나면 어쩌지... 걱정된다 진심으로.""" def run_document_analysis(file, progress=gr.Progress()): """문서 파일 업로드 → 섹션별 AI 탐지 히트맵 + PDF 보고서 생성""" if file is None: return "
📄 파일을 업로드하세요 (PDF, DOCX, HWP, HWPX, TXT)
", "", None if hasattr(file, 'path'): file_path = file.path elif hasattr(file, 'name'): file_path = file.name else: file_path = str(file) fname = os.path.basename(file_path) progress(0.05, f"📄 {fname} 읽는 중...") sections, full_text, error = extract_text_from_file(file_path) if error: return f"
⚠️ {error}
", "", None if not sections or not full_text or len(full_text.strip()) < 50: return "
⚠️ 텍스트가 충분하지 않습니다 (50자 미만)
", "", None progress(0.15, "전체 텍스트 분석...") sents_all = split_sentences(full_text) words_all = split_words(full_text) morphs_all = get_morphemes(full_text) total_score, total_verdict, total_level, total_axes, total_ppx, total_hum = quick_score(full_text) quality = analyze_quality(full_text, sents_all, words_all, morphs_all) progress(0.30, "LLM 교차검증...") llm_result = llm_cross_check(full_text[:3000]) if llm_result["score"] >= 0: _sent_scores = [score_sentence(s)[0] for s in sents_all] _sent_avg = sum(_sent_scores)/len(_sent_scores) if _sent_scores else -1 total_score, total_verdict, total_level = compute_verdict(total_axes, llm_result["score"], sent_avg=_sent_avg, ppx_score=total_ppx["score"], hum_score=total_hum["score"]) progress(0.45, f"{len(sections)}개 섹션 분석...") section_results = [] for i, sec in enumerate(sections): if len(sec.strip()) < 20: section_results.append({"idx": i+1, "text": sec, "score": -1, "verdict": "너무 짧음", "skipped": True}) continue s_score, s_verdict, s_level, s_axes, _, _ = quick_score(sec) sec_sents = split_sentences(sec) sent_scores = [] for sent in sec_sents: ss = score_sentence(sent) sent_scores.append({"text": sent, "score": ss}) section_results.append({ "idx": i+1, "text": sec[:200], "score": s_score, "verdict": s_verdict, "level": s_level, "axes": s_axes, "sent_scores": sent_scores, "skipped": False }) pct = 0.45 + (i / max(len(sections), 1)) * 0.30 progress(pct, f"섹션 {i+1}/{len(sections)}") now = datetime.now().strftime("%Y-%m-%d %H:%M") ext = Path(file_path).suffix.upper() cm_map = {"ai_high": ("#FF4444", "#FFE0E0"), "ai_medium": ("#FF8800", "#FFF0DD"), "ai_low": ("#DDAA00", "#FFFBE0"), "uncertain": ("#888", "#F5F5F5"), "human": ("#22AA44", "#E0FFE8")} tc, tbg = cm_map.get(total_level, ("#888", "#F5F5F5")) heatmap_cells = [] for sr in section_results: sidx = sr["idx"] if sr["skipped"]: heatmap_cells.append(f"
") else: sc, sbg = cm_map.get(sr.get("level", "uncertain"), ("#888", "#F5F5F5")) ssc = sr["score"]; svd = sr["verdict"] heatmap_cells.append(f"
") heatmap_bar = f"
" + ''.join(heatmap_cells) + "
" section_cards = [] for sr in section_results: if sr["skipped"]: continue sc, sbg = cm_map.get(sr.get("level", "uncertain"), ("#888", "#F5F5F5")) sent_html = "" for ss in sr.get("sent_scores", []): s = ss["score"] if s >= 60: sclr = "background:rgba(255,68,68,0.15);border-bottom:2px solid #FF4444;" elif s >= 40: sclr = "background:rgba(255,136,0,0.1);border-bottom:2px solid #FF8800;" elif s >= 25: sclr = "background:rgba(221,170,0,0.08);border-bottom:1px solid #DDAA00;" else: sclr = "" sent_html += f"{ss['text']} " axes_html = "" if "axes" in sr: ax = sr["axes"] for k, v in ax.items(): axc = "#FF4444" if v >= 50 else "#FF8800" if v >= 30 else "#22AA44" axes_html += f"{k} {v}" section_cards.append(f"""
📑 섹션 {sr['idx']} AI {sr['score']}점 · {sr['verdict']}
{axes_html}
{sent_html}
""") ai_high = sum(1 for s in section_results if not s["skipped"] and s["score"] >= 60) ai_med = sum(1 for s in section_results if not s["skipped"] and 35 <= s["score"] < 60) ai_low = sum(1 for s in section_results if not s["skipped"] and s["score"] < 35) valid_sections = [s for s in section_results if not s["skipped"]] llm_info = "" if llm_result["score"] >= 0: llm_rows = ''.join(f"{mn}: {llm_result['detail'].get(mn,'—')}" for _, mn in LLM_JUDGES) llm_info = f"
🤖 LLM 교차검증: 평균 {llm_result['score']}% | {llm_rows}
" html = f"""
📄 문서 AI 분석 보고서
{fname} · {ext} · {len(sections)}개 섹션 · {len(full_text)}자
{total_score}
{total_verdict}
{ai_high}
AI 의심 높음
{ai_med}
AI 의심 중간
{ai_low}
인간 판정
{quality['grade']}
품질 등급
🗺️ 섹션별 AI 히트맵 (빨강=AI의심, 초록=인간)
{heatmap_bar}
AI 높음 AI 중간 불확실 인간
{llm_info}
📊 섹션별 상세 분석 ({len(valid_sections)}개)
{''.join(section_cards)}
AI Detector v4.0 {now} · 5축 앙상블 + LLM 교차검증
""" progress(0.90, "PDF 보고서 생성...") pdf_path = _generate_pdf_report(fname, total_score, total_verdict, total_level, total_axes, quality, section_results, llm_result, now) log = f"파일: {fname} ({ext})\n" log += f"섹션: {len(sections)}개 | 전체: {len(full_text)}자\n" log += f"총점: {total_score} ({total_verdict})\n" log += f"AI 의심 높음: {ai_high} | 중간: {ai_med} | 인간: {ai_low}\n" log += f"품질: {quality['grade']} ({quality['score']}점)\n" if llm_result["score"] >= 0: log += f"LLM 교차검증: {llm_result['score']}%\n" return html, log, pdf_path def _generate_pdf_report(fname, score, verdict, level, axes, quality, sections, llm_result, now): """HTML → PDF 변환으로 보고서 생성""" try: cm = {"ai_high":"#FF4444","ai_medium":"#FF8800","ai_low":"#DDAA00","uncertain":"#888","human":"#22AA44"} tc = cm.get(level, "#888") sec_rows = "" for sr in sections: if sr["skipped"]: continue sc = cm.get(sr.get("level","uncertain"),"#888") sec_rows += f"{sr['idx']}{sr['text'][:80]}...{sr['score']}{sr['verdict']}" ax_rows = ''.join(f"{k}{v}/100" for k, v in axes.items()) html_content = f"""

📄 AI 글 판별 보고서

파일: {fname} | 생성: {now} | 엔진: AI Detector v4.0

종합 결과

{score}점
{verdict}

품질: {quality['grade']} ({quality['score']}점)

5축 분석

{ax_rows}
점수

섹션별 분석 ({len([s for s in sections if not s['skipped']])}개)

{sec_rows}
No내용 (발췌)AI 점수판정
""" report_dir = tempfile.mkdtemp() html_path = os.path.join(report_dir, f"AI_Report_{fname}.html") with open(html_path, 'w', encoding='utf-8') as f: f.write(html_content) return html_path except Exception as e: print(f"PDF 보고서 생성 오류: {e}") return None def extract_file_text_api(file): """파일 업로드 → 텍스트 추출 API (탭1~4용)""" if file is None: return "⚠️ 파일이 없습니다." if hasattr(file, 'path'): file_path = file.path elif hasattr(file, 'name'): file_path = file.name else: file_path = str(file) if not os.path.exists(file_path): return f"⚠️ 파일을 찾을 수 없습니다: {os.path.basename(file_path)}" fname = os.path.basename(file_path) sections, full_text, error = extract_text_from_file(file_path) if error: return f"⚠️ {fname}: {error}" if not full_text or len(full_text.strip()) < 10: return f"⚠️ {fname}: 텍스트 추출 결과 없음" return full_text.strip() with gr.Blocks(title="AI 글 판별기 v5.1") as demo: gr.Markdown("# 🔎 AI 글 판별기 v5.1\n**5축+Perplexity+Humanizer탐지+모델추정 · 품질 측정 · LLM 교차검증 · 표절 검사**") with gr.Tab("🏆 경쟁력 비교"): gr.HTML("""
🇰🇷 한국어 AI 판별 No.1
카피킬러 · GPTZero · Turnitin · JustDone · Originality.ai 등
글로벌 8개 경쟁 서비스 정밀 분석 후 설계된 차세대 판별기
🔬 v5.1 — 3대 킬러 기능 탑재
8축
탐지 차원
(업계 최다)
100%
테스트 정확도
(11/11 샘플)
무료
완전 무료
(횟수 무제한)
4종
모델 추정
(GPT/Claude/Gemini/PPX)
기능 🔎 AI 판별기
v5.1
📋 카피킬러
GPT킬러
🅹 JustDone
.com
🔍 GPTZero 🅾 Originality
.ai
🅃 Turnitin
🎯 한국어 형태소 분석 ★ 전용
📊 탐지 축 수 8축 1~2축 1축 2축 1축 1축
🧠 Perplexity 확률분석 4중
🛡️ Humanizer/Bypasser 탐지 6시그널
🎯 AI 모델 추정 ★ 4모델
🤖 LLM 교차검증 ★ 3모델
📝 판정 근거 투명 공개 ★ 8축별
🎨 문장별 하이라이트 5단계
📄 HWP/HWPX 문서
📝 글 품질 측정 6항목
🔍 표절 검사 5소스 DB DB
💰 가격 완전 무료 9,900원/건 $7.99~/월 $8.33~/월 $14.95/월 기관 전용
🎯 대상 모델 전 AI 모델 GPT만 범용 범용 범용 범용
⚠️ 핵심 약점 규칙 기반
(ML 미적용)
GPT만·오탐률
89% 불만
점수 30%+ 편차
"Hello"→92%AI
혼합텍스트 41%
편집AI 35% 오류
$14.95/월
비영어 약함
기관만 사용
학생 접근 불가
🧠
Perplexity 확률 분석
GPTZero의 핵심 기술을 한국어에 특화.
문자 엔트로피 + Burstiness + TTR편차 + 종결엔트로피 4중 분석으로 AI 텍스트의 예측 가능도를 정밀 측정
vs GPTZero: 한국어 형태소 기반 → 더 정확
🛡️
Humanizer/Bypasser 탐지
QuillBot 등 패러프레이즈 도구로 수정된 AI 글의 잔존 흔적 6가지 시그널로 탐지.
동의어 과다치환, 구조보존+어휘변경, 접속사 등간격 잔존 패턴
vs Turnitin 전용 모듈 → 한국어 최초 구현
🎯
AI 모델 추정 (업계 유일)
GPT · Claude · Gemini · Perplexity 4개 모델의 고유 지문을 복합 증거로 분석.
"어떤 AI가 썼는지"까지 추정하는 유일한 한국어 서비스
vs 전체 경쟁사: 모델 추정 기능 없음
🤖
LLM 3중 교차검증 (업계 유일)
GPT-OSS-120B · Qwen3-32B · Kimi-K2
3개 독립 AI가 직접 판독 → 다수결 투표로 규칙 기반의 한계 보완
vs 전체 경쟁사: LLM 검증 미보유
⚔️ 경쟁사 취약점 vs 우리의 강점
📋 카피킬러 — GPT만 탐지, 89% 불만율→ 전 AI 모델 + 8축 투명 근거
🅹 JustDone — 동일 텍스트 30%+ 점수 편차→ 결정론적 점수 보장
🔍 GPTZero — 혼합텍스트 41%, 편집AI 35% 오류→ Humanizer 탐지 + 섹션 분리
🅃 Turnitin — 기관 전용, 학생 사전검사 불가→ 누구나 무료 즉시 사용
🅾 Originality — $14.95/월, 비영어 취약→ 완전 무료 + 한국어 전문
🇰🇷 한국어 AI 글을 가장 정확하게, 가장 투명하게, 완전 무료로 판별합니다
vs 카피킬러: "GPT만이 아닌, 모든 AI를 탐지합니다"
vs GPTZero: "한국어에 특화된 형태소 분석으로 더 정확합니다"
vs Turnitin: "누구나, 무료로, 지금 바로 사용할 수 있습니다"
vs JustDone: "동일한 텍스트에 항상 동일한 결과를 보장합니다"
""") with gr.Tab("🔍 분석"): gr.Markdown("텍스트가 AI에 의해 작성되었는지 5개 축으로 분석합니다. 0~100점 (높을수록 AI 가능성 높음)") inp=gr.Textbox(label="분석할 텍스트",placeholder="최소 50자 이상...",lines=10) with gr.Row(): btn_a=gr.Button("🚀 AI 판별 + 품질 분석",variant="primary",size="lg") btn_sa=gr.Button("📝 AI 예시",size="sm"); btn_sh=gr.Button("✍️ 인간 예시",size="sm") rh=gr.HTML(); rl=gr.Textbox(label="상세 로그",lines=3,elem_classes=["mono"]) btn_a.click(run_detection,[inp],[rh,rl],api_name="run_detection") btn_sa.click(lambda:SAMPLE_AI,outputs=[inp]); btn_sh.click(lambda:SAMPLE_HUMAN,outputs=[inp]) with gr.Tab("🎨 하이라이트"): gr.Markdown("문장별로 AI 확률을 색상 표시합니다. **탭1과 동일한 기준**으로 판정합니다. 마우스 오버 시 근거 확인.") ih=gr.Textbox(label="텍스트",lines=8); bh=gr.Button("🎨 하이라이트 분석",variant="primary"); hr=gr.HTML() bh.click(run_highlight,[ih],[hr],api_name="run_highlight") with gr.Tab("🔍 표절 검사"): gr.Markdown("**Brave Search 병렬(최대20) + KCI · RISS · arXiv + Gemini Google Search** 기반 표절 검사. CopyKiller 스타일 보고서.") inp_plag=gr.Textbox(label="검사할 텍스트",placeholder="표절 검사할 텍스트 (최소 50자)...",lines=10) with gr.Row(): btn_plag=gr.Button("🔍 표절 검사 시작",variant="primary",size="lg") btn_ps=gr.Button("📝 AI 예시",size="sm") plag_html=gr.HTML(); plag_log=gr.Textbox(label="검사 로그",lines=4,elem_classes=["mono"]) btn_plag.click(run_plagiarism,[inp_plag],[plag_html,plag_log],api_name="run_plagiarism") btn_ps.click(lambda:SAMPLE_AI,outputs=[inp_plag]) with gr.Tab("📖 설명"): gr.Markdown(""" - **탐지 5축:** 통계(6%)·문체(25%)·반복(10%)·구조(12%)·지문(30%) - **★ Perplexity 확률분석(17%):** 문자 엔트로피·Burstiness·TTR편차·종결엔트로피 - **★ Humanizer 탐지:** 동의어과다치환·구조보존·접속사잔존·이중피동 등 6시그널 - **★ 모델 추정:** GPT·Claude·Gemini·Perplexity 4모델 증거기반 추정 - **품질 6항목:** 가독성·어휘·논리·정확성·표현·정보밀도 - **LLM 교차검증:** GPT-OSS-120B·Qwen3-32B·Kimi-K2 (GROQ) - **Brave Search**: 병렬 20개 동시 웹검색 - **학술 DB**: KCI(한국학술지인용색인), RISS(학술연구정보), arXiv - **Gemini**: Google Search Grounding - **보고서**: CopyKiller 스타일 — 유사도%, 출처표, 문장별 하이라이트 - `GROQ_API_KEY` — LLM 교차검증 - `GEMINI_API_KEY` — 표절 검사 (Google Search Grounding) - `BRAVE_API_KEY` — 표절 검사 (Brave Search 병렬) """) with gr.Row(visible=False): _file_in = gr.File(label="hidden_file") _text_out = gr.Textbox(label="hidden_text") _file_in.change(extract_file_text_api, [_file_in], [_text_out], api_name="extract_file_text") import shutil, pathlib static_dir = pathlib.Path("static") static_dir.mkdir(exist_ok=True) if pathlib.Path("index.html").exists(): shutil.copy("index.html", static_dir / "index.html") from fastapi import FastAPI from fastapi.responses import HTMLResponse, FileResponse from fastapi.staticfiles import StaticFiles server = FastAPI() @server.get("/", response_class=HTMLResponse) async def serve_root(): """루트 URL에서 프리미엄 index.html 서빙""" fp = pathlib.Path("static/index.html") if fp.exists(): return HTMLResponse(fp.read_text(encoding="utf-8")) from fastapi.responses import RedirectResponse return RedirectResponse("/gradio/") app = gr.mount_gradio_app(server, demo, path="/gradio", allowed_paths=["static"]) if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=7860)