Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
"""
|
| 2 |
-
AI ๊ธ ํ๋ณ๊ธฐ
|
| 3 |
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 4 |
5์ถ AI ํ์ง | 6ํญ๋ชฉ ํ์ง | LLM ๊ต์ฐจ๊ฒ์ฆ (GPT-OSS-120B ยท Qwen3-32B ยท Kimi-K2)
|
| 5 |
-
โ
|
| 6 |
โ
ํ์ : Brave Search ๋ณ๋ ฌ(์ต๋20) + KCI/RISS/ARXIV + Gemini + CopyKiller ๋ณด๊ณ ์
|
| 7 |
โ
๋ฌธ์: PDFยทDOCXยทHWPยทHWPXยทTXT ์
๋ก๋ โ ์น์
๋ณ ํํธ๋งต + PDF ๋ณด๊ณ ์
|
| 8 |
"""
|
|
@@ -319,140 +319,344 @@ def call_groq(model, prompt, max_tokens=800, temperature=0.1):
|
|
| 319 |
except Exception as e: return None, str(e)[:150]
|
| 320 |
|
| 321 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 322 |
-
# โ
ํตํฉ ๋ฌธ์ฅ ์ ์ (ํญ1 + ํญ2 ๊ณต์ )
|
| 323 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 324 |
AI_ENDINGS = ['ํฉ๋๋ค','์
๋๋ค','๋ฉ๋๋ค','์ต๋๋ค','์์ต๋๋ค','ํ์ต๋๋ค','๊ฒ ์ต๋๋ค']
|
| 325 |
-
|
| 326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
HUMAN_MARKERS = {
|
| 328 |
-
'ใ
ใ
ใ
': re.compile(r'([ใ
ใ
ใ
ใ
ใทใฑ])\1{
|
| 329 |
'์ด๋ชจํฐ์ฝ': re.compile(r'[;:]-?[)(DPp]|\^[_\-]?\^|ใ
กใ
ก|;;'),
|
| 330 |
-
'์ค์': re.compile(r'ในใ
|ใ
ใ
|ใดใด|ใ
ใ
'),
|
| 331 |
'๋๋ํ': re.compile(r'[!?]{2,}'),
|
| 332 |
-
'๋น๊ฒฉ์': re.compile(r'(๊ฑฐ๋ |์์|์ธ๋ฐ|์ธ๊ฑธ|๊ฐ์|๋๋|์๋|๋๋ฐ|๋ฏธ์ณค)'),
|
|
|
|
|
|
|
|
|
|
| 333 |
}
|
| 334 |
FP = {
|
| 335 |
-
"GPT": {"m":['๋ฌผ๋ก ์ด์ฃ ','๋์์ด ๋์
จ๊ธฐ๋ฅผ','์ค๋ช
ํด ๋๋ฆฌ๊ฒ ์ต๋๋ค','์ถ๊ฐ ์ง๋ฌธ','๋์์ด ํ์ํ์๋ฉด'
|
| 336 |
-
|
| 337 |
-
"
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
}
|
| 340 |
|
| 341 |
def score_sentence(sent):
|
| 342 |
-
"""๋จ์ผ ๋ฌธ์ฅ AI ์ ์ (0~100). ํญ1ยทํญ2 ๊ณต์ ."""
|
| 343 |
sc = 0; reasons = []
|
| 344 |
-
|
|
|
|
|
|
|
|
|
|
| 345 |
for e in AI_ENDINGS:
|
| 346 |
-
if
|
| 347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
for c in AI_CONNS:
|
| 349 |
-
if
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
for mn, fp in FP.items():
|
| 358 |
for m in fp["m"]:
|
| 359 |
-
if m in sent: sc +=
|
| 360 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
for n, p in HUMAN_MARKERS.items():
|
| 362 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
return max(0, min(100, sc)), reasons
|
| 364 |
|
| 365 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 366 |
-
# ์ถโ ํต๊ณ
|
| 367 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 368 |
def analyze_statistics(text, sentences, words):
|
| 369 |
sl = [len(s) for s in sentences]
|
| 370 |
if len(sl) < 2: return {"score":50}
|
|
|
|
|
|
|
| 371 |
avg = sum(sl)/len(sl); std = math.sqrt(sum((l-avg)**2 for l in sl)/len(sl))
|
| 372 |
cv = std/avg if avg > 0 else 0
|
| 373 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
wf = Counter(words); t = len(words)
|
| 375 |
-
ne = 0
|
| 376 |
-
if t > 0:
|
| 377 |
-
ent = -sum((c/t)*math.log2(c/t) for c in wf.values() if c>0)
|
| 378 |
-
mx = math.log2(len(wf)) if len(wf)>1 else 1
|
| 379 |
-
ne = ent/mx if mx>0 else 0
|
| 380 |
-
es = 75 if ne>0.92 else 55 if ne>0.85 else 30
|
| 381 |
ttr = len(wf)/t if t>0 else 0
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
|
|
|
| 394 |
|
| 395 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 396 |
-
# ์ถโก ๋ฌธ์ฒด
|
| 397 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 398 |
def analyze_korean_style(text, sentences, morphemes):
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
if morphemes:
|
| 410 |
pc = Counter(t for _,t in morphemes); tm = sum(pc.values())
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
|
|
|
|
|
|
|
|
|
| 414 |
|
| 415 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 416 |
-
# ์ถโข ๋ฐ๋ณต
|
| 417 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 418 |
def analyze_repetition(text, sentences, words):
|
|
|
|
|
|
|
|
|
|
| 419 |
tr = 0
|
| 420 |
-
if len(words)>=
|
| 421 |
tg = Counter(tuple(words[i:i+3]) for i in range(len(words)-2))
|
| 422 |
tr = sum(1 for c in tg.values() if c>1)/len(tg) if tg else 0
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
ai_only_conns = ['๋ํ','๋ฐ๋ผ์','๊ทธ๋ฌ๋ฏ๋ก','์ด์ ๋ฐ๋ผ','๋๋ถ์ด','์์ธ๋ฌ','๋ฟ๋ง ์๋๋ผ',
|
| 430 |
'์ด๋ฅผ ํตํด','์ด์','๊ฒฐ๊ณผ์ ์ผ๋ก','๊ถ๊ทน์ ์ผ๋ก','๋์๊ฐ','์ด๋ฌํ']
|
| 431 |
cr = sum(1 for s in sentences if any(s.strip().startswith(c) for c in ai_only_conns))
|
| 432 |
crr = cr/len(sentences) if sentences else 0
|
| 433 |
-
|
| 434 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
|
| 436 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 437 |
-
# ์ถโฃ ๊ตฌ์กฐ
|
| 438 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
def analyze_structure(text, sentences):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
paras = [p.strip() for p in text.split('\n\n') if p.strip()]
|
| 441 |
-
|
| 442 |
-
if len(paras)>
|
| 443 |
-
pl = [len(split_sentences(p)) for p in paras]
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
|
|
|
|
|
|
|
|
|
| 450 |
|
| 451 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 452 |
-
# ์ถโค ์ง๋ฌธ
|
| 453 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 454 |
def analyze_model_fingerprint(text, sentences):
|
| 455 |
ms = {}
|
|
|
|
| 456 |
for mn, fp in FP.items():
|
| 457 |
sc = sum(min(15,text.count(m)*5) for m in fp["m"] if text.count(m)>0)
|
| 458 |
lm = fp["lp"].findall(text)
|
|
@@ -460,8 +664,29 @@ def analyze_model_fingerprint(text, sentences):
|
|
| 460 |
em = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in fp.get("e",[])))
|
| 461 |
if sentences: sc += int((em/len(sentences))*20)
|
| 462 |
ms[mn] = min(100,sc)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
mx = max(ms.values()) if ms else 0
|
| 464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
|
| 466 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 467 |
# ํ์ง
|
|
@@ -610,14 +835,34 @@ AIํ๋ฅ : 75%
|
|
| 610 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 611 |
# ์ข
ํฉ ํ์ (์ผ๊ด๋ ๊ธฐ์ค)
|
| 612 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 613 |
-
def compute_verdict(scores, llm_score=-1):
|
| 614 |
-
w={"ํต๊ณ":.
|
| 615 |
ws=sum(scores[k]*w[k] for k in w)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 616 |
hi=sum(1 for v in scores.values() if v>=50)
|
| 617 |
-
if hi>=4: ws+=
|
| 618 |
-
elif hi>=3: ws+=
|
| 619 |
-
elif hi>=2: ws+=
|
| 620 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
if llm_score>=0: ws=ws*0.70+llm_score*0.30
|
| 622 |
fs=max(0,min(100,int(ws)))
|
| 623 |
if fs>=75: return fs,"AI ์์ฑ ํ์ ","ai_high"
|
|
@@ -631,275 +876,14 @@ def quick_score(text):
|
|
| 631 |
sc={"ํต๊ณ":analyze_statistics(text,sents,words)["score"],"๋ฌธ์ฒด":analyze_korean_style(text,sents,morphs)["score"],
|
| 632 |
"๋ฐ๋ณต์ฑ":analyze_repetition(text,sents,words)["score"],"๊ตฌ์กฐ":analyze_structure(text,sents)["score"],
|
| 633 |
"์ง๋ฌธ":analyze_model_fingerprint(text,sents)["score"]}
|
| 634 |
-
|
|
|
|
|
|
|
|
|
|
| 635 |
|
| 636 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 637 |
-
# โ
AIโ์ธ๊ฐ ๋ณํ (๋ํญ ๊ฐํ)
|
| 638 |
-
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 639 |
-
CONN_MAP = {'๋ํ':['๊ทธ๋ฆฌ๊ณ ','์ด ๋ฐ์๋','๊ฒ๋ค๊ฐ'],'๋ฐ๋ผ์':['๊ทธ๋์','์ด๋ฐ ์ด์ ๋ก','๊ทธ๋ฌ๋๊น'],
|
| 640 |
-
'์ด์ ๋ฐ๋ผ':['๊ทธ๋์','๊ทธ๋ฌ๋๊น'],'ํํธ':['๋ฐ๋ฉด์','๋ฐ๋๋ก'],
|
| 641 |
-
'๋๋ถ์ด':['ํจ๊ป','๊ฐ์ด'],'๊ฒฐ๊ณผ์ ์ผ๋ก':['๊ฒฐ๊ตญ','๋๋ด๋'],
|
| 642 |
-
'๊ถ๊ทน์ ์ผ๋ก':['๊ฒฐ๊ตญ์๋','๋ง์ง๋ง์๋'],'๋์๊ฐ':['๋ ๋์๊ฐ๋ฉด','์ฌ๊ธฐ์ ๋'],
|
| 643 |
-
'์ด๋ฌํ':['์ด๋ฐ','์ด์ ๊ฐ์'],'ํนํ':['๊ทธ์ค์์๋','๋ฌด์๋ณด๋ค','ํน๋ณํ'],
|
| 644 |
-
'๋ฟ๋ง ์๋๋ผ':['๊ฑฐ๊ธฐ์๋ค','๊ทธ๊ฒ๋ฟ ์๋๋ผ'],'์ด๋ฅผ ํตํด':['๋๋ถ์','์ด ๋์'],
|
| 645 |
-
'์ด์':['๊ทธ๋์','์ด๊ฑธ๋ก'],'์์ธ๋ฌ':['๊ทธ๋ฆฌ๊ณ ','ํจ๊ป'],'๊ทธ๋ฌ๋ฏ๋ก':['๊ทธ๋์','๊ทธ๋ฌ๋']}
|
| 646 |
-
FILL_MAP = {
|
| 647 |
-
'์ค์ํ ์ญํ ์ ํ๊ณ ':'ํฐ ๋ชซ์ ์ฐจ์งํ๊ณ ','์ค์ํ ์ญํ ์ ํ':'ํฐ ๋ชซ์ ์ฐจ์งํ',
|
| 648 |
-
'์ค์ํ ์๋ฏธ๋ฅผ ๊ฐ์ง':'ํฐ ์๋ฏธ๋ฅผ ์ง๋',
|
| 649 |
-
'๊ธ์ ์ ์ธ ์ํฅ์ ๋ฏธ์น๊ณ ':'์ข์ ์ชฝ์ผ๋ก ์์ฉํ๊ณ ','๊ธ์ ์ ์ธ ์ํฅ':'์ข์ ์ชฝ ์์ฉ',
|
| 650 |
-
'๋ถ์ ์ ์ธ ์ํฅ์':'๋์ ์ชฝ์ผ๋ก','์ํฅ์ ๋ฏธ์น':'์์ฉ์ ํ',
|
| 651 |
-
'๋๋ถ์ ์ฑ๊ณผ๋ฅผ ๊ฑฐ๋':'๋๋จํ ๊ฒฐ๊ณผ๋ฅผ ๋ด','๋๋ถ์ ์ฑ๊ณผ':'๋๋จํ ๊ฒฐ๊ณผ',
|
| 652 |
-
'๊ด๋ชฉํ ๋งํ':'๋์ ๋๋','ํ๊ธฐ์ ์ธ ๋ณํ':'ํฐ ์ ํ์ ','ํ์ ์ ์ธ':'์๋ก์ด',
|
| 653 |
-
'๋ค์ํ ๋ถ์ผ':'์ฌ๋ฌ ๋ถ์ผ','๋ค์ํ ์ฐ์
๋ถ์ผ':'์ฌ๋ฌ ์ฐ์
','๋ค์ํ ์ฐ์
':'์ฌ๋ฌ ์ฐ์
',
|
| 654 |
-
'๋ค์ํ ์ฐฝ์':'์ฌ๋ฌ ์ฐฝ์','๋ค์ํ ์ธก๋ฉด':'์ฌ๋ฌ ๋ฉด',
|
| 655 |
-
'๋ถ๊ฐ๋๊ณ ์์ต๋๋ค':'๋๋๋ฌ์ง๊ณ ์๋ค','๋ถ๊ฐ๋๊ณ ':'๋๋๋ฌ์ง๊ณ ',
|
| 656 |
-
'๋๋๋๊ณ ':'๋ ์ค๋ฅด๊ณ ','ํ์ฉํ ์ ์๊ฒ':'์ธ ์ ์๊ฒ',
|
| 657 |
-
'ํ๋ฐํ ์งํ๋๊ณ ์์ต๋๋ค':'ํ๋ฐํ๊ฒ ์ด๋ค์ง๊ณ ์๋ค',
|
| 658 |
-
'๊ฒ์ผ๋ก ์์๋ฉ๋๋ค':'๊ฒ ๊ฐ๋ค','๊ฒ์ผ๋ก ๋ณด์
๋๋ค':'๊ฒ ๊ฐ๋ค',
|
| 659 |
-
'๊ฒ์ผ๋ก ํ๋จ๋ฉ๋๋ค':'๊ฒ์ผ๋ก ๋ณด์ธ๋ค','๊ฒ์ผ๋ก ๋ถ์๋ฉ๋๋ค':'๊ฒ์ผ๋ก ๋ณด์ธ๋ค',
|
| 660 |
-
'๊ฒ์ผ๋ก ๋ณด์ด':'๊ฒ ๊ฐ','๊ฒ์ผ๋ก ๋ํ':'๊ฒ์ผ๋ก ๋๋ฌ๋',
|
| 661 |
-
'ํ ์ ์์ต๋๋ค':'ํ ์ ์๋ค','๋ณผ ์ ์์ต๋๋ค':'๋ณผ ์ ์๋ค',
|
| 662 |
-
'์ฃผ๋ชฉํ ๋งํ':'๋์ฌ๊ฒจ๋ณผ','์ฃผ๋ชฉํ ๋ง':'๋์ฌ๊ฒจ๋ณผ ๋ง',
|
| 663 |
-
'ํ์ํฉ๋๋ค':'ํ์ํ๋ค','์ค์ํฉ๋๋ค':'์ค์ํ๋ค',
|
| 664 |
-
'์ญํ ์ ํ๊ณ ':'๋ชซ์ ํ๊ณ ','์ญํ ์ ํ':'๋ชซ์ ํ',
|
| 665 |
-
'์ ์์':'๋ฉด์์','์ธก๋ฉด์์':'๋ถ๋ถ์์','๊ด์ ์์':'์๊ฐ์์',
|
| 666 |
-
}
|
| 667 |
-
INLINE_CONN = {'์ด๋ฅผ ํตํด ':'์ด๊ฑธ๋ก ','์ด์ ๋ํ ':'์ด ๋ฌธ์ ์ ๋ํ ','๋ฐ๋ผ์ ':'๊ทธ๋์ ',
|
| 668 |
-
'๊ฒฐ๊ณผ์ ์ผ๋ก ':'๊ฒฐ๊ตญ ','์ด๋ฌํ ':'์ด๋ฐ ','์ด์ ':'์ด๊ฑธ๋ก ','๋์๊ฐ ':'๋ ๋์๊ฐ๋ฉด '}
|
| 669 |
-
END_RULES = [
|
| 670 |
-
('ํ๋ฐํ ์งํ๋๊ณ ์์ต๋๋ค','ํ๋ฐํ๊ฒ ์ด๋ค์ง๊ณ ์๋ค'),
|
| 671 |
-
('๊ฑฐ๋๊ณ ์์ต๋๋ค','๊ฑฐ๋๊ณ ์๋ค'),('๋ณํํ๊ณ ์์ต๋๋ค','๋ฐ๋๊ณ ์๋ค'),
|
| 672 |
-
('์๊ฒ ๋์์ต๋๋ค','์๊ฒ ๋๋ค'),('ํ๊ณ ์์ต๋๋ค','ํ๊ณ ์๋ค'),
|
| 673 |
-
('๋๊ณ ์์ต๋๋ค','๋๊ณ ์๋ค'),('ํ ์ ์์ต๋๋ค','ํ ์ ์๋ค'),
|
| 674 |
-
('๋ฏธ์น๊ณ ์์ผ๋ฉฐ','์ฃผ๊ณ ์๊ณ '),('๊ฐ๋ฅํด์ก์ผ๋ฉฐ','๊ฐ๋ฅํด์ก๊ณ '),
|
| 675 |
-
('ํ์ํฉ๋๋ค','ํ์ํ๋ค'),('์ค์ํฉ๋๋ค','์ค์ํ๋ค'),
|
| 676 |
-
('์์ต๋๋ค','์๋ค'),('๋ฉ๋๋ค','๋๋ค'),('ํ์ต๋๋ค','ํ๋ค'),
|
| 677 |
-
('๊ฒ ์ต๋๋ค','๊ฒ์ด๋ค'),('์
๋๋ค','์ด๋ค'),
|
| 678 |
-
('๊ฐ์ง๋ฉฐ','๊ฐ์ง๊ณ '),('์ด๋ฃจ๋ ๊ฒ์ด','์ด๋ฃจ๋ ๊ฒ'),
|
| 679 |
-
]
|
| 680 |
-
# ๋ฌธ์ฅ ์ฌ๊ตฌ์ฑ์ฉ ํจํด
|
| 681 |
-
RESTRUCTURE = [
|
| 682 |
-
(r'(\S+)์ (\S+)์์ (.+)', lambda m: f"{m.group(2)}์์ {m.group(1)}์ {m.group(3)}" if random.random()<0.3 else m.group()),
|
| 683 |
-
(r'(.+)ํ๊ณ ์๋ค\.', lambda m: f"{m.group(1)}ํ๋ ์ค์ด๋ค." if random.random()<0.3 else m.group()),
|
| 684 |
-
]
|
| 685 |
-
|
| 686 |
-
def rule_humanize(text):
|
| 687 |
-
r=text; ch=[]
|
| 688 |
-
# 1. ๋ฌธ๋ ์ ์์ฌ (100% ๊ต์ฒด โ 50%๋ ๋์ฒด, 50%๋ ์ญ์ )
|
| 689 |
-
for ac,alts in CONN_MAP.items():
|
| 690 |
-
pat=re.compile(r'(?:^|\n)(\s*)('+re.escape(ac)+r')(\s)',re.M)
|
| 691 |
-
for m in reversed(list(pat.finditer(r))):
|
| 692 |
-
if random.random()<0.6:
|
| 693 |
-
alt=random.choice(alts); r=r[:m.start(2)]+alt+r[m.end(2):]; ch.append(f"์ ์์ฌ '{ac}'โ'{alt}'")
|
| 694 |
-
else: r=r[:m.start(2)]+r[m.end(2):]; ch.append(f"์ ์์ฌ์ ๊ฑฐ '{ac}'")
|
| 695 |
-
# 2. ๋ฌธ์ฅ ๋ด ์ ์์ฌ
|
| 696 |
-
for ai,hu in INLINE_CONN.items():
|
| 697 |
-
if ai in r: r=r.replace(ai,hu,1); ch.append(f"๋ด๋ถ์ ์ '{ai.strip()}'")
|
| 698 |
-
# 3. ์ํฌ (FILL_MAP โ AI_FILLER ์์ ๋ฌด๋ ฅํ)
|
| 699 |
-
for ai in sorted(FILL_MAP.keys(), key=len, reverse=True):
|
| 700 |
-
hu = FILL_MAP[ai]
|
| 701 |
-
if ai in r: r=r.replace(ai,hu,1); ch.append(f"์ํฌ '{ai}'")
|
| 702 |
-
# 4. ์ข
๊ฒฐ์ด๋ฏธ ์ ๋ฉด ๋ณํ
|
| 703 |
-
for ai,hu in END_RULES:
|
| 704 |
-
cnt=r.count(ai)
|
| 705 |
-
if cnt>0: r=r.replace(ai,hu); ch.append(f"์ข
๊ฒฐ '{ai}'โ'{hu}' ร{cnt}")
|
| 706 |
-
# 5. ๋งํฌ๋ค์ด ์ ๊ฑฐ
|
| 707 |
-
r=re.sub(r'^\d+\.\s+','',r,flags=re.M)
|
| 708 |
-
r=re.sub(r'^[-โข*]\s+','',r,flags=re.M)
|
| 709 |
-
r=re.sub(r'\*\*([^*]+)\*\*',r'\1',r)
|
| 710 |
-
r=re.sub(r'^#+\s+','',r,flags=re.M)
|
| 711 |
-
# 6. ๋ฌธ์ฅ ์ฌ๊ตฌ์ฑ
|
| 712 |
-
sents = split_sentences(r)
|
| 713 |
-
rebuilt = []
|
| 714 |
-
for i, s in enumerate(sents):
|
| 715 |
-
ns = s
|
| 716 |
-
for pat, repl in RESTRUCTURE:
|
| 717 |
-
ns = re.sub(pat, repl, ns)
|
| 718 |
-
rebuilt.append(ns)
|
| 719 |
-
# 7. ๋ฌธ์ฅ ๊ธธ์ด ๋ณ๋ ์ฃผ์
(ํต๊ณ์ถ ๊ฐ์ ) โ BUG3 FIX
|
| 720 |
-
if len(rebuilt) >= 4:
|
| 721 |
-
sl = [len(s) for s in rebuilt]
|
| 722 |
-
avg = sum(sl)/len(sl)
|
| 723 |
-
cv = math.sqrt(sum((l-avg)**2 for l in sl)/len(sl))/(avg if avg>0 else 1)
|
| 724 |
-
if cv < 0.35: # CV๊ฐ ๋ฎ์ผ๋ฉด = ๋ฌธ์ฅ ๊ธธ์ด๊ฐ ๋๋ฌด ๊ท ์ผ = AI์
|
| 725 |
-
# ๋๋ค ์์น์ ์งง์ ๋ฌธ์ฅ ์ฝ์
|
| 726 |
-
short_comments = [
|
| 727 |
-
"์ข ๋๋๋ค.","์ด๊ฒ ํต์ฌ์ด๋ค.","๊ฝค ์๋ฏธ ์๋ค.","๋ณํ๊ฐ ํฌ๋ค.",
|
| 728 |
-
"์ฝ์ง ์์ ๋ฌธ์ ๋ค.","์๊ฐํด๋ณผ ๋ถ๋ถ์ด๋ค.","๋ฌด์ ๋ชป ํ ํ๋ฆ์ด๋ค."
|
| 729 |
-
]
|
| 730 |
-
# 2~3๊ฐ ์ฝ์
(์ค๋ณต ๋ฐฉ์ง)
|
| 731 |
-
used = set()
|
| 732 |
-
for _ in range(min(3, len(rebuilt)//3)):
|
| 733 |
-
pos = random.randint(1, len(rebuilt)-1)
|
| 734 |
-
cmt = random.choice([c for c in short_comments if c not in used])
|
| 735 |
-
used.add(cmt)
|
| 736 |
-
rebuilt.insert(pos, cmt)
|
| 737 |
-
ch.append(f"์งง์ ๋ฌธ์ฅ {len(used)}๊ฐ ์ฝ์
(CV ๊ฐ์ )")
|
| 738 |
-
# ๊ฐ๋ ๊ธด ๋ฌธ์ฅ๋ ๋ง๋ค๊ธฐ (์งง์ 2๊ฐ ํฉ์น๊ธฐ)
|
| 739 |
-
if len(rebuilt) >= 6 and random.random() < 0.5:
|
| 740 |
-
idx = random.randint(0, len(rebuilt)-2)
|
| 741 |
-
if 15 < len(rebuilt[idx]) < 35 and 15 < len(rebuilt[idx+1]) < 35:
|
| 742 |
-
merged = rebuilt[idx].rstrip('.') + ', ' + rebuilt[idx+1][0].lower() + rebuilt[idx+1][1:] if rebuilt[idx+1] else rebuilt[idx]
|
| 743 |
-
rebuilt[idx] = merged
|
| 744 |
-
rebuilt.pop(idx+1)
|
| 745 |
-
ch.append("๊ธด ๋ฌธ์ฅ ํฉ๋ณ (CV ๊ฐ์ )")
|
| 746 |
-
# 8. ๋ฌธ๋จ ๋๋๊ธฐ โ ๊ตฌ์กฐ์ถ ๊ฐ์
|
| 747 |
-
if len(rebuilt) >= 6 and '\n\n' not in ' '.join(rebuilt):
|
| 748 |
-
mid = len(rebuilt)//2
|
| 749 |
-
rebuilt.insert(mid, '\n')
|
| 750 |
-
ch.append("๋ฌธ๋จ ๋ถ๋ฆฌ ์ฝ์
")
|
| 751 |
-
r = ' '.join(rebuilt)
|
| 752 |
-
r = r.replace(' \n ', '\n\n')
|
| 753 |
-
return r.strip(), ch
|
| 754 |
-
|
| 755 |
-
def llm_humanize(text):
|
| 756 |
-
if not GROQ_KEY: return text, ["LLM: APIํค ์์"]
|
| 757 |
-
prompt = f"""๋น์ ์ AI ํ
์คํธ ํ์ง๊ธฐ๋ฅผ ์ฐํํ๋ ๋ฆฌ๋ผ์ดํฐ์
๋๋ค. ์๋ ํ
์คํธ๋ฅผ ์์ฐ์ค๋ฌ์ด ์ฌ๋์ด ์ด ๊ธ๋ก ์์ ํ ์ฌ์์ฑํ์ธ์.
|
| 758 |
-
|
| 759 |
-
[ํ์ ๊ท์น]
|
| 760 |
-
1. "-์ต๋๋ค/-ํฉ๋๋ค/-์
๋๋ค" ์ข
๊ฒฐ์ด๋ฏธ ์ ๋ ์ฌ์ฉ ๊ธ์ง. ์ ๋ถ "-๋ค/-์ด๋ค/-ํ๋ค/-์ธ ์
์ด๋ค" ๋ฑ ๋น๊ฒฉ์์ฒด๋ก
|
| 761 |
-
2. "๋ํ/๋ฐ๋ผ์/๋์๊ฐ/์ด๋ฌํ/์ด์/๊ฒฐ๊ณผ์ ์ผ๋ก" ์ ๋ ์ฌ์ฉ ๊ธ์ง
|
| 762 |
-
3. "์ค์ํ ์ญํ /๊ธ์ ์ ์ธ ์ํฅ/๋ค์ํ ๋ถ์ผ/ํ๊ธฐ์ ์ธ" ์ ๋ถ ์ผ์์ด๋ก
|
| 763 |
-
4. ๋ฌธ์ฅ ๊ธธ์ด๋ฅผ ์ฌํ๊ฒ ๋ถ๊ท์นํ๊ฒ: 5~10์ ์งง์ ๋ฌธ์ฅ๊ณผ 50~80์ ๊ธด ๋ฌธ์ฅ์ ๋ฐ๋์ ์์ด๋ผ
|
| 764 |
-
5. ๋ฐ๋์ 3~4๊ฐ ๊ฐ์ธ์ ๊ฐ์/์๊ฒฌ ์ฝ์
("์์งํ", "์ฌ์ค", "๊ทผ๋ฐ ์๊ฐํด๋ณด๋ฉด", "๋ด ์๊ฐ์")
|
| 765 |
-
6. ์๋ ์๋ฏธยท์ ๋ณด๋ 100% ๋ณด์กด
|
| 766 |
-
7. ๋งํฌ๋ค์ด/๋ฆฌ์คํธ ์์ด ์์ ํ ์ฐ๋ฌธ์ผ๋ก
|
| 767 |
-
8. ๋ฌธ์ฅ ์์๋ฅผ ์ผ๋ถ ๋ฐ๊ฟ๋ ๋จ
|
| 768 |
-
9. ํผ๋ ํํ ์ค์ด๊ณ ๋ฅ๋ํ ์์ฃผ
|
| 769 |
-
10. "~๊ฒ์ผ๋ก ๋ณด์ธ๋ค/์์๋๋ค" ๊ฐ์ ํํผ ํํ โ "~์ผ ๊ฑฐ๋ค/~ํ ๊ฒ ๊ฐ๋ค"
|
| 770 |
-
|
| 771 |
-
[์๋ฌธ]
|
| 772 |
-
{text[:2500]}
|
| 773 |
-
|
| 774 |
-
[๋ณํ ๊ฒฐ๊ณผ๋ง ์ถ๋ ฅ - ์ค๋ช
์์ด]"""
|
| 775 |
-
resp, err = call_groq("qwen/qwen3-32b", prompt, max_tokens=2000, temperature=0.85)
|
| 776 |
-
if resp:
|
| 777 |
-
cleaned = re.sub(r'<think>.*?</think>', '', resp, flags=re.S).strip()
|
| 778 |
-
if len(cleaned) > 50: return cleaned, [f"LLM ๋ฆฌ๋ผ์ดํ
({len(cleaned)}์)"]
|
| 779 |
-
return text, [f"LLM ์คํจ: {err}"]
|
| 780 |
-
|
| 781 |
-
def run_humanizer(text, progress=gr.Progress()):
|
| 782 |
-
if not text or len(text.strip())<50: return "","","",""
|
| 783 |
-
text=text.strip()
|
| 784 |
-
progress(0.05,"์๋ณธ ๋ถ์...")
|
| 785 |
-
b_score,b_verdict,_,b_axes=quick_score(text)
|
| 786 |
-
bq=analyze_quality(text,split_sentences(text),split_words(text),get_morphemes(text))
|
| 787 |
-
if b_score<25: return text,"์ด๋ฏธ ์ธ๊ฐ์ ์ธ ํ
์คํธ (AI์ ์ 25๋ฏธ๋ง).","",""
|
| 788 |
-
|
| 789 |
-
# โโโ Adversarial Humanizer v2.1: ๋ฐ๋ณต ์๊ธฐ๋์ (์ดํ ๋ฐฉ์ง) โโโ
|
| 790 |
-
MAX_ROUNDS = 3
|
| 791 |
-
TARGET_SCORE = 25
|
| 792 |
-
best_text = text
|
| 793 |
-
best_score = b_score
|
| 794 |
-
best_method = "์๋ณธ"
|
| 795 |
-
all_ch = []
|
| 796 |
-
round_log = []
|
| 797 |
-
original_text = text # LLM์ ํญ์ ์๋ณธ ๊ธฐ๋ฐ์ผ๋ก ํธ์ถ (์ดํ ๋ฐฉ์ง)
|
| 798 |
-
|
| 799 |
-
for rnd in range(1, MAX_ROUNDS + 1):
|
| 800 |
-
if best_score <= TARGET_SCORE:
|
| 801 |
-
round_log.append(f"๐ Round {rnd} ์คํต โ ์ด๋ฏธ ๋ชฉํ ๋ฌ์ฑ (์ ์ {best_score})")
|
| 802 |
-
break
|
| 803 |
-
|
| 804 |
-
pct = 0.05 + (rnd / MAX_ROUNDS) * 0.65
|
| 805 |
-
progress(pct, f"โ๏ธ Round {rnd}/{MAX_ROUNDS} โ ํ์ฌ ์ ์ {best_score}...")
|
| 806 |
-
|
| 807 |
-
candidates = []
|
| 808 |
-
|
| 809 |
-
# ๊ท์น ๋ณํ: ํ์ฌ best์ ์ ์ฉ (Round 1์ ์๋ณธ, Round 2+๋ best)
|
| 810 |
-
rule_text, rule_ch = rule_humanize(best_text)
|
| 811 |
-
r_score, _, _, _ = quick_score(rule_text)
|
| 812 |
-
candidates.append((rule_text, r_score, f"R{rnd}-๊ท์น", rule_ch))
|
| 813 |
-
|
| 814 |
-
if GROQ_KEY:
|
| 815 |
-
# LLM: ํญ์ ์๋ณธ ํ
์คํธ ๊ธฐ๋ฐ (Round 2+์์ ์ดํ๋ ํ
์คํธ ์ฌLLM ๋ฐฉ์ง)
|
| 816 |
-
llm_text, llm_ch = llm_humanize(original_text)
|
| 817 |
-
l_score, _, _, _ = quick_score(llm_text)
|
| 818 |
-
candidates.append((llm_text, l_score, f"R{rnd}-LLM", llm_ch))
|
| 819 |
-
|
| 820 |
-
# LLM โ ๊ท์น ํ์ด๋ธ๋ฆฌ๋
|
| 821 |
-
llm_rule, lr_ch = rule_humanize(llm_text)
|
| 822 |
-
lr_score, _, _, _ = quick_score(llm_rule)
|
| 823 |
-
candidates.append((llm_rule, lr_score, f"R{rnd}-LLM+๊ท์น", llm_ch + lr_ch))
|
| 824 |
-
|
| 825 |
-
# ์ต์ ์ ํ
|
| 826 |
-
winner = min(candidates, key=lambda x: x[1])
|
| 827 |
-
w_text, w_score, w_method, w_changes = winner
|
| 828 |
-
score_strs = ', '.join(f"{c[2]}:{c[1]}" for c in candidates)
|
| 829 |
-
round_log.append(f"Round {rnd}: {score_strs} โ {w_method} ์ฑํ (ฮ{best_score - w_score})")
|
| 830 |
-
|
| 831 |
-
# ์
ํ ๋ฐฉ์ง: ์ด์ ๋ณด๋ค 2์ ์ด์ ๊ฐ์ ๋ ๊ฒฝ์ฐ๋ง ์ฑํ
|
| 832 |
-
if w_score < best_score - 1:
|
| 833 |
-
best_text, best_score, best_method = w_text, w_score, w_method
|
| 834 |
-
all_ch.extend(w_changes)
|
| 835 |
-
else:
|
| 836 |
-
round_log.append(f" โณ ์ ์๋ฏธํ ๊ฐ์ ์์ (ฮ{best_score - w_score}), ์ด์ ๊ฒฐ๊ณผ ์ ์ง")
|
| 837 |
-
break
|
| 838 |
-
|
| 839 |
-
final_text = best_text
|
| 840 |
-
method = best_method
|
| 841 |
-
|
| 842 |
-
progress(0.75, "์ต์ข
๊ฒ์ฆ...")
|
| 843 |
-
a_score, a_verdict, a_level, a_axes = quick_score(final_text)
|
| 844 |
-
aq = analyze_quality(final_text, split_sentences(final_text), split_words(final_text), get_morphemes(final_text))
|
| 845 |
-
|
| 846 |
-
progress(0.85, "LLM ๊ต์ฐจ๊ฒ์ฆ...")
|
| 847 |
-
llm_v = llm_cross_check(final_text)
|
| 848 |
-
if llm_v["score"] >= 0:
|
| 849 |
-
a_score_f, a_verdict_f, _ = compute_verdict(a_axes, llm_v["score"])
|
| 850 |
-
else:
|
| 851 |
-
a_score_f, a_verdict_f = a_score, a_verdict
|
| 852 |
-
|
| 853 |
-
delta = b_score - a_score_f
|
| 854 |
-
passed = a_score_f < 30
|
| 855 |
-
change_log = f"โ๏ธ Adversarial v2 ({len(round_log)}๋ผ์ด๋)\n"
|
| 856 |
-
change_log += '\n'.join(f" {r}" for r in round_log)
|
| 857 |
-
change_log += f"\n\n์ด {len(all_ch)}๊ฑด ๋ณํ:\n" + '\n'.join(f" โข {c}" for c in all_ch[:20])
|
| 858 |
-
if len(all_ch) > 20: change_log += f"\n ... +{len(all_ch)-20}๊ฑด"
|
| 859 |
-
|
| 860 |
-
# ๋น๊ต HTML
|
| 861 |
-
def cbar(lbl,bv,av):
|
| 862 |
-
bc="#FF4444" if bv>=50 else "#DDAA00" if bv>=35 else "#22AA44"
|
| 863 |
-
ac="#FF4444" if av>=50 else "#DDAA00" if av>=35 else "#22AA44"
|
| 864 |
-
d=bv-av; ds=f"<span style='color:#22AA44;font-weight:700;'>โ{d}</span>" if d>0 else f"<span style='color:#FF4444;'>โ{abs(d)}</span>" if d<0 else "="
|
| 865 |
-
return f"<div style='margin:3px 0;display:grid;grid-template-columns:70px 1fr 20px 1fr 36px;align-items:center;gap:3px;font-size:11px;'><span style='font-weight:600;'>{lbl}</span><div style='background:#E8E8E8;border-radius:3px;height:6px;'><div style='background:{bc};height:100%;width:{bv}%;border-radius:3px;'></div></div><span style='text-align:center;color:#CCC;'>โ</span><div style='background:#E8E8E8;border-radius:3px;height:6px;'><div style='background:{ac};height:100%;width:{av}%;border-radius:3px;'></div></div><span style='text-align:right;'>{ds}</span></div>"
|
| 866 |
-
|
| 867 |
-
bfg="#FF4444" if b_score>=60 else "#FF8800" if b_score>=45 else "#DDAA00" if b_score>=30 else "#22AA44"
|
| 868 |
-
bbg="#FFE0E0" if b_score>=60 else "#FFF0DD" if b_score>=45 else "#FFFBE0" if b_score>=30 else "#E0FFE8"
|
| 869 |
-
afg="#FF4444" if a_score_f>=60 else "#FF8800" if a_score_f>=45 else "#DDAA00" if a_score_f>=30 else "#22AA44"
|
| 870 |
-
abg="#FFE0E0" if a_score_f>=60 else "#FFF0DD" if a_score_f>=45 else "#FFFBE0" if a_score_f>=30 else "#E0FFE8"
|
| 871 |
-
dc="#22AA44" if delta>0 else "#FF4444"
|
| 872 |
-
badge='<span style="background:#22AA44;color:white;padding:4px 14px;border-radius:20px;font-weight:700;">โ
๊ฒ์ฆ ํต๊ณผ</span>' if passed else '<span style="background:#FF8800;color:white;padding:4px 14px;border-radius:20px;font-weight:700;">โ ๏ธ ์ถ๊ฐ ์์ ๊ถ์ฅ (AI์ ์ {})'.format(a_score_f)+'</span>'
|
| 873 |
-
|
| 874 |
-
html=f"""<div style="font-family:'Pretendard',sans-serif;max-width:700px;margin:0 auto;">
|
| 875 |
-
<div style="text-align:center;margin-bottom:12px;">{badge}</div>
|
| 876 |
-
<div style="display:grid;grid-template-columns:1fr 50px 1fr;gap:6px;margin-bottom:12px;">
|
| 877 |
-
<div style="background:{bbg};border:2px solid {bfg};border-radius:12px;padding:14px;text-align:center;">
|
| 878 |
-
<div style="font-size:10px;color:#888;">BEFORE</div>
|
| 879 |
-
<div style="font-size:32px;font-weight:900;color:{bfg};">{b_score}</div>
|
| 880 |
-
<div style="font-size:10px;color:{bfg};">{b_verdict}</div>
|
| 881 |
-
</div>
|
| 882 |
-
<div style="display:flex;align-items:center;justify-content:center;">
|
| 883 |
-
<div style="font-size:22px;color:{dc};font-weight:900;">{"โ" if delta>0 else "โ"}{abs(delta)}</div>
|
| 884 |
-
</div>
|
| 885 |
-
<div style="background:{abg};border:2px solid {afg};border-radius:12px;padding:14px;text-align:center;">
|
| 886 |
-
<div style="font-size:10px;color:#888;">AFTER ({method})</div>
|
| 887 |
-
<div style="font-size:32px;font-weight:900;color:{afg};">{a_score_f}</div>
|
| 888 |
-
<div style="font-size:10px;color:{afg};">{a_verdict_f}</div>
|
| 889 |
-
</div>
|
| 890 |
-
</div>
|
| 891 |
-
<div style="background:#FAFAFA;border-radius:8px;padding:10px;margin-bottom:8px;">
|
| 892 |
-
<div style="font-size:11px;font-weight:700;margin-bottom:4px;">๐ ์ถ๋ณ ๋น๊ต</div>
|
| 893 |
-
{cbar("ํต๊ณ",b_axes["ํต๊ณ"],a_axes["ํต๊ณ"])}{cbar("๋ฌธ์ฒด",b_axes["๋ฌธ์ฒด"],a_axes["๋ฌธ์ฒด"])}{cbar("๋ฐ๋ณต",b_axes["๋ฐ๋ณต์ฑ"],a_axes["๋ฐ๋ณต์ฑ"])}{cbar("๊ตฌ์กฐ",b_axes["๊ตฌ์กฐ"],a_axes["๊ตฌ์กฐ"])}{cbar("์ง๋ฌธ",b_axes["์ง๋ฌธ"],a_axes["์ง๋ฌธ"])}
|
| 894 |
-
</div>
|
| 895 |
-
<div style="background:#F0F4FF;border-radius:6px;padding:6px 10px;font-size:11px;">
|
| 896 |
-
<b>ํ์ง:</b> {bq['grade']}({bq['score']}) โ {aq['grade']}({aq['score']})
|
| 897 |
-
</div></div>"""
|
| 898 |
-
return final_text, change_log, html, ""
|
| 899 |
-
|
| 900 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 901 |
# โ
ํ์ ๊ฒ์ฌ (Brave Search ๋ณ๋ ฌ + KCI/RISS/ARXIV + Gemini)
|
| 902 |
-
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 903 |
def brave_search(query, count=5):
|
| 904 |
"""Brave Search API โ ๋จ์ผ ์ฟผ๋ฆฌ"""
|
| 905 |
if not BRAVE_KEY: return []
|
|
@@ -1457,15 +1441,16 @@ def run_detection(text, progress=gr.Progress()):
|
|
| 1457 |
progress(0.62); qr=analyze_quality(text,sents,words,morphs)
|
| 1458 |
progress(0.75); lr=llm_cross_check(text)
|
| 1459 |
sc={"ํต๊ณ":s1["score"],"๋ฌธ์ฒด":s2["score"],"๋ฐ๋ณต์ฑ":s3["score"],"๊ตฌ์กฐ":s4["score"],"์ง๋ฌธ":s5["score"]}
|
| 1460 |
-
|
|
|
|
|
|
|
|
|
|
| 1461 |
progress(0.95)
|
| 1462 |
cm={"ai_high":("#FF4444","#FFE0E0","๋์"),"ai_medium":("#FF8800","#FFF0DD","์ค๊ฐ~๋์"),"ai_low":("#DDAA00","#FFFBE0","์ค๊ฐ"),"uncertain":("#888","#F0F0F0","๋ฎ์"),"human":("#22AA44","#E0FFE8","๋งค์ฐ ๋ฎ์")}
|
| 1463 |
fg,bg,conf=cm.get(level,("#888","#F0F0F0","?"))
|
| 1464 |
ms=s5.get("model_scores",{}); tm=max(ms,key=ms.get) if ms else "N/A"; tms=ms.get(tm,0)
|
| 1465 |
mt=f"{tm} ({tms}์ )" if tms>=15 else "ํน์ ๋ถ๊ฐ"
|
| 1466 |
|
| 1467 |
-
# ๋ฌธ์ฅ๋ณ ์ ์ (ํญ2์ ๋์ผ ๊ธฐ์ค)
|
| 1468 |
-
sent_scores = [score_sentence(s)[0] for s in sents]
|
| 1469 |
ai_sents = sum(1 for s in sent_scores if s >= 40)
|
| 1470 |
human_sents = sum(1 for s in sent_scores if s < 20)
|
| 1471 |
|
|
@@ -1563,28 +1548,43 @@ def run_highlight(text):
|
|
| 1563 |
sents=split_sentences(text)
|
| 1564 |
hl=[]
|
| 1565 |
for s in sents:
|
| 1566 |
-
sc, reasons = score_sentence(s)
|
| 1567 |
-
|
| 1568 |
-
|
| 1569 |
-
|
| 1570 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1571 |
|
| 1572 |
total_scores = [score_sentence(s)[0] for s in sents]
|
| 1573 |
avg_sc = sum(total_scores)/len(total_scores) if total_scores else 0
|
| 1574 |
-
|
|
|
|
| 1575 |
human_cnt = sum(1 for s in total_scores if s < 25)
|
| 1576 |
|
| 1577 |
return f"""<div style='font-family:Pretendard,sans-serif;'>
|
| 1578 |
<div style='margin-bottom:10px;padding:10px;background:#F8F8FF;border-radius:8px;'>
|
| 1579 |
-
<div style='display:flex;gap:
|
| 1580 |
-
<span style='background:rgba(
|
| 1581 |
-
<span style='background:rgba(
|
| 1582 |
-
<span style='background:rgba(
|
| 1583 |
-
<span style='
|
|
|
|
| 1584 |
</div>
|
| 1585 |
-
<div style='font-size:10px;color:#888;'>๐ก
|
| 1586 |
</div>
|
| 1587 |
-
<div style='line-height:2.
|
| 1588 |
</div>"""
|
| 1589 |
|
| 1590 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
@@ -1637,7 +1637,9 @@ def run_document_analysis(file, progress=gr.Progress()):
|
|
| 1637 |
progress(0.30, "LLM ๊ต์ฐจ๊ฒ์ฆ...")
|
| 1638 |
llm_result = llm_cross_check(full_text[:3000])
|
| 1639 |
if llm_result["score"] >= 0:
|
| 1640 |
-
|
|
|
|
|
|
|
| 1641 |
|
| 1642 |
# ์น์
๋ณ ๋ถ์
|
| 1643 |
progress(0.45, f"{len(sections)}๊ฐ ์น์
๋ถ์...")
|
|
@@ -1884,7 +1886,7 @@ def extract_file_text_api(file):
|
|
| 1884 |
|
| 1885 |
|
| 1886 |
with gr.Blocks(title="AI ๊ธ ํ๋ณ๊ธฐ v4.0") as demo:
|
| 1887 |
-
gr.Markdown("# ๐ AI ๊ธ ํ๋ณ๊ธฐ
|
| 1888 |
with gr.Tab("๐ ๋ถ์"):
|
| 1889 |
gr.Markdown("ํ
์คํธ๊ฐ AI์ ์ํด ์์ฑ๋์๋์ง 5๊ฐ ์ถ์ผ๋ก ๋ถ์ํฉ๋๋ค. 0~100์ (๋์์๋ก AI ๊ฐ๋ฅ์ฑ ๋์)")
|
| 1890 |
inp=gr.Textbox(label="๋ถ์ํ ํ
์คํธ",placeholder="์ต์ 50์ ์ด์...",lines=10)
|
|
@@ -1898,15 +1900,6 @@ with gr.Blocks(title="AI ๊ธ ํ๋ณ๊ธฐ v4.0") as demo:
|
|
| 1898 |
gr.Markdown("๋ฌธ์ฅ๋ณ๋ก AI ํ๋ฅ ์ ์์ ํ์ํฉ๋๋ค. **ํญ1๊ณผ ๋์ผํ ๊ธฐ์ค**์ผ๋ก ํ์ ํฉ๋๋ค. ๋ง์ฐ์ค ์ค๋ฒ ์ ๊ทผ๊ฑฐ ํ์ธ.")
|
| 1899 |
ih=gr.Textbox(label="ํ
์คํธ",lines=8); bh=gr.Button("๐จ ํ์ด๋ผ์ดํธ ๋ถ์",variant="primary"); hr=gr.HTML()
|
| 1900 |
bh.click(run_highlight,[ih],[hr],api_name="run_highlight")
|
| 1901 |
-
with gr.Tab("๐ AIโ์ธ๊ฐ ๋ณํ"):
|
| 1902 |
-
gr.Markdown("**Adversarial Humanizer v2** โ ํ์ง๊ธฐ์ ๋ณํ๊ธฐ์ ์๊ธฐ๋์ ๋ฃจํ. ์ต๋ 3๋ผ์ด๋ ๋ฐ๋ณตํ๋ฉฐ AI ์ ์๋ฅผ ์ต์ ๋ก ๋์ด๋ด๋ฆฝ๋๋ค.")
|
| 1903 |
-
ihm=gr.Textbox(label="์๋ณธ (AI ํ
์คํธ)",lines=8)
|
| 1904 |
-
with gr.Row():
|
| 1905 |
-
bhm=gr.Button("๐ ์๋ ๋ณํ + ๊ฒ์ฆ",variant="primary",size="lg"); bhs=gr.Button("๐ AI ์์",size="sm")
|
| 1906 |
-
ohm=gr.Textbox(label="โ
๋ณํ ๊ฒฐ๊ณผ",lines=8)
|
| 1907 |
-
och=gr.Textbox(label="๐ ๋ณํ ๋ด์ญ",lines=5,elem_classes=["mono"])
|
| 1908 |
-
ocp=gr.HTML(); oex=gr.Textbox(visible=False)
|
| 1909 |
-
bhm.click(run_humanizer,[ihm],[ohm,och,ocp,oex],api_name="run_humanizer"); bhs.click(lambda:SAMPLE_AI,outputs=[ihm])
|
| 1910 |
with gr.Tab("๐ ํ์ ๊ฒ์ฌ"):
|
| 1911 |
gr.Markdown("**Brave Search ๋ณ๋ ฌ(์ต๋20) + KCI ยท RISS ยท arXiv + Gemini Google Search** ๊ธฐ๋ฐ ํ์ ๊ฒ์ฌ. CopyKiller ์คํ์ผ ๋ณด๊ณ ์.")
|
| 1912 |
inp_plag=gr.Textbox(label="๊ฒ์ฌํ ํ
์คํธ",placeholder="ํ์ ๊ฒ์ฌํ ํ
์คํธ (์ต์ 50์)...",lines=10)
|
|
@@ -1918,7 +1911,7 @@ with gr.Blocks(title="AI ๊ธ ํ๋ณ๊ธฐ v4.0") as demo:
|
|
| 1918 |
btn_ps.click(lambda:SAMPLE_AI,outputs=[inp_plag])
|
| 1919 |
with gr.Tab("๐ ์ค๋ช
"):
|
| 1920 |
gr.Markdown("""
|
| 1921 |
-
### ์ํคํ
์ฒ
|
| 1922 |
- **ํ์ง 5์ถ:** ํต๊ณ(25%)ยท๋ฌธ์ฒด(30%)ยท๋ฐ๋ณต(15%)ยท๊ตฌ์กฐ(15%)ยท์ง๋ฌธ(15%)
|
| 1923 |
- **ํ์ง 6ํญ๋ชฉ:** ๊ฐ๋
์ฑยท์ดํยท๋
ผ๋ฆฌยท์ ํ์ฑยทํํยท์ ๋ณด๋ฐ๋
|
| 1924 |
- **LLM ๊ต์ฐจ๊ฒ์ฆ:** GPT-OSS-120BยทQwen3-32BยทKimi-K2 (GROQ)
|
|
@@ -1927,26 +1920,14 @@ with gr.Blocks(title="AI ๊ธ ํ๋ณ๊ธฐ v4.0") as demo:
|
|
| 1927 |
- `score_sentence()` ํตํฉ ํจ์๋ก ๋์ผ ๊ธฐ์ค ํ์
|
| 1928 |
- ๊ฒฉ์์ด๋ฏธ(25์ ) + AI์ ์์ฌ(20์ ) + ์ํฌํํ(15~25์ ) + ๋ชจ๋ธ์ง๋ฌธ(10์ ) โ ์ธ๊ฐ๋ง์ปค(30์ )
|
| 1929 |
|
| 1930 |
-
### AIโ์ธ๊ฐ ๋ณํ (Adversarial v2)
|
| 1931 |
-
1. **์๊ธฐ๋์ ๋ฃจํ**: ๋ณํโํ์งโ์ฌ๋ณํ ์ต๋ 3๋ผ์ด๋
|
| 1932 |
-
2. **๋ผ์ด๋๋ณ**: ๊ท์น / LLM / LLM+๊ท์น 3ํ๋ณด ๊ฒฝ์
|
| 1933 |
-
3. **์๋ ์ข
๋ฃ**: ๋ชฉํ ์ ์(25์ ) ์ดํ ๋ฌ์ฑ ์ ์ข
๋ฃ
|
| 1934 |
-
4. **ํผ๋๋ฐฑ**: ์ด์ ๋ผ์ด๋ ์ต์ ๊ฒฐ๊ณผ๋ฅผ ๋ค์ ๋ผ์ด๋ ์
๋ ฅ์ผ๋ก
|
| 1935 |
-
|
| 1936 |
### ํ์ ๊ฒ์ฌ
|
| 1937 |
- **Brave Search**: ๋ณ๋ ฌ 20๊ฐ ๋์ ์น๊ฒ์
|
| 1938 |
- **ํ์ DB**: KCI(ํ๊ตญํ์ ์ง์ธ์ฉ์์ธ), RISS(ํ์ ์ฐ๊ตฌ์ ๋ณด), arXiv
|
| 1939 |
- **Gemini**: Google Search Grounding
|
| 1940 |
- **๋ณด๊ณ ์**: CopyKiller ์คํ์ผ โ ์ ์ฌ๋%, ์ถ์ฒํ, ๋ฌธ์ฅ๋ณ ํ์ด๋ผ์ดํธ
|
| 1941 |
|
| 1942 |
-
### ๐ ๋ฌธ์ ๋ถ์ (NEW)
|
| 1943 |
-
- **์ง์ ํ์**: PDF ยท DOCX ยท HWP ยท HWPX ยท TXT ยท MD
|
| 1944 |
-
- **์น์
๋ณ ํํธ๋งต**: ํ์ด์ง/๋ฌธ๋จ๋ณ AI ํ๋ฅ ์์ ์๊ฐํ
|
| 1945 |
-
- **๋ฌธ์ฅ๋ณ ํ์ด๋ผ์ดํธ**: ๊ฐ ๋ฌธ์ฅ AI ํ๋ฅ score_sentence() ๊ธฐ๋ฐ
|
| 1946 |
-
- **PDF ๋ณด๊ณ ์**: ์ข
ํฉ๊ฒฐ๊ณผ + 5์ถ ๋ถ์ + ์น์
๋ณ ์์ธ ๋ค์ด๋ก๋
|
| 1947 |
-
|
| 1948 |
### ํ๊ฒฝ๋ณ์
|
| 1949 |
-
- `GROQ_API_KEY` โ LLM ๊ต์ฐจ๊ฒ์ฆ
|
| 1950 |
- `GEMINI_API_KEY` โ ํ์ ๊ฒ์ฌ (Google Search Grounding)
|
| 1951 |
- `BRAVE_API_KEY` โ ํ์ ๊ฒ์ฌ (Brave Search ๋ณ๋ ฌ)
|
| 1952 |
""")
|
|
|
|
| 1 |
"""
|
| 2 |
+
AI ๊ธ ํ๋ณ๊ธฐ v5.0 โ 5์ถ AI ํ์ง + ํ์ง ์ธก์ + LLM ๊ต์ฐจ๊ฒ์ฆ + ํ์ ๊ฒ์ฌ
|
| 3 |
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 4 |
5์ถ AI ํ์ง | 6ํญ๋ชฉ ํ์ง | LLM ๊ต์ฐจ๊ฒ์ฆ (GPT-OSS-120B ยท Qwen3-32B ยท Kimi-K2)
|
| 5 |
+
โ
LLM ๊ต์ฐจ๊ฒ์ฆ: 3๋ชจ๋ธ (GPT-OSS/Qwen3/Kimi-K2) ํฌํ + ๊ฐ๊ฑดํ ํ์ฑ
|
| 6 |
โ
ํ์ : Brave Search ๋ณ๋ ฌ(์ต๋20) + KCI/RISS/ARXIV + Gemini + CopyKiller ๋ณด๊ณ ์
|
| 7 |
โ
๋ฌธ์: PDFยทDOCXยทHWPยทHWPXยทTXT ์
๋ก๋ โ ์น์
๋ณ ํํธ๋งต + PDF ๋ณด๊ณ ์
|
| 8 |
"""
|
|
|
|
| 319 |
except Exception as e: return None, str(e)[:150]
|
| 320 |
|
| 321 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 322 |
+
# โ
ํตํฉ ๋ฌธ์ฅ ์ ์ (ํญ1 + ํญ2 ๊ณต์ ) โ v5.0 ๋ํญ ๊ฐํ
|
| 323 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 324 |
AI_ENDINGS = ['ํฉ๋๋ค','์
๋๋ค','๋ฉ๋๋ค','์ต๋๋ค','์์ต๋๋ค','ํ์ต๋๋ค','๊ฒ ์ต๋๋ค']
|
| 325 |
+
# ๋น๊ฒฉ์์ด์ง๋ง AI์ ์ธ ์ข
๊ฒฐ ํจํด
|
| 326 |
+
AI_CASUAL_ENDINGS = ['๋ผ๊ณ ํ ์ ์๋ค','๋ผ๊ณ ๋ณผ ์ ์๋ค','๋ค๊ณ ์๊ฐํ๋ค','๋ค๊ณ ํ๋จ๋๋ค',
|
| 327 |
+
'์ธ ์
์ด๋ค','์ธ ๊ฒ์ด๋ค','๋ ๊ฒ์ด๋ค','๋ ์
์ด๋ค','ใน ๊ฒ์ด๋ค','์ ๊ฒ์ด๋ค',
|
| 328 |
+
'๋ผ ํ ์ ์๋ค','๋ก ๋ณด์ธ๋ค','๋ก ํ๋จ๋๋ค','๊ณ ์๋ค','๋ ์ถ์ธ๋ค','๋ ์ํฉ์ด๋ค',
|
| 329 |
+
'์ง ์์ ์ ์๋ค','๋ผ ํ๊ฒ ๋ค','์์ ์ ์ ์๋ค','ํ ํ์๊ฐ ์๋ค']
|
| 330 |
+
AI_CONNS = ['๋ํ','๋ฐ๋ผ์','๊ทธ๋ฌ๋ฏ๋ก','์ด์ ๋ฐ๋ผ','ํํธ','๋๋ถ์ด','์์ธ๋ฌ','๋ฟ๋ง ์๋๋ผ',
|
| 331 |
+
'์ด๋ฅผ ํตํด','์ด์','๊ฒฐ๊ณผ์ ์ผ๋ก','๊ถ๊ทน์ ์ผ๋ก','ํนํ','๋์๊ฐ','์ด๋ฌํ']
|
| 332 |
+
# ์์ฐ์ด์์๋ ์ฐ์ด์ง๋ง AI๊ฐ ๊ณผ๋ํ๊ฒ ์ฐ๋ ์ ์์ฌ
|
| 333 |
+
AI_SOFT_CONNS = ['๋ฌผ๋ก ','๊ทธ๋ฌ๋','ํ์ง๋ง','์ด์ฒ๋ผ','์ด์ ๊ฐ์ด','์ด๋ฅผ ๋ฐํ์ผ๋ก']
|
| 334 |
+
AI_FILLER = ['๊ฒ์ผ๋ก ๋ณด','๊ฒ์ผ๋ก ๋ํ','๊ฒ์ผ๋ก ์์','ํ ์ ์','๋ณผ ์ ์','์ฃผ๋ชฉํ ๋ง',
|
| 335 |
+
'์ค์ํ ์ญํ ','์ค์ํ ์๋ฏธ','๊ธ์ ์ ์ธ ์ํฅ','๋ถ์ ์ ์ธ ์ํฅ','ํ์ํฉ๋๋ค','ํ์ํ๋ค',
|
| 336 |
+
'์ค์ํฉ๋๋ค','์ค์ํ๋ค','์ญํ ์ ํ','์ํฅ์ ๋ฏธ','๊ธฐ๋๋๋ค','์์๋ฉ๋๋ค','๋ถ๊ฐ๋๊ณ ',
|
| 337 |
+
'๋๋๋๊ณ ','๋ค์ํ ๋ถ์ผ','๋ค์ํ ์ฐ์
','๋๋ถ์ ์ฑ๊ณผ','ํ๊ธฐ์ ์ธ ๋ณํ','ํ์ ์ ์ธ',
|
| 338 |
+
'์ ์์','์ธก๋ฉด์์','๊ด์ ์์']
|
| 339 |
+
# ๋น๊ฒฉ์ AI ์ํฌํํ (๋น๊ฒฉ์์ฒด๋ก ์จ๋ AI์ )
|
| 340 |
+
AI_CASUAL_FILLER = ['๋ฌด๊ถ๋ฌด์งํ๋ค','๋ฌด๊ถ๋ฌด์งํ','๊ณผ์ธ์ด ์๋','๋ํ๊ตฌ๊ฐ ๋ ','์ ํ์ ์ด ๋ ',
|
| 341 |
+
'๊ธฐ๋ฐ์ผ๋ก','๋ฐํ์ผ๋ก','์๋๋ ฅ์ด','์ด์์ด ๋ ','๊ฐ์ํ๋','๊ธ๋ถ์','ํจ๋ฌ๋ค์',
|
| 342 |
+
'์งํ์ ์ด','์๋ก์ด ์ฅ์','๋์ ํ','๋ณธ๊ฒฉํ๋','๊ณ ๋ํ','์ด์ ํ']
|
| 343 |
+
# ์๋ณด-์ฃผ์ฅ ํจํด (AI ํน์ : "๋ฌผ๋ก Xํ์ง๋ง, Y" ๊ตฌ์กฐ)
|
| 344 |
+
AI_CONCESSION = re.compile(r'๋ฌผ๋ก .{2,20}(ํ์ง๋ง|๊ทธ๋ฌ๋|๊ทธ๋ ์ง๋ง|๋ค๋ง)|.{2,15}(์ด๊ธด ํ์ง๋ง|๊ธฐ๋ ํ์ง๋ง|์ ์์ง๋ง|์๋ ์์ง๋ง)')
|
| 345 |
+
# ์์ด AI ํจํด
|
| 346 |
+
EN_AI_MARKERS = ['furthermore','additionally','moreover','it is worth noting','in conclusion',
|
| 347 |
+
'it is important to','plays a crucial role','significant impact','various aspects',
|
| 348 |
+
'in this regard','consequently','nevertheless','integral part of','led to remarkable',
|
| 349 |
+
'fundamentally transformed','has become increasingly','it should be noted',
|
| 350 |
+
'in the context of','paradigm shift','landscape of','methodologies',
|
| 351 |
+
'transformative impact','unprecedented','in various domains']
|
| 352 |
HUMAN_MARKERS = {
|
| 353 |
+
'ใ
ใ
ใ
': re.compile(r'([ใ
ใ
ใ
ใ
ใทใฑ])\1{1,}'),
|
| 354 |
'์ด๋ชจํฐ์ฝ': re.compile(r'[;:]-?[)(DPp]|\^[_\-]?\^|ใ
กใ
ก|;;'),
|
| 355 |
+
'์ค์': re.compile(r'ในใ
|ใ
ใ
|ใดใด|ใ
ใ
|ใทใท|ใ
ใ
'),
|
| 356 |
'๋๋ํ': re.compile(r'[!?]{2,}'),
|
| 357 |
+
'๋น๊ฒฉ์์ข
๊ฒฐ': re.compile(r'(๊ฑฐ๋ |์์|์ธ๋ฐ|์ธ๊ฑธ|๊ฐ์|๋๋|์๋|๋๋ฐ|๋ฏธ์ณค|ํ|ใ
$|ใ
$|์$|์$|๋ฏ$)'),
|
| 358 |
+
'๊ตฌ์ด์ถ์ฝ': re.compile(r'(๊ฑ|์ข|๋ง|์์ |์ง์ง|๋ ์|์กด๋|๊ฐ|์กธ๋ผ|์กด๋ง|๊ฒ๋)'),
|
| 359 |
+
'๋ง์ถค๋ฒ์ค๋ฅ': re.compile(r'๋ฌ|๋ช์ผ|๊ธ์|ํ ์์|๊ฒ๊ฐ[์๋ค]|๋๊ฐ|๋์|์๋|ํ๋ค'),
|
| 360 |
+
'๋ง์ค์ํ': re.compile(r'\.{3,}|โฆ'),
|
| 361 |
}
|
| 362 |
FP = {
|
| 363 |
+
"GPT": {"m":['๋ฌผ๋ก ์ด์ฃ ','๋์์ด ๋์
จ๊ธฐ๋ฅผ','์ค๋ช
ํด ๋๋ฆฌ๊ฒ ์ต๋๋ค','์ถ๊ฐ ์ง๋ฌธ','๋์์ด ํ์ํ์๋ฉด',
|
| 364 |
+
'์์ฝํ์๋ฉด','๊ฐ๋ตํ ์ ๋ฆฌํ๋ฉด','ํต์ฌ์'],"e":['์ต๋๋ค','๋๋ฆฌ๊ฒ ์ต๋๋ค'],"lp":re.compile(r'^\d+\.\s|^[-โข]\s',re.M)},
|
| 365 |
+
"Claude": {"m":['๋ง์ํ์ ','์ดํด๋ณด๊ฒ ์ต๋๋ค','๊ท ํ ์กํ','๋งฅ๋ฝ์์','ํ ๊ฐ์ง ์ฃผ์ํ ','๋์์ค',
|
| 366 |
+
'ํฅ๋ฏธ๋ก์ด ์ง๋ฌธ','๋ณต์กํ ์ฃผ์ '],"e":['๋ค์','๊ฑฐ์์'],"lp":re.compile(r'^\*\*.*\*\*|^#+\s',re.M)},
|
| 367 |
+
"Gemini": {"m":['๋ค์๊ณผ ๊ฐ์ต๋๋ค','์ ๋ฆฌํด ๋๋ฆฌ๊ฒ ์ต๋๋ค','ํต์ฌ ๋ด์ฉ์','๋ ์๊ณ ์ถ์ผ์๋ฉด',
|
| 368 |
+
'์์๋ณด๊ฒ ์ต๋๋ค'],"e":['๊ฒ ์ต๋๋ค','๋ณด์ธ์'],"lp":re.compile(r'^\*\s|^-\s\*\*',re.M)},
|
| 369 |
+
"Perplexity": {"m":['๊ฒ์ ๊ฒฐ๊ณผ์ ๋ฐ๋ฅด๋ฉด','๋ณด๋์ ๋ฐ๋ฅด๋ฉด','์ฐ๊ตฌ์ ๋ฐ๋ฅด๋ฉด','๋ฐํ๋ค','์ ํ๋ค',
|
| 370 |
+
'๊ฒ์ผ๋ก ๋ํ๋ฌ๋ค','๊ฒ์ผ๋ก ์กฐ์ฌ๋๋ค','๊ฒ์ผ๋ก ์ง๊ณ๋๋ค','๋ฐํํ๋ค'],"e":['๋ฐํ๋ค','๋ํ๋ฌ๋ค','์ ํ๋ค'],"lp":re.compile(r'\[\d+\]',re.M)},
|
| 371 |
}
|
| 372 |
|
| 373 |
def score_sentence(sent):
|
| 374 |
+
"""๋จ์ผ ๋ฌธ์ฅ AI ์ ์ (0~100). ํญ1ยทํญ2 ๊ณต์ . v5.0 ๋ํญ ๊ฐํ."""
|
| 375 |
sc = 0; reasons = []
|
| 376 |
+
sl = sent.lower().strip()
|
| 377 |
+
sr = sent.rstrip('.!?ใ')
|
| 378 |
+
|
| 379 |
+
# โโ ๊ฒฉ์ ์ข
๊ฒฐ์ด๋ฏธ โโ
|
| 380 |
for e in AI_ENDINGS:
|
| 381 |
+
if sr.endswith(e): sc += 22; reasons.append(f"๊ฒฉ์์ด๋ฏธ(-{e})"); break
|
| 382 |
+
|
| 383 |
+
# โโ ๋น๊ฒฉ์ AI ์ข
๊ฒฐ ํจํด โโ
|
| 384 |
+
if sc == 0: # ๊ฒฉ์์ด ์๋ ๊ฒฝ์ฐ๋ง
|
| 385 |
+
for e in AI_CASUAL_ENDINGS:
|
| 386 |
+
if sr.endswith(e): sc += 15; reasons.append(f"๋น๊ฒฉ์AI(-{e})"); break
|
| 387 |
+
|
| 388 |
+
# โโ ๋ฌธ๋ ์ ์์ฌ โโ
|
| 389 |
+
stripped = sent.strip()
|
| 390 |
for c in AI_CONNS:
|
| 391 |
+
if stripped.startswith(c):
|
| 392 |
+
sc += 18; reasons.append(f"AI์ ์์ฌ({c})"); break
|
| 393 |
+
else:
|
| 394 |
+
for c in AI_SOFT_CONNS:
|
| 395 |
+
if stripped.startswith(c): sc += 8; reasons.append(f"์ฝํ์ ์์ฌ({c})"); break
|
| 396 |
+
|
| 397 |
+
# โโ ์ํฌ์ ํํ (ํด๋์ + ๋น๊ฒฉ์) โโ
|
| 398 |
+
filler_found = sum(1 for f in AI_FILLER if f in sent)
|
| 399 |
+
casual_filler = sum(1 for f in AI_CASUAL_FILLER if f in sent)
|
| 400 |
+
total_filler = filler_found + casual_filler
|
| 401 |
+
if total_filler >= 3: sc += 25; reasons.append(f"์ํฌํํร{total_filler}")
|
| 402 |
+
elif total_filler == 2: sc += 18; reasons.append(f"์ํฌํํร2")
|
| 403 |
+
elif total_filler == 1: sc += 10; reasons.append(f"์ํฌํํร1")
|
| 404 |
+
|
| 405 |
+
# โโ ์๋ณด-์ฃผ์ฅ ํจํด (Claude/GPT ํน์ ) โโ
|
| 406 |
+
if AI_CONCESSION.search(sent): sc += 10; reasons.append("์๋ณดํจํด")
|
| 407 |
+
|
| 408 |
+
# โโ ๋ชจ๋ธ ์ง๋ฌธ โโ
|
| 409 |
for mn, fp in FP.items():
|
| 410 |
for m in fp["m"]:
|
| 411 |
+
if m in sent: sc += 8; reasons.append(f"{mn}์ง๋ฌธ"); break
|
| 412 |
+
|
| 413 |
+
# โโ ์์ด AI ํจํด (๋ณต์ ๋์ ) โโ
|
| 414 |
+
en_count = sum(1 for em in EN_AI_MARKERS if em in sl)
|
| 415 |
+
if en_count >= 3: sc += 25; reasons.append(f"์์ดAIร{en_count}")
|
| 416 |
+
elif en_count >= 2: sc += 18; reasons.append(f"์์ดAIร{en_count}")
|
| 417 |
+
elif en_count >= 1: sc += 12; reasons.append(f"์์ดAIร1")
|
| 418 |
+
|
| 419 |
+
# โโ ๋ณตํฉ ๋ณด๋์ค: ๊ฒฉ์+์ํฌ+์ ์ ๋์ โ ๊ฑฐ์ ํ์ค AI โโ
|
| 420 |
+
has_formal = any(sr.endswith(e) for e in AI_ENDINGS)
|
| 421 |
+
has_conn = any(stripped.startswith(c) for c in AI_CONNS)
|
| 422 |
+
if has_formal and total_filler >= 1 and has_conn: sc += 8; reasons.append("๋ณตํฉAI")
|
| 423 |
+
elif has_formal and total_filler >= 2: sc += 5; reasons.append("๊ฒฉ์+์ํฌ")
|
| 424 |
+
|
| 425 |
+
# โโ ์ธ๊ฐ ๋ง์ปค (๊ฐ์ ) โ ์ธ๋ถํ โโ
|
| 426 |
for n, p in HUMAN_MARKERS.items():
|
| 427 |
+
matches = p.findall(sent)
|
| 428 |
+
if matches:
|
| 429 |
+
if n in ('ใ
ใ
ใ
','์ด๋ชจํฐ์ฝ','์ค์'): sc -= 25; reasons.append(f"์ธ๊ฐ({n})")
|
| 430 |
+
elif n in ('๋น๊ฒฉ์์ข
๊ฒฐ','๊ตฌ์ด์ถ์ฝ'): sc -= 18; reasons.append(f"๊ตฌ์ด์ฒด({n})")
|
| 431 |
+
elif n == '๋ง์ถค๋ฒ์ค๋ฅ': sc -= 12; reasons.append("๋ง์ถค๋ฒ์ค๋ฅ")
|
| 432 |
+
elif n in ('๋๋ํ','๋ง์ค์ํ'): sc -= 10; reasons.append(f"์ธ๊ฐ({n})")
|
| 433 |
+
|
| 434 |
return max(0, min(100, sc)), reasons
|
| 435 |
|
| 436 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 437 |
+
# ์ถโ ํต๊ณ โ v5.0: Burstiness + ์กฐ๊ฑด๋ถ ์ํธ๋กํผ ์ถ๊ฐ
|
| 438 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 439 |
def analyze_statistics(text, sentences, words):
    """Score the statistical uniformity of a text (axis 1 of the detector).

    Five sub-scores are computed and blended with fixed weights. Each
    sub-score is higher when the text is more uniform (presumably "more
    AI-like", per the rest of the file -- confirm against the caller).

    Args:
        text: Full input text. Not used by this function; kept so all axis
            analyzers share the same call shape.
        sentences: Sequence of sentence strings.
        words: Sequence of word tokens for the whole text.

    Returns:
        ``{"score": 50}`` when fewer than two sentences are supplied,
        otherwise ``{"score": int, "cv": float, "ttr": float}`` where ``cv``
        is the sentence-length coefficient of variation and ``ttr`` the
        type/token ratio, both rounded to 3 decimals.
    """
    lengths = [len(s) for s in sentences]
    if len(lengths) < 2:
        # Not enough sentences to measure variation: return a neutral score.
        return {"score": 50}
    n_sent = len(lengths)

    # 1. Coefficient of variation of sentence length (population std / mean).
    avg = sum(lengths) / n_sent
    std = math.sqrt(sum((l - avg) ** 2 for l in lengths) / n_sent)
    cv = std / avg if avg > 0 else 0
    cv_score = (85 if cv < 0.20 else
                70 if cv < 0.30 else
                50 if cv < 0.45 else
                30 if cv < 0.60 else 15)

    # 2. Burstiness: largest jump between consecutive sentence lengths
    #    relative to the average jump. `diffs` is never empty here because
    #    at least two sentences are guaranteed by the guard above.
    diffs = [abs(cur - prev) for prev, cur in zip(lengths, lengths[1:])]
    avg_d = sum(diffs) / len(diffs)
    burst_ratio = max(diffs) / (avg_d + 1)  # +1 keeps the ratio finite when all diffs are 0
    burst_score = (85 if burst_ratio < 1.8 else
                   65 if burst_ratio < 2.5 else
                   40 if burst_ratio < 3.5 else 20)

    # 3. Share of "standard length" sentences (20..60 characters inclusive).
    standard_ratio = sum(1 for l in lengths if 20 <= l <= 60) / n_sent
    std_score = (80 if standard_ratio > 0.8 else
                 60 if standard_ratio > 0.6 else
                 40 if standard_ratio > 0.4 else 20)
    # Very short (<10) or very long (>80) sentences pull the score down.
    extreme = sum(1 for l in lengths if l < 10 or l > 80)
    if extreme >= 2:
        std_score = max(10, std_score - 20)
    elif extreme >= 1:
        std_score = max(15, std_score - 10)

    # 4. Vocabulary diversity: type/token ratio over `words`.
    total_words = len(words)
    ttr = len(set(words)) / total_words if total_words > 0 else 0
    vocab_score = (70 if ttr < 0.45 else
                   55 if ttr < 0.55 else
                   35 if ttr < 0.65 else 20)

    # 5. Uniformity of words-per-sentence (needs at least 3 sentences).
    wpc = [len(split_words(s)) for s in sentences]
    complex_score = 50
    if len(wpc) >= 3:
        wpc_avg = sum(wpc) / len(wpc)
        wpc_std = math.sqrt(sum((w - wpc_avg) ** 2 for w in wpc) / len(wpc))
        wpc_cv = wpc_std / wpc_avg if wpc_avg > 0 else 0
        complex_score = (80 if wpc_cv < 0.20 else
                         60 if wpc_cv < 0.35 else
                         35 if wpc_cv < 0.50 else 15)

    # Weighted blend; int() truncates toward zero.
    final = int(cv_score*0.20 + burst_score*0.20 + std_score*0.25
                + vocab_score*0.15 + complex_score*0.20)
    return {"score": final, "cv": round(cv, 3), "ttr": round(ttr, 3)}
|
| 481 |
|
| 482 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 483 |
+
# ์ถโก ๋ฌธ์ฒด โ v5.0: ์๋ณดํจํด + ๋น๊ฒฉ์AI + ์ ์์ฌ ์์นํจํด
|
| 484 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 485 |
def analyze_korean_style(text, sentences, morphemes):
    """Score stylistic AI-likeness of Korean text (axis 2 of the detector).

    Combines sentence-ending, connective, filler, concession and
    part-of-speech sub-scores, then subtracts a penalty for every match of
    the module-level ``HUMAN_MARKERS`` patterns.

    Args:
        text: Full input text; searched for fillers, concession patterns and
            human markers.
        sentences: Sequence of sentence strings.
        morphemes: Iterable of ``(surface, tag)`` pairs, or a falsy value when
            no morphological analysis is available.

    Returns:
        ``{"score": 50}`` when ``sentences`` is empty, otherwise a dict with
        ``score`` (int, at least 5), ``formal`` (share of formal endings as a
        percent string), ``conn`` (connective density, 2-decimal string),
        ``filler`` (filler hit count) and ``human`` (human-marker hit count).
    """
    if not sentences:
        return {"score": 50}
    n_sent = len(sentences)

    # 1. Sentence endings: share of formal endings sets the base score and
    #    the share of casual-AI endings adds a bonus (capped at 90).
    formal_endings = tuple(AI_ENDINGS)
    casual_endings = tuple(AI_CASUAL_ENDINGS)
    formal_cnt = sum(1 for s in sentences if s.rstrip('.!?').endswith(formal_endings))
    casual_ai = sum(1 for s in sentences if s.rstrip('.!?').endswith(casual_endings))
    fr = formal_cnt / n_sent
    car = casual_ai / n_sent
    ending_score = (85 if fr > 0.7 else
                    65 if fr > 0.5 else
                    45 if fr > 0.3 else
                    25 if fr > 0.1 else 10)
    ending_score = min(90, ending_score + int(car * 25))

    # 2. Connective density plus regularity of where connectives appear.
    connectives = tuple(AI_CONNS)
    conn_positions = [i for i, s in enumerate(sentences)
                      if s.strip().startswith(connectives)]
    conn_density = len(conn_positions) / n_sent
    conn_score = (85 if conn_density > 0.4 else
                  65 if conn_density > 0.25 else
                  40 if conn_density > 0.1 else 15)
    if len(conn_positions) >= 2:
        # Evenly spaced connective-led sentences (low CV of the gaps) earn a bonus.
        gaps = [cur - prev for prev, cur in zip(conn_positions, conn_positions[1:])]
        gap_mean = sum(gaps) / len(gaps)
        gap_std = math.sqrt(sum((g - gap_mean) ** 2 for g in gaps) / len(gaps))
        gap_cv = gap_std / (gap_mean + 0.01)  # +0.01 guards the division
        if gap_cv < 0.5:
            conn_score = min(90, conn_score + 10)

    # 3. Filler phrases: number of distinct fillers (formal + casual) present.
    filler_cnt = (sum(1 for f in AI_FILLER if f in text)
                  + sum(1 for f in AI_CASUAL_FILLER if f in text))
    filler_score = (90 if filler_cnt >= 6 else
                    75 if filler_cnt >= 4 else
                    55 if filler_cnt >= 2 else
                    30 if filler_cnt >= 1 else 10)

    # 4. Concession-then-claim constructions.
    concession_cnt = len(AI_CONCESSION.findall(text))
    conc_score = 80 if concession_cnt >= 2 else 55 if concession_cnt >= 1 else 20

    # 5. Human markers: each match costs 8 points, capped at 35.
    human_count = sum(len(p.findall(text)) for p in HUMAN_MARKERS.values())
    human_penalty = min(35, human_count * 8)

    # 6. Noun ratio from POS tags; 45 is the neutral default without morphemes.
    pos_score = 45
    if morphemes:
        tag_counts = Counter(tag for _, tag in morphemes)
        total_tags = sum(tag_counts.values())
        noun_total = sum(tag_counts.get(tag, 0) for tag in ['NNG', 'NNP', 'NNB', 'NR'])
        noun_r = noun_total / total_tags if total_tags else 0
        pos_score = (70 if noun_r > 0.42 else
                     55 if noun_r > 0.38 else
                     35 if noun_r > 0.32 else 20)

    # Weighted blend. The trailing 10*0.10 term is a fixed 1-point baseline
    # carried over unchanged from the original formula. Floor of 5 after penalty.
    final = max(5, int(ending_score*0.25 + conn_score*0.20 + filler_score*0.20 +
                       conc_score*0.10 + pos_score*0.15 + 10*0.10) - human_penalty)
    return {"score": final, "formal": f"{fr:.0%}", "conn": f"{conn_density:.2f}",
            "filler": filler_cnt, "human": human_count}
|
| 532 |
|
| 533 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 534 |
+
# ์ถโข ๋ฐ๋ณต โ v5.0: ๋ฌธ๋ N-์ด์ + ๊ตฌ๋ฌธ ํ
ํ๋ฆฟ + ์๋ฏธ ๋ฐ๋ณต
|
| 535 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 536 |
def analyze_repetition(text, sentences, words):
|
| 537 |
+
if not sentences or len(sentences) < 2: return {"score":35}
|
| 538 |
+
|
| 539 |
+
# 1. N-gram ๋ฐ๋ณต (3-gram)
|
| 540 |
tr = 0
|
| 541 |
+
if len(words)>=5:
|
| 542 |
tg = Counter(tuple(words[i:i+3]) for i in range(len(words)-2))
|
| 543 |
tr = sum(1 for c in tg.values() if c>1)/len(tg) if tg else 0
|
| 544 |
+
ngram_score = 80 if tr>0.15 else 60 if tr>0.08 else 35 if tr>0.03 else 15
|
| 545 |
+
|
| 546 |
+
# 2. โ
๋ฌธ๋ 2-3์ด์ ๋ค์์ฑ (์ฒซ ๋จ์ด๋ง์ด ์๋ ์ฒซ 2-3์ด์ )
|
| 547 |
+
openers_2 = []
|
| 548 |
+
openers_3 = []
|
| 549 |
+
for s in sentences:
|
| 550 |
+
ws = split_words(s)
|
| 551 |
+
if len(ws) >= 2: openers_2.append(tuple(ws[:2]))
|
| 552 |
+
if len(ws) >= 3: openers_3.append(tuple(ws[:3]))
|
| 553 |
+
|
| 554 |
+
opener2_score = 50
|
| 555 |
+
if openers_2:
|
| 556 |
+
unique2 = len(set(openers_2))/len(openers_2)
|
| 557 |
+
opener2_score = 80 if unique2 < 0.5 else 60 if unique2 < 0.7 else 35 if unique2 < 0.85 else 15
|
| 558 |
+
|
| 559 |
+
# 3. AI ์ ์์ฌ ๋ฌธ๋ ๋ฐ๋ณต
|
| 560 |
ai_only_conns = ['๋ํ','๋ฐ๋ผ์','๊ทธ๋ฌ๋ฏ๋ก','์ด์ ๋ฐ๋ผ','๋๋ถ์ด','์์ธ๋ฌ','๋ฟ๋ง ์๋๋ผ',
|
| 561 |
'์ด๋ฅผ ํตํด','์ด์','๊ฒฐ๊ณผ์ ์ผ๋ก','๊ถ๊ทน์ ์ผ๋ก','๋์๊ฐ','์ด๋ฌํ']
|
| 562 |
cr = sum(1 for s in sentences if any(s.strip().startswith(c) for c in ai_only_conns))
|
| 563 |
crr = cr/len(sentences) if sentences else 0
|
| 564 |
+
ai_conn_score = 85 if crr>0.35 else 65 if crr>0.2 else 40 if crr>0.08 else 15
|
| 565 |
+
|
| 566 |
+
# 4. โ
๊ตฌ๋ฌธ ํ
ํ๋ฆฟ ๋ฐ๋ณต (์ฃผ์ด+์กฐ์ฌ+...+์ข
๊ฒฐ ํจํด)
|
| 567 |
+
templates = []
|
| 568 |
+
for s in sentences:
|
| 569 |
+
ws = split_words(s)
|
| 570 |
+
if len(ws) >= 4:
|
| 571 |
+
# ์ฒซ ์ด์ + ๋ง์ง๋ง ์ด์ ํจํด
|
| 572 |
+
templates.append((ws[0], ws[-1]))
|
| 573 |
+
template_rep = 0
|
| 574 |
+
if templates:
|
| 575 |
+
tc = Counter(templates)
|
| 576 |
+
template_rep = sum(1 for c in tc.values() if c > 1) / len(tc) if tc else 0
|
| 577 |
+
template_score = 80 if template_rep > 0.3 else 55 if template_rep > 0.1 else 25
|
| 578 |
+
|
| 579 |
+
# 5. โ
์ข
๊ฒฐ์ด๋ฏธ ๋ค์์ฑ (AI๋ ๊ฐ์ ์ข
๊ฒฐ์ด๋ฏธ ๋ฐ๋ณต)
|
| 580 |
+
endings = []
|
| 581 |
+
for s in sentences:
|
| 582 |
+
sr = s.rstrip('.!?ใ')
|
| 583 |
+
for e in AI_ENDINGS + ['์๋ค','ํ๋ค','๋๋ค','ํ๋ค','์ด๋ค','๋๋ค']:
|
| 584 |
+
if sr.endswith(e): endings.append(e); break
|
| 585 |
+
ending_div = 50
|
| 586 |
+
if endings:
|
| 587 |
+
unique_e = len(set(endings))/len(endings)
|
| 588 |
+
ending_div = 80 if unique_e < 0.3 else 60 if unique_e < 0.5 else 35 if unique_e < 0.7 else 15
|
| 589 |
+
|
| 590 |
+
final = int(ngram_score*0.15 + opener2_score*0.20 + ai_conn_score*0.25 +
|
| 591 |
+
template_score*0.15 + ending_div*0.25)
|
| 592 |
+
return {"score":final}
|
| 593 |
|
| 594 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 595 |
+
# ์ถโฃ ๊ตฌ์กฐ โ v5.0: ์ถ์์ฑ/๊ตฌ์ฒด์ฑ + ๋ฌธ์ฅ๋ค์์ฑ + ๊ตฌ๋์
|
| 596 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 597 |
+
# AI ์ถ์ ์์์ด (๊ตฌ์ฒด ๋ช
์ฌ ์์ด ์ฐ์ด๋ AI์ ํ์ฉ์ฌ)
|
| 598 |
+
AI_VAGUE = re.compile(r'๋ค์ํ|์ค์ํ|๊ธ์ ์ ์ธ|๋ถ์ ์ ์ธ|ํ๊ธฐ์ ์ธ|ํ์ ์ ์ธ|ํจ์จ์ ์ธ|์ฒด๊ณ์ ์ธ|์ข
ํฉ์ ์ธ|์ ๋ฐ์ ์ธ|์ง์์ ์ธ|์ ๊ทน์ ์ธ|์๋นํ|์ฃผ์ํ')
|
| 599 |
+
# ๊ตฌ์ฒด์ฑ ์งํ (์ธ๊ฐ์ โ ๊ณ ์ ๋ช
์ฌ, ์ซ์+๋จ์, ์ธ์ฉ/์ถ์ฒ)
|
| 600 |
+
CONCRETE_PROPER = re.compile(r'์ผ์ฑ|LG|ํ๋|SK|์นด์นด์ค|๋ค์ด๋ฒ|๋ทํ๋ฆญ์ค|๊ตฌ๊ธ|์ ํ|ํ
์ฌ๋ผ|์๋ง์กด|๋ง์ดํฌ๋ก์ํํธ|[๊ฐ-ํฃ]{2,}๋ํ|[๊ฐ-ํฃ]{2,}๋ณ์|[๊ฐ-ํฃ]{1,3}์[๋์์ด๊ฐ]|[๊ฐ-ํฃ]{1,3}๊ตฌ[๋์์ด๊ฐ]|[๊ฐ-ํฃ]{2,}๋[์์]')
|
| 601 |
+
CONCRETE_NUMBER = re.compile(r'\d{2,}[๋ง์ต์กฐ์๋ฌ๋ฌ%๊ฐ๋
์์ผ์๋ฑํธ]|\d+\.\d+%|\d{4}๋
|\d{1,2}์')
|
| 602 |
+
CONCRETE_QUOTE = re.compile(r'์ ๋ฐ๋ฅด๋ฉด|๋ฐํํ|๋ฐํ๋ค|๋ณด๋ํ|์ ํ๋ค|๋ผ๊ณ ๋ง|์ธํฐ๋ทฐ|์ค๋ฌธ|์กฐ์ฌ|ํต๊ณ์ฒญ|๋ณด๊ณ ์')
|
| 603 |
+
|
| 604 |
def analyze_structure(text, sentences):
|
| 605 |
+
if not sentences: return {"score":35}
|
| 606 |
+
|
| 607 |
+
# 1. ๋งํฌ๋ค์ด/๋ฆฌ์คํธ
|
| 608 |
+
lt = (len(re.findall(r'^\d+[.)]\s',text,re.M)) + len(re.findall(r'^[-โข*]\s',text,re.M)) +
|
| 609 |
+
len(re.findall(r'^#+\s',text,re.M)) + len(re.findall(r'\*\*[^*]+\*\*',text)))
|
| 610 |
+
list_score = 90 if lt>=5 else 70 if lt>=3 else 45 if lt>=1 else 10
|
| 611 |
+
|
| 612 |
+
# 2. โ
์ถ์์ฑ vs ๊ตฌ์ฒด์ฑ (ํต์ฌ ํ๋ณ โ 40% ๊ฐ์ค์น)
|
| 613 |
+
vague_cnt = len(AI_VAGUE.findall(text))
|
| 614 |
+
proper_cnt = len(CONCRETE_PROPER.findall(text))
|
| 615 |
+
number_cnt = len(CONCRETE_NUMBER.findall(text))
|
| 616 |
+
quote_cnt = len(CONCRETE_QUOTE.findall(text))
|
| 617 |
+
concrete_total = proper_cnt + number_cnt + quote_cnt
|
| 618 |
+
|
| 619 |
+
if vague_cnt >= 3 and concrete_total == 0: abstract_score = 90
|
| 620 |
+
elif vague_cnt >= 2 and concrete_total <= 1: abstract_score = 70
|
| 621 |
+
elif vague_cnt >= 1 and concrete_total == 0: abstract_score = 55
|
| 622 |
+
elif concrete_total >= 3: abstract_score = 10
|
| 623 |
+
elif concrete_total >= 2: abstract_score = 20
|
| 624 |
+
elif concrete_total >= 1: abstract_score = 30
|
| 625 |
+
else: abstract_score = 45
|
| 626 |
+
|
| 627 |
+
# 3. ๋ฌธ์ฅ ์ ํ ๋ค์์ฑ (AI=์์ ๋ฌธ๋ง, ์ธ๊ฐ=์๋ฌธ/๊ฐํ ํผ์ฉ)
|
| 628 |
+
has_question = any(s.strip().endswith('?') for s in sentences)
|
| 629 |
+
has_exclaim = any(s.strip().endswith('!') for s in sentences)
|
| 630 |
+
has_ellipsis = any('...' in s or 'โฆ' in s for s in sentences)
|
| 631 |
+
variety = sum([has_question, has_exclaim, has_ellipsis])
|
| 632 |
+
type_score = 15 if variety >= 2 else 40 if variety >= 1 else 65
|
| 633 |
+
|
| 634 |
+
# 4. ๊ตฌ๋์ ๋จ์กฐ๋ก์
|
| 635 |
+
puncts = re.findall(r'[!?,;:โฆโ\-~]', text)
|
| 636 |
+
unique_punct = len(set(puncts))
|
| 637 |
+
punct_score = 65 if unique_punct <= 1 else 45 if unique_punct <= 3 else 20
|
| 638 |
+
|
| 639 |
+
# 5. ๋ฌธ๋จ ๊ตฌ์กฐ (๋ค๋ฌธ๋จ์ธ ๊ฒฝ์ฐ)
|
| 640 |
paras = [p.strip() for p in text.split('\n\n') if p.strip()]
|
| 641 |
+
para_score = 35
|
| 642 |
+
if len(paras) >= 2:
|
| 643 |
+
pl = [len(split_sentences(p)) for p in paras]
|
| 644 |
+
avg_p = sum(pl)/len(pl)
|
| 645 |
+
if avg_p > 0:
|
| 646 |
+
pcv = math.sqrt(sum((l-avg_p)**2 for l in pl)/len(pl))/avg_p
|
| 647 |
+
para_score = 75 if pcv < 0.2 else 55 if pcv < 0.35 else 30
|
| 648 |
+
if len(paras) >= 3 and pl[0] < avg_p and pl[-1] < avg_p:
|
| 649 |
+
para_score = min(85, para_score + 10)
|
| 650 |
+
|
| 651 |
+
final = int(list_score*0.10 + abstract_score*0.40 + type_score*0.20 + punct_score*0.10 + para_score*0.20)
|
| 652 |
+
return {"score":final}
|
| 653 |
|
| 654 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 655 |
+
# ์ถโค ์ง๋ฌธ โ v5.0: Perplexity + ๋น๊ฒฉ์AI + ์์ด ํจํด
|
| 656 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 657 |
def analyze_model_fingerprint(text, sentences):
|
| 658 |
ms = {}
|
| 659 |
+
sl = text.lower()
|
| 660 |
for mn, fp in FP.items():
|
| 661 |
sc = sum(min(15,text.count(m)*5) for m in fp["m"] if text.count(m)>0)
|
| 662 |
lm = fp["lp"].findall(text)
|
|
|
|
| 664 |
em = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in fp.get("e",[])))
|
| 665 |
if sentences: sc += int((em/len(sentences))*20)
|
| 666 |
ms[mn] = min(100,sc)
|
| 667 |
+
|
| 668 |
+
# โ
๋น๊ฒฉ์ AI ์ผ๋ฐ ์ง๋ฌธ (ํน์ ๋ชจ๋ธ ๋ถ๋ฌธ)
|
| 669 |
+
general_ai = 0
|
| 670 |
+
# ๋น๊ฒฉ์ AI ์ํฌ
|
| 671 |
+
general_ai += sum(5 for f in AI_CASUAL_FILLER if f in text)
|
| 672 |
+
# ๋น๊ฒฉ์ AI ์ข
๊ฒฐ
|
| 673 |
+
casual_end_cnt = sum(1 for s in sentences if any(s.rstrip('.!?').endswith(e) for e in AI_CASUAL_ENDINGS))
|
| 674 |
+
general_ai += casual_end_cnt * 5
|
| 675 |
+
# ์๋ณด ํจํด
|
| 676 |
+
general_ai += len(AI_CONCESSION.findall(text)) * 8
|
| 677 |
+
ms["๋น๊ฒฉ์AI"] = min(100, general_ai)
|
| 678 |
+
|
| 679 |
+
# โ
์์ด AI ์ง๋ฌธ
|
| 680 |
+
en_score = sum(5 for em in EN_AI_MARKERS if em in sl)
|
| 681 |
+
ms["์์ดAI"] = min(100, en_score)
|
| 682 |
+
|
| 683 |
mx = max(ms.values()) if ms else 0
|
| 684 |
+
# ๋ณต์ ๋ชจ๋ธ์์ ์ ์๊ฐ ๋์ค๋ฉด ๋ AI์
|
| 685 |
+
multi = sum(1 for v in ms.values() if v >= 10)
|
| 686 |
+
multi_bonus = 10 if multi >= 3 else 5 if multi >= 2 else 0
|
| 687 |
+
|
| 688 |
+
base = 85 if mx>=50 else 65 if mx>=35 else 45 if mx>=20 else 25 if mx>=10 else 10
|
| 689 |
+
return {"score":min(95, base + multi_bonus),"model_scores":{k:v for k,v in ms.items() if k not in ("๋น๊ฒฉ์AI","์์ดAI") or v > 0}}
|
| 690 |
|
| 691 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 692 |
# ํ์ง
|
|
|
|
| 835 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 836 |
# ์ข
ํฉ ํ์ (์ผ๊ด๋ ๊ธฐ์ค)
|
| 837 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 838 |
+
def compute_verdict(scores, llm_score=-1, sent_avg=-1):
|
| 839 |
+
w={"ํต๊ณ":.08,"๋ฌธ์ฒด":.30,"๋ฐ๋ณต์ฑ":.12,"๊ตฌ์กฐ":.15,"์ง๋ฌธ":.35}
|
| 840 |
ws=sum(scores[k]*w[k] for k in w)
|
| 841 |
+
|
| 842 |
+
# โ
๊ต์ฐจ ์ ํธ ๋ถ์คํธ โ ๋ฌธ์ฒด/์ง๋ฌธ ์ค์ฌ
|
| 843 |
+
style = scores["๋ฌธ์ฒด"]; fp = scores["์ง๋ฌธ"]; rep = scores["๋ฐ๋ณต์ฑ"]; struct = scores["๊ตฌ์กฐ"]
|
| 844 |
+
if style >= 35 and fp >= 35: ws += 8 # ๋ฌธ์ฒด+์ง๋ฌธ ๋์ โ ๊ฐํ AI ์ ํธ
|
| 845 |
+
elif style >= 30 and fp >= 25: ws += 4
|
| 846 |
+
if style >= 30 and rep >= 25 and fp >= 20: ws += 4 # 3์ถ ์ฝ์ ํธ
|
| 847 |
+
if fp >= 45: ws += 3 # ๊ฐํ ์ง๋ฌธ ๋จ๋
๋ถ์คํธ
|
| 848 |
+
if struct >= 50 and style >= 30: ws += 3 # ์ถ์์ +๊ฒฉ์ ๋ฌธ์ฒด
|
| 849 |
+
|
| 850 |
+
# โ
๋ฌธ์ฅ ์์ค ๋ถ์คํธ (๋์ด๋ด๋ฆฌ์ง ์์)
|
| 851 |
+
if sent_avg >= 0 and sent_avg > ws:
|
| 852 |
+
ws = ws * 0.80 + sent_avg * 0.20
|
| 853 |
+
|
| 854 |
hi=sum(1 for v in scores.values() if v>=50)
|
| 855 |
+
if hi>=4: ws+=8
|
| 856 |
+
elif hi>=3: ws+=5
|
| 857 |
+
elif hi>=2: ws+=2
|
| 858 |
+
|
| 859 |
+
# โ
์ธ๊ฐ ๊ฒฉ์๋ฌธ ํ ์ธ โ ์ง๋ฌธ์ด ๋ฎ๊ณ ๊ตฌ์กฐ๊ฐ ๊ตฌ์ฒด์ (๋ฎ์)์ธ ๊ฒฝ์ฐ๋ง
|
| 860 |
+
if style < 40 and fp <= 20 and rep < 22 and struct < 35:
|
| 861 |
+
ws -= 5 # ๊ฒฉ์์ด์ง๋ง AI ์ง๋ฌธ ์๊ณ ๊ตฌ์ฒด์ = ์ธ๊ฐ
|
| 862 |
+
|
| 863 |
+
lo=sum(1 for v in scores.values() if v<20)
|
| 864 |
+
if lo>=3: ws-=8
|
| 865 |
+
elif lo>=2: ws-=3
|
| 866 |
if llm_score>=0: ws=ws*0.70+llm_score*0.30
|
| 867 |
fs=max(0,min(100,int(ws)))
|
| 868 |
if fs>=75: return fs,"AI ์์ฑ ํ์ ","ai_high"
|
|
|
|
| 876 |
sc={"ํต๊ณ":analyze_statistics(text,sents,words)["score"],"๋ฌธ์ฒด":analyze_korean_style(text,sents,morphs)["score"],
|
| 877 |
"๋ฐ๋ณต์ฑ":analyze_repetition(text,sents,words)["score"],"๊ตฌ์กฐ":analyze_structure(text,sents)["score"],
|
| 878 |
"์ง๋ฌธ":analyze_model_fingerprint(text,sents)["score"]}
|
| 879 |
+
# ๋ฌธ์ฅ ์์ค ํ๊ท ๊ณ์ฐ
|
| 880 |
+
sent_scores = [score_sentence(s)[0] for s in sents]
|
| 881 |
+
sent_avg = sum(sent_scores)/len(sent_scores) if sent_scores else -1
|
| 882 |
+
fs,v,lv=compute_verdict(sc, sent_avg=sent_avg); return fs,v,lv,sc
|
| 883 |
|
| 884 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 885 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 886 |
# โ
ํ์ ๊ฒ์ฌ (Brave Search ๋ณ๋ ฌ + KCI/RISS/ARXIV + Gemini)
|
|
|
|
| 887 |
def brave_search(query, count=5):
|
| 888 |
"""Brave Search API โ ๋จ์ผ ์ฟผ๋ฆฌ"""
|
| 889 |
if not BRAVE_KEY: return []
|
|
|
|
| 1441 |
progress(0.62); qr=analyze_quality(text,sents,words,morphs)
|
| 1442 |
progress(0.75); lr=llm_cross_check(text)
|
| 1443 |
sc={"ํต๊ณ":s1["score"],"๋ฌธ์ฒด":s2["score"],"๋ฐ๋ณต์ฑ":s3["score"],"๊ตฌ์กฐ":s4["score"],"์ง๋ฌธ":s5["score"]}
|
| 1444 |
+
# ๋ฌธ์ฅ๋ณ ์ ์ (ํญ2์ ๋์ผ ๊ธฐ์ค)
|
| 1445 |
+
sent_scores = [score_sentence(s)[0] for s in sents]
|
| 1446 |
+
sent_avg = sum(sent_scores)/len(sent_scores) if sent_scores else -1
|
| 1447 |
+
fs,verdict,level=compute_verdict(sc,lr["score"],sent_avg=sent_avg)
|
| 1448 |
progress(0.95)
|
| 1449 |
cm={"ai_high":("#FF4444","#FFE0E0","๋์"),"ai_medium":("#FF8800","#FFF0DD","์ค๊ฐ~๋์"),"ai_low":("#DDAA00","#FFFBE0","์ค๊ฐ"),"uncertain":("#888","#F0F0F0","๋ฎ์"),"human":("#22AA44","#E0FFE8","๋งค์ฐ ๋ฎ์")}
|
| 1450 |
fg,bg,conf=cm.get(level,("#888","#F0F0F0","?"))
|
| 1451 |
ms=s5.get("model_scores",{}); tm=max(ms,key=ms.get) if ms else "N/A"; tms=ms.get(tm,0)
|
| 1452 |
mt=f"{tm} ({tms}์ )" if tms>=15 else "ํน์ ๋ถ๊ฐ"
|
| 1453 |
|
|
|
|
|
|
|
| 1454 |
ai_sents = sum(1 for s in sent_scores if s >= 40)
|
| 1455 |
human_sents = sum(1 for s in sent_scores if s < 20)
|
| 1456 |
|
|
|
|
| 1548 |
sents=split_sentences(text)
|
| 1549 |
hl=[]
|
| 1550 |
for s in sents:
|
| 1551 |
+
sc, reasons = score_sentence(s)
|
| 1552 |
+
# 5๋จ๊ณ ์์
|
| 1553 |
+
if sc >= 60: bg="rgba(220,38,38,0.35)"; level="AIํ์ "
|
| 1554 |
+
elif sc >= 40: bg="rgba(249,115,22,0.30)"; level="AI์์ฌ"
|
| 1555 |
+
elif sc >= 25: bg="rgba(234,179,8,0.25)"; level="์ฃผ์"
|
| 1556 |
+
elif sc >= 10: bg="rgba(132,204,22,0.15)"; level="์ธ๊ฐ์ถ์ "
|
| 1557 |
+
else: bg="rgba(34,197,94,0.20)"; level="์ธ๊ฐ"
|
| 1558 |
+
# ๊ทผ๊ฑฐ ์์ธ
|
| 1559 |
+
detail_parts = []
|
| 1560 |
+
for r in reasons:
|
| 1561 |
+
if '๊ฒฉ์' in r or '๋น๊ฒฉ์AI' in r: detail_parts.append(f"๐ค {r}")
|
| 1562 |
+
elif '์ ์์ฌ' in r: detail_parts.append(f"๐ {r}")
|
| 1563 |
+
elif '์ํฌ' in r: detail_parts.append(f"๐ {r}")
|
| 1564 |
+
elif '์ง๋ฌธ' in r: detail_parts.append(f"๐ {r}")
|
| 1565 |
+
elif '์ธ๊ฐ' in r or '๊ตฌ์ด' in r or '๋ง์ถค๋ฒ' in r: detail_parts.append(f"โ
{r}")
|
| 1566 |
+
else: detail_parts.append(r)
|
| 1567 |
+
tt = ' | '.join(detail_parts) if detail_parts else 'ํน์ด ํจํด ์์'
|
| 1568 |
+
hl.append(f'<span style="background:{bg};padding:2px 4px;border-radius:4px;display:inline;line-height:2.2;border-bottom:2px solid {"#DC2626" if sc>=60 else "#F97316" if sc>=40 else "#EAB308" if sc>=25 else "#84CC16" if sc>=10 else "#22C55E"};" title="[{level}] {tt} ({sc}์ )">{s}</span>')
|
| 1569 |
|
| 1570 |
total_scores = [score_sentence(s)[0] for s in sents]
|
| 1571 |
avg_sc = sum(total_scores)/len(total_scores) if total_scores else 0
|
| 1572 |
+
ai_high = sum(1 for s in total_scores if s >= 60)
|
| 1573 |
+
ai_mid = sum(1 for s in total_scores if 40 <= s < 60)
|
| 1574 |
human_cnt = sum(1 for s in total_scores if s < 25)
|
| 1575 |
|
| 1576 |
return f"""<div style='font-family:Pretendard,sans-serif;'>
|
| 1577 |
<div style='margin-bottom:10px;padding:10px;background:#F8F8FF;border-radius:8px;'>
|
| 1578 |
+
<div style='display:flex;gap:8px;align-items:center;font-size:11px;margin-bottom:6px;flex-wrap:wrap;'>
|
| 1579 |
+
<span style='background:rgba(220,38,38,0.35);padding:2px 8px;border-radius:3px;'>๐ด AIํ์ {ai_high}</span>
|
| 1580 |
+
<span style='background:rgba(249,115,22,0.30);padding:2px 8px;border-radius:3px;'>๐ AI์์ฌ {ai_mid}</span>
|
| 1581 |
+
<span style='background:rgba(234,179,8,0.25);padding:2px 8px;border-radius:3px;'>๐ก ์ฃผ์</span>
|
| 1582 |
+
<span style='background:rgba(132,204,22,0.15);padding:2px 8px;border-radius:3px;'>๐ข ์ธ๊ฐ {human_cnt}</span>
|
| 1583 |
+
<span style='color:#888;'>ํ๊ท {avg_sc:.0f}์ | ๋ง์ฐ์ค ์ค๋ฒโ์์ธ ๊ทผ๊ฑฐ</span>
|
| 1584 |
</div>
|
| 1585 |
+
<div style='font-size:10px;color:#888;'>๐ก ๊ฒฉ์์ด๋ฏธ(22) + AI์ ์์ฌ(18) + ์ํฌํํ(10~25) + ์๋ณดํจํด(10) + ๋ชจ๋ธ์ง๋ฌธ(8) + ๋ณตํฉ๋ณด๋์ค(8) โ ์ธ๊ฐ๋ง์ปค(~25)</div>
|
| 1586 |
</div>
|
| 1587 |
+
<div style='line-height:2.4;font-size:14px;'>{' '.join(hl)}</div>
|
| 1588 |
</div>"""
|
| 1589 |
|
| 1590 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
|
| 1637 |
progress(0.30, "LLM ๊ต์ฐจ๊ฒ์ฆ...")
|
| 1638 |
llm_result = llm_cross_check(full_text[:3000])
|
| 1639 |
if llm_result["score"] >= 0:
|
| 1640 |
+
_sent_scores = [score_sentence(s)[0] for s in sents_all]
|
| 1641 |
+
_sent_avg = sum(_sent_scores)/len(_sent_scores) if _sent_scores else -1
|
| 1642 |
+
total_score, total_verdict, total_level = compute_verdict(total_axes, llm_result["score"], sent_avg=_sent_avg)
|
| 1643 |
|
| 1644 |
# ์น์
๋ณ ๋ถ์
|
| 1645 |
progress(0.45, f"{len(sections)}๊ฐ ์น์
๋ถ์...")
|
|
|
|
| 1886 |
|
| 1887 |
|
| 1888 |
with gr.Blocks(title="AI ๊ธ ํ๋ณ๊ธฐ v4.0") as demo:
|
| 1889 |
+
gr.Markdown("# ๐ AI ๊ธ ํ๋ณ๊ธฐ v5.0\n**5์ถ AI ํ์ง ยท ํ์ง ์ธก์ ยท LLM ๊ต์ฐจ๊ฒ์ฆ ยท ํ์ ๊ฒ์ฌ ยท ํ์ผ ์
๋ก๋**")
|
| 1890 |
with gr.Tab("๐ ๋ถ์"):
|
| 1891 |
gr.Markdown("ํ
์คํธ๊ฐ AI์ ์ํด ์์ฑ๋์๋์ง 5๊ฐ ์ถ์ผ๋ก ๋ถ์ํฉ๋๋ค. 0~100์ (๋์์๋ก AI ๊ฐ๋ฅ์ฑ ๋์)")
|
| 1892 |
inp=gr.Textbox(label="๋ถ์ํ ํ
์คํธ",placeholder="์ต์ 50์ ์ด์...",lines=10)
|
|
|
|
| 1900 |
gr.Markdown("๋ฌธ์ฅ๋ณ๋ก AI ํ๋ฅ ์ ์์ ํ์ํฉ๋๋ค. **ํญ1๊ณผ ๋์ผํ ๊ธฐ์ค**์ผ๋ก ํ์ ํฉ๋๋ค. ๋ง์ฐ์ค ์ค๋ฒ ์ ๊ทผ๊ฑฐ ํ์ธ.")
|
| 1901 |
ih=gr.Textbox(label="ํ
์คํธ",lines=8); bh=gr.Button("๐จ ํ์ด๋ผ์ดํธ ๋ถ์",variant="primary"); hr=gr.HTML()
|
| 1902 |
bh.click(run_highlight,[ih],[hr],api_name="run_highlight")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1903 |
with gr.Tab("๐ ํ์ ๊ฒ์ฌ"):
|
| 1904 |
gr.Markdown("**Brave Search ๋ณ๋ ฌ(์ต๋20) + KCI ยท RISS ยท arXiv + Gemini Google Search** ๊ธฐ๋ฐ ํ์ ๊ฒ์ฌ. CopyKiller ์คํ์ผ ๋ณด๊ณ ์.")
|
| 1905 |
inp_plag=gr.Textbox(label="๊ฒ์ฌํ ํ
์คํธ",placeholder="ํ์ ๊ฒ์ฌํ ํ
์คํธ (์ต์ 50์)...",lines=10)
|
|
|
|
| 1911 |
btn_ps.click(lambda:SAMPLE_AI,outputs=[inp_plag])
|
| 1912 |
with gr.Tab("๐ ์ค๋ช
"):
|
| 1913 |
gr.Markdown("""
|
| 1914 |
+
### ์ํคํ
์ฒ v5.0
|
| 1915 |
- **ํ์ง 5์ถ:** ํต๊ณ(25%)ยท๋ฌธ์ฒด(30%)ยท๋ฐ๋ณต(15%)ยท๊ตฌ์กฐ(15%)ยท์ง๋ฌธ(15%)
|
| 1916 |
- **ํ์ง 6ํญ๋ชฉ:** ๊ฐ๋
์ฑยท์ดํยท๋
ผ๋ฆฌยท์ ํ์ฑยทํํยท์ ๋ณด๋ฐ๋
|
| 1917 |
- **LLM ๊ต์ฐจ๊ฒ์ฆ:** GPT-OSS-120BยทQwen3-32BยทKimi-K2 (GROQ)
|
|
|
|
| 1920 |
- `score_sentence()` ํตํฉ ํจ์๋ก ๋์ผ ๊ธฐ์ค ํ์
|
| 1921 |
- ๊ฒฉ์์ด๋ฏธ(25์ ) + AI์ ์์ฌ(20์ ) + ์ํฌํํ(15~25์ ) + ๋ชจ๋ธ์ง๋ฌธ(10์ ) โ ์ธ๊ฐ๋ง์ปค(30์ )
|
| 1922 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1923 |
### ํ์ ๊ฒ์ฌ
|
| 1924 |
- **Brave Search**: ๋ณ๋ ฌ 20๊ฐ ๋์ ์น๊ฒ์
|
| 1925 |
- **ํ์ DB**: KCI(ํ๊ตญํ์ ์ง์ธ์ฉ์์ธ), RISS(ํ์ ์ฐ๊ตฌ์ ๋ณด), arXiv
|
| 1926 |
- **Gemini**: Google Search Grounding
|
| 1927 |
- **๋ณด๊ณ ์**: CopyKiller ์คํ์ผ โ ์ ์ฌ๋%, ์ถ์ฒํ, ๋ฌธ์ฅ๋ณ ํ์ด๋ผ์ดํธ
|
| 1928 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1929 |
### ํ๊ฒฝ๋ณ์
|
| 1930 |
+
- `GROQ_API_KEY` โ LLM ๊ต์ฐจ๊ฒ์ฆ
|
| 1931 |
- `GEMINI_API_KEY` โ ํ์ ๊ฒ์ฌ (Google Search Grounding)
|
| 1932 |
- `BRAVE_API_KEY` โ ํ์ ๊ฒ์ฌ (Brave Search ๋ณ๋ ฌ)
|
| 1933 |
""")
|