openfree committed on
Commit
0eb94d6
·
verified ·
1 Parent(s): ada4ae5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -330
app.py CHANGED
@@ -1020,359 +1020,196 @@ def self_crawl_search(query, max_results=3):
1020
  all_results.extend(duckduckgo_search(f"{query} λ…Όλ¬Έ ν•™μˆ ", 2))
1021
  return all_results
1022
  def run_plagiarism(text, progress=gr.Progress()):
1023
- if not text or len(text.strip())<50:
 
1024
  return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ μ΅œμ†Œ 50자 이상</div>", ""
 
1025
  text = text.strip()
1026
- sents = split_sentences(text)
1027
  now = datetime.now().strftime("%Y-%m-%d %H:%M")
1028
- has_brave = bool(BRAVE_KEY)
1029
- has_gemini = bool(HAS_GENAI and GEMINI_KEY)
1030
- progress(0.05, "λ¬Έμž₯ 뢄리...")
1031
- blocks = []
1032
- for i in range(0, len(sents), 4):
1033
- block = ' '.join(sents[i:i+4])
1034
- if len(block) > 20:
1035
- blocks.append({"text": block, "sent_indices": list(range(i, min(i+4, len(sents))))})
1036
  all_sources = []
1037
- sent_matches = {i: [] for i in range(len(sents))} # λ¬Έμž₯별 λ§€μΉ­ 정보
1038
- block_results = []
1039
- log_lines = []
1040
- if has_brave:
1041
- progress(0.15, f"Brave Search 병렬 검색 ({len(blocks)}블둝)...")
1042
- queries = []
1043
- for b in blocks:
1044
- key_phrase = b["text"][:60].strip()
1045
- queries.append(f'"{key_phrase}"')
1046
- brave_results = parallel_brave_search(queries[:20])
1047
- for q, results in brave_results.items():
1048
- for r in results:
1049
- all_sources.append(r)
1050
- for b in blocks:
1051
- if q.strip('"') in b["text"][:60]:
1052
- for si in b["sent_indices"]:
1053
- sent_matches[si].append({"source": r["title"], "url": r["url"], "type": "Brave"})
1054
- log_lines.append(f"Brave Search: {len(queries)}쿼리 β†’ {sum(len(v) for v in brave_results.values())}건")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1055
  else:
1056
- progress(0.15, f"자체 μ›Ή 검색 ({len(blocks)}블둝)...")
1057
- crawl_queries = []
1058
- for b in blocks[:10]: # μ΅œλŒ€ 10블둝
1059
- key_phrase = b["text"][:50].strip()
1060
- crawl_queries.append((key_phrase, b))
1061
- with ThreadPoolExecutor(max_workers=5) as executor:
1062
- futures = {executor.submit(self_crawl_search, q, 3): (q, b) for q, b in crawl_queries}
1063
- for future in as_completed(futures):
1064
- q, b = futures[future]
1065
- try:
1066
- results = future.result()
1067
- for r in results:
1068
- all_sources.append(r)
1069
- for si in b["sent_indices"]:
1070
- sent_matches[si].append({"source": r["title"], "url": r["url"], "type": r.get("source","Web")})
1071
- except: pass
1072
- log_lines.append(f"자체 웹검색: {len(crawl_queries)}쿼리 (DuckDuckGo)")
1073
- progress(0.40, "ν•™μˆ  DB 검색 (KCI/RISS/arXiv)...")
1074
- words = split_words(text)
1075
- wf = Counter(words)
1076
- keywords = [w for w, c in wf.most_common(20) if len(w) >= 2 and c >= 2][:5]
1077
- kw_query = ' '.join(keywords[:3])
1078
- academic_results = []
1079
- with ThreadPoolExecutor(max_workers=3) as executor:
1080
- futures = [
1081
- executor.submit(search_kci, kw_query),
1082
- executor.submit(search_riss, kw_query),
1083
- executor.submit(search_arxiv, kw_query),
1084
- ]
1085
- for future in as_completed(futures):
1086
- try:
1087
- results = future.result()
1088
- academic_results.extend(results)
1089
- all_sources.extend(results)
1090
- except: pass
1091
- log_lines.append(f"ν•™μˆ DB: KCI/RISS/arXiv β†’ {len(academic_results)}건")
1092
- gemini_results = []
1093
- if has_gemini:
1094
- progress(0.60, "Gemini + Google Search...")
1095
- for i, b in enumerate(blocks[:5]): # μ΅œλŒ€ 5블둝
1096
- gr_result = gemini_plagiarism_check(b["text"])
1097
- if gr_result:
1098
- gemini_results.append(gr_result)
1099
- for src in gr_result.get("sources", []):
1100
- all_sources.append(src)
1101
- for si in b["sent_indices"]:
1102
- sent_matches[si].append({"source": src.get("title",""), "url": src.get("url",""), "type": "Google"})
1103
- log_lines.append(f"Gemini: {len(blocks[:5])}블둝 β†’ {sum(len(r.get('sources',[])) for r in gemini_results)}좜처")
1104
- progress(0.80, "λ³΄κ³ μ„œ 생성...")
1105
- matched_sents = sum(1 for si, matches in sent_matches.items() if matches)
1106
- total_sents = len(sents)
1107
 
1108
- # βœ… Gemini 90% (메인) + Brave/ν•™μˆ DB 10% (보쑰)
1109
- brave_pct = int(matched_sents / total_sents * 100) if total_sents > 0 else 0
 
 
1110
 
1111
- if gemini_results:
1112
- gemini_pcts = [r["pct"] for r in gemini_results if r["pct"] > 0]
1113
- if gemini_pcts:
1114
- gemini_avg = sum(gemini_pcts) / len(gemini_pcts)
1115
- plag_pct = int(gemini_avg * 0.9 + brave_pct * 0.1)
1116
- else:
1117
- plag_pct = brave_pct
 
1118
  else:
1119
- plag_pct = brave_pct
1120
- seen_urls = set()
1121
- unique_sources = []
1122
- for s in all_sources:
1123
- url = s.get("url", "")
1124
- if url and url not in seen_urls:
1125
- seen_urls.add(url)
1126
- unique_sources.append(s)
1127
- if plag_pct >= 50: grade, grade_color, grade_bg = "ν‘œμ ˆ μ˜μ‹¬", "#FF4444", "#FFE0E0"
1128
- elif plag_pct >= 30: grade, grade_color, grade_bg = "주의 ν•„μš”", "#FF8800", "#FFF0DD"
1129
- elif plag_pct >= 15: grade, grade_color, grade_bg = "μœ μ‚¬ ν‘œν˜„ 일뢀", "#DDAA00", "#FFFBE0"
1130
- elif plag_pct >= 5: grade, grade_color, grade_bg = "μ–‘ν˜Έ", "#4ECDC4", "#E0FFF8"
1131
- else: grade, grade_color, grade_bg = "우수 (원본성 λ†’μŒ)", "#22AA44", "#E0FFE8"
1132
- sent_analysis = []
1133
- for i, s in enumerate(sents):
1134
- matches = sent_matches.get(i, [])
1135
- if matches:
1136
- best = matches[0]
1137
- sent_analysis.append({"idx":i, "text":s, "matched":True, "source":best.get("source","")[:40], "url":best.get("url",""), "type":best.get("type","")})
1138
- else:
1139
- sent_analysis.append({"idx":i, "text":s, "matched":False})
1140
- sim_sents = [s for s in sent_analysis if s["matched"]]
1141
- src_groups = {}
1142
- for src in unique_sources:
1143
- key = src.get("url","")[:80]
1144
- if key not in src_groups:
1145
- src_groups[key] = {"title":src.get("title",""), "url":src.get("url",""), "source":src.get("source",""), "count":0}
1146
- src_groups[key]["count"] += 1
1147
- src_list = sorted(src_groups.values(), key=lambda x: -x["count"])
1148
- methods_used = []
1149
- if has_brave: methods_used.append("Brave Search(병렬)")
1150
- elif all_sources: methods_used.append("DuckDuckGo(자체크둀링)")
1151
- methods_used.append("KCI Β· RISS Β· arXiv")
1152
- if has_gemini: methods_used.append("Gemini+Google Search")
1153
- method_str = " + ".join(methods_used)
1154
- gc = grade_color
1155
  word_count = len(split_words(text))
1156
  char_count = len(text)
1157
- doc_id = hashlib.md5(text[:100].encode()).hexdigest()[:8].upper()
1158
- similarity_pct = plag_pct
1159
- citation_pct = 0
1160
- cat_suspect = len(sim_sents) # μ˜μ‹¬
1161
- cat_cited = 0 # 인용 (ν˜•μ‹μ  인용 감지)
1162
- cat_normal = total_sents - cat_suspect - cat_cited # 일반
1163
- cat_suspect_pct = int(cat_suspect / max(1, total_sents) * 100)
1164
- cat_normal_pct = 100 - cat_suspect_pct
1165
- def src_icon(s):
1166
- src = s.get("source","").lower()
1167
- if "kci" in src: return "πŸ“š", "KCI"
1168
- if "riss" in src: return "πŸ“–", "RISS"
1169
- if "arxiv" in src: return "πŸ“„", "arXiv"
1170
- if "google" in src: return "πŸ”", "Google"
1171
- if "brave" in src: return "🌐", "Brave"
1172
- return "🌐", "Web"
1173
  src_rows = ""
1174
- for i, sg in enumerate(src_list[:15]):
1175
- pct = min(100, int(sg["count"] / max(1, total_sents) * 100 * 3))
1176
- ico, stype = src_icon(sg)
1177
- title_short = sg["title"][:50] or "(제λͺ© μ—†μŒ)"
1178
- url_short = sg["url"][:60]
1179
- src_rows += f"""<tr>
1180
- <td style="padding:6px 8px;font-size:11px;text-align:center;border:1px solid #D5D5D5;color:#555;">{i+1}</td>
1181
- <td style="padding:6px 8px;font-size:11px;text-align:center;border:1px solid #D5D5D5;"><span style="font-size:14px;">{ico}</span><br><span style="font-size:9px;color:#666;">{stype}</span></td>
1182
- <td style="padding:6px 8px;border:1px solid #D5D5D5;"><div style="font-size:11px;font-weight:600;color:#1A3C6E;margin-bottom:2px;">{title_short}</div><div style="font-size:9px;color:#888;word-break:break-all;">{url_short}</div></td>
1183
- <td style="padding:6px 8px;font-size:12px;text-align:center;border:1px solid #D5D5D5;font-weight:800;color:#D63031;">{pct}%</td>
1184
- <td style="padding:6px 10px;border:1px solid #D5D5D5;"><div style="background:#EDEDED;height:14px;border-radius:2px;overflow:hidden;"><div style="background:linear-gradient(90deg,#D63031,#FF7675);height:100%;width:{max(3,pct)}%;border-radius:2px;"></div></div></td>
1185
  </tr>"""
1186
- suspect_rows = ""
1187
- for i, sa in enumerate(sim_sents[:15]):
1188
- suspect_rows += f"""<tr>
1189
- <td style="padding:8px;font-size:11px;text-align:center;color:#888;border:1px solid #D5D5D5;vertical-align:top;">{i+1}</td>
1190
- <td style="padding:8px;font-size:11px;line-height:1.7;border:1px solid #D5D5D5;vertical-align:top;"><span style="background:#FFF3CD;border-bottom:2px solid #FFD43B;padding:1px 3px;">{sa["text"][:90]}</span></td>
1191
- <td style="padding:8px;font-size:10px;line-height:1.6;border:1px solid #D5D5D5;vertical-align:top;color:#555;"><span style="background:#FFE0E0;border-bottom:2px solid #E74C3C;padding:1px 3px;">{sa["text"][:70]}...</span></td>
1192
- <td style="padding:8px;font-size:10px;border:1px solid #D5D5D5;vertical-align:top;"><a href="{sa.get('url','#')}" target="_blank" style="color:#2E86C1;text-decoration:none;font-weight:600;">{sa["source"][:28]}</a><br><span style="font-size:8px;color:#AAA;">{sa.get('type','')}</span></td>
1193
- </tr>"""
1194
- full_hl = ""
1195
- for sa in sent_analysis:
1196
- sidx = sa["idx"] + 1
1197
- if sa["matched"]:
1198
- full_hl += f'<span style="background:#FFD6D6;border-bottom:2px solid #E74C3C;padding:1px 2px;cursor:pointer;" title="[μ˜μ‹¬ #{sidx}] 좜처: {sa.get("source","")}">{sa["text"]}</span> '
1199
- else:
1200
- full_hl += f'<span style="color:#333;">{sa["text"]}</span> '
1201
- bar_suspect_w = max(2, cat_suspect_pct) if cat_suspect > 0 else 0
1202
- bar_normal_w = 100 - bar_suspect_w
1203
- HDR_BG = '#3B7DD8'
1204
- HDR_BG2 = '#4A8DE0'
1205
- TH = 'padding:8px 10px;font-size:10px;font-weight:700;color:#fff;background:{};text-align:center;border:1px solid {};'.format(HDR_BG, HDR_BG)
1206
- TL = 'padding:7px 10px;font-size:11px;color:#444;font-weight:600;background:#EDF2FA;border:1px solid #D5D5D5;'
1207
- TV = 'padding:7px 10px;font-size:12px;color:#333;border:1px solid #D5D5D5;'
1208
- SEC = 'font-size:13px;font-weight:800;color:#1A3C6E;margin:0 0 10px 0;padding:8px 12px;background:#EDF2FA;border-left:4px solid {};border-bottom:1px solid #D5D5D5;'.format(HDR_BG)
1209
- html = f"""<div style="font-family:'Noto Sans KR','Malgun Gothic','Apple SD Gothic Neo',sans-serif;max-width:780px;margin:0 auto;background:#fff;border:2px solid #3B7DD8;box-shadow:0 2px 12px rgba(0,0,0,0.08);">
1210
- <!-- ═══════ 헀더 (CopyKiller μŠ€νƒ€μΌ) ═══════ -->
1211
- <div style="background:linear-gradient(135deg,{HDR_BG},{HDR_BG2});padding:18px 24px;color:#fff;">
1212
- <table style="width:100%;"><tr>
1213
- <td>
1214
- <div style="font-size:10px;opacity:0.8;letter-spacing:1px;margin-bottom:4px;">AI TEXT DETECTOR Β· PLAGIARISM REPORT</div>
1215
- <div style="font-size:22px;font-weight:900;letter-spacing:-0.5px;">ν‘œμ ˆ 검사 κ²°κ³Ό ν™•οΏ½οΏ½οΏ½μ„œ</div>
1216
- </td>
1217
- <td style="text-align:right;vertical-align:bottom;">
1218
- <div style="font-size:10px;opacity:0.7;">λ¬Έμ„œλ²ˆν˜Έ {doc_id}</div>
1219
- <div style="font-size:10px;opacity:0.7;">{now}</div>
1220
- </td>
1221
- </tr></table>
1222
- </div>
1223
- <!-- ═══════ 제좜 정보 ν…Œμ΄λΈ” ═══════ -->
1224
- <div style="padding:16px 24px 0;">
1225
- <div style="{SEC}">πŸ“‹ 검사 정보</div>
1226
- <table style="width:100%;border-collapse:collapse;">
1227
- <tr>
1228
- <td style="{TL}width:90px;">검사 μΌμ‹œ</td>
1229
- <td style="{TV}">{now}</td>
1230
- <td style="{TL}width:90px;">λ¬Έμ„œλ²ˆν˜Έ</td>
1231
- <td style="{TV}">{doc_id}</td>
1232
- </tr>
1233
- <tr>
1234
- <td style="{TL}">검사 방법</td>
1235
- <td style="{TV}font-size:10px;" colspan="3">{method_str}</td>
1236
- </tr>
1237
- <tr>
1238
- <td style="{TL}">전체 λΆ„λŸ‰</td>
1239
- <td style="{TV}" colspan="3">κΈ€μžμˆ˜ <b>{char_count:,}</b> Β· μ–΄μ ˆμˆ˜ <b>{word_count:,}</b> Β· λ¬Έμž₯수 <b>{total_sents}</b></td>
1240
- </tr>
1241
- <tr>
1242
- <td style="{TL}">검색 λ²”μœ„</td>
1243
- <td style="{TV}" colspan="3">인터넷(μ›Ή), ν•™μˆ λ…Όλ¬Έ(KCIΒ·RISS), ν•΄μ™Έλ…Όλ¬Έ(arXiv), Google Scholar</td>
1244
- </tr>
1245
- </table>
1246
- </div>
1247
- <!-- ═══════ 검사 κ²°κ³Ό (도넛 + λ°”) ═══════ -->
1248
- <div style="padding:18px 24px 0;">
1249
- <div style="{SEC}">πŸ“Š 검사 κ²°κ³Ό</div>
1250
- <table style="width:100%;border-collapse:collapse;">
1251
- <tr>
1252
- <!-- 도넛 차트 -->
1253
- <td style="width:200px;padding:20px;text-align:center;border:1px solid #D5D5D5;vertical-align:middle;background:#FAFBFE;">
1254
- <div style="position:relative;width:140px;height:140px;margin:0 auto;">
1255
- <svg viewBox="0 0 36 36" style="width:140px;height:140px;transform:rotate(-90deg);">
1256
- <circle cx="18" cy="18" r="15.9" fill="none" stroke="#E8ECF0" stroke-width="2.8"/>
1257
- <circle cx="18" cy="18" r="15.9" fill="none" stroke="{gc}" stroke-width="2.8" stroke-dasharray="{plag_pct} {100-plag_pct}" stroke-linecap="round"/>
1258
- </svg>
1259
- <div style="position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);text-align:center;">
1260
- <div style="font-size:9px;color:#888;margin-bottom:2px;">ν‘œμ ˆλ₯ </div>
1261
- <div style="font-size:32px;font-weight:900;color:{gc};line-height:1;">{plag_pct}<span style="font-size:14px;font-weight:700;">%</span></div>
1262
- </div>
1263
- </div>
1264
- <div style="margin-top:8px;padding:4px 12px;background:{grade_bg};border:1px solid {gc};border-radius:20px;display:inline-block;">
1265
- <span style="font-size:11px;font-weight:800;color:{gc};">{grade}</span>
1266
- </div>
1267
- </td>
1268
- <!-- 상세 κ²°κ³Ό -->
1269
- <td style="padding:0;border:1px solid #D5D5D5;vertical-align:top;">
1270
- <!-- μΉ΄ν…Œκ³ λ¦¬ λ°” (CopyKiller 핡심) -->
1271
- <div style="padding:14px 18px;border-bottom:1px solid #E8E8E8;">
1272
- <div style="display:flex;height:28px;border-radius:4px;overflow:hidden;border:1px solid #D0D0D0;margin-bottom:8px;">
1273
- <div style="background:#E74C3C;width:{bar_suspect_w}%;"></div>
1274
- <div style="background:#E8ECF0;width:{bar_normal_w}%;"></div>
1275
- </div>
1276
- <div style="display:flex;gap:14px;flex-wrap:wrap;font-size:9px;">
1277
- <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#E74C3C;border-radius:2px;"></span> μ˜μ‹¬ <b>{cat_suspect}</b>건</span>
1278
- <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#F39C12;border-radius:2px;"></span> μΆœμ²˜ν‘œμ‹œ <b>0</b>건</span>
1279
- <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#3498DB;border-radius:2px;"></span> 인용 <b>{cat_cited}</b>건</span>
1280
- <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#E8ECF0;border:1px solid #CCC;border-radius:2px;"></span> 일반 <b>{cat_normal}</b>건</span>
1281
- </div>
1282
- </div>
1283
- <!-- λΉ„μœ¨ λ°” 3쀄 -->
1284
- <div style="padding:12px 18px;">
1285
- <div style="margin-bottom:10px;">
1286
- <div style="display:flex;justify-content:space-between;margin-bottom:4px;"><span style="font-size:11px;font-weight:700;color:#E74C3C;">β–  ν‘œμ ˆλ₯ </span><span style="font-size:18px;font-weight:900;color:#E74C3C;">{plag_pct}%</span></div>
1287
- <div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#E74C3C,#FF7675);height:100%;width:{max(1,plag_pct)}%;transition:width 0.5s;"></div></div>
1288
- </div>
1289
- <div style="margin-bottom:10px;">
1290
- <div style="display:flex;justify-content:space-between;margin-bottom:4px;"><span style="font-size:11px;font-weight:700;color:#F39C12;">β–  μœ μ‚¬μœ¨</span><span style="font-size:18px;font-weight:900;color:#F39C12;">{similarity_pct}%</span></div>
1291
- <div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#F39C12,#FFEAA7);height:100%;width:{max(1,similarity_pct)}%;transition:width 0.5s;"></div></div>
1292
- </div>
1293
- <div>
1294
- <div style="display:flex;justify-content:space-between;margin-bottom:4px;"><span style="font-size:11px;font-weight:700;color:#3498DB;">β–  인용λ₯ </span><span style="font-size:18px;font-weight:900;color:#3498DB;">{citation_pct}%</span></div>
1295
- <div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#3498DB,#85C1E9);height:100%;width:{max(1,citation_pct)}%;transition:width 0.5s;"></div></div>
1296
- </div>
1297
- </div>
1298
- <!-- μš”μ•½ 수치 -->
1299
- <div style="padding:8px 18px;background:#F8F9FB;border-top:1px solid #E8E8E8;">
1300
- <table style="width:100%;border-collapse:collapse;">
1301
- <tr>
1302
- <td style="padding:4px;font-size:10px;color:#888;">μ˜μ‹¬λ¬Έμž₯</td>
1303
- <td style="padding:4px;font-size:12px;font-weight:800;color:#E74C3C;">{cat_suspect}건</td>
1304
- <td style="padding:4px;font-size:10px;color:#888;">일반문μž₯</td>
1305
- <td style="padding:4px;font-size:12px;font-weight:800;color:#27AE60;">{cat_normal}건</td>
1306
- <td style="padding:4px;font-size:10px;color:#888;">전체</td>
1307
- <td style="padding:4px;font-size:12px;font-weight:800;color:#333;">{total_sents}건</td>
1308
- </tr>
1309
- </table>
1310
- </div>
1311
- </td>
1312
- </tr>
1313
- </table>
1314
  </div>
1315
- <!-- ═══════ 전체 ν…μŠ€νŠΈ 뢄석 ═══════ -->
1316
- <div style="padding:18px 24px 0;">
1317
- <div style="{SEC}">πŸ“ 전체 ν…μŠ€νŠΈ 뢄석</div>
1318
- <div style="display:flex;gap:16px;margin-bottom:8px;font-size:9px;padding:6px 10px;background:#F8F9FB;border:1px solid #E8E8E8;border-radius:4px;">
1319
- <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#E74C3C;border-radius:2px;"></span> ν‘œμ ˆ μ˜μ‹¬</span>
1320
- <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#F39C12;border-radius:2px;"></span> μΆœμ²˜ν‘œμ‹œ</span>
1321
- <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#3498DB;border-radius:2px;"></span> 인용</span>
1322
- <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#27AE60;border-radius:2px;"></span> μžκΈ°ν‘œμ ˆ</span>
1323
- <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#fff;border:1px solid #CCC;border-radius:2px;"></span> 일반</span>
 
 
 
 
 
 
 
1324
  </div>
1325
- <div style="padding:14px;background:#FAFBFC;border:1px solid #D5D5D5;line-height:2.1;font-size:13px;max-height:300px;overflow-y:auto;">{full_hl}</div>
1326
  </div>
1327
- <!-- ═══════ ν‘œμ ˆ μ˜μ‹¬ 좜처 ═══════ -->
1328
- <div style="padding:18px 24px 0;">
1329
- <div style="{SEC}">πŸ”— ν‘œμ ˆ μ˜μ‹¬ 좜처 ({len(src_list)}건)</div>
1330
- <table style="width:100%;border-collapse:collapse;">
1331
- <tr>
1332
- <th style="{TH}width:32px;">No</th>
1333
- <th style="{TH}width:48px;">μœ ν˜•</th>
1334
- <th style="{TH}">좜처λͺ… / URL</th>
1335
- <th style="{TH}width:55px;">μœ μ‚¬μœ¨</th>
1336
- <th style="{TH}width:100px;">뢄포</th>
1337
  </tr>
1338
- {src_rows if src_rows else '<tr><td colspan="5" style="padding:18px;text-align:center;color:#999;font-size:11px;border:1px solid #D5D5D5;background:#FAFBFC;">발견된 μœ μ‚¬ μΆœμ²˜κ°€ μ—†μŠ΅λ‹ˆλ‹€.</td></tr>'}
1339
- </table>
1340
- </div>
1341
- <!-- ═══════ μ˜μ‹¬ λ¬Έμž₯ 비ꡐ ═══════ -->
1342
- <div style="padding:18px 24px 0;">
1343
- <div style="{SEC}">⚠️ μ˜μ‹¬ λ¬Έμž₯ 비ꡐ ({len(sim_sents)}건)</div>
1344
- <table style="width:100%;border-collapse:collapse;">
1345
  <tr>
1346
- <th style="{TH}width:32px;">No</th>
1347
- <th style="{TH}width:38%;">검사 λ¬Έμž₯ (원문)</th>
1348
- <th style="{TH}width:34%;">비ꡐ λ¬Έμž₯ (좜처)</th>
1349
- <th style="{TH}">좜처</th>
1350
  </tr>
1351
- {suspect_rows if suspect_rows else '<tr><td colspan="4" style="padding:18px;text-align:center;color:#999;font-size:11px;border:1px solid #D5D5D5;background:#FAFBFC;">μœ μ‚¬ μ˜μ‹¬ λ¬Έμž₯이 λ°œκ²¬λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.</td></tr>'}
1352
  </table>
1353
  </div>
1354
- <!-- ═══════ 검사 μ•ˆλ‚΄ ═══════ -->
1355
- <div style="margin:18px 24px;padding:12px 14px;background:#F5F8FC;border:1px solid #D0DAEA;border-radius:4px;font-size:9px;color:#555;line-height:1.8;">
1356
- <b style="color:#333;">πŸ“Œ 검사 μ•ˆλ‚΄</b><br>
1357
- Β· λ³Έ λ³΄κ³ μ„œλŠ” <b>{method_str}</b> 기반 μžλ™ ν‘œμ ˆ 검사 κ²°κ³Όμž…λ‹ˆλ‹€.<br>
1358
- Β· 검색 λ²”μœ„: 인터넷 μ›ΉνŽ˜μ΄μ§€, ν•™μˆ λ…Όλ¬Έ(KCI, RISS), ν•΄μ™Έλ…Όλ¬Έ(arXiv)<br>
1359
- Β· μœ μ‚¬λ„λŠ” λ¬Έμž₯ λ‹¨μœ„ λ§€μΉ­ 기반이며, μ΅œμ’… νŒμ •μ€ ꡐ수자/κ²€ν† μžμ˜ 확인이 ν•„μš”ν•©λ‹ˆλ‹€.<br>
1360
- Β· 인용 ν‘œκΈ°(λ”°μ˜΄ν‘œ, 각주 λ“±)κ°€ ν¬ν•¨λœ λ¬Έμž₯은 인용으둜 λΆ„λ₯˜λ  수 μžˆμŠ΅λ‹ˆλ‹€.
 
 
 
 
 
 
 
1361
  </div>
1362
- <!-- ═══════ ν‘Έν„° (CopyKiller μŠ€νƒ€μΌ) ═══════ -->
1363
- <div style="padding:10px 24px;background:#F0F3F8;border-top:2px solid {HDR_BG};display:flex;justify-content:space-between;align-items:center;">
1364
- <div>
1365
- <span style="font-size:13px;font-weight:900;color:{HDR_BG};">AI Detector</span>
1366
- <span style="font-size:9px;color:#999;margin-left:6px;">Plagiarism Checker v3.5</span>
1367
- </div>
1368
- <div style="text-align:right;">
1369
- <div style="font-size:9px;color:#AAA;">Powered by Brave Β· KCI Β· RISS Β· arXiv Β· Gemini</div>
1370
- <div style="font-size:8px;color:#CCC;">{now} Β· ID: {doc_id} Β· All Rights Reserved.</div>
1371
- </div>
1372
  </div>
 
1373
  </div>"""
1374
- log = '\n'.join(log_lines) + f"\n\nμ’…ν•©: {plag_pct}% {grade} | 좜처 {len(unique_sources)}건 | μœ μ‚¬λ¬Έμž₯ {matched_sents}/{total_sents}"
1375
- return html, log
 
 
1376
  def run_detection(text, progress=gr.Progress()):
1377
  if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ μ΅œμ†Œ 50자</div>",""
1378
  text=text.strip()
 
1020
  all_results.extend(duckduckgo_search(f"{query} λ…Όλ¬Έ ν•™μˆ ", 2))
1021
  return all_results
1022
  def run_plagiarism(text, progress=gr.Progress()):
1023
+ """βœ… Gemini Google Search 100% - λ‹¨μˆœ ν‘œμ ˆ 검사"""
1024
+ if not text or len(text.strip()) < 50:
1025
  return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ μ΅œμ†Œ 50자 이상</div>", ""
1026
+
1027
  text = text.strip()
 
1028
  now = datetime.now().strftime("%Y-%m-%d %H:%M")
1029
+ doc_id = hashlib.md5(text[:100].encode()).hexdigest()[:8].upper()
1030
+
1031
+ plag_pct = 0
 
 
 
 
 
1032
  all_sources = []
1033
+ gemini_response = ""
1034
+
1035
+ # βœ… Gemini Google Search (μ œκ³΅λ°›μ€ μ½”λ“œ κ·ΈλŒ€λ‘œ)
1036
+ if HAS_GENAI and GEMINI_KEY:
1037
+ progress(0.30, "Gemini Google Search μ‹€ν–‰...")
1038
+
1039
+ try:
1040
+ client = genai.Client(api_key=GEMINI_KEY)
1041
+
1042
+ # ν”„λ‘¬ν”„νŠΈ
1043
+ prompt = f"""ν‘œμ ˆ 검사λ₯Ό ν•΄μ£Όμ„Έμš”. μ•„λž˜ ν…μŠ€νŠΈκ°€ 인터넷에 μ‘΄μž¬ν•˜λŠ”μ§€ Google Search둜 μ² μ €νžˆ κ²€μƒ‰ν•˜μ„Έμš”.
1044
+
1045
+ [ν…μŠ€νŠΈ]
1046
+ {text}
1047
+
1048
+ 응닡:
1049
+ 1. 발견된 μœ μ‚¬ λ‚΄μš© (있으면 제λͺ©, URL, μœ μ‚¬λ„)
1050
+ 2. λ§ˆμ§€λ§‰μ— "ν‘œμ ˆμœ¨: XX%"둜 κ²°λ‘ """
1051
+
1052
+ # μ œκ³΅λ°›μ€ μ½”λ“œ κ΅¬μ‘°λŒ€λ‘œ
1053
+ contents = [
1054
+ types.Content(
1055
+ role="user",
1056
+ parts=[types.Part.from_text(text=prompt)],
1057
+ )
1058
+ ]
1059
+
1060
+ tools = [types.Tool(googleSearch=types.GoogleSearch())]
1061
+
1062
+ generate_content_config = types.GenerateContentConfig(
1063
+ thinking_config=types.ThinkingConfig(thinking_budget=0),
1064
+ tools=tools,
1065
+ temperature=0.3,
1066
+ max_output_tokens=4000,
1067
+ )
1068
+
1069
+ progress(0.50, "Google Search μ‹€ν–‰ 쀑...")
1070
+
1071
+ # 슀트리밍 μˆ˜μ§‘
1072
+ full_response = ""
1073
+ for chunk in client.models.generate_content_stream(
1074
+ model="gemini-2.0-flash-lite-latest",
1075
+ contents=contents,
1076
+ config=generate_content_config,
1077
+ ):
1078
+ if chunk.text:
1079
+ full_response += chunk.text
1080
+
1081
+ gemini_response = full_response
1082
+
1083
+ # ν‘œμ ˆμœ¨ μΆ”μΆœ
1084
+ pm = re.search(r'ν‘œμ ˆμœ¨[:\s]*(\d+)', full_response)
1085
+ if pm:
1086
+ plag_pct = int(pm.group(1))
1087
+
1088
+ # 좜처 URL μΆ”μΆœ
1089
+ for m in re.finditer(r'https?://[^\s\)]{10,}', full_response):
1090
+ url = m.group(0)
1091
+ domain = url.split('/')[2] if '/' in url else url
1092
+ all_sources.append({
1093
+ "title": domain,
1094
+ "url": url,
1095
+ })
1096
+
1097
+ except Exception as e:
1098
+ gemini_response = f"였λ₯˜: {str(e)[:200]}"
1099
+
1100
  else:
1101
+ gemini_response = "❌ Gemini API ν‚€ μ—†μŒ"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1102
 
1103
+ # ============================================
1104
+ # νŒμ •
1105
+ # ============================================
1106
+ progress(0.80, "λ³΄κ³ μ„œ 생성...")
1107
 
1108
+ if plag_pct >= 50:
1109
+ grade, gc = "🚨 ν‘œμ ˆ μ˜μ‹¬", "#FF4444"
1110
+ elif plag_pct >= 30:
1111
+ grade, gc = "⚠️ 주의 ν•„μš”", "#FF8800"
1112
+ elif plag_pct >= 15:
1113
+ grade, gc = "πŸ“Œ μœ μ‚¬ν‘œν˜„", "#DDAA00"
1114
+ elif plag_pct >= 5:
1115
+ grade, gc = "βœ“ μ–‘ν˜Έ", "#4ECDC4"
1116
  else:
1117
+ grade, gc = "βœ… 우수", "#22AA44"
1118
+
1119
+ # 톡계
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1120
  word_count = len(split_words(text))
1121
  char_count = len(text)
1122
+
1123
+ # 좜처 ν…Œμ΄λΈ”
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1124
  src_rows = ""
1125
+ for i, s in enumerate(all_sources[:20]):
1126
+ src_rows += f"""<tr style="border-bottom:1px solid #E0E0E0;">
1127
+ <td style="padding:8px;text-align:center;font-size:11px;">{i+1}</td>
1128
+ <td style="padding:8px;"><a href="{s['url']}" target="_blank" style="color:#2E86C1;text-decoration:none;font-weight:600;font-size:11px;">{s['title'][:50]}</a></td>
1129
+ <td style="padding:8px;font-size:9px;color:#888;word-break:break-all;">{s['url'][:70]}</td>
 
 
 
 
 
 
1130
  </tr>"""
1131
+
1132
+ if not src_rows:
1133
+ src_rows = '<tr><td colspan="3" style="padding:20px;text-align:center;color:#999;">발견된 좜처 μ—†μŒ</td></tr>'
1134
+
1135
+ # HTML λ³΄κ³ μ„œ
1136
+ HDR = '#3B7DD8'
1137
+ html = f"""<div style="font-family:'Noto Sans KR',sans-serif;max-width:900px;margin:20px auto;background:#fff;border:1px solid #E0E0E0;border-radius:8px;">
1138
+
1139
+ <!-- 헀더 -->
1140
+ <div style="background:linear-gradient(135deg,{HDR},#4A8DE0);padding:24px;color:#fff;border-radius:8px 8px 0 0;">
1141
+ <div style="display:flex;justify-content:space-between;align-items:center;">
1142
+ <div>
1143
+ <div style="font-size:24px;font-weight:900;">ν‘œμ ˆ 검사 κ²°κ³Ό</div>
1144
+ <div style="font-size:12px;opacity:0.9;margin-top:4px;">Gemini Google Search 기반 뢄석</div>
1145
+ </div>
1146
+ <div style="text-align:right;font-size:11px;opacity:0.9;">
1147
+ <div>λ¬Έμ„œ: {doc_id}</div>
1148
+ <div>{now}</div>
1149
+ </div>
1150
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1151
  </div>
1152
+
1153
+ <!-- κ²°κ³Ό μš”μ•½ -->
1154
+ <div style="padding:24px;background:#FAFBFE;border-bottom:1px solid #E0E0E0;">
1155
+ <div style="display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px;">
1156
+ <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
1157
+ <div style="font-size:48px;font-weight:900;color:{gc};">{plag_pct}%</div>
1158
+ <div style="font-size:12px;color:#666;margin-top:8px;">ν‘œμ ˆμœ¨</div>
1159
+ </div>
1160
+ <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
1161
+ <div style="font-size:28px;font-weight:900;color:{gc};">{grade}</div>
1162
+ <div style="font-size:12px;color:#666;margin-top:8px;">νŒμ •</div>
1163
+ </div>
1164
+ <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
1165
+ <div style="font-size:28px;font-weight:900;color:#666;">{len(all_sources)}</div>
1166
+ <div style="font-size:12px;color:#666;margin-top:8px;">좜처 발견</div>
1167
+ </div>
1168
  </div>
 
1169
  </div>
1170
+
1171
+ <!-- 정보 -->
1172
+ <div style="padding:24px;border-bottom:1px solid #E0E0E0;">
1173
+ <div style="font-size:13px;font-weight:700;color:#1A3C6E;margin-bottom:12px;">πŸ“‹ 검사 정보</div>
1174
+ <table style="width:100%;font-size:12px;">
1175
+ <tr style="border-bottom:1px solid #E0E0E0;">
1176
+ <td style="padding:8px;color:#666;width:100px;">κΈ€μžμˆ˜</td>
1177
+ <td style="padding:8px;font-weight:600;">{char_count:,}자</td>
1178
+ <td style="padding:8px;color:#666;width:100px;">λ‹¨μ–΄μˆ˜</td>
1179
+ <td style="padding:8px;font-weight:600;">{word_count:,}단어</td>
1180
  </tr>
 
 
 
 
 
 
 
1181
  <tr>
1182
+ <td style="padding:8px;color:#666;">검사 방법</td>
1183
+ <td colspan="3" style="padding:8px;font-weight:600;">βœ… Gemini Google Search (100%)</td>
 
 
1184
  </tr>
 
1185
  </table>
1186
  </div>
1187
+
1188
+ <!-- 좜처 -->
1189
+ <div style="padding:24px;">
1190
+ <div style="font-size:13px;font-weight:700;color:#1A3C6E;margin-bottom:12px;">πŸ” 발견된 좜처</div>
1191
+ <table style="width:100%;border-collapse:collapse;font-size:11px;">
1192
+ <thead>
1193
+ <tr style="background:{HDR};color:white;">
1194
+ <th style="padding:10px;text-align:center;width:40px;">μˆœμœ„</th>
1195
+ <th style="padding:10px;text-align:left;">좜처</th>
1196
+ <th style="padding:10px;text-align:left;">URL</th>
1197
+ </tr>
1198
+ </thead>
1199
+ <tbody>{src_rows}</tbody>
1200
+ </table>
1201
  </div>
1202
+
1203
+ <!-- ν•˜λ‹¨ -->
1204
+ <div style="padding:16px 24px;background:#FFF8E1;border-top:1px solid #E0E0E0;border-radius:0 0 8px 8px;font-size:11px;color:#666;line-height:1.6;">
1205
+ <strong style="color:#D63031;">βœ… Gemini Google Search 100%</strong> - μΈν„°λ„·μ˜ λͺ¨λ“  μœ μ‚¬ λ‚΄μš©μ„ κ²€μƒ‰ν•˜λŠ” κ³ κΈ‰ AI 뢄석
 
 
 
 
 
 
1206
  </div>
1207
+
1208
  </div>"""
1209
+
1210
+ progress(0.95, "μ™„λ£Œ!")
1211
+
1212
+ return html, ""
1213
  def run_detection(text, progress=gr.Progress()):
1214
  if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ μ΅œμ†Œ 50자</div>",""
1215
  text=text.strip()