openfree committed on
Commit
0eb94d6
·
verified ·
1 Parent(s): ada4ae5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -330
app.py CHANGED
@@ -1020,359 +1020,196 @@ def self_crawl_search(query, max_results=3):
1020
  all_results.extend(duckduckgo_search(f"{query} ๋…ผ๋ฌธ ํ•™์ˆ ", 2))
1021
  return all_results
1022
  def run_plagiarism(text, progress=gr.Progress()):
1023
- if not text or len(text.strip())<50:
 
1024
  return "<div style='padding:20px;text-align:center;color:#888;'>โš ๏ธ ์ตœ์†Œ 50์ž ์ด์ƒ</div>", ""
 
1025
  text = text.strip()
1026
- sents = split_sentences(text)
1027
  now = datetime.now().strftime("%Y-%m-%d %H:%M")
1028
- has_brave = bool(BRAVE_KEY)
1029
- has_gemini = bool(HAS_GENAI and GEMINI_KEY)
1030
- progress(0.05, "๋ฌธ์žฅ ๋ถ„๋ฆฌ...")
1031
- blocks = []
1032
- for i in range(0, len(sents), 4):
1033
- block = ' '.join(sents[i:i+4])
1034
- if len(block) > 20:
1035
- blocks.append({"text": block, "sent_indices": list(range(i, min(i+4, len(sents))))})
1036
  all_sources = []
1037
- sent_matches = {i: [] for i in range(len(sents))} # ๋ฌธ์žฅ๋ณ„ ๋งค์นญ ์ •๋ณด
1038
- block_results = []
1039
- log_lines = []
1040
- if has_brave:
1041
- progress(0.15, f"Brave Search ๋ณ‘๋ ฌ ๊ฒ€์ƒ‰ ({len(blocks)}๋ธ”๋ก)...")
1042
- queries = []
1043
- for b in blocks:
1044
- key_phrase = b["text"][:60].strip()
1045
- queries.append(f'"{key_phrase}"')
1046
- brave_results = parallel_brave_search(queries[:20])
1047
- for q, results in brave_results.items():
1048
- for r in results:
1049
- all_sources.append(r)
1050
- for b in blocks:
1051
- if q.strip('"') in b["text"][:60]:
1052
- for si in b["sent_indices"]:
1053
- sent_matches[si].append({"source": r["title"], "url": r["url"], "type": "Brave"})
1054
- log_lines.append(f"Brave Search: {len(queries)}์ฟผ๋ฆฌ โ†’ {sum(len(v) for v in brave_results.values())}๊ฑด")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1055
  else:
1056
- progress(0.15, f"์ž์ฒด ์›น ๊ฒ€์ƒ‰ ({len(blocks)}๋ธ”๋ก)...")
1057
- crawl_queries = []
1058
- for b in blocks[:10]: # ์ตœ๋Œ€ 10๋ธ”๋ก
1059
- key_phrase = b["text"][:50].strip()
1060
- crawl_queries.append((key_phrase, b))
1061
- with ThreadPoolExecutor(max_workers=5) as executor:
1062
- futures = {executor.submit(self_crawl_search, q, 3): (q, b) for q, b in crawl_queries}
1063
- for future in as_completed(futures):
1064
- q, b = futures[future]
1065
- try:
1066
- results = future.result()
1067
- for r in results:
1068
- all_sources.append(r)
1069
- for si in b["sent_indices"]:
1070
- sent_matches[si].append({"source": r["title"], "url": r["url"], "type": r.get("source","Web")})
1071
- except: pass
1072
- log_lines.append(f"์ž์ฒด ์›น๊ฒ€์ƒ‰: {len(crawl_queries)}์ฟผ๋ฆฌ (DuckDuckGo)")
1073
- progress(0.40, "ํ•™์ˆ  DB ๊ฒ€์ƒ‰ (KCI/RISS/arXiv)...")
1074
- words = split_words(text)
1075
- wf = Counter(words)
1076
- keywords = [w for w, c in wf.most_common(20) if len(w) >= 2 and c >= 2][:5]
1077
- kw_query = ' '.join(keywords[:3])
1078
- academic_results = []
1079
- with ThreadPoolExecutor(max_workers=3) as executor:
1080
- futures = [
1081
- executor.submit(search_kci, kw_query),
1082
- executor.submit(search_riss, kw_query),
1083
- executor.submit(search_arxiv, kw_query),
1084
- ]
1085
- for future in as_completed(futures):
1086
- try:
1087
- results = future.result()
1088
- academic_results.extend(results)
1089
- all_sources.extend(results)
1090
- except: pass
1091
- log_lines.append(f"ํ•™์ˆ DB: KCI/RISS/arXiv โ†’ {len(academic_results)}๊ฑด")
1092
- gemini_results = []
1093
- if has_gemini:
1094
- progress(0.60, "Gemini + Google Search...")
1095
- for i, b in enumerate(blocks[:5]): # ์ตœ๋Œ€ 5๋ธ”๋ก
1096
- gr_result = gemini_plagiarism_check(b["text"])
1097
- if gr_result:
1098
- gemini_results.append(gr_result)
1099
- for src in gr_result.get("sources", []):
1100
- all_sources.append(src)
1101
- for si in b["sent_indices"]:
1102
- sent_matches[si].append({"source": src.get("title",""), "url": src.get("url",""), "type": "Google"})
1103
- log_lines.append(f"Gemini: {len(blocks[:5])}๋ธ”๋ก โ†’ {sum(len(r.get('sources',[])) for r in gemini_results)}์ถœ์ฒ˜")
1104
- progress(0.80, "๋ณด๊ณ ์„œ ์ƒ์„ฑ...")
1105
- matched_sents = sum(1 for si, matches in sent_matches.items() if matches)
1106
- total_sents = len(sents)
1107
 
1108
- # โœ… Gemini 90% (๋ฉ”์ธ) + Brave/ํ•™์ˆ DB 10% (๋ณด์กฐ)
1109
- brave_pct = int(matched_sents / total_sents * 100) if total_sents > 0 else 0
 
 
1110
 
1111
- if gemini_results:
1112
- gemini_pcts = [r["pct"] for r in gemini_results if r["pct"] > 0]
1113
- if gemini_pcts:
1114
- gemini_avg = sum(gemini_pcts) / len(gemini_pcts)
1115
- plag_pct = int(gemini_avg * 0.9 + brave_pct * 0.1)
1116
- else:
1117
- plag_pct = brave_pct
 
1118
  else:
1119
- plag_pct = brave_pct
1120
- seen_urls = set()
1121
- unique_sources = []
1122
- for s in all_sources:
1123
- url = s.get("url", "")
1124
- if url and url not in seen_urls:
1125
- seen_urls.add(url)
1126
- unique_sources.append(s)
1127
- if plag_pct >= 50: grade, grade_color, grade_bg = "ํ‘œ์ ˆ ์˜์‹ฌ", "#FF4444", "#FFE0E0"
1128
- elif plag_pct >= 30: grade, grade_color, grade_bg = "์ฃผ์˜ ํ•„์š”", "#FF8800", "#FFF0DD"
1129
- elif plag_pct >= 15: grade, grade_color, grade_bg = "์œ ์‚ฌ ํ‘œํ˜„ ์ผ๋ถ€", "#DDAA00", "#FFFBE0"
1130
- elif plag_pct >= 5: grade, grade_color, grade_bg = "์–‘ํ˜ธ", "#4ECDC4", "#E0FFF8"
1131
- else: grade, grade_color, grade_bg = "์šฐ์ˆ˜ (์›๋ณธ์„ฑ ๋†’์Œ)", "#22AA44", "#E0FFE8"
1132
- sent_analysis = []
1133
- for i, s in enumerate(sents):
1134
- matches = sent_matches.get(i, [])
1135
- if matches:
1136
- best = matches[0]
1137
- sent_analysis.append({"idx":i, "text":s, "matched":True, "source":best.get("source","")[:40], "url":best.get("url",""), "type":best.get("type","")})
1138
- else:
1139
- sent_analysis.append({"idx":i, "text":s, "matched":False})
1140
- sim_sents = [s for s in sent_analysis if s["matched"]]
1141
- src_groups = {}
1142
- for src in unique_sources:
1143
- key = src.get("url","")[:80]
1144
- if key not in src_groups:
1145
- src_groups[key] = {"title":src.get("title",""), "url":src.get("url",""), "source":src.get("source",""), "count":0}
1146
- src_groups[key]["count"] += 1
1147
- src_list = sorted(src_groups.values(), key=lambda x: -x["count"])
1148
- methods_used = []
1149
- if has_brave: methods_used.append("Brave Search(๋ณ‘๋ ฌ)")
1150
- elif all_sources: methods_used.append("DuckDuckGo(์ž์ฒดํฌ๋กค๋ง)")
1151
- methods_used.append("KCI ยท RISS ยท arXiv")
1152
- if has_gemini: methods_used.append("Gemini+Google Search")
1153
- method_str = " + ".join(methods_used)
1154
- gc = grade_color
1155
  word_count = len(split_words(text))
1156
  char_count = len(text)
1157
- doc_id = hashlib.md5(text[:100].encode()).hexdigest()[:8].upper()
1158
- similarity_pct = plag_pct
1159
- citation_pct = 0
1160
- cat_suspect = len(sim_sents) # ์˜์‹ฌ
1161
- cat_cited = 0 # ์ธ์šฉ (ํ˜•์‹์  ์ธ์šฉ ๊ฐ์ง€)
1162
- cat_normal = total_sents - cat_suspect - cat_cited # ์ผ๋ฐ˜
1163
- cat_suspect_pct = int(cat_suspect / max(1, total_sents) * 100)
1164
- cat_normal_pct = 100 - cat_suspect_pct
1165
- def src_icon(s):
1166
- src = s.get("source","").lower()
1167
- if "kci" in src: return "๐Ÿ“š", "KCI"
1168
- if "riss" in src: return "๐Ÿ“–", "RISS"
1169
- if "arxiv" in src: return "๐Ÿ“„", "arXiv"
1170
- if "google" in src: return "๐Ÿ”", "Google"
1171
- if "brave" in src: return "๐ŸŒ", "Brave"
1172
- return "๐ŸŒ", "Web"
1173
  src_rows = ""
1174
- for i, sg in enumerate(src_list[:15]):
1175
- pct = min(100, int(sg["count"] / max(1, total_sents) * 100 * 3))
1176
- ico, stype = src_icon(sg)
1177
- title_short = sg["title"][:50] or "(์ œ๋ชฉ ์—†์Œ)"
1178
- url_short = sg["url"][:60]
1179
- src_rows += f"""<tr>
1180
- <td style="padding:6px 8px;font-size:11px;text-align:center;border:1px solid #D5D5D5;color:#555;">{i+1}</td>
1181
- <td style="padding:6px 8px;font-size:11px;text-align:center;border:1px solid #D5D5D5;"><span style="font-size:14px;">{ico}</span><br><span style="font-size:9px;color:#666;">{stype}</span></td>
1182
- <td style="padding:6px 8px;border:1px solid #D5D5D5;"><div style="font-size:11px;font-weight:600;color:#1A3C6E;margin-bottom:2px;">{title_short}</div><div style="font-size:9px;color:#888;word-break:break-all;">{url_short}</div></td>
1183
- <td style="padding:6px 8px;font-size:12px;text-align:center;border:1px solid #D5D5D5;font-weight:800;color:#D63031;">{pct}%</td>
1184
- <td style="padding:6px 10px;border:1px solid #D5D5D5;"><div style="background:#EDEDED;height:14px;border-radius:2px;overflow:hidden;"><div style="background:linear-gradient(90deg,#D63031,#FF7675);height:100%;width:{max(3,pct)}%;border-radius:2px;"></div></div></td>
1185
  </tr>"""
1186
- suspect_rows = ""
1187
- for i, sa in enumerate(sim_sents[:15]):
1188
- suspect_rows += f"""<tr>
1189
- <td style="padding:8px;font-size:11px;text-align:center;color:#888;border:1px solid #D5D5D5;vertical-align:top;">{i+1}</td>
1190
- <td style="padding:8px;font-size:11px;line-height:1.7;border:1px solid #D5D5D5;vertical-align:top;"><span style="background:#FFF3CD;border-bottom:2px solid #FFD43B;padding:1px 3px;">{sa["text"][:90]}</span></td>
1191
- <td style="padding:8px;font-size:10px;line-height:1.6;border:1px solid #D5D5D5;vertical-align:top;color:#555;"><span style="background:#FFE0E0;border-bottom:2px solid #E74C3C;padding:1px 3px;">{sa["text"][:70]}...</span></td>
1192
- <td style="padding:8px;font-size:10px;border:1px solid #D5D5D5;vertical-align:top;"><a href="{sa.get('url','#')}" target="_blank" style="color:#2E86C1;text-decoration:none;font-weight:600;">{sa["source"][:28]}</a><br><span style="font-size:8px;color:#AAA;">{sa.get('type','')}</span></td>
1193
- </tr>"""
1194
- full_hl = ""
1195
- for sa in sent_analysis:
1196
- sidx = sa["idx"] + 1
1197
- if sa["matched"]:
1198
- full_hl += f'<span style="background:#FFD6D6;border-bottom:2px solid #E74C3C;padding:1px 2px;cursor:pointer;" title="[์˜์‹ฌ #{sidx}] ์ถœ์ฒ˜: {sa.get("source","")}">{sa["text"]}</span> '
1199
- else:
1200
- full_hl += f'<span style="color:#333;">{sa["text"]}</span> '
1201
- bar_suspect_w = max(2, cat_suspect_pct) if cat_suspect > 0 else 0
1202
- bar_normal_w = 100 - bar_suspect_w
1203
- HDR_BG = '#3B7DD8'
1204
- HDR_BG2 = '#4A8DE0'
1205
- TH = 'padding:8px 10px;font-size:10px;font-weight:700;color:#fff;background:{};text-align:center;border:1px solid {};'.format(HDR_BG, HDR_BG)
1206
- TL = 'padding:7px 10px;font-size:11px;color:#444;font-weight:600;background:#EDF2FA;border:1px solid #D5D5D5;'
1207
- TV = 'padding:7px 10px;font-size:12px;color:#333;border:1px solid #D5D5D5;'
1208
- SEC = 'font-size:13px;font-weight:800;color:#1A3C6E;margin:0 0 10px 0;padding:8px 12px;background:#EDF2FA;border-left:4px solid {};border-bottom:1px solid #D5D5D5;'.format(HDR_BG)
1209
- html = f"""<div style="font-family:'Noto Sans KR','Malgun Gothic','Apple SD Gothic Neo',sans-serif;max-width:780px;margin:0 auto;background:#fff;border:2px solid #3B7DD8;box-shadow:0 2px 12px rgba(0,0,0,0.08);">
1210
- <!-- โ•โ•โ•โ•โ•โ•โ• ํ—ค๋” (CopyKiller ์Šคํƒ€์ผ) โ•โ•โ•โ•โ•โ•โ• -->
1211
- <div style="background:linear-gradient(135deg,{HDR_BG},{HDR_BG2});padding:18px 24px;color:#fff;">
1212
- <table style="width:100%;"><tr>
1213
- <td>
1214
- <div style="font-size:10px;opacity:0.8;letter-spacing:1px;margin-bottom:4px;">AI TEXT DETECTOR ยท PLAGIARISM REPORT</div>
1215
- <div style="font-size:22px;font-weight:900;letter-spacing:-0.5px;">ํ‘œ์ ˆ ๊ฒ€์‚ฌ ๊ฒฐ๊ณผ ํ™•๏ฟฝ๏ฟฝ๏ฟฝ์„œ</div>
1216
- </td>
1217
- <td style="text-align:right;vertical-align:bottom;">
1218
- <div style="font-size:10px;opacity:0.7;">๋ฌธ์„œ๋ฒˆํ˜ธ {doc_id}</div>
1219
- <div style="font-size:10px;opacity:0.7;">{now}</div>
1220
- </td>
1221
- </tr></table>
1222
- </div>
1223
- <!-- โ•โ•โ•โ•โ•โ•โ• ์ œ์ถœ ์ •๋ณด ํ…Œ์ด๋ธ” โ•โ•โ•โ•โ•โ•โ• -->
1224
- <div style="padding:16px 24px 0;">
1225
- <div style="{SEC}">๐Ÿ“‹ ๊ฒ€์‚ฌ ์ •๋ณด</div>
1226
- <table style="width:100%;border-collapse:collapse;">
1227
- <tr>
1228
- <td style="{TL}width:90px;">๊ฒ€์‚ฌ ์ผ์‹œ</td>
1229
- <td style="{TV}">{now}</td>
1230
- <td style="{TL}width:90px;">๋ฌธ์„œ๋ฒˆํ˜ธ</td>
1231
- <td style="{TV}">{doc_id}</td>
1232
- </tr>
1233
- <tr>
1234
- <td style="{TL}">๊ฒ€์‚ฌ ๋ฐฉ๋ฒ•</td>
1235
- <td style="{TV}font-size:10px;" colspan="3">{method_str}</td>
1236
- </tr>
1237
- <tr>
1238
- <td style="{TL}">์ „์ฒด ๋ถ„๋Ÿ‰</td>
1239
- <td style="{TV}" colspan="3">๊ธ€์ž์ˆ˜ <b>{char_count:,}</b> ยท ์–ด์ ˆ์ˆ˜ <b>{word_count:,}</b> ยท ๋ฌธ์žฅ์ˆ˜ <b>{total_sents}</b></td>
1240
- </tr>
1241
- <tr>
1242
- <td style="{TL}">๊ฒ€์ƒ‰ ๋ฒ”์œ„</td>
1243
- <td style="{TV}" colspan="3">์ธํ„ฐ๋„ท(์›น), ํ•™์ˆ ๋…ผ๋ฌธ(KCIยทRISS), ํ•ด์™ธ๋…ผ๋ฌธ(arXiv), Google Scholar</td>
1244
- </tr>
1245
- </table>
1246
- </div>
1247
- <!-- โ•โ•โ•โ•โ•โ•โ• ๊ฒ€์‚ฌ ๊ฒฐ๊ณผ (๋„๋„› + ๋ฐ”) โ•โ•โ•โ•โ•โ•โ• -->
1248
- <div style="padding:18px 24px 0;">
1249
- <div style="{SEC}">๐Ÿ“Š ๊ฒ€์‚ฌ ๊ฒฐ๊ณผ</div>
1250
- <table style="width:100%;border-collapse:collapse;">
1251
- <tr>
1252
- <!-- ๋„๋„› ์ฐจํŠธ -->
1253
- <td style="width:200px;padding:20px;text-align:center;border:1px solid #D5D5D5;vertical-align:middle;background:#FAFBFE;">
1254
- <div style="position:relative;width:140px;height:140px;margin:0 auto;">
1255
- <svg viewBox="0 0 36 36" style="width:140px;height:140px;transform:rotate(-90deg);">
1256
- <circle cx="18" cy="18" r="15.9" fill="none" stroke="#E8ECF0" stroke-width="2.8"/>
1257
- <circle cx="18" cy="18" r="15.9" fill="none" stroke="{gc}" stroke-width="2.8" stroke-dasharray="{plag_pct} {100-plag_pct}" stroke-linecap="round"/>
1258
- </svg>
1259
- <div style="position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);text-align:center;">
1260
- <div style="font-size:9px;color:#888;margin-bottom:2px;">ํ‘œ์ ˆ๋ฅ </div>
1261
- <div style="font-size:32px;font-weight:900;color:{gc};line-height:1;">{plag_pct}<span style="font-size:14px;font-weight:700;">%</span></div>
1262
- </div>
1263
- </div>
1264
- <div style="margin-top:8px;padding:4px 12px;background:{grade_bg};border:1px solid {gc};border-radius:20px;display:inline-block;">
1265
- <span style="font-size:11px;font-weight:800;color:{gc};">{grade}</span>
1266
- </div>
1267
- </td>
1268
- <!-- ์ƒ์„ธ ๊ฒฐ๊ณผ -->
1269
- <td style="padding:0;border:1px solid #D5D5D5;vertical-align:top;">
1270
- <!-- ์นดํ…Œ๊ณ ๋ฆฌ ๋ฐ” (CopyKiller ํ•ต์‹ฌ) -->
1271
- <div style="padding:14px 18px;border-bottom:1px solid #E8E8E8;">
1272
- <div style="display:flex;height:28px;border-radius:4px;overflow:hidden;border:1px solid #D0D0D0;margin-bottom:8px;">
1273
- <div style="background:#E74C3C;width:{bar_suspect_w}%;"></div>
1274
- <div style="background:#E8ECF0;width:{bar_normal_w}%;"></div>
1275
- </div>
1276
- <div style="display:flex;gap:14px;flex-wrap:wrap;font-size:9px;">
1277
- <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#E74C3C;border-radius:2px;"></span> ์˜์‹ฌ <b>{cat_suspect}</b>๊ฑด</span>
1278
- <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#F39C12;border-radius:2px;"></span> ์ถœ์ฒ˜ํ‘œ์‹œ <b>0</b>๊ฑด</span>
1279
- <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#3498DB;border-radius:2px;"></span> ์ธ์šฉ <b>{cat_cited}</b>๊ฑด</span>
1280
- <span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#E8ECF0;border:1px solid #CCC;border-radius:2px;"></span> ์ผ๋ฐ˜ <b>{cat_normal}</b>๊ฑด</span>
1281
- </div>
1282
- </div>
1283
- <!-- ๋น„์œจ ๋ฐ” 3์ค„ -->
1284
- <div style="padding:12px 18px;">
1285
- <div style="margin-bottom:10px;">
1286
- <div style="display:flex;justify-content:space-between;margin-bottom:4px;"><span style="font-size:11px;font-weight:700;color:#E74C3C;">โ–  ํ‘œ์ ˆ๋ฅ </span><span style="font-size:18px;font-weight:900;color:#E74C3C;">{plag_pct}%</span></div>
1287
- <div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#E74C3C,#FF7675);height:100%;width:{max(1,plag_pct)}%;transition:width 0.5s;"></div></div>
1288
- </div>
1289
- <div style="margin-bottom:10px;">
1290
- <div style="display:flex;justify-content:space-between;margin-bottom:4px;"><span style="font-size:11px;font-weight:700;color:#F39C12;">โ–  ์œ ์‚ฌ์œจ</span><span style="font-size:18px;font-weight:900;color:#F39C12;">{similarity_pct}%</span></div>
1291
- <div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#F39C12,#FFEAA7);height:100%;width:{max(1,similarity_pct)}%;transition:width 0.5s;"></div></div>
1292
- </div>
1293
- <div>
1294
- <div style="display:flex;justify-content:space-between;margin-bottom:4px;"><span style="font-size:11px;font-weight:700;color:#3498DB;">โ–  ์ธ์šฉ๋ฅ </span><span style="font-size:18px;font-weight:900;color:#3498DB;">{citation_pct}%</span></div>
1295
- <div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#3498DB,#85C1E9);height:100%;width:{max(1,citation_pct)}%;transition:width 0.5s;"></div></div>
1296
- </div>
1297
- </div>
1298
- <!-- ์š”์•ฝ ์ˆ˜์น˜ -->
1299
- <div style="padding:8px 18px;background:#F8F9FB;border-top:1px solid #E8E8E8;">
1300
- <table style="width:100%;border-collapse:collapse;">
1301
- <tr>
1302
- <td style="padding:4px;font-size:10px;color:#888;">์˜์‹ฌ๋ฌธ์žฅ</td>
1303
- <td style="padding:4px;font-size:12px;font-weight:800;color:#E74C3C;">{cat_suspect}๊ฑด</td>
1304
- <td style="padding:4px;font-size:10px;color:#888;">์ผ๋ฐ˜๋ฌธ์žฅ</td>
1305
- <td style="padding:4px;font-size:12px;font-weight:800;color:#27AE60;">{cat_normal}๊ฑด</td>
1306
- <td style="padding:4px;font-size:10px;color:#888;">์ „์ฒด</td>
1307
- <td style="padding:4px;font-size:12px;font-weight:800;color:#333;">{total_sents}๊ฑด</td>
1308
- </tr>
1309
- </table>
1310
- </div>
1311
- </td>
1312
- </tr>
1313
- </table>
1314
  </div>
1315
- <!-- โ•โ•โ•โ•โ•โ•โ• ์ „์ฒด ํ…์ŠคํŠธ ๋ถ„์„ โ•โ•โ•โ•โ•โ•โ• -->
1316
- <div style="padding:18px 24px 0;">
1317
- <div style="{SEC}">๐Ÿ“ ์ „์ฒด ํ…์ŠคํŠธ ๋ถ„์„</div>
1318
- <div style="display:flex;gap:16px;margin-bottom:8px;font-size:9px;padding:6px 10px;background:#F8F9FB;border:1px solid #E8E8E8;border-radius:4px;">
1319
- <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#E74C3C;border-radius:2px;"></span> ํ‘œ์ ˆ ์˜์‹ฌ</span>
1320
- <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#F39C12;border-radius:2px;"></span> ์ถœ์ฒ˜ํ‘œ์‹œ</span>
1321
- <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#3498DB;border-radius:2px;"></span> ์ธ์šฉ</span>
1322
- <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#27AE60;border-radius:2px;"></span> ์ž๊ธฐํ‘œ์ ˆ</span>
1323
- <span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:12px;height:12px;background:#fff;border:1px solid #CCC;border-radius:2px;"></span> ์ผ๋ฐ˜</span>
 
 
 
 
 
 
 
1324
  </div>
1325
- <div style="padding:14px;background:#FAFBFC;border:1px solid #D5D5D5;line-height:2.1;font-size:13px;max-height:300px;overflow-y:auto;">{full_hl}</div>
1326
  </div>
1327
- <!-- โ•โ•โ•โ•โ•โ•โ• ํ‘œ์ ˆ ์˜์‹ฌ ์ถœ์ฒ˜ โ•โ•โ•โ•โ•โ•โ• -->
1328
- <div style="padding:18px 24px 0;">
1329
- <div style="{SEC}">๐Ÿ”— ํ‘œ์ ˆ ์˜์‹ฌ ์ถœ์ฒ˜ ({len(src_list)}๊ฑด)</div>
1330
- <table style="width:100%;border-collapse:collapse;">
1331
- <tr>
1332
- <th style="{TH}width:32px;">No</th>
1333
- <th style="{TH}width:48px;">์œ ํ˜•</th>
1334
- <th style="{TH}">์ถœ์ฒ˜๋ช… / URL</th>
1335
- <th style="{TH}width:55px;">์œ ์‚ฌ์œจ</th>
1336
- <th style="{TH}width:100px;">๋ถ„ํฌ</th>
1337
  </tr>
1338
- {src_rows if src_rows else '<tr><td colspan="5" style="padding:18px;text-align:center;color:#999;font-size:11px;border:1px solid #D5D5D5;background:#FAFBFC;">๋ฐœ๊ฒฌ๋œ ์œ ์‚ฌ ์ถœ์ฒ˜๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.</td></tr>'}
1339
- </table>
1340
- </div>
1341
- <!-- โ•โ•โ•โ•โ•โ•โ• ์˜์‹ฌ ๋ฌธ์žฅ ๋น„๊ต โ•โ•โ•โ•โ•โ•โ• -->
1342
- <div style="padding:18px 24px 0;">
1343
- <div style="{SEC}">โš ๏ธ ์˜์‹ฌ ๋ฌธ์žฅ ๋น„๊ต ({len(sim_sents)}๊ฑด)</div>
1344
- <table style="width:100%;border-collapse:collapse;">
1345
  <tr>
1346
- <th style="{TH}width:32px;">No</th>
1347
- <th style="{TH}width:38%;">๊ฒ€์‚ฌ ๋ฌธ์žฅ (์›๋ฌธ)</th>
1348
- <th style="{TH}width:34%;">๋น„๊ต ๋ฌธ์žฅ (์ถœ์ฒ˜)</th>
1349
- <th style="{TH}">์ถœ์ฒ˜</th>
1350
  </tr>
1351
- {suspect_rows if suspect_rows else '<tr><td colspan="4" style="padding:18px;text-align:center;color:#999;font-size:11px;border:1px solid #D5D5D5;background:#FAFBFC;">์œ ์‚ฌ ์˜์‹ฌ ๋ฌธ์žฅ์ด ๋ฐœ๊ฒฌ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.</td></tr>'}
1352
  </table>
1353
  </div>
1354
- <!-- โ•โ•โ•โ•โ•โ•โ• ๊ฒ€์‚ฌ ์•ˆ๋‚ด โ•โ•โ•โ•โ•โ•โ• -->
1355
- <div style="margin:18px 24px;padding:12px 14px;background:#F5F8FC;border:1px solid #D0DAEA;border-radius:4px;font-size:9px;color:#555;line-height:1.8;">
1356
- <b style="color:#333;">๐Ÿ“Œ ๊ฒ€์‚ฌ ์•ˆ๋‚ด</b><br>
1357
- ยท ๋ณธ ๋ณด๊ณ ์„œ๋Š” <b>{method_str}</b> ๊ธฐ๋ฐ˜ ์ž๋™ ํ‘œ์ ˆ ๊ฒ€์‚ฌ ๊ฒฐ๊ณผ์ž…๋‹ˆ๋‹ค.<br>
1358
- ยท ๊ฒ€์ƒ‰ ๋ฒ”์œ„: ์ธํ„ฐ๋„ท ์›นํŽ˜์ด์ง€, ํ•™์ˆ ๋…ผ๋ฌธ(KCI, RISS), ํ•ด์™ธ๋…ผ๋ฌธ(arXiv)<br>
1359
- ยท ์œ ์‚ฌ๋„๋Š” ๋ฌธ์žฅ ๋‹จ์œ„ ๋งค์นญ ๊ธฐ๋ฐ˜์ด๋ฉฐ, ์ตœ์ข… ํŒ์ •์€ ๊ต์ˆ˜์ž/๊ฒ€ํ† ์ž์˜ ํ™•์ธ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค.<br>
1360
- ยท ์ธ์šฉ ํ‘œ๊ธฐ(๋”ฐ์˜ดํ‘œ, ๊ฐ์ฃผ ๋“ฑ)๊ฐ€ ํฌํ•จ๋œ ๋ฌธ์žฅ์€ ์ธ์šฉ์œผ๋กœ ๋ถ„๋ฅ˜๋  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
 
 
 
 
 
 
 
1361
  </div>
1362
- <!-- โ•โ•โ•โ•โ•โ•โ• ํ‘ธํ„ฐ (CopyKiller ์Šคํƒ€์ผ) โ•โ•โ•โ•โ•โ•โ• -->
1363
- <div style="padding:10px 24px;background:#F0F3F8;border-top:2px solid {HDR_BG};display:flex;justify-content:space-between;align-items:center;">
1364
- <div>
1365
- <span style="font-size:13px;font-weight:900;color:{HDR_BG};">AI Detector</span>
1366
- <span style="font-size:9px;color:#999;margin-left:6px;">Plagiarism Checker v3.5</span>
1367
- </div>
1368
- <div style="text-align:right;">
1369
- <div style="font-size:9px;color:#AAA;">Powered by Brave ยท KCI ยท RISS ยท arXiv ยท Gemini</div>
1370
- <div style="font-size:8px;color:#CCC;">{now} ยท ID: {doc_id} ยท All Rights Reserved.</div>
1371
- </div>
1372
  </div>
 
1373
  </div>"""
1374
- log = '\n'.join(log_lines) + f"\n\n์ข…ํ•ฉ: {plag_pct}% {grade} | ์ถœ์ฒ˜ {len(unique_sources)}๊ฑด | ์œ ์‚ฌ๋ฌธ์žฅ {matched_sents}/{total_sents}"
1375
- return html, log
 
 
1376
  def run_detection(text, progress=gr.Progress()):
1377
  if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>โš ๏ธ ์ตœ์†Œ 50์ž</div>",""
1378
  text=text.strip()
 
1020
  all_results.extend(duckduckgo_search(f"{query} ๋…ผ๋ฌธ ํ•™์ˆ ", 2))
1021
  return all_results
1022
  def run_plagiarism(text, progress=gr.Progress()):
1023
+ """โœ… Gemini Google Search 100% - ๋‹จ์ˆœ ํ‘œ์ ˆ ๊ฒ€์‚ฌ"""
1024
+ if not text or len(text.strip()) < 50:
1025
  return "<div style='padding:20px;text-align:center;color:#888;'>โš ๏ธ ์ตœ์†Œ 50์ž ์ด์ƒ</div>", ""
1026
+
1027
  text = text.strip()
 
1028
  now = datetime.now().strftime("%Y-%m-%d %H:%M")
1029
+ doc_id = hashlib.md5(text[:100].encode()).hexdigest()[:8].upper()
1030
+
1031
+ plag_pct = 0
 
 
 
 
 
1032
  all_sources = []
1033
+ gemini_response = ""
1034
+
1035
+ # โœ… Gemini Google Search (์ œ๊ณต๋ฐ›์€ ์ฝ”๋“œ ๊ทธ๋Œ€๋กœ)
1036
+ if HAS_GENAI and GEMINI_KEY:
1037
+ progress(0.30, "Gemini Google Search ์‹คํ–‰...")
1038
+
1039
+ try:
1040
+ client = genai.Client(api_key=GEMINI_KEY)
1041
+
1042
+ # ํ”„๋กฌํ”„ํŠธ
1043
+ prompt = f"""ํ‘œ์ ˆ ๊ฒ€์‚ฌ๋ฅผ ํ•ด์ฃผ์„ธ์š”. ์•„๋ž˜ ํ…์ŠคํŠธ๊ฐ€ ์ธํ„ฐ๋„ท์— ์กด์žฌํ•˜๋Š”์ง€ Google Search๋กœ ์ฒ ์ €ํžˆ ๊ฒ€์ƒ‰ํ•˜์„ธ์š”.
1044
+
1045
+ [ํ…์ŠคํŠธ]
1046
+ {text}
1047
+
1048
+ ์‘๋‹ต:
1049
+ 1. ๋ฐœ๊ฒฌ๋œ ์œ ์‚ฌ ๋‚ด์šฉ (์žˆ์œผ๋ฉด ์ œ๋ชฉ, URL, ์œ ์‚ฌ๋„)
1050
+ 2. ๋งˆ์ง€๋ง‰์— "ํ‘œ์ ˆ์œจ: XX%"๋กœ ๊ฒฐ๋ก """
1051
+
1052
+ # ์ œ๊ณต๋ฐ›์€ ์ฝ”๋“œ ๊ตฌ์กฐ๋Œ€๋กœ
1053
+ contents = [
1054
+ types.Content(
1055
+ role="user",
1056
+ parts=[types.Part.from_text(text=prompt)],
1057
+ )
1058
+ ]
1059
+
1060
+ tools = [types.Tool(googleSearch=types.GoogleSearch())]
1061
+
1062
+ generate_content_config = types.GenerateContentConfig(
1063
+ thinking_config=types.ThinkingConfig(thinking_budget=0),
1064
+ tools=tools,
1065
+ temperature=0.3,
1066
+ max_output_tokens=4000,
1067
+ )
1068
+
1069
+ progress(0.50, "Google Search ์‹คํ–‰ ์ค‘...")
1070
+
1071
+ # ์ŠคํŠธ๋ฆฌ๋ฐ ์ˆ˜์ง‘
1072
+ full_response = ""
1073
+ for chunk in client.models.generate_content_stream(
1074
+ model="gemini-2.0-flash-lite-latest",
1075
+ contents=contents,
1076
+ config=generate_content_config,
1077
+ ):
1078
+ if chunk.text:
1079
+ full_response += chunk.text
1080
+
1081
+ gemini_response = full_response
1082
+
1083
+ # ํ‘œ์ ˆ์œจ ์ถ”์ถœ
1084
+ pm = re.search(r'ํ‘œ์ ˆ์œจ[:\s]*(\d+)', full_response)
1085
+ if pm:
1086
+ plag_pct = int(pm.group(1))
1087
+
1088
+ # ์ถœ์ฒ˜ URL ์ถ”์ถœ
1089
+ for m in re.finditer(r'https?://[^\s\)]{10,}', full_response):
1090
+ url = m.group(0)
1091
+ domain = url.split('/')[2] if '/' in url else url
1092
+ all_sources.append({
1093
+ "title": domain,
1094
+ "url": url,
1095
+ })
1096
+
1097
+ except Exception as e:
1098
+ gemini_response = f"์˜ค๋ฅ˜: {str(e)[:200]}"
1099
+
1100
  else:
1101
+ gemini_response = "โŒ Gemini API ํ‚ค ์—†์Œ"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1102
 
1103
+ # ============================================
1104
+ # ํŒ์ •
1105
+ # ============================================
1106
+ progress(0.80, "๋ณด๊ณ ์„œ ์ƒ์„ฑ...")
1107
 
1108
+ if plag_pct >= 50:
1109
+ grade, gc = "๐Ÿšจ ํ‘œ์ ˆ ์˜์‹ฌ", "#FF4444"
1110
+ elif plag_pct >= 30:
1111
+ grade, gc = "โš ๏ธ ์ฃผ์˜ ํ•„์š”", "#FF8800"
1112
+ elif plag_pct >= 15:
1113
+ grade, gc = "๐Ÿ“Œ ์œ ์‚ฌํ‘œํ˜„", "#DDAA00"
1114
+ elif plag_pct >= 5:
1115
+ grade, gc = "โœ“ ์–‘ํ˜ธ", "#4ECDC4"
1116
  else:
1117
+ grade, gc = "โœ… ์šฐ์ˆ˜", "#22AA44"
1118
+
1119
+ # ํ†ต๊ณ„
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1120
  word_count = len(split_words(text))
1121
  char_count = len(text)
1122
+
1123
+ # ์ถœ์ฒ˜ ํ…Œ์ด๋ธ”
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1124
  src_rows = ""
1125
+ for i, s in enumerate(all_sources[:20]):
1126
+ src_rows += f"""<tr style="border-bottom:1px solid #E0E0E0;">
1127
+ <td style="padding:8px;text-align:center;font-size:11px;">{i+1}</td>
1128
+ <td style="padding:8px;"><a href="{s['url']}" target="_blank" style="color:#2E86C1;text-decoration:none;font-weight:600;font-size:11px;">{s['title'][:50]}</a></td>
1129
+ <td style="padding:8px;font-size:9px;color:#888;word-break:break-all;">{s['url'][:70]}</td>
 
 
 
 
 
 
1130
  </tr>"""
1131
+
1132
+ if not src_rows:
1133
+ src_rows = '<tr><td colspan="3" style="padding:20px;text-align:center;color:#999;">๋ฐœ๊ฒฌ๋œ ์ถœ์ฒ˜ ์—†์Œ</td></tr>'
1134
+
1135
+ # HTML ๋ณด๊ณ ์„œ
1136
+ HDR = '#3B7DD8'
1137
+ html = f"""<div style="font-family:'Noto Sans KR',sans-serif;max-width:900px;margin:20px auto;background:#fff;border:1px solid #E0E0E0;border-radius:8px;">
1138
+
1139
+ <!-- ํ—ค๋” -->
1140
+ <div style="background:linear-gradient(135deg,{HDR},#4A8DE0);padding:24px;color:#fff;border-radius:8px 8px 0 0;">
1141
+ <div style="display:flex;justify-content:space-between;align-items:center;">
1142
+ <div>
1143
+ <div style="font-size:24px;font-weight:900;">ํ‘œ์ ˆ ๊ฒ€์‚ฌ ๊ฒฐ๊ณผ</div>
1144
+ <div style="font-size:12px;opacity:0.9;margin-top:4px;">Gemini Google Search ๊ธฐ๋ฐ˜ ๋ถ„์„</div>
1145
+ </div>
1146
+ <div style="text-align:right;font-size:11px;opacity:0.9;">
1147
+ <div>๋ฌธ์„œ: {doc_id}</div>
1148
+ <div>{now}</div>
1149
+ </div>
1150
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1151
  </div>
1152
+
1153
+ <!-- ๊ฒฐ๊ณผ ์š”์•ฝ -->
1154
+ <div style="padding:24px;background:#FAFBFE;border-bottom:1px solid #E0E0E0;">
1155
+ <div style="display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px;">
1156
+ <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
1157
+ <div style="font-size:48px;font-weight:900;color:{gc};">{plag_pct}%</div>
1158
+ <div style="font-size:12px;color:#666;margin-top:8px;">ํ‘œ์ ˆ์œจ</div>
1159
+ </div>
1160
+ <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
1161
+ <div style="font-size:28px;font-weight:900;color:{gc};">{grade}</div>
1162
+ <div style="font-size:12px;color:#666;margin-top:8px;">ํŒ์ •</div>
1163
+ </div>
1164
+ <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
1165
+ <div style="font-size:28px;font-weight:900;color:#666;">{len(all_sources)}</div>
1166
+ <div style="font-size:12px;color:#666;margin-top:8px;">์ถœ์ฒ˜ ๋ฐœ๊ฒฌ</div>
1167
+ </div>
1168
  </div>
 
1169
  </div>
1170
+
1171
+ <!-- ์ •๋ณด -->
1172
+ <div style="padding:24px;border-bottom:1px solid #E0E0E0;">
1173
+ <div style="font-size:13px;font-weight:700;color:#1A3C6E;margin-bottom:12px;">๐Ÿ“‹ ๊ฒ€์‚ฌ ์ •๋ณด</div>
1174
+ <table style="width:100%;font-size:12px;">
1175
+ <tr style="border-bottom:1px solid #E0E0E0;">
1176
+ <td style="padding:8px;color:#666;width:100px;">๊ธ€์ž์ˆ˜</td>
1177
+ <td style="padding:8px;font-weight:600;">{char_count:,}์ž</td>
1178
+ <td style="padding:8px;color:#666;width:100px;">๋‹จ์–ด์ˆ˜</td>
1179
+ <td style="padding:8px;font-weight:600;">{word_count:,}๋‹จ์–ด</td>
1180
  </tr>
 
 
 
 
 
 
 
1181
  <tr>
1182
+ <td style="padding:8px;color:#666;">๊ฒ€์‚ฌ ๋ฐฉ๋ฒ•</td>
1183
+ <td colspan="3" style="padding:8px;font-weight:600;">โœ… Gemini Google Search (100%)</td>
 
 
1184
  </tr>
 
1185
  </table>
1186
  </div>
1187
+
1188
+ <!-- ์ถœ์ฒ˜ -->
1189
+ <div style="padding:24px;">
1190
+ <div style="font-size:13px;font-weight:700;color:#1A3C6E;margin-bottom:12px;">๐Ÿ” ๋ฐœ๊ฒฌ๋œ ์ถœ์ฒ˜</div>
1191
+ <table style="width:100%;border-collapse:collapse;font-size:11px;">
1192
+ <thead>
1193
+ <tr style="background:{HDR};color:white;">
1194
+ <th style="padding:10px;text-align:center;width:40px;">์ˆœ์œ„</th>
1195
+ <th style="padding:10px;text-align:left;">์ถœ์ฒ˜</th>
1196
+ <th style="padding:10px;text-align:left;">URL</th>
1197
+ </tr>
1198
+ </thead>
1199
+ <tbody>{src_rows}</tbody>
1200
+ </table>
1201
  </div>
1202
+
1203
+ <!-- ํ•˜๋‹จ -->
1204
+ <div style="padding:16px 24px;background:#FFF8E1;border-top:1px solid #E0E0E0;border-radius:0 0 8px 8px;font-size:11px;color:#666;line-height:1.6;">
1205
+ <strong style="color:#D63031;">โœ… Gemini Google Search 100%</strong> - ์ธํ„ฐ๋„ท์˜ ๋ชจ๋“  ์œ ์‚ฌ ๋‚ด์šฉ์„ ๊ฒ€์ƒ‰ํ•˜๋Š” ๊ณ ๊ธ‰ AI ๋ถ„์„
 
 
 
 
 
 
1206
  </div>
1207
+
1208
  </div>"""
1209
+
1210
+ progress(0.95, "์™„๋ฃŒ!")
1211
+
1212
+ return html, ""
1213
  def run_detection(text, progress=gr.Progress()):
1214
  if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>โš ๏ธ ์ตœ์†Œ 50์ž</div>",""
1215
  text=text.strip()