Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1020,359 +1020,196 @@ def self_crawl_search(query, max_results=3):
|
|
| 1020 |
all_results.extend(duckduckgo_search(f"{query} ๋
ผ๋ฌธ ํ์ ", 2))
|
| 1021 |
return all_results
|
| 1022 |
def run_plagiarism(text, progress=gr.Progress()):
|
| 1023 |
-
|
|
|
|
| 1024 |
return "<div style='padding:20px;text-align:center;color:#888;'>โ ๏ธ ์ต์ 50์ ์ด์</div>", ""
|
|
|
|
| 1025 |
text = text.strip()
|
| 1026 |
-
sents = split_sentences(text)
|
| 1027 |
now = datetime.now().strftime("%Y-%m-%d %H:%M")
|
| 1028 |
-
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
-
blocks = []
|
| 1032 |
-
for i in range(0, len(sents), 4):
|
| 1033 |
-
block = ' '.join(sents[i:i+4])
|
| 1034 |
-
if len(block) > 20:
|
| 1035 |
-
blocks.append({"text": block, "sent_indices": list(range(i, min(i+4, len(sents))))})
|
| 1036 |
all_sources = []
|
| 1037 |
-
|
| 1038 |
-
|
| 1039 |
-
|
| 1040 |
-
if
|
| 1041 |
-
progress(0.
|
| 1042 |
-
|
| 1043 |
-
|
| 1044 |
-
|
| 1045 |
-
|
| 1046 |
-
|
| 1047 |
-
|
| 1048 |
-
|
| 1049 |
-
|
| 1050 |
-
|
| 1051 |
-
|
| 1052 |
-
|
| 1053 |
-
|
| 1054 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1055 |
else:
|
| 1056 |
-
|
| 1057 |
-
crawl_queries = []
|
| 1058 |
-
for b in blocks[:10]: # ์ต๋ 10๋ธ๋ก
|
| 1059 |
-
key_phrase = b["text"][:50].strip()
|
| 1060 |
-
crawl_queries.append((key_phrase, b))
|
| 1061 |
-
with ThreadPoolExecutor(max_workers=5) as executor:
|
| 1062 |
-
futures = {executor.submit(self_crawl_search, q, 3): (q, b) for q, b in crawl_queries}
|
| 1063 |
-
for future in as_completed(futures):
|
| 1064 |
-
q, b = futures[future]
|
| 1065 |
-
try:
|
| 1066 |
-
results = future.result()
|
| 1067 |
-
for r in results:
|
| 1068 |
-
all_sources.append(r)
|
| 1069 |
-
for si in b["sent_indices"]:
|
| 1070 |
-
sent_matches[si].append({"source": r["title"], "url": r["url"], "type": r.get("source","Web")})
|
| 1071 |
-
except: pass
|
| 1072 |
-
log_lines.append(f"์์ฒด ์น๊ฒ์: {len(crawl_queries)}์ฟผ๋ฆฌ (DuckDuckGo)")
|
| 1073 |
-
progress(0.40, "ํ์ DB ๊ฒ์ (KCI/RISS/arXiv)...")
|
| 1074 |
-
words = split_words(text)
|
| 1075 |
-
wf = Counter(words)
|
| 1076 |
-
keywords = [w for w, c in wf.most_common(20) if len(w) >= 2 and c >= 2][:5]
|
| 1077 |
-
kw_query = ' '.join(keywords[:3])
|
| 1078 |
-
academic_results = []
|
| 1079 |
-
with ThreadPoolExecutor(max_workers=3) as executor:
|
| 1080 |
-
futures = [
|
| 1081 |
-
executor.submit(search_kci, kw_query),
|
| 1082 |
-
executor.submit(search_riss, kw_query),
|
| 1083 |
-
executor.submit(search_arxiv, kw_query),
|
| 1084 |
-
]
|
| 1085 |
-
for future in as_completed(futures):
|
| 1086 |
-
try:
|
| 1087 |
-
results = future.result()
|
| 1088 |
-
academic_results.extend(results)
|
| 1089 |
-
all_sources.extend(results)
|
| 1090 |
-
except: pass
|
| 1091 |
-
log_lines.append(f"ํ์ DB: KCI/RISS/arXiv โ {len(academic_results)}๊ฑด")
|
| 1092 |
-
gemini_results = []
|
| 1093 |
-
if has_gemini:
|
| 1094 |
-
progress(0.60, "Gemini + Google Search...")
|
| 1095 |
-
for i, b in enumerate(blocks[:5]): # ์ต๋ 5๋ธ๋ก
|
| 1096 |
-
gr_result = gemini_plagiarism_check(b["text"])
|
| 1097 |
-
if gr_result:
|
| 1098 |
-
gemini_results.append(gr_result)
|
| 1099 |
-
for src in gr_result.get("sources", []):
|
| 1100 |
-
all_sources.append(src)
|
| 1101 |
-
for si in b["sent_indices"]:
|
| 1102 |
-
sent_matches[si].append({"source": src.get("title",""), "url": src.get("url",""), "type": "Google"})
|
| 1103 |
-
log_lines.append(f"Gemini: {len(blocks[:5])}๋ธ๋ก โ {sum(len(r.get('sources',[])) for r in gemini_results)}์ถ์ฒ")
|
| 1104 |
-
progress(0.80, "๋ณด๊ณ ์ ์์ฑ...")
|
| 1105 |
-
matched_sents = sum(1 for si, matches in sent_matches.items() if matches)
|
| 1106 |
-
total_sents = len(sents)
|
| 1107 |
|
| 1108 |
-
#
|
| 1109 |
-
|
|
|
|
|
|
|
| 1110 |
|
| 1111 |
-
if
|
| 1112 |
-
|
| 1113 |
-
|
| 1114 |
-
|
| 1115 |
-
|
| 1116 |
-
|
| 1117 |
-
|
|
|
|
| 1118 |
else:
|
| 1119 |
-
|
| 1120 |
-
|
| 1121 |
-
|
| 1122 |
-
for s in all_sources:
|
| 1123 |
-
url = s.get("url", "")
|
| 1124 |
-
if url and url not in seen_urls:
|
| 1125 |
-
seen_urls.add(url)
|
| 1126 |
-
unique_sources.append(s)
|
| 1127 |
-
if plag_pct >= 50: grade, grade_color, grade_bg = "ํ์ ์์ฌ", "#FF4444", "#FFE0E0"
|
| 1128 |
-
elif plag_pct >= 30: grade, grade_color, grade_bg = "์ฃผ์ ํ์", "#FF8800", "#FFF0DD"
|
| 1129 |
-
elif plag_pct >= 15: grade, grade_color, grade_bg = "์ ์ฌ ํํ ์ผ๋ถ", "#DDAA00", "#FFFBE0"
|
| 1130 |
-
elif plag_pct >= 5: grade, grade_color, grade_bg = "์ํธ", "#4ECDC4", "#E0FFF8"
|
| 1131 |
-
else: grade, grade_color, grade_bg = "์ฐ์ (์๋ณธ์ฑ ๋์)", "#22AA44", "#E0FFE8"
|
| 1132 |
-
sent_analysis = []
|
| 1133 |
-
for i, s in enumerate(sents):
|
| 1134 |
-
matches = sent_matches.get(i, [])
|
| 1135 |
-
if matches:
|
| 1136 |
-
best = matches[0]
|
| 1137 |
-
sent_analysis.append({"idx":i, "text":s, "matched":True, "source":best.get("source","")[:40], "url":best.get("url",""), "type":best.get("type","")})
|
| 1138 |
-
else:
|
| 1139 |
-
sent_analysis.append({"idx":i, "text":s, "matched":False})
|
| 1140 |
-
sim_sents = [s for s in sent_analysis if s["matched"]]
|
| 1141 |
-
src_groups = {}
|
| 1142 |
-
for src in unique_sources:
|
| 1143 |
-
key = src.get("url","")[:80]
|
| 1144 |
-
if key not in src_groups:
|
| 1145 |
-
src_groups[key] = {"title":src.get("title",""), "url":src.get("url",""), "source":src.get("source",""), "count":0}
|
| 1146 |
-
src_groups[key]["count"] += 1
|
| 1147 |
-
src_list = sorted(src_groups.values(), key=lambda x: -x["count"])
|
| 1148 |
-
methods_used = []
|
| 1149 |
-
if has_brave: methods_used.append("Brave Search(๋ณ๋ ฌ)")
|
| 1150 |
-
elif all_sources: methods_used.append("DuckDuckGo(์์ฒดํฌ๋กค๋ง)")
|
| 1151 |
-
methods_used.append("KCI ยท RISS ยท arXiv")
|
| 1152 |
-
if has_gemini: methods_used.append("Gemini+Google Search")
|
| 1153 |
-
method_str = " + ".join(methods_used)
|
| 1154 |
-
gc = grade_color
|
| 1155 |
word_count = len(split_words(text))
|
| 1156 |
char_count = len(text)
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
citation_pct = 0
|
| 1160 |
-
cat_suspect = len(sim_sents) # ์์ฌ
|
| 1161 |
-
cat_cited = 0 # ์ธ์ฉ (ํ์์ ์ธ์ฉ ๊ฐ์ง)
|
| 1162 |
-
cat_normal = total_sents - cat_suspect - cat_cited # ์ผ๋ฐ
|
| 1163 |
-
cat_suspect_pct = int(cat_suspect / max(1, total_sents) * 100)
|
| 1164 |
-
cat_normal_pct = 100 - cat_suspect_pct
|
| 1165 |
-
def src_icon(s):
|
| 1166 |
-
src = s.get("source","").lower()
|
| 1167 |
-
if "kci" in src: return "๐", "KCI"
|
| 1168 |
-
if "riss" in src: return "๐", "RISS"
|
| 1169 |
-
if "arxiv" in src: return "๐", "arXiv"
|
| 1170 |
-
if "google" in src: return "๐", "Google"
|
| 1171 |
-
if "brave" in src: return "๐", "Brave"
|
| 1172 |
-
return "๐", "Web"
|
| 1173 |
src_rows = ""
|
| 1174 |
-
for i,
|
| 1175 |
-
|
| 1176 |
-
|
| 1177 |
-
|
| 1178 |
-
|
| 1179 |
-
src_rows += f"""<tr>
|
| 1180 |
-
<td style="padding:6px 8px;font-size:11px;text-align:center;border:1px solid #D5D5D5;color:#555;">{i+1}</td>
|
| 1181 |
-
<td style="padding:6px 8px;font-size:11px;text-align:center;border:1px solid #D5D5D5;"><span style="font-size:14px;">{ico}</span><br><span style="font-size:9px;color:#666;">{stype}</span></td>
|
| 1182 |
-
<td style="padding:6px 8px;border:1px solid #D5D5D5;"><div style="font-size:11px;font-weight:600;color:#1A3C6E;margin-bottom:2px;">{title_short}</div><div style="font-size:9px;color:#888;word-break:break-all;">{url_short}</div></td>
|
| 1183 |
-
<td style="padding:6px 8px;font-size:12px;text-align:center;border:1px solid #D5D5D5;font-weight:800;color:#D63031;">{pct}%</td>
|
| 1184 |
-
<td style="padding:6px 10px;border:1px solid #D5D5D5;"><div style="background:#EDEDED;height:14px;border-radius:2px;overflow:hidden;"><div style="background:linear-gradient(90deg,#D63031,#FF7675);height:100%;width:{max(3,pct)}%;border-radius:2px;"></div></div></td>
|
| 1185 |
</tr>"""
|
| 1186 |
-
|
| 1187 |
-
|
| 1188 |
-
|
| 1189 |
-
|
| 1190 |
-
|
| 1191 |
-
|
| 1192 |
-
|
| 1193 |
-
|
| 1194 |
-
|
| 1195 |
-
|
| 1196 |
-
|
| 1197 |
-
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
-
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
| 1204 |
-
|
| 1205 |
-
|
| 1206 |
-
TL = 'padding:7px 10px;font-size:11px;color:#444;font-weight:600;background:#EDF2FA;border:1px solid #D5D5D5;'
|
| 1207 |
-
TV = 'padding:7px 10px;font-size:12px;color:#333;border:1px solid #D5D5D5;'
|
| 1208 |
-
SEC = 'font-size:13px;font-weight:800;color:#1A3C6E;margin:0 0 10px 0;padding:8px 12px;background:#EDF2FA;border-left:4px solid {};border-bottom:1px solid #D5D5D5;'.format(HDR_BG)
|
| 1209 |
-
html = f"""<div style="font-family:'Noto Sans KR','Malgun Gothic','Apple SD Gothic Neo',sans-serif;max-width:780px;margin:0 auto;background:#fff;border:2px solid #3B7DD8;box-shadow:0 2px 12px rgba(0,0,0,0.08);">
|
| 1210 |
-
<!-- โโโโโโโ ํค๋ (CopyKiller ์คํ์ผ) โโโโโโโ -->
|
| 1211 |
-
<div style="background:linear-gradient(135deg,{HDR_BG},{HDR_BG2});padding:18px 24px;color:#fff;">
|
| 1212 |
-
<table style="width:100%;"><tr>
|
| 1213 |
-
<td>
|
| 1214 |
-
<div style="font-size:10px;opacity:0.8;letter-spacing:1px;margin-bottom:4px;">AI TEXT DETECTOR ยท PLAGIARISM REPORT</div>
|
| 1215 |
-
<div style="font-size:22px;font-weight:900;letter-spacing:-0.5px;">ํ์ ๊ฒ์ฌ ๊ฒฐ๊ณผ ํ๏ฟฝ๏ฟฝ๏ฟฝ์</div>
|
| 1216 |
-
</td>
|
| 1217 |
-
<td style="text-align:right;vertical-align:bottom;">
|
| 1218 |
-
<div style="font-size:10px;opacity:0.7;">๋ฌธ์๋ฒํธ {doc_id}</div>
|
| 1219 |
-
<div style="font-size:10px;opacity:0.7;">{now}</div>
|
| 1220 |
-
</td>
|
| 1221 |
-
</tr></table>
|
| 1222 |
-
</div>
|
| 1223 |
-
<!-- โโโโโโโ ์ ์ถ ์ ๋ณด ํ
์ด๋ธ โโโโโโโ -->
|
| 1224 |
-
<div style="padding:16px 24px 0;">
|
| 1225 |
-
<div style="{SEC}">๐ ๊ฒ์ฌ ์ ๋ณด</div>
|
| 1226 |
-
<table style="width:100%;border-collapse:collapse;">
|
| 1227 |
-
<tr>
|
| 1228 |
-
<td style="{TL}width:90px;">๊ฒ์ฌ ์ผ์</td>
|
| 1229 |
-
<td style="{TV}">{now}</td>
|
| 1230 |
-
<td style="{TL}width:90px;">๋ฌธ์๋ฒํธ</td>
|
| 1231 |
-
<td style="{TV}">{doc_id}</td>
|
| 1232 |
-
</tr>
|
| 1233 |
-
<tr>
|
| 1234 |
-
<td style="{TL}">๊ฒ์ฌ ๋ฐฉ๋ฒ</td>
|
| 1235 |
-
<td style="{TV}font-size:10px;" colspan="3">{method_str}</td>
|
| 1236 |
-
</tr>
|
| 1237 |
-
<tr>
|
| 1238 |
-
<td style="{TL}">์ ์ฒด ๋ถ๋</td>
|
| 1239 |
-
<td style="{TV}" colspan="3">๊ธ์์ <b>{char_count:,}</b> ยท ์ด์ ์ <b>{word_count:,}</b> ยท ๋ฌธ์ฅ์ <b>{total_sents}</b></td>
|
| 1240 |
-
</tr>
|
| 1241 |
-
<tr>
|
| 1242 |
-
<td style="{TL}">๊ฒ์ ๋ฒ์</td>
|
| 1243 |
-
<td style="{TV}" colspan="3">์ธํฐ๋ท(์น), ํ์ ๋
ผ๋ฌธ(KCIยทRISS), ํด์ธ๋
ผ๋ฌธ(arXiv), Google Scholar</td>
|
| 1244 |
-
</tr>
|
| 1245 |
-
</table>
|
| 1246 |
-
</div>
|
| 1247 |
-
<!-- โโโโโโโ ๊ฒ์ฌ ๊ฒฐ๊ณผ (๋๋ + ๋ฐ) โโโโโโโ -->
|
| 1248 |
-
<div style="padding:18px 24px 0;">
|
| 1249 |
-
<div style="{SEC}">๐ ๊ฒ์ฌ ๊ฒฐ๊ณผ</div>
|
| 1250 |
-
<table style="width:100%;border-collapse:collapse;">
|
| 1251 |
-
<tr>
|
| 1252 |
-
<!-- ๋๋ ์ฐจํธ -->
|
| 1253 |
-
<td style="width:200px;padding:20px;text-align:center;border:1px solid #D5D5D5;vertical-align:middle;background:#FAFBFE;">
|
| 1254 |
-
<div style="position:relative;width:140px;height:140px;margin:0 auto;">
|
| 1255 |
-
<svg viewBox="0 0 36 36" style="width:140px;height:140px;transform:rotate(-90deg);">
|
| 1256 |
-
<circle cx="18" cy="18" r="15.9" fill="none" stroke="#E8ECF0" stroke-width="2.8"/>
|
| 1257 |
-
<circle cx="18" cy="18" r="15.9" fill="none" stroke="{gc}" stroke-width="2.8" stroke-dasharray="{plag_pct} {100-plag_pct}" stroke-linecap="round"/>
|
| 1258 |
-
</svg>
|
| 1259 |
-
<div style="position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);text-align:center;">
|
| 1260 |
-
<div style="font-size:9px;color:#888;margin-bottom:2px;">ํ์ ๋ฅ </div>
|
| 1261 |
-
<div style="font-size:32px;font-weight:900;color:{gc};line-height:1;">{plag_pct}<span style="font-size:14px;font-weight:700;">%</span></div>
|
| 1262 |
-
</div>
|
| 1263 |
-
</div>
|
| 1264 |
-
<div style="margin-top:8px;padding:4px 12px;background:{grade_bg};border:1px solid {gc};border-radius:20px;display:inline-block;">
|
| 1265 |
-
<span style="font-size:11px;font-weight:800;color:{gc};">{grade}</span>
|
| 1266 |
-
</div>
|
| 1267 |
-
</td>
|
| 1268 |
-
<!-- ์์ธ ๊ฒฐ๊ณผ -->
|
| 1269 |
-
<td style="padding:0;border:1px solid #D5D5D5;vertical-align:top;">
|
| 1270 |
-
<!-- ์นดํ
๊ณ ๋ฆฌ ๋ฐ (CopyKiller ํต์ฌ) -->
|
| 1271 |
-
<div style="padding:14px 18px;border-bottom:1px solid #E8E8E8;">
|
| 1272 |
-
<div style="display:flex;height:28px;border-radius:4px;overflow:hidden;border:1px solid #D0D0D0;margin-bottom:8px;">
|
| 1273 |
-
<div style="background:#E74C3C;width:{bar_suspect_w}%;"></div>
|
| 1274 |
-
<div style="background:#E8ECF0;width:{bar_normal_w}%;"></div>
|
| 1275 |
-
</div>
|
| 1276 |
-
<div style="display:flex;gap:14px;flex-wrap:wrap;font-size:9px;">
|
| 1277 |
-
<span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#E74C3C;border-radius:2px;"></span> ์์ฌ <b>{cat_suspect}</b>๊ฑด</span>
|
| 1278 |
-
<span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#F39C12;border-radius:2px;"></span> ์ถ์ฒํ์ <b>0</b>๊ฑด</span>
|
| 1279 |
-
<span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#3498DB;border-radius:2px;"></span> ์ธ์ฉ <b>{cat_cited}</b>๊ฑด</span>
|
| 1280 |
-
<span style="display:flex;align-items:center;gap:3px;"><span style="display:inline-block;width:10px;height:10px;background:#E8ECF0;border:1px solid #CCC;border-radius:2px;"></span> ์ผ๋ฐ <b>{cat_normal}</b>๊ฑด</span>
|
| 1281 |
-
</div>
|
| 1282 |
-
</div>
|
| 1283 |
-
<!-- ๋น์จ ๋ฐ 3์ค -->
|
| 1284 |
-
<div style="padding:12px 18px;">
|
| 1285 |
-
<div style="margin-bottom:10px;">
|
| 1286 |
-
<div style="display:flex;justify-content:space-between;margin-bottom:4px;"><span style="font-size:11px;font-weight:700;color:#E74C3C;">โ ํ์ ๋ฅ </span><span style="font-size:18px;font-weight:900;color:#E74C3C;">{plag_pct}%</span></div>
|
| 1287 |
-
<div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#E74C3C,#FF7675);height:100%;width:{max(1,plag_pct)}%;transition:width 0.5s;"></div></div>
|
| 1288 |
-
</div>
|
| 1289 |
-
<div style="margin-bottom:10px;">
|
| 1290 |
-
<div style="display:flex;justify-content:space-between;margin-bottom:4px;"><span style="font-size:11px;font-weight:700;color:#F39C12;">โ ์ ์ฌ์จ</span><span style="font-size:18px;font-weight:900;color:#F39C12;">{similarity_pct}%</span></div>
|
| 1291 |
-
<div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#F39C12,#FFEAA7);height:100%;width:{max(1,similarity_pct)}%;transition:width 0.5s;"></div></div>
|
| 1292 |
-
</div>
|
| 1293 |
-
<div>
|
| 1294 |
-
<div style="display:flex;justify-content:space-between;margin-bottom:4px;"><span style="font-size:11px;font-weight:700;color:#3498DB;">โ ์ธ์ฉ๋ฅ </span><span style="font-size:18px;font-weight:900;color:#3498DB;">{citation_pct}%</span></div>
|
| 1295 |
-
<div style="background:#EDEDED;height:16px;border-radius:3px;overflow:hidden;border:1px solid #DDD;"><div style="background:linear-gradient(90deg,#3498DB,#85C1E9);height:100%;width:{max(1,citation_pct)}%;transition:width 0.5s;"></div></div>
|
| 1296 |
-
</div>
|
| 1297 |
-
</div>
|
| 1298 |
-
<!-- ์์ฝ ์์น -->
|
| 1299 |
-
<div style="padding:8px 18px;background:#F8F9FB;border-top:1px solid #E8E8E8;">
|
| 1300 |
-
<table style="width:100%;border-collapse:collapse;">
|
| 1301 |
-
<tr>
|
| 1302 |
-
<td style="padding:4px;font-size:10px;color:#888;">์์ฌ๋ฌธ์ฅ</td>
|
| 1303 |
-
<td style="padding:4px;font-size:12px;font-weight:800;color:#E74C3C;">{cat_suspect}๊ฑด</td>
|
| 1304 |
-
<td style="padding:4px;font-size:10px;color:#888;">์ผ๋ฐ๋ฌธ์ฅ</td>
|
| 1305 |
-
<td style="padding:4px;font-size:12px;font-weight:800;color:#27AE60;">{cat_normal}๊ฑด</td>
|
| 1306 |
-
<td style="padding:4px;font-size:10px;color:#888;">์ ์ฒด</td>
|
| 1307 |
-
<td style="padding:4px;font-size:12px;font-weight:800;color:#333;">{total_sents}๊ฑด</td>
|
| 1308 |
-
</tr>
|
| 1309 |
-
</table>
|
| 1310 |
-
</div>
|
| 1311 |
-
</td>
|
| 1312 |
-
</tr>
|
| 1313 |
-
</table>
|
| 1314 |
</div>
|
| 1315 |
-
|
| 1316 |
-
<
|
| 1317 |
-
|
| 1318 |
-
<div style="display:
|
| 1319 |
-
<
|
| 1320 |
-
|
| 1321 |
-
|
| 1322 |
-
<
|
| 1323 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1324 |
</div>
|
| 1325 |
-
<div style="padding:14px;background:#FAFBFC;border:1px solid #D5D5D5;line-height:2.1;font-size:13px;max-height:300px;overflow-y:auto;">{full_hl}</div>
|
| 1326 |
</div>
|
| 1327 |
-
|
| 1328 |
-
<
|
| 1329 |
-
|
| 1330 |
-
<
|
| 1331 |
-
|
| 1332 |
-
|
| 1333 |
-
<
|
| 1334 |
-
<
|
| 1335 |
-
<
|
| 1336 |
-
<
|
| 1337 |
</tr>
|
| 1338 |
-
{src_rows if src_rows else '<tr><td colspan="5" style="padding:18px;text-align:center;color:#999;font-size:11px;border:1px solid #D5D5D5;background:#FAFBFC;">๋ฐ๊ฒฌ๋ ์ ์ฌ ์ถ์ฒ๊ฐ ์์ต๋๋ค.</td></tr>'}
|
| 1339 |
-
</table>
|
| 1340 |
-
</div>
|
| 1341 |
-
<!-- โโโโโโโ ์์ฌ ๋ฌธ์ฅ ๋น๊ต โโโโโโโ -->
|
| 1342 |
-
<div style="padding:18px 24px 0;">
|
| 1343 |
-
<div style="{SEC}">โ ๏ธ ์์ฌ ๋ฌธ์ฅ ๋น๊ต ({len(sim_sents)}๊ฑด)</div>
|
| 1344 |
-
<table style="width:100%;border-collapse:collapse;">
|
| 1345 |
<tr>
|
| 1346 |
-
<
|
| 1347 |
-
<
|
| 1348 |
-
<th style="{TH}width:34%;">๋น๊ต ๋ฌธ์ฅ (์ถ์ฒ)</th>
|
| 1349 |
-
<th style="{TH}">์ถ์ฒ</th>
|
| 1350 |
</tr>
|
| 1351 |
-
{suspect_rows if suspect_rows else '<tr><td colspan="4" style="padding:18px;text-align:center;color:#999;font-size:11px;border:1px solid #D5D5D5;background:#FAFBFC;">์ ์ฌ ์์ฌ ๋ฌธ์ฅ์ด ๋ฐ๊ฒฌ๋์ง ์์์ต๋๋ค.</td></tr>'}
|
| 1352 |
</table>
|
| 1353 |
</div>
|
| 1354 |
-
|
| 1355 |
-
<
|
| 1356 |
-
|
| 1357 |
-
|
| 1358 |
-
|
| 1359 |
-
|
| 1360 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1361 |
</div>
|
| 1362 |
-
|
| 1363 |
-
<
|
| 1364 |
-
|
| 1365 |
-
|
| 1366 |
-
<span style="font-size:9px;color:#999;margin-left:6px;">Plagiarism Checker v3.5</span>
|
| 1367 |
-
</div>
|
| 1368 |
-
<div style="text-align:right;">
|
| 1369 |
-
<div style="font-size:9px;color:#AAA;">Powered by Brave ยท KCI ยท RISS ยท arXiv ยท Gemini</div>
|
| 1370 |
-
<div style="font-size:8px;color:#CCC;">{now} ยท ID: {doc_id} ยท All Rights Reserved.</div>
|
| 1371 |
-
</div>
|
| 1372 |
</div>
|
|
|
|
| 1373 |
</div>"""
|
| 1374 |
-
|
| 1375 |
-
|
|
|
|
|
|
|
| 1376 |
def run_detection(text, progress=gr.Progress()):
|
| 1377 |
if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>โ ๏ธ ์ต์ 50์</div>",""
|
| 1378 |
text=text.strip()
|
|
|
|
| 1020 |
all_results.extend(duckduckgo_search(f"{query} ๋
ผ๋ฌธ ํ์ ", 2))
|
| 1021 |
return all_results
|
| 1022 |
def run_plagiarism(text, progress=gr.Progress()):
|
| 1023 |
+
"""โ
Gemini Google Search 100% - ๋จ์ ํ์ ๊ฒ์ฌ"""
|
| 1024 |
+
if not text or len(text.strip()) < 50:
|
| 1025 |
return "<div style='padding:20px;text-align:center;color:#888;'>โ ๏ธ ์ต์ 50์ ์ด์</div>", ""
|
| 1026 |
+
|
| 1027 |
text = text.strip()
|
|
|
|
| 1028 |
now = datetime.now().strftime("%Y-%m-%d %H:%M")
|
| 1029 |
+
doc_id = hashlib.md5(text[:100].encode()).hexdigest()[:8].upper()
|
| 1030 |
+
|
| 1031 |
+
plag_pct = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1032 |
all_sources = []
|
| 1033 |
+
gemini_response = ""
|
| 1034 |
+
|
| 1035 |
+
# โ
Gemini Google Search (์ ๊ณต๋ฐ์ ์ฝ๋ ๊ทธ๋๋ก)
|
| 1036 |
+
if HAS_GENAI and GEMINI_KEY:
|
| 1037 |
+
progress(0.30, "Gemini Google Search ์คํ...")
|
| 1038 |
+
|
| 1039 |
+
try:
|
| 1040 |
+
client = genai.Client(api_key=GEMINI_KEY)
|
| 1041 |
+
|
| 1042 |
+
# ํ๋กฌํํธ
|
| 1043 |
+
prompt = f"""ํ์ ๊ฒ์ฌ๋ฅผ ํด์ฃผ์ธ์. ์๋ ํ
์คํธ๊ฐ ์ธํฐ๋ท์ ์กด์ฌํ๋์ง Google Search๋ก ์ฒ ์ ํ ๊ฒ์ํ์ธ์.
|
| 1044 |
+
|
| 1045 |
+
[ํ
์คํธ]
|
| 1046 |
+
{text}
|
| 1047 |
+
|
| 1048 |
+
์๋ต:
|
| 1049 |
+
1. ๋ฐ๊ฒฌ๋ ์ ์ฌ ๋ด์ฉ (์์ผ๋ฉด ์ ๋ชฉ, URL, ์ ์ฌ๋)
|
| 1050 |
+
2. ๋ง์ง๋ง์ "ํ์ ์จ: XX%"๋ก ๊ฒฐ๋ก """
|
| 1051 |
+
|
| 1052 |
+
# ์ ๊ณต๋ฐ์ ์ฝ๋ ๊ตฌ์กฐ๋๋ก
|
| 1053 |
+
contents = [
|
| 1054 |
+
types.Content(
|
| 1055 |
+
role="user",
|
| 1056 |
+
parts=[types.Part.from_text(text=prompt)],
|
| 1057 |
+
)
|
| 1058 |
+
]
|
| 1059 |
+
|
| 1060 |
+
tools = [types.Tool(googleSearch=types.GoogleSearch())]
|
| 1061 |
+
|
| 1062 |
+
generate_content_config = types.GenerateContentConfig(
|
| 1063 |
+
thinking_config=types.ThinkingConfig(thinking_budget=0),
|
| 1064 |
+
tools=tools,
|
| 1065 |
+
temperature=0.3,
|
| 1066 |
+
max_output_tokens=4000,
|
| 1067 |
+
)
|
| 1068 |
+
|
| 1069 |
+
progress(0.50, "Google Search ์คํ ์ค...")
|
| 1070 |
+
|
| 1071 |
+
# ์คํธ๋ฆฌ๋ฐ ์์ง
|
| 1072 |
+
full_response = ""
|
| 1073 |
+
for chunk in client.models.generate_content_stream(
|
| 1074 |
+
model="gemini-2.0-flash-lite-latest",
|
| 1075 |
+
contents=contents,
|
| 1076 |
+
config=generate_content_config,
|
| 1077 |
+
):
|
| 1078 |
+
if chunk.text:
|
| 1079 |
+
full_response += chunk.text
|
| 1080 |
+
|
| 1081 |
+
gemini_response = full_response
|
| 1082 |
+
|
| 1083 |
+
# ํ์ ์จ ์ถ์ถ
|
| 1084 |
+
pm = re.search(r'ํ์ ์จ[:\s]*(\d+)', full_response)
|
| 1085 |
+
if pm:
|
| 1086 |
+
plag_pct = int(pm.group(1))
|
| 1087 |
+
|
| 1088 |
+
# ์ถ์ฒ URL ์ถ์ถ
|
| 1089 |
+
for m in re.finditer(r'https?://[^\s\)]{10,}', full_response):
|
| 1090 |
+
url = m.group(0)
|
| 1091 |
+
domain = url.split('/')[2] if '/' in url else url
|
| 1092 |
+
all_sources.append({
|
| 1093 |
+
"title": domain,
|
| 1094 |
+
"url": url,
|
| 1095 |
+
})
|
| 1096 |
+
|
| 1097 |
+
except Exception as e:
|
| 1098 |
+
gemini_response = f"์ค๋ฅ: {str(e)[:200]}"
|
| 1099 |
+
|
| 1100 |
else:
|
| 1101 |
+
gemini_response = "โ Gemini API ํค ์์"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1102 |
|
| 1103 |
+
# ============================================
|
| 1104 |
+
# ํ์
|
| 1105 |
+
# ============================================
|
| 1106 |
+
progress(0.80, "๋ณด๊ณ ์ ์์ฑ...")
|
| 1107 |
|
| 1108 |
+
if plag_pct >= 50:
|
| 1109 |
+
grade, gc = "๐จ ํ์ ์์ฌ", "#FF4444"
|
| 1110 |
+
elif plag_pct >= 30:
|
| 1111 |
+
grade, gc = "โ ๏ธ ์ฃผ์ ํ์", "#FF8800"
|
| 1112 |
+
elif plag_pct >= 15:
|
| 1113 |
+
grade, gc = "๐ ์ ์ฌํํ", "#DDAA00"
|
| 1114 |
+
elif plag_pct >= 5:
|
| 1115 |
+
grade, gc = "โ ์ํธ", "#4ECDC4"
|
| 1116 |
else:
|
| 1117 |
+
grade, gc = "โ
์ฐ์", "#22AA44"
|
| 1118 |
+
|
| 1119 |
+
# ํต๊ณ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1120 |
word_count = len(split_words(text))
|
| 1121 |
char_count = len(text)
|
| 1122 |
+
|
| 1123 |
+
# ์ถ์ฒ ํ
์ด๋ธ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1124 |
src_rows = ""
|
| 1125 |
+
for i, s in enumerate(all_sources[:20]):
|
| 1126 |
+
src_rows += f"""<tr style="border-bottom:1px solid #E0E0E0;">
|
| 1127 |
+
<td style="padding:8px;text-align:center;font-size:11px;">{i+1}</td>
|
| 1128 |
+
<td style="padding:8px;"><a href="{s['url']}" target="_blank" style="color:#2E86C1;text-decoration:none;font-weight:600;font-size:11px;">{s['title'][:50]}</a></td>
|
| 1129 |
+
<td style="padding:8px;font-size:9px;color:#888;word-break:break-all;">{s['url'][:70]}</td>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1130 |
</tr>"""
|
| 1131 |
+
|
| 1132 |
+
if not src_rows:
|
| 1133 |
+
src_rows = '<tr><td colspan="3" style="padding:20px;text-align:center;color:#999;">๋ฐ๊ฒฌ๋ ์ถ์ฒ ์์</td></tr>'
|
| 1134 |
+
|
| 1135 |
+
# HTML ๋ณด๊ณ ์
|
| 1136 |
+
HDR = '#3B7DD8'
|
| 1137 |
+
html = f"""<div style="font-family:'Noto Sans KR',sans-serif;max-width:900px;margin:20px auto;background:#fff;border:1px solid #E0E0E0;border-radius:8px;">
|
| 1138 |
+
|
| 1139 |
+
<!-- ํค๋ -->
|
| 1140 |
+
<div style="background:linear-gradient(135deg,{HDR},#4A8DE0);padding:24px;color:#fff;border-radius:8px 8px 0 0;">
|
| 1141 |
+
<div style="display:flex;justify-content:space-between;align-items:center;">
|
| 1142 |
+
<div>
|
| 1143 |
+
<div style="font-size:24px;font-weight:900;">ํ์ ๊ฒ์ฌ ๊ฒฐ๊ณผ</div>
|
| 1144 |
+
<div style="font-size:12px;opacity:0.9;margin-top:4px;">Gemini Google Search ๊ธฐ๋ฐ ๋ถ์</div>
|
| 1145 |
+
</div>
|
| 1146 |
+
<div style="text-align:right;font-size:11px;opacity:0.9;">
|
| 1147 |
+
<div>๋ฌธ์: {doc_id}</div>
|
| 1148 |
+
<div>{now}</div>
|
| 1149 |
+
</div>
|
| 1150 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1151 |
</div>
|
| 1152 |
+
|
| 1153 |
+
<!-- ๊ฒฐ๊ณผ ์์ฝ -->
|
| 1154 |
+
<div style="padding:24px;background:#FAFBFE;border-bottom:1px solid #E0E0E0;">
|
| 1155 |
+
<div style="display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px;">
|
| 1156 |
+
<div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
|
| 1157 |
+
<div style="font-size:48px;font-weight:900;color:{gc};">{plag_pct}%</div>
|
| 1158 |
+
<div style="font-size:12px;color:#666;margin-top:8px;">ํ์ ์จ</div>
|
| 1159 |
+
</div>
|
| 1160 |
+
<div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
|
| 1161 |
+
<div style="font-size:28px;font-weight:900;color:{gc};">{grade}</div>
|
| 1162 |
+
<div style="font-size:12px;color:#666;margin-top:8px;">ํ์ </div>
|
| 1163 |
+
</div>
|
| 1164 |
+
<div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
|
| 1165 |
+
<div style="font-size:28px;font-weight:900;color:#666;">{len(all_sources)}</div>
|
| 1166 |
+
<div style="font-size:12px;color:#666;margin-top:8px;">์ถ์ฒ ๋ฐ๊ฒฌ</div>
|
| 1167 |
+
</div>
|
| 1168 |
</div>
|
|
|
|
| 1169 |
</div>
|
| 1170 |
+
|
| 1171 |
+
<!-- ์ ๋ณด -->
|
| 1172 |
+
<div style="padding:24px;border-bottom:1px solid #E0E0E0;">
|
| 1173 |
+
<div style="font-size:13px;font-weight:700;color:#1A3C6E;margin-bottom:12px;">๐ ๊ฒ์ฌ ์ ๋ณด</div>
|
| 1174 |
+
<table style="width:100%;font-size:12px;">
|
| 1175 |
+
<tr style="border-bottom:1px solid #E0E0E0;">
|
| 1176 |
+
<td style="padding:8px;color:#666;width:100px;">๊ธ์์</td>
|
| 1177 |
+
<td style="padding:8px;font-weight:600;">{char_count:,}์</td>
|
| 1178 |
+
<td style="padding:8px;color:#666;width:100px;">๋จ์ด์</td>
|
| 1179 |
+
<td style="padding:8px;font-weight:600;">{word_count:,}๋จ์ด</td>
|
| 1180 |
</tr>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1181 |
<tr>
|
| 1182 |
+
<td style="padding:8px;color:#666;">๊ฒ์ฌ ๋ฐฉ๋ฒ</td>
|
| 1183 |
+
<td colspan="3" style="padding:8px;font-weight:600;">โ
Gemini Google Search (100%)</td>
|
|
|
|
|
|
|
| 1184 |
</tr>
|
|
|
|
| 1185 |
</table>
|
| 1186 |
</div>
|
| 1187 |
+
|
| 1188 |
+
<!-- ์ถ์ฒ -->
|
| 1189 |
+
<div style="padding:24px;">
|
| 1190 |
+
<div style="font-size:13px;font-weight:700;color:#1A3C6E;margin-bottom:12px;">๐ ๋ฐ๊ฒฌ๋ ์ถ์ฒ</div>
|
| 1191 |
+
<table style="width:100%;border-collapse:collapse;font-size:11px;">
|
| 1192 |
+
<thead>
|
| 1193 |
+
<tr style="background:{HDR};color:white;">
|
| 1194 |
+
<th style="padding:10px;text-align:center;width:40px;">์์</th>
|
| 1195 |
+
<th style="padding:10px;text-align:left;">์ถ์ฒ</th>
|
| 1196 |
+
<th style="padding:10px;text-align:left;">URL</th>
|
| 1197 |
+
</tr>
|
| 1198 |
+
</thead>
|
| 1199 |
+
<tbody>{src_rows}</tbody>
|
| 1200 |
+
</table>
|
| 1201 |
</div>
|
| 1202 |
+
|
| 1203 |
+
<!-- ํ๋จ -->
|
| 1204 |
+
<div style="padding:16px 24px;background:#FFF8E1;border-top:1px solid #E0E0E0;border-radius:0 0 8px 8px;font-size:11px;color:#666;line-height:1.6;">
|
| 1205 |
+
<strong style="color:#D63031;">โ
Gemini Google Search 100%</strong> - ์ธํฐ๋ท์ ๋ชจ๋ ์ ์ฌ ๋ด์ฉ์ ๊ฒ์ํ๋ ๊ณ ๊ธ AI ๋ถ์
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1206 |
</div>
|
| 1207 |
+
|
| 1208 |
</div>"""
|
| 1209 |
+
|
| 1210 |
+
progress(0.95, "์๋ฃ!")
|
| 1211 |
+
|
| 1212 |
+
return html, ""
|
| 1213 |
def run_detection(text, progress=gr.Progress()):
|
| 1214 |
if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>โ ๏ธ ์ต์ 50์</div>",""
|
| 1215 |
text=text.strip()
|