Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,8 @@ import tempfile
|
|
| 10 |
import base64
|
| 11 |
from datetime import datetime
|
| 12 |
import re
|
| 13 |
-
from bs4 import BeautifulSoup # BeautifulSoup
|
|
|
|
| 14 |
|
| 15 |
# 로깅 설정
|
| 16 |
logging.basicConfig(
|
|
@@ -34,23 +35,53 @@ def get_system_prompt():
|
|
| 34 |
return """
|
| 35 |
๋น์ ์ ์ ๋ฌธ ๋ธ๋ก๊ทธ ์์ฑ ์ ๋ฌธ๊ฐ์
๋๋ค. ๋ชจ๋ ๋ธ๋ก๊ทธ ๊ธ ์์ฑ ์์ฒญ์ ๋ํด ๋ค์์ 8๋จ๊ณ ํ๋ ์์ํฌ๋ฅผ ์ฒ ์ ํ ๋ฐ๋ฅด๋, ์์ฐ์ค๋ฝ๊ณ ๋งค๋ ฅ์ ์ธ ๊ธ์ด ๋๋๋ก ์์ฑํด์ผ ํฉ๋๋ค:
|
| 36 |
|
| 37 |
-
๋
์ ์ฐ๊ฒฐ ๋จ๊ณ
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
์ด๋ฌํ ํ๋ ์์ํฌ๋ฅผ ๋ฐํ์ผ๋ก, ์์ฒญ๋ฐ์ ์ฃผ์ ์ ๋ํด ์ฒด๊ณ์ ์ด๊ณ ๋งค๋ ฅ์ ์ธ ๋ธ๋ก๊ทธ ํฌ์คํธ๋ฅผ ์์ฑํ๊ฒ ์ต๋๋ค.
|
| 56 |
"""
|
|
@@ -205,254 +236,64 @@ def generate_mock_search_results(query):
|
|
| 205 |
|
| 206 |
return notice + "\n".join(summary_lines)
|
| 207 |
|
| 208 |
-
# Google 검색 함수 (SerpAPI 대신 직접 검색)
|
| 209 |
-
# Google ๊ฒ์ ํจ์ (BeautifulSoup์ ์ฌ์ฉํ์ฌ ๊ฒฐ๊ณผ ํ์ฑ)
|
| 210 |
-
# Google 검색 함수 개선
|
| 211 |
-
def do_google_search(query, num_results=5):
|
| 212 |
-
try:
|
| 213 |
-
# ๋ค์ํ User-Agent ์ฌ์ฉ (Google ์ฐจ๋จ ๋ฐฉ์ง)
|
| 214 |
-
headers = {
|
| 215 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
| 216 |
-
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
| 217 |
-
'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
|
| 218 |
-
'Referer': 'https://www.google.com/',
|
| 219 |
-
}
|
| 220 |
-
|
| 221 |
-
# ๊ฒ์ URL
|
| 222 |
-
search_url = f"https://www.google.com/search?q={query}&num={num_results}&hl=ko&gl=kr"
|
| 223 |
-
logging.info(f"๊ตฌ๊ธ ๊ฒ์ URL: {search_url}")
|
| 224 |
-
|
| 225 |
-
# ์์ฒญ ๋ณด๋ด๊ธฐ
|
| 226 |
-
response = requests.get(search_url, headers=headers, timeout=10)
|
| 227 |
-
|
| 228 |
-
# ์๋ต์ด ์ฑ๊ณต์ ์ธ์ง ํ์ธ
|
| 229 |
-
if response.status_code != 200:
|
| 230 |
-
logging.error(f"Google ๊ฒ์ ์๋ต ์ํ ์ฝ๋: {response.status_code}")
|
| 231 |
-
return generate_mock_search_results(query)
|
| 232 |
-
|
| 233 |
-
# HTML ํ์ฑ์ ์ํ ๋๋ฒ๊น
|
| 234 |
-
with open("google_response.html", "w", encoding="utf-8") as f:
|
| 235 |
-
f.write(response.text)
|
| 236 |
-
logging.info("Google ์๋ต HTML์ 'google_response.html'์ ์ ์ฅํ์ต๋๋ค.")
|
| 237 |
-
|
| 238 |
-
# BeautifulSoup์ผ๋ก HTML ํ์ฑ
|
| 239 |
-
soup = BeautifulSoup(response.text, 'html.parser')
|
| 240 |
-
|
| 241 |
-
# ๊ฒ์ ๊ฒฐ๊ณผ ์ถ์ถ
|
| 242 |
-
organic_results = []
|
| 243 |
-
|
| 244 |
-
# 2023-2025 ๊ตฌ๊ธ ๊ฒ์ ๊ฒฐ๊ณผ ์ ํ์ ์๋ (๋ค์ํ ์ ํ์ ์๋)
|
| 245 |
-
selectors = [
|
| 246 |
-
'div.g', 'div.Gx5Zad', 'div.tF2Cxc', 'div.yuRUbf',
|
| 247 |
-
'.v5yQqb', '.MjjYud', 'div[jscontroller]', '.fP1Qef',
|
| 248 |
-
'div[data-sokoban-container]', '.hlcw0c'
|
| 249 |
-
]
|
| 250 |
-
|
| 251 |
-
# ๋ชจ๋ ์ ํ์ ์๋
|
| 252 |
-
containers = []
|
| 253 |
-
for selector in selectors:
|
| 254 |
-
elements = soup.select(selector)
|
| 255 |
-
if elements:
|
| 256 |
-
logging.info(f"์ ํ์ '{selector}'๋ก {len(elements)}๊ฐ ์์๋ฅผ ์ฐพ์์ต๋๋ค.")
|
| 257 |
-
containers.extend(elements)
|
| 258 |
-
# ์ถฉ๋ถํ ์์๋ฅผ ์ฐพ์์ผ๋ฉด ์ค๋จ
|
| 259 |
-
if len(containers) >= num_results * 2: # ์ค๋ณต ๊ฐ๋ฅ์ฑ ๊ณ ๋ คํด 2๋ฐฐ๋ก ์ฐพ์
|
| 260 |
-
break
|
| 261 |
-
|
| 262 |
-
# ๋์: ๋ชจ๋ <a> ํ๊ทธ์์ href๊ฐ http๋ก ์์ํ๋ ๊ฒ๋ง ์ ํ
|
| 263 |
-
if not containers:
|
| 264 |
-
logging.warning("๊ตฌ์กฐํ๋ ์ปจํ
์ด๋๋ฅผ ์ฐพ์ ์ ์์ด ์ง์ ๋งํฌ๋ฅผ ๊ฒ์ํฉ๋๋ค.")
|
| 265 |
-
link_elements = soup.select('a[href^="http"]')
|
| 266 |
-
|
| 267 |
-
for link in link_elements:
|
| 268 |
-
if len(organic_results) >= num_results:
|
| 269 |
-
break
|
| 270 |
-
|
| 271 |
-
href = link.get('href', '')
|
| 272 |
-
# Google ๋ฆฌ๋ค์ด๋ ํธ URL ํํฐ๋ง
|
| 273 |
-
if '/url?' in href or 'google.com' in href:
|
| 274 |
-
continue
|
| 275 |
-
|
| 276 |
-
# ๋ถ๋ชจ ์์์์ ํ
์คํธ ์ถ์ถ ์๋
|
| 277 |
-
parent = link.parent
|
| 278 |
-
title = link.get_text(strip=True) or "์ ๋ชฉ ์์"
|
| 279 |
-
|
| 280 |
-
# ์ถฉ๋ถํ ๊ธธ์ด์ ํ
์คํธ๊ฐ ์๋ ๊ฒฝ์ฐ๋ง ๊ฒฐ๊ณผ๋ก ์ถ๊ฐ
|
| 281 |
-
if len(title) > 5: # ์๋ฏธ ์๋ ์ ๋ชฉ์ ๋ณดํต 5์ ์ด์
|
| 282 |
-
# ์ฃผ๋ณ ํ
์คํธ ์ถ์ถ ์๋
|
| 283 |
-
surrounding_text = ""
|
| 284 |
-
for sibling in parent.next_siblings:
|
| 285 |
-
if sibling.name and sibling.get_text(strip=True):
|
| 286 |
-
surrounding_text = sibling.get_text(strip=True)
|
| 287 |
-
break
|
| 288 |
-
|
| 289 |
-
# ๊ฒฐ๊ณผ ์ถ๊ฐ
|
| 290 |
-
organic_results.append({
|
| 291 |
-
"title": title,
|
| 292 |
-
"link": href,
|
| 293 |
-
"snippet": surrounding_text or "์ถ๊ฐ ์ค๋ช
์์",
|
| 294 |
-
"displayed_link": href.split('/')[2] if '/' in href else href
|
| 295 |
-
})
|
| 296 |
-
|
| 297 |
-
# ์ ํ์ ๊ธฐ๋ฐ ํ์ฑ ์๋
|
| 298 |
-
else:
|
| 299 |
-
for container in containers:
|
| 300 |
-
if len(organic_results) >= num_results:
|
| 301 |
-
break
|
| 302 |
-
|
| 303 |
-
# 1. ๋งํฌ ์ฐพ๊ธฐ
|
| 304 |
-
link_element = container.select_one('a[href^="http"]')
|
| 305 |
-
if not link_element:
|
| 306 |
-
continue
|
| 307 |
-
|
| 308 |
-
link = link_element.get('href', '')
|
| 309 |
-
# Google์ ๋ฆฌ๋ค์ด๋ ํธ URL์ด๋ฉด ๊ฑด๋๋ฐ๊ธฐ
|
| 310 |
-
if '/url?' in link or 'google.com' in link:
|
| 311 |
-
continue
|
| 312 |
-
|
| 313 |
-
# 2. ์ ๋ชฉ ์ฐพ๊ธฐ (๋ค์ํ ์ ํ์)
|
| 314 |
-
title_selectors = ['h3', '.LC20lb', '.DKV0Md', '.l', '.vvjwJb']
|
| 315 |
-
title = None
|
| 316 |
-
for selector in title_selectors:
|
| 317 |
-
title_element = container.select_one(selector)
|
| 318 |
-
if title_element and title_element.get_text(strip=True):
|
| 319 |
-
title = title_element.get_text(strip=True)
|
| 320 |
-
break
|
| 321 |
-
|
| 322 |
-
if not title:
|
| 323 |
-
title = link_element.get_text(strip=True) or "์ ๋ชฉ ์์"
|
| 324 |
-
|
| 325 |
-
# 3. ์ค๋ํซ ์ฐพ๊ธฐ (๋ค์ํ ์ ํ์)
|
| 326 |
-
snippet_selectors = ['.VwiC3b', '.lyLwlc', '.yXK7lf', '.lEBKkf', '.s', '.st']
|
| 327 |
-
snippet = "์ค๋ช
์์"
|
| 328 |
-
for selector in snippet_selectors:
|
| 329 |
-
snippet_element = container.select_one(selector)
|
| 330 |
-
if snippet_element and snippet_element.get_text(strip=True):
|
| 331 |
-
snippet = snippet_element.get_text(strip=True)
|
| 332 |
-
break
|
| 333 |
-
|
| 334 |
-
# 4. ํ์ ๋งํฌ ์ฐพ๊ธฐ
|
| 335 |
-
displayed_link_selectors = ['cite', '.UPmit', '.qLRx3b', '.iUh30']
|
| 336 |
-
displayed_link = link.split('/')[2] if '/' in link else link
|
| 337 |
-
for selector in displayed_link_selectors:
|
| 338 |
-
element = container.select_one(selector)
|
| 339 |
-
if element and element.get_text(strip=True):
|
| 340 |
-
displayed_link = element.get_text(strip=True)
|
| 341 |
-
break
|
| 342 |
-
|
| 343 |
-
# ์ค๋ณต ์ ๊ฑฐ
|
| 344 |
-
is_duplicate = False
|
| 345 |
-
for result in organic_results:
|
| 346 |
-
if result["link"] == link or result["title"] == title:
|
| 347 |
-
is_duplicate = True
|
| 348 |
-
break
|
| 349 |
-
|
| 350 |
-
if not is_duplicate:
|
| 351 |
-
organic_results.append({
|
| 352 |
-
"title": title,
|
| 353 |
-
"link": link,
|
| 354 |
-
"snippet": snippet,
|
| 355 |
-
"displayed_link": displayed_link
|
| 356 |
-
})
|
| 357 |
-
|
| 358 |
-
if not organic_results:
|
| 359 |
-
logging.warning("๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ํ์ฑํ ์ ์์ต๋๋ค. ๊ฐ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํํฉ๋๋ค.")
|
| 360 |
-
return generate_mock_search_results(query)
|
| 361 |
-
|
| 362 |
-
# ๊ฒ์ ๊ฒฐ๊ณผ ๋งํฌ๋ค์ด ํ์์ผ๋ก ๋ณํ
|
| 363 |
-
summary_lines = []
|
| 364 |
-
for idx, item in enumerate(organic_results, start=1):
|
| 365 |
-
title = item.get("title", "No title")
|
| 366 |
-
link = item.get("link", "#")
|
| 367 |
-
snippet = item.get("snippet", "No description")
|
| 368 |
-
displayed_link = item.get("displayed_link", link)
|
| 369 |
-
|
| 370 |
-
summary_lines.append(
|
| 371 |
-
f"### Result {idx}: {title}\n\n"
|
| 372 |
-
f"{snippet}\n\n"
|
| 373 |
-
f"**์ถ์ฒ**: [{displayed_link}]({link})\n\n"
|
| 374 |
-
f"---\n"
|
| 375 |
-
)
|
| 376 |
-
|
| 377 |
-
# ๋ชจ๋ธ์๊ฒ ๋ช
ํํ ์ง์นจ ์ถ๊ฐ
|
| 378 |
-
instructions = """
|
| 379 |
-
# ์น ๊ฒ์ ๊ฒฐ๊ณผ
|
| 380 |
-
์๋๋ ๊ฒ์ ๊ฒฐ๊ณผ์
๋๋ค. ์ง๋ฌธ์ ๋ต๋ณํ ๋ ์ด ์ ๋ณด๋ฅผ ํ์ฉํ์ธ์:
|
| 381 |
-
1. ๊ฐ ๊ฒฐ๊ณผ์ ์ ๋ชฉ, ๋ด์ฉ, ์ถ์ฒ ๋งํฌ๋ฅผ ์ฐธ๊ณ ํ์ธ์
|
| 382 |
-
2. ๋ต๋ณ์ ๊ด๋ จ ์ ๋ณด์ ์ถ์ฒ๋ฅผ ๋ช
์์ ์ผ๋ก ์ธ์ฉํ์ธ์ (์: "X ์ถ์ฒ์ ๋ฐ๋ฅด๋ฉด...")
|
| 383 |
-
3. ์๋ต์ ์ค์ ์ถ์ฒ ๋งํฌ๋ฅผ ํฌํจํ์ธ์
|
| 384 |
-
4. ์ฌ๋ฌ ์ถ์ฒ์ ์ ๋ณด๋ฅผ ์ข
ํฉํ์ฌ ๋ต๋ณํ์ธ์
|
| 385 |
-
"""
|
| 386 |
-
|
| 387 |
-
search_results = instructions + "\n".join(summary_lines)
|
| 388 |
-
logging.info(f"Google ๊ฒ์ ๊ฒฐ๊ณผ {len(organic_results)}๊ฐ ํ์ฑ ์๋ฃ")
|
| 389 |
-
return search_results
|
| 390 |
-
|
| 391 |
-
except Exception as e:
|
| 392 |
-
logging.error(f"Google ๊ฒ์ ์คํจ: {e}")
|
| 393 |
-
return generate_mock_search_results(query)
|
| 394 |
|
| 395 |
-
|
|
|
|
|
|
|
| 396 |
def do_web_search(query: str) -> str:
|
| 397 |
"""
|
| 398 |
-
์น ๊ฒ์์
|
|
|
|
|
|
|
|
|
|
| 399 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
try:
|
| 401 |
-
# API
|
| 402 |
-
if not SERPHOUSE_API_KEY or "mock" in SERPHOUSE_API_KEY.lower():
|
| 403 |
-
logging.warning("API ํค๊ฐ ์๊ฑฐ๋ Mock ๋ชจ๋์
๋๋ค. ๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํํฉ๋๋ค.")
|
| 404 |
-
return generate_mock_search_results(query)
|
| 405 |
-
|
| 406 |
-
# SerpHouse API ์ฌ์ฉ
|
| 407 |
url = "https://api.serphouse.com/serp/live"
|
| 408 |
params = {
|
| 409 |
"q": query,
|
| 410 |
"domain": "google.com",
|
| 411 |
-
"serp_type": "web",
|
| 412 |
-
"device": "desktop",
|
| 413 |
-
"lang": "ko", # ํ๊ตญ์ด
|
| 414 |
-
"num": "5" # ๊ฒฐ๊ณผ
|
| 415 |
}
|
| 416 |
-
|
| 417 |
headers = {
|
| 418 |
"Authorization": f"Bearer {SERPHOUSE_API_KEY}"
|
| 419 |
}
|
| 420 |
|
| 421 |
logging.info(f"SerpHouse API ํธ์ถ ์ค... ๊ฒ์์ด: {query}")
|
| 422 |
-
|
| 423 |
-
# ์งง์ ํ์์์์ผ๋ก ์์ฒญ ์๋
|
| 424 |
response = requests.get(url, headers=headers, params=params, timeout=15)
|
| 425 |
response.raise_for_status()
|
| 426 |
|
| 427 |
-
logging.info(f"SerpHouse API ์๋ต ์ํ ์ฝ๋: {response.status_code}")
|
| 428 |
data = response.json()
|
| 429 |
|
| 430 |
-
#
|
| 431 |
results = data.get("results", {})
|
| 432 |
organic = None
|
| 433 |
|
| 434 |
-
# ๊ฐ๋ฅํ ์๋ต ๊ตฌ์กฐ
|
| 435 |
if isinstance(results, dict) and "organic" in results:
|
| 436 |
organic = results["organic"]
|
| 437 |
-
# ๊ฐ๋ฅํ ์๋ต ๊ตฌ์กฐ 2
|
| 438 |
-
elif isinstance(results, dict) and "results" in results:
|
| 439 |
-
if isinstance(results["results"], dict) and "organic" in results["results"]:
|
| 440 |
-
organic = results["results"]["organic"]
|
| 441 |
-
# ๊ฐ๋ฅํ ์๋ต ๊ตฌ์กฐ 3
|
| 442 |
elif "organic" in data:
|
| 443 |
organic = data["organic"]
|
| 444 |
-
|
|
|
|
| 445 |
if not organic:
|
| 446 |
-
logging.warning("์๋ต์์ organic
|
| 447 |
-
return
|
| 448 |
-
|
| 449 |
-
#
|
| 450 |
-
|
| 451 |
-
limited_organic = organic[:max_results]
|
| 452 |
|
| 453 |
-
# ๊ฒฐ๊ณผ
|
| 454 |
summary_lines = []
|
| 455 |
-
for idx, item in enumerate(
|
| 456 |
title = item.get("title", "No title")
|
| 457 |
link = item.get("link", "#")
|
| 458 |
snippet = item.get("snippet", "No description")
|
|
@@ -465,7 +306,6 @@ def do_web_search(query: str) -> str:
|
|
| 465 |
f"---\n"
|
| 466 |
)
|
| 467 |
|
| 468 |
-
# ๋ชจ๋ธ์๊ฒ ๋ช
ํํ ์ง์นจ ์ถ๊ฐ
|
| 469 |
instructions = """
|
| 470 |
# ์น ๊ฒ์ ๊ฒฐ๊ณผ
|
| 471 |
์๋๋ ๊ฒ์ ๊ฒฐ๊ณผ์
๋๋ค. ์ง๋ฌธ์ ๋ต๋ณํ ๋ ์ด ์ ๋ณด๋ฅผ ํ์ฉํ์ธ์:
|
|
@@ -475,16 +315,15 @@ def do_web_search(query: str) -> str:
|
|
| 475 |
4. ์ฌ๋ฌ ์ถ์ฒ์ ์ ๋ณด๋ฅผ ์ข
ํฉํ์ฌ ๋ต๋ณํ์ธ์
|
| 476 |
"""
|
| 477 |
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
return search_results
|
| 481 |
-
|
| 482 |
except requests.exceptions.Timeout:
|
| 483 |
-
logging.error("
|
| 484 |
-
return
|
| 485 |
except Exception as e:
|
| 486 |
-
logging.error(f"
|
| 487 |
-
return
|
|
|
|
| 488 |
|
| 489 |
def chatbot_interface():
|
| 490 |
st.title("Ginigen Blog")
|
|
@@ -631,7 +470,7 @@ def chatbot_interface():
|
|
| 631 |
message_placeholder = st.empty()
|
| 632 |
full_response = ""
|
| 633 |
|
| 634 |
-
# ์น ๊ฒ์ ์ํ (
|
| 635 |
system_prompt = get_system_prompt()
|
| 636 |
if st.session_state.use_web_search:
|
| 637 |
with st.spinner("์น์์ ๊ด๋ จ ์ ๋ณด๋ฅผ ๊ฒ์ ์ค..."):
|
|
@@ -639,7 +478,7 @@ def chatbot_interface():
|
|
| 639 |
search_query = extract_keywords(prompt, top_k=5)
|
| 640 |
st.info(f"๊ฒ์์ด: {search_query}")
|
| 641 |
|
| 642 |
-
#
|
| 643 |
search_results = do_web_search(search_query)
|
| 644 |
|
| 645 |
if "๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ" in search_results:
|
|
@@ -773,4 +612,4 @@ if __name__ == "__main__":
|
|
| 773 |
f.write("markdown>=3.5.1\n")
|
| 774 |
f.write("pillow>=10.1.0\n")
|
| 775 |
|
| 776 |
-
main()
|
|
|
|
| 10 |
import base64
|
| 11 |
from datetime import datetime
|
| 12 |
import re
|
| 13 |
+
from bs4 import BeautifulSoup # BeautifulSoup๋ ์ด์ ์ฌ์ฉํ์ง ์์ง๋ง, ํ์ ์ ์ ์ง
|
| 14 |
+
# (์ง์ ๊ตฌ๊ธ ๊ฒ์ ๋ก์ง์ ์ญ์ ํ์ผ๋ฏ๋ก ์ฌ์ค์ BeautifulSoup๋ ํ์ ์์ต๋๋ค.)
|
| 15 |
|
| 16 |
# 로깅 설정
|
| 17 |
logging.basicConfig(
|
|
|
|
| 35 |
return """
|
| 36 |
๋น์ ์ ์ ๋ฌธ ๋ธ๋ก๊ทธ ์์ฑ ์ ๋ฌธ๊ฐ์
๋๋ค. ๋ชจ๋ ๋ธ๋ก๊ทธ ๊ธ ์์ฑ ์์ฒญ์ ๋ํด ๋ค์์ 8๋จ๊ณ ํ๋ ์์ํฌ๋ฅผ ์ฒ ์ ํ ๋ฐ๋ฅด๋, ์์ฐ์ค๋ฝ๊ณ ๋งค๋ ฅ์ ์ธ ๊ธ์ด ๋๋๋ก ์์ฑํด์ผ ํฉ๋๋ค:
|
| 37 |
|
| 38 |
+
๋
์ ์ฐ๊ฒฐ ๋จ๊ณ
|
| 39 |
+
1.1. ๊ณต๊ฐ๋ ํ์ฑ์ ์ํ ์น๊ทผํ ์ธ์ฌ
|
| 40 |
+
1.2. ๋
์์ ์ค์ ๊ณ ๋ฏผ์ ๋ฐ์ํ ๋์
์ง๋ฌธ
|
| 41 |
+
1.3. ์ฃผ์ ์ ๋ํ ์ฆ๊ฐ์ ๊ด์ฌ ์ ๋
|
| 42 |
+
|
| 43 |
+
๋ฌธ์ ์ ์ ๋จ๊ณ
|
| 44 |
+
2.1. ๋
์์ ํ์ธํฌ์ธํธ ๊ตฌ์ฒดํ
|
| 45 |
+
2.2. ๋ฌธ์ ์ ์๊ธ์ฑ๊ณผ ์ํฅ๋ ๋ถ์
|
| 46 |
+
2.3. ํด๊ฒฐ ํ์์ฑ์ ๋ํ ๊ณต๊ฐ๋ ํ์ฑ
|
| 47 |
+
|
| 48 |
+
์ ๋ฌธ์ฑ ์
์ฆ ๋จ๊ณ
|
| 49 |
+
3.1. ๊ฐ๊ด์ ๋ฐ์ดํฐ ๊ธฐ๋ฐ ๋ถ์
|
| 50 |
+
3.2. ์ ๋ฌธ๊ฐ ๊ฒฌํด์ ์ฐ๊ตฌ ๊ฒฐ๊ณผ ์ธ์ฉ
|
| 51 |
+
3.3. ์ค์ ์ฌ๋ก๋ฅผ ํตํ ๋ฌธ์ ๊ตฌ์ฒดํ
|
| 52 |
+
|
| 53 |
+
์๋ฃจ์
์ ๊ณต ๋จ๊ณ
|
| 54 |
+
4.1. ๋จ๊ณ๋ณ ์ค์ฒ ๊ฐ์ด๋๋ผ์ธ ์ ์
|
| 55 |
+
4.2. ์ฆ์ ์ ์ฉ ๊ฐ๋ฅํ ๊ตฌ์ฒด์ ํ
|
| 56 |
+
4.3. ์์ ์ฅ์ ๋ฌผ๊ณผ ๊ทน๋ณต ๋ฐฉ์ ํฌํจ
|
| 57 |
+
|
| 58 |
+
์ ๋ขฐ๋ ๊ฐํ ๋จ๊ณ
|
| 59 |
+
5.1. ์ค์ ์ฑ๊ณต ์ฌ๋ก ์ ์
|
| 60 |
+
5.2. ๊ตฌ์ฒด์ ์ฌ์ฉ์ ํ๊ธฐ ์ธ์ฉ
|
| 61 |
+
5.3. ๊ฐ๊ด์ ๋ฐ์ดํฐ๋ก ํจ๊ณผ ์
์ฆ
|
| 62 |
+
|
| 63 |
+
ํ๋ ์ ๋ ๋จ๊ณ
|
| 64 |
+
6.1. ๋ช
ํํ ์ฒซ ์ค์ฒ ๋จ๊ณ ์ ์
|
| 65 |
+
6.2. ์๊ธ์ฑ์ ๊ฐ์กฐํ ํ๋ ์ด๊ตฌ
|
| 66 |
+
6.3. ์ค์ฒ ๋๊ธฐ ๋ถ์ฌ ์์ ํฌํจ
|
| 67 |
+
|
| 68 |
+
์ง์ ์ฑ ๊ฐํ ๋จ๊ณ
|
| 69 |
+
7.1. ์๋ฃจ์
์ ํ๊ณ ํฌ๋ช
ํ๊ฒ ๊ณต๊ฐ
|
| 70 |
+
7.2. ๊ฐ์ธ๋ณ ์ฐจ์ด ์กด์ฌ ์ธ์
|
| 71 |
+
7.3. ํ์ ์กฐ๊ฑด๊ณผ ์ฃผ์์ฌํญ ๋ช
์
|
| 72 |
+
|
| 73 |
+
๊ด๊ณ ์ง์ ๋จ๊ณ
|
| 74 |
+
8.1. ์ง์ ์ฑ ์๋ ๊ฐ์ฌ ์ธ์ฌ
|
| 75 |
+
8.2. ๋ค์ ์ปจํ
์ธ ์๊ณ ๋ก ๊ธฐ๋๊ฐ ์กฐ์ฑ
|
| 76 |
+
8.3. ์ํต ์ฑ๋ ์๋ด
|
| 77 |
+
|
| 78 |
+
์์ฑ ์ ์ค์์ฌํญ
|
| 79 |
+
9.1. ๊ธ์ ์: 1500-2000์ ๋ด์ธ
|
| 80 |
+
9.2. ๋ฌธ๋จ ๊ธธ์ด: 3-4๋ฌธ์ฅ ์ด๋ด
|
| 81 |
+
9.3. ์๊ฐ์ ๊ตฌ๋ถ: ์์ ๋ชฉ, ๊ตฌ๋ถ์ , ๋ฒํธ ๋ชฉ๋ก ํ์ฉ
|
| 82 |
+
9.4. ํค์ค๋งค๋: ์น๊ทผํ๊ณ ์ ๋ฌธ์ ์ธ ๋ํ์ฒด
|
| 83 |
+
9.5. ๋ฐ์ดํฐ: ๋ชจ๋ ์ ๋ณด์ ์ถ์ฒ ๋ช
์
|
| 84 |
+
9.6. ๊ฐ๋
์ฑ: ๋ช
ํํ ๋จ๋ฝ ๊ตฌ๋ถ๊ณผ ๊ฐ์กฐ์ ์ฌ์ฉ
|
| 85 |
|
| 86 |
์ด๋ฌํ ํ๋ ์์ํฌ๋ฅผ ๋ฐํ์ผ๋ก, ์์ฒญ๋ฐ์ ์ฃผ์ ์ ๋ํด ์ฒด๊ณ์ ์ด๊ณ ๋งค๋ ฅ์ ์ธ ๋ธ๋ก๊ทธ ํฌ์คํธ๋ฅผ ์์ฑํ๊ฒ ์ต๋๋ค.
|
| 87 |
"""
|
|
|
|
| 236 |
|
| 237 |
return notice + "\n".join(summary_lines)
|
| 238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
+
###################################################
|
| 241 |
+
# SerpHouse 만 사용하는 웹 검색 함수
|
| 242 |
+
###################################################
|
| 243 |
def do_web_search(query: str) -> str:
|
| 244 |
"""
|
| 245 |
+
์น ๊ฒ์์ **SerpHouse**๋ก๋ง ์ํํ๋ ํจ์.
|
| 246 |
+
- SERPHOUSE_API_KEY๊ฐ ์๊ฑฐ๋ mock์ผ ๊ฒฝ์ฐ์๋ ๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํ
|
| 247 |
+
- API ํธ์ถ์ด ์ฑ๊ณตํ๋ฉด ๊ทธ ๊ฒฐ๊ณผ๋ฅผ ํ์ฑํ์ฌ ๋งํฌ๋ค์ด์ผ๋ก ๋ฐํ
|
| 248 |
+
- ์คํจํ๋ฉด ๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํ
|
| 249 |
"""
|
| 250 |
+
# 1) API 키 유효성 체크
|
| 251 |
+
if not SERPHOUSE_API_KEY or "mock" in SERPHOUSE_API_KEY.lower():
|
| 252 |
+
logging.warning("API ํค๊ฐ ์๊ฑฐ๋ Mock ๋ชจ๋์
๋๋ค. => ๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ ๋ฐํ")
|
| 253 |
+
return generate_mock_search_results(query)
|
| 254 |
+
|
| 255 |
try:
|
| 256 |
+
# SerpHouse API
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
url = "https://api.serphouse.com/serp/live"
|
| 258 |
params = {
|
| 259 |
"q": query,
|
| 260 |
"domain": "google.com",
|
| 261 |
+
"serp_type": "web", # ์น ๊ฒ์
|
| 262 |
+
"device": "desktop",
|
| 263 |
+
"lang": "ko", # ํ๊ตญ์ด
|
| 264 |
+
"num": "5" # ๊ฒฐ๊ณผ ์ต๋ 5๊ฐ
|
| 265 |
}
|
|
|
|
| 266 |
headers = {
|
| 267 |
"Authorization": f"Bearer {SERPHOUSE_API_KEY}"
|
| 268 |
}
|
| 269 |
|
| 270 |
logging.info(f"SerpHouse API ํธ์ถ ์ค... ๊ฒ์์ด: {query}")
|
|
|
|
|
|
|
| 271 |
response = requests.get(url, headers=headers, params=params, timeout=15)
|
| 272 |
response.raise_for_status()
|
| 273 |
|
|
|
|
| 274 |
data = response.json()
|
| 275 |
|
| 276 |
+
# ๊ฒฐ๊ณผ ๊ตฌ์กฐ ํ์
|
| 277 |
results = data.get("results", {})
|
| 278 |
organic = None
|
| 279 |
|
| 280 |
+
# 가능한 응답 구조 확인
|
| 281 |
if isinstance(results, dict) and "organic" in results:
|
| 282 |
organic = results["organic"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
elif "organic" in data:
|
| 284 |
organic = data["organic"]
|
| 285 |
+
|
| 286 |
+
# organic 결과가 없다면 가상 결과
|
| 287 |
if not organic:
|
| 288 |
+
logging.warning("SerpHouse ์๋ต์์ organic ํญ๋ชฉ์ ์ฐพ์ ์ ์์ต๋๋ค.")
|
| 289 |
+
return generate_mock_search_results(query)
|
| 290 |
+
|
| 291 |
+
# 최대 5개만 슬라이싱
|
| 292 |
+
organic = organic[:5]
|
|
|
|
| 293 |
|
| 294 |
+
# 결과 요약 문자열 만들기
|
| 295 |
summary_lines = []
|
| 296 |
+
for idx, item in enumerate(organic, start=1):
|
| 297 |
title = item.get("title", "No title")
|
| 298 |
link = item.get("link", "#")
|
| 299 |
snippet = item.get("snippet", "No description")
|
|
|
|
| 306 |
f"---\n"
|
| 307 |
)
|
| 308 |
|
|
|
|
| 309 |
instructions = """
|
| 310 |
# ์น ๊ฒ์ ๊ฒฐ๊ณผ
|
| 311 |
์๋๋ ๊ฒ์ ๊ฒฐ๊ณผ์
๋๋ค. ์ง๋ฌธ์ ๋ต๋ณํ ๋ ์ด ์ ๋ณด๋ฅผ ํ์ฉํ์ธ์:
|
|
|
|
| 315 |
4. ์ฌ๋ฌ ์ถ์ฒ์ ์ ๋ณด๋ฅผ ์ข
ํฉํ์ฌ ๋ต๋ณํ์ธ์
|
| 316 |
"""
|
| 317 |
|
| 318 |
+
return instructions + "\n".join(summary_lines)
|
| 319 |
+
|
|
|
|
|
|
|
| 320 |
except requests.exceptions.Timeout:
|
| 321 |
+
logging.error("SerpHouse ๊ฒ์ ํ์์์. => ๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํํฉ๋๋ค.")
|
| 322 |
+
return generate_mock_search_results(query)
|
| 323 |
except Exception as e:
|
| 324 |
+
logging.error(f"SerpHouse ๊ฒ์ ์คํจ: {e}")
|
| 325 |
+
return generate_mock_search_results(query)
|
| 326 |
+
|
| 327 |
|
| 328 |
def chatbot_interface():
|
| 329 |
st.title("Ginigen Blog")
|
|
|
|
| 470 |
message_placeholder = st.empty()
|
| 471 |
full_response = ""
|
| 472 |
|
| 473 |
+
# 웹 검색 수행 (옵션이 켜져 있을 경우)
|
| 474 |
system_prompt = get_system_prompt()
|
| 475 |
if st.session_state.use_web_search:
|
| 476 |
with st.spinner("์น์์ ๊ด๋ จ ์ ๋ณด๋ฅผ ๊ฒ์ ์ค..."):
|
|
|
|
| 478 |
search_query = extract_keywords(prompt, top_k=5)
|
| 479 |
st.info(f"๊ฒ์์ด: {search_query}")
|
| 480 |
|
| 481 |
+
# SerpHouse API๋ก ๊ฒ์
|
| 482 |
search_results = do_web_search(search_query)
|
| 483 |
|
| 484 |
if "๊ฐ์ ๊ฒ์ ๊ฒฐ๊ณผ" in search_results:
|
|
|
|
| 612 |
f.write("markdown>=3.5.1\n")
|
| 613 |
f.write("pillow>=10.1.0\n")
|
| 614 |
|
| 615 |
+
main()
|