jwlee-ai's picture
Upload folder using huggingface_hub
c2446d5 verified
"""์›น ๊ฒ€์ƒ‰ ํˆด. ๋ฐฑ์—”๋“œ ์šฐ์„ ์ˆœ์œ„: SearXNG โ†’ Tavily โ†’ Brave โ†’ DuckDuckGo.
Web search tool. Backends are tried in priority order:
SearXNG -> Tavily -> Brave -> DuckDuckGo.
SearXNG is a metasearch engine that works without an API key (public
instances, with fallback between them). Tavily and Brave are used only when
their API keys are set through environment variables. Both have a free tier
(roughly Tavily 1k/month, Brave 2k/month). DuckDuckGo (DDG) is the last safety
net, but it is often blocked or fails with decoding errors, so it is more
reliable to put the preferred backends ahead of it.
Each backend returns a formatted string when it has results and None when it
does not. The caller (web_search) falls back to the next backend whenever it
gets None. DDG is the final fallback, so instead of None it always returns a
string (an error message or "No results found.").
Environment variables:
TAVILY_API_KEY Tavily Search API key (optional)
BRAVE_API_KEY Brave Search API key (optional)
"""
import os
import random
import requests
from smolagents import tool
# Endpoint that _search_tavily POSTs its query to.
_TAVILY_URL = "https://api.tavily.com/search"
# Endpoint that _search_brave GETs its query from.
_BRAVE_URL = "https://api.search.brave.com/res/v1/web/search"
# Pool of public SearXNG instances; no API key required. Each call tries only a
# randomly chosen subset, which (a) spreads requests so that a single instance
# is less likely to IP-block us and (b) keeps the cumulative timeout bounded.
# Refresh this list periodically from the searx.space list of live instances.
_SEARXNG_INSTANCES = (
"https://searx.be",
"https://searx.tiekoetter.com",
"https://search.inetol.net",
"https://searxng.online",
"https://priv.au",
)
_SEARXNG_TRY_COUNT = 3 # number of instances to try per call
_SEARXNG_TIMEOUT = 5 # per-instance timeout in seconds; cumulative cap ~15s
def _format_results(items) -> str:
"""๊ณตํ†ต ํฌ๋งคํ„ฐ: (title, url, snippet) ํŠœํ”Œ ๋ฆฌ์ŠคํŠธ๋ฅผ LLM-friendly ํ…์ŠคํŠธ๋กœ."""
lines = [f"- {t}\n {u}\n {b}" for t, u, b in items if (t or u or b)]
return "\n".join(lines) if lines else ""
def _search_searxng(query: str) -> str | None:
    """Query public SearXNG metasearch instances; no API key required.

    A random subset of ``_SEARXNG_INSTANCES`` is tried in turn: if one
    instance is down, blocked, or returns nothing, the next one is tried.
    Some instances reject requests with an empty or bot-like User-Agent, so a
    browser User-Agent is sent explicitly.

    Args:
        query: The search query string.

    Returns:
        Formatted results from the first instance that yields any, or ``None``
        when every attempted instance fails so the caller can fall back to the
        next backend.
    """
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept": "application/json",
    }
    # Random subset per call -> load spreading + bounded cumulative timeout.
    # Clamp the sample size: random.sample raises ValueError when asked for
    # more items than the pool holds, which would happen (outside the try
    # below) as soon as the instance list is trimmed under the try count.
    try_count = max(0, min(_SEARXNG_TRY_COUNT, len(_SEARXNG_INSTANCES)))
    candidates = random.sample(_SEARXNG_INSTANCES, try_count)
    for base in candidates:
        try:
            r = requests.get(
                f"{base}/search",
                params={"q": query, "format": "json", "language": "en"},
                headers=headers,
                timeout=_SEARXNG_TIMEOUT,
            )
            if r.status_code != 200:
                continue
            results = r.json().get("results", [])
            if not results:
                continue
            # Cap at 8 hits to control token usage (same limit as DDG).
            # `or ""` turns JSON nulls into empty strings so they are never
            # rendered as the text "None".
            items = [
                (x.get("title") or "", x.get("url") or "", x.get("content") or "")
                for x in results[:8]
            ]
            formatted = _format_results(items)
            if formatted:
                return formatted
        except Exception as e:
            # Best-effort backend: log and move on to the next instance.
            print(f"SearXNG ({base}) failed: {e}")
            continue
    return None
def _search_tavily(query: str) -> str | None:
"""Tavily Search API. TAVILY_API_KEY ๊ฐ€ ์žˆ์–ด์•ผ ํ˜ธ์ถœ."""
api_key = os.getenv("TAVILY_API_KEY")
if not api_key:
return None
try:
r = requests.post(
_TAVILY_URL,
json={"api_key": api_key, "query": query, "max_results": 8},
timeout=15,
)
r.raise_for_status()
results = r.json().get("results", [])
if not results:
return None
items = [
(x.get("title", ""), x.get("url", ""), x.get("content", ""))
for x in results
]
formatted = _format_results(items)
return formatted or None
except Exception as e:
print(f"Tavily search failed (falling back): {e}")
return None
def _search_brave(query: str) -> str | None:
"""Brave Search API. BRAVE_API_KEY ๊ฐ€ ์žˆ์–ด์•ผ ํ˜ธ์ถœ."""
api_key = os.getenv("BRAVE_API_KEY")
if not api_key:
return None
try:
r = requests.get(
_BRAVE_URL,
params={"q": query, "count": 8},
headers={
"X-Subscription-Token": api_key,
"Accept": "application/json",
},
timeout=15,
)
r.raise_for_status()
results = r.json().get("web", {}).get("results", [])
if not results:
return None
items = [
(x.get("title", ""), x.get("url", ""), x.get("description", ""))
for x in results
]
formatted = _format_results(items)
return formatted or None
except Exception as e:
print(f"Brave search failed (falling back): {e}")
return None
def _search_ddg(query: str) -> str:
"""DuckDuckGo. ddgs ํŒจํ‚ค์ง€ ์šฐ์„ , ์‹คํŒจ ์‹œ ๊ตฌ duckduckgo_search ํด๋ฐฑ.
๋งˆ์ง€๋ง‰ ํด๋ฐฑ์ด๋ผ None ๋Œ€์‹  ํ•ญ์ƒ ๋ฌธ์ž์—ด์„ ๋ฐ˜ํ™˜ํ•œ๋‹ค(์—๋Ÿฌ ๋ฉ”์‹œ์ง€ ๋˜๋Š” "No results found.")."""
# DDG ํด๋ผ์ด์–ธํŠธ ํŒจํ‚ค์ง€ ์ด๋ฆ„์ด `duckduckgo_search` โ†’ `ddgs`๋กœ ๋ฐ”๋€Œ์—ˆ๊ณ 
# ๊ตฌ ํŒจํ‚ค์ง€์—์„œ๋Š” "Body collection error: ..." ๊ฐ™์€ ๋””์ฝ”๋”ฉ ์—๋Ÿฌ๊ฐ€ ๋นˆ๋ฒˆํ–ˆ๋‹ค.
last_err = None
for module_name in ("ddgs", "duckduckgo_search"):
try:
mod = __import__(module_name, fromlist=["DDGS"])
DDGS = getattr(mod, "DDGS")
with DDGS() as ddgs:
# max_results=8: ๋„ˆ๋ฌด ์ ์œผ๋ฉด ์ •๋‹ต ์‚ฌ์ดํŠธ ๋ˆ„๋ฝ, ๋„ˆ๋ฌด ๋งŽ์œผ๋ฉด ์ปจํ…์ŠคํŠธ ๋‚ญ๋น„.
results = list(ddgs.text(query, max_results=8))
if not results:
continue
# ๋‘ ํŒจํ‚ค์ง€๊ฐ€ ํ‚ค ์ด๋ฆ„์ด ๋ฏธ๋ฌ˜ํ•˜๊ฒŒ ๋‹ค๋ฅด๋ฏ€๋กœ ์–‘์ชฝ ๋ชจ๋‘ ์ฒ˜๋ฆฌ.
items = [
(
r.get("title", ""),
r.get("href", "") or r.get("url", ""),
r.get("body", "") or r.get("snippet", ""),
)
for r in results
]
formatted = _format_results(items)
if formatted:
return formatted
except Exception as e:
last_err = e
continue
if last_err:
return f"web_search error: {last_err}"
return "No results found."
@tool
def web_search(query: str) -> str:
    """Search the web and return a list of titles, URLs, and snippets.
    Backend priority: SearXNG public instances (no key) -> Tavily/Brave (only if their
    API keys are set in environment variables TAVILY_API_KEY, BRAVE_API_KEY) ->
    DuckDuckGo fallback.
    Args:
        query: The search query string.
    """
    # NOTE(review): the docstring text above is kept as in the original because
    # the @tool decorator may read it as the tool description - confirm before
    # rewording it.
    #
    # Priority order: SearXNG first (the best keyless backend), then
    # Tavily/Brave, which return None immediately when their key is not set.
    preferred_backends = (_search_searxng, _search_tavily, _search_brave)
    for search in preferred_backends:
        answer = search(query)
        if answer:
            return answer
    # DuckDuckGo is the final safety net and always returns a string.
    return _search_ddg(query)