Spaces:
Sleeping
Sleeping
"""Web search tool. Backend priority: SearXNG -> Tavily -> Brave -> DuckDuckGo.

SearXNG is a metasearch engine that works without a key (with fallback across
public instances). Tavily/Brave are used only when their API keys are set via
environment variables. Both have a free tier (roughly Tavily 1k/month, Brave
2k/month). DDG is the last safety net, but it is often blocked or hits decoding
errors, so placing the other backends ahead of it is more stable.

Each backend returns a formatted string when it has results and None when it
does not. The caller (web_search) falls back to the next backend when it gets
None. DDG is the last fallback, so instead of None it always returns a string
(an error message or "No results found.").

Environment variables:
    TAVILY_API_KEY  Tavily Search API key (optional)
    BRAVE_API_KEY   Brave Search API key (optional)
"""
| import os | |
| import random | |
| import requests | |
| from smolagents import tool | |
# Endpoints of the two key-based search APIs.
_TAVILY_URL = "https://api.tavily.com/search"
_BRAVE_URL = "https://api.search.brave.com/res/v1/web/search"

# Pool of public SearXNG instances; no API key required. Each call tries only a
# random subset of them so that (a) requests are spread out and no single
# instance escalates IP blocking against us, and (b) the cumulative timeout has
# an upper bound. Refresh this list periodically from the searx.space
# availability list.
_SEARXNG_INSTANCES = (
    "https://searx.be",
    "https://searx.tiekoetter.com",
    "https://search.inetol.net",
    "https://searxng.online",
    "https://priv.au",
)
_SEARXNG_TRY_COUNT = 3  # number of instances to try per call
_SEARXNG_TIMEOUT = 5  # per-instance timeout in seconds -> cumulative cap of roughly 15s
def _format_results(items) -> str:
    """Render search hits as a plain-text bullet list suitable for an LLM.

    Args:
        items: Iterable of ``(title, url, snippet)`` triples.

    Returns:
        One three-line entry per triple, joined by newlines. Triples whose
        three fields are all falsy are skipped; an empty string is returned
        when nothing is left.
    """
    entries = []
    for title, url, snippet in items:
        # Skip hits that carry no information at all.
        if not (title or url or snippet):
            continue
        entries.append(f"- {title}\n {url}\n {snippet}")
    # Joining an empty list already yields "".
    return "\n".join(entries)
| def _search_searxng(query: str) -> str | None: | |
| """SearXNG ๋ฉํ๊ฒ์. Google/Bing/DDG ๋ฑ 70+ ์์ง์ ๋ฌถ์ด ๋ฐํ. ํค ๋ถํ์. | |
| ๊ณต๊ฐ ์ธ์คํด์ค ํด๋ฐฑ โ ํ ๊ณณ ์ฃฝ์ผ๋ฉด ๋ค์์ผ๋ก. ๋ชจ๋ ์คํจํ๋ฉด None ๋ฐํํด | |
| ํธ์ถ์๊ฐ ๋ค์ ๋ฐฑ์๋(Tavily/Brave/DDG)๋ก ํด๋ฐฑํ๊ฒ ํ๋ค. | |
| ์ผ๋ถ ์ธ์คํด์ค๋ ๋น UA ๋๋ ๋ด์ฒ๋ผ ๋ณด์ด๋ ์์ฒญ์ ์ฐจ๋จํ๋ฏ๋ก ๋ธ๋ผ์ฐ์ UA๋ฅผ ๋ช ์. | |
| """ | |
| headers = { | |
| "User-Agent": ( | |
| "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " | |
| "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" | |
| ), | |
| "Accept": "application/json", | |
| } | |
| # ํธ์ถ๋ง๋ค ๋ฌด์์ ๋ถ๋ถ์งํฉ โ ๋ถํ ๋ถ์ฐ + ๋์ timeout ํต์ . | |
| candidates = random.sample(_SEARXNG_INSTANCES, _SEARXNG_TRY_COUNT) | |
| for base in candidates: | |
| try: | |
| r = requests.get( | |
| f"{base}/search", | |
| params={"q": query, "format": "json", "language": "en"}, | |
| headers=headers, | |
| timeout=_SEARXNG_TIMEOUT, | |
| ) | |
| if r.status_code != 200: | |
| continue | |
| results = r.json().get("results", []) | |
| if not results: | |
| continue | |
| items = [ | |
| (x.get("title", ""), x.get("url", ""), x.get("content", "")) | |
| for x in results[:8] # ํ ํฐ ์ ์ด, DDG์ ๋์ผํ max_results=8 | |
| ] | |
| formatted = _format_results(items) | |
| if formatted: | |
| return formatted | |
| except Exception as e: | |
| print(f"SearXNG ({base}) failed: {e}") | |
| continue | |
| return None | |
| def _search_tavily(query: str) -> str | None: | |
| """Tavily Search API. TAVILY_API_KEY ๊ฐ ์์ด์ผ ํธ์ถ.""" | |
| api_key = os.getenv("TAVILY_API_KEY") | |
| if not api_key: | |
| return None | |
| try: | |
| r = requests.post( | |
| _TAVILY_URL, | |
| json={"api_key": api_key, "query": query, "max_results": 8}, | |
| timeout=15, | |
| ) | |
| r.raise_for_status() | |
| results = r.json().get("results", []) | |
| if not results: | |
| return None | |
| items = [ | |
| (x.get("title", ""), x.get("url", ""), x.get("content", "")) | |
| for x in results | |
| ] | |
| formatted = _format_results(items) | |
| return formatted or None | |
| except Exception as e: | |
| print(f"Tavily search failed (falling back): {e}") | |
| return None | |
| def _search_brave(query: str) -> str | None: | |
| """Brave Search API. BRAVE_API_KEY ๊ฐ ์์ด์ผ ํธ์ถ.""" | |
| api_key = os.getenv("BRAVE_API_KEY") | |
| if not api_key: | |
| return None | |
| try: | |
| r = requests.get( | |
| _BRAVE_URL, | |
| params={"q": query, "count": 8}, | |
| headers={ | |
| "X-Subscription-Token": api_key, | |
| "Accept": "application/json", | |
| }, | |
| timeout=15, | |
| ) | |
| r.raise_for_status() | |
| results = r.json().get("web", {}).get("results", []) | |
| if not results: | |
| return None | |
| items = [ | |
| (x.get("title", ""), x.get("url", ""), x.get("description", "")) | |
| for x in results | |
| ] | |
| formatted = _format_results(items) | |
| return formatted or None | |
| except Exception as e: | |
| print(f"Brave search failed (falling back): {e}") | |
| return None | |
def _search_ddg(query: str) -> str:
    """Search DuckDuckGo: prefer the ``ddgs`` package, then ``duckduckgo_search``.

    This is the last fallback backend, so it never returns ``None``.

    Args:
        query: The search query string.

    Returns:
        The formatted result text on success; ``"web_search error: ..."`` when
        a client raised while searching, or when neither client package could
        be imported; otherwise ``"No results found."``.
    """
    # The DDG client package was renamed from `duckduckgo_search` to `ddgs`;
    # the old package frequently fails with decoding errors such as
    # "Body collection error: ...", so it is only tried second.
    last_err = None  # most recent failure raised by a client that was importable
    import_err = None  # most recent failure to import a client package
    searched = False  # True once any client completed a search without raising
    for module_name in ("ddgs", "duckduckgo_search"):
        try:
            mod = __import__(module_name, fromlist=["DDGS"])
            DDGS = getattr(mod, "DDGS")
        except (ImportError, AttributeError) as e:
            # A package that is simply not installed must not mask a clean
            # "no results" outcome from the other client.
            import_err = e
            continue
        try:
            with DDGS() as ddgs:
                # max_results=8: too few misses the right site, too many
                # wastes context.
                results = list(ddgs.text(query, max_results=8))
            searched = True
            if not results:
                continue
            # The two packages use slightly different key names; accept both.
            items = [
                (
                    r.get("title", ""),
                    r.get("href", "") or r.get("url", ""),
                    r.get("body", "") or r.get("snippet", ""),
                )
                for r in results
            ]
            formatted = _format_results(items)
            if formatted:
                return formatted
        except Exception as e:
            last_err = e
            continue
    if last_err is not None:
        return f"web_search error: {last_err}"
    if import_err is not None and not searched:
        # No client could be loaded at all, so the import failure is the real error.
        return f"web_search error: {import_err}"
    return "No results found."
def web_search(query: str) -> str:
    """Search the web and return a list of titles, URLs, and snippets.

    Backend priority: SearXNG public instances (no key) -> Tavily/Brave (only if their
    API keys are set in environment variables TAVILY_API_KEY, BRAVE_API_KEY) ->
    DuckDuckGo fallback.

    Args:
        query: The search query string.
    """
    # NOTE(review): the docstring text is kept as-is because it may be consumed
    # as the tool description (the file imports `tool` from smolagents) - confirm.
    #
    # SearXNG goes first: it needs no key. Tavily/Brave return None right away
    # when their key is not set, so they are simply passed over. Each of these
    # backends signals "nothing usable" with a falsy value.
    for backend in (_search_searxng, _search_tavily, _search_brave):
        found = backend(query)
        if found:
            return found
    # DuckDuckGo is the final safety net and always returns a string.
    return _search_ddg(query)