Spaces:
Sleeping
Sleeping
| import logging | |
| import os | |
| import time | |
| from typing import List | |
| import requests | |
| from tavily import TavilyClient # type: ignore[import] | |
| from src.agent.state import SearchResult | |
| logger = logging.getLogger(__name__) | |
def search_stackoverflow(query: str, limit: int = 3) -> List[SearchResult]:
    """Search Stack Overflow for questions related to a query.

    Queries the Stack Exchange ``search/advanced`` endpoint (site
    ``stackoverflow``, sorted by votes in descending order) and converts
    each returned item into a ``SearchResult``.

    Args:
        query: Search query. A blank or whitespace-only query returns ``[]``
            without issuing a request.
        limit: Maximum number of results to request (sent as ``pagesize``).

    Returns:
        A list of ``SearchResult`` objects. The list is empty when the query
        is blank or when the request / response handling fails for any
        reason (the failure is logged, never raised).
    """
    if not query.strip():
        logger.warning("Stack Overflow ๊ฒ์: ๋น ์ฟผ๋ฆฌ")
        return []

    try:
        url = "https://api.stackexchange.com/2.3/search/advanced"
        params = {
            "q": query,
            "order": "desc",
            "sort": "votes",
            "site": "stackoverflow",
            "pagesize": limit,
            "filter": "withbody",
        }

        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()

        data = response.json()
        items = data.get("items", [])

        # The highest vote count in this batch is the normalization denominator.
        max_score = max((item.get("score", 0) for item in items), default=1)

        results = []
        for item in items:
            title = item.get("title", "")
            body = item.get("body", "")[:500]  # keep only the start of the body
            content = f"{title}\n\n{body}"

            score = item.get("score", 0)
            if max_score > 0:
                # Normalize into [0, 1]. The lower clamp matters: a down-voted
                # question has a negative score and would otherwise produce a
                # negative relevance.
                relevance = max(0.0, min(score / max(max_score, 1), 1.0))
            else:
                # No positively scored item to normalize against.
                relevance = 0.5

            results.append(
                SearchResult(
                    source="Stack Overflow",
                    content=content,
                    url=item.get("link"),
                    relevance_score=relevance,
                )
            )

        logger.info("Stack Overflow ๊ฒ์ ์ฑ๊ณต: %d๊ฐ ๊ฒฐ๊ณผ", len(results))

        # Pause after a successful call to stay within the API rate limit.
        time.sleep(1)

        return results

    except Exception as e:
        # Best-effort search: log with traceback and fall back to no results.
        logger.error("Stack Overflow ๊ฒ์ ์คํจ: %s", e, exc_info=True)
        return []
def search_github(query: str, limit: int = 3) -> List[SearchResult]:
    """Search GitHub code for files related to a query.

    Calls the GitHub ``search/code`` endpoint with the query restricted to
    Python (``language:python``), sorted by ``indexed``. If the
    ``GITHUB_TOKEN`` environment variable is set, it is sent as an
    ``Authorization`` header; otherwise a warning is logged and the request
    is sent unauthenticated.

    Args:
        query: Search query. A blank or whitespace-only query returns ``[]``
            without issuing a request.
        limit: Maximum number of results to request (sent as ``per_page``).

    Returns:
        A list of ``SearchResult`` objects, each with a fixed relevance score
        of 0.8. The list is empty when the query is blank, when the API
        reports a rate limit or another HTTP error, or when anything else
        fails (failures are logged, never raised).
    """
    if not query.strip():
        logger.warning("GitHub ๊ฒ์: ๋น ์ฟผ๋ฆฌ")
        return []

    try:
        url = "https://api.github.com/search/code"
        # Restrict to Python code (language detection could be added later).
        search_query = f"{query} language:python"
        params = {
            "q": search_query,
            "sort": "indexed",
            "per_page": limit,
        }

        headers = {
            "Accept": "application/vnd.github.v3+json",
        }

        # Add the Authorization header when a token is configured.
        github_token = os.getenv("GITHUB_TOKEN")
        if github_token:
            headers["Authorization"] = f"token {github_token}"
            logger.debug("GitHub ํ ํฐ ์ฌ์ฉ (์ธ์ฆ๋ ์์ฒญ)")
        else:
            logger.warning(
                "GITHUB_TOKEN์ด ์ค์ ๋์ง ์์ - rate limit ์ ํ์ (60 req/hr). "
                "ํ ํฐ ์ค์ ์ 5,000 req/hr๋ก ์ฆ๊ฐ"
            )

        response = requests.get(url, params=params, headers=headers, timeout=10)
        response.raise_for_status()

        data = response.json()
        items = data.get("items", [])

        results = []
        for item in items:
            repo_name = item.get("repository", {}).get("full_name", "unknown")
            path = item.get("path", "")
            content = f"Repository: {repo_name}\nFile: {path}"

            results.append(
                SearchResult(
                    source="GitHub",
                    content=content,
                    url=item.get("html_url"),
                    relevance_score=0.8,  # fixed score: results are treated as generally relevant
                )
            )

        logger.info("GitHub ๊ฒ์ ์ฑ๊ณต: %d๊ฐ ๊ฒฐ๊ณผ", len(results))

        # Pause after a successful call to stay within the API rate limit.
        time.sleep(1)

        return results

    except requests.exceptions.HTTPError as e:
        # Guard against an HTTPError that carries no response object so the
        # handler itself cannot raise.
        status_code = e.response.status_code if e.response is not None else None
        # GitHub signals rate limiting with either 403 or 429.
        if status_code in (403, 429):
            logger.warning("GitHub API rate limit ์ด๊ณผ")
        else:
            logger.error("GitHub ๊ฒ์ HTTP ์๋ฌ: %s", e, exc_info=True)
        return []
    except Exception as e:
        # Best-effort search: log with traceback and fall back to no results.
        logger.error("GitHub ๊ฒ์ ์คํจ: %s", e, exc_info=True)
        return []
def search_official_docs(query: str, limit: int = 3) -> List[SearchResult]:
    """Search a fixed set of official documentation sites through Tavily.

    Args:
        query: Search query. A blank or whitespace-only query returns ``[]``
            without contacting the service.
        limit: Maximum number of results to request (sent as ``max_results``).

    Returns:
        A list of ``SearchResult`` objects. The list is empty when the query
        is blank, when ``TAVILY_API_KEY`` is not set, or when the search
        fails for any reason (failures are logged, never raised).
    """
    if not query.strip():
        logger.warning("Official Docs ๊ฒ์: ๋น ์ฟผ๋ฆฌ")
        return []

    tavily_key = os.getenv("TAVILY_API_KEY")
    if not tavily_key:
        logger.error("TAVILY_API_KEY ํ๊ฒฝ ๋ณ์๊ฐ ์ค์ ๋์ด ์์ง ์์ต๋๋ค.")
        return []

    # Sites the search is restricted to.
    doc_domains = [
        "docs.python.org",
        "docs.oracle.com",
        "spring.io/guides",
        "developer.mozilla.org",
        "reactjs.org/docs",
    ]

    try:
        tavily = TavilyClient(api_key=tavily_key)
        payload = tavily.search(
            query=query,
            search_depth="basic",
            max_results=limit,
            include_domains=doc_domains,
        )

        # Map each hit to a SearchResult; the hit's own "score" is used as
        # the relevance, defaulting to 0.5 when absent.
        hits = [
            SearchResult(
                source="Official Docs",
                content=hit.get("content", ""),
                url=hit.get("url", ""),
                relevance_score=hit.get("score", 0.5),
            )
            for hit in payload.get("results", [])
        ]

        logger.info("Tavily ๊ฒ์ ์ฑ๊ณต: %d๊ฐ ๊ฒฐ๊ณผ", len(hits))
        return hits
    except Exception as e:
        # Best-effort search: log with traceback and fall back to no results.
        logger.error("Tavily ๊ฒ์ ์คํจ: %s", e, exc_info=True)
        return []