| from __future__ import annotations |
|
|
| from typing import Annotated, List |
|
|
| import gradio as gr |
| from ddgs import DDGS |
|
|
| from app import _log_call_end, _log_call_start, _search_rate_limiter, _truncate_for_log |
| from ._docstrings import autodoc |
|
|
|
|
| |
# One-line capability summary: surfaced to the tool layer via @autodoc on
# Web_Search and reused as the Gradio api_description in build_interface.
TOOL_SUMMARY = (
    "Run a DuckDuckGo-backed search across text, news, images, videos, or books. "
    "Readable results include pagination hints and next_offset when more results are available; "
    "Use in combination with `Web_Fetch` to navigate the web."
)
|
|
|
|
| def _extract_date_from_snippet(snippet: str) -> str: |
| if not snippet: |
| return "" |
| import re |
|
|
| date_patterns = [ |
| r"\b(\d{4}[-/]\d{1,2}[-/]\d{1,2})\b", |
| r"\b([A-Za-z]{3,9}\s+\d{1,2},?\s+\d{4})\b", |
| r"\b(\d{1,2}\s+[A-Za-z]{3,9}\s+\d{4})\b", |
| r"\b(\d+\s+(?:day|week|month|year)s?\s+ago)\b", |
| r"(?:Published|Updated|Posted):\s*([^,\n]+?)(?:[,\n]|$)", |
| ] |
| for pattern in date_patterns: |
| matches = re.findall(pattern, snippet, re.IGNORECASE) |
| if matches: |
| return matches[0].strip() |
| return "" |
|
|
|
|
| def _format_search_result(result: dict, search_type: str, index: int) -> List[str]: |
| lines: List[str] = [] |
| if search_type == "text": |
| title = result.get("title", "").strip() |
| url = result.get("href", "").strip() |
| snippet = result.get("body", "").strip() |
| date = _extract_date_from_snippet(snippet) |
| lines.append(f"{index}. {title}") |
| lines.append(f" URL: {url}") |
| if snippet: |
| lines.append(f" Summary: {snippet}") |
| if date: |
| lines.append(f" Date: {date}") |
| elif search_type == "news": |
| title = result.get("title", "").strip() |
| url = result.get("url", "").strip() |
| body = result.get("body", "").strip() |
| date = result.get("date", "").strip() |
| source = result.get("source", "").strip() |
| lines.append(f"{index}. {title}") |
| lines.append(f" URL: {url}") |
| if source: |
| lines.append(f" Source: {source}") |
| if date: |
| lines.append(f" Date: {date}") |
| if body: |
| lines.append(f" Summary: {body}") |
| elif search_type == "images": |
| title = result.get("title", "").strip() |
| image_url = result.get("image", "").strip() |
| source_url = result.get("url", "").strip() |
| source = result.get("source", "").strip() |
| width = result.get("width", "") |
| height = result.get("height", "") |
| lines.append(f"{index}. {title}") |
| lines.append(f" Image: {image_url}") |
| lines.append(f" Source: {source_url}") |
| if source: |
| lines.append(f" Publisher: {source}") |
| if width and height: |
| lines.append(f" Dimensions: {width}x{height}") |
| elif search_type == "videos": |
| title = result.get("title", "").strip() |
| description = result.get("description", "").strip() |
| duration = result.get("duration", "").strip() |
| published = result.get("published", "").strip() |
| uploader = result.get("uploader", "").strip() |
| embed_url = result.get("embed_url", "").strip() |
| lines.append(f"{index}. {title}") |
| if embed_url: |
| lines.append(f" Video: {embed_url}") |
| if uploader: |
| lines.append(f" Uploader: {uploader}") |
| if duration: |
| lines.append(f" Duration: {duration}") |
| if published: |
| lines.append(f" Published: {published}") |
| if description: |
| lines.append(f" Description: {description}") |
| elif search_type == "books": |
| title = result.get("title", "").strip() |
| url = result.get("url", "").strip() |
| body = result.get("body", "").strip() |
| lines.append(f"{index}. {title}") |
| lines.append(f" URL: {url}") |
| if body: |
| lines.append(f" Description: {body}") |
| return lines |
|
|
|
|
@autodoc(
    summary=TOOL_SUMMARY,
)
def Web_Search(
    query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."],
    max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
    page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
    search_type: Annotated[str, "Type of search: 'text' (web pages), 'news', 'images', 'videos', or 'books'."] = "text",
    offset: Annotated[int, "Result offset to start from (overrides page if > 0, for precise continuation)."] = 0,
) -> str:
    # NOTE(review): no docstring on purpose — @autodoc presumably builds one
    # from TOOL_SUMMARY and the Annotated parameter hints; confirm before adding.
    _log_call_start("Web_Search", query=query, max_results=max_results, page=page, search_type=search_type, offset=offset)
    # Guard: an empty/whitespace-only query cannot be searched.
    if not query or not query.strip():
        result = "No search query provided. Please enter a search term."
        _log_call_end("Web_Search", _truncate_for_log(result))
        return result
    # Clamp user-supplied numbers into safe ranges instead of erroring out.
    max_results = max(1, min(20, max_results))
    page = max(1, page)
    offset = max(0, offset)
    valid_types = ["text", "news", "images", "videos", "books"]
    if search_type not in valid_types:
        search_type = "text"  # silently fall back rather than fail on a typo
    # An explicit offset wins over page; otherwise derive the offset from page.
    if offset > 0:
        actual_offset = offset
        calculated_page = (offset // max_results) + 1  # approximate page for display
    else:
        actual_offset = (page - 1) * max_results
        calculated_page = page
    total_needed = actual_offset + max_results
    used_fallback = False  # set when an empty news search transparently retries as text
    original_search_type = search_type

    def _perform_search(stype: str) -> list[dict]:
        # Run one DDGS query of the requested type. Returns [] for "no results"
        # style errors; raises Exception with a user-facing message otherwise.
        try:
            _search_rate_limiter.acquire()  # throttle to avoid provider blocks
            with DDGS() as ddgs:
                # Over-fetch by 10 beyond the requested window so the caller can
                # tell whether more results exist (drives the pagination hints).
                if stype == "text":
                    raw_gen = ddgs.text(query, max_results=total_needed + 10)
                elif stype == "news":
                    raw_gen = ddgs.news(query, max_results=total_needed + 10)
                elif stype == "images":
                    raw_gen = ddgs.images(query, max_results=total_needed + 10)
                elif stype == "videos":
                    raw_gen = ddgs.videos(query, max_results=total_needed + 10)
                else:
                    raw_gen = ddgs.books(query, max_results=total_needed + 10)
                try:
                    # Materializing the generator is where DDGS actually fetches,
                    # so network/provider errors can surface here as well.
                    return list(raw_gen)
                except Exception as inner_exc:
                    if "no results" in str(inner_exc).lower() or "not found" in str(inner_exc).lower():
                        return []  # treat "no results" errors as an empty result set
                    raise inner_exc
        except Exception as exc:
            # Map common failure modes onto friendlier messages for the caller.
            error_msg = f"Search failed: {str(exc)[:200]}"
            lowered = str(exc).lower()
            if "blocked" in lowered or "rate" in lowered:
                error_msg = "Search temporarily blocked due to rate limiting. Please try again in a few minutes."
            elif "timeout" in lowered:
                error_msg = "Search timed out. Please try again with a simpler query."
            elif "network" in lowered or "connection" in lowered:
                error_msg = "Network connection error. Please check your internet connection and try again."
            elif "no results" in lowered or "not found" in lowered:
                return []
            raise Exception(error_msg)

    try:
        raw = _perform_search(search_type)
    except Exception as exc:
        result = f"Error: {exc}"
        _log_call_end("Web_Search", _truncate_for_log(result))
        return result

    # Smart fallback: empty news results often succeed as a general text search.
    if not raw and search_type == "news":
        try:
            raw = _perform_search("text")
            if raw:
                used_fallback = True
                search_type = "text"
        except Exception:
            pass  # best-effort fallback; keep the original empty outcome

    if not raw:
        fallback_note = " (also tried 'text' search as fallback)" if original_search_type == "news" and used_fallback else ""
        result = f"No {original_search_type} results found for query: {query}{fallback_note}"
        _log_call_end("Web_Search", _truncate_for_log(result))
        return result

    # Slice the requested window out of the over-fetched result list.
    paginated_results = raw[actual_offset: actual_offset + max_results]
    if not paginated_results:
        if actual_offset >= len(raw):
            result = f"Offset {actual_offset} exceeds available results ({len(raw)} total). Try offset=0 to start from beginning."
        else:
            # NOTE(review): with max_results >= 1, an empty slice implies
            # actual_offset >= len(raw), so this branch looks unreachable.
            result = f"No {original_search_type} results found on page {calculated_page} for query: {query}. Try page 1 or reduce page number."
        _log_call_end("Web_Search", _truncate_for_log(result))
        return result

    # Build the human-readable header with 1-based result numbering.
    total_available = len(raw)
    start_num = actual_offset + 1
    end_num = actual_offset + len(paginated_results)
    next_offset = actual_offset + len(paginated_results)
    search_label = original_search_type.title()
    if used_fallback:
        search_label += " → Text (Smart Fallback)"
    pagination_info = f"Page {calculated_page}"
    if offset > 0:
        pagination_info = f"Offset {actual_offset} (≈ {pagination_info})"
    lines = [f"{search_label} search results for: {query}"]
    if used_fallback:
        lines.append("📍 Note: News search returned no results, automatically searched general web content instead")
    lines.append(f"{pagination_info} (results {start_num}-{end_num} of ~{total_available}+ available)\n")
    for i, result in enumerate(paginated_results, start_num):
        result_lines = _format_search_result(result, search_type, i)
        lines.extend(result_lines)
        lines.append("")  # blank separator between results
    # Emit continuation hints only when the over-fetch proved more results exist.
    if total_available > end_num:
        lines.append("💡 More results available:")
        lines.append(f" • Next page: page={calculated_page + 1}")
        lines.append(f" • Next offset: offset={next_offset}")
        lines.append(f" • Use offset={next_offset} to continue exactly from result {next_offset + 1}")
    result = "\n".join(lines)
    search_info = f"type={original_search_type}"
    if used_fallback:
        search_info += "→text"
    _log_call_end("Web_Search", f"{search_info} page={calculated_page} offset={actual_offset} results={len(paginated_results)} chars={len(result)}")
    return result
|
|
|
|
def build_interface() -> gr.Interface:
    """Construct the Gradio Interface that exposes Web_Search interactively."""
    # Declare widgets up front so the input list reads top-to-bottom.
    query_input = gr.Textbox(label="Query", placeholder="topic OR site:example.com", max_lines=1)
    count_input = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results")
    page_input = gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination (ignored if offset > 0)")
    type_input = gr.Radio(
        label="Search Type",
        choices=["text", "news", "images", "videos", "books"],
        value="text",
        info="Type of content to search for",
    )
    offset_input = gr.Slider(
        minimum=0,
        maximum=1000,
        value=0,
        step=1,
        label="Offset",
        info="Result offset to start from (overrides page if > 0, use next_offset from previous search)",
    )
    description_html = (
        "<div style=\"text-align:center\">Multi-type web search with readable output format, date detection, and flexible pagination. "
        "Supports text, news, images, videos, and books. Features smart fallback for news searches and precise offset control.</div>"
    )
    return gr.Interface(
        fn=Web_Search,
        inputs=[query_input, count_input, page_input, type_input, offset_input],
        outputs=gr.Textbox(label="Search Results", interactive=False, lines=20, max_lines=20),
        title="Web Search",
        description=description_html,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        submit_btn="Search",
    )
|
|
|
|
# Public API of this module: the tool function and its Gradio UI factory.
__all__ = ["Web_Search", "build_interface"]
|
|