Spaces:

Nymbo
/

Tools

Running

File size: 10,713 Bytes

588592f

from __future__ import annotations

import os
from typing import Annotated, Any, Literal

import httpx
import gradio as gr

from app import _log_call_end, _log_call_start, _truncate_for_log
from ._docstrings import autodoc


# ===========================================================================
# Constants
# ===========================================================================

# Root endpoint of the Pollinations text API; request paths are appended to it.
BASE_URL = "https://text.pollinations.ai"

# Depth level -> Pollinations model identifier.
# Insertion order matters: it is echoed back in the "Invalid depth" error text.
MODEL_MAPPING = dict(
    fast="gemini-search",
    normal="perplexity-fast",
    deep="perplexity-reasoning",
)

# System prompt keyed by the `detailed` flag (True = comprehensive, False = concise).
SYSTEM_PROMPTS = {
    True: (
        "Search the web and provide a comprehensive answer with sources. "
        "Include relevant details and cite your sources."
    ),
    False: (
        "Search the web and provide a concise, accurate answer. "
        "Include source URLs."
    ),
}

# Default per-request timeout, in seconds.
REQUEST_TIMEOUT = 30.0

# Single source of truth for the LLM-facing tool description.
TOOL_SUMMARY = " ".join(
    (
        "Search the web using AI-powered search models with source citations.",
        "Supports different depth levels: fast (Gemini with Google Search), normal (Perplexity Sonar),",
        "and deep (Perplexity Sonar Reasoning). Returns answers with source URLs.",
    )
)


# ===========================================================================
# Core Client
# ===========================================================================


class PollinationsClient:
    """
    HTTP client for Pollinations AI web search API.

    Sends OpenAI-compatible chat-completion requests to a search-capable model
    and returns the answer together with its source URLs.
    """

    def __init__(
        self,
        base_url: str = BASE_URL,
        timeout: float = REQUEST_TIMEOUT,
        api_key: str | None = None,
    ) -> None:
        """
        Initialize the Pollinations client.

        Args:
            base_url: Base URL for the Pollinations API (default: https://text.pollinations.ai)
            timeout: Request timeout in seconds (default: 30)
            api_key: Optional API key (reads from POLLINATIONS_API_KEY env var if not provided)
        """
        # Drop trailing slashes so the request path can be appended safely.
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.api_key = api_key or os.getenv("POLLINATIONS_API_KEY")

    def _get_headers(self) -> dict[str, str]:
        """Get request headers including API key if available."""
        headers = {
            "Content-Type": "application/json",
        }
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    def _resolve_model(self, depth: str) -> str:
        """
        Resolve depth level to actual model name.

        Args:
            depth: Depth level ('fast', 'normal', or 'deep')

        Returns:
            The model identifier for the Pollinations API; unknown depth
            values fall back to "perplexity-fast".
        """
        return MODEL_MAPPING.get(depth, "perplexity-fast")

    @staticmethod
    def _extract_urls(text: str) -> list[str]:
        """
        Collect the unique http(s) URLs found in free text, in order of appearance.

        Trailing sentence punctuation is stripped so a URL that ends a sentence
        ("see https://example.com.") is not reported with the final period.
        """
        import re

        url_pattern = r'https?://[^\s<>"\'\)]+'
        cleaned = (match.rstrip(".,;:!?") for match in re.findall(url_pattern, text))
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return list(dict.fromkeys(cleaned))

    def _parse_completion(self, data: Any) -> tuple[str, list[Any]]:
        """
        Pull the answer text and source list out of a decoded response body.

        Tolerates missing or null fields: anything that is absent or has an
        unexpected type yields an empty answer / empty source list instead of
        raising.

        Args:
            data: The decoded JSON body of the chat-completions response

        Returns:
            A ``(answer, sources)`` tuple; ``answer`` is always a string.
        """
        if not isinstance(data, dict):
            return "", []

        # OpenAI-compatible response format: choices[0].message.content
        answer = ""
        choices = data.get("choices")
        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
            message = choices[0].get("message")
            if isinstance(message, dict):
                content = message.get("content")
                if isinstance(content, str):
                    answer = content
                elif content is not None:
                    # Keep unexpected non-string content visible rather than dropping it.
                    answer = str(content)

        # Top-level "citations" list, when the response carries one.
        citations = data.get("citations")
        if isinstance(citations, str):
            sources: list[Any] = [citations]
        elif isinstance(citations, (list, tuple)):
            sources = [item for item in citations if item]
        else:
            sources = []

        # No explicit citations: fall back to URLs embedded in the answer text.
        if not sources:
            sources = self._extract_urls(answer)

        return answer, sources

    async def web_search(
        self,
        query: str,
        depth: str = "normal",
        detailed: bool = False,
    ) -> dict[str, Any]:
        """
        Perform web search using Pollinations AI.

        Args:
            query: The search query
            depth: Search depth level ('fast', 'normal', or 'deep')
            detailed: Whether to request a comprehensive answer

        Returns:
            Dictionary with keys:
                - answer: The generated answer
                - sources: List of source URLs (citations)
                - model: The model used
                - query: The original query

        Raises:
            httpx.HTTPError: For network/HTTP errors, timeouts, rate limiting,
                and responses whose body is not valid JSON
            ValueError: For invalid parameters
        """
        if not isinstance(query, str) or not query.strip():
            raise ValueError("Query cannot be empty")

        if depth not in MODEL_MAPPING:
            raise ValueError(f"Invalid depth: {depth}. Must be one of {list(MODEL_MAPPING.keys())}")

        model = self._resolve_model(depth)
        system_prompt = SYSTEM_PROMPTS.get(detailed, SYSTEM_PROMPTS[False])

        # Prepare OpenAI-compatible request
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query},
            ],
        }

        url = f"{self.base_url}/v1/chat/completions"

        async with httpx.AsyncClient(timeout=self.timeout) as client:
            try:
                response = await client.post(
                    url,
                    json=payload,
                    headers=self._get_headers(),
                )
                response.raise_for_status()
            except httpx.TimeoutException as exc:
                raise httpx.HTTPError(f"Request timed out after {self.timeout}s") from exc
            except httpx.HTTPStatusError as exc:
                # Handle rate limiting specifically
                if exc.response.status_code == 429:
                    raise httpx.HTTPError("Rate limited. Please try again later.") from exc
                raise

        # A non-JSON body (e.g. an HTML error page) is a transport-level failure,
        # not bad caller input, so surface it as HTTPError rather than the
        # ValueError subclass that the JSON decoder raises.
        try:
            data = response.json()
        except ValueError as exc:
            raise httpx.HTTPError("Pollinations API returned a non-JSON response") from exc

        answer, sources = self._parse_completion(data)

        return {
            "answer": answer,
            "sources": sources,
            "model": model,
            "query": query,
        }

    def web_search_sync(
        self,
        query: str,
        depth: str = "normal",
        detailed: bool = False,
    ) -> dict[str, Any]:
        """
        Synchronous version of web_search.

        Works both from plain synchronous code and from a thread that already
        has a running event loop (where ``asyncio.run`` alone would raise
        ``RuntimeError``). In the latter case the coroutine is executed on a
        short-lived worker thread and the calling thread blocks until it is done.

        Args:
            query: The search query
            depth: Search depth level ('fast', 'normal', or 'deep')
            detailed: Whether to request a comprehensive answer

        Returns:
            Dictionary with answer, sources, model, and query

        Raises:
            httpx.HTTPError: For network/HTTP errors
            ValueError: For invalid parameters
        """
        import asyncio
        import concurrent.futures

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread: the simple path is safe.
            return asyncio.run(self.web_search(query, depth, detailed))

        # A loop is already running here; give the coroutine its own loop in
        # a separate thread instead of nesting asyncio.run.
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(asyncio.run, self.web_search(query, depth, detailed))
            return future.result()


# ===========================================================================
# Gradio Tool Function
# ===========================================================================


@autodoc(
    summary=TOOL_SUMMARY,
)
def Pollinations_Web_Search(
    query: Annotated[str, "The search query string"],
    depth: Annotated[
        Literal["fast", "normal", "deep"],
        "Search depth: 'fast' (Gemini with Google Search), 'normal' (Perplexity Sonar), or 'deep' (Perplexity Sonar Reasoning).",
    ] = "normal",
    detailed: Annotated[bool, "Request a comprehensive answer instead of concise summary"] = False,
) -> str:
    """
    Search the web using Pollinations AI with source citations.

    Uses AI-powered search models that provide direct answers with source citations.
    Supports three depth levels for different search capabilities.
    """
    tool_name = "Pollinations_Web_Search"
    _log_call_start(tool_name, query=query, depth=depth, detailed=detailed)

    try:
        outcome = PollinationsClient().web_search_sync(query, depth, detailed)

        # Header section: echo of the request followed by the answer text.
        header = [
            f"Query: {outcome['query']}",
            f"Model: {outcome['model']}",
            f"Depth: {depth}",
            "",
            "Answer:",
            outcome["answer"] or "No answer generated.",
        ]

        # Footer section: numbered source list, or a placeholder when empty.
        citations = outcome["sources"]
        if citations:
            footer = ["", "Sources:"]
            footer.extend(f"  {position}. {link}" for position, link in enumerate(citations, 1))
        else:
            footer = ["", "(No sources provided)"]

        report = "\n".join(header + footer)
        _log_call_end(tool_name, _truncate_for_log(report))
        return report

    except Exception as exc:
        # Failures are returned as text rather than raised, so the tool
        # always hands a string back to its caller.
        # Check order matters: ValueError first, then HTTP errors, then the rest.
        if isinstance(exc, ValueError):
            prefix = "Invalid input"
        elif isinstance(exc, httpx.HTTPError):
            prefix = "Search failed"
        else:
            prefix = "Unexpected error"
        failure = f"{prefix}: {exc}"
        _log_call_end(tool_name, failure)
        return failure


# ===========================================================================
# Gradio Interface
# ===========================================================================


def build_interface() -> gr.Interface:
    """
    Assemble the Gradio UI for the Pollinations web search tool.

    Returns:
        A gr.Interface wired to Pollinations_Web_Search with three inputs
        (query, depth, detailed flag) and a read-only results textbox.
    """
    # Inputs are created in the same order as the tool function's parameters.
    query_box = gr.Textbox(
        label="Query",
        placeholder="Enter your search query here...",
        max_lines=2,
        info="The search query",
    )
    depth_picker = gr.Radio(
        label="Search Depth",
        choices=["fast", "normal", "deep"],
        value="normal",
        info="Search depth level: fast (Gemini), normal (Perplexity), deep (Reasoning)",
    )
    detail_toggle = gr.Checkbox(
        label="Detailed Answer",
        value=False,
        info="Request a comprehensive answer instead of concise summary",
    )

    results_box = gr.Textbox(
        label="Search Results",
        interactive=False,
        lines=15,
        max_lines=20,
    )

    # Centered HTML blurb shown under the title.
    blurb = (
        '<div style="text-align:center">AI-powered web search with source citations. '
        "Uses Google Search, Perplexity Sonar, and Perplexity Sonar Reasoning models "
        "to provide direct answers with reliable source URLs.</div>"
    )

    return gr.Interface(
        fn=Pollinations_Web_Search,
        inputs=[query_box, depth_picker, detail_toggle],
        outputs=results_box,
        title="Pollinations Web Search",
        description=blurb,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        submit_btn="Search",
    )


# ===========================================================================
# Public API
# ===========================================================================

# Names exported by a star-import of this module.
__all__ = ["PollinationsClient", "Pollinations_Web_Search", "build_interface"]