Spaces:

Nymbo
/

Tools

Running

File size: 32,788 Bytes

588592f

from __future__ import annotations

import json
import os
from typing import Annotated, Any, Literal

import gradio as gr

from app import _log_call_end, _log_call_start, _truncate_for_log
from ._core import _resolve_path
from ._docstrings import autodoc

TOOL_SUMMARY = (
    "Scrape and extract structured data from known URLs using ScrapeGraphAI with "
    "Mistral-only models. Supports single-page extraction, bounded crawl extraction, "
    "multi-URL extraction, rendered markdown, and image-aware extraction."
)

ACTION_CHOICES = [
    "extract",
    "crawl_extract",
    "multi_extract",
    "render_markdown",
    "vision_extract",
]

RENDER_CHOICES = ["auto", "browser", "http"]

TEXT_MODEL_ENV = "SCRAPEGRAPH_TEXT_MODEL"
VISION_MODEL_ENV = "SCRAPEGRAPH_VISION_MODEL"
DEFAULT_TEXT_MODEL = "mistral-small-latest"
DEFAULT_VISION_MODEL = "pixtral-12b-latest"

_IMPORT_ERROR: Exception | None = None

try:
    from langchain.chat_models import init_chat_model
    from pydantic import BaseModel, Field, create_model
    from scrapegraphai.graphs import SmartScraperGraph, SmartScraperMultiGraph
    from scrapegraphai.graphs.abstract_graph import AbstractGraph
    from scrapegraphai.graphs.base_graph import BaseGraph
    from scrapegraphai.nodes import (
        DescriptionNode,
        FetchNode,
        FetchNodeLevelK,
        GenerateAnswerNodeKLevel,
        GenerateAnswerOmniNode,
        ImageToTextNode,
        ParseNode,
        ParseNodeDepthK,
        RAGNode,
    )
    from scrapegraphai.utils.convert_to_md import convert_to_md
except Exception as exc:  # pragma: no cover - import error path is runtime-only
    _IMPORT_ERROR = exc
    init_chat_model = None
    BaseModel = None
    Field = None
    create_model = None
    SmartScraperGraph = None
    SmartScraperMultiGraph = None
    AbstractGraph = None
    BaseGraph = None
    DescriptionNode = None
    FetchNode = None
    FetchNodeLevelK = None
    GenerateAnswerNodeKLevel = None
    GenerateAnswerOmniNode = None
    ImageToTextNode = None
    ParseNode = None
    ParseNodeDepthK = None
    RAGNode = None
    convert_to_md = None
else:
    class _LimitedFetchNodeLevelK(FetchNodeLevelK):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.max_pages = None if self.node_config is None else self.node_config.get("max_pages")

        def obtain_content(self, documents, loader_kwargs):
            documents = super().obtain_content(documents, loader_kwargs)
            if self.max_pages and len(documents) > self.max_pages:
                return documents[: self.max_pages]
            return documents

    class _BoundedDepthSearchGraph(AbstractGraph):
        def __init__(self, prompt: str, source: str, config: dict, schema: type[BaseModel] | None = None):
            super().__init__(prompt, config, source, schema)
            self.input_key = "url" if source.startswith("http") else "local_dir"

        def _create_graph(self):
            fetch_node_k = _LimitedFetchNodeLevelK(
                input="url| local_dir",
                output=["docs"],
                node_config={
                    "loader_kwargs": self.config.get("loader_kwargs", {}),
                    "force": self.config.get("force", False),
                    "cut": self.config.get("cut", True),
                    "browser_base": self.config.get("browser_base"),
                    "storage_state": self.config.get("storage_state"),
                    "depth": self.config.get("depth", 1),
                    "only_inside_links": self.config.get("only_inside_links", False),
                    "max_pages": self.config.get("max_pages"),
                },
            )
            parse_node_k = ParseNodeDepthK(
                input="docs",
                output=["docs"],
                node_config={"verbose": self.config.get("verbose", False)},
            )
            description_node = DescriptionNode(
                input="docs",
                output=["docs"],
                node_config={
                    "llm_model": self.llm_model,
                    "verbose": self.config.get("verbose", False),
                    "cache_path": self.config.get("cache_path", False),
                },
            )
            rag_node = RAGNode(
                input="docs",
                output=["vectorial_db"],
                node_config={
                    "llm_model": self.llm_model,
                    "embedder_model": self.config.get("embedder_model", False),
                    "verbose": self.config.get("verbose", False),
                },
            )
            generate_answer_k = GenerateAnswerNodeKLevel(
                input="vectorial_db",
                output=["answer"],
                node_config={
                    "llm_model": self.llm_model,
                    "embedder_model": self.config.get("embedder_model", False),
                    "verbose": self.config.get("verbose", False),
                    "schema": self.schema,
                },
            )
            return BaseGraph(
                nodes=[fetch_node_k, parse_node_k, description_node, rag_node, generate_answer_k],
                edges=[
                    (fetch_node_k, parse_node_k),
                    (parse_node_k, description_node),
                    (description_node, rag_node),
                    (rag_node, generate_answer_k),
                ],
                entry_point=fetch_node_k,
                graph_name=self.__class__.__name__,
            )

        def run(self):
            inputs = {"user_prompt": self.prompt, self.input_key: self.source}
            self.final_state, self.execution_info = self.graph.execute(inputs)
            return self.final_state.get("answer", "No answer found.")

    class _MistralOmniScraperGraph(AbstractGraph):
        def __init__(self, prompt: str, source: str, config: dict, schema: type[BaseModel] | None = None):
            self.max_images = config.get("max_images", 5)
            super().__init__(prompt, config, source, schema)
            self.input_key = "url" if source.startswith("http") else "local_dir"

        def _create_graph(self):
            vision_model = init_chat_model(
                model=self.config.get("vision_model", DEFAULT_VISION_MODEL),
                model_provider="mistralai",
                api_key=self.config["llm"]["api_key"],
                temperature=0,
            )
            fetch_node = FetchNode(
                input="url | local_dir",
                output=["doc"],
                node_config={
                    "loader_kwargs": self.config.get("loader_kwargs", {}),
                    "storage_state": self.config.get("storage_state"),
                    "use_soup": self.config.get("use_soup", False),
                    "timeout": self.config.get("timeout", 30),
                },
            )
            parse_node = ParseNode(
                input="doc & (url | local_dir)",
                output=["parsed_doc", "link_urls", "img_urls"],
                node_config={
                    "chunk_size": self.model_token,
                    "parse_urls": True,
                    "llm_model": self.llm_model,
                },
            )
            image_to_text_node = ImageToTextNode(
                input="img_urls",
                output=["img_desc"],
                node_config={
                    "llm_model": vision_model,
                    "max_images": self.max_images,
                },
            )
            generate_answer_omni_node = GenerateAnswerOmniNode(
                input="user_prompt & (relevant_chunks | parsed_doc | doc) & img_desc",
                output=["answer"],
                node_config={
                    "llm_model": self.llm_model,
                    "additional_info": self.config.get("additional_info"),
                    "schema": self.schema,
                },
            )
            return BaseGraph(
                nodes=[fetch_node, parse_node, image_to_text_node, generate_answer_omni_node],
                edges=[
                    (fetch_node, parse_node),
                    (parse_node, image_to_text_node),
                    (image_to_text_node, generate_answer_omni_node),
                ],
                entry_point=fetch_node,
                graph_name=self.__class__.__name__,
            )

        def run(self):
            inputs = {"user_prompt": self.prompt, self.input_key: self.source}
            self.final_state, self.execution_info = self.graph.execute(inputs)
            return self.final_state.get("answer", "No answer found.")


class ScrapeGraphToolError(RuntimeError):
    def __init__(self, code: str, message: str, hint: str | None = None):
        super().__init__(message)
        self.code = code
        self.message = message
        self.hint = hint


def _json_response(payload: dict[str, Any]) -> str:
    return json.dumps(payload, ensure_ascii=False, indent=2, default=str)


def _error_response(action: str, code: str, message: str, hint: str | None = None) -> str:
    return _json_response(
        {
            "action": action,
            "error": {"code": code, "message": message, **({"hint": hint} if hint else {})},
        }
    )


def _require_scrapegraph() -> None:
    if _IMPORT_ERROR is not None:
        raise ScrapeGraphToolError(
            "missing_scrapegraph_dependencies",
            f"ScrapeGraphAI dependencies are unavailable: {_IMPORT_ERROR}",
            "Install `scrapegraphai>=1.75.1` and its runtime dependencies.",
        )


def _require_mistral_key() -> str:
    api_key = os.getenv("MISTRAL_API_KEY", "").strip()
    if not api_key:
        raise ScrapeGraphToolError(
            "missing_mistral_api_key",
            "MISTRAL_API_KEY is not configured.",
            "Set MISTRAL_API_KEY in the environment before using ScrapeGraphAI extraction actions.",
        )
    return api_key


def _coerce_urls(urls: Any) -> list[str]:
    if urls is None or urls == "":
        return []
    if isinstance(urls, list):
        return [str(url).strip() for url in urls if str(url).strip()]
    if isinstance(urls, str):
        text = urls.strip()
        if not text:
            return []
        if text.startswith("["):
            parsed = json.loads(text)
            if not isinstance(parsed, list):
                raise ScrapeGraphToolError("invalid_urls", "urls must be a JSON array of URL strings.")
            return [str(url).strip() for url in parsed if str(url).strip()]
        return [part.strip() for part in text.replace("\r", "\n").replace(",", "\n").split("\n") if part.strip()]
    raise ScrapeGraphToolError("invalid_urls", "urls must be provided as a list or JSON array string.")


def _coerce_schema(schema_json: Any) -> dict[str, Any] | None:
    if schema_json in (None, "", {}):
        return None
    if isinstance(schema_json, dict):
        return schema_json
    if isinstance(schema_json, str):
        try:
            parsed = json.loads(schema_json)
        except json.JSONDecodeError as exc:
            raise ScrapeGraphToolError("invalid_schema_json", f"schema_json is not valid JSON: {exc}") from exc
        if not isinstance(parsed, dict):
            raise ScrapeGraphToolError("invalid_schema_json", "schema_json must decode to a JSON object.")
        return parsed
    raise ScrapeGraphToolError("invalid_schema_json", "schema_json must be a JSON object or JSON string.")


def _schema_to_type(name: str, schema: dict[str, Any]) -> Any:
    schema_type = schema.get("type")
    if schema_type == "string":
        return str
    if schema_type == "integer":
        return int
    if schema_type == "number":
        return float
    if schema_type == "boolean":
        return bool
    if schema_type == "array":
        item_schema = schema.get("items", {})
        return list[_schema_to_type(f"{name}Item", item_schema)]
    if schema_type == "object" or "properties" in schema:
        properties = schema.get("properties", {})
        required = set(schema.get("required", []))
        fields: dict[str, tuple[Any, Any]] = {}
        for prop_name, prop_schema in properties.items():
            prop_type = _schema_to_type(f"{name}{prop_name.title()}", prop_schema)
            description = prop_schema.get("description")
            is_required = prop_name in required
            annotation = prop_type if is_required else (prop_type | None)
            default = Field(... if is_required else None, description=description)
            fields[prop_name] = (annotation, default)
        return create_model(name, **fields)
    return Any


def _schema_to_model(schema: dict[str, Any] | None) -> type[BaseModel] | None:
    if not schema:
        return None
    if schema.get("type") not in (None, "object") and "properties" not in schema:
        raise ScrapeGraphToolError(
            "invalid_schema_json",
            "Only object-shaped JSON schemas are supported for schema_json.",
        )
    model_type = _schema_to_type("ScrapeGraphResult", schema)
    if not isinstance(model_type, type) or not issubclass(model_type, BaseModel):
        raise ScrapeGraphToolError(
            "invalid_schema_json",
            "schema_json must define an object with properties for structured extraction.",
        )
    return model_type


def _resolve_storage_state(storage_state_path: str | None) -> str | None:
    if not storage_state_path:
        return None
    candidate = storage_state_path.strip()
    if not candidate:
        return None
    if os.path.isabs(candidate):
        resolved = candidate
    else:
        resolved, _ = _resolve_path(candidate)
    if not os.path.exists(resolved):
        raise ScrapeGraphToolError(
            "invalid_storage_state_path",
            f"Storage state file not found: {candidate}",
        )
    return resolved


def _build_config(
    *,
    api_key: str | None,
    text_model: str | None = None,
    render_mode: str = "auto",
    timeout_s: int = 30,
    storage_state_path: str | None = None,
    depth: int | None = None,
    max_pages: int | None = None,
    same_domain_only: bool | None = None,
    max_images: int | None = None,
    vision_model: str | None = None,
) -> dict[str, Any]:
    if render_mode not in RENDER_CHOICES:
        raise ScrapeGraphToolError("invalid_render_mode", f"Unsupported render_mode: {render_mode}")
    config: dict[str, Any] = {
        "headless": True,
        "verbose": False,
        "timeout": max(5, int(timeout_s)),
        "use_soup": render_mode == "http",
    }
    if api_key:
        config["llm"] = {
            "api_key": api_key,
            "model": f"mistralai/{text_model or os.getenv(TEXT_MODEL_ENV, DEFAULT_TEXT_MODEL)}",
            "temperature": 0,
        }
    if storage_state_path:
        config["storage_state"] = storage_state_path
    if depth is not None:
        config["depth"] = max(1, int(depth))
    if max_pages is not None:
        config["max_pages"] = max(1, int(max_pages))
    if same_domain_only is not None:
        config["only_inside_links"] = bool(same_domain_only)
    if max_images is not None:
        config["max_images"] = max(1, int(max_images))
    if vision_model:
        config["vision_model"] = vision_model
    return config


def _json_safe(value: Any) -> Any:
    if BaseModel is not None and isinstance(value, BaseModel):
        return value.model_dump(mode="json")
    if isinstance(value, dict):
        return {key: _json_safe(val) for key, val in value.items()}
    if isinstance(value, list):
        return [_json_safe(item) for item in value]
    if isinstance(value, tuple):
        return [_json_safe(item) for item in value]
    if hasattr(value, "metadata") and hasattr(value, "page_content"):
        return {
            "page_content": getattr(value, "page_content", ""),
            "metadata": _json_safe(getattr(value, "metadata", {})),
        }
    if isinstance(value, str):
        stripped = value.strip()
        if stripped.startswith("{") or stripped.startswith("["):
            try:
                return json.loads(stripped)
            except Exception:
                return value
    return value


def _extract_sources(state: dict[str, Any], fallback: list[str] | None = None) -> list[str]:
    sources: list[str] = []
    for item in state.get("docs", []) or []:
        source = item.get("source") if isinstance(item, dict) else None
        if source and source not in sources:
            sources.append(source)
    for doc in state.get("doc", []) or []:
        metadata = getattr(doc, "metadata", {}) or {}
        source = metadata.get("source")
        if source and source not in sources:
            sources.append(source)
    if not sources and fallback:
        sources.extend([source for source in fallback if source])
    return sources


def _extract_links_and_images(doc_state: dict[str, Any], url: str) -> tuple[list[str], list[str]]:
    parse_node = ParseNode(
        input="doc & url",
        output=["parsed_doc", "link_urls", "img_urls"],
        node_config={
            "parse_urls": True,
            "parse_html": True,
            "chunk_size": 8192,
            "llm_model": None,
        },
    )
    docs = doc_state.get("doc")
    if not docs:
        docs = doc_state.get("html_content", [])
    if not docs:
        return [], []
    state = {"doc": docs, "url": url}
    parse_node.execute(state)
    return state.get("link_urls", []) or [], state.get("img_urls", []) or []


def _render_markdown_with_fetch(url: str, config: dict[str, Any]) -> tuple[dict[str, Any], list[dict[str, Any]]]:
    fetch_node = FetchNode(
        input="url",
        output=["doc"],
        node_config=config,
    )
    state = {"url": url}
    state = fetch_node.execute(state)
    docs = state.get("doc", []) or []
    if not docs:
        raise ScrapeGraphToolError("fetch_failed", "ScrapeGraph fetch returned no documents for render_markdown.")
    html = getattr(docs[0], "page_content", None) or ""
    if not html.strip():
        raise ScrapeGraphToolError("fetch_failed", "Fetched document for render_markdown had empty content.")
    state["markdown"] = convert_to_md(html)
    return state, []


@autodoc(summary=TOOL_SUMMARY)
def ScrapeGraphAI(
    action: Annotated[
        Literal["extract", "crawl_extract", "multi_extract", "render_markdown", "vision_extract"],
        "Action to run: extract, crawl_extract, multi_extract, render_markdown, or vision_extract.",
    ] = "extract",
    url: Annotated[str, "Single URL for extract, crawl_extract, render_markdown, or vision_extract."] = "",
    urls: Annotated[list[str] | str | None, "Explicit list of URLs for multi_extract. Accepts a list or JSON array string."] = None,
    prompt: Annotated[str, "Natural-language extraction prompt. Required for extraction actions."] = "",
    schema_json: Annotated[dict[str, Any] | str | None, "Optional object-shaped JSON schema for structured extraction."] = None,
    render_mode: Annotated[Literal["auto", "browser", "http"], "Fetch mode. `browser` uses ScrapeGraph browser loading, `http` uses requests + soup, `auto` currently follows ScrapeGraph's browser-first path."] = "auto",
    include_images: Annotated[bool, "For `extract`, include page images in the extraction context."] = False,
    depth: Annotated[int, "For `crawl_extract`, crawl depth from the starting URL."] = 1,
    max_pages: Annotated[int, "For `crawl_extract`, soft cap on fetched pages."] = 4,
    same_domain_only: Annotated[bool, "For `crawl_extract`, stay within the starting site's links only."] = True,
    max_urls: Annotated[int, "For `multi_extract`, maximum URLs allowed in one call."] = 8,
    max_images: Annotated[int, "For `vision_extract` and image-aware extraction, maximum images to describe."] = 5,
    max_chars: Annotated[int, "For `render_markdown`, trim returned markdown to this many characters."] = 12000,
    include_links: Annotated[bool, "For `render_markdown`, include discovered page links."] = True,
    timeout_s: Annotated[int, "Timeout in seconds passed to ScrapeGraph fetch and generation nodes."] = 30,
    storage_state_path: Annotated[str, "Optional Playwright storage state JSON path for authenticated pages."] = "",
    return_debug: Annotated[bool, "Include execution metadata and graph execution info in the response."] = False,
) -> str:
    _log_call_start(
        "ScrapeGraphAI",
        action=action,
        url=url,
        urls=urls,
        prompt=_truncate_for_log(prompt or "", 180),
        render_mode=render_mode,
        include_images=include_images,
        depth=depth,
        max_pages=max_pages,
        max_urls=max_urls,
        max_images=max_images,
        timeout_s=timeout_s,
        storage_state_path=storage_state_path,
        return_debug=return_debug,
    )

    try:
        _require_scrapegraph()
        storage_state = _resolve_storage_state(storage_state_path)
        schema = _coerce_schema(schema_json)
        schema_model = _schema_to_model(schema)
        text_model_name = os.getenv(TEXT_MODEL_ENV, DEFAULT_TEXT_MODEL)
        vision_model_name = os.getenv(VISION_MODEL_ENV, DEFAULT_VISION_MODEL)

        if action == "render_markdown":
            if not url.strip():
                raise ScrapeGraphToolError("missing_url", "url is required for render_markdown.")
            final_state, exec_info = _render_markdown_with_fetch(
                url.strip(),
                _build_config(
                    api_key=None,
                    render_mode=render_mode,
                    timeout_s=timeout_s,
                    storage_state_path=storage_state,
                ),
            )
            markdown = (final_state.get("markdown") or "")[: max(1000, int(max_chars))]
            links, images = _extract_links_and_images(final_state, url.strip())
            response = {
                "action": action,
                "result": {"markdown": markdown},
                "sources": [url.strip()],
                "artifacts": {
                    "markdown": markdown,
                    "links": links if include_links else [],
                    "images": images if include_images else [],
                    "per_url_results": [],
                },
                "meta": {
                    "render_mode_used": render_mode,
                    "text_model": None,
                    "vision_model": None,
                },
                "warnings": [],
            }
            if return_debug:
                response["debug"] = {"final_state": _json_safe(final_state), "execution_info": _json_safe(exec_info)}
            result = _json_response(response)
            _log_call_end("ScrapeGraphAI", _truncate_for_log(result))
            return result

        api_key = _require_mistral_key()
        if action == "extract":
            if not url.strip() or not prompt.strip():
                raise ScrapeGraphToolError("missing_arguments", "url and prompt are required for extract.")
            config = _build_config(
                api_key=api_key,
                text_model=text_model_name,
                render_mode=render_mode,
                timeout_s=timeout_s,
                storage_state_path=storage_state,
                max_images=max_images,
                vision_model=vision_model_name,
            )
            graph_cls = _MistralOmniScraperGraph if include_images else SmartScraperGraph
            graph = graph_cls(prompt=prompt.strip(), source=url.strip(), config=config, schema=schema_model)
            result_data = _json_safe(graph.run())
            final_state = graph.get_state()
            response = {
                "action": action,
                "result": result_data,
                "sources": _extract_sources(final_state, [url.strip()]),
                "artifacts": {
                    "markdown": None,
                    "links": final_state.get("link_urls", []) or [],
                    "images": final_state.get("img_urls", []) or [],
                    "per_url_results": [],
                },
                "meta": {
                    "render_mode_used": render_mode,
                    "text_model": text_model_name,
                    "vision_model": vision_model_name if include_images else None,
                },
                "warnings": [],
            }
            if return_debug:
                response["debug"] = {"final_state": _json_safe(final_state), "execution_info": _json_safe(graph.get_execution_info())}
            result = _json_response(response)
            _log_call_end("ScrapeGraphAI", _truncate_for_log(result))
            return result

        if action == "vision_extract":
            if not url.strip() or not prompt.strip():
                raise ScrapeGraphToolError("missing_arguments", "url and prompt are required for vision_extract.")
            graph = _MistralOmniScraperGraph(
                prompt=prompt.strip(),
                source=url.strip(),
                config=_build_config(
                    api_key=api_key,
                    text_model=text_model_name,
                    render_mode=render_mode,
                    timeout_s=timeout_s,
                    storage_state_path=storage_state,
                    max_images=max_images,
                    vision_model=vision_model_name,
                ),
                schema=schema_model,
            )
            result_data = _json_safe(graph.run())
            final_state = graph.get_state()
            img_urls = final_state.get("img_urls", []) or []
            if not img_urls:
                raise ScrapeGraphToolError("no_images_found", "No images were found on the page for vision_extract.")
            response = {
                "action": action,
                "result": result_data,
                "sources": _extract_sources(final_state, [url.strip()]),
                "artifacts": {
                    "markdown": None,
                    "links": final_state.get("link_urls", []) or [],
                    "images": img_urls,
                    "per_url_results": [],
                },
                "meta": {
                    "render_mode_used": render_mode,
                    "text_model": text_model_name,
                    "vision_model": vision_model_name,
                },
                "warnings": [],
            }
            if return_debug:
                response["debug"] = {"final_state": _json_safe(final_state), "execution_info": _json_safe(graph.get_execution_info())}
            result = _json_response(response)
            _log_call_end("ScrapeGraphAI", _truncate_for_log(result))
            return result

        if action == "multi_extract":
            normalized_urls = _coerce_urls(urls)
            if not normalized_urls or not prompt.strip():
                raise ScrapeGraphToolError("missing_arguments", "urls and prompt are required for multi_extract.")
            if len(normalized_urls) > max(1, int(max_urls)):
                raise ScrapeGraphToolError("too_many_urls", f"multi_extract supports at most {max_urls} URLs per call.")
            graph = SmartScraperMultiGraph(
                prompt=prompt.strip(),
                source=normalized_urls,
                config=_build_config(
                    api_key=api_key,
                    text_model=text_model_name,
                    render_mode=render_mode,
                    timeout_s=timeout_s,
                    storage_state_path=storage_state,
                ),
                schema=schema_model,
            )
            result_data = _json_safe(graph.run())
            final_state = graph.get_state()
            response = {
                "action": action,
                "result": result_data,
                "sources": normalized_urls,
                "artifacts": {
                    "markdown": None,
                    "links": [],
                    "images": [],
                    "per_url_results": _json_safe(final_state.get("results", [])),
                },
                "meta": {
                    "render_mode_used": render_mode,
                    "text_model": text_model_name,
                    "vision_model": None,
                },
                "warnings": [],
            }
            if return_debug:
                response["debug"] = {"final_state": _json_safe(final_state), "execution_info": _json_safe(graph.get_execution_info())}
            result = _json_response(response)
            _log_call_end("ScrapeGraphAI", _truncate_for_log(result))
            return result

        if action == "crawl_extract":
            if not url.strip() or not prompt.strip():
                raise ScrapeGraphToolError("missing_arguments", "url and prompt are required for crawl_extract.")
            graph = _BoundedDepthSearchGraph(
                prompt=prompt.strip(),
                source=url.strip(),
                config=_build_config(
                    api_key=api_key,
                    text_model=text_model_name,
                    render_mode=render_mode,
                    timeout_s=timeout_s,
                    storage_state_path=storage_state,
                    depth=depth,
                    max_pages=max_pages,
                    same_domain_only=same_domain_only,
                ),
                schema=schema_model,
            )
            result_data = _json_safe(graph.run())
            final_state = graph.get_state()
            response = {
                "action": action,
                "result": result_data,
                "sources": _extract_sources(final_state, [url.strip()]),
                "artifacts": {
                    "markdown": None,
                    "links": [],
                    "images": [],
                    "per_url_results": [],
                },
                "meta": {
                    "render_mode_used": render_mode,
                    "text_model": text_model_name,
                    "vision_model": None,
                },
                "warnings": [],
            }
            if return_debug:
                response["debug"] = {"final_state": _json_safe(final_state), "execution_info": _json_safe(graph.get_execution_info())}
            result = _json_response(response)
            _log_call_end("ScrapeGraphAI", _truncate_for_log(result))
            return result

        raise ScrapeGraphToolError("unsupported_action", f"Unsupported action: {action}")
    except ScrapeGraphToolError as exc:
        result = _error_response(action, exc.code, exc.message, exc.hint)
        _log_call_end("ScrapeGraphAI", _truncate_for_log(result))
        return result
    except Exception as exc:  # pragma: no cover - runtime integration path
        code = "browser_unavailable" if "playwright" in str(exc).lower() or "chromium" in str(exc).lower() else "fetch_failed"
        result = _error_response(action, code, f"ScrapeGraphAI action failed: {exc}")
        _log_call_end("ScrapeGraphAI", _truncate_for_log(result))
        return result


def build_interface() -> gr.Interface:
    return gr.Interface(
        fn=ScrapeGraphAI,
        inputs=[
            gr.Dropdown(choices=ACTION_CHOICES, value="extract", label="Action"),
            gr.Textbox(label="URL", placeholder="https://example.com"),
            gr.JSON(label="URLs", value=[]),
            gr.Textbox(label="Prompt", lines=4, placeholder="Extract pricing tiers and main limits."),
            gr.JSON(label="Schema JSON", value={}),
            gr.Dropdown(choices=RENDER_CHOICES, value="auto", label="Render Mode"),
            gr.Checkbox(label="Include Images", value=False),
            gr.Number(label="Depth", value=1, precision=0),
            gr.Number(label="Max Pages", value=4, precision=0),
            gr.Checkbox(label="Same Domain Only", value=True),
            gr.Number(label="Max URLs", value=8, precision=0),
            gr.Number(label="Max Images", value=5, precision=0),
            gr.Number(label="Max Chars", value=12000, precision=0),
            gr.Checkbox(label="Include Links", value=True),
            gr.Number(label="Timeout (seconds)", value=30, precision=0),
            gr.Textbox(label="Storage State Path", placeholder="Optional Playwright storage_state JSON path"),
            gr.Checkbox(label="Return Debug", value=False),
        ],
        outputs=gr.Textbox(label="Result", lines=20, max_lines=40),
        title="ScrapeGraphAI",
        description="<div style=\"text-align:center\">Mistral-only structured scraping using ScrapeGraphAI graphs.</div>",
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )


__all__ = ["ScrapeGraphAI", "build_interface"]